11// Copyright (c) Six Labors.
22// Licensed under the Six Labors Split License.
33
4+ using System . Buffers ;
45using System . Numerics ;
56using System . Runtime . CompilerServices ;
67using System . Runtime . InteropServices ;
78using SixLabors . ImageSharp . Advanced ;
9+ using SixLabors . ImageSharp . ColorProfiles . Companding ;
810using SixLabors . ImageSharp . Memory ;
911using SixLabors . ImageSharp . PixelFormats ;
1012using SixLabors . ImageSharp . Processing . Processors . Convolution . Parameters ;
@@ -77,22 +79,36 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
7779 {
7880 Rectangle sourceRectangle = Rectangle . Intersect ( this . SourceRectangle , source . Bounds ) ;
7981
82+ MemoryAllocator allocator = this . Configuration . MemoryAllocator ;
83+
84+ // Convolution is memory-bandwidth-bound with low arithmetic intensity, so the row
85+ // passes below run sequentially: parallelization degrades performance due to
86+ // cache-line contention from overlapping source row reads. See #3111.
87+
8088 // Preliminary gamma highlight pass
8189 if ( this . gamma == 3F )
8290 {
8391 ApplyGamma3ExposureRowOperation gammaOperation = new ( sourceRectangle , source . PixelBuffer , this . Configuration ) ;
84- ParallelRowIterator . IterateRows < ApplyGamma3ExposureRowOperation , Vector4 > (
85- this . Configuration ,
86- sourceRectangle ,
87- in gammaOperation ) ;
92+
93+ using IMemoryOwner < Vector4 > gammaBuffer = allocator . Allocate < Vector4 > ( gammaOperation . GetRequiredBufferLength ( sourceRectangle ) ) ;
94+ Span < Vector4 > gammaSpan = gammaBuffer . Memory . Span ;
95+
96+ for ( int y = sourceRectangle . Top ; y < sourceRectangle . Bottom ; y ++ )
97+ {
98+ gammaOperation . Invoke ( y , gammaSpan ) ;
99+ }
88100 }
89101 else
90102 {
91103 ApplyGammaExposureRowOperation gammaOperation = new ( sourceRectangle , source . PixelBuffer , this . Configuration , this . gamma ) ;
92- ParallelRowIterator . IterateRows < ApplyGammaExposureRowOperation , Vector4 > (
93- this . Configuration ,
94- sourceRectangle ,
95- in gammaOperation ) ;
104+
105+ using IMemoryOwner < Vector4 > gammaBuffer = allocator . Allocate < Vector4 > ( gammaOperation . GetRequiredBufferLength ( sourceRectangle ) ) ;
106+ Span < Vector4 > gammaSpan = gammaBuffer . Memory . Span ;
107+
108+ for ( int y = sourceRectangle . Top ; y < sourceRectangle . Bottom ; y ++ )
109+ {
110+ gammaOperation . Invoke ( y , gammaSpan ) ;
111+ }
96112 }
97113
98114 // Create a 0-filled buffer to use to store the result of the component convolutions
@@ -105,18 +121,20 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
105121 if ( this . gamma == 3F )
106122 {
107123 ApplyInverseGamma3ExposureRowOperation operation = new ( sourceRectangle , source . PixelBuffer , processingBuffer , this . Configuration ) ;
108- ParallelRowIterator . IterateRows (
109- this . Configuration ,
110- sourceRectangle ,
111- in operation ) ;
124+
125+ for ( int y = sourceRectangle . Top ; y < sourceRectangle . Bottom ; y ++ )
126+ {
127+ operation . Invoke ( y ) ;
128+ }
112129 }
113130 else
114131 {
115- ApplyInverseGammaExposureRowOperation operation = new ( sourceRectangle , source . PixelBuffer , processingBuffer , this . Configuration , 1 / this . gamma ) ;
116- ParallelRowIterator . IterateRows (
117- this . Configuration ,
118- sourceRectangle ,
119- in operation ) ;
132+ ApplyInverseGammaExposureRowOperation operation = new ( sourceRectangle , source . PixelBuffer , processingBuffer , this . Configuration , this . gamma ) ;
133+
134+ for ( int y = sourceRectangle . Top ; y < sourceRectangle . Bottom ; y ++ )
135+ {
136+ operation . Invoke ( y ) ;
137+ }
120138 }
121139 }
122140
@@ -169,10 +187,15 @@ private void OnFrameApplyCore(
169187 kernel ,
170188 configuration ) ;
171189
172- ParallelRowIterator . IterateRows < FirstPassConvolutionRowOperation , Vector4 > (
173- configuration ,
174- sourceRectangle ,
175- in horizontalOperation ) ;
190+ using ( IMemoryOwner < Vector4 > hBuffer = configuration . MemoryAllocator . Allocate < Vector4 > ( horizontalOperation . GetRequiredBufferLength ( sourceRectangle ) ) )
191+ {
192+ Span < Vector4 > hSpan = hBuffer . Memory . Span ;
193+
194+ for ( int y = sourceRectangle . Top ; y < sourceRectangle . Bottom ; y ++ )
195+ {
196+ horizontalOperation . Invoke ( y , hSpan ) ;
197+ }
198+ }
176199
177200 // Vertical 1D convolutions to accumulate the partial results on the target buffer
178201 BokehBlurProcessor . SecondPassConvolutionRowOperation verticalOperation = new (
@@ -184,10 +207,10 @@ private void OnFrameApplyCore(
184207 parameters . Z ,
185208 parameters . W ) ;
186209
187- ParallelRowIterator . IterateRows (
188- configuration ,
189- sourceRectangle ,
190- in verticalOperation ) ;
210+ for ( int y = sourceRectangle . Top ; y < sourceRectangle . Bottom ; y ++ )
211+ {
212+ verticalOperation . Invoke ( y ) ;
213+ }
191214 }
192215 }
193216
// Forward gamma exposure pass for a single row `y`, using `span` as the Vector4 scratch buffer:
//   1. takes the target pixel row starting at bounds.X and converts span.Length pixels into
//      `span` with the Premultiply modifier,
//   2. applies the gamma curve to the first bounds.Width entries of `span`,
//   3. writes `span` back into the same pixel row.
// This hunk replaces the per-pixel MathF.Pow loop (removed lines) with a single
// GammaCompanding.Expand call (added lines).
@@ -305,15 +328,9 @@ public void Invoke(int y, Span<Vector4> span)
305328 {
306329 Span < TPixel > targetRowSpan = this . targetPixels . DangerousGetRowSpan ( y ) [ this . bounds . X ..] ;
307330 PixelOperations < TPixel > . Instance . ToVector4 ( this . configuration , targetRowSpan [ ..span . Length ] , span , PixelConversionModifiers . Premultiply ) ;
308- ref Vector4 baseRef = ref MemoryMarshal . GetReference ( span ) ;
309331
// Removed implementation: raised only X, Y and Z of each vector to this.gamma; W was not written.
310- for ( int x = 0 ; x < this . bounds . Width ; x ++ )
311- {
312- ref Vector4 v = ref Unsafe . Add ( ref baseRef , ( uint ) x ) ;
313- v . X = MathF . Pow ( v . X , this . gamma ) ;
314- v . Y = MathF . Pow ( v . Y , this . gamma ) ;
315- v . Z = MathF . Pow ( v . Z , this . gamma ) ;
316- }
// NOTE(review): GammaCompanding.Expand is defined outside this view. Presumably it applies the
// same exponent via a lookup table and leaves W untouched, as the removed loop did - confirm.
332+ // Input is premultiplied [0,1] so the LUT is safe here.
333+ GammaCompanding . Expand ( span [ ..this . bounds . Width ] , this . gamma ) ;
317334
// Destructive conversion: `span` is scratch space and may be overwritten by this call.
318335 PixelOperations < TPixel > . Instance . FromVector4Destructive ( this . configuration , span , targetRowSpan ) ;
319336 }
@@ -367,44 +384,34 @@ public void Invoke(int y, Span<Vector4> span)
367384 private readonly Buffer2D < TPixel > targetPixels ;
368385 private readonly Buffer2D < Vector4 > sourceValues ;
369386 private readonly Configuration configuration ;
370- private readonly float inverseGamma ;
387+ private readonly float gamma ;
371388
// Constructor of the inverse gamma exposure row operation. It only captures its arguments into
// the matching fields: the processing bounds, the target pixel buffer, the Vector4 source buffer,
// the configuration and the gamma value.
// This hunk renames the last parameter (and the field it is stored in) from `inverseGamma` to
// `gamma`; the call site in OnFrameApply changes in the same diff from passing `1 / this.gamma`
// to passing `this.gamma`, so the operation now receives the plain exponent.
372389 [ MethodImpl ( InliningOptions . ShortMethod ) ]
373390 public ApplyInverseGammaExposureRowOperation (
374391 Rectangle bounds ,
375392 Buffer2D < TPixel > targetPixels ,
376393 Buffer2D < Vector4 > sourceValues ,
377394 Configuration configuration ,
378- float inverseGamma )
395+ float gamma )
379396 {
380397 this . bounds = bounds ;
381398 this . targetPixels = targetPixels ;
382399 this . sourceValues = sourceValues ;
383400 this . configuration = configuration ;
384- this . inverseGamma = inverseGamma ;
401+ this . gamma = gamma ;
385402 }
386403
// Inverse gamma exposure pass for a single row `y`: reads the accumulated Vector4 values for the
// row from `sourceValues`, clamps them, applies the inverse gamma curve in place and converts the
// result into the target pixel row (starting at bounds.X) with the Premultiply modifier.
// This hunk replaces the per-pixel clamp + MathF.Pow loop with span-wide Numerics.Clamp and
// GammaCompanding.Compress calls.
387404 /// <inheritdoc/>
388405 [ MethodImpl ( InliningOptions . ShortMethod ) ]
389406 public void Invoke ( int y )
390407 {
391- Vector4 low = Vector4 . Zero ;
392- Vector4 high = new ( float . PositiveInfinity , float . PositiveInfinity , float . PositiveInfinity , float . PositiveInfinity ) ;
393-
394408 Span < TPixel > targetPixelSpan = this . targetPixels . DangerousGetRowSpan ( y ) [ this . bounds . X ..] ;
// The added line slices the source row to exactly bounds.Width entries up front, so the later
// conversion call no longer needs the `[.. this.bounds.Width]` range.
395- Span < Vector4 > sourceRowSpan = this . sourceValues . DangerousGetRowSpan ( y ) [ this . bounds . X ..] ;
396- ref Vector4 sourceRef = ref MemoryMarshal . GetReference ( sourceRowSpan ) ;
409+ Span < Vector4 > sourceRowSpan = this . sourceValues . DangerousGetRowSpan ( y ) . Slice ( this . bounds . X , this . bounds . Width ) ;
397410
// Removed implementation: clamped each vector to [0, +Infinity] and wrote Pow(clamped, inverseGamma)
// into X, Y and Z only; W was not written.
398- for ( int x = 0 ; x < this . bounds . Width ; x ++ )
399- {
400- ref Vector4 v = ref Unsafe . Add ( ref sourceRef , ( uint ) x ) ;
401- Vector4 clamp = Numerics . Clamp ( v , low , high ) ;
402- v . X = MathF . Pow ( clamp . X , this . inverseGamma ) ;
403- v . Y = MathF . Pow ( clamp . Y , this . inverseGamma ) ;
404- v . Z = MathF . Pow ( clamp . Z , this . inverseGamma ) ;
405- }
// Added implementation: the row is reinterpreted as floats and passed to Numerics.Clamp with
// bounds 0 and 1F, so the clamp call now covers all four components, W included.
// NOTE(review): the upper bound tightens from +Infinity to 1 - values above 1 used to reach Pow
// unclamped. Confirm this behavior change is intended.
// NOTE(review): GammaCompanding.Compress is defined outside this view. Presumably it applies the
// 1/gamma exponent (the caller now passes gamma, not its reciprocal) and preserves W - confirm.
411+ Numerics . Clamp ( MemoryMarshal . Cast < Vector4 , float > ( sourceRowSpan ) , 0 , 1F ) ;
412+ GammaCompanding . Compress ( sourceRowSpan , this . gamma ) ;
406413
407- PixelOperations < TPixel > . Instance . FromVector4Destructive ( this . configuration , sourceRowSpan [ .. this . bounds . Width ] , targetPixelSpan , PixelConversionModifiers . Premultiply ) ;
414+ PixelOperations < TPixel > . Instance . FromVector4Destructive ( this . configuration , sourceRowSpan , targetPixelSpan , PixelConversionModifiers . Premultiply ) ;
408415 }
409416 }
410417
@@ -433,17 +440,16 @@ public ApplyInverseGamma3ExposureRowOperation(
433440
// Inverse pass of the gamma == 3 fast path for a single row `y`: slices the Vector4 source row to
// [bounds.X, bounds.X + bounds.Width), clamps it, calls Numerics.CubeRootOnXYZ on it and converts
// the result into the target pixel row with the Premultiply modifier.
// This hunk drops the `unsafe` modifier together with the `sourceRef` local, which nothing in the
// visible body used.
434441 /// <inheritdoc/>
435442 [ MethodImpl ( InliningOptions . ShortMethod ) ]
436- public unsafe void Invoke ( int y )
443+ public void Invoke ( int y )
437444 {
438445 Span < Vector4 > sourceRowSpan = this . sourceValues . DangerousGetRowSpan ( y ) . Slice ( this . bounds . X , this . bounds . Width ) ;
439- ref Vector4 sourceRef = ref MemoryMarshal . GetReference ( sourceRowSpan ) ;
440446
// The row is reinterpreted as floats for the clamp, so the bounds cover all four components.
// NOTE(review): the upper bound changes from float.PositiveInfinity to 1F, matching the generic
// inverse gamma operation changed in the same diff - confirm the tightening is intended here too.
441- Numerics . Clamp ( MemoryMarshal . Cast < Vector4 , float > ( sourceRowSpan ) , 0 , float . PositiveInfinity ) ;
447+ Numerics . Clamp ( MemoryMarshal . Cast < Vector4 , float > ( sourceRowSpan ) , 0 , 1F ) ;
442448 Numerics . CubeRootOnXYZ ( sourceRowSpan ) ;
443449
444450 Span < TPixel > targetPixelSpan = this . targetPixels . DangerousGetRowSpan ( y ) [ this . bounds . X ..] ;
445451
// sourceRowSpan is already exactly bounds.Width long, so the extra range slice is dropped.
446- PixelOperations < TPixel > . Instance . FromVector4Destructive ( this . configuration , sourceRowSpan [ .. this . bounds . Width ] , targetPixelSpan , PixelConversionModifiers . Premultiply ) ;
452+ PixelOperations < TPixel > . Instance . FromVector4Destructive ( this . configuration , sourceRowSpan , targetPixelSpan , PixelConversionModifiers . Premultiply ) ;
447453 }
448454 }
449455}
0 commit comments