33
44using System . Runtime . CompilerServices ;
55using System . Runtime . InteropServices ;
6+ using System . Runtime . Intrinsics ;
7+ using SixLabors . ImageSharp . Common . Helpers ;
68using SixLabors . ImageSharp . PixelFormats ;
79
810namespace SixLabors . ImageSharp . Processing . Processors . Quantization ;
@@ -71,32 +73,107 @@ public override void Clear(ReadOnlyMemory<TPixel> palette)
7173 [ MethodImpl ( InliningOptions . ColdPath ) ]
7274 private int GetClosestColorSlow ( Rgba32 rgba , ref TPixel paletteRef , out TPixel match )
7375 {
74- // Loop through the palette and find the nearest match.
76+ ReadOnlySpan < Rgba32 > rgbaPalette = this . rgbaPalette ;
77+ ref Rgba32 rgbaPaletteRef = ref MemoryMarshal . GetReference ( rgbaPalette ) ;
7578 int index = 0 ;
76- float leastDistance = float . MaxValue ;
77- for ( int i = 0 ; i < this . rgbaPalette . Length ; i ++ )
79+ int leastDistance = int . MaxValue ;
80+ int i = 0 ;
81+
82+ if ( Vector128 . IsHardwareAccelerated && rgbaPalette . Length >= 4 )
7883 {
79- Rgba32 candidate = this . rgbaPalette [ i ] ;
80- if ( candidate . PackedValue == rgba . PackedValue )
81- {
82- index = i ;
83- break ;
84- }
84+ // Duplicate the query color so one 128-bit register can be subtracted from
85+ // two packed RGBA candidates at a time after widening.
86+ Vector128 < short > pixel = Vector128 . Create (
87+ rgba . R ,
88+ rgba . G ,
89+ rgba . B ,
90+ rgba . A ,
91+ rgba . R ,
92+ rgba . G ,
93+ rgba . B ,
94+ rgba . A ) ;
8595
86- float distance = DistanceSquared ( rgba , candidate ) ;
87- if ( distance == 0 )
96+ int vectorizedLength = rgbaPalette . Length & ~ 0x03 ;
97+
98+ for ( ; i < vectorizedLength ; i += 4 )
8899 {
89- index = i ;
90- break ;
100+ // Load four packed Rgba32 values (16 bytes) and widen them into two vectors:
101+ // [c0.r, c0.g, c0.b, c0.a, c1.r, ...] and [c2.r, c2.g, c2.b, c2.a, c3.r, ...].
102+ Vector128 < byte > packed = Vector128 . LoadUnsafe ( ref Unsafe . As < Rgba32 , byte > ( ref Unsafe . Add ( ref rgbaPaletteRef , i ) ) ) ;
103+ Vector128 < short > lowerDiff = Vector128 . WidenLower ( packed ) . AsInt16 ( ) - pixel ;
104+ Vector128 < short > upperDiff = Vector128 . WidenUpper ( packed ) . AsInt16 ( ) - pixel ;
105+
106+ // MultiplyAddAdjacent collapses channel squares into RG + BA partial sums,
107+ // so each pair of int lanes still corresponds to one candidate color.
108+ Vector128 < int > lowerPairs = Vector128_ . MultiplyAddAdjacent ( lowerDiff , lowerDiff ) ;
109+ Vector128 < int > upperPairs = Vector128_ . MultiplyAddAdjacent ( upperDiff , upperDiff ) ;
110+
111+ // Sum the two partials for candidates i and i + 1.
112+ ref int lowerRef = ref Unsafe . As < Vector128 < int > , int > ( ref lowerPairs ) ;
113+ int distance = lowerRef + Unsafe . Add ( ref lowerRef , 1 ) ;
114+ if ( distance < leastDistance )
115+ {
116+ index = i ;
117+ leastDistance = distance ;
118+ if ( distance == 0 )
119+ {
120+ goto Found ;
121+ }
122+ }
123+
124+ distance = Unsafe . Add ( ref lowerRef , 2 ) + Unsafe . Add ( ref lowerRef , 3 ) ;
125+ if ( distance < leastDistance )
126+ {
127+ index = i + 1 ;
128+ leastDistance = distance ;
129+ if ( distance == 0 )
130+ {
131+ goto Found ;
132+ }
133+ }
134+
135+ // Sum the two partials for candidates i + 2 and i + 3.
136+ ref int upperRef = ref Unsafe . As < Vector128 < int > , int > ( ref upperPairs ) ;
137+ distance = upperRef + Unsafe . Add ( ref upperRef , 1 ) ;
138+ if ( distance < leastDistance )
139+ {
140+ index = i + 2 ;
141+ leastDistance = distance ;
142+ if ( distance == 0 )
143+ {
144+ goto Found ;
145+ }
146+ }
147+
148+ distance = Unsafe . Add ( ref upperRef , 2 ) + Unsafe . Add ( ref upperRef , 3 ) ;
149+ if ( distance < leastDistance )
150+ {
151+ index = i + 3 ;
152+ leastDistance = distance ;
153+ if ( distance == 0 )
154+ {
155+ goto Found ;
156+ }
157+ }
91158 }
159+ }
92160
161+ for ( ; i < rgbaPalette . Length ; i ++ )
162+ {
163+ int distance = DistanceSquared ( rgba , Unsafe . Add ( ref rgbaPaletteRef , i ) ) ;
93164 if ( distance < leastDistance )
94165 {
95166 index = i ;
96167 leastDistance = distance ;
168+ if ( distance == 0 )
169+ {
170+ goto Found ;
171+ }
97172 }
98173 }
99174
175+ Found :
176+
100177 // Now I have the index, pop it into the cache for next time
101178 _ = this . cache . TryAdd ( rgba , ( short ) index ) ;
102179 match = Unsafe . Add ( ref paletteRef , ( uint ) index ) ;
@@ -111,12 +188,12 @@ private int GetClosestColorSlow(Rgba32 rgba, ref TPixel paletteRef, out TPixel m
111188 /// <param name="b">The second point.</param>
112189 /// <returns>The distance squared.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static int DistanceSquared(Rgba32 a, Rgba32 b)
{
    // Work in integers rather than floats: each channel is a single byte, so every
    // delta lies in [-255, 255] and the sum of the four squares is at most
    // 4 * 255 * 255 = 260100, which fits comfortably in an int. This keeps the
    // result exact, so callers can compare it against zero to detect an exact match.
    int deltaR = a.R - b.R;
    int deltaG = a.G - b.G;
    int deltaB = a.B - b.B;
    int deltaA = a.A - b.A;
    return (deltaR * deltaR) + (deltaG * deltaG) + (deltaB * deltaB) + (deltaA * deltaA);
}
122199
@@ -177,8 +254,7 @@ public static PixelMap<TPixel> Create<TPixel>(
177254 ColorMatchingMode colorMatchingMode )
178255 where TPixel : unmanaged, IPixel < TPixel > => colorMatchingMode switch
179256 {
180- ColorMatchingMode . Hybrid => new EuclideanPixelMap < TPixel , HybridCache > ( configuration , palette ) ,
181- ColorMatchingMode . Exact => new EuclideanPixelMap < TPixel , NullCache > ( configuration , palette ) ,
257+ ColorMatchingMode . Exact => new EuclideanPixelMap < TPixel , AccurateCache > ( configuration , palette ) ,
182258 _ => new EuclideanPixelMap < TPixel , CoarseCache > ( configuration , palette ) ,
183259 } ;
184260}
0 commit comments