@@ -89,7 +89,34 @@ var blenders = new []{
8989 {
9090 amount = Numerics.Clamp(amount, 0, 1);
9191
92- if (Avx2.IsSupported && destination.Length >= 2)
92+ if (Avx512F.IsSupported && destination.Length >= 4)
93+ {
94+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512<float>
95+ ref Vector512<float> destinationBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(destination));
96+ ref Vector512<float> destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
97+
98+ ref Vector512<float> backgroundBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(background));
99+ ref Vector512<float> sourceBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(source));
100+ Vector512<float> opacity = Vector512.Create(amount);
101+
102+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
103+ {
104+ destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity);
105+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
106+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
107+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
108+ }
109+
110+ int remainder = Numerics.Modulo4(destination.Length);
111+ if (remainder != 0)
112+ {
113+ for (int i = destination.Length - remainder; i < destination.Length; i++)
114+ {
115+ destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount);
116+ }
117+ }
118+ }
119+ else if (Avx2.IsSupported && destination.Length >= 2)
93120 {
94121 // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
95122 ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
@@ -128,7 +155,37 @@ var blenders = new []{
128155 {
129156 amount = Numerics.Clamp(amount, 0, 1);
130157
131- if (Avx2.IsSupported && destination.Length >= 2)
158+ if (Avx512F.IsSupported && destination.Length >= 4)
159+ {
160+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512<float>
161+ ref Vector512<float> destinationBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(destination));
162+ ref Vector512<float> destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
163+
164+ ref Vector512<float> backgroundBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(background));
165+ Vector512<float> sourceBase = Vector512.Create(
166+ source.X, source.Y, source.Z, source.W,
167+ source.X, source.Y, source.Z, source.W,
168+ source.X, source.Y, source.Z, source.W,
169+ source.X, source.Y, source.Z, source.W);
170+ Vector512<float> opacity = Vector512.Create(amount);
171+
172+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
173+ {
174+ destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity);
175+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
176+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
177+ }
178+
179+ int remainder = Numerics.Modulo4(destination.Length);
180+ if (remainder != 0)
181+ {
182+ for (int i = destination.Length - remainder; i < destination.Length; i++)
183+ {
184+ destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source, amount);
185+ }
186+ }
187+ }
188+ else if (Avx2.IsSupported && destination.Length >= 2)
132189 {
133190 // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
134191 ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
@@ -164,7 +221,51 @@ var blenders = new []{
164221 /// <inheritdoc />
165222 protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
166223 {
167- if (Avx2.IsSupported && destination.Length >= 2)
224+ if (Avx512F.IsSupported && destination.Length >= 4)
225+ {
226+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512<float>
227+ ref Vector512<float> destinationBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(destination));
228+ ref Vector512<float> destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
229+
230+ ref Vector512<float> backgroundBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(background));
231+ ref Vector512<float> sourceBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(source));
232+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
233+
234+ Vector512<float> vOne = Vector512.Create(1F);
235+
236+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
237+ {
238+ float amount0 = amountBase;
239+ float amount1 = Unsafe.Add(ref amountBase, 1);
240+ float amount2 = Unsafe.Add(ref amountBase, 2);
241+ float amount3 = Unsafe.Add(ref amountBase, 3);
242+
243+ // We need to create a Vector512<float> containing the current four amount values
244+ // taking up each quarter of the Vector512<float> and then clamp them.
245+ Vector512<float> opacity = Vector512.Create(
246+ amount0, amount0, amount0, amount0,
247+ amount1, amount1, amount1, amount1,
248+ amount2, amount2, amount2, amount2,
249+ amount3, amount3, amount3, amount3);
250+ opacity = Vector512.Min(Vector512.Max(Vector512<float>.Zero, opacity), vOne);
251+
252+ destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity);
253+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
254+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
255+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
256+ amountBase = ref Unsafe.Add(ref amountBase, 4);
257+ }
258+
259+ int remainder = Numerics.Modulo4(destination.Length);
260+ if (remainder != 0)
261+ {
262+ for (int i = destination.Length - remainder; i < destination.Length; i++)
263+ {
264+ destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
265+ }
266+ }
267+ }
268+ else if (Avx2.IsSupported && destination.Length >= 2)
168269 {
169270 // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
170271 ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
@@ -211,7 +312,54 @@ var blenders = new []{
211312 /// <inheritdoc />
212313 protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, Vector4 source, ReadOnlySpan<float> amount)
213314 {
214- if (Avx2.IsSupported && destination.Length >= 2)
315+ if (Avx512F.IsSupported && destination.Length >= 4)
316+ {
317+ // Divide by 4 as 4 elements per Vector4 and 16 per Vector512<float>
318+ ref Vector512<float> destinationBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(destination));
319+ ref Vector512<float> destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u);
320+
321+ ref Vector512<float> backgroundBase = ref Unsafe.As<Vector4, Vector512<float>>(ref MemoryMarshal.GetReference(background));
322+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
323+
324+ Vector512<float> sourceBase = Vector512.Create(
325+ source.X, source.Y, source.Z, source.W,
326+ source.X, source.Y, source.Z, source.W,
327+ source.X, source.Y, source.Z, source.W,
328+ source.X, source.Y, source.Z, source.W);
329+ Vector512<float> vOne = Vector512.Create(1F);
330+
331+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
332+ {
333+ float amount0 = amountBase;
334+ float amount1 = Unsafe.Add(ref amountBase, 1);
335+ float amount2 = Unsafe.Add(ref amountBase, 2);
336+ float amount3 = Unsafe.Add(ref amountBase, 3);
337+
338+ // We need to create a Vector512<float> containing the current four amount values
339+ // taking up each quarter of the Vector512<float> and then clamp them.
340+ Vector512<float> opacity = Vector512.Create(
341+ amount0, amount0, amount0, amount0,
342+ amount1, amount1, amount1, amount1,
343+ amount2, amount2, amount2, amount2,
344+ amount3, amount3, amount3, amount3);
345+ opacity = Vector512.Min(Vector512.Max(Vector512<float>.Zero, opacity), vOne);
346+
347+ destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity);
348+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
349+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
350+ amountBase = ref Unsafe.Add(ref amountBase, 4);
351+ }
352+
353+ int remainder = Numerics.Modulo4(destination.Length);
354+ if (remainder != 0)
355+ {
356+ for (int i = destination.Length - remainder; i < destination.Length; i++)
357+ {
358+ destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source, Numerics.Clamp(amount[i], 0, 1F));
359+ }
360+ }
361+ }
362+ else if (Avx2.IsSupported && destination.Length >= 2)
215363 {
216364 // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
217365 ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
0 commit comments