xref: /aosp_15_r20/external/skia/src/gpu/BlurUtils.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2023 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 #include "src/gpu/BlurUtils.h"
8 
9 #include "include/core/SkBitmap.h"
10 #include "include/core/SkColorPriv.h"
11 #include "include/core/SkImageInfo.h"
12 #include "include/core/SkRRect.h"
13 #include "include/core/SkRect.h"
14 #include "include/core/SkScalar.h"
15 #include "include/core/SkSize.h"
16 #include "include/private/base/SkAssert.h"
17 #include "include/private/base/SkFloatingPoint.h"
18 #include "include/private/base/SkMath.h"
19 #include "include/private/base/SkPoint_impl.h"
20 #include "include/private/base/SkTemplates.h"
21 #include "include/private/base/SkTo.h"
22 #include "src/base/SkMathPriv.h"
23 
24 #include <algorithm>
25 #include <cmath>
26 #include <cstdint>
27 #include <cstring>
28 #include <memory>
29 #include <vector>
30 
31 namespace skgpu {
32 
33 ///////////////////////////////////////////////////////////////////////////////
34 //  Rect Blur
35 ///////////////////////////////////////////////////////////////////////////////
36 
37 // TODO: it seems like there should be some synergy with SkBlurMask::ComputeBlurProfile
CreateIntegralTable(int width)38 SkBitmap CreateIntegralTable(int width) {
39     SkBitmap table;
40 
41     if (width <= 0) {
42         return table;
43     }
44 
45     if (!table.tryAllocPixels(SkImageInfo::MakeA8(width, 1))) {
46         return table;
47     }
48     *table.getAddr8(0, 0) = 255;
49     const float invWidth = 1.f / width;
50     for (int i = 1; i < width - 1; ++i) {
51         float x = (i + 0.5f) * invWidth;
52         x = (-6 * x + 3) * SK_ScalarRoot2Over2;
53         float integral = 0.5f * (std::erf(x) + 1.f);
54         *table.getAddr8(i, 0) = SkToU8(sk_float_round2int(255.f * integral));
55     }
56 
57     *table.getAddr8(width - 1, 0) = 0;
58     table.setImmutable();
59     return table;
60 }
61 
ComputeIntegralTableWidth(float sixSigma)62 int ComputeIntegralTableWidth(float sixSigma) {
63     // Check for NaN/infinity
64     if (!SkIsFinite(sixSigma)) {
65         return 0;
66     }
67     // Avoid overflow, covers both multiplying by 2 and finding next power of 2:
68     // 2*((2^31-1)/4 + 1) = 2*(2^29-1) + 2 = 2^30 and SkNextPow2(2^30) = 2^30
69     if (sixSigma > SK_MaxS32 / 4 + 1) {
70         return 0;
71     }
72     // The texture we're producing represents the integral of a normal distribution over a
73     // six-sigma range centered at zero. We want enough resolution so that the linear
74     // interpolation done in texture lookup doesn't introduce noticeable artifacts. We
75     // conservatively choose to have 2 texels for each dst pixel.
76     int minWidth = 2 * ((int)std::ceil(sixSigma));
77     // Bin by powers of 2 with a minimum so we get good profile reuse.
78     return std::max(SkNextPow2(minWidth), 32);
79 }
80 
81 ///////////////////////////////////////////////////////////////////////////////
82 //  Circle Blur
83 ///////////////////////////////////////////////////////////////////////////////
84 
85 // Computes an unnormalized half kernel (right side). Returns the summation of all the half
86 // kernel values.
make_unnormalized_half_kernel(float * halfKernel,int halfKernelSize,float sigma)87 static float make_unnormalized_half_kernel(float* halfKernel, int halfKernelSize, float sigma) {
88     const float invSigma = 1.0f / sigma;
89     const float b = -0.5f * invSigma * invSigma;
90     float tot = 0.0f;
91     // Compute half kernel values at half pixel steps out from the center.
92     float t = 0.5f;
93     for (int i = 0; i < halfKernelSize; ++i) {
94         float value = expf(t * t * b);
95         tot += value;
96         halfKernel[i] = value;
97         t += 1.0f;
98     }
99     return tot;
100 }
101 
102 // Create a Gaussian half-kernel (right side) and a summed area table given a sigma and number
103 // of discrete steps. The half kernel is normalized to sum to 0.5.
make_half_kernel_and_summed_table(float * halfKernel,float * summedHalfKernel,int halfKernelSize,float sigma)104 static void make_half_kernel_and_summed_table(float* halfKernel,
105                                               float* summedHalfKernel,
106                                               int halfKernelSize,
107                                               float sigma) {
108     // The half kernel should sum to 0.5 not 1.0.
109     const float tot = 2.0f * make_unnormalized_half_kernel(halfKernel, halfKernelSize, sigma);
110     float sum = 0.0f;
111     for (int i = 0; i < halfKernelSize; ++i) {
112         halfKernel[i] /= tot;
113         sum += halfKernel[i];
114         summedHalfKernel[i] = sum;
115     }
116 }
117 
118 // Applies the 1D half kernel vertically at points along the x axis to a circle centered at the
119 // origin with radius circleR.
apply_kernel_in_y(float * results,int numSteps,float firstX,float circleR,int halfKernelSize,const float * summedHalfKernelTable)120 static void apply_kernel_in_y(float* results,
121                               int numSteps,
122                               float firstX,
123                               float circleR,
124                               int halfKernelSize,
125                               const float* summedHalfKernelTable) {
126     float x = firstX;
127     for (int i = 0; i < numSteps; ++i, x += 1.0f) {
128         if (x < -circleR || x > circleR) {
129             results[i] = 0;
130             continue;
131         }
132         float y = sqrtf(circleR * circleR - x * x);
133         // In the column at x we exit the circle at +y and -y
134         // The summed table entry j is actually reflects an offset of j + 0.5.
135         y -= 0.5f;
136         int yInt = SkScalarFloorToInt(y);
137         SkASSERT(yInt >= -1);
138         if (y < 0) {
139             results[i] = (y + 0.5f) * summedHalfKernelTable[0];
140         } else if (yInt >= halfKernelSize - 1) {
141             results[i] = 0.5f;
142         } else {
143             float yFrac = y - yInt;
144             results[i] = (1.0f - yFrac) * summedHalfKernelTable[yInt] +
145                          yFrac * summedHalfKernelTable[yInt + 1];
146         }
147     }
148 }
149 
150 // Apply a Gaussian at point (evalX, 0) to a circle centered at the origin with radius circleR.
151 // This relies on having a half kernel computed for the Gaussian and a table of applications of
152 // the half kernel in y to columns at (evalX - halfKernel, evalX - halfKernel + 1, ..., evalX +
153 // halfKernel) passed in as yKernelEvaluations.
eval_at(float evalX,float circleR,const float * halfKernel,int halfKernelSize,const float * yKernelEvaluations)154 static uint8_t eval_at(float evalX,
155                        float circleR,
156                        const float* halfKernel,
157                        int halfKernelSize,
158                        const float* yKernelEvaluations) {
159     float acc = 0;
160 
161     float x = evalX - halfKernelSize;
162     for (int i = 0; i < halfKernelSize; ++i, x += 1.0f) {
163         if (x < -circleR || x > circleR) {
164             continue;
165         }
166         float verticalEval = yKernelEvaluations[i];
167         acc += verticalEval * halfKernel[halfKernelSize - i - 1];
168     }
169     for (int i = 0; i < halfKernelSize; ++i, x += 1.0f) {
170         if (x < -circleR || x > circleR) {
171             continue;
172         }
173         float verticalEval = yKernelEvaluations[i + halfKernelSize];
174         acc += verticalEval * halfKernel[i];
175     }
176     // Since we applied a half kernel in y we multiply acc by 2 (the circle is symmetric about
177     // the x axis).
178     return SkUnitScalarClampToByte(2.0f * acc);
179 }
180 
181 // This function creates a profile of a blurred circle. It does this by computing a kernel for
182 // half the Gaussian and a matching summed area table. The summed area table is used to compute
183 // an array of vertical applications of the half kernel to the circle along the x axis. The
184 // table of y evaluations has 2 * k + n entries where k is the size of the half kernel and n is
185 // the size of the profile being computed. Then for each of the n profile entries we walk out k
186 // steps in each horizontal direction multiplying the corresponding y evaluation by the half
187 // kernel entry and sum these values to compute the profile entry.
CreateCircleProfile(float sigma,float radius,int profileWidth)188 SkBitmap CreateCircleProfile(float sigma, float radius, int profileWidth) {
189     SkBitmap bitmap;
190     if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
191         return bitmap;
192     }
193 
194     uint8_t* profile = bitmap.getAddr8(0, 0);
195 
196     const int numSteps = profileWidth;
197 
198     // The full kernel is 6 sigmas wide.
199     int halfKernelSize = SkScalarCeilToInt(6.0f * sigma);
200     // Round up to next multiple of 2 and then divide by 2.
201     halfKernelSize = ((halfKernelSize + 1) & ~1) >> 1;
202 
203     // Number of x steps at which to apply kernel in y to cover all the profile samples in x.
204     const int numYSteps = numSteps + 2 * halfKernelSize;
205 
206     skia_private::AutoTArray<float> bulkAlloc(halfKernelSize + halfKernelSize + numYSteps);
207     float* halfKernel = bulkAlloc.get();
208     float* summedKernel = bulkAlloc.get() + halfKernelSize;
209     float* yEvals = bulkAlloc.get() + 2 * halfKernelSize;
210     make_half_kernel_and_summed_table(halfKernel, summedKernel, halfKernelSize, sigma);
211 
212     float firstX = -halfKernelSize + 0.5f;
213     apply_kernel_in_y(yEvals, numYSteps, firstX, radius, halfKernelSize, summedKernel);
214 
215     for (int i = 0; i < numSteps - 1; ++i) {
216         float evalX = i + 0.5f;
217         profile[i] = eval_at(evalX, radius, halfKernel, halfKernelSize, yEvals + i);
218     }
219     // Ensure the tail of the Gaussian goes to zero.
220     profile[numSteps - 1] = 0;
221 
222     bitmap.setImmutable();
223     return bitmap;
224 }
225 
CreateHalfPlaneProfile(int profileWidth)226 SkBitmap CreateHalfPlaneProfile(int profileWidth) {
227     SkASSERT(!(profileWidth & 0x1));
228 
229     SkBitmap bitmap;
230     if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
231         return bitmap;
232     }
233 
234     uint8_t* profile = bitmap.getAddr8(0, 0);
235 
236     // The full kernel is 6 sigmas wide.
237     const float sigma = profileWidth / 6.0f;
238     const int halfKernelSize = profileWidth / 2;
239 
240     skia_private::AutoTArray<float> halfKernel(halfKernelSize);
241 
242     // The half kernel should sum to 0.5.
243     const float tot = 2.0f * make_unnormalized_half_kernel(halfKernel.get(), halfKernelSize, sigma);
244     float sum = 0.0f;
245     // Populate the profile from the right edge to the middle.
246     for (int i = 0; i < halfKernelSize; ++i) {
247         halfKernel[halfKernelSize - i - 1] /= tot;
248         sum += halfKernel[halfKernelSize - i - 1];
249         profile[profileWidth - i - 1] = SkUnitScalarClampToByte(sum);
250     }
251     // Populate the profile from the middle to the left edge (by flipping the half kernel and
252     // continuing the summation).
253     for (int i = 0; i < halfKernelSize; ++i) {
254         sum += halfKernel[i];
255         profile[halfKernelSize - i - 1] = SkUnitScalarClampToByte(sum);
256     }
257     // Ensure the tail of the Gaussian goes to zero.
258     profile[profileWidth - 1] = 0;
259 
260     bitmap.setImmutable();
261     return bitmap;
262 }
263 
264 ///////////////////////////////////////////////////////////////////////////////
265 //  RRect Blur
266 ///////////////////////////////////////////////////////////////////////////////
267 
268 // Evaluate the vertical blur at the specified 'y' value given the location of the top of the
269 // rrect.
eval_V(float top,int y,const uint8_t * integral,int integralSize,float sixSigma)270 static uint8_t eval_V(float top, int y, const uint8_t* integral, int integralSize, float sixSigma) {
271     if (top < 0) {
272         return 0;  // an empty column
273     }
274 
275     float fT = (top - y - 0.5f) * (integralSize / sixSigma);
276     if (fT < 0) {
277         return 255;
278     } else if (fT >= integralSize - 1) {
279         return 0;
280     }
281 
282     int lower = (int)fT;
283     float frac = fT - lower;
284 
285     SkASSERT(lower + 1 < integralSize);
286 
287     return integral[lower] * (1.0f - frac) + integral[lower + 1] * frac;
288 }
289 
290 // Apply a gaussian 'kernel' horizontally at the specified 'x', 'y' location.
eval_H(int x,int y,const std::vector<float> & topVec,const float * kernel,int kernelSize,const uint8_t * integral,int integralSize,float sixSigma)291 static uint8_t eval_H(int x,
292                       int y,
293                       const std::vector<float>& topVec,
294                       const float* kernel,
295                       int kernelSize,
296                       const uint8_t* integral,
297                       int integralSize,
298                       float sixSigma) {
299     SkASSERT(0 <= x && x < (int)topVec.size());
300     SkASSERT(kernelSize % 2);
301 
302     float accum = 0.0f;
303 
304     int xSampleLoc = x - (kernelSize / 2);
305     for (int i = 0; i < kernelSize; ++i, ++xSampleLoc) {
306         if (xSampleLoc < 0 || xSampleLoc >= (int)topVec.size()) {
307             continue;
308         }
309 
310         accum += kernel[i] * eval_V(topVec[xSampleLoc], y, integral, integralSize, sixSigma);
311     }
312 
313     return accum + 0.5f;
314 }
315 
CreateRRectBlurMask(const SkRRect & rrectToDraw,const SkISize & dimensions,float sigma)316 SkBitmap CreateRRectBlurMask(const SkRRect& rrectToDraw, const SkISize& dimensions, float sigma) {
317     SkASSERT(!skgpu::BlurIsEffectivelyIdentity(sigma));
318     int radius = skgpu::BlurSigmaRadius(sigma);
319     int kernelSize = skgpu::BlurKernelWidth(radius);
320 
321     SkASSERT(kernelSize % 2);
322     SkASSERT(dimensions.width() % 2);
323     SkASSERT(dimensions.height() % 2);
324 
325     SkVector radii = rrectToDraw.getSimpleRadii();
326     SkASSERT(SkScalarNearlyEqual(radii.fX, radii.fY));
327 
328     const int halfWidthPlus1 = (dimensions.width() / 2) + 1;
329     const int halfHeightPlus1 = (dimensions.height() / 2) + 1;
330 
331     std::unique_ptr<float[]> kernel(new float[kernelSize]);
332     skgpu::Compute1DBlurKernel(sigma, radius, SkSpan<float>(kernel.get(), kernelSize));
333 
334     const int tableWidth = ComputeIntegralTableWidth(6.0f * sigma);
335     SkBitmap integral = CreateIntegralTable(tableWidth);
336     if (integral.empty()) {
337         return {};
338     }
339 
340     SkBitmap result;
341     if (!result.tryAllocPixels(SkImageInfo::MakeA8(dimensions.width(), dimensions.height()))) {
342         return {};
343     }
344 
345     std::vector<float> topVec;
346     topVec.reserve(dimensions.width());
347     for (int x = 0; x < dimensions.width(); ++x) {
348         if (x < rrectToDraw.rect().fLeft || x > rrectToDraw.rect().fRight) {
349             topVec.push_back(-1);
350         } else {
351             if (x + 0.5f < rrectToDraw.rect().fLeft + radii.fX) {  // in the circular section
352                 float xDist = rrectToDraw.rect().fLeft + radii.fX - x - 0.5f;
353                 float h = sqrtf(radii.fX * radii.fX - xDist * xDist);
354                 SkASSERT(0 <= h && h < radii.fY);
355                 topVec.push_back(rrectToDraw.rect().fTop + radii.fX - h + 3 * sigma);
356             } else {
357                 topVec.push_back(rrectToDraw.rect().fTop + 3 * sigma);
358             }
359         }
360     }
361 
362     for (int y = 0; y < halfHeightPlus1; ++y) {
363         uint8_t* scanline = result.getAddr8(0, y);
364 
365         for (int x = 0; x < halfWidthPlus1; ++x) {
366             scanline[x] = eval_H(x,
367                                  y,
368                                  topVec,
369                                  kernel.get(),
370                                  kernelSize,
371                                  integral.getAddr8(0, 0),
372                                  integral.width(),
373                                  6.0f * sigma);
374             scanline[dimensions.width() - x - 1] = scanline[x];
375         }
376 
377         memcpy(result.getAddr8(0, dimensions.height() - y - 1), scanline, result.rowBytes());
378     }
379 
380     result.setImmutable();
381     return result;
382 }
383 
384 } // namespace skgpu
385