1 /*
2 * Copyright 2023 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "src/gpu/BlurUtils.h"
8
9 #include "include/core/SkBitmap.h"
10 #include "include/core/SkColorPriv.h"
11 #include "include/core/SkImageInfo.h"
12 #include "include/core/SkRRect.h"
13 #include "include/core/SkRect.h"
14 #include "include/core/SkScalar.h"
15 #include "include/core/SkSize.h"
16 #include "include/private/base/SkAssert.h"
17 #include "include/private/base/SkFloatingPoint.h"
18 #include "include/private/base/SkMath.h"
19 #include "include/private/base/SkPoint_impl.h"
20 #include "include/private/base/SkTemplates.h"
21 #include "include/private/base/SkTo.h"
22 #include "src/base/SkMathPriv.h"
23
24 #include <algorithm>
25 #include <cmath>
26 #include <cstdint>
27 #include <cstring>
28 #include <memory>
29 #include <vector>
30
31 namespace skgpu {
32
33 ///////////////////////////////////////////////////////////////////////////////
34 // Rect Blur
35 ///////////////////////////////////////////////////////////////////////////////
36
37 // TODO: it seems like there should be some synergy with SkBlurMask::ComputeBlurProfile
CreateIntegralTable(int width)38 SkBitmap CreateIntegralTable(int width) {
39 SkBitmap table;
40
41 if (width <= 0) {
42 return table;
43 }
44
45 if (!table.tryAllocPixels(SkImageInfo::MakeA8(width, 1))) {
46 return table;
47 }
48 *table.getAddr8(0, 0) = 255;
49 const float invWidth = 1.f / width;
50 for (int i = 1; i < width - 1; ++i) {
51 float x = (i + 0.5f) * invWidth;
52 x = (-6 * x + 3) * SK_ScalarRoot2Over2;
53 float integral = 0.5f * (std::erf(x) + 1.f);
54 *table.getAddr8(i, 0) = SkToU8(sk_float_round2int(255.f * integral));
55 }
56
57 *table.getAddr8(width - 1, 0) = 0;
58 table.setImmutable();
59 return table;
60 }
61
ComputeIntegralTableWidth(float sixSigma)62 int ComputeIntegralTableWidth(float sixSigma) {
63 // Check for NaN/infinity
64 if (!SkIsFinite(sixSigma)) {
65 return 0;
66 }
67 // Avoid overflow, covers both multiplying by 2 and finding next power of 2:
68 // 2*((2^31-1)/4 + 1) = 2*(2^29-1) + 2 = 2^30 and SkNextPow2(2^30) = 2^30
69 if (sixSigma > SK_MaxS32 / 4 + 1) {
70 return 0;
71 }
72 // The texture we're producing represents the integral of a normal distribution over a
73 // six-sigma range centered at zero. We want enough resolution so that the linear
74 // interpolation done in texture lookup doesn't introduce noticeable artifacts. We
75 // conservatively choose to have 2 texels for each dst pixel.
76 int minWidth = 2 * ((int)std::ceil(sixSigma));
77 // Bin by powers of 2 with a minimum so we get good profile reuse.
78 return std::max(SkNextPow2(minWidth), 32);
79 }
80
81 ///////////////////////////////////////////////////////////////////////////////
82 // Circle Blur
83 ///////////////////////////////////////////////////////////////////////////////
84
85 // Computes an unnormalized half kernel (right side). Returns the summation of all the half
86 // kernel values.
make_unnormalized_half_kernel(float * halfKernel,int halfKernelSize,float sigma)87 static float make_unnormalized_half_kernel(float* halfKernel, int halfKernelSize, float sigma) {
88 const float invSigma = 1.0f / sigma;
89 const float b = -0.5f * invSigma * invSigma;
90 float tot = 0.0f;
91 // Compute half kernel values at half pixel steps out from the center.
92 float t = 0.5f;
93 for (int i = 0; i < halfKernelSize; ++i) {
94 float value = expf(t * t * b);
95 tot += value;
96 halfKernel[i] = value;
97 t += 1.0f;
98 }
99 return tot;
100 }
101
102 // Create a Gaussian half-kernel (right side) and a summed area table given a sigma and number
103 // of discrete steps. The half kernel is normalized to sum to 0.5.
make_half_kernel_and_summed_table(float * halfKernel,float * summedHalfKernel,int halfKernelSize,float sigma)104 static void make_half_kernel_and_summed_table(float* halfKernel,
105 float* summedHalfKernel,
106 int halfKernelSize,
107 float sigma) {
108 // The half kernel should sum to 0.5 not 1.0.
109 const float tot = 2.0f * make_unnormalized_half_kernel(halfKernel, halfKernelSize, sigma);
110 float sum = 0.0f;
111 for (int i = 0; i < halfKernelSize; ++i) {
112 halfKernel[i] /= tot;
113 sum += halfKernel[i];
114 summedHalfKernel[i] = sum;
115 }
116 }
117
118 // Applies the 1D half kernel vertically at points along the x axis to a circle centered at the
119 // origin with radius circleR.
apply_kernel_in_y(float * results,int numSteps,float firstX,float circleR,int halfKernelSize,const float * summedHalfKernelTable)120 static void apply_kernel_in_y(float* results,
121 int numSteps,
122 float firstX,
123 float circleR,
124 int halfKernelSize,
125 const float* summedHalfKernelTable) {
126 float x = firstX;
127 for (int i = 0; i < numSteps; ++i, x += 1.0f) {
128 if (x < -circleR || x > circleR) {
129 results[i] = 0;
130 continue;
131 }
132 float y = sqrtf(circleR * circleR - x * x);
133 // In the column at x we exit the circle at +y and -y
134 // The summed table entry j is actually reflects an offset of j + 0.5.
135 y -= 0.5f;
136 int yInt = SkScalarFloorToInt(y);
137 SkASSERT(yInt >= -1);
138 if (y < 0) {
139 results[i] = (y + 0.5f) * summedHalfKernelTable[0];
140 } else if (yInt >= halfKernelSize - 1) {
141 results[i] = 0.5f;
142 } else {
143 float yFrac = y - yInt;
144 results[i] = (1.0f - yFrac) * summedHalfKernelTable[yInt] +
145 yFrac * summedHalfKernelTable[yInt + 1];
146 }
147 }
148 }
149
150 // Apply a Gaussian at point (evalX, 0) to a circle centered at the origin with radius circleR.
151 // This relies on having a half kernel computed for the Gaussian and a table of applications of
152 // the half kernel in y to columns at (evalX - halfKernel, evalX - halfKernel + 1, ..., evalX +
153 // halfKernel) passed in as yKernelEvaluations.
eval_at(float evalX,float circleR,const float * halfKernel,int halfKernelSize,const float * yKernelEvaluations)154 static uint8_t eval_at(float evalX,
155 float circleR,
156 const float* halfKernel,
157 int halfKernelSize,
158 const float* yKernelEvaluations) {
159 float acc = 0;
160
161 float x = evalX - halfKernelSize;
162 for (int i = 0; i < halfKernelSize; ++i, x += 1.0f) {
163 if (x < -circleR || x > circleR) {
164 continue;
165 }
166 float verticalEval = yKernelEvaluations[i];
167 acc += verticalEval * halfKernel[halfKernelSize - i - 1];
168 }
169 for (int i = 0; i < halfKernelSize; ++i, x += 1.0f) {
170 if (x < -circleR || x > circleR) {
171 continue;
172 }
173 float verticalEval = yKernelEvaluations[i + halfKernelSize];
174 acc += verticalEval * halfKernel[i];
175 }
176 // Since we applied a half kernel in y we multiply acc by 2 (the circle is symmetric about
177 // the x axis).
178 return SkUnitScalarClampToByte(2.0f * acc);
179 }
180
181 // This function creates a profile of a blurred circle. It does this by computing a kernel for
182 // half the Gaussian and a matching summed area table. The summed area table is used to compute
183 // an array of vertical applications of the half kernel to the circle along the x axis. The
184 // table of y evaluations has 2 * k + n entries where k is the size of the half kernel and n is
185 // the size of the profile being computed. Then for each of the n profile entries we walk out k
186 // steps in each horizontal direction multiplying the corresponding y evaluation by the half
187 // kernel entry and sum these values to compute the profile entry.
CreateCircleProfile(float sigma,float radius,int profileWidth)188 SkBitmap CreateCircleProfile(float sigma, float radius, int profileWidth) {
189 SkBitmap bitmap;
190 if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
191 return bitmap;
192 }
193
194 uint8_t* profile = bitmap.getAddr8(0, 0);
195
196 const int numSteps = profileWidth;
197
198 // The full kernel is 6 sigmas wide.
199 int halfKernelSize = SkScalarCeilToInt(6.0f * sigma);
200 // Round up to next multiple of 2 and then divide by 2.
201 halfKernelSize = ((halfKernelSize + 1) & ~1) >> 1;
202
203 // Number of x steps at which to apply kernel in y to cover all the profile samples in x.
204 const int numYSteps = numSteps + 2 * halfKernelSize;
205
206 skia_private::AutoTArray<float> bulkAlloc(halfKernelSize + halfKernelSize + numYSteps);
207 float* halfKernel = bulkAlloc.get();
208 float* summedKernel = bulkAlloc.get() + halfKernelSize;
209 float* yEvals = bulkAlloc.get() + 2 * halfKernelSize;
210 make_half_kernel_and_summed_table(halfKernel, summedKernel, halfKernelSize, sigma);
211
212 float firstX = -halfKernelSize + 0.5f;
213 apply_kernel_in_y(yEvals, numYSteps, firstX, radius, halfKernelSize, summedKernel);
214
215 for (int i = 0; i < numSteps - 1; ++i) {
216 float evalX = i + 0.5f;
217 profile[i] = eval_at(evalX, radius, halfKernel, halfKernelSize, yEvals + i);
218 }
219 // Ensure the tail of the Gaussian goes to zero.
220 profile[numSteps - 1] = 0;
221
222 bitmap.setImmutable();
223 return bitmap;
224 }
225
CreateHalfPlaneProfile(int profileWidth)226 SkBitmap CreateHalfPlaneProfile(int profileWidth) {
227 SkASSERT(!(profileWidth & 0x1));
228
229 SkBitmap bitmap;
230 if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
231 return bitmap;
232 }
233
234 uint8_t* profile = bitmap.getAddr8(0, 0);
235
236 // The full kernel is 6 sigmas wide.
237 const float sigma = profileWidth / 6.0f;
238 const int halfKernelSize = profileWidth / 2;
239
240 skia_private::AutoTArray<float> halfKernel(halfKernelSize);
241
242 // The half kernel should sum to 0.5.
243 const float tot = 2.0f * make_unnormalized_half_kernel(halfKernel.get(), halfKernelSize, sigma);
244 float sum = 0.0f;
245 // Populate the profile from the right edge to the middle.
246 for (int i = 0; i < halfKernelSize; ++i) {
247 halfKernel[halfKernelSize - i - 1] /= tot;
248 sum += halfKernel[halfKernelSize - i - 1];
249 profile[profileWidth - i - 1] = SkUnitScalarClampToByte(sum);
250 }
251 // Populate the profile from the middle to the left edge (by flipping the half kernel and
252 // continuing the summation).
253 for (int i = 0; i < halfKernelSize; ++i) {
254 sum += halfKernel[i];
255 profile[halfKernelSize - i - 1] = SkUnitScalarClampToByte(sum);
256 }
257 // Ensure the tail of the Gaussian goes to zero.
258 profile[profileWidth - 1] = 0;
259
260 bitmap.setImmutable();
261 return bitmap;
262 }
263
264 ///////////////////////////////////////////////////////////////////////////////
265 // RRect Blur
266 ///////////////////////////////////////////////////////////////////////////////
267
268 // Evaluate the vertical blur at the specified 'y' value given the location of the top of the
269 // rrect.
eval_V(float top,int y,const uint8_t * integral,int integralSize,float sixSigma)270 static uint8_t eval_V(float top, int y, const uint8_t* integral, int integralSize, float sixSigma) {
271 if (top < 0) {
272 return 0; // an empty column
273 }
274
275 float fT = (top - y - 0.5f) * (integralSize / sixSigma);
276 if (fT < 0) {
277 return 255;
278 } else if (fT >= integralSize - 1) {
279 return 0;
280 }
281
282 int lower = (int)fT;
283 float frac = fT - lower;
284
285 SkASSERT(lower + 1 < integralSize);
286
287 return integral[lower] * (1.0f - frac) + integral[lower + 1] * frac;
288 }
289
290 // Apply a gaussian 'kernel' horizontally at the specified 'x', 'y' location.
eval_H(int x,int y,const std::vector<float> & topVec,const float * kernel,int kernelSize,const uint8_t * integral,int integralSize,float sixSigma)291 static uint8_t eval_H(int x,
292 int y,
293 const std::vector<float>& topVec,
294 const float* kernel,
295 int kernelSize,
296 const uint8_t* integral,
297 int integralSize,
298 float sixSigma) {
299 SkASSERT(0 <= x && x < (int)topVec.size());
300 SkASSERT(kernelSize % 2);
301
302 float accum = 0.0f;
303
304 int xSampleLoc = x - (kernelSize / 2);
305 for (int i = 0; i < kernelSize; ++i, ++xSampleLoc) {
306 if (xSampleLoc < 0 || xSampleLoc >= (int)topVec.size()) {
307 continue;
308 }
309
310 accum += kernel[i] * eval_V(topVec[xSampleLoc], y, integral, integralSize, sixSigma);
311 }
312
313 return accum + 0.5f;
314 }
315
CreateRRectBlurMask(const SkRRect & rrectToDraw,const SkISize & dimensions,float sigma)316 SkBitmap CreateRRectBlurMask(const SkRRect& rrectToDraw, const SkISize& dimensions, float sigma) {
317 SkASSERT(!skgpu::BlurIsEffectivelyIdentity(sigma));
318 int radius = skgpu::BlurSigmaRadius(sigma);
319 int kernelSize = skgpu::BlurKernelWidth(radius);
320
321 SkASSERT(kernelSize % 2);
322 SkASSERT(dimensions.width() % 2);
323 SkASSERT(dimensions.height() % 2);
324
325 SkVector radii = rrectToDraw.getSimpleRadii();
326 SkASSERT(SkScalarNearlyEqual(radii.fX, radii.fY));
327
328 const int halfWidthPlus1 = (dimensions.width() / 2) + 1;
329 const int halfHeightPlus1 = (dimensions.height() / 2) + 1;
330
331 std::unique_ptr<float[]> kernel(new float[kernelSize]);
332 skgpu::Compute1DBlurKernel(sigma, radius, SkSpan<float>(kernel.get(), kernelSize));
333
334 const int tableWidth = ComputeIntegralTableWidth(6.0f * sigma);
335 SkBitmap integral = CreateIntegralTable(tableWidth);
336 if (integral.empty()) {
337 return {};
338 }
339
340 SkBitmap result;
341 if (!result.tryAllocPixels(SkImageInfo::MakeA8(dimensions.width(), dimensions.height()))) {
342 return {};
343 }
344
345 std::vector<float> topVec;
346 topVec.reserve(dimensions.width());
347 for (int x = 0; x < dimensions.width(); ++x) {
348 if (x < rrectToDraw.rect().fLeft || x > rrectToDraw.rect().fRight) {
349 topVec.push_back(-1);
350 } else {
351 if (x + 0.5f < rrectToDraw.rect().fLeft + radii.fX) { // in the circular section
352 float xDist = rrectToDraw.rect().fLeft + radii.fX - x - 0.5f;
353 float h = sqrtf(radii.fX * radii.fX - xDist * xDist);
354 SkASSERT(0 <= h && h < radii.fY);
355 topVec.push_back(rrectToDraw.rect().fTop + radii.fX - h + 3 * sigma);
356 } else {
357 topVec.push_back(rrectToDraw.rect().fTop + 3 * sigma);
358 }
359 }
360 }
361
362 for (int y = 0; y < halfHeightPlus1; ++y) {
363 uint8_t* scanline = result.getAddr8(0, y);
364
365 for (int x = 0; x < halfWidthPlus1; ++x) {
366 scanline[x] = eval_H(x,
367 y,
368 topVec,
369 kernel.get(),
370 kernelSize,
371 integral.getAddr8(0, 0),
372 integral.width(),
373 6.0f * sigma);
374 scanline[dimensions.width() - x - 1] = scanline[x];
375 }
376
377 memcpy(result.getAddr8(0, dimensions.height() - y - 1), scanline, result.rowBytes());
378 }
379
380 result.setImmutable();
381 return result;
382 }
383
384 } // namespace skgpu
385