xref: /aosp_15_r20/frameworks/rs/toolkit/Convolve3x3.cpp (revision e1eccf28f96817838ad6867f7f39d2351ec11f56)
1*e1eccf28SAndroid Build Coastguard Worker /*
2*e1eccf28SAndroid Build Coastguard Worker  * Copyright (C) 2012 The Android Open Source Project
3*e1eccf28SAndroid Build Coastguard Worker  *
4*e1eccf28SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*e1eccf28SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*e1eccf28SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*e1eccf28SAndroid Build Coastguard Worker  *
8*e1eccf28SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*e1eccf28SAndroid Build Coastguard Worker  *
10*e1eccf28SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*e1eccf28SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*e1eccf28SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*e1eccf28SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*e1eccf28SAndroid Build Coastguard Worker  * limitations under the License.
15*e1eccf28SAndroid Build Coastguard Worker  */
16*e1eccf28SAndroid Build Coastguard Worker 
17*e1eccf28SAndroid Build Coastguard Worker #include <cstdint>
18*e1eccf28SAndroid Build Coastguard Worker 
19*e1eccf28SAndroid Build Coastguard Worker #include "RenderScriptToolkit.h"
20*e1eccf28SAndroid Build Coastguard Worker #include "TaskProcessor.h"
21*e1eccf28SAndroid Build Coastguard Worker #include "Utils.h"
22*e1eccf28SAndroid Build Coastguard Worker 
23*e1eccf28SAndroid Build Coastguard Worker #define LOG_TAG "renderscript.toolkit.Convolve3x3"
24*e1eccf28SAndroid Build Coastguard Worker 
25*e1eccf28SAndroid Build Coastguard Worker namespace android {
26*e1eccf28SAndroid Build Coastguard Worker namespace renderscript {
27*e1eccf28SAndroid Build Coastguard Worker 
28*e1eccf28SAndroid Build Coastguard Worker extern "C" void rsdIntrinsicConvolve3x3_K(void* dst, const void* y0, const void* y1, const void* y2,
29*e1eccf28SAndroid Build Coastguard Worker                                           const int16_t* coef, uint32_t count);
30*e1eccf28SAndroid Build Coastguard Worker 
31*e1eccf28SAndroid Build Coastguard Worker class Convolve3x3Task : public Task {
32*e1eccf28SAndroid Build Coastguard Worker     const void* mIn;
33*e1eccf28SAndroid Build Coastguard Worker     void* mOut;
34*e1eccf28SAndroid Build Coastguard Worker     // Even though we have exactly 9 coefficients, store them in an array of size 16 so that
35*e1eccf28SAndroid Build Coastguard Worker     // the SIMD instructions can load them in chunks multiple of 8.
36*e1eccf28SAndroid Build Coastguard Worker     float mFp[16];
37*e1eccf28SAndroid Build Coastguard Worker     int16_t mIp[16];
38*e1eccf28SAndroid Build Coastguard Worker 
39*e1eccf28SAndroid Build Coastguard Worker     void kernelU4(uchar* out, uint32_t xstart, uint32_t xend, const uchar* py0, const uchar* py1,
40*e1eccf28SAndroid Build Coastguard Worker                   const uchar* py2);
41*e1eccf28SAndroid Build Coastguard Worker     void convolveU4(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX, size_t sizeY,
42*e1eccf28SAndroid Build Coastguard Worker                     size_t startX, size_t startY, size_t endX, size_t endY);
43*e1eccf28SAndroid Build Coastguard Worker 
44*e1eccf28SAndroid Build Coastguard Worker     // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
45*e1eccf28SAndroid Build Coastguard Worker     virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
46*e1eccf28SAndroid Build Coastguard Worker                              size_t endY) override;
47*e1eccf28SAndroid Build Coastguard Worker 
48*e1eccf28SAndroid Build Coastguard Worker    public:
Convolve3x3Task(const void * in,void * out,size_t vectorSize,size_t sizeX,size_t sizeY,const float * coefficients,const Restriction * restriction)49*e1eccf28SAndroid Build Coastguard Worker     Convolve3x3Task(const void* in, void* out, size_t vectorSize, size_t sizeX, size_t sizeY,
50*e1eccf28SAndroid Build Coastguard Worker                     const float* coefficients, const Restriction* restriction)
51*e1eccf28SAndroid Build Coastguard Worker         : Task{sizeX, sizeY, vectorSize, false, restriction}, mIn{in}, mOut{out} {
52*e1eccf28SAndroid Build Coastguard Worker         for (int ct = 0; ct < 9; ct++) {
53*e1eccf28SAndroid Build Coastguard Worker             mFp[ct] = coefficients[ct];
54*e1eccf28SAndroid Build Coastguard Worker             if (mFp[ct] >= 0) {
55*e1eccf28SAndroid Build Coastguard Worker                 mIp[ct] = (int16_t)(mFp[ct] * 256.f + 0.5f);
56*e1eccf28SAndroid Build Coastguard Worker             } else {
57*e1eccf28SAndroid Build Coastguard Worker                 mIp[ct] = (int16_t)(mFp[ct] * 256.f - 0.5f);
58*e1eccf28SAndroid Build Coastguard Worker             }
59*e1eccf28SAndroid Build Coastguard Worker         }
60*e1eccf28SAndroid Build Coastguard Worker     }
61*e1eccf28SAndroid Build Coastguard Worker };
62*e1eccf28SAndroid Build Coastguard Worker 
63*e1eccf28SAndroid Build Coastguard Worker /**
64*e1eccf28SAndroid Build Coastguard Worker  * Computes one convolution and stores the result in the output. This is used for uchar, uchar2,
65*e1eccf28SAndroid Build Coastguard Worker  * uchar3, and uchar4 vectors.
66*e1eccf28SAndroid Build Coastguard Worker  *
67*e1eccf28SAndroid Build Coastguard Worker  * @tparam InputOutputType Type of the input and output arrays. A vector type, e.g. uchar4.
68*e1eccf28SAndroid Build Coastguard Worker  * @tparam ComputationType Type we use for the intermediate computations.
69*e1eccf28SAndroid Build Coastguard Worker  * @param x The index in the row of the value we'll convolve.
70*e1eccf28SAndroid Build Coastguard Worker  * @param out The location in the output array where we store the value.
71*e1eccf28SAndroid Build Coastguard Worker  * @param py0 The start of the top row.
72*e1eccf28SAndroid Build Coastguard Worker  * @param py1 The start of the middle row.
73*e1eccf28SAndroid Build Coastguard Worker  * @param py2 The start of the bottom row.
74*e1eccf28SAndroid Build Coastguard Worker  * @param coeff Pointer to the float coefficients, in row major format.
75*e1eccf28SAndroid Build Coastguard Worker  * @param sizeX The number of cells of one row.
76*e1eccf28SAndroid Build Coastguard Worker  */
77*e1eccf28SAndroid Build Coastguard Worker template <typename InputOutputType, typename ComputationType>
convolveOneU(uint32_t x,InputOutputType * out,const InputOutputType * py0,const InputOutputType * py1,const InputOutputType * py2,const float * coeff,int32_t sizeX)78*e1eccf28SAndroid Build Coastguard Worker static void convolveOneU(uint32_t x, InputOutputType* out, const InputOutputType* py0,
79*e1eccf28SAndroid Build Coastguard Worker                          const InputOutputType* py1, const InputOutputType* py2, const float* coeff,
80*e1eccf28SAndroid Build Coastguard Worker                          int32_t sizeX) {
81*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = std::max((int32_t)x - 1, 0);
82*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = std::min((int32_t)x + 1, sizeX - 1);
83*e1eccf28SAndroid Build Coastguard Worker 
84*e1eccf28SAndroid Build Coastguard Worker     ComputationType px = convert<ComputationType>(py0[x1]) * coeff[0] +
85*e1eccf28SAndroid Build Coastguard Worker                          convert<ComputationType>(py0[x]) * coeff[1] +
86*e1eccf28SAndroid Build Coastguard Worker                          convert<ComputationType>(py0[x2]) * coeff[2] +
87*e1eccf28SAndroid Build Coastguard Worker                          convert<ComputationType>(py1[x1]) * coeff[3] +
88*e1eccf28SAndroid Build Coastguard Worker                          convert<ComputationType>(py1[x]) * coeff[4] +
89*e1eccf28SAndroid Build Coastguard Worker                          convert<ComputationType>(py1[x2]) * coeff[5] +
90*e1eccf28SAndroid Build Coastguard Worker                          convert<ComputationType>(py2[x1]) * coeff[6] +
91*e1eccf28SAndroid Build Coastguard Worker                          convert<ComputationType>(py2[x]) * coeff[7] +
92*e1eccf28SAndroid Build Coastguard Worker                          convert<ComputationType>(py2[x2]) * coeff[8];
93*e1eccf28SAndroid Build Coastguard Worker 
94*e1eccf28SAndroid Build Coastguard Worker     px = clamp(px + 0.5f, 0.f, 255.f);
95*e1eccf28SAndroid Build Coastguard Worker     *out = convert<InputOutputType>(px);
96*e1eccf28SAndroid Build Coastguard Worker }
97*e1eccf28SAndroid Build Coastguard Worker 
98*e1eccf28SAndroid Build Coastguard Worker #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
/**
 * Convolves a single cell of a row and writes the result to the output. Shared by the float,
 * float2, float3, and float4 code paths. The weighted sum is stored as is, without clamping.
 *
 * When the cell sits on the first or last column, the neighbor that would fall outside of the
 * row is replaced by the cell on that edge.
 *
 * @tparam InputOutputType Type of the cells being read and written. A vector type, e.g. float4.
 * @param x Column of the cell to convolve.
 * @param out Where the resulting cell is stored.
 * @param py0 First cell of the row above.
 * @param py1 First cell of the row that contains the cell.
 * @param py2 First cell of the row below.
 * @param coeff The nine float coefficients, in row major format.
 * @param sizeX Number of cells in one row.
 */
template <typename InputOutputType>
static void ConvolveOneF(uint32_t x, InputOutputType* out, const InputOutputType* py0,
                         const InputOutputType* py1, const InputOutputType* py2, const float* coeff,
                         int32_t sizeX) {
    // Columns of the left and right neighbors, kept inside [0, sizeX - 1].
    const int32_t column = static_cast<int32_t>(x);
    const int32_t lastColumn = sizeX - 1;
    const uint32_t left = (column - 1 < 0) ? 0 : column - 1;
    const uint32_t right = (column + 1 < lastColumn) ? column + 1 : lastColumn;

    // Deliberately a single left-to-right sum: regrouping the terms could change the float
    // result.
    *out = py0[left] * coeff[0] + py0[x] * coeff[1] + py0[right] * coeff[2] +
           py1[left] * coeff[3] + py1[x] * coeff[4] + py1[right] * coeff[5] +
           py2[left] * coeff[6] + py2[x] * coeff[7] + py2[right] * coeff[8];
}
122*e1eccf28SAndroid Build Coastguard Worker #endif  // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
123*e1eccf28SAndroid Build Coastguard Worker 
124*e1eccf28SAndroid Build Coastguard Worker /**
125*e1eccf28SAndroid Build Coastguard Worker  * This function convolves one line.
126*e1eccf28SAndroid Build Coastguard Worker  *
127*e1eccf28SAndroid Build Coastguard Worker  * @param pout Where to place the next output.
128*e1eccf28SAndroid Build Coastguard Worker  * @param xstart Index in the X direction of where to start.
129*e1eccf28SAndroid Build Coastguard Worker  * @param xend End index
130*e1eccf28SAndroid Build Coastguard Worker  * @param ppy0 Points to the start of the previous line.
131*e1eccf28SAndroid Build Coastguard Worker  * @param ppy1 Points to the start of the current line.
132*e1eccf28SAndroid Build Coastguard Worker  * @param ppy2 Points to the start of the next line.
133*e1eccf28SAndroid Build Coastguard Worker  */
kernelU4(uchar * pout,uint32_t xstart,uint32_t xend,const uchar * ppy0,const uchar * ppy1,const uchar * ppy2)134*e1eccf28SAndroid Build Coastguard Worker void Convolve3x3Task::kernelU4(uchar* pout, uint32_t xstart, uint32_t xend, const uchar* ppy0,
135*e1eccf28SAndroid Build Coastguard Worker                                const uchar* ppy1, const uchar* ppy2) {
136*e1eccf28SAndroid Build Coastguard Worker     uchar4* out = (uchar4*)pout;
137*e1eccf28SAndroid Build Coastguard Worker     const uchar4* py0 = (const uchar4*)ppy0;
138*e1eccf28SAndroid Build Coastguard Worker     const uchar4* py1 = (const uchar4*)ppy1;
139*e1eccf28SAndroid Build Coastguard Worker     const uchar4* py2 = (const uchar4*)ppy2;
140*e1eccf28SAndroid Build Coastguard Worker 
141*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = xstart;
142*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = xend;
143*e1eccf28SAndroid Build Coastguard Worker     if (x1 == 0) {
144*e1eccf28SAndroid Build Coastguard Worker         convolveOneU<uchar4, float4>(0, out, py0, py1, py2, mFp, mSizeX);
145*e1eccf28SAndroid Build Coastguard Worker         x1++;
146*e1eccf28SAndroid Build Coastguard Worker         out++;
147*e1eccf28SAndroid Build Coastguard Worker     }
148*e1eccf28SAndroid Build Coastguard Worker 
149*e1eccf28SAndroid Build Coastguard Worker     if (x2 > x1) {
150*e1eccf28SAndroid Build Coastguard Worker #if defined(ARCH_ARM_USE_INTRINSICS) || defined(ARCH_X86_HAVE_SSSE3)
151*e1eccf28SAndroid Build Coastguard Worker         if (mUsesSimd) {
152*e1eccf28SAndroid Build Coastguard Worker             int32_t len = (x2 - x1 - 1) >> 1;
153*e1eccf28SAndroid Build Coastguard Worker             if (len > 0) {
154*e1eccf28SAndroid Build Coastguard Worker                 rsdIntrinsicConvolve3x3_K(out, &py0[x1 - 1], &py1[x1 - 1], &py2[x1 - 1], mIp, len);
155*e1eccf28SAndroid Build Coastguard Worker                 x1 += len << 1;
156*e1eccf28SAndroid Build Coastguard Worker                 out += len << 1;
157*e1eccf28SAndroid Build Coastguard Worker             }
158*e1eccf28SAndroid Build Coastguard Worker         }
159*e1eccf28SAndroid Build Coastguard Worker #endif
160*e1eccf28SAndroid Build Coastguard Worker 
161*e1eccf28SAndroid Build Coastguard Worker         while (x1 != x2) {
162*e1eccf28SAndroid Build Coastguard Worker             convolveOneU<uchar4, float4>(x1, out, py0, py1, py2, mFp, mSizeX);
163*e1eccf28SAndroid Build Coastguard Worker             out++;
164*e1eccf28SAndroid Build Coastguard Worker             x1++;
165*e1eccf28SAndroid Build Coastguard Worker         }
166*e1eccf28SAndroid Build Coastguard Worker     }
167*e1eccf28SAndroid Build Coastguard Worker }
168*e1eccf28SAndroid Build Coastguard Worker 
169*e1eccf28SAndroid Build Coastguard Worker #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
170*e1eccf28SAndroid Build Coastguard Worker template <typename T>
RsdCpuScriptIntrinsicConvolve3x3_kernelF(void * in,T * out,uint32_t xstart,uint32_t xend,uint32_t currentY,size_t sizeX,size_t sizeY,size_t vectorSize,float * fp)171*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3_kernelF(void* in, T* out, uint32_t xstart, uint32_t xend,
172*e1eccf28SAndroid Build Coastguard Worker                                               uint32_t currentY, size_t sizeX, size_t sizeY,
173*e1eccf28SAndroid Build Coastguard Worker                                               size_t vectorSize, float* fp) {
174*e1eccf28SAndroid Build Coastguard Worker     const uchar* pin = (const uchar*)in;
175*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = sizeX * vectorSize * 4;  // float takes 4 bytes
176*e1eccf28SAndroid Build Coastguard Worker 
177*e1eccf28SAndroid Build Coastguard Worker     uint32_t y1 = std::min((int32_t)currentY + 1, (int32_t)(sizeY - 1));
178*e1eccf28SAndroid Build Coastguard Worker     uint32_t y2 = std::max((int32_t)currentY - 1, 0);
179*e1eccf28SAndroid Build Coastguard Worker     const T* py0 = (const T*)(pin + stride * y2);
180*e1eccf28SAndroid Build Coastguard Worker     const T* py1 = (const T*)(pin + stride * currentY);
181*e1eccf28SAndroid Build Coastguard Worker     const T* py2 = (const T*)(pin + stride * y1);
182*e1eccf28SAndroid Build Coastguard Worker 
183*e1eccf28SAndroid Build Coastguard Worker     for (uint32_t x = xstart; x < xend; x++, out++) {
184*e1eccf28SAndroid Build Coastguard Worker         ConvolveOneF<T>(x, out, py0, py1, py2, fp, sizeX);
185*e1eccf28SAndroid Build Coastguard Worker     }
186*e1eccf28SAndroid Build Coastguard Worker }
187*e1eccf28SAndroid Build Coastguard Worker #endif  // ANDROID_RENDERSCRIPT_TOOLKIT_SUPPORTS_FLOAT
188*e1eccf28SAndroid Build Coastguard Worker 
189*e1eccf28SAndroid Build Coastguard Worker template <typename InputOutputType, typename ComputationType>
convolveU(const uchar * pin,uchar * pout,size_t vectorSize,size_t sizeX,size_t sizeY,size_t startX,size_t startY,size_t endX,size_t endY,float * fp)190*e1eccf28SAndroid Build Coastguard Worker static void convolveU(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX, size_t sizeY,
191*e1eccf28SAndroid Build Coastguard Worker                       size_t startX, size_t startY, size_t endX, size_t endY, float* fp) {
192*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = vectorSize * sizeX;
193*e1eccf28SAndroid Build Coastguard Worker     for (size_t y = startY; y < endY; y++) {
194*e1eccf28SAndroid Build Coastguard Worker         uint32_t y1 = std::min((int32_t)y + 1, (int32_t)(sizeY - 1));
195*e1eccf28SAndroid Build Coastguard Worker         uint32_t y2 = std::max((int32_t)y - 1, 0);
196*e1eccf28SAndroid Build Coastguard Worker 
197*e1eccf28SAndroid Build Coastguard Worker         size_t offset = (y * sizeX + startX) * vectorSize;
198*e1eccf28SAndroid Build Coastguard Worker         InputOutputType* px = (InputOutputType*)(pout + offset);
199*e1eccf28SAndroid Build Coastguard Worker         InputOutputType* py0 = (InputOutputType*)(pin + stride * y2);
200*e1eccf28SAndroid Build Coastguard Worker         InputOutputType* py1 = (InputOutputType*)(pin + stride * y);
201*e1eccf28SAndroid Build Coastguard Worker         InputOutputType* py2 = (InputOutputType*)(pin + stride * y1);
202*e1eccf28SAndroid Build Coastguard Worker         for (uint32_t x = startX; x < endX; x++, px++) {
203*e1eccf28SAndroid Build Coastguard Worker             convolveOneU<InputOutputType, ComputationType>(x, px, py0, py1, py2, fp, sizeX);
204*e1eccf28SAndroid Build Coastguard Worker         }
205*e1eccf28SAndroid Build Coastguard Worker     }
206*e1eccf28SAndroid Build Coastguard Worker }
207*e1eccf28SAndroid Build Coastguard Worker 
convolveU4(const uchar * pin,uchar * pout,size_t vectorSize,size_t sizeX,size_t sizeY,size_t startX,size_t startY,size_t endX,size_t endY)208*e1eccf28SAndroid Build Coastguard Worker void Convolve3x3Task::convolveU4(const uchar* pin, uchar* pout, size_t vectorSize, size_t sizeX,
209*e1eccf28SAndroid Build Coastguard Worker                                  size_t sizeY, size_t startX, size_t startY, size_t endX,
210*e1eccf28SAndroid Build Coastguard Worker                                  size_t endY) {
211*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = paddedSize(vectorSize) * sizeX;
212*e1eccf28SAndroid Build Coastguard Worker     for (size_t y = startY; y < endY; y++) {
213*e1eccf28SAndroid Build Coastguard Worker         uint32_t y1 = std::min((int32_t)y + 1, (int32_t)(sizeY - 1));
214*e1eccf28SAndroid Build Coastguard Worker         uint32_t y2 = std::max((int32_t)y - 1, 0);
215*e1eccf28SAndroid Build Coastguard Worker 
216*e1eccf28SAndroid Build Coastguard Worker         size_t offset = (y * sizeX + startX) * paddedSize(vectorSize);
217*e1eccf28SAndroid Build Coastguard Worker         uchar* px = pout + offset;
218*e1eccf28SAndroid Build Coastguard Worker         const uchar* py0 = pin + stride * y2;
219*e1eccf28SAndroid Build Coastguard Worker         const uchar* py1 = pin + stride * y;
220*e1eccf28SAndroid Build Coastguard Worker         const uchar* py2 = pin + stride * y1;
221*e1eccf28SAndroid Build Coastguard Worker         kernelU4(px, startX, endX, py0, py1, py2);
222*e1eccf28SAndroid Build Coastguard Worker     }
223*e1eccf28SAndroid Build Coastguard Worker }
224*e1eccf28SAndroid Build Coastguard Worker 
processData(int,size_t startX,size_t startY,size_t endX,size_t endY)225*e1eccf28SAndroid Build Coastguard Worker void Convolve3x3Task::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX,
226*e1eccf28SAndroid Build Coastguard Worker                                   size_t endY) {
227*e1eccf28SAndroid Build Coastguard Worker     // ALOGI("Thread %d start tile from (%zd, %zd) to (%zd, %zd)", threadIndex, startX, startY,
228*e1eccf28SAndroid Build Coastguard Worker     // endX, endY);
229*e1eccf28SAndroid Build Coastguard Worker     switch (mVectorSize) {
230*e1eccf28SAndroid Build Coastguard Worker         case 1:
231*e1eccf28SAndroid Build Coastguard Worker             convolveU<uchar, float>((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY,
232*e1eccf28SAndroid Build Coastguard Worker                                     startX, startY, endX, endY, mFp);
233*e1eccf28SAndroid Build Coastguard Worker             break;
234*e1eccf28SAndroid Build Coastguard Worker         case 2:
235*e1eccf28SAndroid Build Coastguard Worker             convolveU<uchar2, float2>((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY,
236*e1eccf28SAndroid Build Coastguard Worker                                       startX, startY, endX, endY, mFp);
237*e1eccf28SAndroid Build Coastguard Worker             break;
238*e1eccf28SAndroid Build Coastguard Worker         case 3:
239*e1eccf28SAndroid Build Coastguard Worker         case 4:
240*e1eccf28SAndroid Build Coastguard Worker             convolveU4((const uchar*)mIn, (uchar*)mOut, mVectorSize, mSizeX, mSizeY, startX, startY,
241*e1eccf28SAndroid Build Coastguard Worker                        endX, endY);
242*e1eccf28SAndroid Build Coastguard Worker             break;
243*e1eccf28SAndroid Build Coastguard Worker     }
244*e1eccf28SAndroid Build Coastguard Worker }
245*e1eccf28SAndroid Build Coastguard Worker 
convolve3x3(const void * in,void * out,size_t vectorSize,size_t sizeX,size_t sizeY,const float * coefficients,const Restriction * restriction)246*e1eccf28SAndroid Build Coastguard Worker void RenderScriptToolkit::convolve3x3(const void* in, void* out, size_t vectorSize, size_t sizeX,
247*e1eccf28SAndroid Build Coastguard Worker                                       size_t sizeY, const float* coefficients,
248*e1eccf28SAndroid Build Coastguard Worker                                       const Restriction* restriction) {
249*e1eccf28SAndroid Build Coastguard Worker #ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
250*e1eccf28SAndroid Build Coastguard Worker     if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
251*e1eccf28SAndroid Build Coastguard Worker         return;
252*e1eccf28SAndroid Build Coastguard Worker     }
253*e1eccf28SAndroid Build Coastguard Worker     if (vectorSize < 1 || vectorSize > 4) {
254*e1eccf28SAndroid Build Coastguard Worker         ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
255*e1eccf28SAndroid Build Coastguard Worker         return;
256*e1eccf28SAndroid Build Coastguard Worker     }
257*e1eccf28SAndroid Build Coastguard Worker #endif
258*e1eccf28SAndroid Build Coastguard Worker 
259*e1eccf28SAndroid Build Coastguard Worker     Convolve3x3Task task(in, out, vectorSize, sizeX, sizeY, coefficients, restriction);
260*e1eccf28SAndroid Build Coastguard Worker     processor->doTask(&task);
261*e1eccf28SAndroid Build Coastguard Worker }
262*e1eccf28SAndroid Build Coastguard Worker 
263*e1eccf28SAndroid Build Coastguard Worker }  // namespace renderscript
264*e1eccf28SAndroid Build Coastguard Worker }  // namespace android
265