xref: /aosp_15_r20/frameworks/rs/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp (revision e1eccf28f96817838ad6867f7f39d2351ec11f56)
1*e1eccf28SAndroid Build Coastguard Worker /*
2*e1eccf28SAndroid Build Coastguard Worker  * Copyright (C) 2012 The Android Open Source Project
3*e1eccf28SAndroid Build Coastguard Worker  *
4*e1eccf28SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*e1eccf28SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*e1eccf28SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*e1eccf28SAndroid Build Coastguard Worker  *
8*e1eccf28SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*e1eccf28SAndroid Build Coastguard Worker  *
10*e1eccf28SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*e1eccf28SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*e1eccf28SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*e1eccf28SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*e1eccf28SAndroid Build Coastguard Worker  * limitations under the License.
15*e1eccf28SAndroid Build Coastguard Worker  */
16*e1eccf28SAndroid Build Coastguard Worker 
17*e1eccf28SAndroid Build Coastguard Worker 
18*e1eccf28SAndroid Build Coastguard Worker #include "rsCpuIntrinsic.h"
19*e1eccf28SAndroid Build Coastguard Worker #include "rsCpuIntrinsicInlines.h"
20*e1eccf28SAndroid Build Coastguard Worker 
21*e1eccf28SAndroid Build Coastguard Worker namespace android {
22*e1eccf28SAndroid Build Coastguard Worker namespace renderscript {
23*e1eccf28SAndroid Build Coastguard Worker 
24*e1eccf28SAndroid Build Coastguard Worker 
25*e1eccf28SAndroid Build Coastguard Worker class RsdCpuScriptIntrinsicConvolve3x3 : public RsdCpuScriptIntrinsic {
26*e1eccf28SAndroid Build Coastguard Worker public:
27*e1eccf28SAndroid Build Coastguard Worker     void populateScript(Script *) override;
28*e1eccf28SAndroid Build Coastguard Worker     void invokeFreeChildren() override;
29*e1eccf28SAndroid Build Coastguard Worker 
30*e1eccf28SAndroid Build Coastguard Worker     void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override;
31*e1eccf28SAndroid Build Coastguard Worker     void setGlobalObj(uint32_t slot, ObjectBase *data) override;
32*e1eccf28SAndroid Build Coastguard Worker 
33*e1eccf28SAndroid Build Coastguard Worker     ~RsdCpuScriptIntrinsicConvolve3x3() override;
34*e1eccf28SAndroid Build Coastguard Worker     RsdCpuScriptIntrinsicConvolve3x3(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
35*e1eccf28SAndroid Build Coastguard Worker 
36*e1eccf28SAndroid Build Coastguard Worker protected:
37*e1eccf28SAndroid Build Coastguard Worker     float mFp[16];
38*e1eccf28SAndroid Build Coastguard Worker     int16_t mIp[16];
39*e1eccf28SAndroid Build Coastguard Worker     ObjectBaseRef<const Allocation> mAlloc;
40*e1eccf28SAndroid Build Coastguard Worker     ObjectBaseRef<const Element> mElement;
41*e1eccf28SAndroid Build Coastguard Worker 
42*e1eccf28SAndroid Build Coastguard Worker     static void kernelU1(const RsExpandKernelDriverInfo *info,
43*e1eccf28SAndroid Build Coastguard Worker                          uint32_t xstart, uint32_t xend,
44*e1eccf28SAndroid Build Coastguard Worker                          uint32_t outstep);
45*e1eccf28SAndroid Build Coastguard Worker     static void kernelU2(const RsExpandKernelDriverInfo *info,
46*e1eccf28SAndroid Build Coastguard Worker                          uint32_t xstart, uint32_t xend,
47*e1eccf28SAndroid Build Coastguard Worker                          uint32_t outstep);
48*e1eccf28SAndroid Build Coastguard Worker     static void kernelU4(const RsExpandKernelDriverInfo *info,
49*e1eccf28SAndroid Build Coastguard Worker                          uint32_t xstart, uint32_t xend,
50*e1eccf28SAndroid Build Coastguard Worker                          uint32_t outstep);
51*e1eccf28SAndroid Build Coastguard Worker     static void kernelF1(const RsExpandKernelDriverInfo *info,
52*e1eccf28SAndroid Build Coastguard Worker                          uint32_t xstart, uint32_t xend,
53*e1eccf28SAndroid Build Coastguard Worker                          uint32_t outstep);
54*e1eccf28SAndroid Build Coastguard Worker     static void kernelF2(const RsExpandKernelDriverInfo *info,
55*e1eccf28SAndroid Build Coastguard Worker                          uint32_t xstart, uint32_t xend,
56*e1eccf28SAndroid Build Coastguard Worker                          uint32_t outstep);
57*e1eccf28SAndroid Build Coastguard Worker     static void kernelF4(const RsExpandKernelDriverInfo *info,
58*e1eccf28SAndroid Build Coastguard Worker                          uint32_t xstart, uint32_t xend,
59*e1eccf28SAndroid Build Coastguard Worker                          uint32_t outstep);
60*e1eccf28SAndroid Build Coastguard Worker };
61*e1eccf28SAndroid Build Coastguard Worker 
setGlobalObj(uint32_t slot,ObjectBase * data)62*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::setGlobalObj(uint32_t slot, ObjectBase *data) {
63*e1eccf28SAndroid Build Coastguard Worker     rsAssert(slot == 1);
64*e1eccf28SAndroid Build Coastguard Worker     mAlloc.set(static_cast<Allocation *>(data));
65*e1eccf28SAndroid Build Coastguard Worker }
66*e1eccf28SAndroid Build Coastguard Worker 
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)67*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::setGlobalVar(uint32_t slot, const void *data,
68*e1eccf28SAndroid Build Coastguard Worker                                                     size_t dataLength) {
69*e1eccf28SAndroid Build Coastguard Worker     rsAssert(slot == 0);
70*e1eccf28SAndroid Build Coastguard Worker     memcpy (&mFp, data, dataLength);
71*e1eccf28SAndroid Build Coastguard Worker     for(int ct=0; ct < 9; ct++) {
72*e1eccf28SAndroid Build Coastguard Worker         if (mFp[ct] >= 0) {
73*e1eccf28SAndroid Build Coastguard Worker             mIp[ct] = (int16_t)(mFp[ct] * 256.f + 0.5f);
74*e1eccf28SAndroid Build Coastguard Worker         } else {
75*e1eccf28SAndroid Build Coastguard Worker             mIp[ct] = (int16_t)(mFp[ct] * 256.f - 0.5f);
76*e1eccf28SAndroid Build Coastguard Worker         }
77*e1eccf28SAndroid Build Coastguard Worker     }
78*e1eccf28SAndroid Build Coastguard Worker }
79*e1eccf28SAndroid Build Coastguard Worker 
// SIMD convolution routine defined outside this file. kernelU4 calls it with
// the output pointer, the three source rows (each offset one pixel to the left
// of the first output pixel), the int16_t coefficient table and a count; the
// caller advances its position by two pixels per unit of count.
extern "C" void rsdIntrinsicConvolve3x3_K(void *dst,
                                          const void *y0,
                                          const void *y1,
                                          const void *y2,
                                          const int16_t *coef,
                                          uint32_t count);
82*e1eccf28SAndroid Build Coastguard Worker 
83*e1eccf28SAndroid Build Coastguard Worker 
ConvolveOneU4(const RsExpandKernelDriverInfo * info,uint32_t x,uchar4 * out,const uchar4 * py0,const uchar4 * py1,const uchar4 * py2,const float * coeff)84*e1eccf28SAndroid Build Coastguard Worker static void ConvolveOneU4(const RsExpandKernelDriverInfo *info, uint32_t x, uchar4 *out,
85*e1eccf28SAndroid Build Coastguard Worker                           const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
86*e1eccf28SAndroid Build Coastguard Worker                           const float* coeff) {
87*e1eccf28SAndroid Build Coastguard Worker 
88*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = rsMax((int32_t)x-1, 0);
89*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
90*e1eccf28SAndroid Build Coastguard Worker 
91*e1eccf28SAndroid Build Coastguard Worker     float4 px = convert_float4(py0[x1]) * coeff[0] +
92*e1eccf28SAndroid Build Coastguard Worker                 convert_float4(py0[x]) * coeff[1] +
93*e1eccf28SAndroid Build Coastguard Worker                 convert_float4(py0[x2]) * coeff[2] +
94*e1eccf28SAndroid Build Coastguard Worker                 convert_float4(py1[x1]) * coeff[3] +
95*e1eccf28SAndroid Build Coastguard Worker                 convert_float4(py1[x]) * coeff[4] +
96*e1eccf28SAndroid Build Coastguard Worker                 convert_float4(py1[x2]) * coeff[5] +
97*e1eccf28SAndroid Build Coastguard Worker                 convert_float4(py2[x1]) * coeff[6] +
98*e1eccf28SAndroid Build Coastguard Worker                 convert_float4(py2[x]) * coeff[7] +
99*e1eccf28SAndroid Build Coastguard Worker                 convert_float4(py2[x2]) * coeff[8];
100*e1eccf28SAndroid Build Coastguard Worker 
101*e1eccf28SAndroid Build Coastguard Worker     px = clamp(px + 0.5f, 0.f, 255.f);
102*e1eccf28SAndroid Build Coastguard Worker     uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
103*e1eccf28SAndroid Build Coastguard Worker     *out = o;
104*e1eccf28SAndroid Build Coastguard Worker }
105*e1eccf28SAndroid Build Coastguard Worker 
ConvolveOneU2(const RsExpandKernelDriverInfo * info,uint32_t x,uchar2 * out,const uchar2 * py0,const uchar2 * py1,const uchar2 * py2,const float * coeff)106*e1eccf28SAndroid Build Coastguard Worker static void ConvolveOneU2(const RsExpandKernelDriverInfo *info, uint32_t x, uchar2 *out,
107*e1eccf28SAndroid Build Coastguard Worker                           const uchar2 *py0, const uchar2 *py1, const uchar2 *py2,
108*e1eccf28SAndroid Build Coastguard Worker                           const float* coeff) {
109*e1eccf28SAndroid Build Coastguard Worker 
110*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = rsMax((int32_t)x-1, 0);
111*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
112*e1eccf28SAndroid Build Coastguard Worker 
113*e1eccf28SAndroid Build Coastguard Worker     float2 px = convert_float2(py0[x1]) * coeff[0] +
114*e1eccf28SAndroid Build Coastguard Worker                 convert_float2(py0[x]) * coeff[1] +
115*e1eccf28SAndroid Build Coastguard Worker                 convert_float2(py0[x2]) * coeff[2] +
116*e1eccf28SAndroid Build Coastguard Worker                 convert_float2(py1[x1]) * coeff[3] +
117*e1eccf28SAndroid Build Coastguard Worker                 convert_float2(py1[x]) * coeff[4] +
118*e1eccf28SAndroid Build Coastguard Worker                 convert_float2(py1[x2]) * coeff[5] +
119*e1eccf28SAndroid Build Coastguard Worker                 convert_float2(py2[x1]) * coeff[6] +
120*e1eccf28SAndroid Build Coastguard Worker                 convert_float2(py2[x]) * coeff[7] +
121*e1eccf28SAndroid Build Coastguard Worker                 convert_float2(py2[x2]) * coeff[8];
122*e1eccf28SAndroid Build Coastguard Worker 
123*e1eccf28SAndroid Build Coastguard Worker     px = clamp(px + 0.5f, 0.f, 255.f);
124*e1eccf28SAndroid Build Coastguard Worker     *out = convert_uchar2(px);
125*e1eccf28SAndroid Build Coastguard Worker }
126*e1eccf28SAndroid Build Coastguard Worker 
ConvolveOneU1(const RsExpandKernelDriverInfo * info,uint32_t x,uchar * out,const uchar * py0,const uchar * py1,const uchar * py2,const float * coeff)127*e1eccf28SAndroid Build Coastguard Worker static void ConvolveOneU1(const RsExpandKernelDriverInfo *info, uint32_t x, uchar *out,
128*e1eccf28SAndroid Build Coastguard Worker                           const uchar *py0, const uchar *py1, const uchar *py2,
129*e1eccf28SAndroid Build Coastguard Worker                           const float* coeff) {
130*e1eccf28SAndroid Build Coastguard Worker 
131*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = rsMax((int32_t)x-1, 0);
132*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
133*e1eccf28SAndroid Build Coastguard Worker 
134*e1eccf28SAndroid Build Coastguard Worker     float px = ((float)py0[x1]) * coeff[0] +
135*e1eccf28SAndroid Build Coastguard Worker                ((float)py0[x]) * coeff[1] +
136*e1eccf28SAndroid Build Coastguard Worker                ((float)py0[x2]) * coeff[2] +
137*e1eccf28SAndroid Build Coastguard Worker                ((float)py1[x1]) * coeff[3] +
138*e1eccf28SAndroid Build Coastguard Worker                ((float)py1[x]) * coeff[4] +
139*e1eccf28SAndroid Build Coastguard Worker                ((float)py1[x2]) * coeff[5] +
140*e1eccf28SAndroid Build Coastguard Worker                ((float)py2[x1]) * coeff[6] +
141*e1eccf28SAndroid Build Coastguard Worker                ((float)py2[x]) * coeff[7] +
142*e1eccf28SAndroid Build Coastguard Worker                ((float)py2[x2]) * coeff[8];
143*e1eccf28SAndroid Build Coastguard Worker     *out = clamp(px + 0.5f, 0.f, 255.f);
144*e1eccf28SAndroid Build Coastguard Worker }
145*e1eccf28SAndroid Build Coastguard Worker 
ConvolveOneF4(const RsExpandKernelDriverInfo * info,uint32_t x,float4 * out,const float4 * py0,const float4 * py1,const float4 * py2,const float * coeff)146*e1eccf28SAndroid Build Coastguard Worker static void ConvolveOneF4(const RsExpandKernelDriverInfo *info, uint32_t x, float4 *out,
147*e1eccf28SAndroid Build Coastguard Worker                           const float4 *py0, const float4 *py1, const float4 *py2,
148*e1eccf28SAndroid Build Coastguard Worker                           const float* coeff) {
149*e1eccf28SAndroid Build Coastguard Worker 
150*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = rsMax((int32_t)x-1, 0);
151*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
152*e1eccf28SAndroid Build Coastguard Worker     *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
153*e1eccf28SAndroid Build Coastguard Worker            (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
154*e1eccf28SAndroid Build Coastguard Worker            (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
155*e1eccf28SAndroid Build Coastguard Worker }
156*e1eccf28SAndroid Build Coastguard Worker 
ConvolveOneF2(const RsExpandKernelDriverInfo * info,uint32_t x,float2 * out,const float2 * py0,const float2 * py1,const float2 * py2,const float * coeff)157*e1eccf28SAndroid Build Coastguard Worker static void ConvolveOneF2(const RsExpandKernelDriverInfo *info, uint32_t x, float2 *out,
158*e1eccf28SAndroid Build Coastguard Worker                           const float2 *py0, const float2 *py1, const float2 *py2,
159*e1eccf28SAndroid Build Coastguard Worker                           const float* coeff) {
160*e1eccf28SAndroid Build Coastguard Worker 
161*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = rsMax((int32_t)x-1, 0);
162*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
163*e1eccf28SAndroid Build Coastguard Worker     *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
164*e1eccf28SAndroid Build Coastguard Worker            (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
165*e1eccf28SAndroid Build Coastguard Worker            (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
166*e1eccf28SAndroid Build Coastguard Worker }
167*e1eccf28SAndroid Build Coastguard Worker 
ConvolveOneF1(const RsExpandKernelDriverInfo * info,uint32_t x,float * out,const float * py0,const float * py1,const float * py2,const float * coeff)168*e1eccf28SAndroid Build Coastguard Worker static void ConvolveOneF1(const RsExpandKernelDriverInfo *info, uint32_t x, float *out,
169*e1eccf28SAndroid Build Coastguard Worker                           const float *py0, const float *py1, const float *py2,
170*e1eccf28SAndroid Build Coastguard Worker                           const float* coeff) {
171*e1eccf28SAndroid Build Coastguard Worker 
172*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = rsMax((int32_t)x-1, 0);
173*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
174*e1eccf28SAndroid Build Coastguard Worker     *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
175*e1eccf28SAndroid Build Coastguard Worker            (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
176*e1eccf28SAndroid Build Coastguard Worker            (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
177*e1eccf28SAndroid Build Coastguard Worker }
178*e1eccf28SAndroid Build Coastguard Worker 
kernelU4(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)179*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelDriverInfo *info,
180*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t xstart, uint32_t xend,
181*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t outstep) {
182*e1eccf28SAndroid Build Coastguard Worker     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
183*e1eccf28SAndroid Build Coastguard Worker 
184*e1eccf28SAndroid Build Coastguard Worker     if (!cp->mAlloc.get()) {
185*e1eccf28SAndroid Build Coastguard Worker         ALOGE("Convolve3x3 executed without input, skipping");
186*e1eccf28SAndroid Build Coastguard Worker         return;
187*e1eccf28SAndroid Build Coastguard Worker     }
188*e1eccf28SAndroid Build Coastguard Worker     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
189*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
190*e1eccf28SAndroid Build Coastguard Worker 
191*e1eccf28SAndroid Build Coastguard Worker     uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
192*e1eccf28SAndroid Build Coastguard Worker     uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
193*e1eccf28SAndroid Build Coastguard Worker     const uchar4 *py0 = (const uchar4 *)(pin + stride * y2);
194*e1eccf28SAndroid Build Coastguard Worker     const uchar4 *py1 = (const uchar4 *)(pin + stride * info->current.y);
195*e1eccf28SAndroid Build Coastguard Worker     const uchar4 *py2 = (const uchar4 *)(pin + stride * y1);
196*e1eccf28SAndroid Build Coastguard Worker 
197*e1eccf28SAndroid Build Coastguard Worker     uchar4 *out = (uchar4 *)info->outPtr[0];
198*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = xstart;
199*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = xend;
200*e1eccf28SAndroid Build Coastguard Worker     if(x1 == 0) {
201*e1eccf28SAndroid Build Coastguard Worker         ConvolveOneU4(info, 0, out, py0, py1, py2, cp->mFp);
202*e1eccf28SAndroid Build Coastguard Worker         x1 ++;
203*e1eccf28SAndroid Build Coastguard Worker         out++;
204*e1eccf28SAndroid Build Coastguard Worker     }
205*e1eccf28SAndroid Build Coastguard Worker 
206*e1eccf28SAndroid Build Coastguard Worker     if(x2 > x1) {
207*e1eccf28SAndroid Build Coastguard Worker #if defined(ARCH_ARM_USE_INTRINSICS) || defined(ARCH_X86_HAVE_SSSE3)
208*e1eccf28SAndroid Build Coastguard Worker         if (gArchUseSIMD) {
209*e1eccf28SAndroid Build Coastguard Worker             int32_t len = (x2 - x1 - 1) >> 1;
210*e1eccf28SAndroid Build Coastguard Worker             if(len > 0) {
211*e1eccf28SAndroid Build Coastguard Worker                 rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
212*e1eccf28SAndroid Build Coastguard Worker                 x1 += len << 1;
213*e1eccf28SAndroid Build Coastguard Worker                 out += len << 1;
214*e1eccf28SAndroid Build Coastguard Worker             }
215*e1eccf28SAndroid Build Coastguard Worker         }
216*e1eccf28SAndroid Build Coastguard Worker #endif
217*e1eccf28SAndroid Build Coastguard Worker 
218*e1eccf28SAndroid Build Coastguard Worker         while(x1 != x2) {
219*e1eccf28SAndroid Build Coastguard Worker             ConvolveOneU4(info, x1, out, py0, py1, py2, cp->mFp);
220*e1eccf28SAndroid Build Coastguard Worker             out++;
221*e1eccf28SAndroid Build Coastguard Worker             x1++;
222*e1eccf28SAndroid Build Coastguard Worker         }
223*e1eccf28SAndroid Build Coastguard Worker     }
224*e1eccf28SAndroid Build Coastguard Worker }
225*e1eccf28SAndroid Build Coastguard Worker 
kernelU2(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)226*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelDriverInfo *info,
227*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t xstart, uint32_t xend,
228*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t outstep) {
229*e1eccf28SAndroid Build Coastguard Worker     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
230*e1eccf28SAndroid Build Coastguard Worker 
231*e1eccf28SAndroid Build Coastguard Worker     if (!cp->mAlloc.get()) {
232*e1eccf28SAndroid Build Coastguard Worker         ALOGE("Convolve3x3 executed without input, skipping");
233*e1eccf28SAndroid Build Coastguard Worker         return;
234*e1eccf28SAndroid Build Coastguard Worker     }
235*e1eccf28SAndroid Build Coastguard Worker     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
236*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
237*e1eccf28SAndroid Build Coastguard Worker 
238*e1eccf28SAndroid Build Coastguard Worker     uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
239*e1eccf28SAndroid Build Coastguard Worker     uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
240*e1eccf28SAndroid Build Coastguard Worker     const uchar2 *py0 = (const uchar2 *)(pin + stride * y2);
241*e1eccf28SAndroid Build Coastguard Worker     const uchar2 *py1 = (const uchar2 *)(pin + stride * info->current.y);
242*e1eccf28SAndroid Build Coastguard Worker     const uchar2 *py2 = (const uchar2 *)(pin + stride * y1);
243*e1eccf28SAndroid Build Coastguard Worker 
244*e1eccf28SAndroid Build Coastguard Worker     uchar2 *out = (uchar2 *)info->outPtr[0];
245*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = xstart;
246*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = xend;
247*e1eccf28SAndroid Build Coastguard Worker     if(x1 == 0) {
248*e1eccf28SAndroid Build Coastguard Worker         ConvolveOneU2(info, 0, out, py0, py1, py2, cp->mFp);
249*e1eccf28SAndroid Build Coastguard Worker         x1 ++;
250*e1eccf28SAndroid Build Coastguard Worker         out++;
251*e1eccf28SAndroid Build Coastguard Worker     }
252*e1eccf28SAndroid Build Coastguard Worker 
253*e1eccf28SAndroid Build Coastguard Worker     if(x2 > x1) {
254*e1eccf28SAndroid Build Coastguard Worker #if 0//defined(ARCH_ARM_HAVE_NEON)
255*e1eccf28SAndroid Build Coastguard Worker         int32_t len = (x2 - x1 - 1) >> 1;
256*e1eccf28SAndroid Build Coastguard Worker         if(len > 0) {
257*e1eccf28SAndroid Build Coastguard Worker             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
258*e1eccf28SAndroid Build Coastguard Worker             x1 += len << 1;
259*e1eccf28SAndroid Build Coastguard Worker             out += len << 1;
260*e1eccf28SAndroid Build Coastguard Worker         }
261*e1eccf28SAndroid Build Coastguard Worker #endif
262*e1eccf28SAndroid Build Coastguard Worker 
263*e1eccf28SAndroid Build Coastguard Worker         while(x1 != x2) {
264*e1eccf28SAndroid Build Coastguard Worker             ConvolveOneU2(info, x1, out, py0, py1, py2, cp->mFp);
265*e1eccf28SAndroid Build Coastguard Worker             out++;
266*e1eccf28SAndroid Build Coastguard Worker             x1++;
267*e1eccf28SAndroid Build Coastguard Worker         }
268*e1eccf28SAndroid Build Coastguard Worker     }
269*e1eccf28SAndroid Build Coastguard Worker }
270*e1eccf28SAndroid Build Coastguard Worker 
kernelU1(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)271*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelDriverInfo *info,
272*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t xstart, uint32_t xend,
273*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t outstep) {
274*e1eccf28SAndroid Build Coastguard Worker     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
275*e1eccf28SAndroid Build Coastguard Worker 
276*e1eccf28SAndroid Build Coastguard Worker     if (!cp->mAlloc.get()) {
277*e1eccf28SAndroid Build Coastguard Worker         ALOGE("Convolve3x3 executed without input, skipping");
278*e1eccf28SAndroid Build Coastguard Worker         return;
279*e1eccf28SAndroid Build Coastguard Worker     }
280*e1eccf28SAndroid Build Coastguard Worker     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
281*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
282*e1eccf28SAndroid Build Coastguard Worker 
283*e1eccf28SAndroid Build Coastguard Worker     uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
284*e1eccf28SAndroid Build Coastguard Worker     uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
285*e1eccf28SAndroid Build Coastguard Worker     const uchar *py0 = (const uchar *)(pin + stride * y2);
286*e1eccf28SAndroid Build Coastguard Worker     const uchar *py1 = (const uchar *)(pin + stride * info->current.y);
287*e1eccf28SAndroid Build Coastguard Worker     const uchar *py2 = (const uchar *)(pin + stride * y1);
288*e1eccf28SAndroid Build Coastguard Worker 
289*e1eccf28SAndroid Build Coastguard Worker     uchar *out = (uchar *)info->outPtr[0];
290*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = xstart;
291*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = xend;
292*e1eccf28SAndroid Build Coastguard Worker     if(x1 == 0) {
293*e1eccf28SAndroid Build Coastguard Worker         ConvolveOneU1(info, 0, out, py0, py1, py2, cp->mFp);
294*e1eccf28SAndroid Build Coastguard Worker         x1 ++;
295*e1eccf28SAndroid Build Coastguard Worker         out++;
296*e1eccf28SAndroid Build Coastguard Worker     }
297*e1eccf28SAndroid Build Coastguard Worker 
298*e1eccf28SAndroid Build Coastguard Worker     if(x2 > x1) {
299*e1eccf28SAndroid Build Coastguard Worker #if 0//defined(ARCH_ARM_HAVE_NEON)
300*e1eccf28SAndroid Build Coastguard Worker         int32_t len = (x2 - x1 - 1) >> 1;
301*e1eccf28SAndroid Build Coastguard Worker         if(len > 0) {
302*e1eccf28SAndroid Build Coastguard Worker             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
303*e1eccf28SAndroid Build Coastguard Worker             x1 += len << 1;
304*e1eccf28SAndroid Build Coastguard Worker             out += len << 1;
305*e1eccf28SAndroid Build Coastguard Worker         }
306*e1eccf28SAndroid Build Coastguard Worker #endif
307*e1eccf28SAndroid Build Coastguard Worker 
308*e1eccf28SAndroid Build Coastguard Worker         while(x1 != x2) {
309*e1eccf28SAndroid Build Coastguard Worker             ConvolveOneU1(info, x1, out, py0, py1, py2, cp->mFp);
310*e1eccf28SAndroid Build Coastguard Worker             out++;
311*e1eccf28SAndroid Build Coastguard Worker             x1++;
312*e1eccf28SAndroid Build Coastguard Worker         }
313*e1eccf28SAndroid Build Coastguard Worker     }
314*e1eccf28SAndroid Build Coastguard Worker }
315*e1eccf28SAndroid Build Coastguard Worker 
kernelF4(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)316*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelDriverInfo *info,
317*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t xstart, uint32_t xend,
318*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t outstep) {
319*e1eccf28SAndroid Build Coastguard Worker     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
320*e1eccf28SAndroid Build Coastguard Worker 
321*e1eccf28SAndroid Build Coastguard Worker     if (!cp->mAlloc.get()) {
322*e1eccf28SAndroid Build Coastguard Worker         ALOGE("Convolve3x3 executed without input, skipping");
323*e1eccf28SAndroid Build Coastguard Worker         return;
324*e1eccf28SAndroid Build Coastguard Worker     }
325*e1eccf28SAndroid Build Coastguard Worker     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
326*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
327*e1eccf28SAndroid Build Coastguard Worker 
328*e1eccf28SAndroid Build Coastguard Worker     uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
329*e1eccf28SAndroid Build Coastguard Worker     uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
330*e1eccf28SAndroid Build Coastguard Worker     const float4 *py0 = (const float4 *)(pin + stride * y2);
331*e1eccf28SAndroid Build Coastguard Worker     const float4 *py1 = (const float4 *)(pin + stride * info->current.y);
332*e1eccf28SAndroid Build Coastguard Worker     const float4 *py2 = (const float4 *)(pin + stride * y1);
333*e1eccf28SAndroid Build Coastguard Worker 
334*e1eccf28SAndroid Build Coastguard Worker     float4 *out = (float4 *)info->outPtr[0];
335*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = xstart;
336*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = xend;
337*e1eccf28SAndroid Build Coastguard Worker     if(x1 == 0) {
338*e1eccf28SAndroid Build Coastguard Worker         ConvolveOneF4(info, 0, out, py0, py1, py2, cp->mFp);
339*e1eccf28SAndroid Build Coastguard Worker         x1 ++;
340*e1eccf28SAndroid Build Coastguard Worker         out++;
341*e1eccf28SAndroid Build Coastguard Worker     }
342*e1eccf28SAndroid Build Coastguard Worker 
343*e1eccf28SAndroid Build Coastguard Worker     if(x2 > x1) {
344*e1eccf28SAndroid Build Coastguard Worker #if 0//defined(ARCH_ARM_HAVE_NEON)
345*e1eccf28SAndroid Build Coastguard Worker         int32_t len = (x2 - x1 - 1) >> 1;
346*e1eccf28SAndroid Build Coastguard Worker         if(len > 0) {
347*e1eccf28SAndroid Build Coastguard Worker             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
348*e1eccf28SAndroid Build Coastguard Worker             x1 += len << 1;
349*e1eccf28SAndroid Build Coastguard Worker             out += len << 1;
350*e1eccf28SAndroid Build Coastguard Worker         }
351*e1eccf28SAndroid Build Coastguard Worker #endif
352*e1eccf28SAndroid Build Coastguard Worker 
353*e1eccf28SAndroid Build Coastguard Worker         while(x1 != x2) {
354*e1eccf28SAndroid Build Coastguard Worker             ConvolveOneF4(info, x1, out, py0, py1, py2, cp->mFp);
355*e1eccf28SAndroid Build Coastguard Worker             out++;
356*e1eccf28SAndroid Build Coastguard Worker             x1++;
357*e1eccf28SAndroid Build Coastguard Worker         }
358*e1eccf28SAndroid Build Coastguard Worker     }
359*e1eccf28SAndroid Build Coastguard Worker }
360*e1eccf28SAndroid Build Coastguard Worker 
kernelF2(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)361*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelDriverInfo *info,
362*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t xstart, uint32_t xend,
363*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t outstep) {
364*e1eccf28SAndroid Build Coastguard Worker     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
365*e1eccf28SAndroid Build Coastguard Worker 
366*e1eccf28SAndroid Build Coastguard Worker     if (!cp->mAlloc.get()) {
367*e1eccf28SAndroid Build Coastguard Worker         ALOGE("Convolve3x3 executed without input, skipping");
368*e1eccf28SAndroid Build Coastguard Worker         return;
369*e1eccf28SAndroid Build Coastguard Worker     }
370*e1eccf28SAndroid Build Coastguard Worker     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
371*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
372*e1eccf28SAndroid Build Coastguard Worker 
373*e1eccf28SAndroid Build Coastguard Worker     uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
374*e1eccf28SAndroid Build Coastguard Worker     uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
375*e1eccf28SAndroid Build Coastguard Worker     const float2 *py0 = (const float2 *)(pin + stride * y2);
376*e1eccf28SAndroid Build Coastguard Worker     const float2 *py1 = (const float2 *)(pin + stride * info->current.y);
377*e1eccf28SAndroid Build Coastguard Worker     const float2 *py2 = (const float2 *)(pin + stride * y1);
378*e1eccf28SAndroid Build Coastguard Worker 
379*e1eccf28SAndroid Build Coastguard Worker     float2 *out = (float2 *)info->outPtr[0];
380*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = xstart;
381*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = xend;
382*e1eccf28SAndroid Build Coastguard Worker     if(x1 == 0) {
383*e1eccf28SAndroid Build Coastguard Worker         ConvolveOneF2(info, 0, out, py0, py1, py2, cp->mFp);
384*e1eccf28SAndroid Build Coastguard Worker         x1 ++;
385*e1eccf28SAndroid Build Coastguard Worker         out++;
386*e1eccf28SAndroid Build Coastguard Worker     }
387*e1eccf28SAndroid Build Coastguard Worker 
388*e1eccf28SAndroid Build Coastguard Worker     if(x2 > x1) {
389*e1eccf28SAndroid Build Coastguard Worker #if 0//defined(ARCH_ARM_HAVE_NEON)
390*e1eccf28SAndroid Build Coastguard Worker         int32_t len = (x2 - x1 - 1) >> 1;
391*e1eccf28SAndroid Build Coastguard Worker         if(len > 0) {
392*e1eccf28SAndroid Build Coastguard Worker             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
393*e1eccf28SAndroid Build Coastguard Worker             x1 += len << 1;
394*e1eccf28SAndroid Build Coastguard Worker             out += len << 1;
395*e1eccf28SAndroid Build Coastguard Worker         }
396*e1eccf28SAndroid Build Coastguard Worker #endif
397*e1eccf28SAndroid Build Coastguard Worker 
398*e1eccf28SAndroid Build Coastguard Worker         while(x1 != x2) {
399*e1eccf28SAndroid Build Coastguard Worker             ConvolveOneF2(info, x1, out, py0, py1, py2, cp->mFp);
400*e1eccf28SAndroid Build Coastguard Worker             out++;
401*e1eccf28SAndroid Build Coastguard Worker             x1++;
402*e1eccf28SAndroid Build Coastguard Worker         }
403*e1eccf28SAndroid Build Coastguard Worker     }
404*e1eccf28SAndroid Build Coastguard Worker }
kernelF1(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)405*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelDriverInfo *info,
406*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t xstart, uint32_t xend,
407*e1eccf28SAndroid Build Coastguard Worker                                                 uint32_t outstep) {
408*e1eccf28SAndroid Build Coastguard Worker     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
409*e1eccf28SAndroid Build Coastguard Worker 
410*e1eccf28SAndroid Build Coastguard Worker     if (!cp->mAlloc.get()) {
411*e1eccf28SAndroid Build Coastguard Worker         ALOGE("Convolve3x3 executed without input, skipping");
412*e1eccf28SAndroid Build Coastguard Worker         return;
413*e1eccf28SAndroid Build Coastguard Worker     }
414*e1eccf28SAndroid Build Coastguard Worker     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
415*e1eccf28SAndroid Build Coastguard Worker     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
416*e1eccf28SAndroid Build Coastguard Worker 
417*e1eccf28SAndroid Build Coastguard Worker     uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
418*e1eccf28SAndroid Build Coastguard Worker     uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
419*e1eccf28SAndroid Build Coastguard Worker     const float *py0 = (const float *)(pin + stride * y2);
420*e1eccf28SAndroid Build Coastguard Worker     const float *py1 = (const float *)(pin + stride * info->current.y);
421*e1eccf28SAndroid Build Coastguard Worker     const float *py2 = (const float *)(pin + stride * y1);
422*e1eccf28SAndroid Build Coastguard Worker 
423*e1eccf28SAndroid Build Coastguard Worker     float *out = (float *)info->outPtr[0];
424*e1eccf28SAndroid Build Coastguard Worker     uint32_t x1 = xstart;
425*e1eccf28SAndroid Build Coastguard Worker     uint32_t x2 = xend;
426*e1eccf28SAndroid Build Coastguard Worker     if(x1 == 0) {
427*e1eccf28SAndroid Build Coastguard Worker         ConvolveOneF1(info, 0, out, py0, py1, py2, cp->mFp);
428*e1eccf28SAndroid Build Coastguard Worker         x1 ++;
429*e1eccf28SAndroid Build Coastguard Worker         out++;
430*e1eccf28SAndroid Build Coastguard Worker     }
431*e1eccf28SAndroid Build Coastguard Worker 
432*e1eccf28SAndroid Build Coastguard Worker     if(x2 > x1) {
433*e1eccf28SAndroid Build Coastguard Worker #if 0//defined(ARCH_ARM_HAVE_NEON)
434*e1eccf28SAndroid Build Coastguard Worker         int32_t len = (x2 - x1 - 1) >> 1;
435*e1eccf28SAndroid Build Coastguard Worker         if(len > 0) {
436*e1eccf28SAndroid Build Coastguard Worker             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
437*e1eccf28SAndroid Build Coastguard Worker             x1 += len << 1;
438*e1eccf28SAndroid Build Coastguard Worker             out += len << 1;
439*e1eccf28SAndroid Build Coastguard Worker         }
440*e1eccf28SAndroid Build Coastguard Worker #endif
441*e1eccf28SAndroid Build Coastguard Worker 
442*e1eccf28SAndroid Build Coastguard Worker         while(x1 != x2) {
443*e1eccf28SAndroid Build Coastguard Worker             ConvolveOneF1(info, x1, out, py0, py1, py2, cp->mFp);
444*e1eccf28SAndroid Build Coastguard Worker             out++;
445*e1eccf28SAndroid Build Coastguard Worker             x1++;
446*e1eccf28SAndroid Build Coastguard Worker         }
447*e1eccf28SAndroid Build Coastguard Worker     }
448*e1eccf28SAndroid Build Coastguard Worker }
449*e1eccf28SAndroid Build Coastguard Worker 
RsdCpuScriptIntrinsicConvolve3x3(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)450*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsicConvolve3x3::RsdCpuScriptIntrinsicConvolve3x3(
451*e1eccf28SAndroid Build Coastguard Worker             RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
452*e1eccf28SAndroid Build Coastguard Worker             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3) {
453*e1eccf28SAndroid Build Coastguard Worker 
454*e1eccf28SAndroid Build Coastguard Worker     if (e->getType() == RS_TYPE_FLOAT_32) {
455*e1eccf28SAndroid Build Coastguard Worker         switch(e->getVectorSize()) {
456*e1eccf28SAndroid Build Coastguard Worker         case 1:
457*e1eccf28SAndroid Build Coastguard Worker             mRootPtr = &kernelF1;
458*e1eccf28SAndroid Build Coastguard Worker             break;
459*e1eccf28SAndroid Build Coastguard Worker         case 2:
460*e1eccf28SAndroid Build Coastguard Worker             mRootPtr = &kernelF2;
461*e1eccf28SAndroid Build Coastguard Worker             break;
462*e1eccf28SAndroid Build Coastguard Worker         case 3:
463*e1eccf28SAndroid Build Coastguard Worker         case 4:
464*e1eccf28SAndroid Build Coastguard Worker             mRootPtr = &kernelF4;
465*e1eccf28SAndroid Build Coastguard Worker             break;
466*e1eccf28SAndroid Build Coastguard Worker         }
467*e1eccf28SAndroid Build Coastguard Worker     } else {
468*e1eccf28SAndroid Build Coastguard Worker         switch(e->getVectorSize()) {
469*e1eccf28SAndroid Build Coastguard Worker         case 1:
470*e1eccf28SAndroid Build Coastguard Worker             mRootPtr = &kernelU1;
471*e1eccf28SAndroid Build Coastguard Worker             break;
472*e1eccf28SAndroid Build Coastguard Worker         case 2:
473*e1eccf28SAndroid Build Coastguard Worker             mRootPtr = &kernelU2;
474*e1eccf28SAndroid Build Coastguard Worker             break;
475*e1eccf28SAndroid Build Coastguard Worker         case 3:
476*e1eccf28SAndroid Build Coastguard Worker         case 4:
477*e1eccf28SAndroid Build Coastguard Worker             mRootPtr = &kernelU4;
478*e1eccf28SAndroid Build Coastguard Worker             break;
479*e1eccf28SAndroid Build Coastguard Worker         }
480*e1eccf28SAndroid Build Coastguard Worker     }
481*e1eccf28SAndroid Build Coastguard Worker     for(int ct=0; ct < 9; ct++) {
482*e1eccf28SAndroid Build Coastguard Worker         mFp[ct] = 1.f / 9.f;
483*e1eccf28SAndroid Build Coastguard Worker         mIp[ct] = (int16_t)(mFp[ct] * 256.f + 0.5f);
484*e1eccf28SAndroid Build Coastguard Worker     }
485*e1eccf28SAndroid Build Coastguard Worker }
486*e1eccf28SAndroid Build Coastguard Worker 
~RsdCpuScriptIntrinsicConvolve3x3()487*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsicConvolve3x3::~RsdCpuScriptIntrinsicConvolve3x3() {
488*e1eccf28SAndroid Build Coastguard Worker }
489*e1eccf28SAndroid Build Coastguard Worker 
populateScript(Script * s)490*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::populateScript(Script *s) {
491*e1eccf28SAndroid Build Coastguard Worker     s->mHal.info.exportedVariableCount = 2;
492*e1eccf28SAndroid Build Coastguard Worker }
493*e1eccf28SAndroid Build Coastguard Worker 
invokeFreeChildren()494*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicConvolve3x3::invokeFreeChildren() {
495*e1eccf28SAndroid Build Coastguard Worker     mAlloc.clear();
496*e1eccf28SAndroid Build Coastguard Worker }
497*e1eccf28SAndroid Build Coastguard Worker 
rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)498*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
499*e1eccf28SAndroid Build Coastguard Worker 
500*e1eccf28SAndroid Build Coastguard Worker     return new RsdCpuScriptIntrinsicConvolve3x3(ctx, s, e);
501*e1eccf28SAndroid Build Coastguard Worker }
502*e1eccf28SAndroid Build Coastguard Worker 
503*e1eccf28SAndroid Build Coastguard Worker } // namespace renderscript
504*e1eccf28SAndroid Build Coastguard Worker } // namespace android
505