xref: /aosp_15_r20/frameworks/rs/cpu_ref/rsCpuIntrinsicBlend.cpp (revision e1eccf28f96817838ad6867f7f39d2351ec11f56)
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 namespace android {
22 namespace renderscript {
23 
24 
25 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
26 public:
27     void populateScript(Script *) override;
28 
29     ~RsdCpuScriptIntrinsicBlend() override;
30     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
31 
32 protected:
33     static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
34                        uint32_t xend, uint32_t outstep);
35 };
36 
37 } // namespace renderscript
38 } // namespace android
39 
40 
// Blend-mode selectors compared against info->slot by the blend kernel.
// The numeric values are spelled out explicitly; presumably they mirror slot
// numbers used by callers outside this file, so do not renumber them without
// confirming.
enum {
    // Modes 0-11 all have an implementation in the kernel.
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Of the remaining modes only MULTIPLY, ADD and SUBTRACT are implemented;
    // the kernel logs an error and asserts for every other one.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
88 
#if defined(ARCH_ARM_USE_INTRINSICS)
// SIMD entry point used when ARCH_ARM_USE_INTRINSICS is defined.  `slot` is
// the BLEND_* mode.  The blend kernel calls it with xstart = 0 and
// xend = pixel count, and treats a negative return value as "mode not
// handled", in which case it falls back to the scalar code.
// NOTE(review): the definition is not in this file -- confirm the exact
// contract against the implementation.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                    uint32_t xstart, uint32_t xend);
#endif
93 
#if defined(ARCH_X86_HAVE_SSSE3)
// Per-mode SIMD kernels used when ARCH_X86_HAVE_SSSE3 is defined.  `dst` is
// the in/out destination and `src` the source.  The blend kernel passes
// `count8` as the remaining pixel count divided by 8 and then advances its
// cursors by count8 * 8 pixels, i.e. each call consumes whole groups of
// eight pixels.
// NOTE(review): the definitions are not in this file -- confirm alignment
// and rounding behaviour against the implementation.
extern void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
108 
109 namespace android {
110 namespace renderscript {
111 
112 // Convert vector to uchar4, clipping each value to 255.
113 template <typename TI>
convertClipped(TI amount)114 static inline uchar4 convertClipped(TI amount) {
115     return uchar4 { static_cast<uchar>(amount.x > 255 ? 255 : amount.x),
116                     static_cast<uchar>(amount.y > 255 ? 255 : amount.y),
117                     static_cast<uchar>(amount.z > 255 ? 255 : amount.z),
118                     static_cast<uchar>(amount.w > 255 ? 255 : amount.w)};
119 }
120 
kernel(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)121 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
122                                         uint32_t xstart, uint32_t xend,
123                                         uint32_t outstep) {
124     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
125     uchar4 *out = (uchar4 *)info->outPtr[0];
126     uchar4 *in = (uchar4 *)info->inPtr[0];
127     uint32_t x1 = xstart;
128     uint32_t x2 = xend;
129 
130 #if defined(ARCH_ARM_USE_INTRINSICS)
131     if (gArchUseSIMD) {
132         if (rsdIntrinsicBlend_K(out, in, info->slot, 0, x2 - x1) >= 0) {
133             return;
134         } else {
135             ALOGW("Intrinsic Blend failed to use SIMD for %d", info->slot);
136         }
137     }
138 #endif
139     switch (info->slot) {
140     case BLEND_CLEAR:
141         for (;x1 < x2; x1++, out++) {
142             *out = 0;
143         }
144         break;
145     case BLEND_SRC:
146         for (;x1 < x2; x1++, out++, in++) {
147           *out = *in;
148         }
149         break;
150     //BLEND_DST is a NOP
151     case BLEND_DST:
152         break;
153     case BLEND_SRC_OVER:
154     #if defined(ARCH_X86_HAVE_SSSE3)
155         if (gArchUseSIMD) {
156             if ((x1 + 8) < x2) {
157                 uint32_t len = (x2 - x1) >> 3;
158                 rsdIntrinsicBlendSrcOver_K(out, in, len);
159                 x1 += len << 3;
160                 out += len << 3;
161                 in += len << 3;
162             }
163         }
164     #endif
165         for (;x1 < x2; x1++, out++, in++) {
166             ushort4 in_s = convert_ushort4(*in);
167             ushort4 out_s = convert_ushort4(*out);
168             in_s = in_s + ((out_s * (ushort4)(255 - in_s.w)) >> (ushort4)8);
169             *out = convertClipped(in_s);
170         }
171         break;
172     case BLEND_DST_OVER:
173     #if defined(ARCH_X86_HAVE_SSSE3)
174         if (gArchUseSIMD) {
175             if ((x1 + 8) < x2) {
176                 uint32_t len = (x2 - x1) >> 3;
177                 rsdIntrinsicBlendDstOver_K(out, in, len);
178                 x1 += len << 3;
179                 out += len << 3;
180                 in += len << 3;
181             }
182         }
183      #endif
184         for (;x1 < x2; x1++, out++, in++) {
185             ushort4 in_s = convert_ushort4(*in);
186             ushort4 out_s = convert_ushort4(*out);
187             in_s = out_s + ((in_s * (ushort4)(255 - out_s.w)) >> (ushort4)8);
188             *out = convertClipped(in_s);
189         }
190         break;
191     case BLEND_SRC_IN:
192     #if defined(ARCH_X86_HAVE_SSSE3)
193         if (gArchUseSIMD) {
194             if ((x1 + 8) < x2) {
195                 uint32_t len = (x2 - x1) >> 3;
196                 rsdIntrinsicBlendSrcIn_K(out, in, len);
197                 x1 += len << 3;
198                 out += len << 3;
199                 in += len << 3;
200             }
201         }
202     #endif
203         for (;x1 < x2; x1++, out++, in++) {
204             ushort4 in_s = convert_ushort4(*in);
205             in_s = (in_s * out->w) >> (ushort4)8;
206             *out = convert_uchar4(in_s);
207         }
208         break;
209     case BLEND_DST_IN:
210     #if defined(ARCH_X86_HAVE_SSSE3)
211         if (gArchUseSIMD) {
212             if ((x1 + 8) < x2) {
213                 uint32_t len = (x2 - x1) >> 3;
214                 rsdIntrinsicBlendDstIn_K(out, in, len);
215                 x1 += len << 3;
216                 out += len << 3;
217                 in += len << 3;
218             }
219         }
220      #endif
221         for (;x1 < x2; x1++, out++, in++) {
222             short4 out_s = convert_short4(*out);
223             out_s = (out_s * in->w) >> (short4)8;
224             *out = convert_uchar4(out_s);
225         }
226         break;
227     case BLEND_SRC_OUT:
228     #if defined(ARCH_X86_HAVE_SSSE3)
229         if (gArchUseSIMD) {
230             if ((x1 + 8) < x2) {
231                 uint32_t len = (x2 - x1) >> 3;
232                 rsdIntrinsicBlendSrcOut_K(out, in, len);
233                 x1 += len << 3;
234                 out += len << 3;
235                 in += len << 3;
236             }
237         }
238     #endif
239         for (;x1 < x2; x1++, out++, in++) {
240             short4 in_s = convert_short4(*in);
241             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
242             *out = convert_uchar4(in_s);
243         }
244         break;
245     case BLEND_DST_OUT:
246     #if defined(ARCH_X86_HAVE_SSSE3)
247         if (gArchUseSIMD) {
248             if ((x1 + 8) < x2) {
249                 uint32_t len = (x2 - x1) >> 3;
250                 rsdIntrinsicBlendDstOut_K(out, in, len);
251                 x1 += len << 3;
252                 out += len << 3;
253                 in += len << 3;
254             }
255         }
256     #endif
257         for (;x1 < x2; x1++, out++, in++) {
258             short4 out_s = convert_short4(*out);
259             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
260             *out = convert_uchar4(out_s);
261         }
262         break;
263     case BLEND_SRC_ATOP:
264     #if defined(ARCH_X86_HAVE_SSSE3)
265         if (gArchUseSIMD) {
266             if ((x1 + 8) < x2) {
267                 uint32_t len = (x2 - x1) >> 3;
268                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
269                 x1 += len << 3;
270                 out += len << 3;
271                 in += len << 3;
272             }
273         }
274     #endif
275         for (;x1 < x2; x1++, out++, in++) {
276             // The max value the operation could produce before the shift
277             // is 255 * 255 + 255 * (255 - 0) = 130050, or 0x1FC02.
278             // That value does not fit in a ushort, so we use uint.
279             uint4 in_s = convert_uint4(*in);
280             uint4 out_s = convert_uint4(*out);
281             out_s.xyz = ((in_s.xyz * out_s.w) +
282               (out_s.xyz * ((uint3)255 - (uint3)in_s.w))) >> (uint3)8;
283             *out = convertClipped(out_s);
284         }
285         break;
286     case BLEND_DST_ATOP:
287     #if defined(ARCH_X86_HAVE_SSSE3)
288         if (gArchUseSIMD) {
289             if ((x1 + 8) < x2) {
290                 uint32_t len = (x2 - x1) >> 3;
291                 rsdIntrinsicBlendDstAtop_K(out, in, len);
292                 x1 += len << 3;
293                 out += len << 3;
294                 in += len << 3;
295             }
296         }
297      #endif
298         for (;x1 < x2; x1++, out++, in++) {
299             uint4 in_s = convert_uint4(*in);
300             uint4 out_s = convert_uint4(*out);
301             out_s.xyz = ((out_s.xyz * in_s.w) +
302               (in_s.xyz * ((uint3)255 - (uint3)out_s.w))) >> (uint3)8;
303             out_s.w = in_s.w;
304             *out = convertClipped(out_s);
305         }
306         break;
307     case BLEND_XOR:
308     #if defined(ARCH_X86_HAVE_SSSE3)
309         if (gArchUseSIMD) {
310             if ((x1 + 8) < x2) {
311                 uint32_t len = (x2 - x1) >> 3;
312                 rsdIntrinsicBlendXor_K(out, in, len);
313                 x1 += len << 3;
314                 out += len << 3;
315                 in += len << 3;
316             }
317         }
318     #endif
319         for (;x1 < x2; x1++, out++, in++) {
320             *out = *in ^ *out;
321         }
322         break;
323     case BLEND_NORMAL:
324         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
325         rsAssert(false);
326         break;
327     case BLEND_AVERAGE:
328         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
329         rsAssert(false);
330         break;
331     case BLEND_MULTIPLY:
332     #if defined(ARCH_X86_HAVE_SSSE3)
333         if (gArchUseSIMD) {
334             if ((x1 + 8) < x2) {
335                 uint32_t len = (x2 - x1) >> 3;
336                 rsdIntrinsicBlendMultiply_K(out, in, len);
337                 x1 += len << 3;
338                 out += len << 3;
339                 in += len << 3;
340             }
341         }
342     #endif
343         for (;x1 < x2; x1++, out++, in++) {
344           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
345                                 >> (short4)8);
346         }
347         break;
348     case BLEND_SCREEN:
349         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
350         rsAssert(false);
351         break;
352     case BLEND_DARKEN:
353         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
354         rsAssert(false);
355         break;
356     case BLEND_LIGHTEN:
357         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
358         rsAssert(false);
359         break;
360     case BLEND_OVERLAY:
361         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
362         rsAssert(false);
363         break;
364     case BLEND_HARDLIGHT:
365         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
366         rsAssert(false);
367         break;
368     case BLEND_SOFTLIGHT:
369         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
370         rsAssert(false);
371         break;
372     case BLEND_DIFFERENCE:
373         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
374         rsAssert(false);
375         break;
376     case BLEND_NEGATION:
377         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
378         rsAssert(false);
379         break;
380     case BLEND_EXCLUSION:
381         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
382         rsAssert(false);
383         break;
384     case BLEND_COLOR_DODGE:
385         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
386         rsAssert(false);
387         break;
388     case BLEND_INVERSE_COLOR_DODGE:
389         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
390         rsAssert(false);
391         break;
392     case BLEND_SOFT_DODGE:
393         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
394         rsAssert(false);
395         break;
396     case BLEND_COLOR_BURN:
397         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
398         rsAssert(false);
399         break;
400     case BLEND_INVERSE_COLOR_BURN:
401         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
402         rsAssert(false);
403         break;
404     case BLEND_SOFT_BURN:
405         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
406         rsAssert(false);
407         break;
408     case BLEND_REFLECT:
409         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
410         rsAssert(false);
411         break;
412     case BLEND_GLOW:
413         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
414         rsAssert(false);
415         break;
416     case BLEND_FREEZE:
417         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
418         rsAssert(false);
419         break;
420     case BLEND_HEAT:
421         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
422         rsAssert(false);
423         break;
424     case BLEND_ADD:
425     #if defined(ARCH_X86_HAVE_SSSE3)
426         if (gArchUseSIMD) {
427             if((x1 + 8) < x2) {
428                 uint32_t len = (x2 - x1) >> 3;
429                 rsdIntrinsicBlendAdd_K(out, in, len);
430                 x1 += len << 3;
431                 out += len << 3;
432                 in += len << 3;
433             }
434         }
435     #endif
436         for (;x1 < x2; x1++, out++, in++) {
437             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
438                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
439             out->x = (oR + iR) > 255 ? 255 : oR + iR;
440             out->y = (oG + iG) > 255 ? 255 : oG + iG;
441             out->z = (oB + iB) > 255 ? 255 : oB + iB;
442             out->w = (oA + iA) > 255 ? 255 : oA + iA;
443         }
444         break;
445     case BLEND_SUBTRACT:
446     #if defined(ARCH_X86_HAVE_SSSE3)
447         if (gArchUseSIMD) {
448             if((x1 + 8) < x2) {
449                 uint32_t len = (x2 - x1) >> 3;
450                 rsdIntrinsicBlendSub_K(out, in, len);
451                 x1 += len << 3;
452                 out += len << 3;
453                 in += len << 3;
454             }
455         }
456     #endif
457         for (;x1 < x2; x1++, out++, in++) {
458             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
459                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
460             out->x = (oR - iR) < 0 ? 0 : oR - iR;
461             out->y = (oG - iG) < 0 ? 0 : oG - iG;
462             out->z = (oB - iB) < 0 ? 0 : oB - iB;
463             out->w = (oA - iA) < 0 ? 0 : oA - iA;
464         }
465         break;
466     case BLEND_STAMP:
467         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
468         rsAssert(false);
469         break;
470     case BLEND_RED:
471         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
472         rsAssert(false);
473         break;
474     case BLEND_GREEN:
475         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
476         rsAssert(false);
477         break;
478     case BLEND_BLUE:
479         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
480         rsAssert(false);
481         break;
482     case BLEND_HUE:
483         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
484         rsAssert(false);
485         break;
486     case BLEND_SATURATION:
487         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
488         rsAssert(false);
489         break;
490     case BLEND_COLOR:
491         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
492         rsAssert(false);
493         break;
494     case BLEND_LUMINOSITY:
495         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
496         rsAssert(false);
497         break;
498 
499     default:
500         ALOGE("Called unimplemented value %d", info->slot);
501         rsAssert(false);
502 
503     }
504 }
505 
506 
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)507 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
508                                                        const Script *s, const Element *e)
509             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
510 
511     mRootPtr = &kernel;
512 }
513 
~RsdCpuScriptIntrinsicBlend()514 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
515 }
516 
populateScript(Script * s)517 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
518     s->mHal.info.exportedVariableCount = 0;
519 }
520 
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)521 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
522                                       const Script *s, const Element *e) {
523     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
524 }
525 
526 } // namespace renderscript
527 } // namespace android
528