1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20
21 namespace android {
22 namespace renderscript {
23
24
25 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
26 public:
27 void populateScript(Script *) override;
28
29 ~RsdCpuScriptIntrinsicBlend() override;
30 RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
31
32 protected:
33 static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
34 uint32_t xend, uint32_t outstep);
35 };
36
37 } // namespace renderscript
38 } // namespace android
39
40
// Kernel slot identifiers understood by the blend intrinsic. The numeric
// values are matched against info->slot in the kernel, so they must not be
// renumbered.
//
// Only BLEND_CLEAR..BLEND_XOR, BLEND_MULTIPLY, BLEND_ADD and BLEND_SUBTRACT
// have an implementation; every other slot logs an error and asserts.
enum {
    // Source/destination compositing modes.
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Per-channel arithmetic / photographic modes.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
88
#ifdef ARCH_ARM_USE_INTRINSICS
// SIMD blend routine for ARM builds, defined outside this file. The kernel
// calls it with the blend slot and the pixel range [xstart, xend); a negative
// return value is treated as "not handled" and the scalar code runs instead.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                                   uint32_t xstart, uint32_t xend);
#endif

#ifdef ARCH_X86_HAVE_SSSE3
// SIMD blend routines for x86 builds, defined outside this file; one per
// accelerated blend mode. The kernel passes the number of 8-pixel groups to
// process as count8 and handles any leftover pixels itself.
extern void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
108
109 namespace android {
110 namespace renderscript {
111
112 // Convert vector to uchar4, clipping each value to 255.
113 template <typename TI>
convertClipped(TI amount)114 static inline uchar4 convertClipped(TI amount) {
115 return uchar4 { static_cast<uchar>(amount.x > 255 ? 255 : amount.x),
116 static_cast<uchar>(amount.y > 255 ? 255 : amount.y),
117 static_cast<uchar>(amount.z > 255 ? 255 : amount.z),
118 static_cast<uchar>(amount.w > 255 ? 255 : amount.w)};
119 }
120
kernel(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)121 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
122 uint32_t xstart, uint32_t xend,
123 uint32_t outstep) {
124 // instep/outstep can be ignored--sizeof(uchar4) known at compile time
125 uchar4 *out = (uchar4 *)info->outPtr[0];
126 uchar4 *in = (uchar4 *)info->inPtr[0];
127 uint32_t x1 = xstart;
128 uint32_t x2 = xend;
129
130 #if defined(ARCH_ARM_USE_INTRINSICS)
131 if (gArchUseSIMD) {
132 if (rsdIntrinsicBlend_K(out, in, info->slot, 0, x2 - x1) >= 0) {
133 return;
134 } else {
135 ALOGW("Intrinsic Blend failed to use SIMD for %d", info->slot);
136 }
137 }
138 #endif
139 switch (info->slot) {
140 case BLEND_CLEAR:
141 for (;x1 < x2; x1++, out++) {
142 *out = 0;
143 }
144 break;
145 case BLEND_SRC:
146 for (;x1 < x2; x1++, out++, in++) {
147 *out = *in;
148 }
149 break;
150 //BLEND_DST is a NOP
151 case BLEND_DST:
152 break;
153 case BLEND_SRC_OVER:
154 #if defined(ARCH_X86_HAVE_SSSE3)
155 if (gArchUseSIMD) {
156 if ((x1 + 8) < x2) {
157 uint32_t len = (x2 - x1) >> 3;
158 rsdIntrinsicBlendSrcOver_K(out, in, len);
159 x1 += len << 3;
160 out += len << 3;
161 in += len << 3;
162 }
163 }
164 #endif
165 for (;x1 < x2; x1++, out++, in++) {
166 ushort4 in_s = convert_ushort4(*in);
167 ushort4 out_s = convert_ushort4(*out);
168 in_s = in_s + ((out_s * (ushort4)(255 - in_s.w)) >> (ushort4)8);
169 *out = convertClipped(in_s);
170 }
171 break;
172 case BLEND_DST_OVER:
173 #if defined(ARCH_X86_HAVE_SSSE3)
174 if (gArchUseSIMD) {
175 if ((x1 + 8) < x2) {
176 uint32_t len = (x2 - x1) >> 3;
177 rsdIntrinsicBlendDstOver_K(out, in, len);
178 x1 += len << 3;
179 out += len << 3;
180 in += len << 3;
181 }
182 }
183 #endif
184 for (;x1 < x2; x1++, out++, in++) {
185 ushort4 in_s = convert_ushort4(*in);
186 ushort4 out_s = convert_ushort4(*out);
187 in_s = out_s + ((in_s * (ushort4)(255 - out_s.w)) >> (ushort4)8);
188 *out = convertClipped(in_s);
189 }
190 break;
191 case BLEND_SRC_IN:
192 #if defined(ARCH_X86_HAVE_SSSE3)
193 if (gArchUseSIMD) {
194 if ((x1 + 8) < x2) {
195 uint32_t len = (x2 - x1) >> 3;
196 rsdIntrinsicBlendSrcIn_K(out, in, len);
197 x1 += len << 3;
198 out += len << 3;
199 in += len << 3;
200 }
201 }
202 #endif
203 for (;x1 < x2; x1++, out++, in++) {
204 ushort4 in_s = convert_ushort4(*in);
205 in_s = (in_s * out->w) >> (ushort4)8;
206 *out = convert_uchar4(in_s);
207 }
208 break;
209 case BLEND_DST_IN:
210 #if defined(ARCH_X86_HAVE_SSSE3)
211 if (gArchUseSIMD) {
212 if ((x1 + 8) < x2) {
213 uint32_t len = (x2 - x1) >> 3;
214 rsdIntrinsicBlendDstIn_K(out, in, len);
215 x1 += len << 3;
216 out += len << 3;
217 in += len << 3;
218 }
219 }
220 #endif
221 for (;x1 < x2; x1++, out++, in++) {
222 short4 out_s = convert_short4(*out);
223 out_s = (out_s * in->w) >> (short4)8;
224 *out = convert_uchar4(out_s);
225 }
226 break;
227 case BLEND_SRC_OUT:
228 #if defined(ARCH_X86_HAVE_SSSE3)
229 if (gArchUseSIMD) {
230 if ((x1 + 8) < x2) {
231 uint32_t len = (x2 - x1) >> 3;
232 rsdIntrinsicBlendSrcOut_K(out, in, len);
233 x1 += len << 3;
234 out += len << 3;
235 in += len << 3;
236 }
237 }
238 #endif
239 for (;x1 < x2; x1++, out++, in++) {
240 short4 in_s = convert_short4(*in);
241 in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
242 *out = convert_uchar4(in_s);
243 }
244 break;
245 case BLEND_DST_OUT:
246 #if defined(ARCH_X86_HAVE_SSSE3)
247 if (gArchUseSIMD) {
248 if ((x1 + 8) < x2) {
249 uint32_t len = (x2 - x1) >> 3;
250 rsdIntrinsicBlendDstOut_K(out, in, len);
251 x1 += len << 3;
252 out += len << 3;
253 in += len << 3;
254 }
255 }
256 #endif
257 for (;x1 < x2; x1++, out++, in++) {
258 short4 out_s = convert_short4(*out);
259 out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
260 *out = convert_uchar4(out_s);
261 }
262 break;
263 case BLEND_SRC_ATOP:
264 #if defined(ARCH_X86_HAVE_SSSE3)
265 if (gArchUseSIMD) {
266 if ((x1 + 8) < x2) {
267 uint32_t len = (x2 - x1) >> 3;
268 rsdIntrinsicBlendSrcAtop_K(out, in, len);
269 x1 += len << 3;
270 out += len << 3;
271 in += len << 3;
272 }
273 }
274 #endif
275 for (;x1 < x2; x1++, out++, in++) {
276 // The max value the operation could produce before the shift
277 // is 255 * 255 + 255 * (255 - 0) = 130050, or 0x1FC02.
278 // That value does not fit in a ushort, so we use uint.
279 uint4 in_s = convert_uint4(*in);
280 uint4 out_s = convert_uint4(*out);
281 out_s.xyz = ((in_s.xyz * out_s.w) +
282 (out_s.xyz * ((uint3)255 - (uint3)in_s.w))) >> (uint3)8;
283 *out = convertClipped(out_s);
284 }
285 break;
286 case BLEND_DST_ATOP:
287 #if defined(ARCH_X86_HAVE_SSSE3)
288 if (gArchUseSIMD) {
289 if ((x1 + 8) < x2) {
290 uint32_t len = (x2 - x1) >> 3;
291 rsdIntrinsicBlendDstAtop_K(out, in, len);
292 x1 += len << 3;
293 out += len << 3;
294 in += len << 3;
295 }
296 }
297 #endif
298 for (;x1 < x2; x1++, out++, in++) {
299 uint4 in_s = convert_uint4(*in);
300 uint4 out_s = convert_uint4(*out);
301 out_s.xyz = ((out_s.xyz * in_s.w) +
302 (in_s.xyz * ((uint3)255 - (uint3)out_s.w))) >> (uint3)8;
303 out_s.w = in_s.w;
304 *out = convertClipped(out_s);
305 }
306 break;
307 case BLEND_XOR:
308 #if defined(ARCH_X86_HAVE_SSSE3)
309 if (gArchUseSIMD) {
310 if ((x1 + 8) < x2) {
311 uint32_t len = (x2 - x1) >> 3;
312 rsdIntrinsicBlendXor_K(out, in, len);
313 x1 += len << 3;
314 out += len << 3;
315 in += len << 3;
316 }
317 }
318 #endif
319 for (;x1 < x2; x1++, out++, in++) {
320 *out = *in ^ *out;
321 }
322 break;
323 case BLEND_NORMAL:
324 ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
325 rsAssert(false);
326 break;
327 case BLEND_AVERAGE:
328 ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
329 rsAssert(false);
330 break;
331 case BLEND_MULTIPLY:
332 #if defined(ARCH_X86_HAVE_SSSE3)
333 if (gArchUseSIMD) {
334 if ((x1 + 8) < x2) {
335 uint32_t len = (x2 - x1) >> 3;
336 rsdIntrinsicBlendMultiply_K(out, in, len);
337 x1 += len << 3;
338 out += len << 3;
339 in += len << 3;
340 }
341 }
342 #endif
343 for (;x1 < x2; x1++, out++, in++) {
344 *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
345 >> (short4)8);
346 }
347 break;
348 case BLEND_SCREEN:
349 ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
350 rsAssert(false);
351 break;
352 case BLEND_DARKEN:
353 ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
354 rsAssert(false);
355 break;
356 case BLEND_LIGHTEN:
357 ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
358 rsAssert(false);
359 break;
360 case BLEND_OVERLAY:
361 ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
362 rsAssert(false);
363 break;
364 case BLEND_HARDLIGHT:
365 ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
366 rsAssert(false);
367 break;
368 case BLEND_SOFTLIGHT:
369 ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
370 rsAssert(false);
371 break;
372 case BLEND_DIFFERENCE:
373 ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
374 rsAssert(false);
375 break;
376 case BLEND_NEGATION:
377 ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
378 rsAssert(false);
379 break;
380 case BLEND_EXCLUSION:
381 ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
382 rsAssert(false);
383 break;
384 case BLEND_COLOR_DODGE:
385 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
386 rsAssert(false);
387 break;
388 case BLEND_INVERSE_COLOR_DODGE:
389 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
390 rsAssert(false);
391 break;
392 case BLEND_SOFT_DODGE:
393 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
394 rsAssert(false);
395 break;
396 case BLEND_COLOR_BURN:
397 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
398 rsAssert(false);
399 break;
400 case BLEND_INVERSE_COLOR_BURN:
401 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
402 rsAssert(false);
403 break;
404 case BLEND_SOFT_BURN:
405 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
406 rsAssert(false);
407 break;
408 case BLEND_REFLECT:
409 ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
410 rsAssert(false);
411 break;
412 case BLEND_GLOW:
413 ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
414 rsAssert(false);
415 break;
416 case BLEND_FREEZE:
417 ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
418 rsAssert(false);
419 break;
420 case BLEND_HEAT:
421 ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
422 rsAssert(false);
423 break;
424 case BLEND_ADD:
425 #if defined(ARCH_X86_HAVE_SSSE3)
426 if (gArchUseSIMD) {
427 if((x1 + 8) < x2) {
428 uint32_t len = (x2 - x1) >> 3;
429 rsdIntrinsicBlendAdd_K(out, in, len);
430 x1 += len << 3;
431 out += len << 3;
432 in += len << 3;
433 }
434 }
435 #endif
436 for (;x1 < x2; x1++, out++, in++) {
437 uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
438 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
439 out->x = (oR + iR) > 255 ? 255 : oR + iR;
440 out->y = (oG + iG) > 255 ? 255 : oG + iG;
441 out->z = (oB + iB) > 255 ? 255 : oB + iB;
442 out->w = (oA + iA) > 255 ? 255 : oA + iA;
443 }
444 break;
445 case BLEND_SUBTRACT:
446 #if defined(ARCH_X86_HAVE_SSSE3)
447 if (gArchUseSIMD) {
448 if((x1 + 8) < x2) {
449 uint32_t len = (x2 - x1) >> 3;
450 rsdIntrinsicBlendSub_K(out, in, len);
451 x1 += len << 3;
452 out += len << 3;
453 in += len << 3;
454 }
455 }
456 #endif
457 for (;x1 < x2; x1++, out++, in++) {
458 int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
459 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
460 out->x = (oR - iR) < 0 ? 0 : oR - iR;
461 out->y = (oG - iG) < 0 ? 0 : oG - iG;
462 out->z = (oB - iB) < 0 ? 0 : oB - iB;
463 out->w = (oA - iA) < 0 ? 0 : oA - iA;
464 }
465 break;
466 case BLEND_STAMP:
467 ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
468 rsAssert(false);
469 break;
470 case BLEND_RED:
471 ALOGE("Called unimplemented blend intrinsic BLEND_RED");
472 rsAssert(false);
473 break;
474 case BLEND_GREEN:
475 ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
476 rsAssert(false);
477 break;
478 case BLEND_BLUE:
479 ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
480 rsAssert(false);
481 break;
482 case BLEND_HUE:
483 ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
484 rsAssert(false);
485 break;
486 case BLEND_SATURATION:
487 ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
488 rsAssert(false);
489 break;
490 case BLEND_COLOR:
491 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
492 rsAssert(false);
493 break;
494 case BLEND_LUMINOSITY:
495 ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
496 rsAssert(false);
497 break;
498
499 default:
500 ALOGE("Called unimplemented value %d", info->slot);
501 rsAssert(false);
502
503 }
504 }
505
506
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)507 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
508 const Script *s, const Element *e)
509 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
510
511 mRootPtr = &kernel;
512 }
513
~RsdCpuScriptIntrinsicBlend()514 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
515 }
516
populateScript(Script * s)517 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
518 s->mHal.info.exportedVariableCount = 0;
519 }
520
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)521 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
522 const Script *s, const Element *e) {
523 return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
524 }
525
526 } // namespace renderscript
527 } // namespace android
528