1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/skcms_public.h" // NO_G3_REWRITE
9 #include "src/skcms_internals.h" // NO_G3_REWRITE
10 #include "src/skcms_Transform.h" // NO_G3_REWRITE
11 #include <assert.h>
12 #include <float.h>
13 #include <limits.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #if defined(__ARM_NEON)
18 #include <arm_neon.h>
19 #elif defined(__SSE__)
20 #include <immintrin.h>
21
22 #if defined(__clang__)
23 // That #include <immintrin.h> is usually enough, but Clang's headers
24 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
25 // defined, because lots of programs on Windows would include that and
26 // it'd be a lot slower. But we want all those headers included so we
27 // can use their features after runtime checks later.
28 #include <smmintrin.h>
29 #include <avxintrin.h>
30 #include <avx2intrin.h>
31 #include <avx512fintrin.h>
32 #include <avx512dqintrin.h>
33 #endif
34 #endif
35
36 using namespace skcms_private;
37
38 static bool sAllowRuntimeCPUDetection = true;
39
skcms_DisableRuntimeCPUDetection()40 void skcms_DisableRuntimeCPUDetection() {
41 sAllowRuntimeCPUDetection = false;
42 }
43
log2f_(float x)44 static float log2f_(float x) {
45 // The first approximation of log2(x) is its exponent 'e', minus 127.
46 int32_t bits;
47 memcpy(&bits, &x, sizeof(bits));
48
49 float e = (float)bits * (1.0f / (1<<23));
50
51 // If we use the mantissa too we can refine the error signficantly.
52 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
53 float m;
54 memcpy(&m, &m_bits, sizeof(m));
55
56 return (e - 124.225514990f
57 - 1.498030302f*m
58 - 1.725879990f/(0.3520887068f + m));
59 }
logf_(float x)60 static float logf_(float x) {
61 const float ln2 = 0.69314718f;
62 return ln2*log2f_(x);
63 }
64
exp2f_(float x)65 static float exp2f_(float x) {
66 if (x > 128.0f) {
67 return INFINITY_;
68 } else if (x < -127.0f) {
69 return 0.0f;
70 }
71 float fract = x - floorf_(x);
72
73 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
74 - 1.490129070f*fract
75 + 27.728023300f/(4.84252568f - fract));
76
77 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
78 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
79 // Negative values are effectively underflow - we'll end up returning a (different) negative
80 // value, which makes no sense. So clamp to zero.
81 if (fbits >= (float)INT_MAX) {
82 return INFINITY_;
83 } else if (fbits < 0) {
84 return 0;
85 }
86
87 int32_t bits = (int32_t)fbits;
88 memcpy(&x, &bits, sizeof(x));
89 return x;
90 }
91
92 // Not static, as it's used by some test tools.
powf_(float x,float y)93 float powf_(float x, float y) {
94 if (x <= 0.f) {
95 return 0.f;
96 }
97 if (x == 1.f) {
98 return 1.f;
99 }
100 return exp2f_(log2f_(x) * y);
101 }
102
expf_(float x)103 static float expf_(float x) {
104 const float log2_e = 1.4426950408889634074f;
105 return exp2f_(log2_e * x);
106 }
107
fmaxf_(float x,float y)108 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)109 static float fminf_(float x, float y) { return x < y ? x : y; }
110
isfinitef_(float x)111 static bool isfinitef_(float x) { return 0 == x*0; }
112
minus_1_ulp(float x)113 static float minus_1_ulp(float x) {
114 int32_t bits;
115 memcpy(&bits, &x, sizeof(bits));
116 bits = bits - 1;
117 memcpy(&x, &bits, sizeof(bits));
118 return x;
119 }
120
121 // Most transfer functions we work with are sRGBish.
122 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
123 // and repurpose the other fields to hold the parameters of the HDR functions.
124 struct TF_PQish { float A,B,C,D,E,F; };
125 struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
126 // We didn't originally support a scale factor K for HLG, and instead just stored 0 in
127 // the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
128 // By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
129
TFKind_marker(skcms_TFType kind)130 static float TFKind_marker(skcms_TFType kind) {
131 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
132 return -(float)kind;
133 }
134
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)135 static skcms_TFType classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
136 , TF_HLGish* hlg = nullptr) {
137 if (tf.g < 0) {
138 // Negative "g" is mapped to enum values; large negative are for sure invalid.
139 if (tf.g < -128) {
140 return skcms_TFType_Invalid;
141 }
142 int enum_g = -static_cast<int>(tf.g);
143 // Non-whole "g" values are invalid as well.
144 if (static_cast<float>(-enum_g) != tf.g) {
145 return skcms_TFType_Invalid;
146 }
147 // TODO: soundness checks for PQ/HLG like we do for sRGBish?
148 switch (enum_g) {
149 case skcms_TFType_PQish:
150 if (pq) {
151 memcpy(pq , &tf.a, sizeof(*pq ));
152 }
153 return skcms_TFType_PQish;
154 case skcms_TFType_HLGish:
155 if (hlg) {
156 memcpy(hlg, &tf.a, sizeof(*hlg));
157 }
158 return skcms_TFType_HLGish;
159 case skcms_TFType_HLGinvish:
160 if (hlg) {
161 memcpy(hlg, &tf.a, sizeof(*hlg));
162 }
163 return skcms_TFType_HLGinvish;
164 }
165 return skcms_TFType_Invalid;
166 }
167
168 // Basic soundness checks for sRGBish transfer functions.
169 if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
170 // a,c,d,g should be non-negative to make any sense.
171 && tf.a >= 0
172 && tf.c >= 0
173 && tf.d >= 0
174 && tf.g >= 0
175 // Raising a negative value to a fractional tf->g produces complex numbers.
176 && tf.a * tf.d + tf.b >= 0) {
177 return skcms_TFType_sRGBish;
178 }
179
180 return skcms_TFType_Invalid;
181 }
182
skcms_TransferFunction_getType(const skcms_TransferFunction * tf)183 skcms_TFType skcms_TransferFunction_getType(const skcms_TransferFunction* tf) {
184 return classify(*tf);
185 }
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)186 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
187 return classify(*tf) == skcms_TFType_sRGBish;
188 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)189 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
190 return classify(*tf) == skcms_TFType_PQish;
191 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)192 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
193 return classify(*tf) == skcms_TFType_HLGish;
194 }
195
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)196 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
197 float A, float B, float C,
198 float D, float E, float F) {
199 *tf = { TFKind_marker(skcms_TFType_PQish), A,B,C,D,E,F };
200 assert(skcms_TransferFunction_isPQish(tf));
201 return true;
202 }
203
skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction * tf,float K,float R,float G,float a,float b,float c)204 bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
205 float K, float R, float G,
206 float a, float b, float c) {
207 *tf = { TFKind_marker(skcms_TFType_HLGish), R,G, a,b,c, K-1.0f };
208 assert(skcms_TransferFunction_isHLGish(tf));
209 return true;
210 }
211
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)212 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
213 float sign = x < 0 ? -1.0f : 1.0f;
214 x *= sign;
215
216 TF_PQish pq;
217 TF_HLGish hlg;
218 switch (classify(*tf, &pq, &hlg)) {
219 case skcms_TFType_Invalid: break;
220
221 case skcms_TFType_HLGish: {
222 const float K = hlg.K_minus_1 + 1.0f;
223 return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
224 : expf_((x-hlg.c)*hlg.a) + hlg.b);
225 }
226
227 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
228 case skcms_TFType_HLGinvish: {
229 const float K = hlg.K_minus_1 + 1.0f;
230 x /= K;
231 return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
232 : hlg.a * logf_(x - hlg.b) + hlg.c);
233 }
234
235 case skcms_TFType_sRGBish:
236 return sign * (x < tf->d ? tf->c * x + tf->f
237 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
238
239 case skcms_TFType_PQish:
240 return sign *
241 powf_((pq.A + pq.B * powf_(x, pq.C)) / (pq.D + pq.E * powf_(x, pq.C)), pq.F);
242 }
243 return 0;
244 }
245
246
eval_curve(const skcms_Curve * curve,float x)247 static float eval_curve(const skcms_Curve* curve, float x) {
248 if (curve->table_entries == 0) {
249 return skcms_TransferFunction_eval(&curve->parametric, x);
250 }
251
252 float ix = fmaxf_(0, fminf_(x, 1)) * static_cast<float>(curve->table_entries - 1);
253 int lo = (int) ix ,
254 hi = (int)(float)minus_1_ulp(ix + 1.0f);
255 float t = ix - (float)lo;
256
257 float l, h;
258 if (curve->table_8) {
259 l = curve->table_8[lo] * (1/255.0f);
260 h = curve->table_8[hi] * (1/255.0f);
261 } else {
262 uint16_t be_l, be_h;
263 memcpy(&be_l, curve->table_16 + 2*lo, 2);
264 memcpy(&be_h, curve->table_16 + 2*hi, 2);
265 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
266 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
267 l = le_l * (1/65535.0f);
268 h = le_h * (1/65535.0f);
269 }
270 return l + (h-l)*t;
271 }
272
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)273 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
274 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
275 const float dx = 1.0f / static_cast<float>(N - 1);
276 float err = 0;
277 for (uint32_t i = 0; i < N; i++) {
278 float x = static_cast<float>(i) * dx,
279 y = eval_curve(curve, x);
280 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
281 }
282 return err;
283 }
284
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)285 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
286 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
287 }
288
289 // Additional ICC signature values that are only used internally
290 enum {
291 // File signature
292 skcms_Signature_acsp = 0x61637370,
293
294 // Tag signatures
295 skcms_Signature_rTRC = 0x72545243,
296 skcms_Signature_gTRC = 0x67545243,
297 skcms_Signature_bTRC = 0x62545243,
298 skcms_Signature_kTRC = 0x6B545243,
299
300 skcms_Signature_rXYZ = 0x7258595A,
301 skcms_Signature_gXYZ = 0x6758595A,
302 skcms_Signature_bXYZ = 0x6258595A,
303
304 skcms_Signature_A2B0 = 0x41324230,
305 skcms_Signature_B2A0 = 0x42324130,
306
307 skcms_Signature_CHAD = 0x63686164,
308 skcms_Signature_WTPT = 0x77747074,
309
310 skcms_Signature_CICP = 0x63696370,
311
312 // Type signatures
313 skcms_Signature_curv = 0x63757276,
314 skcms_Signature_mft1 = 0x6D667431,
315 skcms_Signature_mft2 = 0x6D667432,
316 skcms_Signature_mAB = 0x6D414220,
317 skcms_Signature_mBA = 0x6D424120,
318 skcms_Signature_para = 0x70617261,
319 skcms_Signature_sf32 = 0x73663332,
320 // XYZ is also a PCS signature, so it's defined in skcms.h
321 // skcms_Signature_XYZ = 0x58595A20,
322 };
323
read_big_u16(const uint8_t * ptr)324 static uint16_t read_big_u16(const uint8_t* ptr) {
325 uint16_t be;
326 memcpy(&be, ptr, sizeof(be));
327 #if defined(_MSC_VER)
328 return _byteswap_ushort(be);
329 #else
330 return __builtin_bswap16(be);
331 #endif
332 }
333
read_big_u32(const uint8_t * ptr)334 static uint32_t read_big_u32(const uint8_t* ptr) {
335 uint32_t be;
336 memcpy(&be, ptr, sizeof(be));
337 #if defined(_MSC_VER)
338 return _byteswap_ulong(be);
339 #else
340 return __builtin_bswap32(be);
341 #endif
342 }
343
read_big_i32(const uint8_t * ptr)344 static int32_t read_big_i32(const uint8_t* ptr) {
345 return (int32_t)read_big_u32(ptr);
346 }
347
read_big_fixed(const uint8_t * ptr)348 static float read_big_fixed(const uint8_t* ptr) {
349 return static_cast<float>(read_big_i32(ptr)) * (1.0f / 65536.0f);
350 }
351
352 // Maps to an in-memory profile so that fields line up to the locations specified
353 // in ICC.1:2010, section 7.2
354 typedef struct {
355 uint8_t size [ 4];
356 uint8_t cmm_type [ 4];
357 uint8_t version [ 4];
358 uint8_t profile_class [ 4];
359 uint8_t data_color_space [ 4];
360 uint8_t pcs [ 4];
361 uint8_t creation_date_time [12];
362 uint8_t signature [ 4];
363 uint8_t platform [ 4];
364 uint8_t flags [ 4];
365 uint8_t device_manufacturer [ 4];
366 uint8_t device_model [ 4];
367 uint8_t device_attributes [ 8];
368 uint8_t rendering_intent [ 4];
369 uint8_t illuminant_X [ 4];
370 uint8_t illuminant_Y [ 4];
371 uint8_t illuminant_Z [ 4];
372 uint8_t creator [ 4];
373 uint8_t profile_id [16];
374 uint8_t reserved [28];
375 uint8_t tag_count [ 4]; // Technically not part of header, but required
376 } header_Layout;
377
378 typedef struct {
379 uint8_t signature [4];
380 uint8_t offset [4];
381 uint8_t size [4];
382 } tag_Layout;
383
get_tag_table(const skcms_ICCProfile * profile)384 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
385 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
386 }
387
388 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
389 // use of the type is for the CHAD tag that stores exactly nine values.
390 typedef struct {
391 uint8_t type [ 4];
392 uint8_t reserved [ 4];
393 uint8_t values [36];
394 } sf32_Layout;
395
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)396 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
397 skcms_ICCTag tag;
398 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
399 return false;
400 }
401
402 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
403 return false;
404 }
405
406 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
407 const uint8_t* values = sf32Tag->values;
408 for (int r = 0; r < 3; ++r)
409 for (int c = 0; c < 3; ++c, values += 4) {
410 m->vals[r][c] = read_big_fixed(values);
411 }
412 return true;
413 }
414
415 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
416 // the type are for tags/data that store exactly one triple.
417 typedef struct {
418 uint8_t type [4];
419 uint8_t reserved [4];
420 uint8_t X [4];
421 uint8_t Y [4];
422 uint8_t Z [4];
423 } XYZ_Layout;
424
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)425 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
426 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
427 return false;
428 }
429
430 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
431
432 *x = read_big_fixed(xyzTag->X);
433 *y = read_big_fixed(xyzTag->Y);
434 *z = read_big_fixed(xyzTag->Z);
435 return true;
436 }
437
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])438 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
439 skcms_ICCTag tag;
440 return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
441 read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
442 }
443
data_color_space_channel_count(uint32_t data_color_space)444 static int data_color_space_channel_count(uint32_t data_color_space) {
445 switch (data_color_space) {
446 case skcms_Signature_CMYK: return 4;
447 case skcms_Signature_Gray: return 1;
448 case skcms_Signature_RGB: return 3;
449 case skcms_Signature_Lab: return 3;
450 case skcms_Signature_XYZ: return 3;
451 case skcms_Signature_CIELUV: return 3;
452 case skcms_Signature_YCbCr: return 3;
453 case skcms_Signature_CIEYxy: return 3;
454 case skcms_Signature_HSV: return 3;
455 case skcms_Signature_HLS: return 3;
456 case skcms_Signature_CMY: return 3;
457 case skcms_Signature_2CLR: return 2;
458 case skcms_Signature_3CLR: return 3;
459 case skcms_Signature_4CLR: return 4;
460 case skcms_Signature_5CLR: return 5;
461 case skcms_Signature_6CLR: return 6;
462 case skcms_Signature_7CLR: return 7;
463 case skcms_Signature_8CLR: return 8;
464 case skcms_Signature_9CLR: return 9;
465 case skcms_Signature_10CLR: return 10;
466 case skcms_Signature_11CLR: return 11;
467 case skcms_Signature_12CLR: return 12;
468 case skcms_Signature_13CLR: return 13;
469 case skcms_Signature_14CLR: return 14;
470 case skcms_Signature_15CLR: return 15;
471 default: return -1;
472 }
473 }
474
skcms_GetInputChannelCount(const skcms_ICCProfile * profile)475 int skcms_GetInputChannelCount(const skcms_ICCProfile* profile) {
476 int a2b_count = 0;
477 if (profile->has_A2B) {
478 a2b_count = profile->A2B.input_channels != 0
479 ? static_cast<int>(profile->A2B.input_channels)
480 : 3;
481 }
482
483 skcms_ICCTag tag;
484 int trc_count = 0;
485 if (skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &tag)) {
486 trc_count = 1;
487 } else if (profile->has_trc) {
488 trc_count = 3;
489 }
490
491 int dcs_count = data_color_space_channel_count(profile->data_color_space);
492
493 if (dcs_count < 0) {
494 return -1;
495 }
496
497 if (a2b_count > 0 && a2b_count != dcs_count) {
498 return -1;
499 }
500 if (trc_count > 0 && trc_count != dcs_count) {
501 return -1;
502 }
503
504 return dcs_count;
505 }
506
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)507 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
508 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
509 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
510 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
511 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
512 }
513
514 typedef struct {
515 uint8_t type [4];
516 uint8_t reserved_a [4];
517 uint8_t function_type [2];
518 uint8_t reserved_b [2];
519 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
520 } para_Layout;
521
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)522 static bool read_curve_para(const uint8_t* buf, uint32_t size,
523 skcms_Curve* curve, uint32_t* curve_size) {
524 if (size < SAFE_FIXED_SIZE(para_Layout)) {
525 return false;
526 }
527
528 const para_Layout* paraTag = (const para_Layout*)buf;
529
530 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
531 uint16_t function_type = read_big_u16(paraTag->function_type);
532 if (function_type > kGABCDEF) {
533 return false;
534 }
535
536 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
537 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
538 return false;
539 }
540
541 if (curve_size) {
542 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
543 }
544
545 curve->table_entries = 0;
546 curve->parametric.a = 1.0f;
547 curve->parametric.b = 0.0f;
548 curve->parametric.c = 0.0f;
549 curve->parametric.d = 0.0f;
550 curve->parametric.e = 0.0f;
551 curve->parametric.f = 0.0f;
552 curve->parametric.g = read_big_fixed(paraTag->variable);
553
554 switch (function_type) {
555 case kGAB:
556 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
557 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
558 if (curve->parametric.a == 0) {
559 return false;
560 }
561 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
562 break;
563 case kGABC:
564 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
565 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
566 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
567 if (curve->parametric.a == 0) {
568 return false;
569 }
570 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
571 curve->parametric.f = curve->parametric.e;
572 break;
573 case kGABCD:
574 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
575 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
576 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
577 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
578 break;
579 case kGABCDEF:
580 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
581 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
582 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
583 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
584 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
585 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
586 break;
587 }
588 return skcms_TransferFunction_isSRGBish(&curve->parametric);
589 }
590
591 typedef struct {
592 uint8_t type [4];
593 uint8_t reserved [4];
594 uint8_t value_count [4];
595 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
596 } curv_Layout;
597
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)598 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
599 skcms_Curve* curve, uint32_t* curve_size) {
600 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
601 return false;
602 }
603
604 const curv_Layout* curvTag = (const curv_Layout*)buf;
605
606 uint32_t value_count = read_big_u32(curvTag->value_count);
607 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
608 return false;
609 }
610
611 if (curve_size) {
612 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
613 }
614
615 if (value_count < 2) {
616 curve->table_entries = 0;
617 curve->parametric.a = 1.0f;
618 curve->parametric.b = 0.0f;
619 curve->parametric.c = 0.0f;
620 curve->parametric.d = 0.0f;
621 curve->parametric.e = 0.0f;
622 curve->parametric.f = 0.0f;
623 if (value_count == 0) {
624 // Empty tables are a shorthand for an identity curve
625 curve->parametric.g = 1.0f;
626 } else {
627 // Single entry tables are a shorthand for simple gamma
628 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
629 }
630 } else {
631 curve->table_8 = nullptr;
632 curve->table_16 = curvTag->variable;
633 curve->table_entries = value_count;
634 }
635
636 return true;
637 }
638
639 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
640 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)641 static bool read_curve(const uint8_t* buf, uint32_t size,
642 skcms_Curve* curve, uint32_t* curve_size) {
643 if (!buf || size < 4 || !curve) {
644 return false;
645 }
646
647 uint32_t type = read_big_u32(buf);
648 if (type == skcms_Signature_para) {
649 return read_curve_para(buf, size, curve, curve_size);
650 } else if (type == skcms_Signature_curv) {
651 return read_curve_curv(buf, size, curve, curve_size);
652 }
653
654 return false;
655 }
656
657 // mft1 and mft2 share a large chunk of data
658 typedef struct {
659 uint8_t type [ 4];
660 uint8_t reserved_a [ 4];
661 uint8_t input_channels [ 1];
662 uint8_t output_channels [ 1];
663 uint8_t grid_points [ 1];
664 uint8_t reserved_b [ 1];
665 uint8_t matrix [36];
666 } mft_CommonLayout;
667
668 typedef struct {
669 mft_CommonLayout common [1];
670
671 uint8_t variable [1/*variable*/];
672 } mft1_Layout;
673
674 typedef struct {
675 mft_CommonLayout common [1];
676
677 uint8_t input_table_entries [2];
678 uint8_t output_table_entries [2];
679 uint8_t variable [1/*variable*/];
680 } mft2_Layout;
681
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)682 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
683 // MFT matrices are applied before the first set of curves, but must be identity unless the
684 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
685 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
686 // field/flag.
687 a2b->matrix_channels = 0;
688 a2b-> input_channels = mftTag-> input_channels[0];
689 a2b->output_channels = mftTag->output_channels[0];
690
691 // We require exactly three (ie XYZ/Lab/RGB) output channels
692 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
693 return false;
694 }
695 // We require at least one, and no more than four (ie CMYK) input channels
696 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
697 return false;
698 }
699
700 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
701 a2b->grid_points[i] = mftTag->grid_points[0];
702 }
703 // The grid only makes sense with at least two points along each axis
704 if (a2b->grid_points[0] < 2) {
705 return false;
706 }
707 return true;
708 }
709
710 // All as the A2B version above, except where noted.
read_mft_common(const mft_CommonLayout * mftTag,skcms_B2A * b2a)711 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
712 // Same as A2B.
713 b2a->matrix_channels = 0;
714 b2a-> input_channels = mftTag-> input_channels[0];
715 b2a->output_channels = mftTag->output_channels[0];
716
717
718 // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
719 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
720 return false;
721 }
722 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
723 return false;
724 }
725
726 // Same as A2B.
727 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
728 b2a->grid_points[i] = mftTag->grid_points[0];
729 }
730 if (b2a->grid_points[0] < 2) {
731 return false;
732 }
733 return true;
734 }
735
736 template <typename A2B_or_B2A>
init_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,A2B_or_B2A * out)737 static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
738 uint32_t input_table_entries, uint32_t output_table_entries,
739 A2B_or_B2A* out) {
740 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
741 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
742 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
743
744 // [input|output]_channels are <= 4, so still no overflow
745 uint32_t byte_len_all_input_tables = out->input_channels * byte_len_per_input_table;
746 uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
747
748 uint64_t grid_size = out->output_channels * byte_width;
749 for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
750 grid_size *= out->grid_points[axis];
751 }
752
753 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
754 return false;
755 }
756
757 for (uint32_t i = 0; i < out->input_channels; ++i) {
758 out->input_curves[i].table_entries = input_table_entries;
759 if (byte_width == 1) {
760 out->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
761 out->input_curves[i].table_16 = nullptr;
762 } else {
763 out->input_curves[i].table_8 = nullptr;
764 out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
765 }
766 }
767
768 if (byte_width == 1) {
769 out->grid_8 = table_base + byte_len_all_input_tables;
770 out->grid_16 = nullptr;
771 } else {
772 out->grid_8 = nullptr;
773 out->grid_16 = table_base + byte_len_all_input_tables;
774 }
775
776 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
777 for (uint32_t i = 0; i < out->output_channels; ++i) {
778 out->output_curves[i].table_entries = output_table_entries;
779 if (byte_width == 1) {
780 out->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
781 out->output_curves[i].table_16 = nullptr;
782 } else {
783 out->output_curves[i].table_8 = nullptr;
784 out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
785 }
786 }
787
788 return true;
789 }
790
791 template <typename A2B_or_B2A>
read_tag_mft1(const skcms_ICCTag * tag,A2B_or_B2A * out)792 static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
793 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
794 return false;
795 }
796
797 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
798 if (!read_mft_common(mftTag->common, out)) {
799 return false;
800 }
801
802 uint32_t input_table_entries = 256;
803 uint32_t output_table_entries = 256;
804
805 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
806 input_table_entries, output_table_entries, out);
807 }
808
809 template <typename A2B_or_B2A>
read_tag_mft2(const skcms_ICCTag * tag,A2B_or_B2A * out)810 static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
811 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
812 return false;
813 }
814
815 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
816 if (!read_mft_common(mftTag->common, out)) {
817 return false;
818 }
819
820 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
821 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
822
823 // ICC spec mandates that 2 <= table_entries <= 4096
824 if (input_table_entries < 2 || input_table_entries > 4096 ||
825 output_table_entries < 2 || output_table_entries > 4096) {
826 return false;
827 }
828
829 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
830 input_table_entries, output_table_entries, out);
831 }
832
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)833 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
834 uint32_t num_curves, skcms_Curve* curves) {
835 for (uint32_t i = 0; i < num_curves; ++i) {
836 if (curve_offset > size) {
837 return false;
838 }
839
840 uint32_t curve_bytes;
841 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
842 return false;
843 }
844
845 if (curve_bytes > UINT32_MAX - 3) {
846 return false;
847 }
848 curve_bytes = (curve_bytes + 3) & ~3U;
849
850 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
851 curve_offset = (uint32_t)new_offset_64;
852 if (new_offset_64 != curve_offset) {
853 return false;
854 }
855 }
856
857 return true;
858 }
859
860 // mAB and mBA tags use the same encoding, including color lookup tables.
861 typedef struct {
862 uint8_t type [ 4];
863 uint8_t reserved_a [ 4];
864 uint8_t input_channels [ 1];
865 uint8_t output_channels [ 1];
866 uint8_t reserved_b [ 2];
867 uint8_t b_curve_offset [ 4];
868 uint8_t matrix_offset [ 4];
869 uint8_t m_curve_offset [ 4];
870 uint8_t clut_offset [ 4];
871 uint8_t a_curve_offset [ 4];
872 } mAB_or_mBA_Layout;
873
874 typedef struct {
875 uint8_t grid_points [16];
876 uint8_t grid_byte_width [ 1];
877 uint8_t reserved [ 3];
878 uint8_t variable [1/*variable*/];
879 } CLUT_Layout;
880
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)881 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
882 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
883 return false;
884 }
885
886 const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
887
888 a2b->input_channels = mABTag->input_channels[0];
889 a2b->output_channels = mABTag->output_channels[0];
890
891 // We require exactly three (ie XYZ/Lab/RGB) output channels
892 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
893 return false;
894 }
895 // We require no more than four (ie CMYK) input channels
896 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
897 return false;
898 }
899
900 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
901 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
902 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
903 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
904 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
905
906 // "B" curves must be present
907 if (0 == b_curve_offset) {
908 return false;
909 }
910
911 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
912 a2b->output_curves)) {
913 return false;
914 }
915
916 // "M" curves and Matrix must be used together
917 if (0 != m_curve_offset) {
918 if (0 == matrix_offset) {
919 return false;
920 }
921 a2b->matrix_channels = a2b->output_channels;
922 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
923 a2b->matrix_curves)) {
924 return false;
925 }
926
927 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
928 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
929 return false;
930 }
931 float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
932 const uint8_t* mtx_buf = tag->buf + matrix_offset;
933 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
934 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
935 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
936 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
937 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
938 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
939 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
940 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
941 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
942 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
943 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
944 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
945 } else {
946 if (0 != matrix_offset) {
947 return false;
948 }
949 a2b->matrix_channels = 0;
950 }
951
952 // "A" curves and CLUT must be used together
953 if (0 != a_curve_offset) {
954 if (0 == clut_offset) {
955 return false;
956 }
957 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
958 a2b->input_curves)) {
959 return false;
960 }
961
962 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
963 return false;
964 }
965 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
966
967 if (clut->grid_byte_width[0] == 1) {
968 a2b->grid_8 = clut->variable;
969 a2b->grid_16 = nullptr;
970 } else if (clut->grid_byte_width[0] == 2) {
971 a2b->grid_8 = nullptr;
972 a2b->grid_16 = clut->variable;
973 } else {
974 return false;
975 }
976
977 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0]; // the payload
978 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
979 a2b->grid_points[i] = clut->grid_points[i];
980 // The grid only makes sense with at least two points along each axis
981 if (a2b->grid_points[i] < 2) {
982 return false;
983 }
984 grid_size *= a2b->grid_points[i];
985 }
986 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
987 return false;
988 }
989 } else {
990 if (0 != clut_offset) {
991 return false;
992 }
993
994 // If there is no CLUT, the number of input and output channels must match
995 if (a2b->input_channels != a2b->output_channels) {
996 return false;
997 }
998
999 // Zero out the number of input channels to signal that we're skipping this stage
1000 a2b->input_channels = 0;
1001 }
1002
1003 return true;
1004 }
1005
1006 // Exactly the same as read_tag_mab(), except where there are comments.
1007 // TODO: refactor the two to eliminate common code?
read_tag_mba(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1008 static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1009 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
1010 return false;
1011 }
1012
1013 const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
1014
1015 b2a->input_channels = mBATag->input_channels[0];
1016 b2a->output_channels = mBATag->output_channels[0];
1017
1018 // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
1019 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
1020 return false;
1021 }
1022 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
1023 return false;
1024 }
1025
1026 uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
1027 uint32_t matrix_offset = read_big_u32(mBATag->matrix_offset);
1028 uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
1029 uint32_t clut_offset = read_big_u32(mBATag->clut_offset);
1030 uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
1031
1032 if (0 == b_curve_offset) {
1033 return false;
1034 }
1035
1036 // "B" curves are our inputs, not outputs.
1037 if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
1038 b2a->input_curves)) {
1039 return false;
1040 }
1041
1042 if (0 != m_curve_offset) {
1043 if (0 == matrix_offset) {
1044 return false;
1045 }
1046 // Matrix channels is tied to input_channels (3), not output_channels.
1047 b2a->matrix_channels = b2a->input_channels;
1048
1049 if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
1050 b2a->matrix_curves)) {
1051 return false;
1052 }
1053
1054 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
1055 return false;
1056 }
1057 float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f; // TODO: understand
1058 const uint8_t* mtx_buf = tag->buf + matrix_offset;
1059 b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
1060 b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
1061 b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
1062 b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
1063 b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
1064 b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
1065 b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
1066 b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
1067 b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
1068 b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
1069 b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
1070 b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
1071 } else {
1072 if (0 != matrix_offset) {
1073 return false;
1074 }
1075 b2a->matrix_channels = 0;
1076 }
1077
1078 if (0 != a_curve_offset) {
1079 if (0 == clut_offset) {
1080 return false;
1081 }
1082
1083 // "A" curves are our output, not input.
1084 if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1085 b2a->output_curves)) {
1086 return false;
1087 }
1088
1089 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1090 return false;
1091 }
1092 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1093
1094 if (clut->grid_byte_width[0] == 1) {
1095 b2a->grid_8 = clut->variable;
1096 b2a->grid_16 = nullptr;
1097 } else if (clut->grid_byte_width[0] == 2) {
1098 b2a->grid_8 = nullptr;
1099 b2a->grid_16 = clut->variable;
1100 } else {
1101 return false;
1102 }
1103
1104 uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1105 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1106 b2a->grid_points[i] = clut->grid_points[i];
1107 if (b2a->grid_points[i] < 2) {
1108 return false;
1109 }
1110 grid_size *= b2a->grid_points[i];
1111 }
1112 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1113 return false;
1114 }
1115 } else {
1116 if (0 != clut_offset) {
1117 return false;
1118 }
1119
1120 if (b2a->input_channels != b2a->output_channels) {
1121 return false;
1122 }
1123
1124 // Zero out *output* channels to skip this stage.
1125 b2a->output_channels = 0;
1126 }
1127 return true;
1128 }
1129
1130 // If you pass f, we'll fit a possibly-non-zero value for *f.
1131 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)1132 static int fit_linear(const skcms_Curve* curve, int N, float tol,
1133 float* c, float* d, float* f = nullptr) {
1134 assert(N > 1);
1135 // We iteratively fit the first points to the TF's linear piece.
1136 // We want the cx + f line to pass through the first and last points we fit exactly.
1137 //
1138 // As we walk along the points we find the minimum and maximum slope of the line before the
1139 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
1140 // emtpy, when we definitely can't add any more points.
1141 //
1142 // Some points' error intervals may intersect the running interval but not lie fully
1143 // within it. So we keep track of the last point we saw that is a valid end point candidate,
1144 // and once the search is done, back up to build the line through *that* point.
1145 const float dx = 1.0f / static_cast<float>(N - 1);
1146
1147 int lin_points = 1;
1148
1149 float f_zero = 0.0f;
1150 if (f) {
1151 *f = eval_curve(curve, 0);
1152 } else {
1153 f = &f_zero;
1154 }
1155
1156
1157 float slope_min = -INFINITY_;
1158 float slope_max = +INFINITY_;
1159 for (int i = 1; i < N; ++i) {
1160 float x = static_cast<float>(i) * dx;
1161 float y = eval_curve(curve, x);
1162
1163 float slope_max_i = (y + tol - *f) / x,
1164 slope_min_i = (y - tol - *f) / x;
1165 if (slope_max_i < slope_min || slope_max < slope_min_i) {
1166 // Slope intervals would no longer overlap.
1167 break;
1168 }
1169 slope_max = fminf_(slope_max, slope_max_i);
1170 slope_min = fmaxf_(slope_min, slope_min_i);
1171
1172 float cur_slope = (y - *f) / x;
1173 if (slope_min <= cur_slope && cur_slope <= slope_max) {
1174 lin_points = i + 1;
1175 *c = cur_slope;
1176 }
1177 }
1178
1179 // Set D to the last point that met our tolerance.
1180 *d = static_cast<float>(lin_points - 1) * dx;
1181 return lin_points;
1182 }
1183
1184 // If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
canonicalize_identity(skcms_Curve * curve)1185 static void canonicalize_identity(skcms_Curve* curve) {
1186 if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1187 int N = (int)curve->table_entries;
1188
1189 float c = 0.0f, d = 0.0f, f = 0.0f;
1190 if (N == fit_linear(curve, N, 1.0f/static_cast<float>(2*N), &c,&d,&f)
1191 && c == 1.0f
1192 && f == 0.0f) {
1193 curve->table_entries = 0;
1194 curve->table_8 = nullptr;
1195 curve->table_16 = nullptr;
1196 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
1197 }
1198 }
1199 }
1200
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)1201 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1202 bool ok = false;
1203 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1204 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1205 if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1206 if (!ok) {
1207 return false;
1208 }
1209
1210 if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1211 if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1212 if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1213 if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1214
1215 if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1216 if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1217 if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1218
1219 if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1220 if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1221 if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1222
1223 return true;
1224 }
1225
read_b2a(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1226 static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1227 bool ok = false;
1228 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1229 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1230 if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1231 if (!ok) {
1232 return false;
1233 }
1234
1235 if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1236 if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1237 if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1238
1239 if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1240 if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1241 if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1242
1243 if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1244 if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1245 if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1246 if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1247
1248 return true;
1249 }
1250
1251 typedef struct {
1252 uint8_t type [4];
1253 uint8_t reserved [4];
1254 uint8_t color_primaries [1];
1255 uint8_t transfer_characteristics [1];
1256 uint8_t matrix_coefficients [1];
1257 uint8_t video_full_range_flag [1];
1258 } CICP_Layout;
1259
read_cicp(const skcms_ICCTag * tag,skcms_CICP * cicp)1260 static bool read_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
1261 if (tag->type != skcms_Signature_CICP || tag->size < SAFE_SIZEOF(CICP_Layout)) {
1262 return false;
1263 }
1264
1265 const CICP_Layout* cicpTag = (const CICP_Layout*)tag->buf;
1266
1267 cicp->color_primaries = cicpTag->color_primaries[0];
1268 cicp->transfer_characteristics = cicpTag->transfer_characteristics[0];
1269 cicp->matrix_coefficients = cicpTag->matrix_coefficients[0];
1270 cicp->video_full_range_flag = cicpTag->video_full_range_flag[0];
1271 return true;
1272 }
1273
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)1274 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1275 if (!profile || !profile->buffer || !tag) { return; }
1276 if (idx > profile->tag_count) { return; }
1277 const tag_Layout* tags = get_tag_table(profile);
1278 tag->signature = read_big_u32(tags[idx].signature);
1279 tag->size = read_big_u32(tags[idx].size);
1280 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
1281 tag->type = read_big_u32(tag->buf);
1282 }
1283
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1284 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1285 if (!profile || !profile->buffer || !tag) { return false; }
1286 const tag_Layout* tags = get_tag_table(profile);
1287 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1288 if (read_big_u32(tags[i].signature) == sig) {
1289 tag->signature = sig;
1290 tag->size = read_big_u32(tags[i].size);
1291 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
1292 tag->type = read_big_u32(tag->buf);
1293 return true;
1294 }
1295 }
1296 return false;
1297 }
1298
usable_as_src(const skcms_ICCProfile * profile)1299 static bool usable_as_src(const skcms_ICCProfile* profile) {
1300 return profile->has_A2B
1301 || (profile->has_trc && profile->has_toXYZD50);
1302 }
1303
skcms_ParseWithA2BPriority(const void * buf,size_t len,const int priority[],const int priorities,skcms_ICCProfile * profile)1304 bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1305 const int priority[], const int priorities,
1306 skcms_ICCProfile* profile) {
1307 static_assert(SAFE_SIZEOF(header_Layout) == 132, "need to update header code");
1308
1309 if (!profile) {
1310 return false;
1311 }
1312 memset(profile, 0, SAFE_SIZEOF(*profile));
1313
1314 if (len < SAFE_SIZEOF(header_Layout)) {
1315 return false;
1316 }
1317
1318 // Byte-swap all header fields
1319 const header_Layout* header = (const header_Layout*)buf;
1320 profile->buffer = (const uint8_t*)buf;
1321 profile->size = read_big_u32(header->size);
1322 uint32_t version = read_big_u32(header->version);
1323 profile->data_color_space = read_big_u32(header->data_color_space);
1324 profile->pcs = read_big_u32(header->pcs);
1325 uint32_t signature = read_big_u32(header->signature);
1326 float illuminant_X = read_big_fixed(header->illuminant_X);
1327 float illuminant_Y = read_big_fixed(header->illuminant_Y);
1328 float illuminant_Z = read_big_fixed(header->illuminant_Z);
1329 profile->tag_count = read_big_u32(header->tag_count);
1330
1331 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1332 // and major version
1333 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1334 if (signature != skcms_Signature_acsp ||
1335 profile->size > len ||
1336 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1337 (version >> 24) > 4) {
1338 return false;
1339 }
1340
1341 // Validate that illuminant is D50 white
1342 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1343 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1344 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1345 return false;
1346 }
1347
1348 // Validate that all tag entries have sane offset + size
1349 const tag_Layout* tags = get_tag_table(profile);
1350 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1351 uint32_t tag_offset = read_big_u32(tags[i].offset);
1352 uint32_t tag_size = read_big_u32(tags[i].size);
1353 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1354 if (tag_size < 4 || tag_end > profile->size) {
1355 return false;
1356 }
1357 }
1358
1359 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1360 return false;
1361 }
1362
1363 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1364
1365 // Pre-parse commonly used tags.
1366 skcms_ICCTag kTRC;
1367 if (profile->data_color_space == skcms_Signature_Gray &&
1368 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1369 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1370 // Malformed tag
1371 return false;
1372 }
1373 profile->trc[1] = profile->trc[0];
1374 profile->trc[2] = profile->trc[0];
1375 profile->has_trc = true;
1376
1377 if (pcs_is_xyz) {
1378 profile->toXYZD50.vals[0][0] = illuminant_X;
1379 profile->toXYZD50.vals[1][1] = illuminant_Y;
1380 profile->toXYZD50.vals[2][2] = illuminant_Z;
1381 profile->has_toXYZD50 = true;
1382 }
1383 } else {
1384 skcms_ICCTag rTRC, gTRC, bTRC;
1385 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1386 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1387 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1388 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1389 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1390 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1391 // Malformed TRC tags
1392 return false;
1393 }
1394 profile->has_trc = true;
1395 }
1396
1397 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1398 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1399 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1400 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1401 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1402 // Malformed XYZ tags
1403 return false;
1404 }
1405 profile->has_toXYZD50 = true;
1406 }
1407 }
1408
1409 for (int i = 0; i < priorities; i++) {
1410 // enum { perceptual, relative_colormetric, saturation }
1411 if (priority[i] < 0 || priority[i] > 2) {
1412 return false;
1413 }
1414 uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1415 skcms_ICCTag tag;
1416 if (skcms_GetTagBySignature(profile, sig, &tag)) {
1417 if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1418 // Malformed A2B tag
1419 return false;
1420 }
1421 profile->has_A2B = true;
1422 break;
1423 }
1424 }
1425
1426 for (int i = 0; i < priorities; i++) {
1427 // enum { perceptual, relative_colormetric, saturation }
1428 if (priority[i] < 0 || priority[i] > 2) {
1429 return false;
1430 }
1431 uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1432 skcms_ICCTag tag;
1433 if (skcms_GetTagBySignature(profile, sig, &tag)) {
1434 if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1435 // Malformed B2A tag
1436 return false;
1437 }
1438 profile->has_B2A = true;
1439 break;
1440 }
1441 }
1442
1443 skcms_ICCTag cicp_tag;
1444 if (skcms_GetTagBySignature(profile, skcms_Signature_CICP, &cicp_tag)) {
1445 if (!read_cicp(&cicp_tag, &profile->CICP)) {
1446 // Malformed CICP tag
1447 return false;
1448 }
1449 profile->has_CICP = true;
1450 }
1451
1452 return usable_as_src(profile);
1453 }
1454
1455
skcms_sRGB_profile()1456 const skcms_ICCProfile* skcms_sRGB_profile() {
1457 static const skcms_ICCProfile sRGB_profile = {
1458 nullptr, // buffer, moot here
1459
1460 0, // size, moot here
1461 skcms_Signature_RGB, // data_color_space
1462 skcms_Signature_XYZ, // pcs
1463 0, // tag count, moot here
1464
1465 // We choose to represent sRGB with its canonical transfer function,
1466 // and with its canonical XYZD50 gamut matrix.
1467 { // the 3 trc curves
1468 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1469 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1470 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1471 },
1472
1473 {{ // 3x3 toXYZD50 matrix
1474 { 0.436065674f, 0.385147095f, 0.143066406f },
1475 { 0.222488403f, 0.716873169f, 0.060607910f },
1476 { 0.013916016f, 0.097076416f, 0.714096069f },
1477 }},
1478
1479 { // an empty A2B
1480 { // input_curves
1481 {{0, {0,0, 0,0,0,0,0}}},
1482 {{0, {0,0, 0,0,0,0,0}}},
1483 {{0, {0,0, 0,0,0,0,0}}},
1484 {{0, {0,0, 0,0,0,0,0}}},
1485 },
1486 nullptr, // grid_8
1487 nullptr, // grid_16
1488 0, // input_channels
1489 {0,0,0,0}, // grid_points
1490
1491 { // matrix_curves
1492 {{0, {0,0, 0,0,0,0,0}}},
1493 {{0, {0,0, 0,0,0,0,0}}},
1494 {{0, {0,0, 0,0,0,0,0}}},
1495 },
1496 {{ // matrix (3x4)
1497 { 0,0,0,0 },
1498 { 0,0,0,0 },
1499 { 0,0,0,0 },
1500 }},
1501 0, // matrix_channels
1502
1503 0, // output_channels
1504 { // output_curves
1505 {{0, {0,0, 0,0,0,0,0}}},
1506 {{0, {0,0, 0,0,0,0,0}}},
1507 {{0, {0,0, 0,0,0,0,0}}},
1508 },
1509 },
1510
1511 { // an empty B2A
1512 { // input_curves
1513 {{0, {0,0, 0,0,0,0,0}}},
1514 {{0, {0,0, 0,0,0,0,0}}},
1515 {{0, {0,0, 0,0,0,0,0}}},
1516 },
1517 0, // input_channels
1518
1519 0, // matrix_channels
1520 { // matrix_curves
1521 {{0, {0,0, 0,0,0,0,0}}},
1522 {{0, {0,0, 0,0,0,0,0}}},
1523 {{0, {0,0, 0,0,0,0,0}}},
1524 },
1525 {{ // matrix (3x4)
1526 { 0,0,0,0 },
1527 { 0,0,0,0 },
1528 { 0,0,0,0 },
1529 }},
1530
1531 { // output_curves
1532 {{0, {0,0, 0,0,0,0,0}}},
1533 {{0, {0,0, 0,0,0,0,0}}},
1534 {{0, {0,0, 0,0,0,0,0}}},
1535 {{0, {0,0, 0,0,0,0,0}}},
1536 },
1537 nullptr, // grid_8
1538 nullptr, // grid_16
1539 {0,0,0,0}, // grid_points
1540 0, // output_channels
1541 },
1542
1543 { 0, 0, 0, 0 }, // an empty CICP
1544
1545 true, // has_trc
1546 true, // has_toXYZD50
1547 false, // has_A2B
1548 false, // has B2A
1549 false, // has_CICP
1550 };
1551 return &sRGB_profile;
1552 }
1553
skcms_XYZD50_profile()1554 const skcms_ICCProfile* skcms_XYZD50_profile() {
1555 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1556 static const skcms_ICCProfile XYZD50_profile = {
1557 nullptr, // buffer, moot here
1558
1559 0, // size, moot here
1560 skcms_Signature_RGB, // data_color_space
1561 skcms_Signature_XYZ, // pcs
1562 0, // tag count, moot here
1563
1564 { // the 3 trc curves
1565 {{0, {1,1, 0,0,0,0,0}}},
1566 {{0, {1,1, 0,0,0,0,0}}},
1567 {{0, {1,1, 0,0,0,0,0}}},
1568 },
1569
1570 {{ // 3x3 toXYZD50 matrix
1571 { 1,0,0 },
1572 { 0,1,0 },
1573 { 0,0,1 },
1574 }},
1575
1576 { // an empty A2B
1577 { // input_curves
1578 {{0, {0,0, 0,0,0,0,0}}},
1579 {{0, {0,0, 0,0,0,0,0}}},
1580 {{0, {0,0, 0,0,0,0,0}}},
1581 {{0, {0,0, 0,0,0,0,0}}},
1582 },
1583 nullptr, // grid_8
1584 nullptr, // grid_16
1585 0, // input_channels
1586 {0,0,0,0}, // grid_points
1587
1588 { // matrix_curves
1589 {{0, {0,0, 0,0,0,0,0}}},
1590 {{0, {0,0, 0,0,0,0,0}}},
1591 {{0, {0,0, 0,0,0,0,0}}},
1592 },
1593 {{ // matrix (3x4)
1594 { 0,0,0,0 },
1595 { 0,0,0,0 },
1596 { 0,0,0,0 },
1597 }},
1598 0, // matrix_channels
1599
1600 0, // output_channels
1601 { // output_curves
1602 {{0, {0,0, 0,0,0,0,0}}},
1603 {{0, {0,0, 0,0,0,0,0}}},
1604 {{0, {0,0, 0,0,0,0,0}}},
1605 },
1606 },
1607
1608 { // an empty B2A
1609 { // input_curves
1610 {{0, {0,0, 0,0,0,0,0}}},
1611 {{0, {0,0, 0,0,0,0,0}}},
1612 {{0, {0,0, 0,0,0,0,0}}},
1613 },
1614 0, // input_channels
1615
1616 0, // matrix_channels
1617 { // matrix_curves
1618 {{0, {0,0, 0,0,0,0,0}}},
1619 {{0, {0,0, 0,0,0,0,0}}},
1620 {{0, {0,0, 0,0,0,0,0}}},
1621 },
1622 {{ // matrix (3x4)
1623 { 0,0,0,0 },
1624 { 0,0,0,0 },
1625 { 0,0,0,0 },
1626 }},
1627
1628 { // output_curves
1629 {{0, {0,0, 0,0,0,0,0}}},
1630 {{0, {0,0, 0,0,0,0,0}}},
1631 {{0, {0,0, 0,0,0,0,0}}},
1632 {{0, {0,0, 0,0,0,0,0}}},
1633 },
1634 nullptr, // grid_8
1635 nullptr, // grid_16
1636 {0,0,0,0}, // grid_points
1637 0, // output_channels
1638 },
1639
1640 { 0, 0, 0, 0 }, // an empty CICP
1641
1642 true, // has_trc
1643 true, // has_toXYZD50
1644 false, // has_A2B
1645 false, // has B2A
1646 false, // has_CICP
1647 };
1648
1649 return &XYZD50_profile;
1650 }
1651
skcms_sRGB_TransferFunction()1652 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1653 return &skcms_sRGB_profile()->trc[0].parametric;
1654 }
1655
skcms_sRGB_Inverse_TransferFunction()1656 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1657 static const skcms_TransferFunction sRGB_inv =
1658 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1659 return &sRGB_inv;
1660 }
1661
skcms_Identity_TransferFunction()1662 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1663 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1664 return &identity;
1665 }
1666
1667 const uint8_t skcms_252_random_bytes[] = {
1668 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1669 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1670 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1671 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1672 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1673 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1674 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1675 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1676 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1677 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1678 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1679 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1680 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1681 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1682 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1683 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1684 };
1685
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1686 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1687 // Test for exactly equal profiles first.
1688 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1689 return true;
1690 }
1691
1692 // For now this is the essentially the same strategy we use in test_only.c
1693 // for our skcms_Transform() smoke tests:
1694 // 1) transform A to XYZD50
1695 // 2) transform B to XYZD50
1696 // 3) return true if they're similar enough
1697 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1698
1699 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1700 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1701
1702 // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1703 // to be treated as equal. But CMYK profiles are a totally different ballgame.
1704 const auto CMYK = skcms_Signature_CMYK;
1705 if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1706 return false;
1707 }
1708
1709 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1710 // TODO: working with RGBA_8888 either way is probably fastest.
1711 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1712 size_t npixels = 84;
1713 if (A->data_color_space == skcms_Signature_CMYK) {
1714 fmt = skcms_PixelFormat_RGBA_8888;
1715 npixels = 63;
1716 }
1717
1718 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1719 // use pre-canned results and skip that skcms_Transform() call?
1720 uint8_t dstA[252],
1721 dstB[252];
1722 if (!skcms_Transform(
1723 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1724 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1725 npixels)) {
1726 return false;
1727 }
1728 if (!skcms_Transform(
1729 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1730 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1731 npixels)) {
1732 return false;
1733 }
1734
1735 // TODO: make sure this final check has reasonable codegen.
1736 for (size_t i = 0; i < 252; i++) {
1737 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1738 return false;
1739 }
1740 }
1741 return true;
1742 }
1743
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1744 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1745 const skcms_TransferFunction* inv_tf) {
1746 if (!profile || !profile->has_trc) {
1747 return false;
1748 }
1749
1750 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1751 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1752 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1753 }
1754
is_zero_to_one(float x)1755 static bool is_zero_to_one(float x) {
1756 return 0 <= x && x <= 1;
1757 }
1758
1759 typedef struct { float vals[3]; } skcms_Vector3;
1760
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1761 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1762 skcms_Vector3 dst = {{0,0,0}};
1763 for (int row = 0; row < 3; ++row) {
1764 dst.vals[row] = m->vals[row][0] * v->vals[0]
1765 + m->vals[row][1] * v->vals[1]
1766 + m->vals[row][2] * v->vals[2];
1767 }
1768 return dst;
1769 }
1770
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1771 bool skcms_AdaptToXYZD50(float wx, float wy,
1772 skcms_Matrix3x3* toXYZD50) {
1773 if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1774 !toXYZD50) {
1775 return false;
1776 }
1777
1778 // Assumes that Y is 1.0f.
1779 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1780
1781 // Now convert toXYZ matrix to toXYZD50.
1782 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1783
1784 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1785 // the matrices below. The Bradford method is used by Adobe and is widely considered
1786 // to be the best.
1787 skcms_Matrix3x3 xyz_to_lms = {{
1788 { 0.8951f, 0.2664f, -0.1614f },
1789 { -0.7502f, 1.7135f, 0.0367f },
1790 { 0.0389f, -0.0685f, 1.0296f },
1791 }};
1792 skcms_Matrix3x3 lms_to_xyz = {{
1793 { 0.9869929f, -0.1470543f, 0.1599627f },
1794 { 0.4323053f, 0.5183603f, 0.0492912f },
1795 { -0.0085287f, 0.0400428f, 0.9684867f },
1796 }};
1797
1798 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1799 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1800
1801 *toXYZD50 = {{
1802 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1803 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1804 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1805 }};
1806 *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1807 *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1808
1809 return true;
1810 }
1811
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1812 bool skcms_PrimariesToXYZD50(float rx, float ry,
1813 float gx, float gy,
1814 float bx, float by,
1815 float wx, float wy,
1816 skcms_Matrix3x3* toXYZD50) {
1817 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1818 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1819 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1820 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1821 !toXYZD50) {
1822 return false;
1823 }
1824
1825 // First, we need to convert xy values (primaries) to XYZ.
1826 skcms_Matrix3x3 primaries = {{
1827 { rx, gx, bx },
1828 { ry, gy, by },
1829 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1830 }};
1831 skcms_Matrix3x3 primaries_inv;
1832 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1833 return false;
1834 }
1835
1836 // Assumes that Y is 1.0f.
1837 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1838 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1839
1840 skcms_Matrix3x3 toXYZ = {{
1841 { XYZ.vals[0], 0, 0 },
1842 { 0, XYZ.vals[1], 0 },
1843 { 0, 0, XYZ.vals[2] },
1844 }};
1845 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1846
1847 skcms_Matrix3x3 DXtoD50;
1848 if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1849 return false;
1850 }
1851
1852 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1853 return true;
1854 }
1855
1856
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1857 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1858 double a00 = src->vals[0][0],
1859 a01 = src->vals[1][0],
1860 a02 = src->vals[2][0],
1861 a10 = src->vals[0][1],
1862 a11 = src->vals[1][1],
1863 a12 = src->vals[2][1],
1864 a20 = src->vals[0][2],
1865 a21 = src->vals[1][2],
1866 a22 = src->vals[2][2];
1867
1868 double b0 = a00*a11 - a01*a10,
1869 b1 = a00*a12 - a02*a10,
1870 b2 = a01*a12 - a02*a11,
1871 b3 = a20,
1872 b4 = a21,
1873 b5 = a22;
1874
1875 double determinant = b0*b5
1876 - b1*b4
1877 + b2*b3;
1878
1879 if (determinant == 0) {
1880 return false;
1881 }
1882
1883 double invdet = 1.0 / determinant;
1884 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1885 return false;
1886 }
1887
1888 b0 *= invdet;
1889 b1 *= invdet;
1890 b2 *= invdet;
1891 b3 *= invdet;
1892 b4 *= invdet;
1893 b5 *= invdet;
1894
1895 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1896 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1897 dst->vals[2][0] = (float)( + b2 );
1898 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1899 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1900 dst->vals[2][1] = (float)( - b1 );
1901 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1902 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1903 dst->vals[2][2] = (float)( + b0 );
1904
1905 for (int r = 0; r < 3; ++r)
1906 for (int c = 0; c < 3; ++c) {
1907 if (!isfinitef_(dst->vals[r][c])) {
1908 return false;
1909 }
1910 }
1911 return true;
1912 }
1913
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1914 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1915 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1916 for (int r = 0; r < 3; r++)
1917 for (int c = 0; c < 3; c++) {
1918 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1919 + A->vals[r][1] * B->vals[1][c]
1920 + A->vals[r][2] * B->vals[2][c];
1921 }
1922 return m;
1923 }
1924
1925 #if defined(__clang__)
1926 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1927 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1928 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1929 TF_PQish pq;
1930 TF_HLGish hlg;
1931 switch (classify(*src, &pq, &hlg)) {
1932 case skcms_TFType_Invalid: return false;
1933 case skcms_TFType_sRGBish: break; // handled below
1934
1935 case skcms_TFType_PQish:
1936 *dst = { TFKind_marker(skcms_TFType_PQish), -pq.A, pq.D, 1.0f/pq.F
1937 , pq.B, -pq.E, 1.0f/pq.C};
1938 return true;
1939
1940 case skcms_TFType_HLGish:
1941 *dst = { TFKind_marker(skcms_TFType_HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1942 , 1.0f/hlg.a, hlg.b, hlg.c
1943 , hlg.K_minus_1 };
1944 return true;
1945
1946 case skcms_TFType_HLGinvish:
1947 *dst = { TFKind_marker(skcms_TFType_HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1948 , 1.0f/hlg.a, hlg.b, hlg.c
1949 , hlg.K_minus_1 };
1950 return true;
1951 }
1952
1953 assert (classify(*src) == skcms_TFType_sRGBish);
1954
1955 // We're inverting this function, solving for x in terms of y.
1956 // y = (cx + f) x < d
1957 // (ax + b)^g + e x ≥ d
1958 // The inverse of this function can be expressed in the same piecewise form.
1959 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1960
1961 // We'll start by finding the new threshold inv.d.
1962 // In principle we should be able to find that by solving for y at x=d from either side.
1963 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1964 float d_l = src->c * src->d + src->f,
1965 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1966 if (fabsf_(d_l - d_r) > 1/512.0f) {
1967 return false;
1968 }
1969 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1970
1971 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1972 if (inv.d > 0) {
1973 // Inverting the linear section is pretty straightfoward:
1974 // y = cx + f
1975 // y - f = cx
1976 // (1/c)y - f/c = x
1977 inv.c = 1.0f/src->c;
1978 inv.f = -src->f/src->c;
1979 }
1980
1981 // The interesting part is inverting the nonlinear section:
1982 // y = (ax + b)^g + e.
1983 // y - e = (ax + b)^g
1984 // (y - e)^1/g = ax + b
1985 // (y - e)^1/g - b = ax
1986 // (1/a)(y - e)^1/g - b/a = x
1987 //
1988 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1989 // let k = (1/a)^g
1990 // (1/a)( y - e)^1/g - b/a = x
1991 // (ky - ke)^1/g - b/a = x
1992
1993 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1994 inv.g = 1.0f / src->g;
1995 inv.a = k;
1996 inv.b = -k * src->e;
1997 inv.e = -src->b / src->a;
1998
1999 // We need to enforce the same constraints here that we do when fitting a curve,
2000 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
2001 // of the source function if we're here.
2002
2003 // Just like when fitting the curve, there's really no way to rescue a < 0.
2004 if (inv.a < 0) {
2005 return false;
2006 }
2007 // On the other hand we can rescue an ad+b that's gone slightly negative here.
2008 if (inv.a * inv.d + inv.b < 0) {
2009 inv.b = -inv.a * inv.d;
2010 }
2011
2012 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
2013 // where we might still fail here, like non-finite parameter values.
2014 if (classify(inv) != skcms_TFType_sRGBish) {
2015 return false;
2016 }
2017
2018 assert (inv.a >= 0);
2019 assert (inv.a * inv.d + inv.b >= 0);
2020
2021 // Now in principle we're done.
2022 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
2023 // e or f of the inverse, depending on which segment contains src(1.0f).
2024 float s = skcms_TransferFunction_eval(src, 1.0f);
2025 if (!isfinitef_(s)) {
2026 return false;
2027 }
2028
2029 float sign = s < 0 ? -1.0f : 1.0f;
2030 s *= sign;
2031 if (s < inv.d) {
2032 inv.f = 1.0f - sign * inv.c * s;
2033 } else {
2034 inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
2035 }
2036
2037 *dst = inv;
2038 return classify(*dst) == skcms_TFType_sRGBish;
2039 }
2040
2041 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
2042
2043 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
2044 //
2045 // tf(x) = cx + f x < d
2046 // tf(x) = (ax + b)^g + e x ≥ d
2047 //
2048 // When fitting, we add the additional constraint that both pieces meet at d:
2049 //
2050 // cd + f = (ad + b)^g + e
2051 //
2052 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
2053 //
2054 // tf(x) = cx + f x < d
2055 // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
2056 //
2057 // Our overall strategy is then:
2058 // For a couple tolerances,
2059 // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
2060 // - invert c,d,f
2061 // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
2062 // (and by constraint, inverted e) to the inverse of the table.
2063 // Return the parameters with least maximum error.
2064 //
2065 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
2066 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
2067 //
2068 // let y = Table(x)
2069 // r(x) = x - f_inv(y)
2070 //
2071 // ∂r/∂g = ln(ay + b)*(ay + b)^g
2072 // - ln(ad + b)*(ad + b)^g
2073 // ∂r/∂a = yg(ay + b)^(g-1)
2074 // - dg(ad + b)^(g-1)
2075 // ∂r/∂b = g(ay + b)^(g-1)
2076 // - g(ad + b)^(g-1)
2077
2078 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
2079 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])2080 static float rg_nonlinear(float x,
2081 const skcms_Curve* curve,
2082 const skcms_TransferFunction* tf,
2083 float dfdP[3]) {
2084 const float y = eval_curve(curve, x);
2085
2086 const float g = tf->g, a = tf->a, b = tf->b,
2087 c = tf->c, d = tf->d, f = tf->f;
2088
2089 const float Y = fmaxf_(a*y + b, 0.0f),
2090 D = a*d + b;
2091 assert (D >= 0);
2092
2093 // The gradient.
2094 dfdP[0] = logf_(Y)*powf_(Y, g)
2095 - logf_(D)*powf_(D, g);
2096 dfdP[1] = y*g*powf_(Y, g-1)
2097 - d*g*powf_(D, g-1);
2098 dfdP[2] = g*powf_(Y, g-1)
2099 - g*powf_(D, g-1);
2100
2101 // The residual.
2102 const float f_inv = powf_(Y, g)
2103 - powf_(D, g)
2104 + c*d + f;
2105 return x - f_inv;
2106 }
2107
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)2108 static bool gauss_newton_step(const skcms_Curve* curve,
2109 skcms_TransferFunction* tf,
2110 float x0, float dx, int N) {
2111 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
2112 //
2113 // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
2114 //
2115 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
2116 // where r(P) is the residual vector
2117 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
2118 //
2119 // Let's review the shape of each of these expressions:
2120 // r(P) is [N x 1], a column vector with one entry per value of x tested
2121 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
2122 // Jf^T is [3 x N], the transpose of Jf
2123 //
2124 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2125 // and so is its inverse (Jf^T Jf)^-1
2126 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2127 //
2128 // Our implementation strategy to get to the final ∆P is
2129 // 1) evaluate Jf^T Jf, call that lhs
2130 // 2) evaluate Jf^T r(P), call that rhs
2131 // 3) invert lhs
2132 // 4) multiply inverse lhs by rhs
2133 //
2134 // This is a friendly implementation strategy because we don't have to have any
2135 // buffers that scale with N, and equally nice don't have to perform any matrix
2136 // operations that are variable size.
2137 //
2138 // Other implementation strategies could trade this off, e.g. evaluating the
2139 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2140 // the residuals. That would probably require implementing singular value
2141 // decomposition, and would create a [3 x N] matrix to be multiplied by the
2142 // [N x 1] residual vector, but on the upside I think that'd eliminate the
2143 // possibility of this gauss_newton_step() function ever failing.
2144
2145 // 0) start off with lhs and rhs safely zeroed.
2146 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2147 skcms_Vector3 rhs = { {0,0,0} };
2148
2149 // 1,2) evaluate lhs and evaluate rhs
2150 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2151 // so we'll have to update lhs and rhs at the same time.
2152 for (int i = 0; i < N; i++) {
2153 float x = x0 + static_cast<float>(i)*dx;
2154
2155 float dfdP[3] = {0,0,0};
2156 float resid = rg_nonlinear(x,curve,tf, dfdP);
2157
2158 for (int r = 0; r < 3; r++) {
2159 for (int c = 0; c < 3; c++) {
2160 lhs.vals[r][c] += dfdP[r] * dfdP[c];
2161 }
2162 rhs.vals[r] += dfdP[r] * resid;
2163 }
2164 }
2165
2166 // If any of the 3 P parameters are unused, this matrix will be singular.
2167 // Detect those cases and fix them up to indentity instead, so we can invert.
2168 for (int k = 0; k < 3; k++) {
2169 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2170 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2171 lhs.vals[k][k] = 1;
2172 }
2173 }
2174
2175 // 3) invert lhs
2176 skcms_Matrix3x3 lhs_inv;
2177 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2178 return false;
2179 }
2180
2181 // 4) multiply inverse lhs by rhs
2182 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2183 tf->g += dP.vals[0];
2184 tf->a += dP.vals[1];
2185 tf->b += dP.vals[2];
2186 return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2187 }
2188
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)2189 static float max_roundtrip_error_checked(const skcms_Curve* curve,
2190 const skcms_TransferFunction* tf_inv) {
2191 skcms_TransferFunction tf;
2192 if (!skcms_TransferFunction_invert(tf_inv, &tf) || skcms_TFType_sRGBish != classify(tf)) {
2193 return INFINITY_;
2194 }
2195
2196 skcms_TransferFunction tf_inv_again;
2197 if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2198 return INFINITY_;
2199 }
2200
2201 return skcms_MaxRoundtripError(curve, &tf_inv_again);
2202 }
2203
2204 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)2205 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2206 // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2207 auto fixup_tf = [tf]() {
2208 // a must be non-negative. That ensures the function is monotonically increasing.
2209 // We don't really know how to fix up a if it goes negative.
2210 if (tf->a < 0) {
2211 return false;
2212 }
2213 // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2214 // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2215 if (tf->a * tf->d + tf->b < 0) {
2216 tf->b = -tf->a * tf->d;
2217 }
2218 assert (tf->a >= 0 &&
2219 tf->a * tf->d + tf->b >= 0);
2220
2221 // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2222 // parameter so we can guarantee this.
2223 tf->e = tf->c*tf->d + tf->f
2224 - powf_(tf->a*tf->d + tf->b, tf->g);
2225
2226 return isfinitef_(tf->e);
2227 };
2228
2229 if (!fixup_tf()) {
2230 return false;
2231 }
2232
2233 // No matter where we start, dx should always represent N even steps from 0 to 1.
2234 const float dx = 1.0f / static_cast<float>(N-1);
2235
2236 skcms_TransferFunction best_tf = *tf;
2237 float best_max_error = INFINITY_;
2238
2239 // Need this or several curves get worse... *sigh*
2240 float init_error = max_roundtrip_error_checked(curve, tf);
2241 if (init_error < best_max_error) {
2242 best_max_error = init_error;
2243 best_tf = *tf;
2244 }
2245
2246 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2247 for (int j = 0; j < 8; j++) {
2248 if (!gauss_newton_step(curve, tf, static_cast<float>(L)*dx, dx, N-L) || !fixup_tf()) {
2249 *tf = best_tf;
2250 return isfinitef_(best_max_error);
2251 }
2252
2253 float max_error = max_roundtrip_error_checked(curve, tf);
2254 if (max_error < best_max_error) {
2255 best_max_error = max_error;
2256 best_tf = *tf;
2257 }
2258 }
2259
2260 *tf = best_tf;
2261 return isfinitef_(best_max_error);
2262 }
2263
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)2264 bool skcms_ApproximateCurve(const skcms_Curve* curve,
2265 skcms_TransferFunction* approx,
2266 float* max_error) {
2267 if (!curve || !approx || !max_error) {
2268 return false;
2269 }
2270
2271 if (curve->table_entries == 0) {
2272 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2273 return false;
2274 }
2275
2276 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2277 // We need at least two points, and must put some reasonable cap on the maximum number.
2278 return false;
2279 }
2280
2281 int N = (int)curve->table_entries;
2282 const float dx = 1.0f / static_cast<float>(N - 1);
2283
2284 *max_error = INFINITY_;
2285 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2286 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2287 skcms_TransferFunction tf,
2288 tf_inv;
2289
2290 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2291 tf.f = 0.0f;
2292 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2293
2294 if (L == N) {
2295 // If the entire data set was linear, move the coefficients to the nonlinear portion
2296 // with G == 1. This lets use a canonical representation with d == 0.
2297 tf.g = 1;
2298 tf.a = tf.c;
2299 tf.b = tf.f;
2300 tf.c = tf.d = tf.e = tf.f = 0;
2301 } else if (L == N - 1) {
2302 // Degenerate case with only two points in the nonlinear segment. Solve directly.
2303 tf.g = 1;
2304 tf.a = (eval_curve(curve, static_cast<float>(N-1)*dx) -
2305 eval_curve(curve, static_cast<float>(N-2)*dx))
2306 / dx;
2307 tf.b = eval_curve(curve, static_cast<float>(N-2)*dx)
2308 - tf.a * static_cast<float>(N-2)*dx;
2309 tf.e = 0;
2310 } else {
2311 // Start by guessing a gamma-only curve through the midpoint.
2312 int mid = (L + N) / 2;
2313 float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1);
2314 float mid_y = eval_curve(curve, mid_x);
2315 tf.g = log2f_(mid_y) / log2f_(mid_x);
2316 tf.a = 1;
2317 tf.b = 0;
2318 tf.e = tf.c*tf.d + tf.f
2319 - powf_(tf.a*tf.d + tf.b, tf.g);
2320
2321
2322 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2323 !fit_nonlinear(curve, L,N, &tf_inv)) {
2324 continue;
2325 }
2326
2327 // We fit tf_inv, so calculate tf to keep in sync.
2328 // fit_nonlinear() should guarantee invertibility.
2329 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2330 assert(false);
2331 continue;
2332 }
2333 }
2334
2335 // We'd better have a sane, sRGB-ish TF by now.
2336 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2337 // anything else is just some accident of math and the way we pun tf.g as a type flag.
2338 // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2339 if (skcms_TFType_sRGBish != classify(tf)) {
2340 continue;
2341 }
2342
2343 // We find our error by roundtripping the table through tf_inv.
2344 //
2345 // (The most likely use case for this approximation is to be inverted and
2346 // used as the transfer function for a destination color space.)
2347 //
2348 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2349 // invertible, so re-verify that here (and use the new inverse for testing).
2350 // fit_nonlinear() should guarantee this, but the special cases that don't use
2351 // it may fail this test.
2352 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2353 continue;
2354 }
2355
2356 float err = skcms_MaxRoundtripError(curve, &tf_inv);
2357 if (*max_error > err) {
2358 *max_error = err;
2359 *approx = tf;
2360 }
2361 }
2362 return isfinitef_(*max_error);
2363 }
2364
2365 enum class CpuType { Baseline, HSW, SKX };
2366
cpu_type()2367 static CpuType cpu_type() {
2368 #if defined(SKCMS_PORTABLE) || !defined(__x86_64__) || defined(SKCMS_FORCE_BASELINE)
2369 return CpuType::Baseline;
2370 #elif defined(SKCMS_FORCE_HSW)
2371 return CpuType::HSW;
2372 #elif defined(SKCMS_FORCE_SKX)
2373 return CpuType::SKX;
2374 #else
2375 static const CpuType type = []{
2376 if (!sAllowRuntimeCPUDetection) {
2377 return CpuType::Baseline;
2378 }
2379 // See http://www.sandpile.org/x86/cpuid.htm
2380
2381 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2382 uint32_t eax, ebx, ecx, edx;
2383 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2384 : "0"(1), "2"(0));
2385 if ((edx & (1u<<25)) && // SSE
2386 (edx & (1u<<26)) && // SSE2
2387 (ecx & (1u<< 0)) && // SSE3
2388 (ecx & (1u<< 9)) && // SSSE3
2389 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2390 (ecx & (1u<<19)) && // SSE4.1
2391 (ecx & (1u<<20)) && // SSE4.2
2392 (ecx & (1u<<26)) && // XSAVE
2393 (ecx & (1u<<27)) && // OSXSAVE
2394 (ecx & (1u<<28)) && // AVX
2395 (ecx & (1u<<29))) { // F16C
2396
2397 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2398 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2399 : "0"(7), "2"(0));
2400 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2401 uint32_t xcr0, dont_need_edx;
2402 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2403
2404 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2405 (xcr0 & (1u<<2)) && // YMM register state saved?
2406 (ebx & (1u<<5))) { // AVX2
2407 // At this point we're at least HSW. Continue checking for SKX.
2408 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2409 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2410 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2411 (ebx & (1u<<16)) && // AVX512F
2412 (ebx & (1u<<17)) && // AVX512DQ
2413 (ebx & (1u<<28)) && // AVX512CD
2414 (ebx & (1u<<30)) && // AVX512BW
2415 (ebx & (1u<<31))) { // AVX512VL
2416 return CpuType::SKX;
2417 }
2418 return CpuType::HSW;
2419 }
2420 }
2421 return CpuType::Baseline;
2422 }();
2423 return type;
2424 #endif
2425 }
2426
tf_is_gamma(const skcms_TransferFunction & tf)2427 static bool tf_is_gamma(const skcms_TransferFunction& tf) {
2428 return tf.g > 0 && tf.a == 1 &&
2429 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0;
2430 }
2431
2432 struct OpAndArg {
2433 Op op;
2434 const void* arg;
2435 };
2436
select_curve_op(const skcms_Curve * curve,int channel)2437 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2438 struct OpType {
2439 Op sGamma, sRGBish, PQish, HLGish, HLGinvish, table;
2440 };
2441 static constexpr OpType kOps[] = {
2442 { Op::gamma_r, Op::tf_r, Op::pq_r, Op::hlg_r, Op::hlginv_r, Op::table_r },
2443 { Op::gamma_g, Op::tf_g, Op::pq_g, Op::hlg_g, Op::hlginv_g, Op::table_g },
2444 { Op::gamma_b, Op::tf_b, Op::pq_b, Op::hlg_b, Op::hlginv_b, Op::table_b },
2445 { Op::gamma_a, Op::tf_a, Op::pq_a, Op::hlg_a, Op::hlginv_a, Op::table_a },
2446 };
2447 const auto& op = kOps[channel];
2448
2449 if (curve->table_entries == 0) {
2450 const OpAndArg noop = { Op::load_a8/*doesn't matter*/, nullptr };
2451
2452 const skcms_TransferFunction& tf = curve->parametric;
2453
2454 if (tf_is_gamma(tf)) {
2455 return tf.g != 1 ? OpAndArg{op.sGamma, &tf}
2456 : noop;
2457 }
2458
2459 switch (classify(tf)) {
2460 case skcms_TFType_Invalid: return noop;
2461 case skcms_TFType_sRGBish: return OpAndArg{op.sRGBish, &tf};
2462 case skcms_TFType_PQish: return OpAndArg{op.PQish, &tf};
2463 case skcms_TFType_HLGish: return OpAndArg{op.HLGish, &tf};
2464 case skcms_TFType_HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2465 }
2466 }
2467 return OpAndArg{op.table, curve};
2468 }
2469
select_curve_ops(const skcms_Curve * curves,int numChannels,OpAndArg * ops)2470 static int select_curve_ops(const skcms_Curve* curves, int numChannels, OpAndArg* ops) {
2471 // We process the channels in reverse order, yielding ops in ABGR order.
2472 // (Working backwards allows us to fuse trailing B+G+R ops into a single RGB op.)
2473 int cursor = 0;
2474 for (int index = numChannels; index-- > 0; ) {
2475 ops[cursor] = select_curve_op(&curves[index], index);
2476 if (ops[cursor].arg) {
2477 ++cursor;
2478 }
2479 }
2480
2481 // Identify separate B+G+R ops and fuse them into a single RGB op.
2482 if (cursor >= 3) {
2483 struct FusableOps {
2484 Op r, g, b, rgb;
2485 };
2486 static constexpr FusableOps kFusableOps[] = {
2487 {Op::gamma_r, Op::gamma_g, Op::gamma_b, Op::gamma_rgb},
2488 {Op::tf_r, Op::tf_g, Op::tf_b, Op::tf_rgb},
2489 {Op::pq_r, Op::pq_g, Op::pq_b, Op::pq_rgb},
2490 {Op::hlg_r, Op::hlg_g, Op::hlg_b, Op::hlg_rgb},
2491 {Op::hlginv_r, Op::hlginv_g, Op::hlginv_b, Op::hlginv_rgb},
2492 };
2493
2494 int posR = cursor - 1;
2495 int posG = cursor - 2;
2496 int posB = cursor - 3;
2497 for (const FusableOps& fusableOp : kFusableOps) {
2498 if (ops[posR].op == fusableOp.r &&
2499 ops[posG].op == fusableOp.g &&
2500 ops[posB].op == fusableOp.b &&
2501 (0 == memcmp(ops[posR].arg, ops[posG].arg, sizeof(skcms_TransferFunction))) &&
2502 (0 == memcmp(ops[posR].arg, ops[posB].arg, sizeof(skcms_TransferFunction)))) {
2503 // Fuse the three matching ops into one.
2504 ops[posB].op = fusableOp.rgb;
2505 cursor -= 2;
2506 break;
2507 }
2508 }
2509 }
2510
2511 return cursor;
2512 }
2513
bytes_per_pixel(skcms_PixelFormat fmt)2514 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2515 switch (fmt >> 1) { // ignore rgb/bgr
2516 case skcms_PixelFormat_A_8 >> 1: return 1;
2517 case skcms_PixelFormat_G_8 >> 1: return 1;
2518 case skcms_PixelFormat_GA_88 >> 1: return 2;
2519 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2520 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2521 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2522 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2523 case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4;
2524 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2525 case skcms_PixelFormat_RGB_101010x_XR >> 1: return 4;
2526 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2527 case skcms_PixelFormat_RGBA_10101010_XR >> 1: return 8;
2528 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2529 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2530 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2531 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2532 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2533 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2534 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2535 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2536 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2537 }
2538 assert(false);
2539 return 0;
2540 }
2541
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2542 static bool prep_for_destination(const skcms_ICCProfile* profile,
2543 skcms_Matrix3x3* fromXYZD50,
2544 skcms_TransferFunction* invR,
2545 skcms_TransferFunction* invG,
2546 skcms_TransferFunction* invB) {
2547 // skcms_Transform() supports B2A destinations...
2548 if (profile->has_B2A) { return true; }
2549 // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2550 return profile->has_trc
2551 && profile->has_toXYZD50
2552 && profile->trc[0].table_entries == 0
2553 && profile->trc[1].table_entries == 0
2554 && profile->trc[2].table_entries == 0
2555 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2556 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2557 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2558 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2559 }
2560
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz)2561 bool skcms_Transform(const void* src,
2562 skcms_PixelFormat srcFmt,
2563 skcms_AlphaFormat srcAlpha,
2564 const skcms_ICCProfile* srcProfile,
2565 void* dst,
2566 skcms_PixelFormat dstFmt,
2567 skcms_AlphaFormat dstAlpha,
2568 const skcms_ICCProfile* dstProfile,
2569 size_t nz) {
2570 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2571 src_bpp = bytes_per_pixel(srcFmt);
2572 // Let's just refuse if the request is absurdly big.
2573 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2574 return false;
2575 }
2576 int n = (int)nz;
2577
2578 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2579 if (!srcProfile) {
2580 srcProfile = skcms_sRGB_profile();
2581 }
2582 if (!dstProfile) {
2583 dstProfile = skcms_sRGB_profile();
2584 }
2585
2586 // We can't transform in place unless the PixelFormats are the same size.
2587 if (dst == src && dst_bpp != src_bpp) {
2588 return false;
2589 }
2590 // TODO: more careful alias rejection (like, dst == src + 1)?
2591
2592 Op program[32];
2593 const void* context[32];
2594
2595 Op* ops = program;
2596 const void** contexts = context;
2597
2598 auto add_op = [&](Op o) {
2599 *ops++ = o;
2600 *contexts++ = nullptr;
2601 };
2602
2603 auto add_op_ctx = [&](Op o, const void* c) {
2604 *ops++ = o;
2605 *contexts++ = c;
2606 };
2607
2608 auto add_curve_ops = [&](const skcms_Curve* curves, int numChannels) {
2609 OpAndArg oa[4];
2610 assert(numChannels <= ARRAY_COUNT(oa));
2611
2612 int numOps = select_curve_ops(curves, numChannels, oa);
2613
2614 for (int i = 0; i < numOps; ++i) {
2615 add_op_ctx(oa[i].op, oa[i].arg);
2616 }
2617 };
2618
2619 // These are always parametric curves of some sort.
2620 skcms_Curve dst_curves[3];
2621 dst_curves[0].table_entries =
2622 dst_curves[1].table_entries =
2623 dst_curves[2].table_entries = 0;
2624
2625 skcms_Matrix3x3 from_xyz;
2626
2627 switch (srcFmt >> 1) {
2628 default: return false;
2629 case skcms_PixelFormat_A_8 >> 1: add_op(Op::load_a8); break;
2630 case skcms_PixelFormat_G_8 >> 1: add_op(Op::load_g8); break;
2631 case skcms_PixelFormat_GA_88 >> 1: add_op(Op::load_ga88); break;
2632 case skcms_PixelFormat_ABGR_4444 >> 1: add_op(Op::load_4444); break;
2633 case skcms_PixelFormat_RGB_565 >> 1: add_op(Op::load_565); break;
2634 case skcms_PixelFormat_RGB_888 >> 1: add_op(Op::load_888); break;
2635 case skcms_PixelFormat_RGBA_8888 >> 1: add_op(Op::load_8888); break;
2636 case skcms_PixelFormat_RGBA_1010102 >> 1: add_op(Op::load_1010102); break;
2637 case skcms_PixelFormat_RGB_101010x_XR >> 1: add_op(Op::load_101010x_XR); break;
2638 case skcms_PixelFormat_RGBA_10101010_XR >> 1: add_op(Op::load_10101010_XR); break;
2639 case skcms_PixelFormat_RGB_161616LE >> 1: add_op(Op::load_161616LE); break;
2640 case skcms_PixelFormat_RGBA_16161616LE >> 1: add_op(Op::load_16161616LE); break;
2641 case skcms_PixelFormat_RGB_161616BE >> 1: add_op(Op::load_161616BE); break;
2642 case skcms_PixelFormat_RGBA_16161616BE >> 1: add_op(Op::load_16161616BE); break;
2643 case skcms_PixelFormat_RGB_hhh_Norm >> 1: add_op(Op::load_hhh); break;
2644 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: add_op(Op::load_hhhh); break;
2645 case skcms_PixelFormat_RGB_hhh >> 1: add_op(Op::load_hhh); break;
2646 case skcms_PixelFormat_RGBA_hhhh >> 1: add_op(Op::load_hhhh); break;
2647 case skcms_PixelFormat_RGB_fff >> 1: add_op(Op::load_fff); break;
2648 case skcms_PixelFormat_RGBA_ffff >> 1: add_op(Op::load_ffff); break;
2649
2650 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2651 add_op(Op::load_8888);
2652 add_op_ctx(Op::tf_rgb, skcms_sRGB_TransferFunction());
2653 break;
2654 }
2655 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2656 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2657 add_op(Op::clamp);
2658 }
2659 if (srcFmt & 1) {
2660 add_op(Op::swap_rb);
2661 }
2662 skcms_ICCProfile gray_dst_profile;
2663 switch (dstFmt >> 1) {
2664 case skcms_PixelFormat_G_8:
2665 case skcms_PixelFormat_GA_88:
2666 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2667 // luminance (Y) by the destination transfer function.
2668 gray_dst_profile = *dstProfile;
2669 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2670 dstProfile = &gray_dst_profile;
2671 break;
2672 default:
2673 break;
2674 }
2675
2676 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2677 // Photoshop creates CMYK images as inverse CMYK.
2678 // These happen to be the only ones we've _ever_ seen.
2679 add_op(Op::invert);
2680 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2681 srcAlpha = skcms_AlphaFormat_Unpremul;
2682 }
2683
2684 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2685 add_op(Op::force_opaque);
2686 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2687 add_op(Op::unpremul);
2688 }
2689
2690 if (dstProfile != srcProfile) {
2691
2692 if (!prep_for_destination(dstProfile,
2693 &from_xyz,
2694 &dst_curves[0].parametric,
2695 &dst_curves[1].parametric,
2696 &dst_curves[2].parametric)) {
2697 return false;
2698 }
2699
2700 if (srcProfile->has_A2B) {
2701 if (srcProfile->A2B.input_channels) {
2702 add_curve_ops(srcProfile->A2B.input_curves,
2703 (int)srcProfile->A2B.input_channels);
2704 add_op(Op::clamp);
2705 add_op_ctx(Op::clut_A2B, &srcProfile->A2B);
2706 }
2707
2708 if (srcProfile->A2B.matrix_channels == 3) {
2709 add_curve_ops(srcProfile->A2B.matrix_curves, /*numChannels=*/3);
2710
2711 static const skcms_Matrix3x4 I = {{
2712 {1,0,0,0},
2713 {0,1,0,0},
2714 {0,0,1,0},
2715 }};
2716 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2717 add_op_ctx(Op::matrix_3x4, &srcProfile->A2B.matrix);
2718 }
2719 }
2720
2721 if (srcProfile->A2B.output_channels == 3) {
2722 add_curve_ops(srcProfile->A2B.output_curves, /*numChannels=*/3);
2723 }
2724
2725 if (srcProfile->pcs == skcms_Signature_Lab) {
2726 add_op(Op::lab_to_xyz);
2727 }
2728
2729 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2730 add_curve_ops(srcProfile->trc, /*numChannels=*/3);
2731 } else {
2732 return false;
2733 }
2734
2735 // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2736 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2737
2738 if (dstProfile->has_B2A) {
2739 // B2A needs its input in XYZD50, so transform TRC sources now.
2740 if (!srcProfile->has_A2B) {
2741 add_op_ctx(Op::matrix_3x3, &srcProfile->toXYZD50);
2742 }
2743
2744 if (dstProfile->pcs == skcms_Signature_Lab) {
2745 add_op(Op::xyz_to_lab);
2746 }
2747
2748 if (dstProfile->B2A.input_channels == 3) {
2749 add_curve_ops(dstProfile->B2A.input_curves, /*numChannels=*/3);
2750 }
2751
2752 if (dstProfile->B2A.matrix_channels == 3) {
2753 static const skcms_Matrix3x4 I = {{
2754 {1,0,0,0},
2755 {0,1,0,0},
2756 {0,0,1,0},
2757 }};
2758 if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2759 add_op_ctx(Op::matrix_3x4, &dstProfile->B2A.matrix);
2760 }
2761
2762 add_curve_ops(dstProfile->B2A.matrix_curves, /*numChannels=*/3);
2763 }
2764
2765 if (dstProfile->B2A.output_channels) {
2766 add_op(Op::clamp);
2767 add_op_ctx(Op::clut_B2A, &dstProfile->B2A);
2768
2769 add_curve_ops(dstProfile->B2A.output_curves,
2770 (int)dstProfile->B2A.output_channels);
2771 }
2772 } else {
2773 // This is a TRC destination.
2774 // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2775 // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2776 static const skcms_Matrix3x3 I = {{
2777 { 1.0f, 0.0f, 0.0f },
2778 { 0.0f, 1.0f, 0.0f },
2779 { 0.0f, 0.0f, 1.0f },
2780 }};
2781 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2782
2783 // There's a chance the source and destination gamuts are identical,
2784 // in which case we can skip the gamut transform.
2785 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2786 // Concat the entire gamut transform into from_xyz,
2787 // now slightly misnamed but it's a handy spot to stash the result.
2788 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2789 add_op_ctx(Op::matrix_3x3, &from_xyz);
2790 }
2791
2792 // Encode back to dst RGB using its parametric transfer functions.
2793 OpAndArg oa[3];
2794 int numOps = select_curve_ops(dst_curves, /*numChannels=*/3, oa);
2795 for (int index = 0; index < numOps; ++index) {
2796 assert(oa[index].op != Op::table_r &&
2797 oa[index].op != Op::table_g &&
2798 oa[index].op != Op::table_b &&
2799 oa[index].op != Op::table_a);
2800 add_op_ctx(oa[index].op, oa[index].arg);
2801 }
2802 }
2803 }
2804
2805 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2806 // not just to values that fit in [0,1].
2807 //
2808 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2809 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2810 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2811 add_op(Op::clamp);
2812 }
2813
2814 if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2815 // Photoshop creates CMYK images as inverse CMYK.
2816 // These happen to be the only ones we've _ever_ seen.
2817 add_op(Op::invert);
2818
2819 // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2820 dstAlpha = skcms_AlphaFormat_Unpremul;
2821 }
2822
2823 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2824 add_op(Op::force_opaque);
2825 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2826 add_op(Op::premul);
2827 }
2828 if (dstFmt & 1) {
2829 add_op(Op::swap_rb);
2830 }
2831 switch (dstFmt >> 1) {
2832 default: return false;
2833 case skcms_PixelFormat_A_8 >> 1: add_op(Op::store_a8); break;
2834 case skcms_PixelFormat_G_8 >> 1: add_op(Op::store_g8); break;
2835 case skcms_PixelFormat_GA_88 >> 1: add_op(Op::store_ga88); break;
2836 case skcms_PixelFormat_ABGR_4444 >> 1: add_op(Op::store_4444); break;
2837 case skcms_PixelFormat_RGB_565 >> 1: add_op(Op::store_565); break;
2838 case skcms_PixelFormat_RGB_888 >> 1: add_op(Op::store_888); break;
2839 case skcms_PixelFormat_RGBA_8888 >> 1: add_op(Op::store_8888); break;
2840 case skcms_PixelFormat_RGBA_1010102 >> 1: add_op(Op::store_1010102); break;
2841 case skcms_PixelFormat_RGB_161616LE >> 1: add_op(Op::store_161616LE); break;
2842 case skcms_PixelFormat_RGBA_16161616LE >> 1: add_op(Op::store_16161616LE); break;
2843 case skcms_PixelFormat_RGB_161616BE >> 1: add_op(Op::store_161616BE); break;
2844 case skcms_PixelFormat_RGBA_16161616BE >> 1: add_op(Op::store_16161616BE); break;
2845 case skcms_PixelFormat_RGB_hhh_Norm >> 1: add_op(Op::store_hhh); break;
2846 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: add_op(Op::store_hhhh); break;
2847 case skcms_PixelFormat_RGB_101010x_XR >> 1: add_op(Op::store_101010x_XR); break;
2848 case skcms_PixelFormat_RGB_hhh >> 1: add_op(Op::store_hhh); break;
2849 case skcms_PixelFormat_RGBA_hhhh >> 1: add_op(Op::store_hhhh); break;
2850 case skcms_PixelFormat_RGB_fff >> 1: add_op(Op::store_fff); break;
2851 case skcms_PixelFormat_RGBA_ffff >> 1: add_op(Op::store_ffff); break;
2852
2853 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2854 add_op_ctx(Op::tf_rgb, skcms_sRGB_Inverse_TransferFunction());
2855 add_op(Op::store_8888);
2856 break;
2857 }
2858
2859 assert(ops <= program + ARRAY_COUNT(program));
2860 assert(contexts <= context + ARRAY_COUNT(context));
2861
2862 auto run = baseline::run_program;
2863 switch (cpu_type()) {
2864 case CpuType::SKX:
2865 #if !defined(SKCMS_DISABLE_SKX)
2866 run = skx::run_program;
2867 break;
2868 #endif
2869
2870 case CpuType::HSW:
2871 #if !defined(SKCMS_DISABLE_HSW)
2872 run = hsw::run_program;
2873 break;
2874 #endif
2875
2876 case CpuType::Baseline:
2877 break;
2878 }
2879
2880 run(program, context, ops - program, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2881 return true;
2882 }
2883
assert_usable_as_destination(const skcms_ICCProfile * profile)2884 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2885 #if defined(NDEBUG)
2886 (void)profile;
2887 #else
2888 skcms_Matrix3x3 fromXYZD50;
2889 skcms_TransferFunction invR, invG, invB;
2890 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2891 #endif
2892 }
2893
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2894 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2895 if (!profile->has_B2A) {
2896 skcms_Matrix3x3 fromXYZD50;
2897 if (!profile->has_trc || !profile->has_toXYZD50
2898 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2899 return false;
2900 }
2901
2902 skcms_TransferFunction tf[3];
2903 for (int i = 0; i < 3; i++) {
2904 skcms_TransferFunction inv;
2905 if (profile->trc[i].table_entries == 0
2906 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2907 tf[i] = profile->trc[i].parametric;
2908 continue;
2909 }
2910
2911 float max_error;
2912 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2913 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2914 return false;
2915 }
2916 }
2917
2918 for (int i = 0; i < 3; ++i) {
2919 profile->trc[i].table_entries = 0;
2920 profile->trc[i].parametric = tf[i];
2921 }
2922 }
2923 assert_usable_as_destination(profile);
2924 return true;
2925 }
2926
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2927 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2928 // Call skcms_MakeUsableAsDestination() with B2A disabled;
2929 // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
2930 skcms_ICCProfile result = *profile;
2931 result.has_B2A = false;
2932 if (!skcms_MakeUsableAsDestination(&result)) {
2933 return false;
2934 }
2935
2936 // Of the three, pick the transfer function that best fits the other two.
2937 int best_tf = 0;
2938 float min_max_error = INFINITY_;
2939 for (int i = 0; i < 3; i++) {
2940 skcms_TransferFunction inv;
2941 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2942 return false;
2943 }
2944
2945 float err = 0;
2946 for (int j = 0; j < 3; ++j) {
2947 err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2948 }
2949 if (min_max_error > err) {
2950 min_max_error = err;
2951 best_tf = i;
2952 }
2953 }
2954
2955 for (int i = 0; i < 3; i++) {
2956 result.trc[i].parametric = result.trc[best_tf].parametric;
2957 }
2958
2959 *profile = result;
2960 assert_usable_as_destination(profile);
2961 return true;
2962 }
2963