xref: /aosp_15_r20/external/skia/modules/skcms/skcms.cc (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/skcms_public.h"  // NO_G3_REWRITE
9 #include "src/skcms_internals.h"  // NO_G3_REWRITE
10 #include "src/skcms_Transform.h"  // NO_G3_REWRITE
11 #include <assert.h>
12 #include <float.h>
13 #include <limits.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #if defined(__ARM_NEON)
18     #include <arm_neon.h>
19 #elif defined(__SSE__)
20     #include <immintrin.h>
21 
22     #if defined(__clang__)
23         // That #include <immintrin.h> is usually enough, but Clang's headers
24         // "helpfully" skip including the whole kitchen sink when _MSC_VER is
25         // defined, because lots of programs on Windows would include that and
26         // it'd be a lot slower.  But we want all those headers included so we
27         // can use their features after runtime checks later.
28         #include <smmintrin.h>
29         #include <avxintrin.h>
30         #include <avx2intrin.h>
31         #include <avx512fintrin.h>
32         #include <avx512dqintrin.h>
33     #endif
34 #endif
35 
36 using namespace skcms_private;
37 
38 static bool sAllowRuntimeCPUDetection = true;
39 
skcms_DisableRuntimeCPUDetection()40 void skcms_DisableRuntimeCPUDetection() {
41     sAllowRuntimeCPUDetection = false;
42 }
43 
log2f_(float x)44 static float log2f_(float x) {
45     // The first approximation of log2(x) is its exponent 'e', minus 127.
46     int32_t bits;
47     memcpy(&bits, &x, sizeof(bits));
48 
49     float e = (float)bits * (1.0f / (1<<23));
50 
51     // If we use the mantissa too we can refine the error signficantly.
52     int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
53     float m;
54     memcpy(&m, &m_bits, sizeof(m));
55 
56     return (e - 124.225514990f
57               -   1.498030302f*m
58               -   1.725879990f/(0.3520887068f + m));
59 }
logf_(float x)60 static float logf_(float x) {
61     const float ln2 = 0.69314718f;
62     return ln2*log2f_(x);
63 }
64 
exp2f_(float x)65 static float exp2f_(float x) {
66     if (x > 128.0f) {
67         return INFINITY_;
68     } else if (x < -127.0f) {
69         return 0.0f;
70     }
71     float fract = x - floorf_(x);
72 
73     float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
74                                         -   1.490129070f*fract
75                                         +  27.728023300f/(4.84252568f - fract));
76 
77     // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
78     // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
79     // Negative values are effectively underflow - we'll end up returning a (different) negative
80     // value, which makes no sense. So clamp to zero.
81     if (fbits >= (float)INT_MAX) {
82         return INFINITY_;
83     } else if (fbits < 0) {
84         return 0;
85     }
86 
87     int32_t bits = (int32_t)fbits;
88     memcpy(&x, &bits, sizeof(x));
89     return x;
90 }
91 
92 // Not static, as it's used by some test tools.
powf_(float x,float y)93 float powf_(float x, float y) {
94     if (x <= 0.f) {
95         return 0.f;
96     }
97     if (x == 1.f) {
98         return 1.f;
99     }
100     return exp2f_(log2f_(x) * y);
101 }
102 
expf_(float x)103 static float expf_(float x) {
104     const float log2_e = 1.4426950408889634074f;
105     return exp2f_(log2_e * x);
106 }
107 
fmaxf_(float x,float y)108 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)109 static float fminf_(float x, float y) { return x < y ? x : y; }
110 
isfinitef_(float x)111 static bool isfinitef_(float x) { return 0 == x*0; }
112 
minus_1_ulp(float x)113 static float minus_1_ulp(float x) {
114     int32_t bits;
115     memcpy(&bits, &x, sizeof(bits));
116     bits = bits - 1;
117     memcpy(&x, &bits, sizeof(bits));
118     return x;
119 }
120 
121 // Most transfer functions we work with are sRGBish.
122 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
123 // and repurpose the other fields to hold the parameters of the HDR functions.
124 struct TF_PQish  { float A,B,C,D,E,F; };
125 struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
126 // We didn't originally support a scale factor K for HLG, and instead just stored 0 in
127 // the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
128 // By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
129 
TFKind_marker(skcms_TFType kind)130 static float TFKind_marker(skcms_TFType kind) {
131     // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
132     return -(float)kind;
133 }
134 
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)135 static skcms_TFType classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
136                                                              , TF_HLGish* hlg = nullptr) {
137     if (tf.g < 0) {
138         // Negative "g" is mapped to enum values; large negative are for sure invalid.
139         if (tf.g < -128) {
140             return skcms_TFType_Invalid;
141         }
142         int enum_g = -static_cast<int>(tf.g);
143         // Non-whole "g" values are invalid as well.
144         if (static_cast<float>(-enum_g) != tf.g) {
145             return skcms_TFType_Invalid;
146         }
147         // TODO: soundness checks for PQ/HLG like we do for sRGBish?
148         switch (enum_g) {
149             case skcms_TFType_PQish:
150                 if (pq) {
151                     memcpy(pq , &tf.a, sizeof(*pq ));
152                 }
153                 return skcms_TFType_PQish;
154             case skcms_TFType_HLGish:
155                 if (hlg) {
156                     memcpy(hlg, &tf.a, sizeof(*hlg));
157                 }
158                 return skcms_TFType_HLGish;
159             case skcms_TFType_HLGinvish:
160                 if (hlg) {
161                     memcpy(hlg, &tf.a, sizeof(*hlg));
162                 }
163                 return skcms_TFType_HLGinvish;
164         }
165         return skcms_TFType_Invalid;
166     }
167 
168     // Basic soundness checks for sRGBish transfer functions.
169     if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
170             // a,c,d,g should be non-negative to make any sense.
171             && tf.a >= 0
172             && tf.c >= 0
173             && tf.d >= 0
174             && tf.g >= 0
175             // Raising a negative value to a fractional tf->g produces complex numbers.
176             && tf.a * tf.d + tf.b >= 0) {
177         return skcms_TFType_sRGBish;
178     }
179 
180     return skcms_TFType_Invalid;
181 }
182 
skcms_TransferFunction_getType(const skcms_TransferFunction * tf)183 skcms_TFType skcms_TransferFunction_getType(const skcms_TransferFunction* tf) {
184     return classify(*tf);
185 }
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)186 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
187     return classify(*tf) == skcms_TFType_sRGBish;
188 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)189 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
190     return classify(*tf) == skcms_TFType_PQish;
191 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)192 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
193     return classify(*tf) == skcms_TFType_HLGish;
194 }
195 
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)196 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
197                                       float A, float B, float C,
198                                       float D, float E, float F) {
199     *tf = { TFKind_marker(skcms_TFType_PQish), A,B,C,D,E,F };
200     assert(skcms_TransferFunction_isPQish(tf));
201     return true;
202 }
203 
skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction * tf,float K,float R,float G,float a,float b,float c)204 bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
205                                              float K, float R, float G,
206                                              float a, float b, float c) {
207     *tf = { TFKind_marker(skcms_TFType_HLGish), R,G, a,b,c, K-1.0f };
208     assert(skcms_TransferFunction_isHLGish(tf));
209     return true;
210 }
211 
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)212 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
213     float sign = x < 0 ? -1.0f : 1.0f;
214     x *= sign;
215 
216     TF_PQish  pq;
217     TF_HLGish hlg;
218     switch (classify(*tf, &pq, &hlg)) {
219         case skcms_TFType_Invalid: break;
220 
221         case skcms_TFType_HLGish: {
222             const float K = hlg.K_minus_1 + 1.0f;
223             return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
224                                             : expf_((x-hlg.c)*hlg.a) + hlg.b);
225         }
226 
227         // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
228         case skcms_TFType_HLGinvish: {
229             const float K = hlg.K_minus_1 + 1.0f;
230             x /= K;
231             return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
232                                   : hlg.a * logf_(x - hlg.b) + hlg.c);
233         }
234 
235         case skcms_TFType_sRGBish:
236             return sign * (x < tf->d ?       tf->c * x + tf->f
237                                      : powf_(tf->a * x + tf->b, tf->g) + tf->e);
238 
239         case skcms_TFType_PQish:
240             return sign *
241                    powf_((pq.A + pq.B * powf_(x, pq.C)) / (pq.D + pq.E * powf_(x, pq.C)), pq.F);
242     }
243     return 0;
244 }
245 
246 
eval_curve(const skcms_Curve * curve,float x)247 static float eval_curve(const skcms_Curve* curve, float x) {
248     if (curve->table_entries == 0) {
249         return skcms_TransferFunction_eval(&curve->parametric, x);
250     }
251 
252     float ix = fmaxf_(0, fminf_(x, 1)) * static_cast<float>(curve->table_entries - 1);
253     int   lo = (int)                   ix        ,
254           hi = (int)(float)minus_1_ulp(ix + 1.0f);
255     float t = ix - (float)lo;
256 
257     float l, h;
258     if (curve->table_8) {
259         l = curve->table_8[lo] * (1/255.0f);
260         h = curve->table_8[hi] * (1/255.0f);
261     } else {
262         uint16_t be_l, be_h;
263         memcpy(&be_l, curve->table_16 + 2*lo, 2);
264         memcpy(&be_h, curve->table_16 + 2*hi, 2);
265         uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
266         uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
267         l = le_l * (1/65535.0f);
268         h = le_h * (1/65535.0f);
269     }
270     return l + (h-l)*t;
271 }
272 
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)273 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
274     uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
275     const float dx = 1.0f / static_cast<float>(N - 1);
276     float err = 0;
277     for (uint32_t i = 0; i < N; i++) {
278         float x = static_cast<float>(i) * dx,
279               y = eval_curve(curve, x);
280         err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
281     }
282     return err;
283 }
284 
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)285 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
286     return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
287 }
288 
289 // Additional ICC signature values that are only used internally
290 enum {
291     // File signature
292     skcms_Signature_acsp = 0x61637370,
293 
294     // Tag signatures
295     skcms_Signature_rTRC = 0x72545243,
296     skcms_Signature_gTRC = 0x67545243,
297     skcms_Signature_bTRC = 0x62545243,
298     skcms_Signature_kTRC = 0x6B545243,
299 
300     skcms_Signature_rXYZ = 0x7258595A,
301     skcms_Signature_gXYZ = 0x6758595A,
302     skcms_Signature_bXYZ = 0x6258595A,
303 
304     skcms_Signature_A2B0 = 0x41324230,
305     skcms_Signature_B2A0 = 0x42324130,
306 
307     skcms_Signature_CHAD = 0x63686164,
308     skcms_Signature_WTPT = 0x77747074,
309 
310     skcms_Signature_CICP = 0x63696370,
311 
312     // Type signatures
313     skcms_Signature_curv = 0x63757276,
314     skcms_Signature_mft1 = 0x6D667431,
315     skcms_Signature_mft2 = 0x6D667432,
316     skcms_Signature_mAB  = 0x6D414220,
317     skcms_Signature_mBA  = 0x6D424120,
318     skcms_Signature_para = 0x70617261,
319     skcms_Signature_sf32 = 0x73663332,
320     // XYZ is also a PCS signature, so it's defined in skcms.h
321     // skcms_Signature_XYZ = 0x58595A20,
322 };
323 
read_big_u16(const uint8_t * ptr)324 static uint16_t read_big_u16(const uint8_t* ptr) {
325     uint16_t be;
326     memcpy(&be, ptr, sizeof(be));
327 #if defined(_MSC_VER)
328     return _byteswap_ushort(be);
329 #else
330     return __builtin_bswap16(be);
331 #endif
332 }
333 
read_big_u32(const uint8_t * ptr)334 static uint32_t read_big_u32(const uint8_t* ptr) {
335     uint32_t be;
336     memcpy(&be, ptr, sizeof(be));
337 #if defined(_MSC_VER)
338     return _byteswap_ulong(be);
339 #else
340     return __builtin_bswap32(be);
341 #endif
342 }
343 
read_big_i32(const uint8_t * ptr)344 static int32_t read_big_i32(const uint8_t* ptr) {
345     return (int32_t)read_big_u32(ptr);
346 }
347 
read_big_fixed(const uint8_t * ptr)348 static float read_big_fixed(const uint8_t* ptr) {
349     return static_cast<float>(read_big_i32(ptr)) * (1.0f / 65536.0f);
350 }
351 
352 // Maps to an in-memory profile so that fields line up to the locations specified
353 // in ICC.1:2010, section 7.2
354 typedef struct {
355     uint8_t size                [ 4];
356     uint8_t cmm_type            [ 4];
357     uint8_t version             [ 4];
358     uint8_t profile_class       [ 4];
359     uint8_t data_color_space    [ 4];
360     uint8_t pcs                 [ 4];
361     uint8_t creation_date_time  [12];
362     uint8_t signature           [ 4];
363     uint8_t platform            [ 4];
364     uint8_t flags               [ 4];
365     uint8_t device_manufacturer [ 4];
366     uint8_t device_model        [ 4];
367     uint8_t device_attributes   [ 8];
368     uint8_t rendering_intent    [ 4];
369     uint8_t illuminant_X        [ 4];
370     uint8_t illuminant_Y        [ 4];
371     uint8_t illuminant_Z        [ 4];
372     uint8_t creator             [ 4];
373     uint8_t profile_id          [16];
374     uint8_t reserved            [28];
375     uint8_t tag_count           [ 4]; // Technically not part of header, but required
376 } header_Layout;
377 
378 typedef struct {
379     uint8_t signature [4];
380     uint8_t offset    [4];
381     uint8_t size      [4];
382 } tag_Layout;
383 
get_tag_table(const skcms_ICCProfile * profile)384 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
385     return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
386 }
387 
388 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
389 // use of the type is for the CHAD tag that stores exactly nine values.
390 typedef struct {
391     uint8_t type     [ 4];
392     uint8_t reserved [ 4];
393     uint8_t values   [36];
394 } sf32_Layout;
395 
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)396 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
397     skcms_ICCTag tag;
398     if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
399         return false;
400     }
401 
402     if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
403         return false;
404     }
405 
406     const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
407     const uint8_t* values = sf32Tag->values;
408     for (int r = 0; r < 3; ++r)
409     for (int c = 0; c < 3; ++c, values += 4) {
410         m->vals[r][c] = read_big_fixed(values);
411     }
412     return true;
413 }
414 
415 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
416 // the type are for tags/data that store exactly one triple.
417 typedef struct {
418     uint8_t type     [4];
419     uint8_t reserved [4];
420     uint8_t X        [4];
421     uint8_t Y        [4];
422     uint8_t Z        [4];
423 } XYZ_Layout;
424 
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)425 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
426     if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
427         return false;
428     }
429 
430     const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
431 
432     *x = read_big_fixed(xyzTag->X);
433     *y = read_big_fixed(xyzTag->Y);
434     *z = read_big_fixed(xyzTag->Z);
435     return true;
436 }
437 
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])438 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
439     skcms_ICCTag tag;
440     return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
441            read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
442 }
443 
data_color_space_channel_count(uint32_t data_color_space)444 static int data_color_space_channel_count(uint32_t data_color_space) {
445     switch (data_color_space) {
446         case skcms_Signature_CMYK:   return 4;
447         case skcms_Signature_Gray:   return 1;
448         case skcms_Signature_RGB:    return 3;
449         case skcms_Signature_Lab:    return 3;
450         case skcms_Signature_XYZ:    return 3;
451         case skcms_Signature_CIELUV: return 3;
452         case skcms_Signature_YCbCr:  return 3;
453         case skcms_Signature_CIEYxy: return 3;
454         case skcms_Signature_HSV:    return 3;
455         case skcms_Signature_HLS:    return 3;
456         case skcms_Signature_CMY:    return 3;
457         case skcms_Signature_2CLR:   return 2;
458         case skcms_Signature_3CLR:   return 3;
459         case skcms_Signature_4CLR:   return 4;
460         case skcms_Signature_5CLR:   return 5;
461         case skcms_Signature_6CLR:   return 6;
462         case skcms_Signature_7CLR:   return 7;
463         case skcms_Signature_8CLR:   return 8;
464         case skcms_Signature_9CLR:   return 9;
465         case skcms_Signature_10CLR:  return 10;
466         case skcms_Signature_11CLR:  return 11;
467         case skcms_Signature_12CLR:  return 12;
468         case skcms_Signature_13CLR:  return 13;
469         case skcms_Signature_14CLR:  return 14;
470         case skcms_Signature_15CLR:  return 15;
471         default:                     return -1;
472     }
473 }
474 
skcms_GetInputChannelCount(const skcms_ICCProfile * profile)475 int skcms_GetInputChannelCount(const skcms_ICCProfile* profile) {
476     int a2b_count = 0;
477     if (profile->has_A2B) {
478         a2b_count = profile->A2B.input_channels != 0
479                         ? static_cast<int>(profile->A2B.input_channels)
480                         : 3;
481     }
482 
483     skcms_ICCTag tag;
484     int trc_count = 0;
485     if (skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &tag)) {
486         trc_count = 1;
487     } else if (profile->has_trc) {
488         trc_count = 3;
489     }
490 
491     int dcs_count = data_color_space_channel_count(profile->data_color_space);
492 
493     if (dcs_count < 0) {
494         return -1;
495     }
496 
497     if (a2b_count > 0 && a2b_count != dcs_count) {
498         return -1;
499     }
500     if (trc_count > 0 && trc_count != dcs_count) {
501         return -1;
502     }
503 
504     return dcs_count;
505 }
506 
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)507 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
508                            const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
509     return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
510            read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
511            read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
512 }
513 
514 typedef struct {
515     uint8_t type          [4];
516     uint8_t reserved_a    [4];
517     uint8_t function_type [2];
518     uint8_t reserved_b    [2];
519     uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
520 } para_Layout;
521 
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)522 static bool read_curve_para(const uint8_t* buf, uint32_t size,
523                             skcms_Curve* curve, uint32_t* curve_size) {
524     if (size < SAFE_FIXED_SIZE(para_Layout)) {
525         return false;
526     }
527 
528     const para_Layout* paraTag = (const para_Layout*)buf;
529 
530     enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
531     uint16_t function_type = read_big_u16(paraTag->function_type);
532     if (function_type > kGABCDEF) {
533         return false;
534     }
535 
536     static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
537     if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
538         return false;
539     }
540 
541     if (curve_size) {
542         *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
543     }
544 
545     curve->table_entries = 0;
546     curve->parametric.a  = 1.0f;
547     curve->parametric.b  = 0.0f;
548     curve->parametric.c  = 0.0f;
549     curve->parametric.d  = 0.0f;
550     curve->parametric.e  = 0.0f;
551     curve->parametric.f  = 0.0f;
552     curve->parametric.g  = read_big_fixed(paraTag->variable);
553 
554     switch (function_type) {
555         case kGAB:
556             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
557             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
558             if (curve->parametric.a == 0) {
559                 return false;
560             }
561             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
562             break;
563         case kGABC:
564             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
565             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
566             curve->parametric.e = read_big_fixed(paraTag->variable + 12);
567             if (curve->parametric.a == 0) {
568                 return false;
569             }
570             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
571             curve->parametric.f = curve->parametric.e;
572             break;
573         case kGABCD:
574             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
575             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
576             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
577             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
578             break;
579         case kGABCDEF:
580             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
581             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
582             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
583             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
584             curve->parametric.e = read_big_fixed(paraTag->variable + 20);
585             curve->parametric.f = read_big_fixed(paraTag->variable + 24);
586             break;
587     }
588     return skcms_TransferFunction_isSRGBish(&curve->parametric);
589 }
590 
591 typedef struct {
592     uint8_t type          [4];
593     uint8_t reserved      [4];
594     uint8_t value_count   [4];
595     uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
596 } curv_Layout;
597 
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)598 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
599                             skcms_Curve* curve, uint32_t* curve_size) {
600     if (size < SAFE_FIXED_SIZE(curv_Layout)) {
601         return false;
602     }
603 
604     const curv_Layout* curvTag = (const curv_Layout*)buf;
605 
606     uint32_t value_count = read_big_u32(curvTag->value_count);
607     if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
608         return false;
609     }
610 
611     if (curve_size) {
612         *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
613     }
614 
615     if (value_count < 2) {
616         curve->table_entries = 0;
617         curve->parametric.a  = 1.0f;
618         curve->parametric.b  = 0.0f;
619         curve->parametric.c  = 0.0f;
620         curve->parametric.d  = 0.0f;
621         curve->parametric.e  = 0.0f;
622         curve->parametric.f  = 0.0f;
623         if (value_count == 0) {
624             // Empty tables are a shorthand for an identity curve
625             curve->parametric.g = 1.0f;
626         } else {
627             // Single entry tables are a shorthand for simple gamma
628             curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
629         }
630     } else {
631         curve->table_8       = nullptr;
632         curve->table_16      = curvTag->variable;
633         curve->table_entries = value_count;
634     }
635 
636     return true;
637 }
638 
639 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
640 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)641 static bool read_curve(const uint8_t* buf, uint32_t size,
642                        skcms_Curve* curve, uint32_t* curve_size) {
643     if (!buf || size < 4 || !curve) {
644         return false;
645     }
646 
647     uint32_t type = read_big_u32(buf);
648     if (type == skcms_Signature_para) {
649         return read_curve_para(buf, size, curve, curve_size);
650     } else if (type == skcms_Signature_curv) {
651         return read_curve_curv(buf, size, curve, curve_size);
652     }
653 
654     return false;
655 }
656 
657 // mft1 and mft2 share a large chunk of data
658 typedef struct {
659     uint8_t type                 [ 4];
660     uint8_t reserved_a           [ 4];
661     uint8_t input_channels       [ 1];
662     uint8_t output_channels      [ 1];
663     uint8_t grid_points          [ 1];
664     uint8_t reserved_b           [ 1];
665     uint8_t matrix               [36];
666 } mft_CommonLayout;
667 
668 typedef struct {
669     mft_CommonLayout common      [1];
670 
671     uint8_t variable             [1/*variable*/];
672 } mft1_Layout;
673 
674 typedef struct {
675     mft_CommonLayout common      [1];
676 
677     uint8_t input_table_entries  [2];
678     uint8_t output_table_entries [2];
679     uint8_t variable             [1/*variable*/];
680 } mft2_Layout;
681 
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)682 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
683     // MFT matrices are applied before the first set of curves, but must be identity unless the
684     // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
685     // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
686     // field/flag.
687     a2b->matrix_channels = 0;
688     a2b-> input_channels = mftTag-> input_channels[0];
689     a2b->output_channels = mftTag->output_channels[0];
690 
691     // We require exactly three (ie XYZ/Lab/RGB) output channels
692     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
693         return false;
694     }
695     // We require at least one, and no more than four (ie CMYK) input channels
696     if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
697         return false;
698     }
699 
700     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
701         a2b->grid_points[i] = mftTag->grid_points[0];
702     }
703     // The grid only makes sense with at least two points along each axis
704     if (a2b->grid_points[0] < 2) {
705         return false;
706     }
707     return true;
708 }
709 
710 // All as the A2B version above, except where noted.
read_mft_common(const mft_CommonLayout * mftTag,skcms_B2A * b2a)711 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
712     // Same as A2B.
713     b2a->matrix_channels = 0;
714     b2a-> input_channels = mftTag-> input_channels[0];
715     b2a->output_channels = mftTag->output_channels[0];
716 
717 
718     // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
719     if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
720         return false;
721     }
722     if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
723         return false;
724     }
725 
726     // Same as A2B.
727     for (uint32_t i = 0; i < b2a->input_channels; ++i) {
728         b2a->grid_points[i] = mftTag->grid_points[0];
729     }
730     if (b2a->grid_points[0] < 2) {
731         return false;
732     }
733     return true;
734 }
735 
736 template <typename A2B_or_B2A>
init_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,A2B_or_B2A * out)737 static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
738                         uint32_t input_table_entries, uint32_t output_table_entries,
739                         A2B_or_B2A* out) {
740     // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
741     uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
742     uint32_t byte_len_per_output_table = output_table_entries * byte_width;
743 
744     // [input|output]_channels are <= 4, so still no overflow
745     uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
746     uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
747 
748     uint64_t grid_size = out->output_channels * byte_width;
749     for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
750         grid_size *= out->grid_points[axis];
751     }
752 
753     if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
754         return false;
755     }
756 
757     for (uint32_t i = 0; i < out->input_channels; ++i) {
758         out->input_curves[i].table_entries = input_table_entries;
759         if (byte_width == 1) {
760             out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
761             out->input_curves[i].table_16 = nullptr;
762         } else {
763             out->input_curves[i].table_8  = nullptr;
764             out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
765         }
766     }
767 
768     if (byte_width == 1) {
769         out->grid_8  = table_base + byte_len_all_input_tables;
770         out->grid_16 = nullptr;
771     } else {
772         out->grid_8  = nullptr;
773         out->grid_16 = table_base + byte_len_all_input_tables;
774     }
775 
776     const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
777     for (uint32_t i = 0; i < out->output_channels; ++i) {
778         out->output_curves[i].table_entries = output_table_entries;
779         if (byte_width == 1) {
780             out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
781             out->output_curves[i].table_16 = nullptr;
782         } else {
783             out->output_curves[i].table_8  = nullptr;
784             out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
785         }
786     }
787 
788     return true;
789 }
790 
791 template <typename A2B_or_B2A>
read_tag_mft1(const skcms_ICCTag * tag,A2B_or_B2A * out)792 static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
793     if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
794         return false;
795     }
796 
797     const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
798     if (!read_mft_common(mftTag->common, out)) {
799         return false;
800     }
801 
802     uint32_t input_table_entries  = 256;
803     uint32_t output_table_entries = 256;
804 
805     return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
806                        input_table_entries, output_table_entries, out);
807 }
808 
809 template <typename A2B_or_B2A>
read_tag_mft2(const skcms_ICCTag * tag,A2B_or_B2A * out)810 static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
811     if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
812         return false;
813     }
814 
815     const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
816     if (!read_mft_common(mftTag->common, out)) {
817         return false;
818     }
819 
820     uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
821     uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
822 
823     // ICC spec mandates that 2 <= table_entries <= 4096
824     if (input_table_entries < 2 || input_table_entries > 4096 ||
825         output_table_entries < 2 || output_table_entries > 4096) {
826         return false;
827     }
828 
829     return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
830                        input_table_entries, output_table_entries, out);
831 }
832 
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)833 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
834                         uint32_t num_curves, skcms_Curve* curves) {
835     for (uint32_t i = 0; i < num_curves; ++i) {
836         if (curve_offset > size) {
837             return false;
838         }
839 
840         uint32_t curve_bytes;
841         if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
842             return false;
843         }
844 
845         if (curve_bytes > UINT32_MAX - 3) {
846             return false;
847         }
848         curve_bytes = (curve_bytes + 3) & ~3U;
849 
850         uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
851         curve_offset = (uint32_t)new_offset_64;
852         if (new_offset_64 != curve_offset) {
853             return false;
854         }
855     }
856 
857     return true;
858 }
859 
860 // mAB and mBA tags use the same encoding, including color lookup tables.
861 typedef struct {
862     uint8_t type                 [ 4];
863     uint8_t reserved_a           [ 4];
864     uint8_t input_channels       [ 1];
865     uint8_t output_channels      [ 1];
866     uint8_t reserved_b           [ 2];
867     uint8_t b_curve_offset       [ 4];
868     uint8_t matrix_offset        [ 4];
869     uint8_t m_curve_offset       [ 4];
870     uint8_t clut_offset          [ 4];
871     uint8_t a_curve_offset       [ 4];
872 } mAB_or_mBA_Layout;
873 
874 typedef struct {
875     uint8_t grid_points          [16];
876     uint8_t grid_byte_width      [ 1];
877     uint8_t reserved             [ 3];
878     uint8_t variable             [1/*variable*/];
879 } CLUT_Layout;
880 
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)881 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
882     if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
883         return false;
884     }
885 
886     const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
887 
888     a2b->input_channels  = mABTag->input_channels[0];
889     a2b->output_channels = mABTag->output_channels[0];
890 
891     // We require exactly three (ie XYZ/Lab/RGB) output channels
892     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
893         return false;
894     }
895     // We require no more than four (ie CMYK) input channels
896     if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
897         return false;
898     }
899 
900     uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
901     uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
902     uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
903     uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
904     uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
905 
906     // "B" curves must be present
907     if (0 == b_curve_offset) {
908         return false;
909     }
910 
911     if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
912                      a2b->output_curves)) {
913         return false;
914     }
915 
916     // "M" curves and Matrix must be used together
917     if (0 != m_curve_offset) {
918         if (0 == matrix_offset) {
919             return false;
920         }
921         a2b->matrix_channels = a2b->output_channels;
922         if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
923                          a2b->matrix_curves)) {
924             return false;
925         }
926 
927         // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
928         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
929             return false;
930         }
931         float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
932         const uint8_t* mtx_buf = tag->buf + matrix_offset;
933         a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
934         a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
935         a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
936         a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
937         a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
938         a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
939         a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
940         a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
941         a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
942         a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
943         a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
944         a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
945     } else {
946         if (0 != matrix_offset) {
947             return false;
948         }
949         a2b->matrix_channels = 0;
950     }
951 
952     // "A" curves and CLUT must be used together
953     if (0 != a_curve_offset) {
954         if (0 == clut_offset) {
955             return false;
956         }
957         if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
958                          a2b->input_curves)) {
959             return false;
960         }
961 
962         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
963             return false;
964         }
965         const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
966 
967         if (clut->grid_byte_width[0] == 1) {
968             a2b->grid_8  = clut->variable;
969             a2b->grid_16 = nullptr;
970         } else if (clut->grid_byte_width[0] == 2) {
971             a2b->grid_8  = nullptr;
972             a2b->grid_16 = clut->variable;
973         } else {
974             return false;
975         }
976 
977         uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];  // the payload
978         for (uint32_t i = 0; i < a2b->input_channels; ++i) {
979             a2b->grid_points[i] = clut->grid_points[i];
980             // The grid only makes sense with at least two points along each axis
981             if (a2b->grid_points[i] < 2) {
982                 return false;
983             }
984             grid_size *= a2b->grid_points[i];
985         }
986         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
987             return false;
988         }
989     } else {
990         if (0 != clut_offset) {
991             return false;
992         }
993 
994         // If there is no CLUT, the number of input and output channels must match
995         if (a2b->input_channels != a2b->output_channels) {
996             return false;
997         }
998 
999         // Zero out the number of input channels to signal that we're skipping this stage
1000         a2b->input_channels = 0;
1001     }
1002 
1003     return true;
1004 }
1005 
1006 // Exactly the same as read_tag_mab(), except where there are comments.
1007 // TODO: refactor the two to eliminate common code?
read_tag_mba(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1008 static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1009     if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
1010         return false;
1011     }
1012 
1013     const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
1014 
1015     b2a->input_channels  = mBATag->input_channels[0];
1016     b2a->output_channels = mBATag->output_channels[0];
1017 
1018     // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
1019     if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
1020         return false;
1021     }
1022     if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
1023         return false;
1024     }
1025 
1026     uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
1027     uint32_t matrix_offset  = read_big_u32(mBATag->matrix_offset);
1028     uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
1029     uint32_t clut_offset    = read_big_u32(mBATag->clut_offset);
1030     uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
1031 
1032     if (0 == b_curve_offset) {
1033         return false;
1034     }
1035 
1036     // "B" curves are our inputs, not outputs.
1037     if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
1038                      b2a->input_curves)) {
1039         return false;
1040     }
1041 
1042     if (0 != m_curve_offset) {
1043         if (0 == matrix_offset) {
1044             return false;
1045         }
1046         // Matrix channels is tied to input_channels (3), not output_channels.
1047         b2a->matrix_channels = b2a->input_channels;
1048 
1049         if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
1050                          b2a->matrix_curves)) {
1051             return false;
1052         }
1053 
1054         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
1055             return false;
1056         }
1057         float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f;  // TODO: understand
1058         const uint8_t* mtx_buf = tag->buf + matrix_offset;
1059         b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
1060         b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
1061         b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
1062         b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
1063         b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
1064         b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
1065         b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
1066         b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
1067         b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
1068         b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
1069         b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
1070         b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
1071     } else {
1072         if (0 != matrix_offset) {
1073             return false;
1074         }
1075         b2a->matrix_channels = 0;
1076     }
1077 
1078     if (0 != a_curve_offset) {
1079         if (0 == clut_offset) {
1080             return false;
1081         }
1082 
1083         // "A" curves are our output, not input.
1084         if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1085                          b2a->output_curves)) {
1086             return false;
1087         }
1088 
1089         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1090             return false;
1091         }
1092         const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1093 
1094         if (clut->grid_byte_width[0] == 1) {
1095             b2a->grid_8  = clut->variable;
1096             b2a->grid_16 = nullptr;
1097         } else if (clut->grid_byte_width[0] == 2) {
1098             b2a->grid_8  = nullptr;
1099             b2a->grid_16 = clut->variable;
1100         } else {
1101             return false;
1102         }
1103 
1104         uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1105         for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1106             b2a->grid_points[i] = clut->grid_points[i];
1107             if (b2a->grid_points[i] < 2) {
1108                 return false;
1109             }
1110             grid_size *= b2a->grid_points[i];
1111         }
1112         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1113             return false;
1114         }
1115     } else {
1116         if (0 != clut_offset) {
1117             return false;
1118         }
1119 
1120         if (b2a->input_channels != b2a->output_channels) {
1121             return false;
1122         }
1123 
1124         // Zero out *output* channels to skip this stage.
1125         b2a->output_channels = 0;
1126     }
1127     return true;
1128 }
1129 
1130 // If you pass f, we'll fit a possibly-non-zero value for *f.
1131 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)1132 static int fit_linear(const skcms_Curve* curve, int N, float tol,
1133                       float* c, float* d, float* f = nullptr) {
1134     assert(N > 1);
1135     // We iteratively fit the first points to the TF's linear piece.
1136     // We want the cx + f line to pass through the first and last points we fit exactly.
1137     //
1138     // As we walk along the points we find the minimum and maximum slope of the line before the
1139     // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
1140     // emtpy, when we definitely can't add any more points.
1141     //
1142     // Some points' error intervals may intersect the running interval but not lie fully
1143     // within it.  So we keep track of the last point we saw that is a valid end point candidate,
1144     // and once the search is done, back up to build the line through *that* point.
1145     const float dx = 1.0f / static_cast<float>(N - 1);
1146 
1147     int lin_points = 1;
1148 
1149     float f_zero = 0.0f;
1150     if (f) {
1151         *f = eval_curve(curve, 0);
1152     } else {
1153         f = &f_zero;
1154     }
1155 
1156 
1157     float slope_min = -INFINITY_;
1158     float slope_max = +INFINITY_;
1159     for (int i = 1; i < N; ++i) {
1160         float x = static_cast<float>(i) * dx;
1161         float y = eval_curve(curve, x);
1162 
1163         float slope_max_i = (y + tol - *f) / x,
1164               slope_min_i = (y - tol - *f) / x;
1165         if (slope_max_i < slope_min || slope_max < slope_min_i) {
1166             // Slope intervals would no longer overlap.
1167             break;
1168         }
1169         slope_max = fminf_(slope_max, slope_max_i);
1170         slope_min = fmaxf_(slope_min, slope_min_i);
1171 
1172         float cur_slope = (y - *f) / x;
1173         if (slope_min <= cur_slope && cur_slope <= slope_max) {
1174             lin_points = i + 1;
1175             *c = cur_slope;
1176         }
1177     }
1178 
1179     // Set D to the last point that met our tolerance.
1180     *d = static_cast<float>(lin_points - 1) * dx;
1181     return lin_points;
1182 }
1183 
1184 // If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
canonicalize_identity(skcms_Curve * curve)1185 static void canonicalize_identity(skcms_Curve* curve) {
1186     if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1187         int N = (int)curve->table_entries;
1188 
1189         float c = 0.0f, d = 0.0f, f = 0.0f;
1190         if (N == fit_linear(curve, N, 1.0f/static_cast<float>(2*N), &c,&d,&f)
1191             && c == 1.0f
1192             && f == 0.0f) {
1193             curve->table_entries = 0;
1194             curve->table_8       = nullptr;
1195             curve->table_16      = nullptr;
1196             curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
1197         }
1198     }
1199 }
1200 
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)1201 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1202     bool ok = false;
1203     if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1204     if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1205     if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1206     if (!ok) {
1207         return false;
1208     }
1209 
1210     if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1211     if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1212     if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1213     if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1214 
1215     if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1216     if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1217     if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1218 
1219     if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1220     if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1221     if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1222 
1223     return true;
1224 }
1225 
read_b2a(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1226 static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1227     bool ok = false;
1228     if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1229     if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1230     if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1231     if (!ok) {
1232         return false;
1233     }
1234 
1235     if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1236     if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1237     if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1238 
1239     if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1240     if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1241     if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1242 
1243     if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1244     if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1245     if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1246     if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1247 
1248     return true;
1249 }
1250 
1251 typedef struct {
1252     uint8_t type                     [4];
1253     uint8_t reserved                 [4];
1254     uint8_t color_primaries          [1];
1255     uint8_t transfer_characteristics [1];
1256     uint8_t matrix_coefficients      [1];
1257     uint8_t video_full_range_flag    [1];
1258 } CICP_Layout;
1259 
read_cicp(const skcms_ICCTag * tag,skcms_CICP * cicp)1260 static bool read_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
1261     if (tag->type != skcms_Signature_CICP || tag->size < SAFE_SIZEOF(CICP_Layout)) {
1262         return false;
1263     }
1264 
1265     const CICP_Layout* cicpTag = (const CICP_Layout*)tag->buf;
1266 
1267     cicp->color_primaries          = cicpTag->color_primaries[0];
1268     cicp->transfer_characteristics = cicpTag->transfer_characteristics[0];
1269     cicp->matrix_coefficients      = cicpTag->matrix_coefficients[0];
1270     cicp->video_full_range_flag    = cicpTag->video_full_range_flag[0];
1271     return true;
1272 }
1273 
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)1274 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1275     if (!profile || !profile->buffer || !tag) { return; }
1276     if (idx > profile->tag_count) { return; }
1277     const tag_Layout* tags = get_tag_table(profile);
1278     tag->signature = read_big_u32(tags[idx].signature);
1279     tag->size      = read_big_u32(tags[idx].size);
1280     tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
1281     tag->type      = read_big_u32(tag->buf);
1282 }
1283 
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1284 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1285     if (!profile || !profile->buffer || !tag) { return false; }
1286     const tag_Layout* tags = get_tag_table(profile);
1287     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1288         if (read_big_u32(tags[i].signature) == sig) {
1289             tag->signature = sig;
1290             tag->size      = read_big_u32(tags[i].size);
1291             tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
1292             tag->type      = read_big_u32(tag->buf);
1293             return true;
1294         }
1295     }
1296     return false;
1297 }
1298 
usable_as_src(const skcms_ICCProfile * profile)1299 static bool usable_as_src(const skcms_ICCProfile* profile) {
1300     return profile->has_A2B
1301        || (profile->has_trc && profile->has_toXYZD50);
1302 }
1303 
skcms_ParseWithA2BPriority(const void * buf,size_t len,const int priority[],const int priorities,skcms_ICCProfile * profile)1304 bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1305                                 const int priority[], const int priorities,
1306                                 skcms_ICCProfile* profile) {
1307     static_assert(SAFE_SIZEOF(header_Layout) == 132, "need to update header code");
1308 
1309     if (!profile) {
1310         return false;
1311     }
1312     memset(profile, 0, SAFE_SIZEOF(*profile));
1313 
1314     if (len < SAFE_SIZEOF(header_Layout)) {
1315         return false;
1316     }
1317 
1318     // Byte-swap all header fields
1319     const header_Layout* header  = (const header_Layout*)buf;
1320     profile->buffer              = (const uint8_t*)buf;
1321     profile->size                = read_big_u32(header->size);
1322     uint32_t version             = read_big_u32(header->version);
1323     profile->data_color_space    = read_big_u32(header->data_color_space);
1324     profile->pcs                 = read_big_u32(header->pcs);
1325     uint32_t signature           = read_big_u32(header->signature);
1326     float illuminant_X           = read_big_fixed(header->illuminant_X);
1327     float illuminant_Y           = read_big_fixed(header->illuminant_Y);
1328     float illuminant_Z           = read_big_fixed(header->illuminant_Z);
1329     profile->tag_count           = read_big_u32(header->tag_count);
1330 
1331     // Validate signature, size (smaller than buffer, large enough to hold tag table),
1332     // and major version
1333     uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1334     if (signature != skcms_Signature_acsp ||
1335         profile->size > len ||
1336         profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1337         (version >> 24) > 4) {
1338         return false;
1339     }
1340 
1341     // Validate that illuminant is D50 white
1342     if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1343         fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1344         fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1345         return false;
1346     }
1347 
1348     // Validate that all tag entries have sane offset + size
1349     const tag_Layout* tags = get_tag_table(profile);
1350     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1351         uint32_t tag_offset = read_big_u32(tags[i].offset);
1352         uint32_t tag_size   = read_big_u32(tags[i].size);
1353         uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
1354         if (tag_size < 4 || tag_end > profile->size) {
1355             return false;
1356         }
1357     }
1358 
1359     if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1360         return false;
1361     }
1362 
1363     bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1364 
1365     // Pre-parse commonly used tags.
1366     skcms_ICCTag kTRC;
1367     if (profile->data_color_space == skcms_Signature_Gray &&
1368         skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1369         if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1370             // Malformed tag
1371             return false;
1372         }
1373         profile->trc[1] = profile->trc[0];
1374         profile->trc[2] = profile->trc[0];
1375         profile->has_trc = true;
1376 
1377         if (pcs_is_xyz) {
1378             profile->toXYZD50.vals[0][0] = illuminant_X;
1379             profile->toXYZD50.vals[1][1] = illuminant_Y;
1380             profile->toXYZD50.vals[2][2] = illuminant_Z;
1381             profile->has_toXYZD50 = true;
1382         }
1383     } else {
1384         skcms_ICCTag rTRC, gTRC, bTRC;
1385         if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1386             skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1387             skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1388             if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1389                 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1390                 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1391                 // Malformed TRC tags
1392                 return false;
1393             }
1394             profile->has_trc = true;
1395         }
1396 
1397         skcms_ICCTag rXYZ, gXYZ, bXYZ;
1398         if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1399             skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1400             skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1401             if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1402                 // Malformed XYZ tags
1403                 return false;
1404             }
1405             profile->has_toXYZD50 = true;
1406         }
1407     }
1408 
1409     for (int i = 0; i < priorities; i++) {
1410         // enum { perceptual, relative_colormetric, saturation }
1411         if (priority[i] < 0 || priority[i] > 2) {
1412             return false;
1413         }
1414         uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1415         skcms_ICCTag tag;
1416         if (skcms_GetTagBySignature(profile, sig, &tag)) {
1417             if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1418                 // Malformed A2B tag
1419                 return false;
1420             }
1421             profile->has_A2B = true;
1422             break;
1423         }
1424     }
1425 
1426     for (int i = 0; i < priorities; i++) {
1427         // enum { perceptual, relative_colormetric, saturation }
1428         if (priority[i] < 0 || priority[i] > 2) {
1429             return false;
1430         }
1431         uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1432         skcms_ICCTag tag;
1433         if (skcms_GetTagBySignature(profile, sig, &tag)) {
1434             if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1435                 // Malformed B2A tag
1436                 return false;
1437             }
1438             profile->has_B2A = true;
1439             break;
1440         }
1441     }
1442 
1443     skcms_ICCTag cicp_tag;
1444     if (skcms_GetTagBySignature(profile, skcms_Signature_CICP, &cicp_tag)) {
1445         if (!read_cicp(&cicp_tag, &profile->CICP)) {
1446             // Malformed CICP tag
1447             return false;
1448         }
1449         profile->has_CICP = true;
1450     }
1451 
1452     return usable_as_src(profile);
1453 }
1454 
1455 
skcms_sRGB_profile()1456 const skcms_ICCProfile* skcms_sRGB_profile() {
1457     static const skcms_ICCProfile sRGB_profile = {
1458         nullptr,               // buffer, moot here
1459 
1460         0,                     // size, moot here
1461         skcms_Signature_RGB,   // data_color_space
1462         skcms_Signature_XYZ,   // pcs
1463         0,                     // tag count, moot here
1464 
1465         // We choose to represent sRGB with its canonical transfer function,
1466         // and with its canonical XYZD50 gamut matrix.
1467         {   // the 3 trc curves
1468             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1469             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1470             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1471         },
1472 
1473         {{  // 3x3 toXYZD50 matrix
1474             { 0.436065674f, 0.385147095f, 0.143066406f },
1475             { 0.222488403f, 0.716873169f, 0.060607910f },
1476             { 0.013916016f, 0.097076416f, 0.714096069f },
1477         }},
1478 
1479         {   // an empty A2B
1480             {   // input_curves
1481                 {{0, {0,0, 0,0,0,0,0}}},
1482                 {{0, {0,0, 0,0,0,0,0}}},
1483                 {{0, {0,0, 0,0,0,0,0}}},
1484                 {{0, {0,0, 0,0,0,0,0}}},
1485             },
1486             nullptr,   // grid_8
1487             nullptr,   // grid_16
1488             0,         // input_channels
1489             {0,0,0,0}, // grid_points
1490 
1491             {   // matrix_curves
1492                 {{0, {0,0, 0,0,0,0,0}}},
1493                 {{0, {0,0, 0,0,0,0,0}}},
1494                 {{0, {0,0, 0,0,0,0,0}}},
1495             },
1496             {{  // matrix (3x4)
1497                 { 0,0,0,0 },
1498                 { 0,0,0,0 },
1499                 { 0,0,0,0 },
1500             }},
1501             0,  // matrix_channels
1502 
1503             0,  // output_channels
1504             {   // output_curves
1505                 {{0, {0,0, 0,0,0,0,0}}},
1506                 {{0, {0,0, 0,0,0,0,0}}},
1507                 {{0, {0,0, 0,0,0,0,0}}},
1508             },
1509         },
1510 
1511         {   // an empty B2A
1512             {   // input_curves
1513                 {{0, {0,0, 0,0,0,0,0}}},
1514                 {{0, {0,0, 0,0,0,0,0}}},
1515                 {{0, {0,0, 0,0,0,0,0}}},
1516             },
1517             0,  // input_channels
1518 
1519             0,  // matrix_channels
1520             {   // matrix_curves
1521                 {{0, {0,0, 0,0,0,0,0}}},
1522                 {{0, {0,0, 0,0,0,0,0}}},
1523                 {{0, {0,0, 0,0,0,0,0}}},
1524             },
1525             {{  // matrix (3x4)
1526                 { 0,0,0,0 },
1527                 { 0,0,0,0 },
1528                 { 0,0,0,0 },
1529             }},
1530 
1531             {   // output_curves
1532                 {{0, {0,0, 0,0,0,0,0}}},
1533                 {{0, {0,0, 0,0,0,0,0}}},
1534                 {{0, {0,0, 0,0,0,0,0}}},
1535                 {{0, {0,0, 0,0,0,0,0}}},
1536             },
1537             nullptr,    // grid_8
1538             nullptr,    // grid_16
1539             {0,0,0,0},  // grid_points
1540             0,          // output_channels
1541         },
1542 
1543         { 0, 0, 0, 0 },  // an empty CICP
1544 
1545         true,  // has_trc
1546         true,  // has_toXYZD50
1547         false, // has_A2B
1548         false, // has B2A
1549         false, // has_CICP
1550     };
1551     return &sRGB_profile;
1552 }
1553 
skcms_XYZD50_profile()1554 const skcms_ICCProfile* skcms_XYZD50_profile() {
1555     // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1556     static const skcms_ICCProfile XYZD50_profile = {
1557         nullptr,               // buffer, moot here
1558 
1559         0,                     // size, moot here
1560         skcms_Signature_RGB,   // data_color_space
1561         skcms_Signature_XYZ,   // pcs
1562         0,                     // tag count, moot here
1563 
1564         {   // the 3 trc curves
1565             {{0, {1,1, 0,0,0,0,0}}},
1566             {{0, {1,1, 0,0,0,0,0}}},
1567             {{0, {1,1, 0,0,0,0,0}}},
1568         },
1569 
1570         {{  // 3x3 toXYZD50 matrix
1571             { 1,0,0 },
1572             { 0,1,0 },
1573             { 0,0,1 },
1574         }},
1575 
1576         {   // an empty A2B
1577             {   // input_curves
1578                 {{0, {0,0, 0,0,0,0,0}}},
1579                 {{0, {0,0, 0,0,0,0,0}}},
1580                 {{0, {0,0, 0,0,0,0,0}}},
1581                 {{0, {0,0, 0,0,0,0,0}}},
1582             },
1583             nullptr,   // grid_8
1584             nullptr,   // grid_16
1585             0,         // input_channels
1586             {0,0,0,0}, // grid_points
1587 
1588             {   // matrix_curves
1589                 {{0, {0,0, 0,0,0,0,0}}},
1590                 {{0, {0,0, 0,0,0,0,0}}},
1591                 {{0, {0,0, 0,0,0,0,0}}},
1592             },
1593             {{  // matrix (3x4)
1594                 { 0,0,0,0 },
1595                 { 0,0,0,0 },
1596                 { 0,0,0,0 },
1597             }},
1598             0,  // matrix_channels
1599 
1600             0,  // output_channels
1601             {   // output_curves
1602                 {{0, {0,0, 0,0,0,0,0}}},
1603                 {{0, {0,0, 0,0,0,0,0}}},
1604                 {{0, {0,0, 0,0,0,0,0}}},
1605             },
1606         },
1607 
1608         {   // an empty B2A
1609             {   // input_curves
1610                 {{0, {0,0, 0,0,0,0,0}}},
1611                 {{0, {0,0, 0,0,0,0,0}}},
1612                 {{0, {0,0, 0,0,0,0,0}}},
1613             },
1614             0,  // input_channels
1615 
1616             0,  // matrix_channels
1617             {   // matrix_curves
1618                 {{0, {0,0, 0,0,0,0,0}}},
1619                 {{0, {0,0, 0,0,0,0,0}}},
1620                 {{0, {0,0, 0,0,0,0,0}}},
1621             },
1622             {{  // matrix (3x4)
1623                 { 0,0,0,0 },
1624                 { 0,0,0,0 },
1625                 { 0,0,0,0 },
1626             }},
1627 
1628             {   // output_curves
1629                 {{0, {0,0, 0,0,0,0,0}}},
1630                 {{0, {0,0, 0,0,0,0,0}}},
1631                 {{0, {0,0, 0,0,0,0,0}}},
1632                 {{0, {0,0, 0,0,0,0,0}}},
1633             },
1634             nullptr,    // grid_8
1635             nullptr,    // grid_16
1636             {0,0,0,0},  // grid_points
1637             0,          // output_channels
1638         },
1639 
1640         { 0, 0, 0, 0 },  // an empty CICP
1641 
1642         true,  // has_trc
1643         true,  // has_toXYZD50
1644         false, // has_A2B
1645         false, // has B2A
1646         false, // has_CICP
1647     };
1648 
1649     return &XYZD50_profile;
1650 }
1651 
skcms_sRGB_TransferFunction()1652 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1653     return &skcms_sRGB_profile()->trc[0].parametric;
1654 }
1655 
skcms_sRGB_Inverse_TransferFunction()1656 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1657     static const skcms_TransferFunction sRGB_inv =
1658         {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1659     return &sRGB_inv;
1660 }
1661 
skcms_Identity_TransferFunction()1662 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1663     static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1664     return &identity;
1665 }
1666 
1667 const uint8_t skcms_252_random_bytes[] = {
1668     8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1669     119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1670     154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1671     194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1672     108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1673     70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1674     137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1675     9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1676     129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1677     140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1678     219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1679     123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1680     189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1681     174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1682     2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1683     112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1684 };
1685 
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1686 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1687     // Test for exactly equal profiles first.
1688     if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1689         return true;
1690     }
1691 
1692     // For now this is the essentially the same strategy we use in test_only.c
1693     // for our skcms_Transform() smoke tests:
1694     //    1) transform A to XYZD50
1695     //    2) transform B to XYZD50
1696     //    3) return true if they're similar enough
1697     // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1698 
1699     // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1700     // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1701 
1702     // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1703     // to be treated as equal.  But CMYK profiles are a totally different ballgame.
1704     const auto CMYK = skcms_Signature_CMYK;
1705     if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1706         return false;
1707     }
1708 
1709     // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1710     // TODO: working with RGBA_8888 either way is probably fastest.
1711     skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1712     size_t npixels = 84;
1713     if (A->data_color_space == skcms_Signature_CMYK) {
1714         fmt = skcms_PixelFormat_RGBA_8888;
1715         npixels = 63;
1716     }
1717 
1718     // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1719     // use pre-canned results and skip that skcms_Transform() call?
1720     uint8_t dstA[252],
1721             dstB[252];
1722     if (!skcms_Transform(
1723                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1724                 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1725                 npixels)) {
1726         return false;
1727     }
1728     if (!skcms_Transform(
1729                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1730                 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1731                 npixels)) {
1732         return false;
1733     }
1734 
1735     // TODO: make sure this final check has reasonable codegen.
1736     for (size_t i = 0; i < 252; i++) {
1737         if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1738             return false;
1739         }
1740     }
1741     return true;
1742 }
1743 
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1744 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1745                                       const skcms_TransferFunction* inv_tf) {
1746     if (!profile || !profile->has_trc) {
1747         return false;
1748     }
1749 
1750     return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1751            skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1752            skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1753 }
1754 
is_zero_to_one(float x)1755 static bool is_zero_to_one(float x) {
1756     return 0 <= x && x <= 1;
1757 }
1758 
1759 typedef struct { float vals[3]; } skcms_Vector3;
1760 
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1761 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1762     skcms_Vector3 dst = {{0,0,0}};
1763     for (int row = 0; row < 3; ++row) {
1764         dst.vals[row] = m->vals[row][0] * v->vals[0]
1765                       + m->vals[row][1] * v->vals[1]
1766                       + m->vals[row][2] * v->vals[2];
1767     }
1768     return dst;
1769 }
1770 
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1771 bool skcms_AdaptToXYZD50(float wx, float wy,
1772                          skcms_Matrix3x3* toXYZD50) {
1773     if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1774         !toXYZD50) {
1775         return false;
1776     }
1777 
1778     // Assumes that Y is 1.0f.
1779     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1780 
1781     // Now convert toXYZ matrix to toXYZD50.
1782     skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1783 
1784     // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1785     // the matrices below.  The Bradford method is used by Adobe and is widely considered
1786     // to be the best.
1787     skcms_Matrix3x3 xyz_to_lms = {{
1788         {  0.8951f,  0.2664f, -0.1614f },
1789         { -0.7502f,  1.7135f,  0.0367f },
1790         {  0.0389f, -0.0685f,  1.0296f },
1791     }};
1792     skcms_Matrix3x3 lms_to_xyz = {{
1793         {  0.9869929f, -0.1470543f, 0.1599627f },
1794         {  0.4323053f,  0.5183603f, 0.0492912f },
1795         { -0.0085287f,  0.0400428f, 0.9684867f },
1796     }};
1797 
1798     skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1799     skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1800 
1801     *toXYZD50 = {{
1802         { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1803         { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1804         { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1805     }};
1806     *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1807     *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1808 
1809     return true;
1810 }
1811 
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1812 bool skcms_PrimariesToXYZD50(float rx, float ry,
1813                              float gx, float gy,
1814                              float bx, float by,
1815                              float wx, float wy,
1816                              skcms_Matrix3x3* toXYZD50) {
1817     if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1818         !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1819         !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1820         !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1821         !toXYZD50) {
1822         return false;
1823     }
1824 
1825     // First, we need to convert xy values (primaries) to XYZ.
1826     skcms_Matrix3x3 primaries = {{
1827         { rx, gx, bx },
1828         { ry, gy, by },
1829         { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1830     }};
1831     skcms_Matrix3x3 primaries_inv;
1832     if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1833         return false;
1834     }
1835 
1836     // Assumes that Y is 1.0f.
1837     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1838     skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1839 
1840     skcms_Matrix3x3 toXYZ = {{
1841         { XYZ.vals[0],           0,           0 },
1842         {           0, XYZ.vals[1],           0 },
1843         {           0,           0, XYZ.vals[2] },
1844     }};
1845     toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1846 
1847     skcms_Matrix3x3 DXtoD50;
1848     if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1849         return false;
1850     }
1851 
1852     *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1853     return true;
1854 }
1855 
1856 
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1857 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1858     double a00 = src->vals[0][0],
1859            a01 = src->vals[1][0],
1860            a02 = src->vals[2][0],
1861            a10 = src->vals[0][1],
1862            a11 = src->vals[1][1],
1863            a12 = src->vals[2][1],
1864            a20 = src->vals[0][2],
1865            a21 = src->vals[1][2],
1866            a22 = src->vals[2][2];
1867 
1868     double b0 = a00*a11 - a01*a10,
1869            b1 = a00*a12 - a02*a10,
1870            b2 = a01*a12 - a02*a11,
1871            b3 = a20,
1872            b4 = a21,
1873            b5 = a22;
1874 
1875     double determinant = b0*b5
1876                        - b1*b4
1877                        + b2*b3;
1878 
1879     if (determinant == 0) {
1880         return false;
1881     }
1882 
1883     double invdet = 1.0 / determinant;
1884     if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1885         return false;
1886     }
1887 
1888     b0 *= invdet;
1889     b1 *= invdet;
1890     b2 *= invdet;
1891     b3 *= invdet;
1892     b4 *= invdet;
1893     b5 *= invdet;
1894 
1895     dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1896     dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1897     dst->vals[2][0] = (float)(        +     b2 );
1898     dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1899     dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1900     dst->vals[2][1] = (float)(        -     b1 );
1901     dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1902     dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1903     dst->vals[2][2] = (float)(        +     b0 );
1904 
1905     for (int r = 0; r < 3; ++r)
1906     for (int c = 0; c < 3; ++c) {
1907         if (!isfinitef_(dst->vals[r][c])) {
1908             return false;
1909         }
1910     }
1911     return true;
1912 }
1913 
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1914 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1915     skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1916     for (int r = 0; r < 3; r++)
1917         for (int c = 0; c < 3; c++) {
1918             m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1919                          + A->vals[r][1] * B->vals[1][c]
1920                          + A->vals[r][2] * B->vals[2][c];
1921         }
1922     return m;
1923 }
1924 
1925 #if defined(__clang__)
1926     [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
1927 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1928 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1929     TF_PQish  pq;
1930     TF_HLGish hlg;
1931     switch (classify(*src, &pq, &hlg)) {
1932         case skcms_TFType_Invalid: return false;
1933         case skcms_TFType_sRGBish: break;  // handled below
1934 
1935         case skcms_TFType_PQish:
1936             *dst = { TFKind_marker(skcms_TFType_PQish), -pq.A,  pq.D, 1.0f/pq.F
1937                                                       ,  pq.B, -pq.E, 1.0f/pq.C};
1938             return true;
1939 
1940         case skcms_TFType_HLGish:
1941             *dst = { TFKind_marker(skcms_TFType_HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1942                                                           , 1.0f/hlg.a, hlg.b, hlg.c
1943                                                           , hlg.K_minus_1 };
1944             return true;
1945 
1946         case skcms_TFType_HLGinvish:
1947             *dst = { TFKind_marker(skcms_TFType_HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1948                                                        , 1.0f/hlg.a, hlg.b, hlg.c
1949                                                        , hlg.K_minus_1 };
1950             return true;
1951     }
1952 
1953     assert (classify(*src) == skcms_TFType_sRGBish);
1954 
1955     // We're inverting this function, solving for x in terms of y.
1956     //   y = (cx + f)         x < d
1957     //       (ax + b)^g + e   x ≥ d
1958     // The inverse of this function can be expressed in the same piecewise form.
1959     skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1960 
1961     // We'll start by finding the new threshold inv.d.
1962     // In principle we should be able to find that by solving for y at x=d from either side.
1963     // (If those two d values aren't the same, it's a discontinuous transfer function.)
1964     float d_l =       src->c * src->d + src->f,
1965           d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1966     if (fabsf_(d_l - d_r) > 1/512.0f) {
1967         return false;
1968     }
1969     inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1970 
1971     // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1972     if (inv.d > 0) {
1973         // Inverting the linear section is pretty straightfoward:
1974         //        y       = cx + f
1975         //        y - f   = cx
1976         //   (1/c)y - f/c = x
1977         inv.c =    1.0f/src->c;
1978         inv.f = -src->f/src->c;
1979     }
1980 
1981     // The interesting part is inverting the nonlinear section:
1982     //         y                = (ax + b)^g + e.
1983     //         y - e            = (ax + b)^g
1984     //        (y - e)^1/g       =  ax + b
1985     //        (y - e)^1/g - b   =  ax
1986     //   (1/a)(y - e)^1/g - b/a =   x
1987     //
1988     // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1989     //   let k = (1/a)^g
1990     //   (1/a)( y -  e)^1/g - b/a = x
1991     //        (ky - ke)^1/g - b/a = x
1992 
1993     float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1994     inv.g = 1.0f / src->g;
1995     inv.a = k;
1996     inv.b = -k * src->e;
1997     inv.e = -src->b / src->a;
1998 
1999     // We need to enforce the same constraints here that we do when fitting a curve,
2000     // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
2001     // of the source function if we're here.
2002 
2003     // Just like when fitting the curve, there's really no way to rescue a < 0.
2004     if (inv.a < 0) {
2005         return false;
2006     }
2007     // On the other hand we can rescue an ad+b that's gone slightly negative here.
2008     if (inv.a * inv.d + inv.b < 0) {
2009         inv.b = -inv.a * inv.d;
2010     }
2011 
2012     // That should usually make classify(inv) == sRGBish true, but there are a couple situations
2013     // where we might still fail here, like non-finite parameter values.
2014     if (classify(inv) != skcms_TFType_sRGBish) {
2015         return false;
2016     }
2017 
2018     assert (inv.a >= 0);
2019     assert (inv.a * inv.d + inv.b >= 0);
2020 
2021     // Now in principle we're done.
2022     // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
2023     // e or f of the inverse, depending on which segment contains src(1.0f).
2024     float s = skcms_TransferFunction_eval(src, 1.0f);
2025     if (!isfinitef_(s)) {
2026         return false;
2027     }
2028 
2029     float sign = s < 0 ? -1.0f : 1.0f;
2030     s *= sign;
2031     if (s < inv.d) {
2032         inv.f = 1.0f - sign * inv.c * s;
2033     } else {
2034         inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
2035     }
2036 
2037     *dst = inv;
2038     return classify(*dst) == skcms_TFType_sRGBish;
2039 }
2040 
2041 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
2042 
2043 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
2044 //
2045 //   tf(x) =  cx + f          x < d
2046 //   tf(x) = (ax + b)^g + e   x ≥ d
2047 //
2048 // When fitting, we add the additional constraint that both pieces meet at d:
2049 //
2050 //   cd + f = (ad + b)^g + e
2051 //
2052 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
2053 //
2054 //   tf(x) =                           cx + f   x < d
2055 //   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
2056 //
2057 // Our overall strategy is then:
2058 //    For a couple tolerances,
2059 //       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
2060 //       - invert c,d,f
2061 //       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
2062 //                          (and by constraint, inverted e) to the inverse of the table.
2063 //    Return the parameters with least maximum error.
2064 //
2065 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
2066 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
2067 //
2068 //    let y = Table(x)
2069 //    r(x) = x - f_inv(y)
2070 //
2071 //    ∂r/∂g = ln(ay + b)*(ay + b)^g
2072 //          - ln(ad + b)*(ad + b)^g
2073 //    ∂r/∂a = yg(ay + b)^(g-1)
2074 //          - dg(ad + b)^(g-1)
2075 //    ∂r/∂b =  g(ay + b)^(g-1)
2076 //          -  g(ad + b)^(g-1)
2077 
2078 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
2079 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])2080 static float rg_nonlinear(float x,
2081                           const skcms_Curve* curve,
2082                           const skcms_TransferFunction* tf,
2083                           float dfdP[3]) {
2084     const float y = eval_curve(curve, x);
2085 
2086     const float g = tf->g, a = tf->a, b = tf->b,
2087                 c = tf->c, d = tf->d, f = tf->f;
2088 
2089     const float Y = fmaxf_(a*y + b, 0.0f),
2090                 D =        a*d + b;
2091     assert (D >= 0);
2092 
2093     // The gradient.
2094     dfdP[0] = logf_(Y)*powf_(Y, g)
2095             - logf_(D)*powf_(D, g);
2096     dfdP[1] = y*g*powf_(Y, g-1)
2097             - d*g*powf_(D, g-1);
2098     dfdP[2] =   g*powf_(Y, g-1)
2099             -   g*powf_(D, g-1);
2100 
2101     // The residual.
2102     const float f_inv = powf_(Y, g)
2103                       - powf_(D, g)
2104                       + c*d + f;
2105     return x - f_inv;
2106 }
2107 
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)2108 static bool gauss_newton_step(const skcms_Curve* curve,
2109                                     skcms_TransferFunction* tf,
2110                               float x0, float dx, int N) {
2111     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
2112     //
2113     // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
2114     //
2115     // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
2116     //   where r(P) is the residual vector
2117     //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
2118     //
2119     // Let's review the shape of each of these expressions:
2120     //   r(P)   is [N x 1], a column vector with one entry per value of x tested
2121     //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
2122     //   Jf^T   is [3 x N], the transpose of Jf
2123     //
2124     //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2125     //                                              and so is its inverse (Jf^T Jf)^-1
2126     //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2127     //
2128     // Our implementation strategy to get to the final ∆P is
2129     //   1) evaluate Jf^T Jf,   call that lhs
2130     //   2) evaluate Jf^T r(P), call that rhs
2131     //   3) invert lhs
2132     //   4) multiply inverse lhs by rhs
2133     //
2134     // This is a friendly implementation strategy because we don't have to have any
2135     // buffers that scale with N, and equally nice don't have to perform any matrix
2136     // operations that are variable size.
2137     //
2138     // Other implementation strategies could trade this off, e.g. evaluating the
2139     // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2140     // the residuals.  That would probably require implementing singular value
2141     // decomposition, and would create a [3 x N] matrix to be multiplied by the
2142     // [N x 1] residual vector, but on the upside I think that'd eliminate the
2143     // possibility of this gauss_newton_step() function ever failing.
2144 
2145     // 0) start off with lhs and rhs safely zeroed.
2146     skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2147     skcms_Vector3   rhs = {  {0,0,0} };
2148 
2149     // 1,2) evaluate lhs and evaluate rhs
2150     //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2151     //   so we'll have to update lhs and rhs at the same time.
2152     for (int i = 0; i < N; i++) {
2153         float x = x0 + static_cast<float>(i)*dx;
2154 
2155         float dfdP[3] = {0,0,0};
2156         float resid = rg_nonlinear(x,curve,tf, dfdP);
2157 
2158         for (int r = 0; r < 3; r++) {
2159             for (int c = 0; c < 3; c++) {
2160                 lhs.vals[r][c] += dfdP[r] * dfdP[c];
2161             }
2162             rhs.vals[r] += dfdP[r] * resid;
2163         }
2164     }
2165 
2166     // If any of the 3 P parameters are unused, this matrix will be singular.
2167     // Detect those cases and fix them up to indentity instead, so we can invert.
2168     for (int k = 0; k < 3; k++) {
2169         if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2170             lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2171             lhs.vals[k][k] = 1;
2172         }
2173     }
2174 
2175     // 3) invert lhs
2176     skcms_Matrix3x3 lhs_inv;
2177     if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2178         return false;
2179     }
2180 
2181     // 4) multiply inverse lhs by rhs
2182     skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2183     tf->g += dP.vals[0];
2184     tf->a += dP.vals[1];
2185     tf->b += dP.vals[2];
2186     return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2187 }
2188 
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)2189 static float max_roundtrip_error_checked(const skcms_Curve* curve,
2190                                          const skcms_TransferFunction* tf_inv) {
2191     skcms_TransferFunction tf;
2192     if (!skcms_TransferFunction_invert(tf_inv, &tf) || skcms_TFType_sRGBish != classify(tf)) {
2193         return INFINITY_;
2194     }
2195 
2196     skcms_TransferFunction tf_inv_again;
2197     if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2198         return INFINITY_;
2199     }
2200 
2201     return skcms_MaxRoundtripError(curve, &tf_inv_again);
2202 }
2203 
2204 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)2205 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2206     // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2207     auto fixup_tf = [tf]() {
2208         // a must be non-negative. That ensures the function is monotonically increasing.
2209         // We don't really know how to fix up a if it goes negative.
2210         if (tf->a < 0) {
2211             return false;
2212         }
2213         // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2214         // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2215         if (tf->a * tf->d + tf->b < 0) {
2216             tf->b = -tf->a * tf->d;
2217         }
2218         assert (tf->a >= 0 &&
2219                 tf->a * tf->d + tf->b >= 0);
2220 
2221         // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2222         // parameter so we can guarantee this.
2223         tf->e =   tf->c*tf->d + tf->f
2224           - powf_(tf->a*tf->d + tf->b, tf->g);
2225 
2226         return isfinitef_(tf->e);
2227     };
2228 
2229     if (!fixup_tf()) {
2230         return false;
2231     }
2232 
2233     // No matter where we start, dx should always represent N even steps from 0 to 1.
2234     const float dx = 1.0f / static_cast<float>(N-1);
2235 
2236     skcms_TransferFunction best_tf = *tf;
2237     float best_max_error = INFINITY_;
2238 
2239     // Need this or several curves get worse... *sigh*
2240     float init_error = max_roundtrip_error_checked(curve, tf);
2241     if (init_error < best_max_error) {
2242         best_max_error = init_error;
2243         best_tf = *tf;
2244     }
2245 
2246     // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2247     for (int j = 0; j < 8; j++) {
2248         if (!gauss_newton_step(curve, tf, static_cast<float>(L)*dx, dx, N-L) || !fixup_tf()) {
2249             *tf = best_tf;
2250             return isfinitef_(best_max_error);
2251         }
2252 
2253         float max_error = max_roundtrip_error_checked(curve, tf);
2254         if (max_error < best_max_error) {
2255             best_max_error = max_error;
2256             best_tf = *tf;
2257         }
2258     }
2259 
2260     *tf = best_tf;
2261     return isfinitef_(best_max_error);
2262 }
2263 
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)2264 bool skcms_ApproximateCurve(const skcms_Curve* curve,
2265                             skcms_TransferFunction* approx,
2266                             float* max_error) {
2267     if (!curve || !approx || !max_error) {
2268         return false;
2269     }
2270 
2271     if (curve->table_entries == 0) {
2272         // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2273         return false;
2274     }
2275 
2276     if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2277         // We need at least two points, and must put some reasonable cap on the maximum number.
2278         return false;
2279     }
2280 
2281     int N = (int)curve->table_entries;
2282     const float dx = 1.0f / static_cast<float>(N - 1);
2283 
2284     *max_error = INFINITY_;
2285     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2286     for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2287         skcms_TransferFunction tf,
2288                                tf_inv;
2289 
2290         // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2291         tf.f = 0.0f;
2292         int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2293 
2294         if (L == N) {
2295             // If the entire data set was linear, move the coefficients to the nonlinear portion
2296             // with G == 1.  This lets use a canonical representation with d == 0.
2297             tf.g = 1;
2298             tf.a = tf.c;
2299             tf.b = tf.f;
2300             tf.c = tf.d = tf.e = tf.f = 0;
2301         } else if (L == N - 1) {
2302             // Degenerate case with only two points in the nonlinear segment. Solve directly.
2303             tf.g = 1;
2304             tf.a = (eval_curve(curve, static_cast<float>(N-1)*dx) -
2305                     eval_curve(curve, static_cast<float>(N-2)*dx))
2306                  / dx;
2307             tf.b = eval_curve(curve, static_cast<float>(N-2)*dx)
2308                  - tf.a * static_cast<float>(N-2)*dx;
2309             tf.e = 0;
2310         } else {
2311             // Start by guessing a gamma-only curve through the midpoint.
2312             int mid = (L + N) / 2;
2313             float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1);
2314             float mid_y = eval_curve(curve, mid_x);
2315             tf.g = log2f_(mid_y) / log2f_(mid_x);
2316             tf.a = 1;
2317             tf.b = 0;
2318             tf.e =    tf.c*tf.d + tf.f
2319               - powf_(tf.a*tf.d + tf.b, tf.g);
2320 
2321 
2322             if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2323                 !fit_nonlinear(curve, L,N, &tf_inv)) {
2324                 continue;
2325             }
2326 
2327             // We fit tf_inv, so calculate tf to keep in sync.
2328             // fit_nonlinear() should guarantee invertibility.
2329             if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2330                 assert(false);
2331                 continue;
2332             }
2333         }
2334 
2335         // We'd better have a sane, sRGB-ish TF by now.
2336         // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2337         // anything else is just some accident of math and the way we pun tf.g as a type flag.
2338         // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2339         if (skcms_TFType_sRGBish != classify(tf)) {
2340             continue;
2341         }
2342 
2343         // We find our error by roundtripping the table through tf_inv.
2344         //
2345         // (The most likely use case for this approximation is to be inverted and
2346         // used as the transfer function for a destination color space.)
2347         //
2348         // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2349         // invertible, so re-verify that here (and use the new inverse for testing).
2350         // fit_nonlinear() should guarantee this, but the special cases that don't use
2351         // it may fail this test.
2352         if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2353             continue;
2354         }
2355 
2356         float err = skcms_MaxRoundtripError(curve, &tf_inv);
2357         if (*max_error > err) {
2358             *max_error = err;
2359             *approx    = tf;
2360         }
2361     }
2362     return isfinitef_(*max_error);
2363 }
2364 
2365 enum class CpuType { Baseline, HSW, SKX };
2366 
cpu_type()2367 static CpuType cpu_type() {
2368     #if defined(SKCMS_PORTABLE) || !defined(__x86_64__) || defined(SKCMS_FORCE_BASELINE)
2369         return CpuType::Baseline;
2370     #elif defined(SKCMS_FORCE_HSW)
2371         return CpuType::HSW;
2372     #elif defined(SKCMS_FORCE_SKX)
2373         return CpuType::SKX;
2374     #else
2375         static const CpuType type = []{
2376             if (!sAllowRuntimeCPUDetection) {
2377                 return CpuType::Baseline;
2378             }
2379             // See http://www.sandpile.org/x86/cpuid.htm
2380 
2381             // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2382             uint32_t eax, ebx, ecx, edx;
2383             __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2384                                          : "0"(1), "2"(0));
2385             if ((edx & (1u<<25)) &&  // SSE
2386                 (edx & (1u<<26)) &&  // SSE2
2387                 (ecx & (1u<< 0)) &&  // SSE3
2388                 (ecx & (1u<< 9)) &&  // SSSE3
2389                 (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
2390                 (ecx & (1u<<19)) &&  // SSE4.1
2391                 (ecx & (1u<<20)) &&  // SSE4.2
2392                 (ecx & (1u<<26)) &&  // XSAVE
2393                 (ecx & (1u<<27)) &&  // OSXSAVE
2394                 (ecx & (1u<<28)) &&  // AVX
2395                 (ecx & (1u<<29))) {  // F16C
2396 
2397                 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2398                 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2399                                              : "0"(7), "2"(0));
2400                 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2401                 uint32_t xcr0, dont_need_edx;
2402                 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2403 
2404                 if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
2405                     (xcr0 & (1u<<2)) &&  // YMM register state saved?
2406                     (ebx  & (1u<<5))) {  // AVX2
2407                     // At this point we're at least HSW.  Continue checking for SKX.
2408                     if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2409                         (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2410                         (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2411                         (ebx  & (1u<<16)) && // AVX512F
2412                         (ebx  & (1u<<17)) && // AVX512DQ
2413                         (ebx  & (1u<<28)) && // AVX512CD
2414                         (ebx  & (1u<<30)) && // AVX512BW
2415                         (ebx  & (1u<<31))) { // AVX512VL
2416                         return CpuType::SKX;
2417                     }
2418                     return CpuType::HSW;
2419                 }
2420             }
2421             return CpuType::Baseline;
2422         }();
2423         return type;
2424     #endif
2425 }
2426 
tf_is_gamma(const skcms_TransferFunction & tf)2427 static bool tf_is_gamma(const skcms_TransferFunction& tf) {
2428     return tf.g > 0 && tf.a == 1 &&
2429            tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0;
2430 }
2431 
2432 struct OpAndArg {
2433     Op          op;
2434     const void* arg;
2435 };
2436 
select_curve_op(const skcms_Curve * curve,int channel)2437 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2438     struct OpType {
2439         Op sGamma, sRGBish, PQish, HLGish, HLGinvish, table;
2440     };
2441     static constexpr OpType kOps[] = {
2442         { Op::gamma_r, Op::tf_r, Op::pq_r, Op::hlg_r, Op::hlginv_r, Op::table_r },
2443         { Op::gamma_g, Op::tf_g, Op::pq_g, Op::hlg_g, Op::hlginv_g, Op::table_g },
2444         { Op::gamma_b, Op::tf_b, Op::pq_b, Op::hlg_b, Op::hlginv_b, Op::table_b },
2445         { Op::gamma_a, Op::tf_a, Op::pq_a, Op::hlg_a, Op::hlginv_a, Op::table_a },
2446     };
2447     const auto& op = kOps[channel];
2448 
2449     if (curve->table_entries == 0) {
2450         const OpAndArg noop = { Op::load_a8/*doesn't matter*/, nullptr };
2451 
2452         const skcms_TransferFunction& tf = curve->parametric;
2453 
2454         if (tf_is_gamma(tf)) {
2455             return tf.g != 1 ? OpAndArg{op.sGamma, &tf}
2456                              : noop;
2457         }
2458 
2459         switch (classify(tf)) {
2460             case skcms_TFType_Invalid:    return noop;
2461             case skcms_TFType_sRGBish:    return OpAndArg{op.sRGBish,   &tf};
2462             case skcms_TFType_PQish:      return OpAndArg{op.PQish,     &tf};
2463             case skcms_TFType_HLGish:     return OpAndArg{op.HLGish,    &tf};
2464             case skcms_TFType_HLGinvish:  return OpAndArg{op.HLGinvish, &tf};
2465         }
2466     }
2467     return OpAndArg{op.table, curve};
2468 }
2469 
select_curve_ops(const skcms_Curve * curves,int numChannels,OpAndArg * ops)2470 static int select_curve_ops(const skcms_Curve* curves, int numChannels, OpAndArg* ops) {
2471     // We process the channels in reverse order, yielding ops in ABGR order.
2472     // (Working backwards allows us to fuse trailing B+G+R ops into a single RGB op.)
2473     int cursor = 0;
2474     for (int index = numChannels; index-- > 0; ) {
2475         ops[cursor] = select_curve_op(&curves[index], index);
2476         if (ops[cursor].arg) {
2477             ++cursor;
2478         }
2479     }
2480 
2481     // Identify separate B+G+R ops and fuse them into a single RGB op.
2482     if (cursor >= 3) {
2483         struct FusableOps {
2484             Op r, g, b, rgb;
2485         };
2486         static constexpr FusableOps kFusableOps[] = {
2487             {Op::gamma_r,  Op::gamma_g,  Op::gamma_b,  Op::gamma_rgb},
2488             {Op::tf_r,     Op::tf_g,     Op::tf_b,     Op::tf_rgb},
2489             {Op::pq_r,     Op::pq_g,     Op::pq_b,     Op::pq_rgb},
2490             {Op::hlg_r,    Op::hlg_g,    Op::hlg_b,    Op::hlg_rgb},
2491             {Op::hlginv_r, Op::hlginv_g, Op::hlginv_b, Op::hlginv_rgb},
2492         };
2493 
2494         int posR = cursor - 1;
2495         int posG = cursor - 2;
2496         int posB = cursor - 3;
2497         for (const FusableOps& fusableOp : kFusableOps) {
2498             if (ops[posR].op == fusableOp.r &&
2499                 ops[posG].op == fusableOp.g &&
2500                 ops[posB].op == fusableOp.b &&
2501                 (0 == memcmp(ops[posR].arg, ops[posG].arg, sizeof(skcms_TransferFunction))) &&
2502                 (0 == memcmp(ops[posR].arg, ops[posB].arg, sizeof(skcms_TransferFunction)))) {
2503                 // Fuse the three matching ops into one.
2504                 ops[posB].op = fusableOp.rgb;
2505                 cursor -= 2;
2506                 break;
2507             }
2508         }
2509     }
2510 
2511     return cursor;
2512 }
2513 
bytes_per_pixel(skcms_PixelFormat fmt)2514 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2515     switch (fmt >> 1) {   // ignore rgb/bgr
2516         case skcms_PixelFormat_A_8              >> 1: return  1;
2517         case skcms_PixelFormat_G_8              >> 1: return  1;
2518         case skcms_PixelFormat_GA_88            >> 1: return  2;
2519         case skcms_PixelFormat_ABGR_4444        >> 1: return  2;
2520         case skcms_PixelFormat_RGB_565          >> 1: return  2;
2521         case skcms_PixelFormat_RGB_888          >> 1: return  3;
2522         case skcms_PixelFormat_RGBA_8888        >> 1: return  4;
2523         case skcms_PixelFormat_RGBA_8888_sRGB   >> 1: return  4;
2524         case skcms_PixelFormat_RGBA_1010102     >> 1: return  4;
2525         case skcms_PixelFormat_RGB_101010x_XR   >> 1: return  4;
2526         case skcms_PixelFormat_RGB_161616LE     >> 1: return  6;
2527         case skcms_PixelFormat_RGBA_10101010_XR >> 1: return  8;
2528         case skcms_PixelFormat_RGBA_16161616LE  >> 1: return  8;
2529         case skcms_PixelFormat_RGB_161616BE     >> 1: return  6;
2530         case skcms_PixelFormat_RGBA_16161616BE  >> 1: return  8;
2531         case skcms_PixelFormat_RGB_hhh_Norm     >> 1: return  6;
2532         case skcms_PixelFormat_RGBA_hhhh_Norm   >> 1: return  8;
2533         case skcms_PixelFormat_RGB_hhh          >> 1: return  6;
2534         case skcms_PixelFormat_RGBA_hhhh        >> 1: return  8;
2535         case skcms_PixelFormat_RGB_fff          >> 1: return 12;
2536         case skcms_PixelFormat_RGBA_ffff        >> 1: return 16;
2537     }
2538     assert(false);
2539     return 0;
2540 }
2541 
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2542 static bool prep_for_destination(const skcms_ICCProfile* profile,
2543                                  skcms_Matrix3x3* fromXYZD50,
2544                                  skcms_TransferFunction* invR,
2545                                  skcms_TransferFunction* invG,
2546                                  skcms_TransferFunction* invB) {
2547     // skcms_Transform() supports B2A destinations...
2548     if (profile->has_B2A) { return true; }
2549     // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2550     return profile->has_trc
2551         && profile->has_toXYZD50
2552         && profile->trc[0].table_entries == 0
2553         && profile->trc[1].table_entries == 0
2554         && profile->trc[2].table_entries == 0
2555         && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2556         && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2557         && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2558         && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2559 }
2560 
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz)2561 bool skcms_Transform(const void*             src,
2562                      skcms_PixelFormat       srcFmt,
2563                      skcms_AlphaFormat       srcAlpha,
2564                      const skcms_ICCProfile* srcProfile,
2565                      void*                   dst,
2566                      skcms_PixelFormat       dstFmt,
2567                      skcms_AlphaFormat       dstAlpha,
2568                      const skcms_ICCProfile* dstProfile,
2569                      size_t                  nz) {
2570     const size_t dst_bpp = bytes_per_pixel(dstFmt),
2571                  src_bpp = bytes_per_pixel(srcFmt);
2572     // Let's just refuse if the request is absurdly big.
2573     if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2574         return false;
2575     }
2576     int n = (int)nz;
2577 
2578     // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2579     if (!srcProfile) {
2580         srcProfile = skcms_sRGB_profile();
2581     }
2582     if (!dstProfile) {
2583         dstProfile = skcms_sRGB_profile();
2584     }
2585 
2586     // We can't transform in place unless the PixelFormats are the same size.
2587     if (dst == src && dst_bpp != src_bpp) {
2588         return false;
2589     }
2590     // TODO: more careful alias rejection (like, dst == src + 1)?
2591 
2592     Op          program[32];
2593     const void* context[32];
2594 
2595     Op*          ops      = program;
2596     const void** contexts = context;
2597 
2598     auto add_op = [&](Op o) {
2599         *ops++ = o;
2600         *contexts++ = nullptr;
2601     };
2602 
2603     auto add_op_ctx = [&](Op o, const void* c) {
2604         *ops++ = o;
2605         *contexts++ = c;
2606     };
2607 
2608     auto add_curve_ops = [&](const skcms_Curve* curves, int numChannels) {
2609         OpAndArg oa[4];
2610         assert(numChannels <= ARRAY_COUNT(oa));
2611 
2612         int numOps = select_curve_ops(curves, numChannels, oa);
2613 
2614         for (int i = 0; i < numOps; ++i) {
2615             add_op_ctx(oa[i].op, oa[i].arg);
2616         }
2617     };
2618 
2619     // These are always parametric curves of some sort.
2620     skcms_Curve dst_curves[3];
2621     dst_curves[0].table_entries =
2622     dst_curves[1].table_entries =
2623     dst_curves[2].table_entries = 0;
2624 
2625     skcms_Matrix3x3        from_xyz;
2626 
2627     switch (srcFmt >> 1) {
2628         default: return false;
2629         case skcms_PixelFormat_A_8              >> 1: add_op(Op::load_a8);          break;
2630         case skcms_PixelFormat_G_8              >> 1: add_op(Op::load_g8);          break;
2631         case skcms_PixelFormat_GA_88            >> 1: add_op(Op::load_ga88);        break;
2632         case skcms_PixelFormat_ABGR_4444        >> 1: add_op(Op::load_4444);        break;
2633         case skcms_PixelFormat_RGB_565          >> 1: add_op(Op::load_565);         break;
2634         case skcms_PixelFormat_RGB_888          >> 1: add_op(Op::load_888);         break;
2635         case skcms_PixelFormat_RGBA_8888        >> 1: add_op(Op::load_8888);        break;
2636         case skcms_PixelFormat_RGBA_1010102     >> 1: add_op(Op::load_1010102);     break;
2637         case skcms_PixelFormat_RGB_101010x_XR   >> 1: add_op(Op::load_101010x_XR);  break;
2638         case skcms_PixelFormat_RGBA_10101010_XR >> 1: add_op(Op::load_10101010_XR); break;
2639         case skcms_PixelFormat_RGB_161616LE     >> 1: add_op(Op::load_161616LE);    break;
2640         case skcms_PixelFormat_RGBA_16161616LE  >> 1: add_op(Op::load_16161616LE);  break;
2641         case skcms_PixelFormat_RGB_161616BE     >> 1: add_op(Op::load_161616BE);    break;
2642         case skcms_PixelFormat_RGBA_16161616BE  >> 1: add_op(Op::load_16161616BE);  break;
2643         case skcms_PixelFormat_RGB_hhh_Norm     >> 1: add_op(Op::load_hhh);         break;
2644         case skcms_PixelFormat_RGBA_hhhh_Norm   >> 1: add_op(Op::load_hhhh);        break;
2645         case skcms_PixelFormat_RGB_hhh          >> 1: add_op(Op::load_hhh);         break;
2646         case skcms_PixelFormat_RGBA_hhhh        >> 1: add_op(Op::load_hhhh);        break;
2647         case skcms_PixelFormat_RGB_fff          >> 1: add_op(Op::load_fff);         break;
2648         case skcms_PixelFormat_RGBA_ffff        >> 1: add_op(Op::load_ffff);        break;
2649 
2650         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2651             add_op(Op::load_8888);
2652             add_op_ctx(Op::tf_rgb, skcms_sRGB_TransferFunction());
2653             break;
2654     }
2655     if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2656         srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2657         add_op(Op::clamp);
2658     }
2659     if (srcFmt & 1) {
2660         add_op(Op::swap_rb);
2661     }
2662     skcms_ICCProfile gray_dst_profile;
2663     switch (dstFmt >> 1) {
2664         case skcms_PixelFormat_G_8:
2665         case skcms_PixelFormat_GA_88:
2666             // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2667             // luminance (Y) by the destination transfer function.
2668             gray_dst_profile = *dstProfile;
2669             skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2670             dstProfile = &gray_dst_profile;
2671             break;
2672         default:
2673             break;
2674     }
2675 
2676     if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2677         // Photoshop creates CMYK images as inverse CMYK.
2678         // These happen to be the only ones we've _ever_ seen.
2679         add_op(Op::invert);
2680         // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2681         srcAlpha = skcms_AlphaFormat_Unpremul;
2682     }
2683 
2684     if (srcAlpha == skcms_AlphaFormat_Opaque) {
2685         add_op(Op::force_opaque);
2686     } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2687         add_op(Op::unpremul);
2688     }
2689 
2690     if (dstProfile != srcProfile) {
2691 
2692         if (!prep_for_destination(dstProfile,
2693                                   &from_xyz,
2694                                   &dst_curves[0].parametric,
2695                                   &dst_curves[1].parametric,
2696                                   &dst_curves[2].parametric)) {
2697             return false;
2698         }
2699 
2700         if (srcProfile->has_A2B) {
2701             if (srcProfile->A2B.input_channels) {
2702                 add_curve_ops(srcProfile->A2B.input_curves,
2703                               (int)srcProfile->A2B.input_channels);
2704                 add_op(Op::clamp);
2705                 add_op_ctx(Op::clut_A2B, &srcProfile->A2B);
2706             }
2707 
2708             if (srcProfile->A2B.matrix_channels == 3) {
2709                 add_curve_ops(srcProfile->A2B.matrix_curves, /*numChannels=*/3);
2710 
2711                 static const skcms_Matrix3x4 I = {{
2712                     {1,0,0,0},
2713                     {0,1,0,0},
2714                     {0,0,1,0},
2715                 }};
2716                 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2717                     add_op_ctx(Op::matrix_3x4, &srcProfile->A2B.matrix);
2718                 }
2719             }
2720 
2721             if (srcProfile->A2B.output_channels == 3) {
2722                 add_curve_ops(srcProfile->A2B.output_curves, /*numChannels=*/3);
2723             }
2724 
2725             if (srcProfile->pcs == skcms_Signature_Lab) {
2726                 add_op(Op::lab_to_xyz);
2727             }
2728 
2729         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2730             add_curve_ops(srcProfile->trc, /*numChannels=*/3);
2731         } else {
2732             return false;
2733         }
2734 
2735         // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2736         assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2737 
2738         if (dstProfile->has_B2A) {
2739             // B2A needs its input in XYZD50, so transform TRC sources now.
2740             if (!srcProfile->has_A2B) {
2741                 add_op_ctx(Op::matrix_3x3, &srcProfile->toXYZD50);
2742             }
2743 
2744             if (dstProfile->pcs == skcms_Signature_Lab) {
2745                 add_op(Op::xyz_to_lab);
2746             }
2747 
2748             if (dstProfile->B2A.input_channels == 3) {
2749                 add_curve_ops(dstProfile->B2A.input_curves, /*numChannels=*/3);
2750             }
2751 
2752             if (dstProfile->B2A.matrix_channels == 3) {
2753                 static const skcms_Matrix3x4 I = {{
2754                     {1,0,0,0},
2755                     {0,1,0,0},
2756                     {0,0,1,0},
2757                 }};
2758                 if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2759                     add_op_ctx(Op::matrix_3x4, &dstProfile->B2A.matrix);
2760                 }
2761 
2762                 add_curve_ops(dstProfile->B2A.matrix_curves, /*numChannels=*/3);
2763             }
2764 
2765             if (dstProfile->B2A.output_channels) {
2766                 add_op(Op::clamp);
2767                 add_op_ctx(Op::clut_B2A, &dstProfile->B2A);
2768 
2769                 add_curve_ops(dstProfile->B2A.output_curves,
2770                               (int)dstProfile->B2A.output_channels);
2771             }
2772         } else {
2773             // This is a TRC destination.
2774             // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2775             // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2776             static const skcms_Matrix3x3 I = {{
2777                 { 1.0f, 0.0f, 0.0f },
2778                 { 0.0f, 1.0f, 0.0f },
2779                 { 0.0f, 0.0f, 1.0f },
2780             }};
2781             const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2782 
2783             // There's a chance the source and destination gamuts are identical,
2784             // in which case we can skip the gamut transform.
2785             if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2786                 // Concat the entire gamut transform into from_xyz,
2787                 // now slightly misnamed but it's a handy spot to stash the result.
2788                 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2789                 add_op_ctx(Op::matrix_3x3, &from_xyz);
2790             }
2791 
2792             // Encode back to dst RGB using its parametric transfer functions.
2793             OpAndArg oa[3];
2794             int numOps = select_curve_ops(dst_curves, /*numChannels=*/3, oa);
2795             for (int index = 0; index < numOps; ++index) {
2796                 assert(oa[index].op != Op::table_r &&
2797                        oa[index].op != Op::table_g &&
2798                        oa[index].op != Op::table_b &&
2799                        oa[index].op != Op::table_a);
2800                 add_op_ctx(oa[index].op, oa[index].arg);
2801             }
2802         }
2803     }
2804 
2805     // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2806     // not just to values that fit in [0,1].
2807     //
2808     // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2809     // but would be carrying r > 1, which is really unexpected for downstream consumers.
2810     if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2811         add_op(Op::clamp);
2812     }
2813 
2814     if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2815         // Photoshop creates CMYK images as inverse CMYK.
2816         // These happen to be the only ones we've _ever_ seen.
2817         add_op(Op::invert);
2818 
2819         // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2820         dstAlpha = skcms_AlphaFormat_Unpremul;
2821     }
2822 
2823     if (dstAlpha == skcms_AlphaFormat_Opaque) {
2824         add_op(Op::force_opaque);
2825     } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2826         add_op(Op::premul);
2827     }
2828     if (dstFmt & 1) {
2829         add_op(Op::swap_rb);
2830     }
2831     switch (dstFmt >> 1) {
2832         default: return false;
2833         case skcms_PixelFormat_A_8             >> 1: add_op(Op::store_a8);         break;
2834         case skcms_PixelFormat_G_8             >> 1: add_op(Op::store_g8);         break;
2835         case skcms_PixelFormat_GA_88           >> 1: add_op(Op::store_ga88);       break;
2836         case skcms_PixelFormat_ABGR_4444       >> 1: add_op(Op::store_4444);       break;
2837         case skcms_PixelFormat_RGB_565         >> 1: add_op(Op::store_565);        break;
2838         case skcms_PixelFormat_RGB_888         >> 1: add_op(Op::store_888);        break;
2839         case skcms_PixelFormat_RGBA_8888       >> 1: add_op(Op::store_8888);       break;
2840         case skcms_PixelFormat_RGBA_1010102    >> 1: add_op(Op::store_1010102);    break;
2841         case skcms_PixelFormat_RGB_161616LE    >> 1: add_op(Op::store_161616LE);   break;
2842         case skcms_PixelFormat_RGBA_16161616LE >> 1: add_op(Op::store_16161616LE); break;
2843         case skcms_PixelFormat_RGB_161616BE    >> 1: add_op(Op::store_161616BE);   break;
2844         case skcms_PixelFormat_RGBA_16161616BE >> 1: add_op(Op::store_16161616BE); break;
2845         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: add_op(Op::store_hhh);        break;
2846         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: add_op(Op::store_hhhh);       break;
2847         case skcms_PixelFormat_RGB_101010x_XR  >> 1: add_op(Op::store_101010x_XR); break;
2848         case skcms_PixelFormat_RGB_hhh         >> 1: add_op(Op::store_hhh);        break;
2849         case skcms_PixelFormat_RGBA_hhhh       >> 1: add_op(Op::store_hhhh);       break;
2850         case skcms_PixelFormat_RGB_fff         >> 1: add_op(Op::store_fff);        break;
2851         case skcms_PixelFormat_RGBA_ffff       >> 1: add_op(Op::store_ffff);       break;
2852 
2853         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2854             add_op_ctx(Op::tf_rgb, skcms_sRGB_Inverse_TransferFunction());
2855             add_op(Op::store_8888);
2856             break;
2857     }
2858 
2859     assert(ops      <= program + ARRAY_COUNT(program));
2860     assert(contexts <= context + ARRAY_COUNT(context));
2861 
2862     auto run = baseline::run_program;
2863     switch (cpu_type()) {
2864         case CpuType::SKX:
2865             #if !defined(SKCMS_DISABLE_SKX)
2866                 run = skx::run_program;
2867                 break;
2868             #endif
2869 
2870         case CpuType::HSW:
2871             #if !defined(SKCMS_DISABLE_HSW)
2872                 run = hsw::run_program;
2873                 break;
2874             #endif
2875 
2876         case CpuType::Baseline:
2877             break;
2878     }
2879 
2880     run(program, context, ops - program, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2881     return true;
2882 }
2883 
assert_usable_as_destination(const skcms_ICCProfile * profile)2884 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2885 #if defined(NDEBUG)
2886     (void)profile;
2887 #else
2888     skcms_Matrix3x3 fromXYZD50;
2889     skcms_TransferFunction invR, invG, invB;
2890     assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2891 #endif
2892 }
2893 
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2894 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2895     if (!profile->has_B2A) {
2896         skcms_Matrix3x3 fromXYZD50;
2897         if (!profile->has_trc || !profile->has_toXYZD50
2898             || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2899             return false;
2900         }
2901 
2902         skcms_TransferFunction tf[3];
2903         for (int i = 0; i < 3; i++) {
2904             skcms_TransferFunction inv;
2905             if (profile->trc[i].table_entries == 0
2906                 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2907                 tf[i] = profile->trc[i].parametric;
2908                 continue;
2909             }
2910 
2911             float max_error;
2912             // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2913             if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2914                 return false;
2915             }
2916         }
2917 
2918         for (int i = 0; i < 3; ++i) {
2919             profile->trc[i].table_entries = 0;
2920             profile->trc[i].parametric = tf[i];
2921         }
2922     }
2923     assert_usable_as_destination(profile);
2924     return true;
2925 }
2926 
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2927 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2928     // Call skcms_MakeUsableAsDestination() with B2A disabled;
2929     // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
2930     skcms_ICCProfile result = *profile;
2931     result.has_B2A = false;
2932     if (!skcms_MakeUsableAsDestination(&result)) {
2933         return false;
2934     }
2935 
2936     // Of the three, pick the transfer function that best fits the other two.
2937     int best_tf = 0;
2938     float min_max_error = INFINITY_;
2939     for (int i = 0; i < 3; i++) {
2940         skcms_TransferFunction inv;
2941         if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2942             return false;
2943         }
2944 
2945         float err = 0;
2946         for (int j = 0; j < 3; ++j) {
2947             err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2948         }
2949         if (min_max_error > err) {
2950             min_max_error = err;
2951             best_tf = i;
2952         }
2953     }
2954 
2955     for (int i = 0; i < 3; i++) {
2956         result.trc[i].parametric = result.trc[best_tf].parametric;
2957     }
2958 
2959     *profile = result;
2960     assert_usable_as_destination(profile);
2961     return true;
2962 }
2963