xref: /aosp_15_r20/external/harfbuzz_ng/src/hb-bit-page.hh (revision 2d1272b857b1f7575e6e246373e1cb218663db8a)
1 /*
2  * Copyright © 2012,2017  Google, Inc.
3  * Copyright © 2021 Behdad Esfahbod
4  *
5  *  This is part of HarfBuzz, a text shaping library.
6  *
7  * Permission is hereby granted, without written agreement and without
8  * license or royalty fees, to use, copy, modify, and distribute this
9  * software and its documentation for any purpose, provided that the
10  * above copyright notice and the following two paragraphs appear in
11  * all copies of this software.
12  *
13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17  * DAMAGE.
18  *
19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24  *
25  * Google Author(s): Behdad Esfahbod
26  */
27 
28 #ifndef HB_BIT_PAGE_HH
29 #define HB_BIT_PAGE_HH
30 
31 #include "hb.hh"
32 
33 
34 /* Compiler-assisted vectorization. */
35 
36 /* Type behaving similar to vectorized vars defined using __attribute__((vector_size(...))),
37  * basically a fixed-size bitset. We can't use the compiler type because hb_vector_t cannot
38  * guarantee alignment requirements. */
39 template <typename elt_t, unsigned int byte_size>
40 struct hb_vector_size_t
41 {
operator []hb_vector_size_t42   elt_t& operator [] (unsigned int i) { return v[i]; }
operator []hb_vector_size_t43   const elt_t& operator [] (unsigned int i) const { return v[i]; }
44 
init0hb_vector_size_t45   void init0 ()
46   {
47     for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++)
48       v[i] = 0;
49   }
init1hb_vector_size_t50   void init1 ()
51   {
52     for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++)
53       v[i] = (elt_t) -1;
54   }
55 
56   template <typename Op>
processhb_vector_size_t57   hb_vector_size_t process (const Op& op) const
58   {
59     hb_vector_size_t r;
60     for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++)
61       r.v[i] = op (v[i]);
62     return r;
63   }
64   template <typename Op>
processhb_vector_size_t65   hb_vector_size_t process (const Op& op, const hb_vector_size_t &o) const
66   {
67     hb_vector_size_t r;
68     for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++)
69       r.v[i] = op (v[i], o.v[i]);
70     return r;
71   }
operator |hb_vector_size_t72   hb_vector_size_t operator | (const hb_vector_size_t &o) const
73   { return process (hb_bitwise_or, o); }
operator &hb_vector_size_t74   hb_vector_size_t operator & (const hb_vector_size_t &o) const
75   { return process (hb_bitwise_and, o); }
operator ^hb_vector_size_t76   hb_vector_size_t operator ^ (const hb_vector_size_t &o) const
77   { return process (hb_bitwise_xor, o); }
operator ~hb_vector_size_t78   hb_vector_size_t operator ~ () const
79   { return process (hb_bitwise_neg); }
80 
iterhb_vector_size_t81   hb_array_t<const elt_t> iter () const
82   { return hb_array (v); }
83 
84   private:
85   static_assert (0 == byte_size % sizeof (elt_t), "");
86   elt_t v[byte_size / sizeof (elt_t)];
87 };
88 
89 
90 struct hb_bit_page_t
91 {
init0hb_bit_page_t92   void init0 () { v.init0 (); population = 0; }
init1hb_bit_page_t93   void init1 () { v.init1 (); population = PAGE_BITS; }
94 
dirtyhb_bit_page_t95   void dirty () { population = UINT_MAX; }
96 
lenhb_bit_page_t97   static inline constexpr unsigned len ()
98   { return ARRAY_LENGTH_CONST (v); }
99 
operator boolhb_bit_page_t100   operator bool () const { return !is_empty (); }
is_emptyhb_bit_page_t101   bool is_empty () const
102   {
103     if (has_population ()) return !population;
104     return
105     + hb_iter (v)
106     | hb_none
107     ;
108   }
hashhb_bit_page_t109   uint32_t hash () const
110   {
111     return hb_bytes_t ((const char *) &v, sizeof (v)).hash ();
112   }
113 
addhb_bit_page_t114   void add (hb_codepoint_t g) { elt (g) |= mask (g); dirty (); }
delhb_bit_page_t115   void del (hb_codepoint_t g) { elt (g) &= ~mask (g); dirty (); }
sethb_bit_page_t116   void set (hb_codepoint_t g, bool value) { if (value) add (g); else del (g); }
gethb_bit_page_t117   bool get (hb_codepoint_t g) const { return elt (g) & mask (g); }
118 
add_rangehb_bit_page_t119   void add_range (hb_codepoint_t a, hb_codepoint_t b)
120   {
121     elt_t *la = &elt (a);
122     elt_t *lb = &elt (b);
123     if (la == lb)
124       *la |= (mask (b) << 1) - mask(a);
125     else
126     {
127       *la |= ~(mask (a) - 1llu);
128       la++;
129 
130       hb_memset (la, 0xff, (char *) lb - (char *) la);
131 
132       *lb |= ((mask (b) << 1) - 1llu);
133     }
134     dirty ();
135   }
del_rangehb_bit_page_t136   void del_range (hb_codepoint_t a, hb_codepoint_t b)
137   {
138     elt_t *la = &elt (a);
139     elt_t *lb = &elt (b);
140     if (la == lb)
141       *la &= ~((mask (b) << 1llu) - mask(a));
142     else
143     {
144       *la &= mask (a) - 1;
145       la++;
146 
147       hb_memset (la, 0, (char *) lb - (char *) la);
148 
149       *lb &= ~((mask (b) << 1) - 1llu);
150     }
151     dirty ();
152   }
set_rangehb_bit_page_t153   void set_range (hb_codepoint_t a, hb_codepoint_t b, bool v)
154   { if (v) add_range (a, b); else del_range (a, b); }
155 
156 
157   // Writes out page values to the array p. Returns the number of values
158   // written. At most size codepoints will be written.
writehb_bit_page_t159   unsigned int write (uint32_t        base,
160 		      unsigned int    start_value,
161 		      hb_codepoint_t *p,
162 		      unsigned int    size) const
163   {
164     unsigned int start_v = start_value / ELT_BITS;
165     unsigned int start_bit = start_value & ELT_MASK;
166     unsigned int count = 0;
167     for (unsigned i = start_v; i < len () && count < size; i++)
168     {
169       elt_t bits = v[i];
170       uint32_t v_base = base | (i * ELT_BITS);
171       for (unsigned int j = start_bit; j < ELT_BITS && count < size; j++)
172       {
173 	if ((elt_t(1) << j) & bits) {
174 	  *p++ = v_base | j;
175 	  count++;
176 	}
177       }
178       start_bit = 0;
179     }
180     return count;
181   }
182 
183   // Writes out the values NOT in this page to the array p. Returns the
184   // number of values written. At most size codepoints will be written.
185   // Returns the number of codepoints written. next_value holds the next value
186   // that should be written (if not present in this page). This is used to fill
187   // any missing value gaps between this page and the previous page, if any.
188   // next_value is updated to one more than the last value present in this page.
write_invertedhb_bit_page_t189   unsigned int write_inverted (uint32_t        base,
190 			       unsigned int    start_value,
191 			       hb_codepoint_t *p,
192 			       unsigned int    size,
193 			       hb_codepoint_t *next_value) const
194   {
195     unsigned int start_v = start_value / ELT_BITS;
196     unsigned int start_bit = start_value & ELT_MASK;
197     unsigned int count = 0;
198     for (unsigned i = start_v; i < len () && count < size; i++)
199     {
200       elt_t bits = v[i];
201       uint32_t v_offset = i * ELT_BITS;
202       for (unsigned int j = start_bit; j < ELT_BITS && count < size; j++)
203       {
204 	if ((elt_t(1) << j) & bits)
205 	{
206 	  hb_codepoint_t value = base | v_offset | j;
207 	  // Emit all the missing values from next_value up to value - 1.
208 	  for (hb_codepoint_t k = *next_value; k < value && count < size; k++)
209 	  {
210 	    *p++ = k;
211 	    count++;
212 	  }
213 	  // Skip over this value;
214 	  *next_value = value + 1;
215 	}
216       }
217       start_bit = 0;
218     }
219     return count;
220   }
221 
operator ==hb_bit_page_t222   bool operator == (const hb_bit_page_t &other) const { return is_equal (other); }
is_equalhb_bit_page_t223   bool is_equal (const hb_bit_page_t &other) const
224   {
225     for (unsigned i = 0; i < len (); i++)
226       if (v[i] != other.v[i])
227 	return false;
228     return true;
229   }
operator <=hb_bit_page_t230   bool operator <= (const hb_bit_page_t &larger_page) const { return is_subset (larger_page); }
is_subsethb_bit_page_t231   bool is_subset (const hb_bit_page_t &larger_page) const
232   {
233     if (has_population () && larger_page.has_population () &&
234 	population > larger_page.population)
235       return false;
236 
237     for (unsigned i = 0; i < len (); i++)
238       if (~larger_page.v[i] & v[i])
239 	return false;
240     return true;
241   }
242 
has_populationhb_bit_page_t243   bool has_population () const { return population != UINT_MAX; }
get_populationhb_bit_page_t244   unsigned int get_population () const
245   {
246     if (has_population ()) return population;
247     population =
248     + hb_iter (v)
249     | hb_reduce ([] (unsigned pop, const elt_t &_) { return pop + hb_popcount (_); }, 0u)
250     ;
251     return population;
252   }
253 
nexthb_bit_page_t254   bool next (hb_codepoint_t *codepoint) const
255   {
256     unsigned int m = (*codepoint + 1) & MASK;
257     if (!m)
258     {
259       *codepoint = INVALID;
260       return false;
261     }
262     unsigned int i = m / ELT_BITS;
263     unsigned int j = m & ELT_MASK;
264 
265     const elt_t vv = v[i] & ~((elt_t (1) << j) - 1);
266     for (const elt_t *p = &vv; i < len (); p = &v[++i])
267       if (*p)
268       {
269 	*codepoint = i * ELT_BITS + elt_get_min (*p);
270 	return true;
271       }
272 
273     *codepoint = INVALID;
274     return false;
275   }
previoushb_bit_page_t276   bool previous (hb_codepoint_t *codepoint) const
277   {
278     unsigned int m = (*codepoint - 1) & MASK;
279     if (m == MASK)
280     {
281       *codepoint = INVALID;
282       return false;
283     }
284     unsigned int i = m / ELT_BITS;
285     unsigned int j = m & ELT_MASK;
286 
287     /* Fancy mask to avoid shifting by elt_t bitsize, which is undefined. */
288     const elt_t mask = j < 8 * sizeof (elt_t) - 1 ?
289 		       ((elt_t (1) << (j + 1)) - 1) :
290 		       (elt_t) -1;
291     const elt_t vv = v[i] & mask;
292     const elt_t *p = &vv;
293     while (true)
294     {
295       if (*p)
296       {
297 	*codepoint = i * ELT_BITS + elt_get_max (*p);
298 	return true;
299       }
300       if ((int) i <= 0) break;
301       p = &v[--i];
302     }
303 
304     *codepoint = INVALID;
305     return false;
306   }
get_minhb_bit_page_t307   hb_codepoint_t get_min () const
308   {
309     for (unsigned int i = 0; i < len (); i++)
310       if (v[i])
311 	return i * ELT_BITS + elt_get_min (v[i]);
312     return INVALID;
313   }
get_maxhb_bit_page_t314   hb_codepoint_t get_max () const
315   {
316     for (int i = len () - 1; i >= 0; i--)
317       if (v[i])
318 	return i * ELT_BITS + elt_get_max (v[i]);
319     return 0;
320   }
321 
322   static constexpr hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
323 
324   typedef unsigned long long elt_t;
325   static constexpr unsigned PAGE_BITS_LOG_2 = 9; // 512 bits
326   static constexpr unsigned PAGE_BITS = 1 << PAGE_BITS_LOG_2;
327   static_assert (1 << PAGE_BITS_LOG_2 == PAGE_BITS, "");
328   static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");
329   static constexpr unsigned PAGE_BITMASK = PAGE_BITS - 1;
330 
elt_get_minhb_bit_page_t331   static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); }
elt_get_maxhb_bit_page_t332   static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; }
333 
334   typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t;
335 
336   static constexpr unsigned ELT_BITS = sizeof (elt_t) * 8;
337   static constexpr unsigned ELT_MASK = ELT_BITS - 1;
338 
339   static constexpr unsigned BITS = sizeof (vector_t) * 8;
340   static constexpr unsigned MASK = BITS - 1;
341   static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, "");
342 
elthb_bit_page_t343   elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; }
elthb_bit_page_t344   const elt_t& elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; }
maskhb_bit_page_t345   static constexpr elt_t mask (hb_codepoint_t g) { return elt_t (1) << (g & ELT_MASK); }
346 
347   mutable unsigned population;
348   vector_t v;
349 };
350 
351 
352 #endif /* HB_BIT_PAGE_HH */
353