xref: /aosp_15_r20/external/harfbuzz_ng/src/hb-subset-cff-common.hh (revision 2d1272b857b1f7575e6e246373e1cb218663db8a)
1 /*
2  * Copyright © 2018 Adobe Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Adobe Author(s): Michiharu Ariza
25  */
26 
27 #ifndef HB_SUBSET_CFF_COMMON_HH
28 #define HB_SUBSET_CFF_COMMON_HH
29 
30 #include "hb.hh"
31 
32 #include "hb-subset-plan.hh"
33 #include "hb-cff-interp-cs-common.hh"
34 
35 namespace CFF {
36 
37 /* Used for writing a temporary charstring */
38 struct str_encoder_t
39 {
str_encoder_tCFF::str_encoder_t40   str_encoder_t (str_buff_t &buff_)
41     : buff (buff_) {}
42 
resetCFF::str_encoder_t43   void reset () { buff.reset (); }
44 
encode_byteCFF::str_encoder_t45   void encode_byte (unsigned char b)
46   {
47     if (likely ((signed) buff.length < buff.allocated))
48       buff.arrayZ[buff.length++] = b;
49     else
50       buff.push (b);
51   }
52 
encode_intCFF::str_encoder_t53   void encode_int (int v)
54   {
55     if ((-1131 <= v) && (v <= 1131))
56     {
57       if ((-107 <= v) && (v <= 107))
58 	encode_byte (v + 139);
59       else if (v > 0)
60       {
61 	v -= 108;
62 	encode_byte ((v >> 8) + OpCode_TwoBytePosInt0);
63 	encode_byte (v & 0xFF);
64       }
65       else
66       {
67 	v = -v - 108;
68 	encode_byte ((v >> 8) + OpCode_TwoByteNegInt0);
69 	encode_byte (v & 0xFF);
70       }
71     }
72     else
73     {
74       if (unlikely (v < -32768))
75 	v = -32768;
76       else if (unlikely (v > 32767))
77 	v = 32767;
78       encode_byte (OpCode_shortint);
79       encode_byte ((v >> 8) & 0xFF);
80       encode_byte (v & 0xFF);
81     }
82   }
83 
84   // Encode number for CharString
encode_num_csCFF::str_encoder_t85   void encode_num_cs (const number_t& n)
86   {
87     if (n.in_int_range ())
88     {
89       encode_int (n.to_int ());
90     }
91     else
92     {
93       int32_t v = n.to_fixed ();
94       encode_byte (OpCode_fixedcs);
95       encode_byte ((v >> 24) & 0xFF);
96       encode_byte ((v >> 16) & 0xFF);
97       encode_byte ((v >> 8) & 0xFF);
98       encode_byte (v & 0xFF);
99     }
100   }
101 
102   // Encode number for TopDict / Private
encode_num_tpCFF::str_encoder_t103   void encode_num_tp (const number_t& n)
104   {
105     if (n.in_int_range ())
106     {
107       // TODO longint
108       encode_int (n.to_int ());
109     }
110     else
111     {
112       // Sigh. BCD
113       // https://learn.microsoft.com/en-us/typography/opentype/spec/cff2#table-5-nibble-definitions
114       double v = n.to_real ();
115       encode_byte (OpCode_BCD);
116 
117       // Based on:
118       // https://github.com/fonttools/fonttools/blob/0738c41dfbcbc213ab9263f486ef0cccc6eb5ce5/Lib/fontTools/misc/psCharStrings.py#L267-L316
119 
120       char buf[16];
121       /* FontTools has the following comment:
122        *
123        * # Note: 14 decimal digits seems to be the limitation for CFF real numbers
124        * # in macOS. However, we use 8 here to match the implementation of AFDKO.
125        *
126        * We use 8 here to match FontTools X-).
127        */
128 
129       hb_locale_t clocale HB_UNUSED;
130       hb_locale_t oldlocale HB_UNUSED;
131       oldlocale = hb_uselocale (clocale = newlocale (LC_ALL_MASK, "C", NULL));
132       snprintf (buf, sizeof (buf), "%.8G", v);
133       (void) hb_uselocale (((void) freelocale (clocale), oldlocale));
134 
135       char *s = buf;
136       size_t len;
137       char *comma = strchr (s, ',');
138       if (comma) // Comma for some European locales in case no uselocale available.
139 	*comma = '.';
140       if (s[0] == '0' && s[1] == '.')
141 	s++;
142       else if (s[0] == '-' && s[1] == '0' && s[2] == '.')
143       {
144 	s[1] = '-';
145 	s++;
146       }
147       else if ((len = strlen (s)) > 3 && !strcmp (s + len - 3, "000"))
148       {
149 	unsigned exponent = len - 3;
150 	char *s2 = s + exponent - 1;
151 	while (*s2 == '0' && exponent > 1)
152 	{
153 	  s2--;
154 	  exponent++;
155 	}
156 	snprintf (s2 + 1, sizeof (buf) - (s2 + 1 - buf), "E%u", exponent);
157       }
158       else
159       {
160 	char *dot = strchr (s, '.');
161 	char *e = strchr (s, 'E');
162 	if (dot && e)
163 	{
164 	  memmove (dot, dot + 1, e - (dot + 1));
165 	  int exponent = atoi (e + 1);
166 	  int new_exponent = exponent - (e - (dot + 1));
167 	  if (new_exponent == 1)
168 	  {
169 	    e[-1] = '0';
170 	    e[0] = '\0';
171 	  }
172 	  else
173 	    snprintf (e - 1, sizeof (buf) - (e - 1 - buf), "E%d", new_exponent);
174 	}
175       }
176       if ((s[0] == '.' && s[1] == '0') || (s[0] == '-' && s[1] == '.' && s[2] == '0'))
177       {
178 	int sign = s[0] == '-';
179 	char *s2 = s + sign + 1;
180 	while (*s2 == '0')
181 	  s2++;
182 	len = strlen (s2);
183 	memmove (s + sign, s2, len);
184 	snprintf (s + sign + len, sizeof (buf) - (s + sign + len - buf), "E-%u", (unsigned) (strlen (s + sign) - 1));
185       }
186       hb_vector_t<char> nibbles;
187       while (*s)
188       {
189 	char c = s[0];
190 	s++;
191 
192 	switch (c)
193 	{
194 	  case 'E':
195 	  {
196 	    char c2 = *s;
197 	    if (c2 == '-')
198 	    {
199 	      s++;
200 	      nibbles.push (0x0C); // E-
201 	    } else {
202 	      if (c2 == '+')
203 		s++;
204 	      nibbles.push (0x0B); // E
205 	    }
206 	    if (*s == '0')
207 	      s++;
208 	    continue;
209 	  }
210 
211 	  case '.':
212 	    nibbles.push (0x0A); // .
213 	    continue;
214 
215 	  case '-':
216 	    nibbles.push (0x0E); // -
217 	    continue;
218 	}
219 
220 	nibbles.push (c - '0');
221       }
222       nibbles.push (0x0F);
223       if (nibbles.length % 2)
224 	nibbles.push (0x0F);
225 
226       unsigned count = nibbles.length;
227       for (unsigned i = 0; i < count; i += 2)
228         encode_byte ((nibbles[i] << 4) | nibbles[i+1]);
229     }
230   }
231 
encode_opCFF::str_encoder_t232   void encode_op (op_code_t op)
233   {
234     if (Is_OpCode_ESC (op))
235     {
236       encode_byte (OpCode_escape);
237       encode_byte (Unmake_OpCode_ESC (op));
238     }
239     else
240       encode_byte (op);
241   }
242 
copy_strCFF::str_encoder_t243   void copy_str (const unsigned char *str, unsigned length)
244   {
245     assert ((signed) (buff.length + length) <= buff.allocated);
246     hb_memcpy (buff.arrayZ + buff.length, str, length);
247     buff.length += length;
248   }
249 
in_errorCFF::str_encoder_t250   bool in_error () const { return buff.in_error (); }
251 
252   protected:
253 
254   str_buff_t &buff;
255 };
256 
257 struct cff_sub_table_info_t {
cff_sub_table_info_tCFF::cff_sub_table_info_t258   cff_sub_table_info_t ()
259     : fd_array_link (0),
260       char_strings_link (0)
261   {
262     fd_select.init ();
263   }
264 
265   table_info_t     fd_select;
266   objidx_t     	   fd_array_link;
267   objidx_t     	   char_strings_link;
268 };
269 
270 template <typename OPSTR=op_str_t>
271 struct cff_top_dict_op_serializer_t : op_serializer_t
272 {
serializeCFF::cff_top_dict_op_serializer_t273   bool serialize (hb_serialize_context_t *c,
274 		  const OPSTR &opstr,
275 		  const cff_sub_table_info_t &info) const
276   {
277     TRACE_SERIALIZE (this);
278 
279     switch (opstr.op)
280     {
281       case OpCode_CharStrings:
282 	return_trace (FontDict::serialize_link4_op(c, opstr.op, info.char_strings_link, whence_t::Absolute));
283 
284       case OpCode_FDArray:
285 	return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_array_link, whence_t::Absolute));
286 
287       case OpCode_FDSelect:
288 	return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_select.link, whence_t::Absolute));
289 
290       default:
291 	return_trace (copy_opstr (c, opstr));
292     }
293     return_trace (true);
294   }
295 };
296 
297 struct cff_font_dict_op_serializer_t : op_serializer_t
298 {
serializeCFF::cff_font_dict_op_serializer_t299   bool serialize (hb_serialize_context_t *c,
300 		  const op_str_t &opstr,
301 		  const table_info_t &privateDictInfo) const
302   {
303     TRACE_SERIALIZE (this);
304 
305     if (opstr.op == OpCode_Private)
306     {
307       /* serialize the private dict size & offset as 2-byte & 4-byte integers */
308       return_trace (UnsizedByteStr::serialize_int2 (c, privateDictInfo.size) &&
309 		    Dict::serialize_link4_op (c, opstr.op, privateDictInfo.link, whence_t::Absolute));
310     }
311     else
312     {
313       unsigned char *d = c->allocate_size<unsigned char> (opstr.length);
314       if (unlikely (!d)) return_trace (false);
315       /* Faster than hb_memcpy for small strings. */
316       for (unsigned i = 0; i < opstr.length; i++)
317 	d[i] = opstr.ptr[i];
318       //hb_memcpy (d, opstr.ptr, opstr.length);
319     }
320     return_trace (true);
321   }
322 };
323 
324 struct flatten_param_t
325 {
326   str_buff_t     &flatStr;
327   bool	drop_hints;
328   const hb_subset_plan_t *plan;
329 };
330 
331 template <typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid>
332 struct subr_flattener_t
333 {
subr_flattener_tCFF::subr_flattener_t334   subr_flattener_t (const ACC &acc_,
335 		    const hb_subset_plan_t *plan_)
336 		   : acc (acc_), plan (plan_) {}
337 
flattenCFF::subr_flattener_t338   bool flatten (str_buff_vec_t &flat_charstrings)
339   {
340     unsigned count = plan->num_output_glyphs ();
341     if (!flat_charstrings.resize_exact (count))
342       return false;
343     for (unsigned int i = 0; i < count; i++)
344     {
345       hb_codepoint_t  glyph;
346       if (!plan->old_gid_for_new_gid (i, &glyph))
347       {
348 	/* add an endchar only charstring for a missing glyph if CFF1 */
349 	if (endchar_op != OpCode_Invalid) flat_charstrings[i].push (endchar_op);
350 	continue;
351       }
352       const hb_ubytes_t str = (*acc.charStrings)[glyph];
353       unsigned int fd = acc.fdSelect->get_fd (glyph);
354       if (unlikely (fd >= acc.fdCount))
355 	return false;
356 
357 
358       ENV env (str, acc, fd,
359 	       plan->normalized_coords.arrayZ, plan->normalized_coords.length);
360       cs_interpreter_t<ENV, OPSET, flatten_param_t> interp (env);
361       flatten_param_t  param = {
362         flat_charstrings.arrayZ[i],
363         (bool) (plan->flags & HB_SUBSET_FLAGS_NO_HINTING),
364 	plan
365       };
366       if (unlikely (!interp.interpret (param)))
367 	return false;
368     }
369     return true;
370   }
371 
372   const ACC &acc;
373   const hb_subset_plan_t *plan;
374 };
375 
376 struct subr_closures_t
377 {
subr_closures_tCFF::subr_closures_t378   subr_closures_t (unsigned int fd_count) : global_closure (), local_closures ()
379   {
380     local_closures.resize_exact (fd_count);
381   }
382 
resetCFF::subr_closures_t383   void reset ()
384   {
385     global_closure.clear();
386     for (unsigned int i = 0; i < local_closures.length; i++)
387       local_closures[i].clear();
388   }
389 
in_errorCFF::subr_closures_t390   bool in_error () const { return local_closures.in_error (); }
391   hb_set_t  global_closure;
392   hb_vector_t<hb_set_t> local_closures;
393 };
394 
395 struct parsed_cs_op_t : op_str_t
396 {
parsed_cs_op_tCFF::parsed_cs_op_t397   parsed_cs_op_t (unsigned int subr_num_ = 0) :
398     subr_num (subr_num_) {}
399 
is_hintingCFF::parsed_cs_op_t400   bool is_hinting () const { return hinting_flag; }
set_hintingCFF::parsed_cs_op_t401   void set_hinting ()       { hinting_flag = true; }
402 
403   /* The layout of this struct is designed to fit within the
404    * padding of op_str_t! */
405 
406   protected:
407   bool	  hinting_flag = false;
408 
409   public:
410   uint16_t subr_num;
411 };
412 
413 struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t>
414 {
parsed_cs_str_tCFF::parsed_cs_str_t415   parsed_cs_str_t () :
416     parsed (false),
417     hint_dropped (false),
418     has_prefix_ (false),
419     has_calls_ (false)
420   {
421     SUPER::init ();
422   }
423 
add_opCFF::parsed_cs_str_t424   void add_op (op_code_t op, const byte_str_ref_t& str_ref)
425   {
426     if (!is_parsed ())
427       SUPER::add_op (op, str_ref);
428   }
429 
add_call_opCFF::parsed_cs_str_t430   void add_call_op (op_code_t op, const byte_str_ref_t& str_ref, unsigned int subr_num)
431   {
432     if (!is_parsed ())
433     {
434       has_calls_ = true;
435 
436       /* Pop the subroutine number. */
437       values.pop ();
438 
439       SUPER::add_op (op, str_ref, {subr_num});
440     }
441   }
442 
set_prefixCFF::parsed_cs_str_t443   void set_prefix (const number_t &num, op_code_t op = OpCode_Invalid)
444   {
445     has_prefix_ = true;
446     prefix_op_ = op;
447     prefix_num_ = num;
448   }
449 
at_endCFF::parsed_cs_str_t450   bool at_end (unsigned int pos) const
451   {
452     return ((pos + 1 >= values.length) /* CFF2 */
453 	|| (values[pos + 1].op == OpCode_return));
454   }
455 
is_parsedCFF::parsed_cs_str_t456   bool is_parsed () const { return parsed; }
set_parsedCFF::parsed_cs_str_t457   void set_parsed ()      { parsed = true; }
458 
is_hint_droppedCFF::parsed_cs_str_t459   bool is_hint_dropped () const { return hint_dropped; }
set_hint_droppedCFF::parsed_cs_str_t460   void set_hint_dropped ()      { hint_dropped = true; }
461 
is_vsindex_droppedCFF::parsed_cs_str_t462   bool is_vsindex_dropped () const { return vsindex_dropped; }
set_vsindex_droppedCFF::parsed_cs_str_t463   void set_vsindex_dropped ()      { vsindex_dropped = true; }
464 
has_prefixCFF::parsed_cs_str_t465   bool has_prefix () const          { return has_prefix_; }
prefix_opCFF::parsed_cs_str_t466   op_code_t prefix_op () const         { return prefix_op_; }
prefix_numCFF::parsed_cs_str_t467   const number_t &prefix_num () const { return prefix_num_; }
468 
has_callsCFF::parsed_cs_str_t469   bool has_calls () const          { return has_calls_; }
470 
compactCFF::parsed_cs_str_t471   void compact ()
472   {
473     unsigned count = values.length;
474     if (!count) return;
475     auto &opstr = values.arrayZ;
476     unsigned j = 0;
477     for (unsigned i = 1; i < count; i++)
478     {
479       /* See if we can combine op j and op i. */
480       bool combine =
481         (opstr[j].op != OpCode_callsubr && opstr[j].op != OpCode_callgsubr) &&
482         (opstr[i].op != OpCode_callsubr && opstr[i].op != OpCode_callgsubr) &&
483         (opstr[j].is_hinting () == opstr[i].is_hinting ()) &&
484         (opstr[j].ptr + opstr[j].length == opstr[i].ptr) &&
485         (opstr[j].length + opstr[i].length <= 255);
486 
487       if (combine)
488       {
489 	opstr[j].length += opstr[i].length;
490 	opstr[j].op = OpCode_Invalid;
491       }
492       else
493       {
494 	opstr[++j] = opstr[i];
495       }
496     }
497     values.shrink (j + 1);
498   }
499 
500   protected:
501   bool    parsed : 1;
502   bool    hint_dropped : 1;
503   bool    vsindex_dropped : 1;
504   bool    has_prefix_ : 1;
505   bool    has_calls_ : 1;
506   op_code_t	prefix_op_;
507   number_t	prefix_num_;
508 
509   private:
510   typedef parsed_values_t<parsed_cs_op_t> SUPER;
511 };
512 
513 struct parsed_cs_str_vec_t : hb_vector_t<parsed_cs_str_t>
514 {
515   private:
516   typedef hb_vector_t<parsed_cs_str_t> SUPER;
517 };
518 
519 struct cff_subset_accelerator_t
520 {
createCFF::cff_subset_accelerator_t521   static cff_subset_accelerator_t* create (
522       hb_blob_t* original_blob,
523       const parsed_cs_str_vec_t& parsed_charstrings,
524       const parsed_cs_str_vec_t& parsed_global_subrs,
525       const hb_vector_t<parsed_cs_str_vec_t>& parsed_local_subrs) {
526     cff_subset_accelerator_t* accel =
527         (cff_subset_accelerator_t*) hb_malloc (sizeof(cff_subset_accelerator_t));
528     if (unlikely (!accel)) return nullptr;
529     new (accel) cff_subset_accelerator_t (original_blob,
530                                           parsed_charstrings,
531                                           parsed_global_subrs,
532                                           parsed_local_subrs);
533     return accel;
534   }
535 
destroyCFF::cff_subset_accelerator_t536   static void destroy (void* value) {
537     if (!value) return;
538 
539     cff_subset_accelerator_t* accel = (cff_subset_accelerator_t*) value;
540     accel->~cff_subset_accelerator_t ();
541     hb_free (accel);
542   }
543 
cff_subset_accelerator_tCFF::cff_subset_accelerator_t544   cff_subset_accelerator_t(
545       hb_blob_t* original_blob_,
546       const parsed_cs_str_vec_t& parsed_charstrings_,
547       const parsed_cs_str_vec_t& parsed_global_subrs_,
548       const hb_vector_t<parsed_cs_str_vec_t>& parsed_local_subrs_)
549   {
550     parsed_charstrings = parsed_charstrings_;
551     parsed_global_subrs = parsed_global_subrs_;
552     parsed_local_subrs = parsed_local_subrs_;
553 
554     // the parsed charstrings point to memory in the original CFF table so we must hold a reference
555     // to it to keep the memory valid.
556     original_blob = hb_blob_reference (original_blob_);
557   }
558 
~cff_subset_accelerator_tCFF::cff_subset_accelerator_t559   ~cff_subset_accelerator_t()
560   {
561     hb_blob_destroy (original_blob);
562     auto *mapping = glyph_to_sid_map.get_relaxed ();
563     if (mapping)
564     {
565       mapping->~glyph_to_sid_map_t ();
566       hb_free (mapping);
567     }
568   }
569 
570   parsed_cs_str_vec_t parsed_charstrings;
571   parsed_cs_str_vec_t parsed_global_subrs;
572   hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs;
573   mutable hb_atomic_ptr_t<glyph_to_sid_map_t> glyph_to_sid_map;
574 
575  private:
576   hb_blob_t* original_blob;
577 };
578 
579 struct subr_subset_param_t
580 {
subr_subset_param_tCFF::subr_subset_param_t581   subr_subset_param_t (parsed_cs_str_t *parsed_charstring_,
582 		       parsed_cs_str_vec_t *parsed_global_subrs_,
583 		       parsed_cs_str_vec_t *parsed_local_subrs_,
584 		       hb_set_t *global_closure_,
585 		       hb_set_t *local_closure_,
586 		       bool drop_hints_) :
587       current_parsed_str (parsed_charstring_),
588       parsed_charstring (parsed_charstring_),
589       parsed_global_subrs (parsed_global_subrs_),
590       parsed_local_subrs (parsed_local_subrs_),
591       global_closure (global_closure_),
592       local_closure (local_closure_),
593       drop_hints (drop_hints_) {}
594 
get_parsed_str_for_contextCFF::subr_subset_param_t595   parsed_cs_str_t *get_parsed_str_for_context (call_context_t &context)
596   {
597     switch (context.type)
598     {
599       case CSType_CharString:
600 	return parsed_charstring;
601 
602       case CSType_LocalSubr:
603 	if (likely (context.subr_num < parsed_local_subrs->length))
604 	  return &(*parsed_local_subrs)[context.subr_num];
605 	break;
606 
607       case CSType_GlobalSubr:
608 	if (likely (context.subr_num < parsed_global_subrs->length))
609 	  return &(*parsed_global_subrs)[context.subr_num];
610 	break;
611     }
612     return nullptr;
613   }
614 
615   template <typename ENV>
set_current_strCFF::subr_subset_param_t616   void set_current_str (ENV &env, bool calling)
617   {
618     parsed_cs_str_t *parsed_str = get_parsed_str_for_context (env.context);
619     if (unlikely (!parsed_str))
620     {
621       env.set_error ();
622       return;
623     }
624     /* If the called subroutine is parsed partially but not completely yet,
625      * it must be because we are calling it recursively.
626      * Handle it as an error. */
627     if (unlikely (calling && !parsed_str->is_parsed () && (parsed_str->values.length > 0)))
628       env.set_error ();
629     else
630     {
631       if (!parsed_str->is_parsed ())
632         parsed_str->alloc (env.str_ref.total_size ());
633       current_parsed_str = parsed_str;
634     }
635   }
636 
637   parsed_cs_str_t	*current_parsed_str;
638 
639   parsed_cs_str_t	*parsed_charstring;
640   parsed_cs_str_vec_t	*parsed_global_subrs;
641   parsed_cs_str_vec_t	*parsed_local_subrs;
642   hb_set_t      *global_closure;
643   hb_set_t      *local_closure;
644   bool	  drop_hints;
645 };
646 
647 struct subr_remap_t : hb_inc_bimap_t
648 {
createCFF::subr_remap_t649   void create (const hb_set_t *closure)
650   {
651     /* create a remapping of subroutine numbers from old to new.
652      * no optimization based on usage counts. fonttools doesn't appear doing that either.
653      */
654 
655     alloc (closure->get_population ());
656     for (auto old_num : *closure)
657       add (old_num);
658 
659     if (get_population () < 1240)
660       bias = 107;
661     else if (get_population () < 33900)
662       bias = 1131;
663     else
664       bias = 32768;
665   }
666 
biased_numCFF::subr_remap_t667   int biased_num (unsigned int old_num) const
668   {
669     hb_codepoint_t new_num = get (old_num);
670     return (int)new_num - bias;
671   }
672 
673   protected:
674   int bias;
675 };
676 
677 struct subr_remaps_t
678 {
subr_remaps_tCFF::subr_remaps_t679   subr_remaps_t (unsigned int fdCount)
680   {
681     local_remaps.resize (fdCount);
682   }
683 
in_errorCFF::subr_remaps_t684   bool in_error()
685   {
686     return local_remaps.in_error ();
687   }
688 
createCFF::subr_remaps_t689   void create (subr_closures_t& closures)
690   {
691     global_remap.create (&closures.global_closure);
692     for (unsigned int i = 0; i < local_remaps.length; i++)
693       local_remaps.arrayZ[i].create (&closures.local_closures[i]);
694   }
695 
696   subr_remap_t	       global_remap;
697   hb_vector_t<subr_remap_t>  local_remaps;
698 };
699 
700 template <typename SUBSETTER, typename SUBRS, typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid>
701 struct subr_subsetter_t
702 {
subr_subsetter_tCFF::subr_subsetter_t703   subr_subsetter_t (ACC &acc_, const hb_subset_plan_t *plan_)
704       : acc (acc_), plan (plan_), closures(acc_.fdCount),
705         remaps(acc_.fdCount)
706   {}
707 
708   /* Subroutine subsetting with --no-desubroutinize runs in phases:
709    *
710    * 1. execute charstrings/subroutines to determine subroutine closures
711    * 2. parse out all operators and numbers
712    * 3. mark hint operators and operands for removal if --no-hinting
713    * 4. re-encode all charstrings and subroutines with new subroutine numbers
714    *
715    * Phases #1 and #2 are done at the same time in collect_subrs ().
716    * Phase #3 walks charstrings/subroutines forward then backward (hence parsing required),
717    * because we can't tell if a number belongs to a hint op until we see the first moveto.
718    *
719    * Assumption: a callsubr/callgsubr operator must immediately follow a (biased) subroutine number
720    * within the same charstring/subroutine, e.g., not split across a charstring and a subroutine.
721    */
subsetCFF::subr_subsetter_t722   bool subset (void)
723   {
724     unsigned fd_count = acc.fdCount;
725     const cff_subset_accelerator_t* cff_accelerator = nullptr;
726     if (acc.cff_accelerator) {
727       cff_accelerator = acc.cff_accelerator;
728       fd_count = cff_accelerator->parsed_local_subrs.length;
729     }
730 
731     if (cff_accelerator) {
732       // If we are not dropping hinting then charstrings are not modified so we can
733       // just use a reference to the cached copies.
734       cached_charstrings.resize_exact (plan->num_output_glyphs ());
735       parsed_global_subrs = &cff_accelerator->parsed_global_subrs;
736       parsed_local_subrs = &cff_accelerator->parsed_local_subrs;
737     } else {
738       parsed_charstrings.resize_exact (plan->num_output_glyphs ());
739       parsed_global_subrs_storage.resize_exact (acc.globalSubrs->count);
740 
741       if (unlikely (!parsed_local_subrs_storage.resize (fd_count))) return false;
742 
743       for (unsigned int i = 0; i < acc.fdCount; i++)
744       {
745         unsigned count = acc.privateDicts[i].localSubrs->count;
746         parsed_local_subrs_storage[i].resize (count);
747         if (unlikely (parsed_local_subrs_storage[i].in_error ())) return false;
748       }
749 
750       parsed_global_subrs = &parsed_global_subrs_storage;
751       parsed_local_subrs = &parsed_local_subrs_storage;
752     }
753 
754     if (unlikely (remaps.in_error()
755                   || cached_charstrings.in_error ()
756                   || parsed_charstrings.in_error ()
757                   || parsed_global_subrs->in_error ()
758                   || closures.in_error ())) {
759       return false;
760     }
761 
762     /* phase 1 & 2 */
763     for (auto _ : plan->new_to_old_gid_list)
764     {
765       hb_codepoint_t new_glyph = _.first;
766       hb_codepoint_t old_glyph = _.second;
767 
768       const hb_ubytes_t str = (*acc.charStrings)[old_glyph];
769       unsigned int fd = acc.fdSelect->get_fd (old_glyph);
770       if (unlikely (fd >= acc.fdCount))
771         return false;
772 
773       if (cff_accelerator)
774       {
775         // parsed string already exists in accelerator, copy it and move
776         // on.
777         if (cached_charstrings)
778           cached_charstrings[new_glyph] = &cff_accelerator->parsed_charstrings[old_glyph];
779         else
780           parsed_charstrings[new_glyph] = cff_accelerator->parsed_charstrings[old_glyph];
781 
782         continue;
783       }
784 
785       ENV env (str, acc, fd);
786       cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp (env);
787 
788       parsed_charstrings[new_glyph].alloc (str.length);
789       subr_subset_param_t  param (&parsed_charstrings[new_glyph],
790                                   &parsed_global_subrs_storage,
791                                   &parsed_local_subrs_storage[fd],
792                                   &closures.global_closure,
793                                   &closures.local_closures[fd],
794                                   plan->flags & HB_SUBSET_FLAGS_NO_HINTING);
795 
796       if (unlikely (!interp.interpret (param)))
797         return false;
798 
799       /* complete parsed string esp. copy CFF1 width or CFF2 vsindex to the parsed charstring for encoding */
800       SUBSETTER::complete_parsed_str (interp.env, param, parsed_charstrings[new_glyph]);
801 
802       /* mark hint ops and arguments for drop */
803       if ((plan->flags & HB_SUBSET_FLAGS_NO_HINTING) || plan->inprogress_accelerator)
804       {
805 	subr_subset_param_t  param (&parsed_charstrings[new_glyph],
806 				    &parsed_global_subrs_storage,
807 				    &parsed_local_subrs_storage[fd],
808 				    &closures.global_closure,
809 				    &closures.local_closures[fd],
810 				    plan->flags & HB_SUBSET_FLAGS_NO_HINTING);
811 
812 	drop_hints_param_t  drop;
813 	if (drop_hints_in_str (parsed_charstrings[new_glyph], param, drop))
814 	{
815 	  parsed_charstrings[new_glyph].set_hint_dropped ();
816 	  if (drop.vsindex_dropped)
817 	    parsed_charstrings[new_glyph].set_vsindex_dropped ();
818 	}
819       }
820 
821       /* Doing this here one by one instead of compacting all at the end
822        * has massive peak-memory saving.
823        *
824        * The compacting both saves memory and makes further operations
825        * faster.
826        */
827       parsed_charstrings[new_glyph].compact ();
828     }
829 
830     /* Since parsed strings were loaded from accelerator, we still need
831      * to compute the subroutine closures which would have normally happened during
832      * parsing.
833      *
834      * Or if we are dropping hinting, redo closure to get actually used subrs.
835      */
836     if ((cff_accelerator ||
837 	(!cff_accelerator && plan->flags & HB_SUBSET_FLAGS_NO_HINTING)) &&
838         !closure_subroutines(*parsed_global_subrs,
839                              *parsed_local_subrs))
840       return false;
841 
842     remaps.create (closures);
843 
844     populate_subset_accelerator ();
845     return true;
846   }
847 
encode_charstringsCFF::subr_subsetter_t848   bool encode_charstrings (str_buff_vec_t &buffArray, bool encode_prefix = true) const
849   {
850     unsigned num_glyphs = plan->num_output_glyphs ();
851     if (unlikely (!buffArray.resize_exact (num_glyphs)))
852       return false;
853     hb_codepoint_t last = 0;
854     for (auto _ : plan->new_to_old_gid_list)
855     {
856       hb_codepoint_t gid = _.first;
857       hb_codepoint_t old_glyph = _.second;
858 
859       if (endchar_op != OpCode_Invalid)
860         for (; last < gid; last++)
861 	{
862 	  // Hack to point vector to static string.
863 	  auto &b = buffArray.arrayZ[last];
864 	  b.length = 1;
865 	  b.arrayZ = const_cast<unsigned char *>(endchar_str);
866 	}
867 
868       last++; // Skip over gid
869       unsigned int  fd = acc.fdSelect->get_fd (old_glyph);
870       if (unlikely (fd >= acc.fdCount))
871 	return false;
872       if (unlikely (!encode_str (get_parsed_charstring (gid), fd, buffArray.arrayZ[gid], encode_prefix)))
873 	return false;
874     }
875     if (endchar_op != OpCode_Invalid)
876       for (; last < num_glyphs; last++)
877       {
878 	// Hack to point vector to static string.
879 	auto &b = buffArray.arrayZ[last];
880 	b.length = 1;
881 	b.arrayZ = const_cast<unsigned char *>(endchar_str);
882       }
883 
884     return true;
885   }
886 
encode_subrsCFF::subr_subsetter_t887   bool encode_subrs (const parsed_cs_str_vec_t &subrs, const subr_remap_t& remap, unsigned int fd, str_buff_vec_t &buffArray) const
888   {
889     unsigned int  count = remap.get_population ();
890 
891     if (unlikely (!buffArray.resize_exact (count)))
892       return false;
893     for (unsigned int new_num = 0; new_num < count; new_num++)
894     {
895       hb_codepoint_t old_num = remap.backward (new_num);
896       assert (old_num != CFF_UNDEF_CODE);
897 
898       if (unlikely (!encode_str (subrs[old_num], fd, buffArray[new_num])))
899 	return false;
900     }
901     return true;
902   }
903 
encode_globalsubrsCFF::subr_subsetter_t904   bool encode_globalsubrs (str_buff_vec_t &buffArray)
905   {
906     return encode_subrs (*parsed_global_subrs, remaps.global_remap, 0, buffArray);
907   }
908 
encode_localsubrsCFF::subr_subsetter_t909   bool encode_localsubrs (unsigned int fd, str_buff_vec_t &buffArray) const
910   {
911     return encode_subrs ((*parsed_local_subrs)[fd], remaps.local_remaps[fd], fd, buffArray);
912   }
913 
914   protected:
915   struct drop_hints_param_t
916   {
drop_hints_param_tCFF::subr_subsetter_t::drop_hints_param_t917     drop_hints_param_t ()
918       : seen_moveto (false),
919 	ends_in_hint (false),
920 	all_dropped (false),
921 	vsindex_dropped (false) {}
922 
923     bool  seen_moveto;
924     bool  ends_in_hint;
925     bool  all_dropped;
926     bool  vsindex_dropped;
927   };
928 
drop_hints_in_subrCFF::subr_subsetter_t929   bool drop_hints_in_subr (parsed_cs_str_t &str, unsigned int pos,
930 			   parsed_cs_str_vec_t &subrs, unsigned int subr_num,
931 			   const subr_subset_param_t &param, drop_hints_param_t &drop)
932   {
933     drop.ends_in_hint = false;
934     bool has_hint = drop_hints_in_str (subrs[subr_num], param, drop);
935 
936     /* if this subr ends with a stem hint (i.e., not a number; potential argument for moveto),
937      * then this entire subroutine must be a hint. drop its call. */
938     if (drop.ends_in_hint)
939     {
940       str.values[pos].set_hinting ();
941       /* if this subr call is at the end of the parent subr, propagate the flag
942        * otherwise reset the flag */
943       if (!str.at_end (pos))
944 	drop.ends_in_hint = false;
945     }
946     else if (drop.all_dropped)
947     {
948       str.values[pos].set_hinting ();
949     }
950 
951     return has_hint;
952   }
953 
954   /* returns true if it sees a hint op before the first moveto */
drop_hints_in_strCFF::subr_subsetter_t955   bool drop_hints_in_str (parsed_cs_str_t &str, const subr_subset_param_t &param, drop_hints_param_t &drop)
956   {
957     bool  seen_hint = false;
958 
959     unsigned count = str.values.length;
960     auto *values = str.values.arrayZ;
961     for (unsigned int pos = 0; pos < count; pos++)
962     {
963       bool  has_hint = false;
964       switch (values[pos].op)
965       {
966 	case OpCode_callsubr:
967 	  has_hint = drop_hints_in_subr (str, pos,
968 					*param.parsed_local_subrs, values[pos].subr_num,
969 					param, drop);
970 	  break;
971 
972 	case OpCode_callgsubr:
973 	  has_hint = drop_hints_in_subr (str, pos,
974 					*param.parsed_global_subrs, values[pos].subr_num,
975 					param, drop);
976 	  break;
977 
978 	case OpCode_rmoveto:
979 	case OpCode_hmoveto:
980 	case OpCode_vmoveto:
981 	  drop.seen_moveto = true;
982 	  break;
983 
984 	case OpCode_hintmask:
985 	case OpCode_cntrmask:
986 	  if (drop.seen_moveto)
987 	  {
988 	    values[pos].set_hinting ();
989 	    break;
990 	  }
991 	  HB_FALLTHROUGH;
992 
993 	case OpCode_hstemhm:
994 	case OpCode_vstemhm:
995 	case OpCode_hstem:
996 	case OpCode_vstem:
997 	  has_hint = true;
998 	  values[pos].set_hinting ();
999 	  if (str.at_end (pos))
1000 	    drop.ends_in_hint = true;
1001 	  break;
1002 
1003 	case OpCode_dotsection:
1004 	  values[pos].set_hinting ();
1005 	  break;
1006 
1007 	default:
1008 	  /* NONE */
1009 	  break;
1010       }
1011       if (has_hint)
1012       {
1013 	for (int i = pos - 1; i >= 0; i--)
1014 	{
1015 	  parsed_cs_op_t  &csop = values[(unsigned)i];
1016 	  if (csop.is_hinting ())
1017 	    break;
1018 	  csop.set_hinting ();
1019 	  if (csop.op == OpCode_vsindexcs)
1020 	    drop.vsindex_dropped = true;
1021 	}
1022 	seen_hint |= has_hint;
1023       }
1024     }
1025 
1026     /* Raise all_dropped flag if all operators except return are dropped from a subr.
1027      * It may happen even after seeing the first moveto if a subr contains
1028      * only (usually one) hintmask operator, then calls to this subr can be dropped.
1029      */
1030     drop.all_dropped = true;
1031     for (unsigned int pos = 0; pos < count; pos++)
1032     {
1033       parsed_cs_op_t  &csop = values[pos];
1034       if (csop.op == OpCode_return)
1035 	break;
1036       if (!csop.is_hinting ())
1037       {
1038 	drop.all_dropped = false;
1039 	break;
1040       }
1041     }
1042 
1043     return seen_hint;
1044   }
1045 
closure_subroutinesCFF::subr_subsetter_t1046   bool closure_subroutines (const parsed_cs_str_vec_t& global_subrs,
1047                             const hb_vector_t<parsed_cs_str_vec_t>& local_subrs)
1048   {
1049     closures.reset ();
1050     for (auto _ : plan->new_to_old_gid_list)
1051     {
1052       hb_codepoint_t new_glyph = _.first;
1053       hb_codepoint_t old_glyph = _.second;
1054       unsigned int fd = acc.fdSelect->get_fd (old_glyph);
1055       if (unlikely (fd >= acc.fdCount))
1056         return false;
1057 
1058       // Note: const cast is safe here because the collect_subr_refs_in_str only performs a
1059       //       closure and does not modify any of the charstrings.
1060       subr_subset_param_t  param (const_cast<parsed_cs_str_t*> (&get_parsed_charstring (new_glyph)),
1061                                   const_cast<parsed_cs_str_vec_t*> (&global_subrs),
1062                                   const_cast<parsed_cs_str_vec_t*> (&local_subrs[fd]),
1063                                   &closures.global_closure,
1064                                   &closures.local_closures[fd],
1065                                   plan->flags & HB_SUBSET_FLAGS_NO_HINTING);
1066       collect_subr_refs_in_str (get_parsed_charstring (new_glyph), param);
1067     }
1068 
1069     return true;
1070   }
1071 
collect_subr_refs_in_subrCFF::subr_subsetter_t1072   void collect_subr_refs_in_subr (unsigned int subr_num, parsed_cs_str_vec_t &subrs,
1073 				  hb_set_t *closure,
1074 				  const subr_subset_param_t &param)
1075   {
1076     if (closure->has (subr_num))
1077       return;
1078     closure->add (subr_num);
1079     collect_subr_refs_in_str (subrs[subr_num], param);
1080   }
1081 
collect_subr_refs_in_strCFF::subr_subsetter_t1082   void collect_subr_refs_in_str (const parsed_cs_str_t &str,
1083                                  const subr_subset_param_t &param)
1084   {
1085     if (!str.has_calls ())
1086       return;
1087 
1088     for (auto &opstr : str.values)
1089     {
1090       if (!param.drop_hints || !opstr.is_hinting ())
1091       {
1092 	switch (opstr.op)
1093 	{
1094 	  case OpCode_callsubr:
1095 	    collect_subr_refs_in_subr (opstr.subr_num, *param.parsed_local_subrs,
1096 				       param.local_closure, param);
1097 	    break;
1098 
1099 	  case OpCode_callgsubr:
1100 	    collect_subr_refs_in_subr (opstr.subr_num, *param.parsed_global_subrs,
1101 				       param.global_closure, param);
1102 	    break;
1103 
1104 	  default: break;
1105 	}
1106       }
1107     }
1108   }
1109 
encode_strCFF::subr_subsetter_t1110   bool encode_str (const parsed_cs_str_t &str, const unsigned int fd, str_buff_t &buff, bool encode_prefix = true) const
1111   {
1112     str_encoder_t  encoder (buff);
1113     encoder.reset ();
1114     bool hinting = !(plan->flags & HB_SUBSET_FLAGS_NO_HINTING);
1115     /* if a prefix (CFF1 width or CFF2 vsindex) has been removed along with hints,
1116      * re-insert it at the beginning of charstreing */
1117     if (encode_prefix && str.has_prefix () && !hinting && str.is_hint_dropped ())
1118     {
1119       encoder.encode_num_cs (str.prefix_num ());
1120       if (str.prefix_op () != OpCode_Invalid)
1121 	encoder.encode_op (str.prefix_op ());
1122     }
1123 
1124     unsigned size = 0;
1125     for (auto &opstr : str.values)
1126     {
1127       size += opstr.length;
1128       if (opstr.op == OpCode_callsubr || opstr.op == OpCode_callgsubr)
1129         size += 3;
1130     }
1131     if (!buff.alloc (buff.length + size, true))
1132       return false;
1133 
1134     for (auto &opstr : str.values)
1135     {
1136       if (hinting || !opstr.is_hinting ())
1137       {
1138 	switch (opstr.op)
1139 	{
1140 	  case OpCode_callsubr:
1141 	    encoder.encode_int (remaps.local_remaps[fd].biased_num (opstr.subr_num));
1142 	    encoder.copy_str (opstr.ptr, opstr.length);
1143 	    break;
1144 
1145 	  case OpCode_callgsubr:
1146 	    encoder.encode_int (remaps.global_remap.biased_num (opstr.subr_num));
1147 	    encoder.copy_str (opstr.ptr, opstr.length);
1148 	    break;
1149 
1150 	  default:
1151 	    encoder.copy_str (opstr.ptr, opstr.length);
1152 	    break;
1153 	}
1154       }
1155     }
1156     return !encoder.in_error ();
1157   }
1158 
compact_parsed_subrsCFF::subr_subsetter_t1159   void compact_parsed_subrs () const
1160   {
1161     for (auto &cs : parsed_global_subrs_storage)
1162       cs.compact ();
1163     for (auto &vec : parsed_local_subrs_storage)
1164       for (auto &cs : vec)
1165 	cs.compact ();
1166   }
1167 
populate_subset_acceleratorCFF::subr_subsetter_t1168   void populate_subset_accelerator () const
1169   {
1170     if (!plan->inprogress_accelerator) return;
1171 
1172     compact_parsed_subrs ();
1173 
1174     acc.cff_accelerator =
1175         cff_subset_accelerator_t::create(acc.blob,
1176                                          parsed_charstrings,
1177                                          parsed_global_subrs_storage,
1178                                          parsed_local_subrs_storage);
1179   }
1180 
get_parsed_charstringCFF::subr_subsetter_t1181   const parsed_cs_str_t& get_parsed_charstring (unsigned i) const
1182   {
1183     if (cached_charstrings) return *(cached_charstrings[i]);
1184     return parsed_charstrings[i];
1185   }
1186 
1187   protected:
1188   const ACC			&acc;
1189   const hb_subset_plan_t	*plan;
1190 
1191   subr_closures_t		closures;
1192 
1193   hb_vector_t<const parsed_cs_str_t*>     cached_charstrings;
1194   const parsed_cs_str_vec_t*              parsed_global_subrs;
1195   const hb_vector_t<parsed_cs_str_vec_t>* parsed_local_subrs;
1196 
1197   subr_remaps_t			remaps;
1198 
1199   private:
1200 
1201   parsed_cs_str_vec_t		parsed_charstrings;
1202   parsed_cs_str_vec_t		parsed_global_subrs_storage;
1203   hb_vector_t<parsed_cs_str_vec_t>  parsed_local_subrs_storage;
1204   typedef typename SUBRS::count_type subr_count_type;
1205 };
1206 
1207 } /* namespace CFF */
1208 
1209 HB_INTERNAL bool
1210 hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan,
1211 			    unsigned int fdCount,
1212 			    const CFF::FDSelect &src, /* IN */
1213 			    unsigned int &subset_fd_count /* OUT */,
1214 			    unsigned int &subset_fdselect_size /* OUT */,
1215 			    unsigned int &subset_fdselect_format /* OUT */,
1216 			    hb_vector_t<CFF::code_pair_t> &fdselect_ranges /* OUT */,
1217 			    hb_inc_bimap_t &fdmap /* OUT */);
1218 
1219 HB_INTERNAL bool
1220 hb_serialize_cff_fdselect (hb_serialize_context_t *c,
1221 			  unsigned int num_glyphs,
1222 			  const CFF::FDSelect &src,
1223 			  unsigned int fd_count,
1224 			  unsigned int fdselect_format,
1225 			  unsigned int size,
1226 			  const hb_vector_t<CFF::code_pair_t> &fdselect_ranges);
1227 
1228 #endif /* HB_SUBSET_CFF_COMMON_HH */
1229