1 use alloc::{collections::BTreeMap, vec::Vec};
2 
3 use crate::{
4     dfa::{
5         dense::{self, BuildError},
6         DEAD,
7     },
8     nfa::thompson,
9     util::{
10         self,
11         alphabet::{self, ByteSet},
12         determinize::{State, StateBuilderEmpty, StateBuilderNFA},
13         primitives::{PatternID, StateID},
14         search::{Anchored, MatchKind},
15         sparse_set::SparseSets,
16         start::Start,
17     },
18 };
19 
20 /// A builder for configuring and running a DFA determinizer.
21 #[derive(Clone, Debug)]
22 pub(crate) struct Config {
23     match_kind: MatchKind,
24     quit: ByteSet,
25     dfa_size_limit: Option<usize>,
26     determinize_size_limit: Option<usize>,
27 }
28 
29 impl Config {
30     /// Create a new default config for a determinizer. The determinizer may be
31     /// configured before calling `run`.
new() -> Config32     pub fn new() -> Config {
33         Config {
34             match_kind: MatchKind::LeftmostFirst,
35             quit: ByteSet::empty(),
36             dfa_size_limit: None,
37             determinize_size_limit: None,
38         }
39     }
40 
41     /// Run determinization on the given NFA and write the resulting DFA into
42     /// the one given. The DFA given should be initialized but otherwise empty.
43     /// "Initialized" means that it is setup to handle the NFA's byte classes,
44     /// number of patterns and whether to build start states for each pattern.
run( &self, nfa: &thompson::NFA, dfa: &mut dense::OwnedDFA, ) -> Result<(), BuildError>45     pub fn run(
46         &self,
47         nfa: &thompson::NFA,
48         dfa: &mut dense::OwnedDFA,
49     ) -> Result<(), BuildError> {
50         let dead = State::dead();
51         let quit = State::dead();
52         let mut cache = StateMap::default();
53         // We only insert the dead state here since its representation is
54         // identical to the quit state. And we never want anything pointing
55         // to the quit state other than specific transitions derived from the
56         // determinizer's configured "quit" bytes.
57         //
58         // We do put the quit state into 'builder_states' below. This ensures
59         // that a proper DFA state ID is allocated for it, and that no other
60         // DFA state uses the "location after the DEAD state." That is, it
61         // is assumed that the quit state is always the state immediately
62         // following the DEAD state.
63         cache.insert(dead.clone(), DEAD);
64 
65         let runner = Runner {
66             config: self.clone(),
67             nfa,
68             dfa,
69             builder_states: alloc::vec![dead, quit],
70             cache,
71             memory_usage_state: 0,
72             sparses: SparseSets::new(nfa.states().len()),
73             stack: alloc::vec![],
74             scratch_state_builder: StateBuilderEmpty::new(),
75         };
76         runner.run()
77     }
78 
79     /// The match semantics to use for determinization.
80     ///
81     /// MatchKind::All corresponds to the standard textbook construction.
82     /// All possible match states are represented in the DFA.
83     /// MatchKind::LeftmostFirst permits greediness and otherwise tries to
84     /// simulate the match semantics of backtracking regex engines. Namely,
85     /// only a subset of match states are built, and dead states are used to
86     /// stop searches with an unanchored prefix.
87     ///
88     /// The default is MatchKind::LeftmostFirst.
match_kind(&mut self, kind: MatchKind) -> &mut Config89     pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config {
90         self.match_kind = kind;
91         self
92     }
93 
94     /// The set of bytes to use that will cause the DFA to enter a quit state,
95     /// stop searching and return an error. By default, this is empty.
quit(&mut self, set: ByteSet) -> &mut Config96     pub fn quit(&mut self, set: ByteSet) -> &mut Config {
97         self.quit = set;
98         self
99     }
100 
101     /// The limit, in bytes of the heap, that the DFA is permitted to use. This
102     /// does not include the auxiliary heap storage used by determinization.
dfa_size_limit(&mut self, bytes: Option<usize>) -> &mut Config103     pub fn dfa_size_limit(&mut self, bytes: Option<usize>) -> &mut Config {
104         self.dfa_size_limit = bytes;
105         self
106     }
107 
108     /// The limit, in bytes of the heap, that determinization itself is allowed
109     /// to use. This does not include the size of the DFA being built.
determinize_size_limit( &mut self, bytes: Option<usize>, ) -> &mut Config110     pub fn determinize_size_limit(
111         &mut self,
112         bytes: Option<usize>,
113     ) -> &mut Config {
114         self.determinize_size_limit = bytes;
115         self
116     }
117 }
118 
119 /// The actual implementation of determinization that converts an NFA to a DFA
120 /// through powerset construction.
121 ///
122 /// This determinizer roughly follows the typical powerset construction, where
123 /// each DFA state is comprised of one or more NFA states. In the worst case,
124 /// there is one DFA state for every possible combination of NFA states. In
125 /// practice, this only happens in certain conditions, typically when there are
126 /// bounded repetitions.
127 ///
128 /// The main differences between this implementation and typical deteminization
129 /// are that this implementation delays matches by one state and hackily makes
130 /// look-around work. Comments below attempt to explain this.
131 ///
132 /// The lifetime variable `'a` refers to the lifetime of the NFA or DFA,
133 /// whichever is shorter.
134 #[derive(Debug)]
135 struct Runner<'a> {
136     /// The configuration used to initialize determinization.
137     config: Config,
138     /// The NFA we're converting into a DFA.
139     nfa: &'a thompson::NFA,
140     /// The DFA we're building.
141     dfa: &'a mut dense::OwnedDFA,
142     /// Each DFA state being built is defined as an *ordered* set of NFA
143     /// states, along with some meta facts about the ordered set of NFA states.
144     ///
145     /// This is never empty. The first state is always a dummy state such that
146     /// a state id == 0 corresponds to a dead state. The second state is always
147     /// the quit state.
148     ///
149     /// Why do we have states in both a `Vec` and in a cache map below?
150     /// Well, they serve two different roles based on access patterns.
151     /// `builder_states` is the canonical home of each state, and provides
152     /// constant random access by a DFA state's ID. The cache map below, on
153     /// the other hand, provides a quick way of searching for identical DFA
154     /// states by using the DFA state as a key in the map. Of course, we use
155     /// reference counting to avoid actually duplicating the state's data
156     /// itself. (Although this has never been benchmarked.) Note that the cache
157     /// map does not give us full minimization; it just lets us avoid some very
158     /// obvious redundant states.
159     ///
160     /// Note that the index into this Vec isn't quite the DFA's state ID.
161     /// Rather, it's just an index. To get the state ID, you have to multiply
162     /// it by the DFA's stride. That's done by self.dfa.from_index. And the
163     /// inverse is self.dfa.to_index.
164     ///
165     /// Moreover, DFA states don't usually retain the IDs assigned to them
166     /// by their position in this Vec. After determinization completes,
167     /// states are shuffled around to support other optimizations. See the
168     /// sibling 'special' module for more details on that. (The reason for
169     /// mentioning this is that if you print out the DFA for debugging during
170     /// determinization, and then print out the final DFA after it is fully
171     /// built, then the state IDs likely won't match up.)
172     builder_states: Vec<State>,
173     /// A cache of DFA states that already exist and can be easily looked up
174     /// via ordered sets of NFA states.
175     ///
176     /// See `builder_states` docs for why we store states in two different
177     /// ways.
178     cache: StateMap,
179     /// The memory usage, in bytes, used by builder_states and cache. We track
180     /// this as new states are added since states use a variable amount of
181     /// heap. Tracking this as we add states makes it possible to compute the
182     /// total amount of memory used by the determinizer in constant time.
183     memory_usage_state: usize,
184     /// A pair of sparse sets for tracking ordered sets of NFA state IDs.
185     /// These are reused throughout determinization. A bounded sparse set
186     /// gives us constant time insertion, membership testing and clearing.
187     sparses: SparseSets,
188     /// Scratch space for a stack of NFA states to visit, for depth first
189     /// visiting without recursion.
190     stack: Vec<StateID>,
191     /// Scratch space for storing an ordered sequence of NFA states, for
192     /// amortizing allocation. This is principally useful for when we avoid
193     /// adding a new DFA state since it already exists. In order to detect this
194     /// case though, we still need an ordered set of NFA state IDs. So we use
195     /// this space to stage that ordered set before we know whether we need to
196     /// create a new DFA state or not.
197     scratch_state_builder: StateBuilderEmpty,
198 }
199 
200 /// A map from states to state identifiers. When using std, we use a standard
201 /// hashmap, since it's a bit faster for this use case. (Other maps, like
202 /// one's based on FNV, have not yet been benchmarked.)
203 ///
204 /// The main purpose of this map is to reuse states where possible. This won't
205 /// fully minimize the DFA, but it works well in a lot of cases.
206 #[cfg(feature = "std")]
207 type StateMap = std::collections::HashMap<State, StateID>;
208 #[cfg(not(feature = "std"))]
209 type StateMap = BTreeMap<State, StateID>;
210 
211 impl<'a> Runner<'a> {
212     /// Build the DFA. If there was a problem constructing the DFA (e.g., if
213     /// the chosen state identifier representation is too small), then an error
214     /// is returned.
run(mut self) -> Result<(), BuildError>215     fn run(mut self) -> Result<(), BuildError> {
216         if self.nfa.look_set_any().contains_word_unicode()
217             && !self.config.quit.contains_range(0x80, 0xFF)
218         {
219             return Err(BuildError::unsupported_dfa_word_boundary_unicode());
220         }
221 
222         // A sequence of "representative" bytes drawn from each equivalence
223         // class. These representative bytes are fed to the NFA to compute
224         // state transitions. This allows us to avoid re-computing state
225         // transitions for bytes that are guaranteed to produce identical
226         // results. Since computing the representatives needs to do a little
227         // work, we do it once here because we'll be iterating over them a lot.
228         let representatives: Vec<alphabet::Unit> =
229             self.dfa.byte_classes().representatives(..).collect();
230         // The set of all DFA state IDs that still need to have their
231         // transitions set. We start by seeding this with all starting states.
232         let mut uncompiled = alloc::vec![];
233         self.add_all_starts(&mut uncompiled)?;
234         while let Some(dfa_id) = uncompiled.pop() {
235             for &unit in &representatives {
236                 if unit.as_u8().map_or(false, |b| self.config.quit.contains(b))
237                 {
238                     continue;
239                 }
240                 // In many cases, the state we transition to has already been
241                 // computed. 'cached_state' will do the minimal amount of work
242                 // to check this, and if it exists, immediately return an
243                 // already existing state ID.
244                 let (next_dfa_id, is_new) = self.cached_state(dfa_id, unit)?;
245                 self.dfa.set_transition(dfa_id, unit, next_dfa_id);
246                 // If the state ID we got back is newly created, then we need
247                 // to compile it, so add it to our uncompiled frontier.
248                 if is_new {
249                     uncompiled.push(next_dfa_id);
250                 }
251             }
252         }
253         debug!(
254             "determinization complete, memory usage: {}, \
255              dense DFA size: {}, \
256              is reverse? {}",
257             self.memory_usage(),
258             self.dfa.memory_usage(),
259             self.nfa.is_reverse(),
260         );
261 
262         // A map from DFA state ID to one or more NFA match IDs. Each NFA match
263         // ID corresponds to a distinct regex pattern that matches in the state
264         // corresponding to the key.
265         let mut matches: BTreeMap<StateID, Vec<PatternID>> = BTreeMap::new();
266         self.cache.clear();
267         #[cfg(feature = "logging")]
268         let mut total_pat_len = 0;
269         for (i, state) in self.builder_states.into_iter().enumerate() {
270             if let Some(pat_ids) = state.match_pattern_ids() {
271                 let id = self.dfa.to_state_id(i);
272                 log! {
273                     total_pat_len += pat_ids.len();
274                 }
275                 matches.insert(id, pat_ids);
276             }
277         }
278         log! {
279             use core::mem::size_of;
280             let per_elem = size_of::<StateID>() + size_of::<Vec<PatternID>>();
281             let pats = total_pat_len * size_of::<PatternID>();
282             let mem = (matches.len() * per_elem) + pats;
283             log::debug!("matches map built, memory usage: {}", mem);
284         }
285         // At this point, we shuffle the "special" states in the final DFA.
286         // This permits a DFA's match loop to detect a match condition (among
287         // other things) by merely inspecting the current state's identifier,
288         // and avoids the need for any additional auxiliary storage.
289         self.dfa.shuffle(matches)?;
290         Ok(())
291     }
292 
293     /// Return the identifier for the next DFA state given an existing DFA
294     /// state and an input byte. If the next DFA state already exists, then
295     /// return its identifier from the cache. Otherwise, build the state, cache
296     /// it and return its identifier.
297     ///
298     /// This routine returns a boolean indicating whether a new state was
299     /// built. If a new state is built, then the caller needs to add it to its
300     /// frontier of uncompiled DFA states to compute transitions for.
cached_state( &mut self, dfa_id: StateID, unit: alphabet::Unit, ) -> Result<(StateID, bool), BuildError>301     fn cached_state(
302         &mut self,
303         dfa_id: StateID,
304         unit: alphabet::Unit,
305     ) -> Result<(StateID, bool), BuildError> {
306         // Compute the set of all reachable NFA states, including epsilons.
307         let empty_builder = self.get_state_builder();
308         let builder = util::determinize::next(
309             self.nfa,
310             self.config.match_kind,
311             &mut self.sparses,
312             &mut self.stack,
313             &self.builder_states[self.dfa.to_index(dfa_id)],
314             unit,
315             empty_builder,
316         );
317         self.maybe_add_state(builder)
318     }
319 
320     /// Compute the set of DFA start states and add their identifiers in
321     /// 'dfa_state_ids' (no duplicates are added).
add_all_starts( &mut self, dfa_state_ids: &mut Vec<StateID>, ) -> Result<(), BuildError>322     fn add_all_starts(
323         &mut self,
324         dfa_state_ids: &mut Vec<StateID>,
325     ) -> Result<(), BuildError> {
326         // These should be the first states added.
327         assert!(dfa_state_ids.is_empty());
328         // We only want to add (un)anchored starting states that is consistent
329         // with our DFA's configuration. Unconditionally adding both (although
330         // it is the default) can make DFAs quite a bit bigger.
331         if self.dfa.start_kind().has_unanchored() {
332             self.add_start_group(Anchored::No, dfa_state_ids)?;
333         }
334         if self.dfa.start_kind().has_anchored() {
335             self.add_start_group(Anchored::Yes, dfa_state_ids)?;
336         }
337         // I previously has an 'assert' here checking that either
338         // 'dfa_state_ids' was non-empty, or the NFA had zero patterns. But it
339         // turns out this isn't always true. For example, the NFA might have
340         // one or more patterns but where all such patterns are just 'fail'
341         // states. These will ultimately just compile down to DFA dead states,
342         // and since the dead state was added earlier, no new DFA states are
343         // added. And thus, it is valid and okay for 'dfa_state_ids' to be
344         // empty even if there are a non-zero number of patterns in the NFA.
345 
346         // We only need to compute anchored start states for each pattern if it
347         // was requested to do so.
348         if self.dfa.starts_for_each_pattern() {
349             for pid in self.nfa.patterns() {
350                 self.add_start_group(Anchored::Pattern(pid), dfa_state_ids)?;
351             }
352         }
353         Ok(())
354     }
355 
356     /// Add a group of start states for the given match pattern ID. Any new
357     /// DFA states added are pushed on to 'dfa_state_ids'. (No duplicates are
358     /// pushed.)
359     ///
360     /// When pattern_id is None, then this will compile a group of unanchored
361     /// start states (if the DFA is unanchored). When the pattern_id is
362     /// present, then this will compile a group of anchored start states that
363     /// only match the given pattern.
364     ///
365     /// This panics if `anchored` corresponds to an invalid pattern ID.
add_start_group( &mut self, anchored: Anchored, dfa_state_ids: &mut Vec<StateID>, ) -> Result<(), BuildError>366     fn add_start_group(
367         &mut self,
368         anchored: Anchored,
369         dfa_state_ids: &mut Vec<StateID>,
370     ) -> Result<(), BuildError> {
371         let nfa_start = match anchored {
372             Anchored::No => self.nfa.start_unanchored(),
373             Anchored::Yes => self.nfa.start_anchored(),
374             Anchored::Pattern(pid) => {
375                 self.nfa.start_pattern(pid).expect("valid pattern ID")
376             }
377         };
378 
379         // When compiling start states, we're careful not to build additional
380         // states that aren't necessary. For example, if the NFA has no word
381         // boundary assertion, then there's no reason to have distinct start
382         // states for 'NonWordByte' and 'WordByte' starting configurations.
383         // Instead, the 'WordByte' starting configuration can just point
384         // directly to the start state for the 'NonWordByte' config.
385         //
386         // Note though that we only need to care about assertions in the prefix
387         // of an NFA since this only concerns the starting states. (Actually,
388         // the most precisely thing we could do it is look at the prefix
389         // assertions of each pattern when 'anchored == Anchored::Pattern',
390         // and then only compile extra states if the prefix is non-empty.) But
391         // we settle for simplicity here instead of absolute minimalism. It is
392         // somewhat rare, after all, for multiple patterns in the same regex to
393         // have different prefix look-arounds.
394 
395         let (id, is_new) =
396             self.add_one_start(nfa_start, Start::NonWordByte)?;
397         self.dfa.set_start_state(anchored, Start::NonWordByte, id);
398         if is_new {
399             dfa_state_ids.push(id);
400         }
401 
402         if !self.nfa.look_set_prefix_any().contains_word() {
403             self.dfa.set_start_state(anchored, Start::WordByte, id);
404         } else {
405             let (id, is_new) =
406                 self.add_one_start(nfa_start, Start::WordByte)?;
407             self.dfa.set_start_state(anchored, Start::WordByte, id);
408             if is_new {
409                 dfa_state_ids.push(id);
410             }
411         }
412         if !self.nfa.look_set_prefix_any().contains_anchor() {
413             self.dfa.set_start_state(anchored, Start::Text, id);
414             self.dfa.set_start_state(anchored, Start::LineLF, id);
415             self.dfa.set_start_state(anchored, Start::LineCR, id);
416             self.dfa.set_start_state(
417                 anchored,
418                 Start::CustomLineTerminator,
419                 id,
420             );
421         } else {
422             let (id, is_new) = self.add_one_start(nfa_start, Start::Text)?;
423             self.dfa.set_start_state(anchored, Start::Text, id);
424             if is_new {
425                 dfa_state_ids.push(id);
426             }
427 
428             let (id, is_new) = self.add_one_start(nfa_start, Start::LineLF)?;
429             self.dfa.set_start_state(anchored, Start::LineLF, id);
430             if is_new {
431                 dfa_state_ids.push(id);
432             }
433 
434             let (id, is_new) = self.add_one_start(nfa_start, Start::LineCR)?;
435             self.dfa.set_start_state(anchored, Start::LineCR, id);
436             if is_new {
437                 dfa_state_ids.push(id);
438             }
439 
440             let (id, is_new) =
441                 self.add_one_start(nfa_start, Start::CustomLineTerminator)?;
442             self.dfa.set_start_state(
443                 anchored,
444                 Start::CustomLineTerminator,
445                 id,
446             );
447             if is_new {
448                 dfa_state_ids.push(id);
449             }
450         }
451 
452         Ok(())
453     }
454 
455     /// Add a new DFA start state corresponding to the given starting NFA
456     /// state, and the starting search configuration. (The starting search
457     /// configuration essentially tells us which look-behind assertions are
458     /// true for this particular state.)
459     ///
460     /// The boolean returned indicates whether the state ID returned is a newly
461     /// created state, or a previously cached state.
add_one_start( &mut self, nfa_start: StateID, start: Start, ) -> Result<(StateID, bool), BuildError>462     fn add_one_start(
463         &mut self,
464         nfa_start: StateID,
465         start: Start,
466     ) -> Result<(StateID, bool), BuildError> {
467         // Compute the look-behind assertions that are true in this starting
468         // configuration, and the determine the epsilon closure. While
469         // computing the epsilon closure, we only follow condiional epsilon
470         // transitions that satisfy the look-behind assertions in 'look_have'.
471         let mut builder_matches = self.get_state_builder().into_matches();
472         util::determinize::set_lookbehind_from_start(
473             self.nfa,
474             &start,
475             &mut builder_matches,
476         );
477         self.sparses.set1.clear();
478         util::determinize::epsilon_closure(
479             self.nfa,
480             nfa_start,
481             builder_matches.look_have(),
482             &mut self.stack,
483             &mut self.sparses.set1,
484         );
485         let mut builder = builder_matches.into_nfa();
486         util::determinize::add_nfa_states(
487             &self.nfa,
488             &self.sparses.set1,
489             &mut builder,
490         );
491         self.maybe_add_state(builder)
492     }
493 
494     /// Adds the given state to the DFA being built depending on whether it
495     /// already exists in this determinizer's cache.
496     ///
497     /// If it does exist, then the memory used by 'state' is put back into the
498     /// determinizer and the previously created state's ID is returned. (Along
499     /// with 'false', indicating that no new state was added.)
500     ///
501     /// If it does not exist, then the state is added to the DFA being built
502     /// and a fresh ID is allocated (if ID allocation fails, then an error is
503     /// returned) and returned. (Along with 'true', indicating that a new state
504     /// was added.)
maybe_add_state( &mut self, builder: StateBuilderNFA, ) -> Result<(StateID, bool), BuildError>505     fn maybe_add_state(
506         &mut self,
507         builder: StateBuilderNFA,
508     ) -> Result<(StateID, bool), BuildError> {
509         if let Some(&cached_id) = self.cache.get(builder.as_bytes()) {
510             // Since we have a cached state, put the constructed state's
511             // memory back into our scratch space, so that it can be reused.
512             self.put_state_builder(builder);
513             return Ok((cached_id, false));
514         }
515         self.add_state(builder).map(|sid| (sid, true))
516     }
517 
518     /// Add the given state to the DFA and make it available in the cache.
519     ///
520     /// The state initially has no transitions. That is, it transitions to the
521     /// dead state for all possible inputs, and transitions to the quit state
522     /// for all quit bytes.
523     ///
524     /// If adding the state would exceed the maximum value for StateID, then an
525     /// error is returned.
add_state( &mut self, builder: StateBuilderNFA, ) -> Result<StateID, BuildError>526     fn add_state(
527         &mut self,
528         builder: StateBuilderNFA,
529     ) -> Result<StateID, BuildError> {
530         let id = self.dfa.add_empty_state()?;
531         if !self.config.quit.is_empty() {
532             for b in self.config.quit.iter() {
533                 self.dfa.set_transition(
534                     id,
535                     alphabet::Unit::u8(b),
536                     self.dfa.quit_id(),
537                 );
538             }
539         }
540         let state = builder.to_state();
541         // States use reference counting internally, so we only need to count
542         // their memory usage once.
543         self.memory_usage_state += state.memory_usage();
544         self.builder_states.push(state.clone());
545         self.cache.insert(state, id);
546         self.put_state_builder(builder);
547         if let Some(limit) = self.config.dfa_size_limit {
548             if self.dfa.memory_usage() > limit {
549                 return Err(BuildError::dfa_exceeded_size_limit(limit));
550             }
551         }
552         if let Some(limit) = self.config.determinize_size_limit {
553             if self.memory_usage() > limit {
554                 return Err(BuildError::determinize_exceeded_size_limit(
555                     limit,
556                 ));
557             }
558         }
559         Ok(id)
560     }
561 
562     /// Returns a state builder from this determinizer that might have existing
563     /// capacity. This helps avoid allocs in cases where a state is built that
564     /// turns out to already be cached.
565     ///
566     /// Callers must put the state builder back with 'put_state_builder',
567     /// otherwise the allocation reuse won't work.
get_state_builder(&mut self) -> StateBuilderEmpty568     fn get_state_builder(&mut self) -> StateBuilderEmpty {
569         core::mem::replace(
570             &mut self.scratch_state_builder,
571             StateBuilderEmpty::new(),
572         )
573     }
574 
575     /// Puts the given state builder back into this determinizer for reuse.
576     ///
577     /// Note that building a 'State' from a builder always creates a new
578     /// alloc, so callers should always put the builder back.
put_state_builder(&mut self, builder: StateBuilderNFA)579     fn put_state_builder(&mut self, builder: StateBuilderNFA) {
580         let _ = core::mem::replace(
581             &mut self.scratch_state_builder,
582             builder.clear(),
583         );
584     }
585 
586     /// Return the memory usage, in bytes, of this determinizer at the current
587     /// point in time. This does not include memory used by the NFA or the
588     /// dense DFA itself.
memory_usage(&self) -> usize589     fn memory_usage(&self) -> usize {
590         use core::mem::size_of;
591 
592         self.builder_states.len() * size_of::<State>()
593         // Maps likely use more memory than this, but it's probably close.
594         + self.cache.len() * (size_of::<State>() + size_of::<StateID>())
595         + self.memory_usage_state
596         + self.stack.capacity() * size_of::<StateID>()
597         + self.scratch_state_builder.capacity()
598     }
599 }
600