xref: /aosp_15_r20/external/marisa-trie/lib/marisa/grimoire/trie/tail.cc (revision ab8db090fce404b23716c4c9194221ee27efe31c)
1*ab8db090SAndroid Build Coastguard Worker #include "marisa/grimoire/algorithm.h"
2*ab8db090SAndroid Build Coastguard Worker #include "marisa/grimoire/trie/state.h"
3*ab8db090SAndroid Build Coastguard Worker #include "marisa/grimoire/trie/tail.h"
4*ab8db090SAndroid Build Coastguard Worker 
5*ab8db090SAndroid Build Coastguard Worker namespace marisa {
6*ab8db090SAndroid Build Coastguard Worker namespace grimoire {
7*ab8db090SAndroid Build Coastguard Worker namespace trie {
8*ab8db090SAndroid Build Coastguard Worker 
Tail()9*ab8db090SAndroid Build Coastguard Worker Tail::Tail() : buf_(), end_flags_() {}
10*ab8db090SAndroid Build Coastguard Worker 
build(Vector<Entry> & entries,Vector<UInt32> * offsets,TailMode mode)11*ab8db090SAndroid Build Coastguard Worker void Tail::build(Vector<Entry> &entries, Vector<UInt32> *offsets,
12*ab8db090SAndroid Build Coastguard Worker     TailMode mode) {
13*ab8db090SAndroid Build Coastguard Worker   MARISA_THROW_IF(offsets == NULL, MARISA_NULL_ERROR);
14*ab8db090SAndroid Build Coastguard Worker 
15*ab8db090SAndroid Build Coastguard Worker   switch (mode) {
16*ab8db090SAndroid Build Coastguard Worker     case MARISA_TEXT_TAIL: {
17*ab8db090SAndroid Build Coastguard Worker       for (std::size_t i = 0; i < entries.size(); ++i) {
18*ab8db090SAndroid Build Coastguard Worker         const char * const ptr = entries[i].ptr();
19*ab8db090SAndroid Build Coastguard Worker         const std::size_t length = entries[i].length();
20*ab8db090SAndroid Build Coastguard Worker         for (std::size_t j = 0; j < length; ++j) {
21*ab8db090SAndroid Build Coastguard Worker           if (ptr[j] == '\0') {
22*ab8db090SAndroid Build Coastguard Worker             mode = MARISA_BINARY_TAIL;
23*ab8db090SAndroid Build Coastguard Worker             break;
24*ab8db090SAndroid Build Coastguard Worker           }
25*ab8db090SAndroid Build Coastguard Worker         }
26*ab8db090SAndroid Build Coastguard Worker         if (mode == MARISA_BINARY_TAIL) {
27*ab8db090SAndroid Build Coastguard Worker           break;
28*ab8db090SAndroid Build Coastguard Worker         }
29*ab8db090SAndroid Build Coastguard Worker       }
30*ab8db090SAndroid Build Coastguard Worker       break;
31*ab8db090SAndroid Build Coastguard Worker     }
32*ab8db090SAndroid Build Coastguard Worker     case MARISA_BINARY_TAIL: {
33*ab8db090SAndroid Build Coastguard Worker       break;
34*ab8db090SAndroid Build Coastguard Worker     }
35*ab8db090SAndroid Build Coastguard Worker     default: {
36*ab8db090SAndroid Build Coastguard Worker       MARISA_THROW(MARISA_CODE_ERROR, "undefined tail mode");
37*ab8db090SAndroid Build Coastguard Worker     }
38*ab8db090SAndroid Build Coastguard Worker   }
39*ab8db090SAndroid Build Coastguard Worker 
40*ab8db090SAndroid Build Coastguard Worker   Tail temp;
41*ab8db090SAndroid Build Coastguard Worker   temp.build_(entries, offsets, mode);
42*ab8db090SAndroid Build Coastguard Worker   swap(temp);
43*ab8db090SAndroid Build Coastguard Worker }
44*ab8db090SAndroid Build Coastguard Worker 
map(Mapper & mapper)45*ab8db090SAndroid Build Coastguard Worker void Tail::map(Mapper &mapper) {
46*ab8db090SAndroid Build Coastguard Worker   Tail temp;
47*ab8db090SAndroid Build Coastguard Worker   temp.map_(mapper);
48*ab8db090SAndroid Build Coastguard Worker   swap(temp);
49*ab8db090SAndroid Build Coastguard Worker }
50*ab8db090SAndroid Build Coastguard Worker 
read(Reader & reader)51*ab8db090SAndroid Build Coastguard Worker void Tail::read(Reader &reader) {
52*ab8db090SAndroid Build Coastguard Worker   Tail temp;
53*ab8db090SAndroid Build Coastguard Worker   temp.read_(reader);
54*ab8db090SAndroid Build Coastguard Worker   swap(temp);
55*ab8db090SAndroid Build Coastguard Worker }
56*ab8db090SAndroid Build Coastguard Worker 
write(Writer & writer) const57*ab8db090SAndroid Build Coastguard Worker void Tail::write(Writer &writer) const {
58*ab8db090SAndroid Build Coastguard Worker   write_(writer);
59*ab8db090SAndroid Build Coastguard Worker }
60*ab8db090SAndroid Build Coastguard Worker 
restore(Agent & agent,std::size_t offset) const61*ab8db090SAndroid Build Coastguard Worker void Tail::restore(Agent &agent, std::size_t offset) const {
62*ab8db090SAndroid Build Coastguard Worker   MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
63*ab8db090SAndroid Build Coastguard Worker 
64*ab8db090SAndroid Build Coastguard Worker   State &state = agent.state();
65*ab8db090SAndroid Build Coastguard Worker   if (end_flags_.empty()) {
66*ab8db090SAndroid Build Coastguard Worker     for (const char *ptr = &buf_[offset]; *ptr != '\0'; ++ptr) {
67*ab8db090SAndroid Build Coastguard Worker       state.key_buf().push_back(*ptr);
68*ab8db090SAndroid Build Coastguard Worker     }
69*ab8db090SAndroid Build Coastguard Worker   } else {
70*ab8db090SAndroid Build Coastguard Worker     do {
71*ab8db090SAndroid Build Coastguard Worker       state.key_buf().push_back(buf_[offset]);
72*ab8db090SAndroid Build Coastguard Worker     } while (!end_flags_[offset++]);
73*ab8db090SAndroid Build Coastguard Worker   }
74*ab8db090SAndroid Build Coastguard Worker }
75*ab8db090SAndroid Build Coastguard Worker 
match(Agent & agent,std::size_t offset) const76*ab8db090SAndroid Build Coastguard Worker bool Tail::match(Agent &agent, std::size_t offset) const {
77*ab8db090SAndroid Build Coastguard Worker   MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
78*ab8db090SAndroid Build Coastguard Worker   MARISA_DEBUG_IF(agent.state().query_pos() >= agent.query().length(),
79*ab8db090SAndroid Build Coastguard Worker       MARISA_BOUND_ERROR);
80*ab8db090SAndroid Build Coastguard Worker 
81*ab8db090SAndroid Build Coastguard Worker   State &state = agent.state();
82*ab8db090SAndroid Build Coastguard Worker   if (end_flags_.empty()) {
83*ab8db090SAndroid Build Coastguard Worker     const char * const ptr = &buf_[offset] - state.query_pos();
84*ab8db090SAndroid Build Coastguard Worker     do {
85*ab8db090SAndroid Build Coastguard Worker       if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
86*ab8db090SAndroid Build Coastguard Worker         return false;
87*ab8db090SAndroid Build Coastguard Worker       }
88*ab8db090SAndroid Build Coastguard Worker       state.set_query_pos(state.query_pos() + 1);
89*ab8db090SAndroid Build Coastguard Worker       if (ptr[state.query_pos()] == '\0') {
90*ab8db090SAndroid Build Coastguard Worker         return true;
91*ab8db090SAndroid Build Coastguard Worker       }
92*ab8db090SAndroid Build Coastguard Worker     } while (state.query_pos() < agent.query().length());
93*ab8db090SAndroid Build Coastguard Worker     return false;
94*ab8db090SAndroid Build Coastguard Worker   } else {
95*ab8db090SAndroid Build Coastguard Worker     do {
96*ab8db090SAndroid Build Coastguard Worker       if (buf_[offset] != agent.query()[state.query_pos()]) {
97*ab8db090SAndroid Build Coastguard Worker         return false;
98*ab8db090SAndroid Build Coastguard Worker       }
99*ab8db090SAndroid Build Coastguard Worker       state.set_query_pos(state.query_pos() + 1);
100*ab8db090SAndroid Build Coastguard Worker       if (end_flags_[offset++]) {
101*ab8db090SAndroid Build Coastguard Worker         return true;
102*ab8db090SAndroid Build Coastguard Worker       }
103*ab8db090SAndroid Build Coastguard Worker     } while (state.query_pos() < agent.query().length());
104*ab8db090SAndroid Build Coastguard Worker     return false;
105*ab8db090SAndroid Build Coastguard Worker   }
106*ab8db090SAndroid Build Coastguard Worker }
107*ab8db090SAndroid Build Coastguard Worker 
prefix_match(Agent & agent,std::size_t offset) const108*ab8db090SAndroid Build Coastguard Worker bool Tail::prefix_match(Agent &agent, std::size_t offset) const {
109*ab8db090SAndroid Build Coastguard Worker   MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
110*ab8db090SAndroid Build Coastguard Worker 
111*ab8db090SAndroid Build Coastguard Worker   State &state = agent.state();
112*ab8db090SAndroid Build Coastguard Worker   if (end_flags_.empty()) {
113*ab8db090SAndroid Build Coastguard Worker     const char *ptr = &buf_[offset] - state.query_pos();
114*ab8db090SAndroid Build Coastguard Worker     do {
115*ab8db090SAndroid Build Coastguard Worker       if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
116*ab8db090SAndroid Build Coastguard Worker         return false;
117*ab8db090SAndroid Build Coastguard Worker       }
118*ab8db090SAndroid Build Coastguard Worker       state.key_buf().push_back(ptr[state.query_pos()]);
119*ab8db090SAndroid Build Coastguard Worker       state.set_query_pos(state.query_pos() + 1);
120*ab8db090SAndroid Build Coastguard Worker       if (ptr[state.query_pos()] == '\0') {
121*ab8db090SAndroid Build Coastguard Worker         return true;
122*ab8db090SAndroid Build Coastguard Worker       }
123*ab8db090SAndroid Build Coastguard Worker     } while (state.query_pos() < agent.query().length());
124*ab8db090SAndroid Build Coastguard Worker     ptr += state.query_pos();
125*ab8db090SAndroid Build Coastguard Worker     do {
126*ab8db090SAndroid Build Coastguard Worker       state.key_buf().push_back(*ptr);
127*ab8db090SAndroid Build Coastguard Worker     } while (*++ptr != '\0');
128*ab8db090SAndroid Build Coastguard Worker     return true;
129*ab8db090SAndroid Build Coastguard Worker   } else {
130*ab8db090SAndroid Build Coastguard Worker     do {
131*ab8db090SAndroid Build Coastguard Worker       if (buf_[offset] != agent.query()[state.query_pos()]) {
132*ab8db090SAndroid Build Coastguard Worker         return false;
133*ab8db090SAndroid Build Coastguard Worker       }
134*ab8db090SAndroid Build Coastguard Worker       state.key_buf().push_back(buf_[offset]);
135*ab8db090SAndroid Build Coastguard Worker       state.set_query_pos(state.query_pos() + 1);
136*ab8db090SAndroid Build Coastguard Worker       if (end_flags_[offset++]) {
137*ab8db090SAndroid Build Coastguard Worker         return true;
138*ab8db090SAndroid Build Coastguard Worker       }
139*ab8db090SAndroid Build Coastguard Worker     } while (state.query_pos() < agent.query().length());
140*ab8db090SAndroid Build Coastguard Worker     do {
141*ab8db090SAndroid Build Coastguard Worker       state.key_buf().push_back(buf_[offset]);
142*ab8db090SAndroid Build Coastguard Worker     } while (!end_flags_[offset++]);
143*ab8db090SAndroid Build Coastguard Worker     return true;
144*ab8db090SAndroid Build Coastguard Worker   }
145*ab8db090SAndroid Build Coastguard Worker }
146*ab8db090SAndroid Build Coastguard Worker 
clear()147*ab8db090SAndroid Build Coastguard Worker void Tail::clear() {
148*ab8db090SAndroid Build Coastguard Worker   Tail().swap(*this);
149*ab8db090SAndroid Build Coastguard Worker }
150*ab8db090SAndroid Build Coastguard Worker 
swap(Tail & rhs)151*ab8db090SAndroid Build Coastguard Worker void Tail::swap(Tail &rhs) {
152*ab8db090SAndroid Build Coastguard Worker   buf_.swap(rhs.buf_);
153*ab8db090SAndroid Build Coastguard Worker   end_flags_.swap(rhs.end_flags_);
154*ab8db090SAndroid Build Coastguard Worker }
155*ab8db090SAndroid Build Coastguard Worker 
build_(Vector<Entry> & entries,Vector<UInt32> * offsets,TailMode mode)156*ab8db090SAndroid Build Coastguard Worker void Tail::build_(Vector<Entry> &entries, Vector<UInt32> *offsets,
157*ab8db090SAndroid Build Coastguard Worker     TailMode mode) {
158*ab8db090SAndroid Build Coastguard Worker   for (std::size_t i = 0; i < entries.size(); ++i) {
159*ab8db090SAndroid Build Coastguard Worker     entries[i].set_id(i);
160*ab8db090SAndroid Build Coastguard Worker   }
161*ab8db090SAndroid Build Coastguard Worker   Algorithm().sort(entries.begin(), entries.end());
162*ab8db090SAndroid Build Coastguard Worker 
163*ab8db090SAndroid Build Coastguard Worker   Vector<UInt32> temp_offsets;
164*ab8db090SAndroid Build Coastguard Worker   temp_offsets.resize(entries.size(), 0);
165*ab8db090SAndroid Build Coastguard Worker 
166*ab8db090SAndroid Build Coastguard Worker   const Entry dummy;
167*ab8db090SAndroid Build Coastguard Worker   const Entry *last = &dummy;
168*ab8db090SAndroid Build Coastguard Worker   for (std::size_t i = entries.size(); i > 0; --i) {
169*ab8db090SAndroid Build Coastguard Worker     const Entry &current = entries[i - 1];
170*ab8db090SAndroid Build Coastguard Worker     MARISA_THROW_IF(current.length() == 0, MARISA_RANGE_ERROR);
171*ab8db090SAndroid Build Coastguard Worker     std::size_t match = 0;
172*ab8db090SAndroid Build Coastguard Worker     while ((match < current.length()) && (match < last->length()) &&
173*ab8db090SAndroid Build Coastguard Worker         ((*last)[match] == current[match])) {
174*ab8db090SAndroid Build Coastguard Worker       ++match;
175*ab8db090SAndroid Build Coastguard Worker     }
176*ab8db090SAndroid Build Coastguard Worker     if ((match == current.length()) && (last->length() != 0)) {
177*ab8db090SAndroid Build Coastguard Worker       temp_offsets[current.id()] = (UInt32)(
178*ab8db090SAndroid Build Coastguard Worker           temp_offsets[last->id()] + (last->length() - match));
179*ab8db090SAndroid Build Coastguard Worker     } else {
180*ab8db090SAndroid Build Coastguard Worker       temp_offsets[current.id()] = (UInt32)buf_.size();
181*ab8db090SAndroid Build Coastguard Worker       for (std::size_t j = 1; j <= current.length(); ++j) {
182*ab8db090SAndroid Build Coastguard Worker         buf_.push_back(current[current.length() - j]);
183*ab8db090SAndroid Build Coastguard Worker       }
184*ab8db090SAndroid Build Coastguard Worker       if (mode == MARISA_TEXT_TAIL) {
185*ab8db090SAndroid Build Coastguard Worker         buf_.push_back('\0');
186*ab8db090SAndroid Build Coastguard Worker       } else {
187*ab8db090SAndroid Build Coastguard Worker         for (std::size_t j = 1; j < current.length(); ++j) {
188*ab8db090SAndroid Build Coastguard Worker           end_flags_.push_back(false);
189*ab8db090SAndroid Build Coastguard Worker         }
190*ab8db090SAndroid Build Coastguard Worker         end_flags_.push_back(true);
191*ab8db090SAndroid Build Coastguard Worker       }
192*ab8db090SAndroid Build Coastguard Worker       MARISA_THROW_IF(buf_.size() > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
193*ab8db090SAndroid Build Coastguard Worker     }
194*ab8db090SAndroid Build Coastguard Worker     last = &current;
195*ab8db090SAndroid Build Coastguard Worker   }
196*ab8db090SAndroid Build Coastguard Worker   buf_.shrink();
197*ab8db090SAndroid Build Coastguard Worker 
198*ab8db090SAndroid Build Coastguard Worker   offsets->swap(temp_offsets);
199*ab8db090SAndroid Build Coastguard Worker }
200*ab8db090SAndroid Build Coastguard Worker 
map_(Mapper & mapper)201*ab8db090SAndroid Build Coastguard Worker void Tail::map_(Mapper &mapper) {
202*ab8db090SAndroid Build Coastguard Worker   buf_.map(mapper);
203*ab8db090SAndroid Build Coastguard Worker   end_flags_.map(mapper);
204*ab8db090SAndroid Build Coastguard Worker }
205*ab8db090SAndroid Build Coastguard Worker 
read_(Reader & reader)206*ab8db090SAndroid Build Coastguard Worker void Tail::read_(Reader &reader) {
207*ab8db090SAndroid Build Coastguard Worker   buf_.read(reader);
208*ab8db090SAndroid Build Coastguard Worker   end_flags_.read(reader);
209*ab8db090SAndroid Build Coastguard Worker }
210*ab8db090SAndroid Build Coastguard Worker 
write_(Writer & writer) const211*ab8db090SAndroid Build Coastguard Worker void Tail::write_(Writer &writer) const {
212*ab8db090SAndroid Build Coastguard Worker   buf_.write(writer);
213*ab8db090SAndroid Build Coastguard Worker   end_flags_.write(writer);
214*ab8db090SAndroid Build Coastguard Worker }
215*ab8db090SAndroid Build Coastguard Worker 
216*ab8db090SAndroid Build Coastguard Worker }  // namespace trie
217*ab8db090SAndroid Build Coastguard Worker }  // namespace grimoire
218*ab8db090SAndroid Build Coastguard Worker }  // namespace marisa
219