xref: /aosp_15_r20/external/marisa-trie/lib/marisa/grimoire/trie/tail.cc (revision ab8db090fce404b23716c4c9194221ee27efe31c)
1 #include "marisa/grimoire/algorithm.h"
2 #include "marisa/grimoire/trie/state.h"
3 #include "marisa/grimoire/trie/tail.h"
4 
5 namespace marisa {
6 namespace grimoire {
7 namespace trie {
8 
Tail()9 Tail::Tail() : buf_(), end_flags_() {}
10 
build(Vector<Entry> & entries,Vector<UInt32> * offsets,TailMode mode)11 void Tail::build(Vector<Entry> &entries, Vector<UInt32> *offsets,
12     TailMode mode) {
13   MARISA_THROW_IF(offsets == NULL, MARISA_NULL_ERROR);
14 
15   switch (mode) {
16     case MARISA_TEXT_TAIL: {
17       for (std::size_t i = 0; i < entries.size(); ++i) {
18         const char * const ptr = entries[i].ptr();
19         const std::size_t length = entries[i].length();
20         for (std::size_t j = 0; j < length; ++j) {
21           if (ptr[j] == '\0') {
22             mode = MARISA_BINARY_TAIL;
23             break;
24           }
25         }
26         if (mode == MARISA_BINARY_TAIL) {
27           break;
28         }
29       }
30       break;
31     }
32     case MARISA_BINARY_TAIL: {
33       break;
34     }
35     default: {
36       MARISA_THROW(MARISA_CODE_ERROR, "undefined tail mode");
37     }
38   }
39 
40   Tail temp;
41   temp.build_(entries, offsets, mode);
42   swap(temp);
43 }
44 
map(Mapper & mapper)45 void Tail::map(Mapper &mapper) {
46   Tail temp;
47   temp.map_(mapper);
48   swap(temp);
49 }
50 
read(Reader & reader)51 void Tail::read(Reader &reader) {
52   Tail temp;
53   temp.read_(reader);
54   swap(temp);
55 }
56 
write(Writer & writer) const57 void Tail::write(Writer &writer) const {
58   write_(writer);
59 }
60 
restore(Agent & agent,std::size_t offset) const61 void Tail::restore(Agent &agent, std::size_t offset) const {
62   MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
63 
64   State &state = agent.state();
65   if (end_flags_.empty()) {
66     for (const char *ptr = &buf_[offset]; *ptr != '\0'; ++ptr) {
67       state.key_buf().push_back(*ptr);
68     }
69   } else {
70     do {
71       state.key_buf().push_back(buf_[offset]);
72     } while (!end_flags_[offset++]);
73   }
74 }
75 
match(Agent & agent,std::size_t offset) const76 bool Tail::match(Agent &agent, std::size_t offset) const {
77   MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
78   MARISA_DEBUG_IF(agent.state().query_pos() >= agent.query().length(),
79       MARISA_BOUND_ERROR);
80 
81   State &state = agent.state();
82   if (end_flags_.empty()) {
83     const char * const ptr = &buf_[offset] - state.query_pos();
84     do {
85       if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
86         return false;
87       }
88       state.set_query_pos(state.query_pos() + 1);
89       if (ptr[state.query_pos()] == '\0') {
90         return true;
91       }
92     } while (state.query_pos() < agent.query().length());
93     return false;
94   } else {
95     do {
96       if (buf_[offset] != agent.query()[state.query_pos()]) {
97         return false;
98       }
99       state.set_query_pos(state.query_pos() + 1);
100       if (end_flags_[offset++]) {
101         return true;
102       }
103     } while (state.query_pos() < agent.query().length());
104     return false;
105   }
106 }
107 
prefix_match(Agent & agent,std::size_t offset) const108 bool Tail::prefix_match(Agent &agent, std::size_t offset) const {
109   MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
110 
111   State &state = agent.state();
112   if (end_flags_.empty()) {
113     const char *ptr = &buf_[offset] - state.query_pos();
114     do {
115       if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
116         return false;
117       }
118       state.key_buf().push_back(ptr[state.query_pos()]);
119       state.set_query_pos(state.query_pos() + 1);
120       if (ptr[state.query_pos()] == '\0') {
121         return true;
122       }
123     } while (state.query_pos() < agent.query().length());
124     ptr += state.query_pos();
125     do {
126       state.key_buf().push_back(*ptr);
127     } while (*++ptr != '\0');
128     return true;
129   } else {
130     do {
131       if (buf_[offset] != agent.query()[state.query_pos()]) {
132         return false;
133       }
134       state.key_buf().push_back(buf_[offset]);
135       state.set_query_pos(state.query_pos() + 1);
136       if (end_flags_[offset++]) {
137         return true;
138       }
139     } while (state.query_pos() < agent.query().length());
140     do {
141       state.key_buf().push_back(buf_[offset]);
142     } while (!end_flags_[offset++]);
143     return true;
144   }
145 }
146 
clear()147 void Tail::clear() {
148   Tail().swap(*this);
149 }
150 
swap(Tail & rhs)151 void Tail::swap(Tail &rhs) {
152   buf_.swap(rhs.buf_);
153   end_flags_.swap(rhs.end_flags_);
154 }
155 
build_(Vector<Entry> & entries,Vector<UInt32> * offsets,TailMode mode)156 void Tail::build_(Vector<Entry> &entries, Vector<UInt32> *offsets,
157     TailMode mode) {
158   for (std::size_t i = 0; i < entries.size(); ++i) {
159     entries[i].set_id(i);
160   }
161   Algorithm().sort(entries.begin(), entries.end());
162 
163   Vector<UInt32> temp_offsets;
164   temp_offsets.resize(entries.size(), 0);
165 
166   const Entry dummy;
167   const Entry *last = &dummy;
168   for (std::size_t i = entries.size(); i > 0; --i) {
169     const Entry &current = entries[i - 1];
170     MARISA_THROW_IF(current.length() == 0, MARISA_RANGE_ERROR);
171     std::size_t match = 0;
172     while ((match < current.length()) && (match < last->length()) &&
173         ((*last)[match] == current[match])) {
174       ++match;
175     }
176     if ((match == current.length()) && (last->length() != 0)) {
177       temp_offsets[current.id()] = (UInt32)(
178           temp_offsets[last->id()] + (last->length() - match));
179     } else {
180       temp_offsets[current.id()] = (UInt32)buf_.size();
181       for (std::size_t j = 1; j <= current.length(); ++j) {
182         buf_.push_back(current[current.length() - j]);
183       }
184       if (mode == MARISA_TEXT_TAIL) {
185         buf_.push_back('\0');
186       } else {
187         for (std::size_t j = 1; j < current.length(); ++j) {
188           end_flags_.push_back(false);
189         }
190         end_flags_.push_back(true);
191       }
192       MARISA_THROW_IF(buf_.size() > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
193     }
194     last = &current;
195   }
196   buf_.shrink();
197 
198   offsets->swap(temp_offsets);
199 }
200 
map_(Mapper & mapper)201 void Tail::map_(Mapper &mapper) {
202   buf_.map(mapper);
203   end_flags_.map(mapper);
204 }
205 
read_(Reader & reader)206 void Tail::read_(Reader &reader) {
207   buf_.read(reader);
208   end_flags_.read(reader);
209 }
210 
write_(Writer & writer) const211 void Tail::write_(Writer &writer) const {
212   buf_.write(writer);
213   end_flags_.write(writer);
214 }
215 
216 }  // namespace trie
217 }  // namespace grimoire
218 }  // namespace marisa
219