1 #include "marisa/grimoire/algorithm.h"
2 #include "marisa/grimoire/trie/state.h"
3 #include "marisa/grimoire/trie/tail.h"
4
5 namespace marisa {
6 namespace grimoire {
7 namespace trie {
8
Tail()9 Tail::Tail() : buf_(), end_flags_() {}
10
build(Vector<Entry> & entries,Vector<UInt32> * offsets,TailMode mode)11 void Tail::build(Vector<Entry> &entries, Vector<UInt32> *offsets,
12 TailMode mode) {
13 MARISA_THROW_IF(offsets == NULL, MARISA_NULL_ERROR);
14
15 switch (mode) {
16 case MARISA_TEXT_TAIL: {
17 for (std::size_t i = 0; i < entries.size(); ++i) {
18 const char * const ptr = entries[i].ptr();
19 const std::size_t length = entries[i].length();
20 for (std::size_t j = 0; j < length; ++j) {
21 if (ptr[j] == '\0') {
22 mode = MARISA_BINARY_TAIL;
23 break;
24 }
25 }
26 if (mode == MARISA_BINARY_TAIL) {
27 break;
28 }
29 }
30 break;
31 }
32 case MARISA_BINARY_TAIL: {
33 break;
34 }
35 default: {
36 MARISA_THROW(MARISA_CODE_ERROR, "undefined tail mode");
37 }
38 }
39
40 Tail temp;
41 temp.build_(entries, offsets, mode);
42 swap(temp);
43 }
44
map(Mapper & mapper)45 void Tail::map(Mapper &mapper) {
46 Tail temp;
47 temp.map_(mapper);
48 swap(temp);
49 }
50
read(Reader & reader)51 void Tail::read(Reader &reader) {
52 Tail temp;
53 temp.read_(reader);
54 swap(temp);
55 }
56
write(Writer & writer) const57 void Tail::write(Writer &writer) const {
58 write_(writer);
59 }
60
restore(Agent & agent,std::size_t offset) const61 void Tail::restore(Agent &agent, std::size_t offset) const {
62 MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
63
64 State &state = agent.state();
65 if (end_flags_.empty()) {
66 for (const char *ptr = &buf_[offset]; *ptr != '\0'; ++ptr) {
67 state.key_buf().push_back(*ptr);
68 }
69 } else {
70 do {
71 state.key_buf().push_back(buf_[offset]);
72 } while (!end_flags_[offset++]);
73 }
74 }
75
match(Agent & agent,std::size_t offset) const76 bool Tail::match(Agent &agent, std::size_t offset) const {
77 MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
78 MARISA_DEBUG_IF(agent.state().query_pos() >= agent.query().length(),
79 MARISA_BOUND_ERROR);
80
81 State &state = agent.state();
82 if (end_flags_.empty()) {
83 const char * const ptr = &buf_[offset] - state.query_pos();
84 do {
85 if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
86 return false;
87 }
88 state.set_query_pos(state.query_pos() + 1);
89 if (ptr[state.query_pos()] == '\0') {
90 return true;
91 }
92 } while (state.query_pos() < agent.query().length());
93 return false;
94 } else {
95 do {
96 if (buf_[offset] != agent.query()[state.query_pos()]) {
97 return false;
98 }
99 state.set_query_pos(state.query_pos() + 1);
100 if (end_flags_[offset++]) {
101 return true;
102 }
103 } while (state.query_pos() < agent.query().length());
104 return false;
105 }
106 }
107
prefix_match(Agent & agent,std::size_t offset) const108 bool Tail::prefix_match(Agent &agent, std::size_t offset) const {
109 MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
110
111 State &state = agent.state();
112 if (end_flags_.empty()) {
113 const char *ptr = &buf_[offset] - state.query_pos();
114 do {
115 if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
116 return false;
117 }
118 state.key_buf().push_back(ptr[state.query_pos()]);
119 state.set_query_pos(state.query_pos() + 1);
120 if (ptr[state.query_pos()] == '\0') {
121 return true;
122 }
123 } while (state.query_pos() < agent.query().length());
124 ptr += state.query_pos();
125 do {
126 state.key_buf().push_back(*ptr);
127 } while (*++ptr != '\0');
128 return true;
129 } else {
130 do {
131 if (buf_[offset] != agent.query()[state.query_pos()]) {
132 return false;
133 }
134 state.key_buf().push_back(buf_[offset]);
135 state.set_query_pos(state.query_pos() + 1);
136 if (end_flags_[offset++]) {
137 return true;
138 }
139 } while (state.query_pos() < agent.query().length());
140 do {
141 state.key_buf().push_back(buf_[offset]);
142 } while (!end_flags_[offset++]);
143 return true;
144 }
145 }
146
clear()147 void Tail::clear() {
148 Tail().swap(*this);
149 }
150
swap(Tail & rhs)151 void Tail::swap(Tail &rhs) {
152 buf_.swap(rhs.buf_);
153 end_flags_.swap(rhs.end_flags_);
154 }
155
build_(Vector<Entry> & entries,Vector<UInt32> * offsets,TailMode mode)156 void Tail::build_(Vector<Entry> &entries, Vector<UInt32> *offsets,
157 TailMode mode) {
158 for (std::size_t i = 0; i < entries.size(); ++i) {
159 entries[i].set_id(i);
160 }
161 Algorithm().sort(entries.begin(), entries.end());
162
163 Vector<UInt32> temp_offsets;
164 temp_offsets.resize(entries.size(), 0);
165
166 const Entry dummy;
167 const Entry *last = &dummy;
168 for (std::size_t i = entries.size(); i > 0; --i) {
169 const Entry ¤t = entries[i - 1];
170 MARISA_THROW_IF(current.length() == 0, MARISA_RANGE_ERROR);
171 std::size_t match = 0;
172 while ((match < current.length()) && (match < last->length()) &&
173 ((*last)[match] == current[match])) {
174 ++match;
175 }
176 if ((match == current.length()) && (last->length() != 0)) {
177 temp_offsets[current.id()] = (UInt32)(
178 temp_offsets[last->id()] + (last->length() - match));
179 } else {
180 temp_offsets[current.id()] = (UInt32)buf_.size();
181 for (std::size_t j = 1; j <= current.length(); ++j) {
182 buf_.push_back(current[current.length() - j]);
183 }
184 if (mode == MARISA_TEXT_TAIL) {
185 buf_.push_back('\0');
186 } else {
187 for (std::size_t j = 1; j < current.length(); ++j) {
188 end_flags_.push_back(false);
189 }
190 end_flags_.push_back(true);
191 }
192 MARISA_THROW_IF(buf_.size() > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
193 }
194 last = ¤t;
195 }
196 buf_.shrink();
197
198 offsets->swap(temp_offsets);
199 }
200
map_(Mapper & mapper)201 void Tail::map_(Mapper &mapper) {
202 buf_.map(mapper);
203 end_flags_.map(mapper);
204 }
205
read_(Reader & reader)206 void Tail::read_(Reader &reader) {
207 buf_.read(reader);
208 end_flags_.read(reader);
209 }
210
write_(Writer & writer) const211 void Tail::write_(Writer &writer) const {
212 buf_.write(writer);
213 end_flags_.write(writer);
214 }
215
216 } // namespace trie
217 } // namespace grimoire
218 } // namespace marisa
219