1*993b0882SAndroid Build Coastguard Worker /* 2*993b0882SAndroid Build Coastguard Worker * Copyright (C) 2018 The Android Open Source Project 3*993b0882SAndroid Build Coastguard Worker * 4*993b0882SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License"); 5*993b0882SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License. 6*993b0882SAndroid Build Coastguard Worker * You may obtain a copy of the License at 7*993b0882SAndroid Build Coastguard Worker * 8*993b0882SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0 9*993b0882SAndroid Build Coastguard Worker * 10*993b0882SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software 11*993b0882SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS, 12*993b0882SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13*993b0882SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and 14*993b0882SAndroid Build Coastguard Worker * limitations under the License. 15*993b0882SAndroid Build Coastguard Worker */ 16*993b0882SAndroid Build Coastguard Worker 17*993b0882SAndroid Build Coastguard Worker #ifndef LIBTEXTCLASSIFIER_UTILS_CONTAINER_DOUBLE_ARRAY_TRIE_H_ 18*993b0882SAndroid Build Coastguard Worker #define LIBTEXTCLASSIFIER_UTILS_CONTAINER_DOUBLE_ARRAY_TRIE_H_ 19*993b0882SAndroid Build Coastguard Worker 20*993b0882SAndroid Build Coastguard Worker #include <functional> 21*993b0882SAndroid Build Coastguard Worker #include <vector> 22*993b0882SAndroid Build Coastguard Worker 23*993b0882SAndroid Build Coastguard Worker #include "utils/base/endian.h" 24*993b0882SAndroid Build Coastguard Worker #include "utils/base/integral_types.h" 25*993b0882SAndroid Build Coastguard Worker #include "utils/container/string-set.h" 26*993b0882SAndroid Build Coastguard Worker #include "utils/strings/stringpiece.h" 27*993b0882SAndroid Build Coastguard Worker 28*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 { 29*993b0882SAndroid Build Coastguard Worker 30*993b0882SAndroid Build Coastguard Worker // A trie node specifies a node in the tree, either an intermediate node or 31*993b0882SAndroid Build Coastguard Worker // a leaf node. 32*993b0882SAndroid Build Coastguard Worker // A leaf node contains the id as an int of the string match. This id is encoded 33*993b0882SAndroid Build Coastguard Worker // in the lower 31 bits, thus the number of distinct ids is 2^31. 34*993b0882SAndroid Build Coastguard Worker // An intermediate node has an associated label and an offset to it's children. 35*993b0882SAndroid Build Coastguard Worker // The label is encoded in the least significant byte and must match the input 36*993b0882SAndroid Build Coastguard Worker // character during matching. 37*993b0882SAndroid Build Coastguard Worker // We account for endianness when using the node values, as they are serialized 38*993b0882SAndroid Build Coastguard Worker // (in little endian) as bytes in the flatbuffer model. 39*993b0882SAndroid Build Coastguard Worker typedef uint32 TrieNode; 40*993b0882SAndroid Build Coastguard Worker 41*993b0882SAndroid Build Coastguard Worker // A memory mappable trie, compatible with Darts::DoubleArray. 42*993b0882SAndroid Build Coastguard Worker class DoubleArrayTrie : public StringSet { 43*993b0882SAndroid Build Coastguard Worker public: 44*993b0882SAndroid Build Coastguard Worker // nodes and nodes_length specify the array of the nodes of the trie. DoubleArrayTrie(const TrieNode * nodes,const int nodes_length)45*993b0882SAndroid Build Coastguard Worker DoubleArrayTrie(const TrieNode* nodes, const int nodes_length) 46*993b0882SAndroid Build Coastguard Worker : nodes_(nodes), nodes_length_(nodes_length) {} 47*993b0882SAndroid Build Coastguard Worker 48*993b0882SAndroid Build Coastguard Worker // Find matches that are prefixes of a string. 49*993b0882SAndroid Build Coastguard Worker bool FindAllPrefixMatches(StringPiece input, 50*993b0882SAndroid Build Coastguard Worker std::vector<Match>* matches) const override; 51*993b0882SAndroid Build Coastguard Worker // Find the longest prefix match of a string. 52*993b0882SAndroid Build Coastguard Worker bool LongestPrefixMatch(StringPiece input, 53*993b0882SAndroid Build Coastguard Worker Match* longest_match) const override; 54*993b0882SAndroid Build Coastguard Worker 55*993b0882SAndroid Build Coastguard Worker private: 56*993b0882SAndroid Build Coastguard Worker // Returns whether a node as a leaf as a child. has_leaf(uint32 i)57*993b0882SAndroid Build Coastguard Worker bool has_leaf(uint32 i) const { return nodes_[i] & 0x100; } 58*993b0882SAndroid Build Coastguard Worker 59*993b0882SAndroid Build Coastguard Worker // Available when a node is a leaf. value(uint32 i)60*993b0882SAndroid Build Coastguard Worker int value(uint32 i) const { 61*993b0882SAndroid Build Coastguard Worker return static_cast<int>(LittleEndian::ToHost32(nodes_[i]) & 0x7fffffff); 62*993b0882SAndroid Build Coastguard Worker } 63*993b0882SAndroid Build Coastguard Worker 64*993b0882SAndroid Build Coastguard Worker // Label associated with a node. 65*993b0882SAndroid Build Coastguard Worker // A leaf node will have the MSB set and thus return an invalid label. label(uint32 i)66*993b0882SAndroid Build Coastguard Worker uint32 label(uint32 i) const { 67*993b0882SAndroid Build Coastguard Worker return LittleEndian::ToHost32(nodes_[i]) & 0x800000ff; 68*993b0882SAndroid Build Coastguard Worker } 69*993b0882SAndroid Build Coastguard Worker 70*993b0882SAndroid Build Coastguard Worker // Returns offset to children. offset(uint32 i)71*993b0882SAndroid Build Coastguard Worker uint32 offset(uint32 i) const { 72*993b0882SAndroid Build Coastguard Worker const uint32 node = LittleEndian::ToHost32(nodes_[i]); 73*993b0882SAndroid Build Coastguard Worker return (node >> 10) << ((node & 0x200) >> 6); 74*993b0882SAndroid Build Coastguard Worker } 75*993b0882SAndroid Build Coastguard Worker 76*993b0882SAndroid Build Coastguard Worker bool GatherPrefixMatches(StringPiece input, 77*993b0882SAndroid Build Coastguard Worker const std::function<void(Match)>& update_fn) const; 78*993b0882SAndroid Build Coastguard Worker 79*993b0882SAndroid Build Coastguard Worker const TrieNode* nodes_; 80*993b0882SAndroid Build Coastguard Worker const int nodes_length_; 81*993b0882SAndroid Build Coastguard Worker }; 82*993b0882SAndroid Build Coastguard Worker 83*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3 84*993b0882SAndroid Build Coastguard Worker 85*993b0882SAndroid Build Coastguard Worker #endif // LIBTEXTCLASSIFIER_UTILS_CONTAINER_DOUBLE_ARRAY_TRIE_H_ 86