xref: /aosp_15_r20/external/marisa-trie/tools/marisa-common-prefix-search.cc (revision ab8db090fce404b23716c4c9194221ee27efe31c)
1*ab8db090SAndroid Build Coastguard Worker #include <cstdlib>
2*ab8db090SAndroid Build Coastguard Worker #include <iostream>
3*ab8db090SAndroid Build Coastguard Worker #include <string>
4*ab8db090SAndroid Build Coastguard Worker 
5*ab8db090SAndroid Build Coastguard Worker #include <marisa.h>
6*ab8db090SAndroid Build Coastguard Worker 
7*ab8db090SAndroid Build Coastguard Worker #include "cmdopt.h"
8*ab8db090SAndroid Build Coastguard Worker 
9*ab8db090SAndroid Build Coastguard Worker namespace {
10*ab8db090SAndroid Build Coastguard Worker 
// Maximum number of matches printed for each query. Starts at 10; the `-n'
// option handled in main() overwrites it.
std::size_t max_num_results(10);

// Selects how the dictionary is opened: true maps the file with
// marisa::Trie::mmap(), false reads it with marisa::Trie::load(). Set by the
// `-m' and `-r' options respectively.
bool mmap_flag(true);
13*ab8db090SAndroid Build Coastguard Worker 
// Writes the usage line and the list of supported options to standard error,
// followed by a blank line, and flushes the stream.
//
// cmd: program name shown after "Usage: " (main() passes argv[0]).
void print_help(const char *cmd) {
  // One group of literals per line of output; adjacent literals are
  // concatenated by the compiler.
  static const char options_text[] =
      "Options:\n"
      "  -n, --max-num-results=[N]  limit the number of results to N"
          " (default: 10)\n"
      "                             0: no limit\n"
      "  -m, --mmap-dictionary  use memory-mapped I/O to load a dictionary"
          " (default)\n"
      "  -r, --read-dictionary  read an entire dictionary into memory\n"
      "  -h, --help             print this help\n";

  std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n";
  std::cerr << options_text;
  std::cerr << std::endl;
}
26*ab8db090SAndroid Build Coastguard Worker 
common_prefix_search(const char * const * args,std::size_t num_args)27*ab8db090SAndroid Build Coastguard Worker int common_prefix_search(const char * const *args, std::size_t num_args) {
28*ab8db090SAndroid Build Coastguard Worker   if (num_args == 0) {
29*ab8db090SAndroid Build Coastguard Worker     std::cerr << "error: dictionary is not specified" << std::endl;
30*ab8db090SAndroid Build Coastguard Worker     return 10;
31*ab8db090SAndroid Build Coastguard Worker   } else if (num_args > 1) {
32*ab8db090SAndroid Build Coastguard Worker     std::cerr << "error: more than one dictionaries are specified"
33*ab8db090SAndroid Build Coastguard Worker         << std::endl;
34*ab8db090SAndroid Build Coastguard Worker     return 11;
35*ab8db090SAndroid Build Coastguard Worker   }
36*ab8db090SAndroid Build Coastguard Worker 
37*ab8db090SAndroid Build Coastguard Worker   marisa::Trie trie;
38*ab8db090SAndroid Build Coastguard Worker   if (mmap_flag) {
39*ab8db090SAndroid Build Coastguard Worker     try {
40*ab8db090SAndroid Build Coastguard Worker       trie.mmap(args[0]);
41*ab8db090SAndroid Build Coastguard Worker     } catch (const marisa::Exception &ex) {
42*ab8db090SAndroid Build Coastguard Worker       std::cerr << ex.what() << ": failed to mmap a dictionary file: "
43*ab8db090SAndroid Build Coastguard Worker           << args[0] << std::endl;
44*ab8db090SAndroid Build Coastguard Worker       return 20;
45*ab8db090SAndroid Build Coastguard Worker     }
46*ab8db090SAndroid Build Coastguard Worker   } else {
47*ab8db090SAndroid Build Coastguard Worker     try {
48*ab8db090SAndroid Build Coastguard Worker       trie.load(args[0]);
49*ab8db090SAndroid Build Coastguard Worker     } catch (const marisa::Exception &ex) {
50*ab8db090SAndroid Build Coastguard Worker       std::cerr << ex.what() << ": failed to load a dictionary file: "
51*ab8db090SAndroid Build Coastguard Worker           << args[0] << std::endl;
52*ab8db090SAndroid Build Coastguard Worker       return 21;
53*ab8db090SAndroid Build Coastguard Worker     }
54*ab8db090SAndroid Build Coastguard Worker   }
55*ab8db090SAndroid Build Coastguard Worker 
56*ab8db090SAndroid Build Coastguard Worker   marisa::Agent agent;
57*ab8db090SAndroid Build Coastguard Worker   marisa::Keyset keyset;
58*ab8db090SAndroid Build Coastguard Worker   std::string str;
59*ab8db090SAndroid Build Coastguard Worker   while (std::getline(std::cin, str)) {
60*ab8db090SAndroid Build Coastguard Worker     try {
61*ab8db090SAndroid Build Coastguard Worker       agent.set_query(str.c_str(), str.length());
62*ab8db090SAndroid Build Coastguard Worker       while (trie.common_prefix_search(agent)) {
63*ab8db090SAndroid Build Coastguard Worker         keyset.push_back(agent.key());
64*ab8db090SAndroid Build Coastguard Worker       }
65*ab8db090SAndroid Build Coastguard Worker       if (keyset.empty()) {
66*ab8db090SAndroid Build Coastguard Worker         std::cout << "not found" << std::endl;
67*ab8db090SAndroid Build Coastguard Worker       } else {
68*ab8db090SAndroid Build Coastguard Worker         std::cout << keyset.size() << " found" << std::endl;
69*ab8db090SAndroid Build Coastguard Worker         const std::size_t end = std::min(max_num_results, keyset.size());
70*ab8db090SAndroid Build Coastguard Worker         for (std::size_t i = 0; i < end; ++i) {
71*ab8db090SAndroid Build Coastguard Worker           std::cout << keyset[i].id() << '\t';
72*ab8db090SAndroid Build Coastguard Worker           std::cout.write(keyset[i].ptr(),
73*ab8db090SAndroid Build Coastguard Worker               static_cast<std::streamsize>(keyset[i].length())) << '\t';
74*ab8db090SAndroid Build Coastguard Worker           std::cout << str << '\n';
75*ab8db090SAndroid Build Coastguard Worker         }
76*ab8db090SAndroid Build Coastguard Worker       }
77*ab8db090SAndroid Build Coastguard Worker       keyset.reset();
78*ab8db090SAndroid Build Coastguard Worker     } catch (const marisa::Exception &ex) {
79*ab8db090SAndroid Build Coastguard Worker       std::cerr << ex.what() << ": common_prefix_search() failed: "
80*ab8db090SAndroid Build Coastguard Worker           << str << std::endl;
81*ab8db090SAndroid Build Coastguard Worker       return 30;
82*ab8db090SAndroid Build Coastguard Worker     }
83*ab8db090SAndroid Build Coastguard Worker 
84*ab8db090SAndroid Build Coastguard Worker     if (!std::cout) {
85*ab8db090SAndroid Build Coastguard Worker       std::cerr << "error: failed to write results to standard output"
86*ab8db090SAndroid Build Coastguard Worker           << std::endl;
87*ab8db090SAndroid Build Coastguard Worker       return 31;
88*ab8db090SAndroid Build Coastguard Worker     }
89*ab8db090SAndroid Build Coastguard Worker   }
90*ab8db090SAndroid Build Coastguard Worker 
91*ab8db090SAndroid Build Coastguard Worker   return 0;
92*ab8db090SAndroid Build Coastguard Worker }
93*ab8db090SAndroid Build Coastguard Worker 
94*ab8db090SAndroid Build Coastguard Worker }  // namespace
95*ab8db090SAndroid Build Coastguard Worker 
main(int argc,char * argv[])96*ab8db090SAndroid Build Coastguard Worker int main(int argc, char *argv[]) {
97*ab8db090SAndroid Build Coastguard Worker   std::ios::sync_with_stdio(false);
98*ab8db090SAndroid Build Coastguard Worker 
99*ab8db090SAndroid Build Coastguard Worker   ::cmdopt_option long_options[] = {
100*ab8db090SAndroid Build Coastguard Worker     { "max-num-results", 1, NULL, 'n' },
101*ab8db090SAndroid Build Coastguard Worker     { "mmap-dictionary", 0, NULL, 'm' },
102*ab8db090SAndroid Build Coastguard Worker     { "read-dictionary", 0, NULL, 'r' },
103*ab8db090SAndroid Build Coastguard Worker     { "help", 0, NULL, 'h' },
104*ab8db090SAndroid Build Coastguard Worker     { NULL, 0, NULL, 0 }
105*ab8db090SAndroid Build Coastguard Worker   };
106*ab8db090SAndroid Build Coastguard Worker   ::cmdopt_t cmdopt;
107*ab8db090SAndroid Build Coastguard Worker   ::cmdopt_init(&cmdopt, argc, argv, "n:mrh", long_options);
108*ab8db090SAndroid Build Coastguard Worker   int label;
109*ab8db090SAndroid Build Coastguard Worker   while ((label = ::cmdopt_get(&cmdopt)) != -1) {
110*ab8db090SAndroid Build Coastguard Worker     switch (label) {
111*ab8db090SAndroid Build Coastguard Worker       case 'n': {
112*ab8db090SAndroid Build Coastguard Worker         char *end_of_value;
113*ab8db090SAndroid Build Coastguard Worker         const long value = std::strtol(cmdopt.optarg, &end_of_value, 10);
114*ab8db090SAndroid Build Coastguard Worker         if ((*end_of_value != '\0') || (value < 0)) {
115*ab8db090SAndroid Build Coastguard Worker           std::cerr << "error: option `-n' with an invalid argument: "
116*ab8db090SAndroid Build Coastguard Worker               << cmdopt.optarg << std::endl;
117*ab8db090SAndroid Build Coastguard Worker         }
118*ab8db090SAndroid Build Coastguard Worker         if ((value == 0) || ((unsigned long long)value > MARISA_SIZE_MAX)) {
119*ab8db090SAndroid Build Coastguard Worker           max_num_results = MARISA_SIZE_MAX;
120*ab8db090SAndroid Build Coastguard Worker         } else {
121*ab8db090SAndroid Build Coastguard Worker           max_num_results = (std::size_t)value;
122*ab8db090SAndroid Build Coastguard Worker         }
123*ab8db090SAndroid Build Coastguard Worker         break;
124*ab8db090SAndroid Build Coastguard Worker       }
125*ab8db090SAndroid Build Coastguard Worker       case 'm': {
126*ab8db090SAndroid Build Coastguard Worker         mmap_flag = true;
127*ab8db090SAndroid Build Coastguard Worker         break;
128*ab8db090SAndroid Build Coastguard Worker       }
129*ab8db090SAndroid Build Coastguard Worker       case 'r': {
130*ab8db090SAndroid Build Coastguard Worker         mmap_flag = false;
131*ab8db090SAndroid Build Coastguard Worker         break;
132*ab8db090SAndroid Build Coastguard Worker       }
133*ab8db090SAndroid Build Coastguard Worker       case 'h': {
134*ab8db090SAndroid Build Coastguard Worker         print_help(argv[0]);
135*ab8db090SAndroid Build Coastguard Worker         return 0;
136*ab8db090SAndroid Build Coastguard Worker       }
137*ab8db090SAndroid Build Coastguard Worker       default: {
138*ab8db090SAndroid Build Coastguard Worker         return 1;
139*ab8db090SAndroid Build Coastguard Worker       }
140*ab8db090SAndroid Build Coastguard Worker     }
141*ab8db090SAndroid Build Coastguard Worker   }
142*ab8db090SAndroid Build Coastguard Worker   return common_prefix_search(cmdopt.argv + cmdopt.optind,
143*ab8db090SAndroid Build Coastguard Worker       static_cast<std::size_t>(cmdopt.argc - cmdopt.optind));
144*ab8db090SAndroid Build Coastguard Worker }
145