xref: /aosp_15_r20/external/marisa-trie/tools/marisa-dump.cc (revision ab8db090fce404b23716c4c9194221ee27efe31c)
1*ab8db090SAndroid Build Coastguard Worker #ifdef _WIN32
2*ab8db090SAndroid Build Coastguard Worker  #include <fcntl.h>
3*ab8db090SAndroid Build Coastguard Worker  #include <io.h>
4*ab8db090SAndroid Build Coastguard Worker  #include <stdio.h>
5*ab8db090SAndroid Build Coastguard Worker #endif  // _WIN32
6*ab8db090SAndroid Build Coastguard Worker 
7*ab8db090SAndroid Build Coastguard Worker #include <cstdlib>
8*ab8db090SAndroid Build Coastguard Worker #include <iostream>
9*ab8db090SAndroid Build Coastguard Worker #include <string>
10*ab8db090SAndroid Build Coastguard Worker 
11*ab8db090SAndroid Build Coastguard Worker #include <marisa.h>
12*ab8db090SAndroid Build Coastguard Worker 
13*ab8db090SAndroid Build Coastguard Worker #include "cmdopt.h"
14*ab8db090SAndroid Build Coastguard Worker 
15*ab8db090SAndroid Build Coastguard Worker namespace {
16*ab8db090SAndroid Build Coastguard Worker 
// Chooses how a named dictionary file is opened by dump(const char *):
// true -> marisa::Trie::mmap(), false -> marisa::Trie::load().
// main() sets it from the -m / -r options.
bool mmap_flag = true;

// Text written to standard output after every dumped key. main() replaces
// it with the argument of the -d option when that option is given.
const char *delimiter = "\n";
19*ab8db090SAndroid Build Coastguard Worker 
// Prints the usage summary and the list of supported options to standard
// error, followed by a blank line.
//
// cmd: program name inserted into the "Usage:" line (main() passes argv[0]).
void print_help(const char *cmd) {
  std::cerr << "Usage: " << cmd << " [OPTION]... DIC...\n\n"
      "Options:\n"
      "  -d, --delimiter=[S]    specify the delimiter (default: \"\\n\")\n"
      "  -m, --mmap-dictionary  use memory-mapped I/O to load a dictionary"
      " (default)\n"
      "  -r, --read-dictionary  read an entire dictionary into memory\n"
      "  -h, --help             print this help\n"
      << std::endl;
}
30*ab8db090SAndroid Build Coastguard Worker 
dump(const marisa::Trie & trie)31*ab8db090SAndroid Build Coastguard Worker int dump(const marisa::Trie &trie) {
32*ab8db090SAndroid Build Coastguard Worker   std::size_t num_keys = 0;
33*ab8db090SAndroid Build Coastguard Worker   marisa::Agent agent;
34*ab8db090SAndroid Build Coastguard Worker   agent.set_query("");
35*ab8db090SAndroid Build Coastguard Worker   try {
36*ab8db090SAndroid Build Coastguard Worker     while (trie.predictive_search(agent)) {
37*ab8db090SAndroid Build Coastguard Worker       std::cout.write(agent.key().ptr(),
38*ab8db090SAndroid Build Coastguard Worker           static_cast<std::streamsize>(agent.key().length())) << delimiter;
39*ab8db090SAndroid Build Coastguard Worker       if (!std::cout) {
40*ab8db090SAndroid Build Coastguard Worker         std::cerr << "error: failed to write results to standard output"
41*ab8db090SAndroid Build Coastguard Worker             << std::endl;
42*ab8db090SAndroid Build Coastguard Worker         return 20;
43*ab8db090SAndroid Build Coastguard Worker       }
44*ab8db090SAndroid Build Coastguard Worker       ++num_keys;
45*ab8db090SAndroid Build Coastguard Worker     }
46*ab8db090SAndroid Build Coastguard Worker   } catch (const marisa::Exception &ex) {
47*ab8db090SAndroid Build Coastguard Worker     std::cerr << ex.what() << ": predictive_search() failed" << std::endl;
48*ab8db090SAndroid Build Coastguard Worker     return 21;
49*ab8db090SAndroid Build Coastguard Worker   }
50*ab8db090SAndroid Build Coastguard Worker   std::cerr << "#keys: " << num_keys << std::endl;
51*ab8db090SAndroid Build Coastguard Worker   return 0;
52*ab8db090SAndroid Build Coastguard Worker }
53*ab8db090SAndroid Build Coastguard Worker 
dump(const char * filename)54*ab8db090SAndroid Build Coastguard Worker int dump(const char *filename) {
55*ab8db090SAndroid Build Coastguard Worker   marisa::Trie trie;
56*ab8db090SAndroid Build Coastguard Worker   if (filename != NULL) {
57*ab8db090SAndroid Build Coastguard Worker     std::cerr << "input: " << filename << std::endl;
58*ab8db090SAndroid Build Coastguard Worker     if (mmap_flag) {
59*ab8db090SAndroid Build Coastguard Worker       try {
60*ab8db090SAndroid Build Coastguard Worker         trie.mmap(filename);
61*ab8db090SAndroid Build Coastguard Worker       } catch (const marisa::Exception &ex) {
62*ab8db090SAndroid Build Coastguard Worker         std::cerr << ex.what() << ": failed to mmap a dictionary file: "
63*ab8db090SAndroid Build Coastguard Worker             << filename << std::endl;
64*ab8db090SAndroid Build Coastguard Worker         return 10;
65*ab8db090SAndroid Build Coastguard Worker       }
66*ab8db090SAndroid Build Coastguard Worker     } else {
67*ab8db090SAndroid Build Coastguard Worker       try {
68*ab8db090SAndroid Build Coastguard Worker         trie.load(filename);
69*ab8db090SAndroid Build Coastguard Worker       } catch (const marisa::Exception &ex) {
70*ab8db090SAndroid Build Coastguard Worker         std::cerr << ex.what() << ": failed to load a dictionary file: "
71*ab8db090SAndroid Build Coastguard Worker             << filename << std::endl;
72*ab8db090SAndroid Build Coastguard Worker         return 11;
73*ab8db090SAndroid Build Coastguard Worker       }
74*ab8db090SAndroid Build Coastguard Worker     }
75*ab8db090SAndroid Build Coastguard Worker   } else {
76*ab8db090SAndroid Build Coastguard Worker     std::cerr << "input: <stdin>" << std::endl;
77*ab8db090SAndroid Build Coastguard Worker #ifdef _WIN32
78*ab8db090SAndroid Build Coastguard Worker     const int stdin_fileno = ::_fileno(stdin);
79*ab8db090SAndroid Build Coastguard Worker     if (stdin_fileno < 0) {
80*ab8db090SAndroid Build Coastguard Worker       std::cerr << "error: failed to get the file descriptor of "
81*ab8db090SAndroid Build Coastguard Worker           "standard input" << std::endl;
82*ab8db090SAndroid Build Coastguard Worker       return 20;
83*ab8db090SAndroid Build Coastguard Worker     }
84*ab8db090SAndroid Build Coastguard Worker     if (::_setmode(stdin_fileno, _O_BINARY) == -1) {
85*ab8db090SAndroid Build Coastguard Worker       std::cerr << "error: failed to set binary mode" << std::endl;
86*ab8db090SAndroid Build Coastguard Worker       return 21;
87*ab8db090SAndroid Build Coastguard Worker     }
88*ab8db090SAndroid Build Coastguard Worker #endif  // _WIN32
89*ab8db090SAndroid Build Coastguard Worker     try {
90*ab8db090SAndroid Build Coastguard Worker       std::cin >> trie;
91*ab8db090SAndroid Build Coastguard Worker     } catch (const marisa::Exception &ex) {
92*ab8db090SAndroid Build Coastguard Worker       std::cerr << ex.what()
93*ab8db090SAndroid Build Coastguard Worker           << ": failed to read a dictionary from standard input" << std::endl;
94*ab8db090SAndroid Build Coastguard Worker       return 22;
95*ab8db090SAndroid Build Coastguard Worker     }
96*ab8db090SAndroid Build Coastguard Worker   }
97*ab8db090SAndroid Build Coastguard Worker   return dump(trie);
98*ab8db090SAndroid Build Coastguard Worker }
99*ab8db090SAndroid Build Coastguard Worker 
dump(const char * const * args,std::size_t num_args)100*ab8db090SAndroid Build Coastguard Worker int dump(const char * const *args, std::size_t num_args) {
101*ab8db090SAndroid Build Coastguard Worker   if (num_args == 0) {
102*ab8db090SAndroid Build Coastguard Worker     return dump(NULL);
103*ab8db090SAndroid Build Coastguard Worker   }
104*ab8db090SAndroid Build Coastguard Worker   for (std::size_t i = 0; i < num_args; ++i) {
105*ab8db090SAndroid Build Coastguard Worker     const int result = dump(args[i]);
106*ab8db090SAndroid Build Coastguard Worker     if (result != 0) {
107*ab8db090SAndroid Build Coastguard Worker       return result;
108*ab8db090SAndroid Build Coastguard Worker     }
109*ab8db090SAndroid Build Coastguard Worker   }
110*ab8db090SAndroid Build Coastguard Worker   return 0;
111*ab8db090SAndroid Build Coastguard Worker }
112*ab8db090SAndroid Build Coastguard Worker 
113*ab8db090SAndroid Build Coastguard Worker }  // namespace
114*ab8db090SAndroid Build Coastguard Worker 
main(int argc,char * argv[])115*ab8db090SAndroid Build Coastguard Worker int main(int argc, char *argv[]) {
116*ab8db090SAndroid Build Coastguard Worker   std::ios::sync_with_stdio(false);
117*ab8db090SAndroid Build Coastguard Worker 
118*ab8db090SAndroid Build Coastguard Worker   ::cmdopt_option long_options[] = {
119*ab8db090SAndroid Build Coastguard Worker     { "delimiter", 1, NULL, 'd' },
120*ab8db090SAndroid Build Coastguard Worker     { "mmap-dictionary", 0, NULL, 'm' },
121*ab8db090SAndroid Build Coastguard Worker     { "read-dictionary", 0, NULL, 'r' },
122*ab8db090SAndroid Build Coastguard Worker     { "help", 0, NULL, 'h' },
123*ab8db090SAndroid Build Coastguard Worker     { NULL, 0, NULL, 0 }
124*ab8db090SAndroid Build Coastguard Worker   };
125*ab8db090SAndroid Build Coastguard Worker   ::cmdopt_t cmdopt;
126*ab8db090SAndroid Build Coastguard Worker   ::cmdopt_init(&cmdopt, argc, argv, "d:mrh", long_options);
127*ab8db090SAndroid Build Coastguard Worker   int label;
128*ab8db090SAndroid Build Coastguard Worker   while ((label = ::cmdopt_get(&cmdopt)) != -1) {
129*ab8db090SAndroid Build Coastguard Worker     switch (label) {
130*ab8db090SAndroid Build Coastguard Worker       case 'd': {
131*ab8db090SAndroid Build Coastguard Worker         delimiter = cmdopt.optarg;
132*ab8db090SAndroid Build Coastguard Worker         break;
133*ab8db090SAndroid Build Coastguard Worker       }
134*ab8db090SAndroid Build Coastguard Worker       case 'm': {
135*ab8db090SAndroid Build Coastguard Worker         mmap_flag = true;
136*ab8db090SAndroid Build Coastguard Worker         break;
137*ab8db090SAndroid Build Coastguard Worker       }
138*ab8db090SAndroid Build Coastguard Worker       case 'r': {
139*ab8db090SAndroid Build Coastguard Worker         mmap_flag = false;
140*ab8db090SAndroid Build Coastguard Worker         break;
141*ab8db090SAndroid Build Coastguard Worker       }
142*ab8db090SAndroid Build Coastguard Worker       case 'h': {
143*ab8db090SAndroid Build Coastguard Worker         print_help(argv[0]);
144*ab8db090SAndroid Build Coastguard Worker         return 0;
145*ab8db090SAndroid Build Coastguard Worker       }
146*ab8db090SAndroid Build Coastguard Worker       default: {
147*ab8db090SAndroid Build Coastguard Worker         return 1;
148*ab8db090SAndroid Build Coastguard Worker       }
149*ab8db090SAndroid Build Coastguard Worker     }
150*ab8db090SAndroid Build Coastguard Worker   }
151*ab8db090SAndroid Build Coastguard Worker   return dump(cmdopt.argv + cmdopt.optind,
152*ab8db090SAndroid Build Coastguard Worker       static_cast<std::size_t>(cmdopt.argc - cmdopt.optind));
153*ab8db090SAndroid Build Coastguard Worker }
154