1*ab8db090SAndroid Build Coastguard Worker #ifdef _WIN32
2*ab8db090SAndroid Build Coastguard Worker #include <fcntl.h>
3*ab8db090SAndroid Build Coastguard Worker #include <io.h>
4*ab8db090SAndroid Build Coastguard Worker #include <stdio.h>
5*ab8db090SAndroid Build Coastguard Worker #endif // _WIN32
6*ab8db090SAndroid Build Coastguard Worker
7*ab8db090SAndroid Build Coastguard Worker #include <cstdlib>
8*ab8db090SAndroid Build Coastguard Worker #include <iostream>
9*ab8db090SAndroid Build Coastguard Worker #include <string>
10*ab8db090SAndroid Build Coastguard Worker
11*ab8db090SAndroid Build Coastguard Worker #include <marisa.h>
12*ab8db090SAndroid Build Coastguard Worker
13*ab8db090SAndroid Build Coastguard Worker #include "cmdopt.h"
14*ab8db090SAndroid Build Coastguard Worker
15*ab8db090SAndroid Build Coastguard Worker namespace {
16*ab8db090SAndroid Build Coastguard Worker
// Text written to standard output after each dumped key. Defaults to a
// newline and is replaced by the argument of -d/--delimiter.
const char *delimiter = "\n";
// Selects how a dictionary file is opened: true uses Trie::mmap(), false uses
// Trie::load(). Set by -m/--mmap-dictionary and cleared by
// -r/--read-dictionary.
bool mmap_flag = true;
19*ab8db090SAndroid Build Coastguard Worker
// Prints the command-line usage summary to standard error.
//
// cmd is the program name shown on the "Usage:" line.
void print_help(const char *cmd) {
  // The option descriptions are adjacent string literals, so they are
  // concatenated at compile time and streamed as a single C string.
  std::cerr << "Usage: " << cmd << " [OPTION]... DIC...\n\n"
      "Options:\n"
      " -d, --delimiter=[S] specify the delimiter (default: \"\\n\")\n"
      " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary"
      " (default)\n"
      " -r, --read-dictionary read an entire dictionary into memory\n"
      " -h, --help print this help\n"
      << std::endl;
}
30*ab8db090SAndroid Build Coastguard Worker
dump(const marisa::Trie & trie)31*ab8db090SAndroid Build Coastguard Worker int dump(const marisa::Trie &trie) {
32*ab8db090SAndroid Build Coastguard Worker std::size_t num_keys = 0;
33*ab8db090SAndroid Build Coastguard Worker marisa::Agent agent;
34*ab8db090SAndroid Build Coastguard Worker agent.set_query("");
35*ab8db090SAndroid Build Coastguard Worker try {
36*ab8db090SAndroid Build Coastguard Worker while (trie.predictive_search(agent)) {
37*ab8db090SAndroid Build Coastguard Worker std::cout.write(agent.key().ptr(),
38*ab8db090SAndroid Build Coastguard Worker static_cast<std::streamsize>(agent.key().length())) << delimiter;
39*ab8db090SAndroid Build Coastguard Worker if (!std::cout) {
40*ab8db090SAndroid Build Coastguard Worker std::cerr << "error: failed to write results to standard output"
41*ab8db090SAndroid Build Coastguard Worker << std::endl;
42*ab8db090SAndroid Build Coastguard Worker return 20;
43*ab8db090SAndroid Build Coastguard Worker }
44*ab8db090SAndroid Build Coastguard Worker ++num_keys;
45*ab8db090SAndroid Build Coastguard Worker }
46*ab8db090SAndroid Build Coastguard Worker } catch (const marisa::Exception &ex) {
47*ab8db090SAndroid Build Coastguard Worker std::cerr << ex.what() << ": predictive_search() failed" << std::endl;
48*ab8db090SAndroid Build Coastguard Worker return 21;
49*ab8db090SAndroid Build Coastguard Worker }
50*ab8db090SAndroid Build Coastguard Worker std::cerr << "#keys: " << num_keys << std::endl;
51*ab8db090SAndroid Build Coastguard Worker return 0;
52*ab8db090SAndroid Build Coastguard Worker }
53*ab8db090SAndroid Build Coastguard Worker
dump(const char * filename)54*ab8db090SAndroid Build Coastguard Worker int dump(const char *filename) {
55*ab8db090SAndroid Build Coastguard Worker marisa::Trie trie;
56*ab8db090SAndroid Build Coastguard Worker if (filename != NULL) {
57*ab8db090SAndroid Build Coastguard Worker std::cerr << "input: " << filename << std::endl;
58*ab8db090SAndroid Build Coastguard Worker if (mmap_flag) {
59*ab8db090SAndroid Build Coastguard Worker try {
60*ab8db090SAndroid Build Coastguard Worker trie.mmap(filename);
61*ab8db090SAndroid Build Coastguard Worker } catch (const marisa::Exception &ex) {
62*ab8db090SAndroid Build Coastguard Worker std::cerr << ex.what() << ": failed to mmap a dictionary file: "
63*ab8db090SAndroid Build Coastguard Worker << filename << std::endl;
64*ab8db090SAndroid Build Coastguard Worker return 10;
65*ab8db090SAndroid Build Coastguard Worker }
66*ab8db090SAndroid Build Coastguard Worker } else {
67*ab8db090SAndroid Build Coastguard Worker try {
68*ab8db090SAndroid Build Coastguard Worker trie.load(filename);
69*ab8db090SAndroid Build Coastguard Worker } catch (const marisa::Exception &ex) {
70*ab8db090SAndroid Build Coastguard Worker std::cerr << ex.what() << ": failed to load a dictionary file: "
71*ab8db090SAndroid Build Coastguard Worker << filename << std::endl;
72*ab8db090SAndroid Build Coastguard Worker return 11;
73*ab8db090SAndroid Build Coastguard Worker }
74*ab8db090SAndroid Build Coastguard Worker }
75*ab8db090SAndroid Build Coastguard Worker } else {
76*ab8db090SAndroid Build Coastguard Worker std::cerr << "input: <stdin>" << std::endl;
77*ab8db090SAndroid Build Coastguard Worker #ifdef _WIN32
78*ab8db090SAndroid Build Coastguard Worker const int stdin_fileno = ::_fileno(stdin);
79*ab8db090SAndroid Build Coastguard Worker if (stdin_fileno < 0) {
80*ab8db090SAndroid Build Coastguard Worker std::cerr << "error: failed to get the file descriptor of "
81*ab8db090SAndroid Build Coastguard Worker "standard input" << std::endl;
82*ab8db090SAndroid Build Coastguard Worker return 20;
83*ab8db090SAndroid Build Coastguard Worker }
84*ab8db090SAndroid Build Coastguard Worker if (::_setmode(stdin_fileno, _O_BINARY) == -1) {
85*ab8db090SAndroid Build Coastguard Worker std::cerr << "error: failed to set binary mode" << std::endl;
86*ab8db090SAndroid Build Coastguard Worker return 21;
87*ab8db090SAndroid Build Coastguard Worker }
88*ab8db090SAndroid Build Coastguard Worker #endif // _WIN32
89*ab8db090SAndroid Build Coastguard Worker try {
90*ab8db090SAndroid Build Coastguard Worker std::cin >> trie;
91*ab8db090SAndroid Build Coastguard Worker } catch (const marisa::Exception &ex) {
92*ab8db090SAndroid Build Coastguard Worker std::cerr << ex.what()
93*ab8db090SAndroid Build Coastguard Worker << ": failed to read a dictionary from standard input" << std::endl;
94*ab8db090SAndroid Build Coastguard Worker return 22;
95*ab8db090SAndroid Build Coastguard Worker }
96*ab8db090SAndroid Build Coastguard Worker }
97*ab8db090SAndroid Build Coastguard Worker return dump(trie);
98*ab8db090SAndroid Build Coastguard Worker }
99*ab8db090SAndroid Build Coastguard Worker
dump(const char * const * args,std::size_t num_args)100*ab8db090SAndroid Build Coastguard Worker int dump(const char * const *args, std::size_t num_args) {
101*ab8db090SAndroid Build Coastguard Worker if (num_args == 0) {
102*ab8db090SAndroid Build Coastguard Worker return dump(NULL);
103*ab8db090SAndroid Build Coastguard Worker }
104*ab8db090SAndroid Build Coastguard Worker for (std::size_t i = 0; i < num_args; ++i) {
105*ab8db090SAndroid Build Coastguard Worker const int result = dump(args[i]);
106*ab8db090SAndroid Build Coastguard Worker if (result != 0) {
107*ab8db090SAndroid Build Coastguard Worker return result;
108*ab8db090SAndroid Build Coastguard Worker }
109*ab8db090SAndroid Build Coastguard Worker }
110*ab8db090SAndroid Build Coastguard Worker return 0;
111*ab8db090SAndroid Build Coastguard Worker }
112*ab8db090SAndroid Build Coastguard Worker
113*ab8db090SAndroid Build Coastguard Worker } // namespace
114*ab8db090SAndroid Build Coastguard Worker
main(int argc,char * argv[])115*ab8db090SAndroid Build Coastguard Worker int main(int argc, char *argv[]) {
116*ab8db090SAndroid Build Coastguard Worker std::ios::sync_with_stdio(false);
117*ab8db090SAndroid Build Coastguard Worker
118*ab8db090SAndroid Build Coastguard Worker ::cmdopt_option long_options[] = {
119*ab8db090SAndroid Build Coastguard Worker { "delimiter", 1, NULL, 'd' },
120*ab8db090SAndroid Build Coastguard Worker { "mmap-dictionary", 0, NULL, 'm' },
121*ab8db090SAndroid Build Coastguard Worker { "read-dictionary", 0, NULL, 'r' },
122*ab8db090SAndroid Build Coastguard Worker { "help", 0, NULL, 'h' },
123*ab8db090SAndroid Build Coastguard Worker { NULL, 0, NULL, 0 }
124*ab8db090SAndroid Build Coastguard Worker };
125*ab8db090SAndroid Build Coastguard Worker ::cmdopt_t cmdopt;
126*ab8db090SAndroid Build Coastguard Worker ::cmdopt_init(&cmdopt, argc, argv, "d:mrh", long_options);
127*ab8db090SAndroid Build Coastguard Worker int label;
128*ab8db090SAndroid Build Coastguard Worker while ((label = ::cmdopt_get(&cmdopt)) != -1) {
129*ab8db090SAndroid Build Coastguard Worker switch (label) {
130*ab8db090SAndroid Build Coastguard Worker case 'd': {
131*ab8db090SAndroid Build Coastguard Worker delimiter = cmdopt.optarg;
132*ab8db090SAndroid Build Coastguard Worker break;
133*ab8db090SAndroid Build Coastguard Worker }
134*ab8db090SAndroid Build Coastguard Worker case 'm': {
135*ab8db090SAndroid Build Coastguard Worker mmap_flag = true;
136*ab8db090SAndroid Build Coastguard Worker break;
137*ab8db090SAndroid Build Coastguard Worker }
138*ab8db090SAndroid Build Coastguard Worker case 'r': {
139*ab8db090SAndroid Build Coastguard Worker mmap_flag = false;
140*ab8db090SAndroid Build Coastguard Worker break;
141*ab8db090SAndroid Build Coastguard Worker }
142*ab8db090SAndroid Build Coastguard Worker case 'h': {
143*ab8db090SAndroid Build Coastguard Worker print_help(argv[0]);
144*ab8db090SAndroid Build Coastguard Worker return 0;
145*ab8db090SAndroid Build Coastguard Worker }
146*ab8db090SAndroid Build Coastguard Worker default: {
147*ab8db090SAndroid Build Coastguard Worker return 1;
148*ab8db090SAndroid Build Coastguard Worker }
149*ab8db090SAndroid Build Coastguard Worker }
150*ab8db090SAndroid Build Coastguard Worker }
151*ab8db090SAndroid Build Coastguard Worker return dump(cmdopt.argv + cmdopt.optind,
152*ab8db090SAndroid Build Coastguard Worker static_cast<std::size_t>(cmdopt.argc - cmdopt.optind));
153*ab8db090SAndroid Build Coastguard Worker }
154