1*ab8db090SAndroid Build Coastguard Worker #include <cstdlib>
2*ab8db090SAndroid Build Coastguard Worker #include <iostream>
3*ab8db090SAndroid Build Coastguard Worker #include <string>
4*ab8db090SAndroid Build Coastguard Worker
5*ab8db090SAndroid Build Coastguard Worker #include <marisa.h>
6*ab8db090SAndroid Build Coastguard Worker
7*ab8db090SAndroid Build Coastguard Worker #include "cmdopt.h"
8*ab8db090SAndroid Build Coastguard Worker
9*ab8db090SAndroid Build Coastguard Worker namespace {
10*ab8db090SAndroid Build Coastguard Worker
// Upper bound on the number of matches printed per query. main() overwrites
// it from the -n option (0 is mapped to MARISA_SIZE_MAX, i.e. no limit).
11*ab8db090SAndroid Build Coastguard Worker std::size_t max_num_results = 10;
// Selects how the dictionary is opened: true -> trie.mmap(), false ->
// trie.load(). Set by the -m / -r options.
12*ab8db090SAndroid Build Coastguard Worker bool mmap_flag = true;
13*ab8db090SAndroid Build Coastguard Worker
// Writes the usage banner followed by the list of supported options to
// standard error, then flushes the stream.
//
// cmd: program name shown in the "Usage:" line.
void print_help(const char *cmd) {
  std::ostream &err = std::cerr;

  err << "Usage: " << cmd << " [OPTION]... DIC\n\n";
  err << "Options:\n";
  err << " -n, --max-num-results=[N] limit the number of results to N"
         " (default: 10)\n";
  err << " 0: no limit\n";
  err << " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary"
         " (default)\n";
  err << " -r, --read-dictionary read an entire dictionary into memory\n";
  err << " -h, --help print this help\n";

  // Trailing blank line plus flush, as one operation.
  err << std::endl;
}
26*ab8db090SAndroid Build Coastguard Worker
common_prefix_search(const char * const * args,std::size_t num_args)27*ab8db090SAndroid Build Coastguard Worker int common_prefix_search(const char * const *args, std::size_t num_args) {
28*ab8db090SAndroid Build Coastguard Worker if (num_args == 0) {
29*ab8db090SAndroid Build Coastguard Worker std::cerr << "error: dictionary is not specified" << std::endl;
30*ab8db090SAndroid Build Coastguard Worker return 10;
31*ab8db090SAndroid Build Coastguard Worker } else if (num_args > 1) {
32*ab8db090SAndroid Build Coastguard Worker std::cerr << "error: more than one dictionaries are specified"
33*ab8db090SAndroid Build Coastguard Worker << std::endl;
34*ab8db090SAndroid Build Coastguard Worker return 11;
35*ab8db090SAndroid Build Coastguard Worker }
36*ab8db090SAndroid Build Coastguard Worker
37*ab8db090SAndroid Build Coastguard Worker marisa::Trie trie;
38*ab8db090SAndroid Build Coastguard Worker if (mmap_flag) {
39*ab8db090SAndroid Build Coastguard Worker try {
40*ab8db090SAndroid Build Coastguard Worker trie.mmap(args[0]);
41*ab8db090SAndroid Build Coastguard Worker } catch (const marisa::Exception &ex) {
42*ab8db090SAndroid Build Coastguard Worker std::cerr << ex.what() << ": failed to mmap a dictionary file: "
43*ab8db090SAndroid Build Coastguard Worker << args[0] << std::endl;
44*ab8db090SAndroid Build Coastguard Worker return 20;
45*ab8db090SAndroid Build Coastguard Worker }
46*ab8db090SAndroid Build Coastguard Worker } else {
47*ab8db090SAndroid Build Coastguard Worker try {
48*ab8db090SAndroid Build Coastguard Worker trie.load(args[0]);
49*ab8db090SAndroid Build Coastguard Worker } catch (const marisa::Exception &ex) {
50*ab8db090SAndroid Build Coastguard Worker std::cerr << ex.what() << ": failed to load a dictionary file: "
51*ab8db090SAndroid Build Coastguard Worker << args[0] << std::endl;
52*ab8db090SAndroid Build Coastguard Worker return 21;
53*ab8db090SAndroid Build Coastguard Worker }
54*ab8db090SAndroid Build Coastguard Worker }
55*ab8db090SAndroid Build Coastguard Worker
56*ab8db090SAndroid Build Coastguard Worker marisa::Agent agent;
57*ab8db090SAndroid Build Coastguard Worker marisa::Keyset keyset;
58*ab8db090SAndroid Build Coastguard Worker std::string str;
59*ab8db090SAndroid Build Coastguard Worker while (std::getline(std::cin, str)) {
60*ab8db090SAndroid Build Coastguard Worker try {
61*ab8db090SAndroid Build Coastguard Worker agent.set_query(str.c_str(), str.length());
62*ab8db090SAndroid Build Coastguard Worker while (trie.common_prefix_search(agent)) {
63*ab8db090SAndroid Build Coastguard Worker keyset.push_back(agent.key());
64*ab8db090SAndroid Build Coastguard Worker }
65*ab8db090SAndroid Build Coastguard Worker if (keyset.empty()) {
66*ab8db090SAndroid Build Coastguard Worker std::cout << "not found" << std::endl;
67*ab8db090SAndroid Build Coastguard Worker } else {
68*ab8db090SAndroid Build Coastguard Worker std::cout << keyset.size() << " found" << std::endl;
69*ab8db090SAndroid Build Coastguard Worker const std::size_t end = std::min(max_num_results, keyset.size());
70*ab8db090SAndroid Build Coastguard Worker for (std::size_t i = 0; i < end; ++i) {
71*ab8db090SAndroid Build Coastguard Worker std::cout << keyset[i].id() << '\t';
72*ab8db090SAndroid Build Coastguard Worker std::cout.write(keyset[i].ptr(),
73*ab8db090SAndroid Build Coastguard Worker static_cast<std::streamsize>(keyset[i].length())) << '\t';
74*ab8db090SAndroid Build Coastguard Worker std::cout << str << '\n';
75*ab8db090SAndroid Build Coastguard Worker }
76*ab8db090SAndroid Build Coastguard Worker }
77*ab8db090SAndroid Build Coastguard Worker keyset.reset();
78*ab8db090SAndroid Build Coastguard Worker } catch (const marisa::Exception &ex) {
79*ab8db090SAndroid Build Coastguard Worker std::cerr << ex.what() << ": common_prefix_search() failed: "
80*ab8db090SAndroid Build Coastguard Worker << str << std::endl;
81*ab8db090SAndroid Build Coastguard Worker return 30;
82*ab8db090SAndroid Build Coastguard Worker }
83*ab8db090SAndroid Build Coastguard Worker
84*ab8db090SAndroid Build Coastguard Worker if (!std::cout) {
85*ab8db090SAndroid Build Coastguard Worker std::cerr << "error: failed to write results to standard output"
86*ab8db090SAndroid Build Coastguard Worker << std::endl;
87*ab8db090SAndroid Build Coastguard Worker return 31;
88*ab8db090SAndroid Build Coastguard Worker }
89*ab8db090SAndroid Build Coastguard Worker }
90*ab8db090SAndroid Build Coastguard Worker
91*ab8db090SAndroid Build Coastguard Worker return 0;
92*ab8db090SAndroid Build Coastguard Worker }
93*ab8db090SAndroid Build Coastguard Worker
94*ab8db090SAndroid Build Coastguard Worker } // namespace
95*ab8db090SAndroid Build Coastguard Worker
main(int argc,char * argv[])96*ab8db090SAndroid Build Coastguard Worker int main(int argc, char *argv[]) {
97*ab8db090SAndroid Build Coastguard Worker std::ios::sync_with_stdio(false);
98*ab8db090SAndroid Build Coastguard Worker
99*ab8db090SAndroid Build Coastguard Worker ::cmdopt_option long_options[] = {
100*ab8db090SAndroid Build Coastguard Worker { "max-num-results", 1, NULL, 'n' },
101*ab8db090SAndroid Build Coastguard Worker { "mmap-dictionary", 0, NULL, 'm' },
102*ab8db090SAndroid Build Coastguard Worker { "read-dictionary", 0, NULL, 'r' },
103*ab8db090SAndroid Build Coastguard Worker { "help", 0, NULL, 'h' },
104*ab8db090SAndroid Build Coastguard Worker { NULL, 0, NULL, 0 }
105*ab8db090SAndroid Build Coastguard Worker };
106*ab8db090SAndroid Build Coastguard Worker ::cmdopt_t cmdopt;
107*ab8db090SAndroid Build Coastguard Worker ::cmdopt_init(&cmdopt, argc, argv, "n:mrh", long_options);
108*ab8db090SAndroid Build Coastguard Worker int label;
109*ab8db090SAndroid Build Coastguard Worker while ((label = ::cmdopt_get(&cmdopt)) != -1) {
110*ab8db090SAndroid Build Coastguard Worker switch (label) {
111*ab8db090SAndroid Build Coastguard Worker case 'n': {
112*ab8db090SAndroid Build Coastguard Worker char *end_of_value;
113*ab8db090SAndroid Build Coastguard Worker const long value = std::strtol(cmdopt.optarg, &end_of_value, 10);
114*ab8db090SAndroid Build Coastguard Worker if ((*end_of_value != '\0') || (value < 0)) {
115*ab8db090SAndroid Build Coastguard Worker std::cerr << "error: option `-n' with an invalid argument: "
116*ab8db090SAndroid Build Coastguard Worker << cmdopt.optarg << std::endl;
117*ab8db090SAndroid Build Coastguard Worker }
118*ab8db090SAndroid Build Coastguard Worker if ((value == 0) || ((unsigned long long)value > MARISA_SIZE_MAX)) {
119*ab8db090SAndroid Build Coastguard Worker max_num_results = MARISA_SIZE_MAX;
120*ab8db090SAndroid Build Coastguard Worker } else {
121*ab8db090SAndroid Build Coastguard Worker max_num_results = (std::size_t)value;
122*ab8db090SAndroid Build Coastguard Worker }
123*ab8db090SAndroid Build Coastguard Worker break;
124*ab8db090SAndroid Build Coastguard Worker }
125*ab8db090SAndroid Build Coastguard Worker case 'm': {
126*ab8db090SAndroid Build Coastguard Worker mmap_flag = true;
127*ab8db090SAndroid Build Coastguard Worker break;
128*ab8db090SAndroid Build Coastguard Worker }
129*ab8db090SAndroid Build Coastguard Worker case 'r': {
130*ab8db090SAndroid Build Coastguard Worker mmap_flag = false;
131*ab8db090SAndroid Build Coastguard Worker break;
132*ab8db090SAndroid Build Coastguard Worker }
133*ab8db090SAndroid Build Coastguard Worker case 'h': {
134*ab8db090SAndroid Build Coastguard Worker print_help(argv[0]);
135*ab8db090SAndroid Build Coastguard Worker return 0;
136*ab8db090SAndroid Build Coastguard Worker }
137*ab8db090SAndroid Build Coastguard Worker default: {
138*ab8db090SAndroid Build Coastguard Worker return 1;
139*ab8db090SAndroid Build Coastguard Worker }
140*ab8db090SAndroid Build Coastguard Worker }
141*ab8db090SAndroid Build Coastguard Worker }
142*ab8db090SAndroid Build Coastguard Worker return common_prefix_search(cmdopt.argv + cmdopt.optind,
143*ab8db090SAndroid Build Coastguard Worker static_cast<std::size_t>(cmdopt.argc - cmdopt.optind));
144*ab8db090SAndroid Build Coastguard Worker }
145