xref: /aosp_15_r20/external/toybox/toys/pending/tr.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1*cf5a6c84SAndroid Build Coastguard Worker /* tr.c - translate or delete characters
2*cf5a6c84SAndroid Build Coastguard Worker  *
3*cf5a6c84SAndroid Build Coastguard Worker  * Copyright 2014 Sandeep Sharma <[email protected]>
4*cf5a6c84SAndroid Build Coastguard Worker  *
5*cf5a6c84SAndroid Build Coastguard Worker  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html
6*cf5a6c84SAndroid Build Coastguard Worker  * TODO: -a (ascii)
7*cf5a6c84SAndroid Build Coastguard Worker 
8*cf5a6c84SAndroid Build Coastguard Worker USE_TR(NEWTOY(tr, "^<1>2Ccstd[+cC]", TOYFLAG_USR|TOYFLAG_BIN))
9*cf5a6c84SAndroid Build Coastguard Worker 
10*cf5a6c84SAndroid Build Coastguard Worker config TR
11*cf5a6c84SAndroid Build Coastguard Worker   bool "tr"
12*cf5a6c84SAndroid Build Coastguard Worker   default n
13*cf5a6c84SAndroid Build Coastguard Worker   help
14*cf5a6c84SAndroid Build Coastguard Worker     usage: tr [-cdst] SET1 [SET2]
15*cf5a6c84SAndroid Build Coastguard Worker 
16*cf5a6c84SAndroid Build Coastguard Worker     Translate, squeeze, or delete characters from stdin, writing to stdout
17*cf5a6c84SAndroid Build Coastguard Worker 
18*cf5a6c84SAndroid Build Coastguard Worker     -c/-C  Take complement of SET1
19*cf5a6c84SAndroid Build Coastguard Worker     -d     Delete input characters coded SET1
20*cf5a6c84SAndroid Build Coastguard Worker     -s     Squeeze multiple output characters of SET2 into one character
21*cf5a6c84SAndroid Build Coastguard Worker     -t     Truncate SET1 to length of SET2
22*cf5a6c84SAndroid Build Coastguard Worker */
23*cf5a6c84SAndroid Build Coastguard Worker 
24*cf5a6c84SAndroid Build Coastguard Worker #define FOR_tr
25*cf5a6c84SAndroid Build Coastguard Worker #include "toys.h"
26*cf5a6c84SAndroid Build Coastguard Worker 
// Per-command state, reached through TT.
// map: table of 256 shorts indexed by input byte value (allocated and
//   initialised to the identity mapping in tr_main()). The low 8 bits hold
//   the output byte, 0x100 marks the byte for deletion (-d) and 0x200 marks
//   it as taking part in squeezing (-s).
// len1, len2: number of bytes in the expanded SET1 and SET2.
GLOBALS(
  short *map;
  int len1, len2;
)
31*cf5a6c84SAndroid Build Coastguard Worker 
// Character classes understood inside SET1/SET2. The numeric values double
// as indices into the name table of find_class(), so both lists must stay in
// the same order. class_invalid comes last and is the "no match" result.
enum {
  class_alpha,
  class_alnum,
  class_digit,
  class_lower,
  class_upper,
  class_space,
  class_blank,
  class_punct,
  class_cntrl,
  class_xdigit,
  class_invalid
};
37*cf5a6c84SAndroid Build Coastguard Worker 
map_translation(char * set1,char * set2)38*cf5a6c84SAndroid Build Coastguard Worker static void map_translation(char *set1 , char *set2)
39*cf5a6c84SAndroid Build Coastguard Worker {
40*cf5a6c84SAndroid Build Coastguard Worker   int i = TT.len1, k = 0;
41*cf5a6c84SAndroid Build Coastguard Worker 
42*cf5a6c84SAndroid Build Coastguard Worker   if (FLAG(d))
43*cf5a6c84SAndroid Build Coastguard Worker     for (; i; i--, k++) TT.map[set1[k]] = set1[k]|0x100; //set delete bit
44*cf5a6c84SAndroid Build Coastguard Worker 
45*cf5a6c84SAndroid Build Coastguard Worker   if (FLAG(s)) {
46*cf5a6c84SAndroid Build Coastguard Worker     for (i = TT.len1, k = 0; i; i--, k++)
47*cf5a6c84SAndroid Build Coastguard Worker       TT.map[set1[k]] = TT.map[set1[k]]|0x200;
48*cf5a6c84SAndroid Build Coastguard Worker     for (i = TT.len2, k = 0; i; i--, k++)
49*cf5a6c84SAndroid Build Coastguard Worker       TT.map[set2[k]] = TT.map[set2[k]]|0x200;
50*cf5a6c84SAndroid Build Coastguard Worker   }
51*cf5a6c84SAndroid Build Coastguard Worker   i = k = 0;
52*cf5a6c84SAndroid Build Coastguard Worker   while (!FLAG(d) && set2 && TT.len1--) { //ignore set2 if -d present
53*cf5a6c84SAndroid Build Coastguard Worker     TT.map[set1[i]] = ((TT.map[set1[i]] & 0xFF00) | set2[k]);
54*cf5a6c84SAndroid Build Coastguard Worker     if (set2[k + 1]) k++;
55*cf5a6c84SAndroid Build Coastguard Worker     i++;
56*cf5a6c84SAndroid Build Coastguard Worker   }
57*cf5a6c84SAndroid Build Coastguard Worker }
58*cf5a6c84SAndroid Build Coastguard Worker 
// Decode one backslash escape (adapted from printf).
//
// esc_val: on entry points at the byte following the backslash. On return it
//   points at the last byte that was consumed, so the caller steps one byte
//   further to resume. For an invalid escape it is moved back onto the
//   backslash itself, so the bytes after it get read as ordinary characters.
//
// Understands \xH and \xHH (hex), one to three octal digits, and whatever
// single character escapes unescape() recognises. Returns the decoded value,
// or '\\' when the escape is not valid.
static int handle_escape_char(char **esc_val)
{
  char *ptr = *esc_val;
  int esc_length = 0;
  unsigned base = 0, num = 0, result = 0, count = 0;

  if (*ptr == 'x') {
    ptr++;
    esc_length++; // the 'x' counts, which caps hex at two digits below
    base = 16;
  } else if (isdigit((unsigned char)*ptr)) base = 8;

  while (esc_length < 3 && base) {
    // ctype functions need an unsigned char value, a negative char is
    // undefined behaviour.
    num = tolower((unsigned char)*ptr) - '0';
    // Anything past '9' has to be a letter. Testing against 9 (not 10) keeps
    // ':' from slipping through as the hex digit 'a'; non-digits wrap to a
    // huge unsigned value and are rejected by the base check.
    if (num > 9) num += ('0' - 'a' + 10);
    if (num >= base) {
      if (base == 16) {
        if (!--esc_length) { // no hex digit after \x: emit it literally
          result = '\\';
          ptr--;
        }
      } else if (!esc_length) result = '\\'; // \8 and \9 are not octal
      break;
    }
    esc_length++;
    result = (char)(count = (count * base) + num);
    ptr++;
  }
  if (base) ptr--;
  else if (!(result = unescape(*ptr))) {
    result = '\\';
    ptr--;
  }
  *esc_val = ptr;
  return result;
}
96*cf5a6c84SAndroid Build Coastguard Worker 
find_class(char * class_name)97*cf5a6c84SAndroid Build Coastguard Worker static int find_class(char *class_name)
98*cf5a6c84SAndroid Build Coastguard Worker {
99*cf5a6c84SAndroid Build Coastguard Worker   int i;
100*cf5a6c84SAndroid Build Coastguard Worker   static char *class[] = {
101*cf5a6c84SAndroid Build Coastguard Worker     "[:alpha:]","[:alnum:]","[:digit:]", "[:lower:]","[:upper:]","[:space:]",
102*cf5a6c84SAndroid Build Coastguard Worker     "[:blank:]","[:punct:]","[:cntrl:]", "[:xdigit:]"
103*cf5a6c84SAndroid Build Coastguard Worker   };
104*cf5a6c84SAndroid Build Coastguard Worker 
105*cf5a6c84SAndroid Build Coastguard Worker   for (i = 0; i != class_invalid; i++)
106*cf5a6c84SAndroid Build Coastguard Worker     if (!memcmp(class_name, class[i], 9+(*class_name == 'x'))) break;
107*cf5a6c84SAndroid Build Coastguard Worker 
108*cf5a6c84SAndroid Build Coastguard Worker   return i;
109*cf5a6c84SAndroid Build Coastguard Worker }
110*cf5a6c84SAndroid Build Coastguard Worker 
expand_set(char * arg,int * len,size_t until)111*cf5a6c84SAndroid Build Coastguard Worker static char *expand_set(char *arg, int *len, size_t until)
112*cf5a6c84SAndroid Build Coastguard Worker {
113*cf5a6c84SAndroid Build Coastguard Worker   int i = 0, j, k, size = 256;
114*cf5a6c84SAndroid Build Coastguard Worker   char *set = xzalloc(size), *orig = arg;
115*cf5a6c84SAndroid Build Coastguard Worker 
116*cf5a6c84SAndroid Build Coastguard Worker   while (*arg) {
117*cf5a6c84SAndroid Build Coastguard Worker     if (arg-orig >= until) break;
118*cf5a6c84SAndroid Build Coastguard Worker     if (i >= size) {
119*cf5a6c84SAndroid Build Coastguard Worker       size += 256;
120*cf5a6c84SAndroid Build Coastguard Worker       set = xrealloc(set, size);
121*cf5a6c84SAndroid Build Coastguard Worker     }
122*cf5a6c84SAndroid Build Coastguard Worker     if (*arg == '\\') {
123*cf5a6c84SAndroid Build Coastguard Worker       arg++;
124*cf5a6c84SAndroid Build Coastguard Worker       set[i++] = handle_escape_char(&arg);
125*cf5a6c84SAndroid Build Coastguard Worker       arg++;
126*cf5a6c84SAndroid Build Coastguard Worker       continue;
127*cf5a6c84SAndroid Build Coastguard Worker     }
128*cf5a6c84SAndroid Build Coastguard Worker     if (arg[1] == '-') {
129*cf5a6c84SAndroid Build Coastguard Worker       if (!arg[2]) goto save;
130*cf5a6c84SAndroid Build Coastguard Worker       j = *arg;
131*cf5a6c84SAndroid Build Coastguard Worker       k = arg[2];
132*cf5a6c84SAndroid Build Coastguard Worker       if (j > k) perror_exit("reverse colating order");
133*cf5a6c84SAndroid Build Coastguard Worker       while (j <= k) set[i++] = j++;
134*cf5a6c84SAndroid Build Coastguard Worker       arg += 3;
135*cf5a6c84SAndroid Build Coastguard Worker       continue;
136*cf5a6c84SAndroid Build Coastguard Worker     }
137*cf5a6c84SAndroid Build Coastguard Worker     if (*arg == '[' && arg[1] == ':') {
138*cf5a6c84SAndroid Build Coastguard Worker 
139*cf5a6c84SAndroid Build Coastguard Worker       if ((j = find_class(arg)) == class_invalid) goto save;
140*cf5a6c84SAndroid Build Coastguard Worker 
141*cf5a6c84SAndroid Build Coastguard Worker       if ((j == class_alpha) || (j == class_upper) || (j == class_alnum))
142*cf5a6c84SAndroid Build Coastguard Worker         for (k = 'A'; k <= 'Z'; k++) set[i++] = k;
143*cf5a6c84SAndroid Build Coastguard Worker       if ((j == class_alpha) || (j == class_lower) || (j == class_alnum))
144*cf5a6c84SAndroid Build Coastguard Worker         for (k = 'a'; k <= 'z'; k++) set[i++] = k;
145*cf5a6c84SAndroid Build Coastguard Worker       if ((j == class_alnum) || (j == class_digit) || (j == class_xdigit))
146*cf5a6c84SAndroid Build Coastguard Worker         for (k = '0'; k <= '9'; k++) set[i++] = k;
147*cf5a6c84SAndroid Build Coastguard Worker       if (j == class_space || j == class_blank) {
148*cf5a6c84SAndroid Build Coastguard Worker         set[i++] = '\t';
149*cf5a6c84SAndroid Build Coastguard Worker         if (j == class_space) {
150*cf5a6c84SAndroid Build Coastguard Worker           set[i++] = '\n';
151*cf5a6c84SAndroid Build Coastguard Worker           set[i++] = '\f';
152*cf5a6c84SAndroid Build Coastguard Worker           set[i++] = '\r';
153*cf5a6c84SAndroid Build Coastguard Worker           set[i++] = '\v';
154*cf5a6c84SAndroid Build Coastguard Worker         }
155*cf5a6c84SAndroid Build Coastguard Worker         set[i++] = ' ';
156*cf5a6c84SAndroid Build Coastguard Worker       }
157*cf5a6c84SAndroid Build Coastguard Worker       if (j == class_punct)
158*cf5a6c84SAndroid Build Coastguard Worker         for (k = 0; k <= 255; k++) if (ispunct(k)) set[i++] = k;
159*cf5a6c84SAndroid Build Coastguard Worker       if (j == class_cntrl)
160*cf5a6c84SAndroid Build Coastguard Worker         for (k = 0; k <= 255; k++) if (iscntrl(k)) set[i++] = k;
161*cf5a6c84SAndroid Build Coastguard Worker       if (j == class_xdigit) {
162*cf5a6c84SAndroid Build Coastguard Worker         for (k = 'A'; k <= 'F'; k++) {
163*cf5a6c84SAndroid Build Coastguard Worker           set[i + 6] = k | 0x20;
164*cf5a6c84SAndroid Build Coastguard Worker           set[i++] = k;
165*cf5a6c84SAndroid Build Coastguard Worker         }
166*cf5a6c84SAndroid Build Coastguard Worker         i += 6;
167*cf5a6c84SAndroid Build Coastguard Worker         arg += 10;
168*cf5a6c84SAndroid Build Coastguard Worker         continue;
169*cf5a6c84SAndroid Build Coastguard Worker       }
170*cf5a6c84SAndroid Build Coastguard Worker 
171*cf5a6c84SAndroid Build Coastguard Worker       arg += 9; //never here for class_xdigit.
172*cf5a6c84SAndroid Build Coastguard Worker       continue;
173*cf5a6c84SAndroid Build Coastguard Worker     }
174*cf5a6c84SAndroid Build Coastguard Worker     if (*arg == '[' && arg[1] == '=') { //[=char=] only
175*cf5a6c84SAndroid Build Coastguard Worker       arg += 2;
176*cf5a6c84SAndroid Build Coastguard Worker       if (*arg) set[i++] = *arg;
177*cf5a6c84SAndroid Build Coastguard Worker       if (!arg[1] || arg[1] != '=' || arg[2] != ']')
178*cf5a6c84SAndroid Build Coastguard Worker         error_exit("bad equiv class");
179*cf5a6c84SAndroid Build Coastguard Worker       continue;
180*cf5a6c84SAndroid Build Coastguard Worker     }
181*cf5a6c84SAndroid Build Coastguard Worker save:
182*cf5a6c84SAndroid Build Coastguard Worker     set[i++] = *arg++;
183*cf5a6c84SAndroid Build Coastguard Worker   }
184*cf5a6c84SAndroid Build Coastguard Worker   *len = i;
185*cf5a6c84SAndroid Build Coastguard Worker   return set;
186*cf5a6c84SAndroid Build Coastguard Worker }
187*cf5a6c84SAndroid Build Coastguard Worker 
print_map(char * set1,char * set2)188*cf5a6c84SAndroid Build Coastguard Worker static void print_map(char *set1, char *set2)
189*cf5a6c84SAndroid Build Coastguard Worker {
190*cf5a6c84SAndroid Build Coastguard Worker   int n, ch, src, dst, prev = -1;
191*cf5a6c84SAndroid Build Coastguard Worker 
192*cf5a6c84SAndroid Build Coastguard Worker   while ((n = read(0, toybuf, sizeof(toybuf)))) {
193*cf5a6c84SAndroid Build Coastguard Worker     if (!FLAG(d) && !FLAG(s))
194*cf5a6c84SAndroid Build Coastguard Worker       for (dst = 0; dst < n; dst++) toybuf[dst] = TT.map[toybuf[dst]];
195*cf5a6c84SAndroid Build Coastguard Worker     else for (src = dst = 0; src < n; src++) {
196*cf5a6c84SAndroid Build Coastguard Worker       ch = TT.map[toybuf[src]];
197*cf5a6c84SAndroid Build Coastguard Worker       if (FLAG(d) && (ch & 0x100)) continue;
198*cf5a6c84SAndroid Build Coastguard Worker       if (FLAG(s) && ((ch & 0x200) && prev == ch)) continue;
199*cf5a6c84SAndroid Build Coastguard Worker       toybuf[dst++] = prev = ch;
200*cf5a6c84SAndroid Build Coastguard Worker     }
201*cf5a6c84SAndroid Build Coastguard Worker     xwrite(1, toybuf, dst);
202*cf5a6c84SAndroid Build Coastguard Worker   }
203*cf5a6c84SAndroid Build Coastguard Worker }
204*cf5a6c84SAndroid Build Coastguard Worker 
do_complement(char ** set)205*cf5a6c84SAndroid Build Coastguard Worker static void do_complement(char **set)
206*cf5a6c84SAndroid Build Coastguard Worker {
207*cf5a6c84SAndroid Build Coastguard Worker   int i = 0, j = 0;
208*cf5a6c84SAndroid Build Coastguard Worker   char *comp = xmalloc(256);
209*cf5a6c84SAndroid Build Coastguard Worker 
210*cf5a6c84SAndroid Build Coastguard Worker   for (; i < 256; i++) {
211*cf5a6c84SAndroid Build Coastguard Worker     if (memchr(*set, i, TT.len1)) continue;
212*cf5a6c84SAndroid Build Coastguard Worker     else comp[j++] = (char)i;
213*cf5a6c84SAndroid Build Coastguard Worker   }
214*cf5a6c84SAndroid Build Coastguard Worker   free(*set);
215*cf5a6c84SAndroid Build Coastguard Worker   TT.len1 = j;
216*cf5a6c84SAndroid Build Coastguard Worker   *set = comp;
217*cf5a6c84SAndroid Build Coastguard Worker }
218*cf5a6c84SAndroid Build Coastguard Worker 
tr_main(void)219*cf5a6c84SAndroid Build Coastguard Worker void tr_main(void)
220*cf5a6c84SAndroid Build Coastguard Worker {
221*cf5a6c84SAndroid Build Coastguard Worker   char *set1, *set2 = NULL;
222*cf5a6c84SAndroid Build Coastguard Worker   int i = 0;
223*cf5a6c84SAndroid Build Coastguard Worker 
224*cf5a6c84SAndroid Build Coastguard Worker   TT.map = xmalloc(256*sizeof(*TT.map));
225*cf5a6c84SAndroid Build Coastguard Worker   for (; i < 256; i++) TT.map[i] = i; //init map
226*cf5a6c84SAndroid Build Coastguard Worker 
227*cf5a6c84SAndroid Build Coastguard Worker   set1 = expand_set(*toys.optargs, &TT.len1,
228*cf5a6c84SAndroid Build Coastguard Worker       (FLAG(t) && toys.optargs[1]) ? strlen(toys.optargs[1]) : -1);
229*cf5a6c84SAndroid Build Coastguard Worker   if (FLAG(c)) do_complement(&set1);
230*cf5a6c84SAndroid Build Coastguard Worker   if (toys.optargs[1]) {
231*cf5a6c84SAndroid Build Coastguard Worker     if (!*toys.optargs[1]) error_exit("set2 can't be empty string");
232*cf5a6c84SAndroid Build Coastguard Worker     set2 = expand_set(toys.optargs[1], &TT.len2, -1);
233*cf5a6c84SAndroid Build Coastguard Worker   }
234*cf5a6c84SAndroid Build Coastguard Worker   map_translation(set1, set2);
235*cf5a6c84SAndroid Build Coastguard Worker 
236*cf5a6c84SAndroid Build Coastguard Worker   print_map(set1, set2);
237*cf5a6c84SAndroid Build Coastguard Worker   free(set1);
238*cf5a6c84SAndroid Build Coastguard Worker   free(set2);
239*cf5a6c84SAndroid Build Coastguard Worker }
240