xref: /aosp_15_r20/external/toybox/toys/pending/csplit.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1*cf5a6c84SAndroid Build Coastguard Worker /* csplit.c - split files on context
2*cf5a6c84SAndroid Build Coastguard Worker  *
3*cf5a6c84SAndroid Build Coastguard Worker  * Copyright 2023 Oliver Webb <[email protected]>
4*cf5a6c84SAndroid Build Coastguard Worker  *
5*cf5a6c84SAndroid Build Coastguard Worker  * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/csplit.html
6*cf5a6c84SAndroid Build Coastguard Worker  *
7*cf5a6c84SAndroid Build Coastguard Worker  * Deviations From POSIX: Add "{*}", file size is %ld, no negative offsets
8*cf5a6c84SAndroid Build Coastguard Worker 
9*cf5a6c84SAndroid Build Coastguard Worker USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN))
10*cf5a6c84SAndroid Build Coastguard Worker 
11*cf5a6c84SAndroid Build Coastguard Worker config CSPLIT
12*cf5a6c84SAndroid Build Coastguard Worker   bool "csplit"
13*cf5a6c84SAndroid Build Coastguard Worker   default n
14*cf5a6c84SAndroid Build Coastguard Worker   help
15*cf5a6c84SAndroid Build Coastguard Worker     usage: csplit [-ks] [-f PREFIX] [-n INTEGER] file arg...
16*cf5a6c84SAndroid Build Coastguard Worker 
17*cf5a6c84SAndroid Build Coastguard Worker     Split files into multiple files based on list of rules
18*cf5a6c84SAndroid Build Coastguard Worker 
19*cf5a6c84SAndroid Build Coastguard Worker     -k	Does not delete Files on error
20*cf5a6c84SAndroid Build Coastguard Worker     -s	No file output size messages
21*cf5a6c84SAndroid Build Coastguard Worker     -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx"
22*cf5a6c84SAndroid Build Coastguard Worker     -n [INTEGER] Make all filename numbers [INTEGER] characters long
23*cf5a6c84SAndroid Build Coastguard Worker 
24*cf5a6c84SAndroid Build Coastguard Worker     Valid Rules:
    /regexp/[INTEGER] Break file before the line that regexp matches
    %regexp%[INTEGER] Exclude lines until a line matches regexp
    If an offset [INTEGER] is specified for these rules, the break will
    happen [INTEGER] lines after the regexp match
    [INTEGER] Break file at line before [INTEGER]
    {INTEGER} Repeat the previous rule INTEGER number of times; if INTEGER
    is *, the rule repeats forever
33*cf5a6c84SAndroid Build Coastguard Worker */
34*cf5a6c84SAndroid Build Coastguard Worker 
35*cf5a6c84SAndroid Build Coastguard Worker #define FOR_csplit
36*cf5a6c84SAndroid Build Coastguard Worker #include "toys.h"
37*cf5a6c84SAndroid Build Coastguard Worker 
38*cf5a6c84SAndroid Build Coastguard Worker GLOBALS(
39*cf5a6c84SAndroid Build Coastguard Worker   long n;
40*cf5a6c84SAndroid Build Coastguard Worker   char *f;
41*cf5a6c84SAndroid Build Coastguard Worker 
42*cf5a6c84SAndroid Build Coastguard Worker   size_t indx, findx, lineno;
43*cf5a6c84SAndroid Build Coastguard Worker   char *filefmt, *prefix;
44*cf5a6c84SAndroid Build Coastguard Worker   // Variables the context checker need to track between lines
45*cf5a6c84SAndroid Build Coastguard Worker   size_t btc, tmp;
46*cf5a6c84SAndroid Build Coastguard Worker   int offset, withld, inf;
47*cf5a6c84SAndroid Build Coastguard Worker )
48*cf5a6c84SAndroid Build Coastguard Worker 
abrt(char * err)49*cf5a6c84SAndroid Build Coastguard Worker static _Noreturn void abrt(char *err)
50*cf5a6c84SAndroid Build Coastguard Worker {
51*cf5a6c84SAndroid Build Coastguard Worker   // Cycle down through index instead of keeping track of what files we made
52*cf5a6c84SAndroid Build Coastguard Worker   if (!FLAG(k)) for (; TT.indx>=1; TT.indx--)
53*cf5a6c84SAndroid Build Coastguard Worker     remove(xmprintf(TT.filefmt, TT.prefix, TT.findx));
54*cf5a6c84SAndroid Build Coastguard Worker 
55*cf5a6c84SAndroid Build Coastguard Worker   error_exit("%s\n", err);
56*cf5a6c84SAndroid Build Coastguard Worker }
57*cf5a6c84SAndroid Build Coastguard Worker 
rgmatch(char * rxrl,char * line,char * fmt)58*cf5a6c84SAndroid Build Coastguard Worker static int rgmatch(char *rxrl, char *line, char *fmt)
59*cf5a6c84SAndroid Build Coastguard Worker {
60*cf5a6c84SAndroid Build Coastguard Worker   regex_t rxp;
61*cf5a6c84SAndroid Build Coastguard Worker   int rr;
62*cf5a6c84SAndroid Build Coastguard Worker 
63*cf5a6c84SAndroid Build Coastguard Worker   sscanf(rxrl, fmt, toybuf, &TT.offset);
64*cf5a6c84SAndroid Build Coastguard Worker   xregcomp(&rxp, toybuf, 0);
65*cf5a6c84SAndroid Build Coastguard Worker   rr = regexec(&rxp, line, 0, 0, 0);
66*cf5a6c84SAndroid Build Coastguard Worker   if (!rr) return 1;
67*cf5a6c84SAndroid Build Coastguard Worker   else if (rr == REG_NOMATCH) return 0;
68*cf5a6c84SAndroid Build Coastguard Worker   abrt("bad regex");
69*cf5a6c84SAndroid Build Coastguard Worker }
70*cf5a6c84SAndroid Build Coastguard Worker 
cntxt(char * line,char * rule)71*cf5a6c84SAndroid Build Coastguard Worker static int cntxt(char *line, char *rule)
72*cf5a6c84SAndroid Build Coastguard Worker {
73*cf5a6c84SAndroid Build Coastguard Worker   size_t llv;
74*cf5a6c84SAndroid Build Coastguard Worker   if (TT.indx == toys.optc) return 0;
75*cf5a6c84SAndroid Build Coastguard Worker 
76*cf5a6c84SAndroid Build Coastguard Worker   if (TT.offset < 0);
77*cf5a6c84SAndroid Build Coastguard Worker   else if (TT.offset == 0) {
78*cf5a6c84SAndroid Build Coastguard Worker     TT.offset = -1;
79*cf5a6c84SAndroid Build Coastguard Worker 
80*cf5a6c84SAndroid Build Coastguard Worker     return 1;
81*cf5a6c84SAndroid Build Coastguard Worker   } else {
82*cf5a6c84SAndroid Build Coastguard Worker     TT.offset--;
83*cf5a6c84SAndroid Build Coastguard Worker 
84*cf5a6c84SAndroid Build Coastguard Worker     return 0;
85*cf5a6c84SAndroid Build Coastguard Worker   }
86*cf5a6c84SAndroid Build Coastguard Worker 
87*cf5a6c84SAndroid Build Coastguard Worker   switch (rule[0]) {
88*cf5a6c84SAndroid Build Coastguard Worker     case '/':
89*cf5a6c84SAndroid Build Coastguard Worker       return rgmatch(rule, line, "/%[^/%]/%d");
90*cf5a6c84SAndroid Build Coastguard Worker       break;
91*cf5a6c84SAndroid Build Coastguard Worker 
92*cf5a6c84SAndroid Build Coastguard Worker     case '%':
93*cf5a6c84SAndroid Build Coastguard Worker       TT.withld = 1;
94*cf5a6c84SAndroid Build Coastguard Worker       return rgmatch(rule, line, "%%%[^/%]%%%d");
95*cf5a6c84SAndroid Build Coastguard Worker 
96*cf5a6c84SAndroid Build Coastguard Worker     case '{':
97*cf5a6c84SAndroid Build Coastguard Worker       if (TT.indx < 2) abrt("bad rule order");
98*cf5a6c84SAndroid Build Coastguard Worker 
99*cf5a6c84SAndroid Build Coastguard Worker       if (!strcmp(rule,"{*}")) {
100*cf5a6c84SAndroid Build Coastguard Worker         TT.btc = -1;
101*cf5a6c84SAndroid Build Coastguard Worker         TT.inf = 1;
102*cf5a6c84SAndroid Build Coastguard Worker       } else if (!sscanf(rule,"{%lu}",&TT.btc)) abrt("bad rule");
103*cf5a6c84SAndroid Build Coastguard Worker 
104*cf5a6c84SAndroid Build Coastguard Worker       if (TT.tmp == -1) TT.tmp = TT.lineno;
105*cf5a6c84SAndroid Build Coastguard Worker       if ((llv = atoll(toys.optargs[TT.indx-1]))) {
106*cf5a6c84SAndroid Build Coastguard Worker         if (((TT.lineno-TT.tmp) % llv+1) == llv) {
107*cf5a6c84SAndroid Build Coastguard Worker           TT.tmp = -1;
108*cf5a6c84SAndroid Build Coastguard Worker           TT.indx--;
109*cf5a6c84SAndroid Build Coastguard Worker 
110*cf5a6c84SAndroid Build Coastguard Worker           return 1;
111*cf5a6c84SAndroid Build Coastguard Worker         } else return 0;
112*cf5a6c84SAndroid Build Coastguard Worker       }
113*cf5a6c84SAndroid Build Coastguard Worker 
114*cf5a6c84SAndroid Build Coastguard Worker       if (cntxt(line, toys.optargs[TT.indx-1])) {
115*cf5a6c84SAndroid Build Coastguard Worker         // Manipulate the rule then return to it later so we create a
116*cf5a6c84SAndroid Build Coastguard Worker         // new file but are still on the same rule. This is the only
117*cf5a6c84SAndroid Build Coastguard Worker         // reason why we differentiate between rule and file Index
118*cf5a6c84SAndroid Build Coastguard Worker         if (TT.btc != 1) {
119*cf5a6c84SAndroid Build Coastguard Worker           toys.optargs[TT.indx] = xmprintf("{%lu}",TT.btc-1);
120*cf5a6c84SAndroid Build Coastguard Worker           TT.indx--;
121*cf5a6c84SAndroid Build Coastguard Worker         }
122*cf5a6c84SAndroid Build Coastguard Worker         return 1;
123*cf5a6c84SAndroid Build Coastguard Worker       }
124*cf5a6c84SAndroid Build Coastguard Worker       return 0;
125*cf5a6c84SAndroid Build Coastguard Worker 
126*cf5a6c84SAndroid Build Coastguard Worker     default:
127*cf5a6c84SAndroid Build Coastguard Worker       if (TT.lineno > atoll(rule)) abrt("bad rule order");
128*cf5a6c84SAndroid Build Coastguard Worker       else if (!(atoll(rule))) abrt("bad rule");
129*cf5a6c84SAndroid Build Coastguard Worker       else {
130*cf5a6c84SAndroid Build Coastguard Worker         if (TT.lineno == atoll(rule)) TT.offset++;
131*cf5a6c84SAndroid Build Coastguard Worker         return 0;
132*cf5a6c84SAndroid Build Coastguard Worker       }
133*cf5a6c84SAndroid Build Coastguard Worker   }
134*cf5a6c84SAndroid Build Coastguard Worker }
135*cf5a6c84SAndroid Build Coastguard Worker 
csplit_main(void)136*cf5a6c84SAndroid Build Coastguard Worker void csplit_main(void)
137*cf5a6c84SAndroid Build Coastguard Worker {
138*cf5a6c84SAndroid Build Coastguard Worker   FILE *actvfile;
139*cf5a6c84SAndroid Build Coastguard Worker   FILE *fin = (*toys.optargs[0] != '-') ? xfopen(toys.optargs[0], "r") : stdin;
140*cf5a6c84SAndroid Build Coastguard Worker   char *line;
141*cf5a6c84SAndroid Build Coastguard Worker   size_t filesize = 0;
142*cf5a6c84SAndroid Build Coastguard Worker 
143*cf5a6c84SAndroid Build Coastguard Worker   TT.indx = TT.lineno = 1;
144*cf5a6c84SAndroid Build Coastguard Worker   TT.tmp = TT.offset = -1;
145*cf5a6c84SAndroid Build Coastguard Worker 
146*cf5a6c84SAndroid Build Coastguard Worker   // -f and -n formatting
147*cf5a6c84SAndroid Build Coastguard Worker   TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2);
148*cf5a6c84SAndroid Build Coastguard Worker   TT.prefix = TT.f ? TT.f : "xx";
149*cf5a6c84SAndroid Build Coastguard Worker 
150*cf5a6c84SAndroid Build Coastguard Worker   actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
151*cf5a6c84SAndroid Build Coastguard Worker   for (; (line = xgetline(fin)); free(line)) {
152*cf5a6c84SAndroid Build Coastguard Worker     TT.lineno++;
153*cf5a6c84SAndroid Build Coastguard Worker     if (!TT.withld) filesize += strlen(line)+1;
154*cf5a6c84SAndroid Build Coastguard Worker 
155*cf5a6c84SAndroid Build Coastguard Worker     if (cntxt(line, toys.optargs[TT.indx])) {
156*cf5a6c84SAndroid Build Coastguard Worker       if (!TT.withld) {
157*cf5a6c84SAndroid Build Coastguard Worker         fclose(actvfile);
158*cf5a6c84SAndroid Build Coastguard Worker         if (!FLAG(s)) printf("%ld\n", filesize);
159*cf5a6c84SAndroid Build Coastguard Worker         filesize = 0;
160*cf5a6c84SAndroid Build Coastguard Worker         TT.findx++;
161*cf5a6c84SAndroid Build Coastguard Worker         actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
162*cf5a6c84SAndroid Build Coastguard Worker       }
163*cf5a6c84SAndroid Build Coastguard Worker 
164*cf5a6c84SAndroid Build Coastguard Worker       TT.indx++;
165*cf5a6c84SAndroid Build Coastguard Worker       TT.withld = 0;
166*cf5a6c84SAndroid Build Coastguard Worker     }
167*cf5a6c84SAndroid Build Coastguard Worker     if (!TT.withld) fprintf(actvfile, "%s\n", line);
168*cf5a6c84SAndroid Build Coastguard Worker   }
169*cf5a6c84SAndroid Build Coastguard Worker   if (!FLAG(s)) printf("%ld\n", filesize);
170*cf5a6c84SAndroid Build Coastguard Worker 
171*cf5a6c84SAndroid Build Coastguard Worker   // Abort Case: Not All Rules Processed
172*cf5a6c84SAndroid Build Coastguard Worker   if (!((TT.indx == toys.optc) || TT.inf)) abrt("Rules not processed");
173*cf5a6c84SAndroid Build Coastguard Worker }
174