1*cf5a6c84SAndroid Build Coastguard Worker /* csplit.c - split files on context
2*cf5a6c84SAndroid Build Coastguard Worker *
3*cf5a6c84SAndroid Build Coastguard Worker * Copyright 2023 Oliver Webb <[email protected]>
4*cf5a6c84SAndroid Build Coastguard Worker *
5*cf5a6c84SAndroid Build Coastguard Worker * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/csplit.html
6*cf5a6c84SAndroid Build Coastguard Worker *
7*cf5a6c84SAndroid Build Coastguard Worker * Deviations From POSIX: Add "{*}", file size is %ld, no negative offsets
8*cf5a6c84SAndroid Build Coastguard Worker
9*cf5a6c84SAndroid Build Coastguard Worker USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN))
10*cf5a6c84SAndroid Build Coastguard Worker
11*cf5a6c84SAndroid Build Coastguard Worker config CSPLIT
12*cf5a6c84SAndroid Build Coastguard Worker bool "csplit"
13*cf5a6c84SAndroid Build Coastguard Worker default n
14*cf5a6c84SAndroid Build Coastguard Worker help
15*cf5a6c84SAndroid Build Coastguard Worker usage: csplit [-ks] [-f PREFIX] [-n INTEGER] file arg...
16*cf5a6c84SAndroid Build Coastguard Worker
17*cf5a6c84SAndroid Build Coastguard Worker Split files into multiple files based on list of rules
18*cf5a6c84SAndroid Build Coastguard Worker
19*cf5a6c84SAndroid Build Coastguard Worker -k Does not delete Files on error
20*cf5a6c84SAndroid Build Coastguard Worker -s No file output size messages
21*cf5a6c84SAndroid Build Coastguard Worker -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx"
22*cf5a6c84SAndroid Build Coastguard Worker -n [INTEGER] Make all filename numbers [INTEGER] characters long
23*cf5a6c84SAndroid Build Coastguard Worker
24*cf5a6c84SAndroid Build Coastguard Worker Valid Rules:
25*cf5a6c84SAndroid Build Coastguard Worker /regexp/[INTEGER] Break file before line that regexp matches,
    %regexp%[INTEGER] Exclude lines until one matches regexp
      If an offset is specified for these rules, the break will happen [INTEGER]
      lines after the regexp match
      If an offset is specified, it will break at [INTEGER] lines after the offset
30*cf5a6c84SAndroid Build Coastguard Worker [INTEGER] Break file at line before [INTEGER]
    {INTEGER} Repeat previous pattern INTEGER number of times. If INTEGER is *,
      the pattern repeats forever
33*cf5a6c84SAndroid Build Coastguard Worker */
34*cf5a6c84SAndroid Build Coastguard Worker
35*cf5a6c84SAndroid Build Coastguard Worker #define FOR_csplit
36*cf5a6c84SAndroid Build Coastguard Worker #include "toys.h"
37*cf5a6c84SAndroid Build Coastguard Worker
GLOBALS(
  // Option arguments: -n is the digit count used in output file names
  // (csplit_main falls back to 2 when it is 0), -f is the file name prefix
  // (csplit_main falls back to "xx" when it is NULL).
  long n;
  char *f;

  // indx: index into toys.optargs of the rule currently being matched
  // findx: number of the output file currently being written
  // lineno: counter bumped once per input line read (csplit_main starts it
  //   at 1 and increments before each check)
  size_t indx, findx, lineno;
  // filefmt: printf format for output names, built from -n
  // prefix: the string substituted for its %s
  char *filefmt, *prefix;
  // Variables the context checker need to track between lines
  // btc: repeat count parsed from a "{N}" rule (-1 for "{*}")
  // tmp: lineno recorded when a repeated line-number rule starts, -1 if unset
  size_t btc, tmp;
  // offset: lines left before a pending split fires, negative when none
  // withld: nonzero while a %regexp% rule is suppressing output
  // inf: set once "{*}" has been seen, so leftover rules are not an error
  int offset, withld, inf;
)
48*cf5a6c84SAndroid Build Coastguard Worker
abrt(char * err)49*cf5a6c84SAndroid Build Coastguard Worker static _Noreturn void abrt(char *err)
50*cf5a6c84SAndroid Build Coastguard Worker {
51*cf5a6c84SAndroid Build Coastguard Worker // Cycle down through index instead of keeping track of what files we made
52*cf5a6c84SAndroid Build Coastguard Worker if (!FLAG(k)) for (; TT.indx>=1; TT.indx--)
53*cf5a6c84SAndroid Build Coastguard Worker remove(xmprintf(TT.filefmt, TT.prefix, TT.findx));
54*cf5a6c84SAndroid Build Coastguard Worker
55*cf5a6c84SAndroid Build Coastguard Worker error_exit("%s\n", err);
56*cf5a6c84SAndroid Build Coastguard Worker }
57*cf5a6c84SAndroid Build Coastguard Worker
rgmatch(char * rxrl,char * line,char * fmt)58*cf5a6c84SAndroid Build Coastguard Worker static int rgmatch(char *rxrl, char *line, char *fmt)
59*cf5a6c84SAndroid Build Coastguard Worker {
60*cf5a6c84SAndroid Build Coastguard Worker regex_t rxp;
61*cf5a6c84SAndroid Build Coastguard Worker int rr;
62*cf5a6c84SAndroid Build Coastguard Worker
63*cf5a6c84SAndroid Build Coastguard Worker sscanf(rxrl, fmt, toybuf, &TT.offset);
64*cf5a6c84SAndroid Build Coastguard Worker xregcomp(&rxp, toybuf, 0);
65*cf5a6c84SAndroid Build Coastguard Worker rr = regexec(&rxp, line, 0, 0, 0);
66*cf5a6c84SAndroid Build Coastguard Worker if (!rr) return 1;
67*cf5a6c84SAndroid Build Coastguard Worker else if (rr == REG_NOMATCH) return 0;
68*cf5a6c84SAndroid Build Coastguard Worker abrt("bad regex");
69*cf5a6c84SAndroid Build Coastguard Worker }
70*cf5a6c84SAndroid Build Coastguard Worker
cntxt(char * line,char * rule)71*cf5a6c84SAndroid Build Coastguard Worker static int cntxt(char *line, char *rule)
72*cf5a6c84SAndroid Build Coastguard Worker {
73*cf5a6c84SAndroid Build Coastguard Worker size_t llv;
74*cf5a6c84SAndroid Build Coastguard Worker if (TT.indx == toys.optc) return 0;
75*cf5a6c84SAndroid Build Coastguard Worker
76*cf5a6c84SAndroid Build Coastguard Worker if (TT.offset < 0);
77*cf5a6c84SAndroid Build Coastguard Worker else if (TT.offset == 0) {
78*cf5a6c84SAndroid Build Coastguard Worker TT.offset = -1;
79*cf5a6c84SAndroid Build Coastguard Worker
80*cf5a6c84SAndroid Build Coastguard Worker return 1;
81*cf5a6c84SAndroid Build Coastguard Worker } else {
82*cf5a6c84SAndroid Build Coastguard Worker TT.offset--;
83*cf5a6c84SAndroid Build Coastguard Worker
84*cf5a6c84SAndroid Build Coastguard Worker return 0;
85*cf5a6c84SAndroid Build Coastguard Worker }
86*cf5a6c84SAndroid Build Coastguard Worker
87*cf5a6c84SAndroid Build Coastguard Worker switch (rule[0]) {
88*cf5a6c84SAndroid Build Coastguard Worker case '/':
89*cf5a6c84SAndroid Build Coastguard Worker return rgmatch(rule, line, "/%[^/%]/%d");
90*cf5a6c84SAndroid Build Coastguard Worker break;
91*cf5a6c84SAndroid Build Coastguard Worker
92*cf5a6c84SAndroid Build Coastguard Worker case '%':
93*cf5a6c84SAndroid Build Coastguard Worker TT.withld = 1;
94*cf5a6c84SAndroid Build Coastguard Worker return rgmatch(rule, line, "%%%[^/%]%%%d");
95*cf5a6c84SAndroid Build Coastguard Worker
96*cf5a6c84SAndroid Build Coastguard Worker case '{':
97*cf5a6c84SAndroid Build Coastguard Worker if (TT.indx < 2) abrt("bad rule order");
98*cf5a6c84SAndroid Build Coastguard Worker
99*cf5a6c84SAndroid Build Coastguard Worker if (!strcmp(rule,"{*}")) {
100*cf5a6c84SAndroid Build Coastguard Worker TT.btc = -1;
101*cf5a6c84SAndroid Build Coastguard Worker TT.inf = 1;
102*cf5a6c84SAndroid Build Coastguard Worker } else if (!sscanf(rule,"{%lu}",&TT.btc)) abrt("bad rule");
103*cf5a6c84SAndroid Build Coastguard Worker
104*cf5a6c84SAndroid Build Coastguard Worker if (TT.tmp == -1) TT.tmp = TT.lineno;
105*cf5a6c84SAndroid Build Coastguard Worker if ((llv = atoll(toys.optargs[TT.indx-1]))) {
106*cf5a6c84SAndroid Build Coastguard Worker if (((TT.lineno-TT.tmp) % llv+1) == llv) {
107*cf5a6c84SAndroid Build Coastguard Worker TT.tmp = -1;
108*cf5a6c84SAndroid Build Coastguard Worker TT.indx--;
109*cf5a6c84SAndroid Build Coastguard Worker
110*cf5a6c84SAndroid Build Coastguard Worker return 1;
111*cf5a6c84SAndroid Build Coastguard Worker } else return 0;
112*cf5a6c84SAndroid Build Coastguard Worker }
113*cf5a6c84SAndroid Build Coastguard Worker
114*cf5a6c84SAndroid Build Coastguard Worker if (cntxt(line, toys.optargs[TT.indx-1])) {
115*cf5a6c84SAndroid Build Coastguard Worker // Manipulate the rule then return to it later so we create a
116*cf5a6c84SAndroid Build Coastguard Worker // new file but are still on the same rule. This is the only
117*cf5a6c84SAndroid Build Coastguard Worker // reason why we differentiate between rule and file Index
118*cf5a6c84SAndroid Build Coastguard Worker if (TT.btc != 1) {
119*cf5a6c84SAndroid Build Coastguard Worker toys.optargs[TT.indx] = xmprintf("{%lu}",TT.btc-1);
120*cf5a6c84SAndroid Build Coastguard Worker TT.indx--;
121*cf5a6c84SAndroid Build Coastguard Worker }
122*cf5a6c84SAndroid Build Coastguard Worker return 1;
123*cf5a6c84SAndroid Build Coastguard Worker }
124*cf5a6c84SAndroid Build Coastguard Worker return 0;
125*cf5a6c84SAndroid Build Coastguard Worker
126*cf5a6c84SAndroid Build Coastguard Worker default:
127*cf5a6c84SAndroid Build Coastguard Worker if (TT.lineno > atoll(rule)) abrt("bad rule order");
128*cf5a6c84SAndroid Build Coastguard Worker else if (!(atoll(rule))) abrt("bad rule");
129*cf5a6c84SAndroid Build Coastguard Worker else {
130*cf5a6c84SAndroid Build Coastguard Worker if (TT.lineno == atoll(rule)) TT.offset++;
131*cf5a6c84SAndroid Build Coastguard Worker return 0;
132*cf5a6c84SAndroid Build Coastguard Worker }
133*cf5a6c84SAndroid Build Coastguard Worker }
134*cf5a6c84SAndroid Build Coastguard Worker }
135*cf5a6c84SAndroid Build Coastguard Worker
csplit_main(void)136*cf5a6c84SAndroid Build Coastguard Worker void csplit_main(void)
137*cf5a6c84SAndroid Build Coastguard Worker {
138*cf5a6c84SAndroid Build Coastguard Worker FILE *actvfile;
139*cf5a6c84SAndroid Build Coastguard Worker FILE *fin = (*toys.optargs[0] != '-') ? xfopen(toys.optargs[0], "r") : stdin;
140*cf5a6c84SAndroid Build Coastguard Worker char *line;
141*cf5a6c84SAndroid Build Coastguard Worker size_t filesize = 0;
142*cf5a6c84SAndroid Build Coastguard Worker
143*cf5a6c84SAndroid Build Coastguard Worker TT.indx = TT.lineno = 1;
144*cf5a6c84SAndroid Build Coastguard Worker TT.tmp = TT.offset = -1;
145*cf5a6c84SAndroid Build Coastguard Worker
146*cf5a6c84SAndroid Build Coastguard Worker // -f and -n formatting
147*cf5a6c84SAndroid Build Coastguard Worker TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2);
148*cf5a6c84SAndroid Build Coastguard Worker TT.prefix = TT.f ? TT.f : "xx";
149*cf5a6c84SAndroid Build Coastguard Worker
150*cf5a6c84SAndroid Build Coastguard Worker actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
151*cf5a6c84SAndroid Build Coastguard Worker for (; (line = xgetline(fin)); free(line)) {
152*cf5a6c84SAndroid Build Coastguard Worker TT.lineno++;
153*cf5a6c84SAndroid Build Coastguard Worker if (!TT.withld) filesize += strlen(line)+1;
154*cf5a6c84SAndroid Build Coastguard Worker
155*cf5a6c84SAndroid Build Coastguard Worker if (cntxt(line, toys.optargs[TT.indx])) {
156*cf5a6c84SAndroid Build Coastguard Worker if (!TT.withld) {
157*cf5a6c84SAndroid Build Coastguard Worker fclose(actvfile);
158*cf5a6c84SAndroid Build Coastguard Worker if (!FLAG(s)) printf("%ld\n", filesize);
159*cf5a6c84SAndroid Build Coastguard Worker filesize = 0;
160*cf5a6c84SAndroid Build Coastguard Worker TT.findx++;
161*cf5a6c84SAndroid Build Coastguard Worker actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
162*cf5a6c84SAndroid Build Coastguard Worker }
163*cf5a6c84SAndroid Build Coastguard Worker
164*cf5a6c84SAndroid Build Coastguard Worker TT.indx++;
165*cf5a6c84SAndroid Build Coastguard Worker TT.withld = 0;
166*cf5a6c84SAndroid Build Coastguard Worker }
167*cf5a6c84SAndroid Build Coastguard Worker if (!TT.withld) fprintf(actvfile, "%s\n", line);
168*cf5a6c84SAndroid Build Coastguard Worker }
169*cf5a6c84SAndroid Build Coastguard Worker if (!FLAG(s)) printf("%ld\n", filesize);
170*cf5a6c84SAndroid Build Coastguard Worker
171*cf5a6c84SAndroid Build Coastguard Worker // Abort Case: Not All Rules Processed
172*cf5a6c84SAndroid Build Coastguard Worker if (!((TT.indx == toys.optc) || TT.inf)) abrt("Rules not processed");
173*cf5a6c84SAndroid Build Coastguard Worker }
174