xref: /aosp_15_r20/external/toybox/toys/pending/csplit.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1 /* csplit.c - split files on context
2  *
3  * Copyright 2023 Oliver Webb <[email protected]>
4  *
5  * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/csplit.html
6  *
7  * Deviations From POSIX: Add "{*}", file size is %ld, no negative offsets
8 
9 USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN))
10 
11 config CSPLIT
12   bool "csplit"
13   default n
14   help
15     usage: csplit [-ks] [-f PREFIX] [-n INTEGER] file arg...
16 
17     Split files into multiple files based on list of rules
18 
19     -k	Does not delete Files on error
20     -s	No file output size messages
21     -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx"
22     -n [INTEGER] Make all filename numbers [INTEGER] characters long
23 
24     Valid Rules:
25     /regexp/[INTEGER] Break file before line that regexp matches,
26     %regexp%[INTEGER] Exclude until line matches regexp
27     If an offset is specified for these rules, the break will happen [INTEGER]
28     lines after the regexp match
29     if a offset is specified, it will break at [INTEGER] lines after the offset
30     [INTEGER] Break file at line before [INTEGER]
31     {INTEGER} Repeat the previous pattern INTEGER number of times.
32     If INTEGER is *, the pattern repeats forever
33 */
34 
35 #define FOR_csplit
36 #include "toys.h"
37 
38 GLOBALS(
39   long n;
40   char *f;
41 
42   size_t indx, findx, lineno;
43   char *filefmt, *prefix;
44   // Variables the context checker need to track between lines
45   size_t btc, tmp;
46   int offset, withld, inf;
47 )
48 
abrt(char * err)49 static _Noreturn void abrt(char *err)
50 {
51   // Cycle down through index instead of keeping track of what files we made
52   if (!FLAG(k)) for (; TT.indx>=1; TT.indx--)
53     remove(xmprintf(TT.filefmt, TT.prefix, TT.findx));
54 
55   error_exit("%s\n", err);
56 }
57 
rgmatch(char * rxrl,char * line,char * fmt)58 static int rgmatch(char *rxrl, char *line, char *fmt)
59 {
60   regex_t rxp;
61   int rr;
62 
63   sscanf(rxrl, fmt, toybuf, &TT.offset);
64   xregcomp(&rxp, toybuf, 0);
65   rr = regexec(&rxp, line, 0, 0, 0);
66   if (!rr) return 1;
67   else if (rr == REG_NOMATCH) return 0;
68   abrt("bad regex");
69 }
70 
cntxt(char * line,char * rule)71 static int cntxt(char *line, char *rule)
72 {
73   size_t llv;
74   if (TT.indx == toys.optc) return 0;
75 
76   if (TT.offset < 0);
77   else if (TT.offset == 0) {
78     TT.offset = -1;
79 
80     return 1;
81   } else {
82     TT.offset--;
83 
84     return 0;
85   }
86 
87   switch (rule[0]) {
88     case '/':
89       return rgmatch(rule, line, "/%[^/%]/%d");
90       break;
91 
92     case '%':
93       TT.withld = 1;
94       return rgmatch(rule, line, "%%%[^/%]%%%d");
95 
96     case '{':
97       if (TT.indx < 2) abrt("bad rule order");
98 
99       if (!strcmp(rule,"{*}")) {
100         TT.btc = -1;
101         TT.inf = 1;
102       } else if (!sscanf(rule,"{%lu}",&TT.btc)) abrt("bad rule");
103 
104       if (TT.tmp == -1) TT.tmp = TT.lineno;
105       if ((llv = atoll(toys.optargs[TT.indx-1]))) {
106         if (((TT.lineno-TT.tmp) % llv+1) == llv) {
107           TT.tmp = -1;
108           TT.indx--;
109 
110           return 1;
111         } else return 0;
112       }
113 
114       if (cntxt(line, toys.optargs[TT.indx-1])) {
115         // Manipulate the rule then return to it later so we create a
116         // new file but are still on the same rule. This is the only
117         // reason why we differentiate between rule and file Index
118         if (TT.btc != 1) {
119           toys.optargs[TT.indx] = xmprintf("{%lu}",TT.btc-1);
120           TT.indx--;
121         }
122         return 1;
123       }
124       return 0;
125 
126     default:
127       if (TT.lineno > atoll(rule)) abrt("bad rule order");
128       else if (!(atoll(rule))) abrt("bad rule");
129       else {
130         if (TT.lineno == atoll(rule)) TT.offset++;
131         return 0;
132       }
133   }
134 }
135 
csplit_main(void)136 void csplit_main(void)
137 {
138   FILE *actvfile;
139   FILE *fin = (*toys.optargs[0] != '-') ? xfopen(toys.optargs[0], "r") : stdin;
140   char *line;
141   size_t filesize = 0;
142 
143   TT.indx = TT.lineno = 1;
144   TT.tmp = TT.offset = -1;
145 
146   // -f and -n formatting
147   TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2);
148   TT.prefix = TT.f ? TT.f : "xx";
149 
150   actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
151   for (; (line = xgetline(fin)); free(line)) {
152     TT.lineno++;
153     if (!TT.withld) filesize += strlen(line)+1;
154 
155     if (cntxt(line, toys.optargs[TT.indx])) {
156       if (!TT.withld) {
157         fclose(actvfile);
158         if (!FLAG(s)) printf("%ld\n", filesize);
159         filesize = 0;
160         TT.findx++;
161         actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
162       }
163 
164       TT.indx++;
165       TT.withld = 0;
166     }
167     if (!TT.withld) fprintf(actvfile, "%s\n", line);
168   }
169   if (!FLAG(s)) printf("%ld\n", filesize);
170 
171   // Abort Case: Not All Rules Processed
172   if (!((TT.indx == toys.optc) || TT.inf)) abrt("Rules not processed");
173 }
174