1 /* csplit.c - split files on context
2 *
3 * Copyright 2023 Oliver Webb <[email protected]>
4 *
5 * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/csplit.html
6 *
7 * Deviations From POSIX: Add "{*}", file size is %ld, no negative offsets
8
9 USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN))
10
11 config CSPLIT
12 bool "csplit"
13 default n
14 help
15 usage: csplit [-ks] [-f PREFIX] [-n INTEGER] file arg...
16
17 Split files into multiple files based on list of rules
18
19 -k Does not delete Files on error
20 -s No file output size messages
21 -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx"
22 -n [INTEGER] Make all filename numbers [INTEGER] characters long
23
24 Valid Rules:
25 /regexp/[INTEGER] Break file before line that regexp matches,
26 %regexp%[INTEGER] Exclude until line matches regexp
27 If an offset is specified for these rules, the break will happen [INTEGER]
28 lines after the regexp match
29 If an offset is specified, it will break at [INTEGER] lines after the offset
30 [INTEGER] Break file at line before [INTEGER]
31 {INTEGER} Repeat previous pattern INTEGER number of times. If INTEGER is *,
32 the pattern repeats forever
33 */
34
35 #define FOR_csplit
36 #include "toys.h"
37
38 GLOBALS(
39 long n;
40 char *f;
41
42 size_t indx, findx, lineno;
43 char *filefmt, *prefix;
44 // Variables the context checker need to track between lines
45 size_t btc, tmp;
46 int offset, withld, inf;
47 )
48
abrt(char * err)49 static _Noreturn void abrt(char *err)
50 {
51 // Cycle down through index instead of keeping track of what files we made
52 if (!FLAG(k)) for (; TT.indx>=1; TT.indx--)
53 remove(xmprintf(TT.filefmt, TT.prefix, TT.findx));
54
55 error_exit("%s\n", err);
56 }
57
rgmatch(char * rxrl,char * line,char * fmt)58 static int rgmatch(char *rxrl, char *line, char *fmt)
59 {
60 regex_t rxp;
61 int rr;
62
63 sscanf(rxrl, fmt, toybuf, &TT.offset);
64 xregcomp(&rxp, toybuf, 0);
65 rr = regexec(&rxp, line, 0, 0, 0);
66 if (!rr) return 1;
67 else if (rr == REG_NOMATCH) return 0;
68 abrt("bad regex");
69 }
70
cntxt(char * line,char * rule)71 static int cntxt(char *line, char *rule)
72 {
73 size_t llv;
74 if (TT.indx == toys.optc) return 0;
75
76 if (TT.offset < 0);
77 else if (TT.offset == 0) {
78 TT.offset = -1;
79
80 return 1;
81 } else {
82 TT.offset--;
83
84 return 0;
85 }
86
87 switch (rule[0]) {
88 case '/':
89 return rgmatch(rule, line, "/%[^/%]/%d");
90 break;
91
92 case '%':
93 TT.withld = 1;
94 return rgmatch(rule, line, "%%%[^/%]%%%d");
95
96 case '{':
97 if (TT.indx < 2) abrt("bad rule order");
98
99 if (!strcmp(rule,"{*}")) {
100 TT.btc = -1;
101 TT.inf = 1;
102 } else if (!sscanf(rule,"{%lu}",&TT.btc)) abrt("bad rule");
103
104 if (TT.tmp == -1) TT.tmp = TT.lineno;
105 if ((llv = atoll(toys.optargs[TT.indx-1]))) {
106 if (((TT.lineno-TT.tmp) % llv+1) == llv) {
107 TT.tmp = -1;
108 TT.indx--;
109
110 return 1;
111 } else return 0;
112 }
113
114 if (cntxt(line, toys.optargs[TT.indx-1])) {
115 // Manipulate the rule then return to it later so we create a
116 // new file but are still on the same rule. This is the only
117 // reason why we differentiate between rule and file Index
118 if (TT.btc != 1) {
119 toys.optargs[TT.indx] = xmprintf("{%lu}",TT.btc-1);
120 TT.indx--;
121 }
122 return 1;
123 }
124 return 0;
125
126 default:
127 if (TT.lineno > atoll(rule)) abrt("bad rule order");
128 else if (!(atoll(rule))) abrt("bad rule");
129 else {
130 if (TT.lineno == atoll(rule)) TT.offset++;
131 return 0;
132 }
133 }
134 }
135
csplit_main(void)136 void csplit_main(void)
137 {
138 FILE *actvfile;
139 FILE *fin = (*toys.optargs[0] != '-') ? xfopen(toys.optargs[0], "r") : stdin;
140 char *line;
141 size_t filesize = 0;
142
143 TT.indx = TT.lineno = 1;
144 TT.tmp = TT.offset = -1;
145
146 // -f and -n formatting
147 TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2);
148 TT.prefix = TT.f ? TT.f : "xx";
149
150 actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
151 for (; (line = xgetline(fin)); free(line)) {
152 TT.lineno++;
153 if (!TT.withld) filesize += strlen(line)+1;
154
155 if (cntxt(line, toys.optargs[TT.indx])) {
156 if (!TT.withld) {
157 fclose(actvfile);
158 if (!FLAG(s)) printf("%ld\n", filesize);
159 filesize = 0;
160 TT.findx++;
161 actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
162 }
163
164 TT.indx++;
165 TT.withld = 0;
166 }
167 if (!TT.withld) fprintf(actvfile, "%s\n", line);
168 }
169 if (!FLAG(s)) printf("%ld\n", filesize);
170
171 // Abort Case: Not All Rules Processed
172 if (!((TT.indx == toys.optc) || TT.inf)) abrt("Rules not processed");
173 }
174