/* cut.c - print selected ranges from a file
 *
 * Copyright 2016 Rob Landley <rob@landley.net>
 *
 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
 *
 * Deviations from posix: added -DF. We can only accept 512 selections, and
 * "-" counts as start to end. Using spaces to separate a comma-separated list
 * is silly and inconsistent with dd, ps, cp, and mount.
 *
 * TODO: -s with -c
12*cf5a6c84SAndroid Build Coastguard Worker
USE_CUT(NEWTOY(cut, "b*|c*|f*|F(regex-fields)*|C*|O(output-delimiter):d:sD(allow-duplicates)n[!cbfF]", TOYFLAG_USR|TOYFLAG_BIN))
14*cf5a6c84SAndroid Build Coastguard Worker
config CUT
  bool "cut"
  default y
  help
    usage: cut [-Ds] [-bcCfF LIST] [-dO DELIM] [FILE...]

    Print selected parts of lines from each FILE to standard output.

    Each selection LIST is comma separated, either numbers (counting from 1)
    or dash separated ranges (inclusive, with X- meaning to end of line and -X
    from start). By default selection ranges are sorted and collated, use -D
    to prevent that.

    -b Select bytes (with -n round start/end down to start of utf8 char)
    -c Select UTF-8 characters
    -C Select unicode columns
    -d Input delimiter (default is TAB for -f, run of whitespace for -F)
    -D Don't sort/collate selections or match -fF lines without delimiter
    -f Select fields (words) separated by single DELIM character
    -F Select fields separated by DELIM regex
    -O Output separator (default one space for -F, input delim for -f)
    -s Skip lines without delimiters
*/
38*cf5a6c84SAndroid Build Coastguard Worker #define FOR_cut
39*cf5a6c84SAndroid Build Coastguard Worker #include "toys.h"
40*cf5a6c84SAndroid Build Coastguard Worker
41*cf5a6c84SAndroid Build Coastguard Worker GLOBALS(
42*cf5a6c84SAndroid Build Coastguard Worker char *d, *O;
43*cf5a6c84SAndroid Build Coastguard Worker struct arg_list *select[5]; // we treat them the same, so loop through
44*cf5a6c84SAndroid Build Coastguard Worker
45*cf5a6c84SAndroid Build Coastguard Worker unsigned line;
46*cf5a6c84SAndroid Build Coastguard Worker int pairs;
47*cf5a6c84SAndroid Build Coastguard Worker regex_t reg;
48*cf5a6c84SAndroid Build Coastguard Worker )
49*cf5a6c84SAndroid Build Coastguard Worker
50*cf5a6c84SAndroid Build Coastguard Worker // Apply selections to an input line, producing output
cut_line(char ** pline,long len)51*cf5a6c84SAndroid Build Coastguard Worker static void cut_line(char **pline, long len)
52*cf5a6c84SAndroid Build Coastguard Worker {
53*cf5a6c84SAndroid Build Coastguard Worker unsigned *pairs = (void *)toybuf, wc;
54*cf5a6c84SAndroid Build Coastguard Worker char *line;
55*cf5a6c84SAndroid Build Coastguard Worker int i, j, k;
56*cf5a6c84SAndroid Build Coastguard Worker
57*cf5a6c84SAndroid Build Coastguard Worker if (!pline) return;
58*cf5a6c84SAndroid Build Coastguard Worker line = *pline;
59*cf5a6c84SAndroid Build Coastguard Worker if (len && line[len-1]=='\n') line[--len] = 0;
60*cf5a6c84SAndroid Build Coastguard Worker TT.line++;
61*cf5a6c84SAndroid Build Coastguard Worker
62*cf5a6c84SAndroid Build Coastguard Worker // Loop through selections
63*cf5a6c84SAndroid Build Coastguard Worker for (i=0; i<TT.pairs; i++) {
64*cf5a6c84SAndroid Build Coastguard Worker unsigned start = pairs[2*i], end = pairs[(2*i)+1], count;
65*cf5a6c84SAndroid Build Coastguard Worker char *s = line, *ss, *sss;
66*cf5a6c84SAndroid Build Coastguard Worker
67*cf5a6c84SAndroid Build Coastguard Worker // when the delimiter is \n output lines.
68*cf5a6c84SAndroid Build Coastguard Worker if (*TT.d == '\n') {
69*cf5a6c84SAndroid Build Coastguard Worker if (TT.line<start || TT.line>end) {
70*cf5a6c84SAndroid Build Coastguard Worker if (i+1 == TT.pairs) return;
71*cf5a6c84SAndroid Build Coastguard Worker continue;
72*cf5a6c84SAndroid Build Coastguard Worker }
73*cf5a6c84SAndroid Build Coastguard Worker goto write_line;
74*cf5a6c84SAndroid Build Coastguard Worker }
75*cf5a6c84SAndroid Build Coastguard Worker
76*cf5a6c84SAndroid Build Coastguard Worker // input: start/end position, count=difference between them
77*cf5a6c84SAndroid Build Coastguard Worker // output: s = start of string, len = bytes to output
78*cf5a6c84SAndroid Build Coastguard Worker
79*cf5a6c84SAndroid Build Coastguard Worker if (start) start--;
80*cf5a6c84SAndroid Build Coastguard Worker if (start>=len) continue;
81*cf5a6c84SAndroid Build Coastguard Worker if (!end || end>len) end = len;
82*cf5a6c84SAndroid Build Coastguard Worker count = end-start;
83*cf5a6c84SAndroid Build Coastguard Worker
84*cf5a6c84SAndroid Build Coastguard Worker // Find start and end of output string for the relevant selection type
85*cf5a6c84SAndroid Build Coastguard Worker if (FLAG(b)) {
86*cf5a6c84SAndroid Build Coastguard Worker if (!FLAG(n)) s += start;
87*cf5a6c84SAndroid Build Coastguard Worker else {
88*cf5a6c84SAndroid Build Coastguard Worker if (end>len) end = len;
89*cf5a6c84SAndroid Build Coastguard Worker for (sss = ss = s; (k = (ss-line))<end;) {
90*cf5a6c84SAndroid Build Coastguard Worker if (0>(j = utf8towc(&wc, ss, len))) ss++;
91*cf5a6c84SAndroid Build Coastguard Worker else {
92*cf5a6c84SAndroid Build Coastguard Worker if (((ss += j)-line)<=end) sss = ss;
93*cf5a6c84SAndroid Build Coastguard Worker if ((ss-line)<=start) s = ss;
94*cf5a6c84SAndroid Build Coastguard Worker }
95*cf5a6c84SAndroid Build Coastguard Worker }
96*cf5a6c84SAndroid Build Coastguard Worker if (!(count = sss-s)) continue;
97*cf5a6c84SAndroid Build Coastguard Worker }
98*cf5a6c84SAndroid Build Coastguard Worker } else if (FLAG(C)) {
99*cf5a6c84SAndroid Build Coastguard Worker // crunch_str() currently assumes that combining characters get
100*cf5a6c84SAndroid Build Coastguard Worker // escaped, to provide an unambiguous visual representation.
101*cf5a6c84SAndroid Build Coastguard Worker // This assumes the input string is null terminated.
102*cf5a6c84SAndroid Build Coastguard Worker if (start) crunch_str(&s, start, 0, 0, 0);
103*cf5a6c84SAndroid Build Coastguard Worker if (!*s) continue;
104*cf5a6c84SAndroid Build Coastguard Worker start = s-line;
105*cf5a6c84SAndroid Build Coastguard Worker ss = s;
106*cf5a6c84SAndroid Build Coastguard Worker crunch_str(&ss, count, 0, 0, 0);
107*cf5a6c84SAndroid Build Coastguard Worker count = ss-s;
108*cf5a6c84SAndroid Build Coastguard Worker
109*cf5a6c84SAndroid Build Coastguard Worker } else if (FLAG(c)) {
110*cf5a6c84SAndroid Build Coastguard Worker
111*cf5a6c84SAndroid Build Coastguard Worker // Find start
112*cf5a6c84SAndroid Build Coastguard Worker ss = line+len;
113*cf5a6c84SAndroid Build Coastguard Worker while (start && s<ss) {
114*cf5a6c84SAndroid Build Coastguard Worker if (0<=(j = utf8towc(&wc, s, len))) start--;
115*cf5a6c84SAndroid Build Coastguard Worker s += (j<1) ? 1 : j;
116*cf5a6c84SAndroid Build Coastguard Worker }
117*cf5a6c84SAndroid Build Coastguard Worker if (s == ss) continue;
118*cf5a6c84SAndroid Build Coastguard Worker
119*cf5a6c84SAndroid Build Coastguard Worker // Find end
120*cf5a6c84SAndroid Build Coastguard Worker end = count;
121*cf5a6c84SAndroid Build Coastguard Worker sss = s;
122*cf5a6c84SAndroid Build Coastguard Worker while (end && sss<ss) {
123*cf5a6c84SAndroid Build Coastguard Worker if (0<=(j = utf8towc(&wc, sss, len))) end--;
124*cf5a6c84SAndroid Build Coastguard Worker sss += (j<1) ? 1 : j;
125*cf5a6c84SAndroid Build Coastguard Worker }
126*cf5a6c84SAndroid Build Coastguard Worker count = sss-s;
127*cf5a6c84SAndroid Build Coastguard Worker } else {
128*cf5a6c84SAndroid Build Coastguard Worker regmatch_t match;
129*cf5a6c84SAndroid Build Coastguard Worker
130*cf5a6c84SAndroid Build Coastguard Worker // Loop through skipping appropriate number of fields
131*cf5a6c84SAndroid Build Coastguard Worker for (j = 0; j<2; j++) {
132*cf5a6c84SAndroid Build Coastguard Worker ss = s;
133*cf5a6c84SAndroid Build Coastguard Worker if (j) start = count;
134*cf5a6c84SAndroid Build Coastguard Worker else end = start;
135*cf5a6c84SAndroid Build Coastguard Worker while (*ss && start) {
136*cf5a6c84SAndroid Build Coastguard Worker if (FLAG(f)) {
137*cf5a6c84SAndroid Build Coastguard Worker if (!strchr(TT.d, *ss++)) continue;
138*cf5a6c84SAndroid Build Coastguard Worker if (!--start && j) ss--;
139*cf5a6c84SAndroid Build Coastguard Worker } else {
140*cf5a6c84SAndroid Build Coastguard Worker if (regexec(&TT.reg, ss, 1, &match, REG_NOTBOL|REG_NOTEOL)) {
141*cf5a6c84SAndroid Build Coastguard Worker ss = line+len;
142*cf5a6c84SAndroid Build Coastguard Worker continue;
143*cf5a6c84SAndroid Build Coastguard Worker }
144*cf5a6c84SAndroid Build Coastguard Worker if (!match.rm_eo) break; // zero length match == no delimiter
145*cf5a6c84SAndroid Build Coastguard Worker ss += (!--start && j) ? match.rm_so : match.rm_eo;
146*cf5a6c84SAndroid Build Coastguard Worker }
147*cf5a6c84SAndroid Build Coastguard Worker }
148*cf5a6c84SAndroid Build Coastguard Worker if (!j && !*(s = ss)) break;
149*cf5a6c84SAndroid Build Coastguard Worker }
150*cf5a6c84SAndroid Build Coastguard Worker
151*cf5a6c84SAndroid Build Coastguard Worker // If we never encountered even one separator, print whole line (posix!)
152*cf5a6c84SAndroid Build Coastguard Worker if (!j && end == start) {
153*cf5a6c84SAndroid Build Coastguard Worker if (FLAG(D)) break;
154*cf5a6c84SAndroid Build Coastguard Worker if (FLAG(s)) return;
155*cf5a6c84SAndroid Build Coastguard Worker write_line:
156*cf5a6c84SAndroid Build Coastguard Worker fwrite(line, len, 1, stdout);
157*cf5a6c84SAndroid Build Coastguard Worker break;
158*cf5a6c84SAndroid Build Coastguard Worker } else if (!*s) continue;
159*cf5a6c84SAndroid Build Coastguard Worker count = ss-s;
160*cf5a6c84SAndroid Build Coastguard Worker }
161*cf5a6c84SAndroid Build Coastguard Worker if (i && TT.O) fputs(TT.O, stdout);
162*cf5a6c84SAndroid Build Coastguard Worker fwrite(s, count, 1, stdout);
163*cf5a6c84SAndroid Build Coastguard Worker }
164*cf5a6c84SAndroid Build Coastguard Worker xputc('\n');
165*cf5a6c84SAndroid Build Coastguard Worker }
166*cf5a6c84SAndroid Build Coastguard Worker
// qsort() comparison callback for selection ranges.
//
// a, b: each points at two consecutive unsigned values, {start, end}.
// Returns -1, 0 or 1, ordering by start first and by end on a tie.
//
// The parameters are const void * so the function's type matches the one
// qsort() calls it through; calling a function through an incompatible
// function pointer type is undefined behavior.
static int compar(const void *a, const void *b)
{
  const unsigned *x = a, *y = b;
  int i;

  for (i = 0; i<2; i++) if (x[i] != y[i]) return x[i]<y[i] ? -1 : 1;

  return 0;
}
176*cf5a6c84SAndroid Build Coastguard Worker
177*cf5a6c84SAndroid Build Coastguard Worker // parse A or A-B or A- or -B
get_range(void * data,char * str,int len)178*cf5a6c84SAndroid Build Coastguard Worker static char *get_range(void *data, char *str, int len)
179*cf5a6c84SAndroid Build Coastguard Worker {
180*cf5a6c84SAndroid Build Coastguard Worker char *end = str;
181*cf5a6c84SAndroid Build Coastguard Worker unsigned *pairs = (void *)toybuf, i;
182*cf5a6c84SAndroid Build Coastguard Worker
183*cf5a6c84SAndroid Build Coastguard Worker // Using toybuf[] to store ranges means we can have 512 selections max.
184*cf5a6c84SAndroid Build Coastguard Worker if (TT.pairs == sizeof(toybuf)/sizeof(int)) perror_exit("select limit");
185*cf5a6c84SAndroid Build Coastguard Worker pairs += 2*TT.pairs++;
186*cf5a6c84SAndroid Build Coastguard Worker
187*cf5a6c84SAndroid Build Coastguard Worker pairs[1] = UINT_MAX;
188*cf5a6c84SAndroid Build Coastguard Worker for (i = 0; ;i++) {
189*cf5a6c84SAndroid Build Coastguard Worker if (i==2) return end;
190*cf5a6c84SAndroid Build Coastguard Worker if (isdigit(*end)) {
191*cf5a6c84SAndroid Build Coastguard Worker long long ll = estrtol(end, &end, 10);
192*cf5a6c84SAndroid Build Coastguard Worker
193*cf5a6c84SAndroid Build Coastguard Worker if (ll<1 || ll>UINT_MAX || errno) return end;
194*cf5a6c84SAndroid Build Coastguard Worker pairs[i] = ll;
195*cf5a6c84SAndroid Build Coastguard Worker }
196*cf5a6c84SAndroid Build Coastguard Worker if (*end++ != '-') break;
197*cf5a6c84SAndroid Build Coastguard Worker }
198*cf5a6c84SAndroid Build Coastguard Worker if (!i) pairs[1] = pairs[0];
199*cf5a6c84SAndroid Build Coastguard Worker if ((end-str)<len) return end;
200*cf5a6c84SAndroid Build Coastguard Worker if (pairs[0]>pairs[1]) return str;
201*cf5a6c84SAndroid Build Coastguard Worker
202*cf5a6c84SAndroid Build Coastguard Worker // No error
203*cf5a6c84SAndroid Build Coastguard Worker return 0;
204*cf5a6c84SAndroid Build Coastguard Worker }
205*cf5a6c84SAndroid Build Coastguard Worker
cut_main(void)206*cf5a6c84SAndroid Build Coastguard Worker void cut_main(void)
207*cf5a6c84SAndroid Build Coastguard Worker {
208*cf5a6c84SAndroid Build Coastguard Worker int i;
209*cf5a6c84SAndroid Build Coastguard Worker char buf[8];
210*cf5a6c84SAndroid Build Coastguard Worker
211*cf5a6c84SAndroid Build Coastguard Worker // Parse command line arguments
212*cf5a6c84SAndroid Build Coastguard Worker if ((toys.optflags&(FLAG_s|FLAG_f|FLAG_F))==FLAG_s)
213*cf5a6c84SAndroid Build Coastguard Worker error_exit("-s needs -Ff");
214*cf5a6c84SAndroid Build Coastguard Worker if ((toys.optflags&(FLAG_d|FLAG_f|FLAG_F))==FLAG_d)
215*cf5a6c84SAndroid Build Coastguard Worker error_exit("-d needs -Ff");
216*cf5a6c84SAndroid Build Coastguard Worker if (!TT.d) TT.d = FLAG(F) ? "[[:space:]][[:space:]]*" : "\t";
217*cf5a6c84SAndroid Build Coastguard Worker if (FLAG(F)) xregcomp(&TT.reg, TT.d, REG_EXTENDED);
218*cf5a6c84SAndroid Build Coastguard Worker if (!TT.O) {
219*cf5a6c84SAndroid Build Coastguard Worker if (FLAG(F)) TT.O = " ";
220*cf5a6c84SAndroid Build Coastguard Worker else if (FLAG(f)) TT.O = TT.d;
221*cf5a6c84SAndroid Build Coastguard Worker }
222*cf5a6c84SAndroid Build Coastguard Worker
223*cf5a6c84SAndroid Build Coastguard Worker // Parse ranges, which are attached to a selection type (only one can be set)
224*cf5a6c84SAndroid Build Coastguard Worker for (i = 0; i<ARRAY_LEN(TT.select); i++) {
225*cf5a6c84SAndroid Build Coastguard Worker sprintf(buf, "bad -%c", "CFfcb"[i]); // reverse order from newtoy optstr
226*cf5a6c84SAndroid Build Coastguard Worker if (TT.select[i]) comma_args(TT.select[i], 0, buf, get_range);
227*cf5a6c84SAndroid Build Coastguard Worker }
228*cf5a6c84SAndroid Build Coastguard Worker if (!TT.pairs) error_exit("no selections");
229*cf5a6c84SAndroid Build Coastguard Worker
230*cf5a6c84SAndroid Build Coastguard Worker // Sort and collate selections
231*cf5a6c84SAndroid Build Coastguard Worker if (!FLAG(D)) {
232*cf5a6c84SAndroid Build Coastguard Worker int from, to;
233*cf5a6c84SAndroid Build Coastguard Worker unsigned *pairs = (void *)toybuf;
234*cf5a6c84SAndroid Build Coastguard Worker
235*cf5a6c84SAndroid Build Coastguard Worker qsort(toybuf, TT.pairs, 8, (void *)compar);
236*cf5a6c84SAndroid Build Coastguard Worker for (to = 0, from = 2; from/2 < TT.pairs; from += 2) {
237*cf5a6c84SAndroid Build Coastguard Worker if (pairs[from] > pairs[to+1]) {
238*cf5a6c84SAndroid Build Coastguard Worker to += 2;
239*cf5a6c84SAndroid Build Coastguard Worker memcpy(pairs+to, pairs+from, 2*sizeof(unsigned));
240*cf5a6c84SAndroid Build Coastguard Worker } else if (pairs[from+1] > pairs[to+1]) pairs[to+1] = pairs[from+1];
241*cf5a6c84SAndroid Build Coastguard Worker }
242*cf5a6c84SAndroid Build Coastguard Worker TT.pairs = (to/2)+1;
243*cf5a6c84SAndroid Build Coastguard Worker }
244*cf5a6c84SAndroid Build Coastguard Worker
245*cf5a6c84SAndroid Build Coastguard Worker // For each argument, loop through lines of file and call cut_line() on each
246*cf5a6c84SAndroid Build Coastguard Worker loopfiles_lines(toys.optargs, cut_line);
247*cf5a6c84SAndroid Build Coastguard Worker }
248