1 /* sort.c - put input lines into order
2 *
3 * Copyright 2004, 2008 Rob Landley <[email protected]>
4 *
5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
6 *
7 * Deviations from POSIX: Lots.
8 * We invented -x
9
10 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMCcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
11
12 config SORT
13 bool "sort"
14 default y
15 help
16 usage: sort [-runbCcdfiMsxVz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]
17
18 Sort all lines of text from input files (or stdin) to stdout.
19
20 -r Reverse
21 -u Unique lines only
22 -n Numeric order (instead of alphabetical)
23
24 -b Ignore leading blanks (or trailing blanks in second part of key)
25 -C Check whether input is sorted
26 -c Warn if input is unsorted
27 -d Dictionary order (use alphanumeric and whitespace chars only)
28 -f Force uppercase (case insensitive sort)
29 -i Ignore nonprinting characters
30 -k Sort by "key" (see below)
31 -M Month sort (jan, feb, etc)
32 -o Output to FILE instead of stdout
33 -s Skip fallback sort (only sort with keys)
34 -t Use a key separator other than whitespace
35 -x Hexadecimal numerical sort
36 -V Version numbers (name-1.234-rc6.5b.tgz)
37 -z Zero (null) terminated lines
38
39 Sorting by key looks at a subset of the words on each line. -k2 uses the
40 second word to the end of the line, -k2,2 looks at only the second word,
41 -k2,4 looks from the start of the second to the end of the fourth word.
42 -k2.4,5 starts from the fourth character of the second word, to the end
43 of the fifth word. Negative values count from the end. Specifying multiple
44 keys uses the later keys as tie breakers, in order. A type specifier
45 appended to a sort key (such as -2,2n) applies only to sorting that key.
46
47 config SORT_FLOAT
48 bool
49 default y
50 depends on TOYBOX_FLOAT
51 help
52 usage: sort [-g]
53
54 -g General numeric sort (double precision with nan and inf)
55 */
56
57 #define FOR_sort
58 #include "toys.h"
59
60 GLOBALS(
61 char *t;
62 struct arg_list *k;
63 char *o, *T, S;
64
65 void *key_list;
66 unsigned linecount;
67 char **lines, *name;
68 )
69
70 // The sort types are n, g, and M.
71 // u, c, s, and z apply to top level only, not to keys.
72 // b at top level implies bb.
73 // The remaining options can be applied to search keys.
74
75 #define FLAG_bb (1<<31) // Ignore trailing blanks
76
77 struct sort_key {
78 struct sort_key *next_key; // linked list
79 long range[4]; // start word, start char, end word, end char
80 int flags;
81 };
82
skip_key(char * str)83 static int skip_key(char *str)
84 {
85 int end = 0;
86
87 // Skip leading blanks
88 if (str[end] && !TT.t) while (isspace(str[end])) end++;
89
90 // Skip body of key
91 for (; str[end]; end++) {
92 if (TT.t) {
93 if (str[end]==*TT.t) {
94 end++;
95 break;
96 }
97 } else if (isspace(str[end])) break;
98 }
99
100 return end;
101 }
102
103 // Copy of the part of this string corresponding to a key/flags.
104
get_key_data(char * str,struct sort_key * key,int flags)105 static char *get_key_data(char *str, struct sort_key *key, int flags)
106 {
107 long start = 0, end, len, h, i, j, k;
108
109 // Special case whole string, so we don't have to make a copy
110 if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
111 && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
112
113 // Find start of key on first pass, end on second pass
114 len = strlen(str);
115 for (j=0; j<2; j++) {
116 if (!(k = key->range[2*j])) end=len;
117
118 // Loop through fields
119 else {
120 if (k<1) for (end = h = 0;; end += h) {
121 ++k;
122 if (!(h = skip_key(str+end))) break;
123 }
124 if (k<1) end = len*!j;
125 else for (end = 0, i = 1; i<k+j; i++) end += skip_key(str+end);
126 }
127 if (!j) start = end;
128 }
129
130 // Key with explicit separator starts after the separator
131 if (TT.t && str[start]==*TT.t) start++;
132
133 // Strip leading and trailing whitespace if necessary
134 if ((flags&FLAG_b) || (!TT.t && !key->range[3]))
135 while (isspace(str[start])) start++;
136 if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
137
138 // Handle offsets on start and end
139 if (key->range[3]>0) {
140 end += key->range[3]-1;
141 if (end>len) end=len;
142 }
143 if (key->range[1]>0) {
144 start += key->range[1]-1;
145 if (start>len) start=len;
146 }
147
148 // Make the copy
149 if (end<start) end = start;
150 str = xstrndup(str+start, end-start);
151
152 // Handle -d
153 if (flags&FLAG_d) {
154 for (start = end = 0; str[end]; end++)
155 if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
156 str[start] = 0;
157 }
158
159 // Handle -i
160 if (flags&FLAG_i) {
161 for (start = end = 0; str[end]; end++)
162 if (isprint(str[end])) str[start++] = str[end];
163 str[start] = 0;
164 }
165
166 return str;
167 }
168
169 // append a sort_key to key_list.
170
add_key(void)171 static struct sort_key *add_key(void)
172 {
173 struct sort_key **pkey = (struct sort_key **)&TT.key_list;
174
175 while (*pkey) pkey = &((*pkey)->next_key);
176 return *pkey = xzalloc(sizeof(struct sort_key));
177 }
178
179 // Perform actual comparison
compare_values(int flags,char * x,char * y)180 static int compare_values(int flags, char *x, char *y)
181 {
182 if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
183 char *xx,*yy;
184 double dx = strtod(x,&xx), dy = strtod(y,&yy);
185 int xinf, yinf;
186
187 // not numbers < NaN < -infinity < numbers < +infinity
188
189 if (x==xx) return y==yy ? 0 : -1;
190 if (y==yy) return 1;
191
192 // Check for isnan
193 if (dx!=dx) return (dy!=dy) ? 0 : -1;
194 if (dy!=dy) return 1;
195
196 // Check for infinity. (Could underflow, but avoids needing libm.)
197 xinf = (1.0/dx == 0.0);
198 yinf = (1.0/dy == 0.0);
199 if (xinf) {
200 if(dx<0) return (yinf && dy<0) ? 0 : -1;
201 return (yinf && dy>0) ? 0 : 1;
202 }
203 if (yinf) return dy<0 ? 1 : -1;
204
205 return dx<dy ? -1 : dx>dy;
206 } else if (flags & FLAG_M) {
207 struct tm thyme;
208 int dx;
209 char *xx,*yy;
210
211 xx = strptime(x,"%b",&thyme);
212 dx = thyme.tm_mon;
213 yy = strptime(y,"%b",&thyme);
214 if (!xx) return !yy ? 0 : -1;
215 else if (!yy) return 1;
216 else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
217
218 } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
219 else if (flags & FLAG_V) {
220 while (*x && *y) {
221 while (*x && *x == *y) x++, y++;
222 if (isdigit(*x) && isdigit(*y)) {
223 long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
224
225 if (xx<yy) return -1;
226 if (xx>yy) return 1;
227 } else {
228 char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
229
230 // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
231 if (xx != yy) {
232 if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
233 else return 1;
234 }
235 }
236 }
237 return *x ? !!*y : -1;
238 // This is actually an integer sort with decimals sorted by string fallback.
239 } else if (flags & FLAG_n) {
240 long long dx = atoll(x), dy = atoll(y);
241
242 return dx<dy ? -1 : dx>dy;
243
244 // Ascii sort
245 } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
246 }
247
248 // Callback from qsort(): Iterate through key_list and perform comparisons.
compare_keys(const void * xarg,const void * yarg)249 static int compare_keys(const void *xarg, const void *yarg)
250 {
251 int flags = toys.optflags, retval = 0;
252 char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
253 struct sort_key *key;
254
255 for (key=(void *)TT.key_list; !retval && key; key = key->next_key) {
256 flags = key->flags ? : toys.optflags;
257
258 // Chop out and modify key chunks, handling -dfib
259
260 x = get_key_data(xx, key, flags);
261 y = get_key_data(yy, key, flags);
262
263 retval = compare_values(flags, x, y);
264
265 // Free the copies get_key_data() made.
266
267 if (x != xx) free(x);
268 if (y != yy) free(y);
269
270 if (retval) break;
271 }
272
273 // Perform fallback sort if necessary (always case insensitive, no -f,
274 // the point is to get a stable order even for -f sorts)
275 if (!retval && !FLAG(s)) {
276 flags = toys.optflags;
277 retval = strcmp(xx, yy);
278 }
279
280 return retval * ((flags&FLAG_r) ? -1 : 1);
281 }
282
283 // Read each line from file, appending to a big array.
sort_lines(char ** pline,long len)284 static void sort_lines(char **pline, long len)
285 {
286 char *line;
287
288 if (!pline) return;
289 line = *pline;
290 if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0;
291 *pline = 0;
292
293 // handle -c here so we don't allocate more memory than necessary.
294 if (FLAG(C)||FLAG(c)) {
295 if (TT.lines && compare_keys((void *)&TT.lines, &line)>-FLAG(u)) {
296 toys.exitval = 1;
297 if (FLAG(C)) xexit();
298 error_exit("%s: Check line %u", TT.name, TT.linecount+1);
299 }
300 free(TT.lines);
301 TT.lines = (void *)line;
302 } else {
303 if (!(TT.linecount&63))
304 TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
305 TT.lines[TT.linecount] = line;
306 }
307 TT.linecount++;
308 }
309
310 // Callback from loopfiles to handle input files.
sort_read(int fd,char * name)311 static void sort_read(int fd, char *name)
312 {
313 TT.name = name;
314 do_lines(fd, '\n'*!FLAG(z), sort_lines);
315 }
316
sort_main(void)317 void sort_main(void)
318 {
319 int idx, jdx, fd = 1;
320
321 if (FLAG(u)) toys.optflags |= FLAG_s;
322
323 // Parse -k sort keys.
324 if (TT.k) {
325 struct arg_list *arg;
326
327 for (arg = TT.k; arg; arg = arg->next) {
328 struct sort_key *key = add_key();
329 char *temp, *temp2, *optlist;
330 int flag;
331
332 idx = 0;
333 temp = arg->arg;
334 while (*temp) {
335 // Start of range
336 key->range[2*idx] = strtol(temp, &temp, 10);
337 if (*temp=='.') key->range[(2*idx)+1] = strtol(temp+1, &temp, 10);
338
339 // Handle flags appended to a key type.
340 for (;*temp;temp++) {
341
342 // Second comma becomes an "Unknown key" error.
343 if (*temp==',' && !idx++) {
344 temp++;
345 break;
346 }
347
348 // Which flag is this?
349 optlist = toys.which->options;
350 temp2 = strchr(optlist, *temp);
351 flag = 1<<(optlist-temp2+strlen(optlist)-1);
352
353 // Was it a flag that can apply to a key?
354 if (!temp2 || flag>FLAG_x || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
355 error_exit("Unknown key option.");
356
357 // b after , means strip _trailing_ space, not leading.
358 if (idx && flag==FLAG_b) flag = FLAG_bb;
359 key->flags |= flag;
360 }
361 }
362 }
363 }
364
365 // global b flag strips both leading and trailing spaces
366 if (FLAG(b)) toys.optflags |= FLAG_bb;
367
368 // If no keys, perform alphabetic sort over the whole line.
369 if (!TT.key_list) add_key()->range[0] = 1;
370
371 // Open input files and read data, populating TT.lines[TT.linecount]
372 loopfiles(toys.optargs, sort_read);
373
374 // The compare (-c) logic was handled in sort_read(),
375 // so if we got here, we're done.
376 if (FLAG(C)||FLAG(c)) goto exit_now;
377
378 // Perform the actual sort
379 qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
380
381 // handle unique (-u)
382 if (FLAG(u)) {
383 for (jdx=0, idx=1; idx<TT.linecount; idx++) {
384 if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) free(TT.lines[idx]);
385 else TT.lines[++jdx] = TT.lines[idx];
386 }
387 if (TT.linecount) TT.linecount = jdx+1;
388 }
389
390 // Open output file if necessary. We can't do this until we've finished
391 // reading in case the output file is one of the input files.
392 if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
393
394 // Output result
395 for (idx = 0; idx<TT.linecount; idx++) {
396 char *s = TT.lines[idx];
397 unsigned i = strlen(s);
398
399 if (!FLAG(z)) s[i] = '\n';
400 xwrite(fd, s, i+1);
401 if (CFG_TOYBOX_FREE) free(s);
402 }
403
404 exit_now:
405 if (CFG_TOYBOX_FREE) {
406 if (fd != 1) close(fd);
407 free(TT.lines);
408 }
409 }
410