xref: /aosp_15_r20/external/toybox/toys/posix/sort.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1 /* sort.c - put input lines into order
2  *
3  * Copyright 2004, 2008 Rob Landley <[email protected]>
4  *
5  * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
6  *
7  * Deviations from POSIX: Lots.
8  * We invented -x
9 
10 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMCcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
11 
12 config SORT
13   bool "sort"
14   default y
15   help
16     usage: sort [-runbCcdfiMsxVz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]
17 
18     Sort all lines of text from input files (or stdin) to stdout.
19 
20     -r	Reverse
21     -u	Unique lines only
22     -n	Numeric order (instead of alphabetical)
23 
24     -b	Ignore leading blanks (or trailing blanks in second part of key)
25     -C	Check whether input is sorted
26     -c	Warn if input is unsorted
27     -d	Dictionary order (use alphanumeric and whitespace chars only)
28     -f	Force uppercase (case insensitive sort)
29     -i	Ignore nonprinting characters
30     -k	Sort by "key" (see below)
31     -M	Month sort (jan, feb, etc)
32     -o	Output to FILE instead of stdout
33     -s	Skip fallback sort (only sort with keys)
34     -t	Use a key separator other than whitespace
35     -x	Hexadecimal numerical sort
36     -V	Version numbers (name-1.234-rc6.5b.tgz)
37     -z	Zero (null) terminated lines
38 
39     Sorting by key looks at a subset of the words on each line. -k2 uses the
40     second word to the end of the line, -k2,2 looks at only the second word,
41     -k2,4 looks from the start of the second to the end of the fourth word.
42     -k2.4,5 starts from the fourth character of the second word, to the end
43     of the fifth word. Negative values count from the end. Specifying multiple
44     keys uses the later keys as tie breakers, in order. A type specifier
45     appended to a sort key (such as -2,2n) applies only to sorting that key.
46 
47 config SORT_FLOAT
48   bool
49   default y
50   depends on TOYBOX_FLOAT
51   help
52     usage: sort [-g]
53 
54     -g	General numeric sort (double precision with nan and inf)
55 */
56 
57 #define FOR_sort
58 #include "toys.h"
59 
60 GLOBALS(
61   char *t;
62   struct arg_list *k;
63   char *o, *T, S;
64 
65   void *key_list;
66   unsigned linecount;
67   char **lines, *name;
68 )
69 
70 // The sort types are n, g, and M.
71 // u, c, s, and z apply to top level only, not to keys.
72 // b at top level implies bb.
73 // The remaining options can be applied to search keys.
74 
75 #define FLAG_bb (1<<31)  // Ignore trailing blanks
76 
77 struct sort_key {
78   struct sort_key *next_key;  // linked list
79   long range[4];              // start word, start char, end word, end char
80   int flags;
81 };
82 
skip_key(char * str)83 static int skip_key(char *str)
84 {
85   int end = 0;
86 
87   // Skip leading blanks
88   if (str[end] && !TT.t) while (isspace(str[end])) end++;
89 
90   // Skip body of key
91   for (; str[end]; end++) {
92     if (TT.t) {
93       if (str[end]==*TT.t) {
94         end++;
95         break;
96       }
97     } else if (isspace(str[end])) break;
98   }
99 
100   return end;
101 }
102 
103 // Copy of the part of this string corresponding to a key/flags.
104 
get_key_data(char * str,struct sort_key * key,int flags)105 static char *get_key_data(char *str, struct sort_key *key, int flags)
106 {
107   long start = 0, end, len, h, i, j, k;
108 
109   // Special case whole string, so we don't have to make a copy
110   if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
111     && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
112 
113   // Find start of key on first pass, end on second pass
114   len = strlen(str);
115   for (j=0; j<2; j++) {
116     if (!(k = key->range[2*j])) end=len;
117 
118     // Loop through fields
119     else {
120       if (k<1) for (end = h = 0;; end += h) {
121         ++k;
122         if (!(h = skip_key(str+end))) break;
123       }
124       if (k<1) end = len*!j;
125       else for (end = 0, i = 1; i<k+j; i++) end += skip_key(str+end);
126     }
127     if (!j) start = end;
128   }
129 
130   // Key with explicit separator starts after the separator
131   if (TT.t && str[start]==*TT.t) start++;
132 
133   // Strip leading and trailing whitespace if necessary
134   if ((flags&FLAG_b) || (!TT.t && !key->range[3]))
135     while (isspace(str[start])) start++;
136   if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
137 
138   // Handle offsets on start and end
139   if (key->range[3]>0) {
140     end += key->range[3]-1;
141     if (end>len) end=len;
142   }
143   if (key->range[1]>0) {
144     start += key->range[1]-1;
145     if (start>len) start=len;
146   }
147 
148   // Make the copy
149   if (end<start) end = start;
150   str = xstrndup(str+start, end-start);
151 
152   // Handle -d
153   if (flags&FLAG_d) {
154     for (start = end = 0; str[end]; end++)
155       if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
156     str[start] = 0;
157   }
158 
159   // Handle -i
160   if (flags&FLAG_i) {
161     for (start = end = 0; str[end]; end++)
162       if (isprint(str[end])) str[start++] = str[end];
163     str[start] = 0;
164   }
165 
166   return str;
167 }
168 
169 // append a sort_key to key_list.
170 
add_key(void)171 static struct sort_key *add_key(void)
172 {
173   struct sort_key **pkey = (struct sort_key **)&TT.key_list;
174 
175   while (*pkey) pkey = &((*pkey)->next_key);
176   return *pkey = xzalloc(sizeof(struct sort_key));
177 }
178 
179 // Perform actual comparison
compare_values(int flags,char * x,char * y)180 static int compare_values(int flags, char *x, char *y)
181 {
182   if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
183     char *xx,*yy;
184     double dx = strtod(x,&xx), dy = strtod(y,&yy);
185     int xinf, yinf;
186 
187     // not numbers < NaN < -infinity < numbers < +infinity
188 
189     if (x==xx) return y==yy ? 0 : -1;
190     if (y==yy) return 1;
191 
192     // Check for isnan
193     if (dx!=dx) return (dy!=dy) ? 0 : -1;
194     if (dy!=dy) return 1;
195 
196     // Check for infinity.  (Could underflow, but avoids needing libm.)
197     xinf = (1.0/dx == 0.0);
198     yinf = (1.0/dy == 0.0);
199     if (xinf) {
200       if(dx<0) return (yinf && dy<0) ? 0 : -1;
201       return (yinf && dy>0) ? 0 : 1;
202     }
203     if (yinf) return dy<0 ? 1 : -1;
204 
205     return dx<dy ? -1 : dx>dy;
206   } else if (flags & FLAG_M) {
207     struct tm thyme;
208     int dx;
209     char *xx,*yy;
210 
211     xx = strptime(x,"%b",&thyme);
212     dx = thyme.tm_mon;
213     yy = strptime(y,"%b",&thyme);
214     if (!xx) return !yy ? 0 : -1;
215     else if (!yy) return 1;
216     else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
217 
218   } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
219   else if (flags & FLAG_V) {
220     while (*x && *y) {
221       while (*x && *x == *y) x++, y++;
222       if (isdigit(*x) && isdigit(*y)) {
223         long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
224 
225         if (xx<yy) return -1;
226         if (xx>yy) return 1;
227       } else {
228         char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
229 
230         // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
231         if (xx != yy) {
232           if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
233           else return 1;
234         }
235       }
236     }
237     return *x ? !!*y : -1;
238   // This is actually an integer sort with decimals sorted by string fallback.
239   } else if (flags & FLAG_n) {
240     long long dx = atoll(x), dy = atoll(y);
241 
242     return dx<dy ? -1 : dx>dy;
243 
244   // Ascii sort
245   } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
246 }
247 
248 // Callback from qsort(): Iterate through key_list and perform comparisons.
compare_keys(const void * xarg,const void * yarg)249 static int compare_keys(const void *xarg, const void *yarg)
250 {
251   int flags = toys.optflags, retval = 0;
252   char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
253   struct sort_key *key;
254 
255   for (key=(void *)TT.key_list; !retval && key; key = key->next_key) {
256     flags = key->flags ? : toys.optflags;
257 
258     // Chop out and modify key chunks, handling -dfib
259 
260     x = get_key_data(xx, key, flags);
261     y = get_key_data(yy, key, flags);
262 
263     retval = compare_values(flags, x, y);
264 
265     // Free the copies get_key_data() made.
266 
267     if (x != xx) free(x);
268     if (y != yy) free(y);
269 
270     if (retval) break;
271   }
272 
273   // Perform fallback sort if necessary (always case insensitive, no -f,
274   // the point is to get a stable order even for -f sorts)
275   if (!retval && !FLAG(s)) {
276     flags = toys.optflags;
277     retval = strcmp(xx, yy);
278   }
279 
280   return retval * ((flags&FLAG_r) ? -1 : 1);
281 }
282 
283 // Read each line from file, appending to a big array.
sort_lines(char ** pline,long len)284 static void sort_lines(char **pline, long len)
285 {
286   char *line;
287 
288   if (!pline) return;
289   line = *pline;
290   if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0;
291   *pline = 0;
292 
293   // handle -c here so we don't allocate more memory than necessary.
294   if (FLAG(C)||FLAG(c)) {
295     if (TT.lines && compare_keys((void *)&TT.lines, &line)>-FLAG(u)) {
296       toys.exitval = 1;
297       if (FLAG(C)) xexit();
298       error_exit("%s: Check line %u", TT.name, TT.linecount+1);
299     }
300     free(TT.lines);
301     TT.lines = (void *)line;
302   } else {
303     if (!(TT.linecount&63))
304       TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
305     TT.lines[TT.linecount] = line;
306   }
307   TT.linecount++;
308 }
309 
310 // Callback from loopfiles to handle input files.
sort_read(int fd,char * name)311 static void sort_read(int fd, char *name)
312 {
313   TT.name = name;
314   do_lines(fd, '\n'*!FLAG(z), sort_lines);
315 }
316 
sort_main(void)317 void sort_main(void)
318 {
319   int idx, jdx, fd = 1;
320 
321   if (FLAG(u)) toys.optflags |= FLAG_s;
322 
323   // Parse -k sort keys.
324   if (TT.k) {
325     struct arg_list *arg;
326 
327     for (arg = TT.k; arg; arg = arg->next) {
328       struct sort_key *key = add_key();
329       char *temp, *temp2, *optlist;
330       int flag;
331 
332       idx = 0;
333       temp = arg->arg;
334       while (*temp) {
335         // Start of range
336         key->range[2*idx] = strtol(temp, &temp, 10);
337         if (*temp=='.') key->range[(2*idx)+1] = strtol(temp+1, &temp, 10);
338 
339         // Handle flags appended to a key type.
340         for (;*temp;temp++) {
341 
342           // Second comma becomes an "Unknown key" error.
343           if (*temp==',' && !idx++) {
344             temp++;
345             break;
346           }
347 
348           // Which flag is this?
349           optlist = toys.which->options;
350           temp2 = strchr(optlist, *temp);
351           flag = 1<<(optlist-temp2+strlen(optlist)-1);
352 
353           // Was it a flag that can apply to a key?
354           if (!temp2 || flag>FLAG_x || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
355             error_exit("Unknown key option.");
356 
357           // b after , means strip _trailing_ space, not leading.
358           if (idx && flag==FLAG_b) flag = FLAG_bb;
359           key->flags |= flag;
360         }
361       }
362     }
363   }
364 
365   // global b flag strips both leading and trailing spaces
366   if (FLAG(b)) toys.optflags |= FLAG_bb;
367 
368   // If no keys, perform alphabetic sort over the whole line.
369   if (!TT.key_list) add_key()->range[0] = 1;
370 
371   // Open input files and read data, populating TT.lines[TT.linecount]
372   loopfiles(toys.optargs, sort_read);
373 
374   // The compare (-c) logic was handled in sort_read(),
375   // so if we got here, we're done.
376   if (FLAG(C)||FLAG(c)) goto exit_now;
377 
378   // Perform the actual sort
379   qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
380 
381   // handle unique (-u)
382   if (FLAG(u)) {
383     for (jdx=0, idx=1; idx<TT.linecount; idx++) {
384       if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) free(TT.lines[idx]);
385       else TT.lines[++jdx] = TT.lines[idx];
386     }
387     if (TT.linecount) TT.linecount = jdx+1;
388   }
389 
390   // Open output file if necessary. We can't do this until we've finished
391   // reading in case the output file is one of the input files.
392   if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
393 
394   // Output result
395   for (idx = 0; idx<TT.linecount; idx++) {
396     char *s = TT.lines[idx];
397     unsigned i = strlen(s);
398 
399     if (!FLAG(z)) s[i] = '\n';
400     xwrite(fd, s, i+1);
401     if (CFG_TOYBOX_FREE) free(s);
402   }
403 
404 exit_now:
405   if (CFG_TOYBOX_FREE) {
406     if (fd != 1) close(fd);
407     free(TT.lines);
408   }
409 }
410