xref: /aosp_15_r20/external/toybox/toys/pending/diff.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1 /* diff.c - compare files line by line
2  *
3  * Copyright 2014 Sandeep Sharma <[email protected]>
4  * Copyright 2014 Ashwini Kumar <[email protected]>
5  *
6  * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html
7  * and https://www.cs.dartmouth.edu/~doug/diff.pdf
8  *
9  * Deviations from posix: always does -u
10 
11 USE_DIFF(NEWTOY(diff, "<2>2(unchanged-line-format):;(old-line-format):;(no-dereference);(new-line-format):;(color)(strip-trailing-cr)B(ignore-blank-lines)d(minimal)b(ignore-space-change)ut(expand-tabs)w(ignore-all-space)i(ignore-case)T(initial-tab)s(report-identical-files)q(brief)a(text)S(starting-file):F(show-function-line):;L(label)*N(new-file)r(recursive)U(unified)#<0=3", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
12 
13 config DIFF
14   bool "diff"
15   default n
16   help
17     usage: diff [-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] [-F REGEX ] FILE1 FILE2
18 
19     -a	Treat all files as text
20     -b	Ignore changes in the amount of whitespace
21     -B	Ignore changes whose lines are all blank
22     -d	Try hard to find a smaller set of changes
23     -F 	Show the most recent line matching the regex
24     -i	Ignore case differences
25     -L	Use LABEL instead of the filename in the unified header
26     -N	Treat absent files as empty
27     -q	Output only whether files differ
28     -r	Recurse
29     -S	Start with FILE when comparing directories
30     -s	Report when two files are the same
31     -T	Make tabs line up by prefixing a tab when necessary
32     -t	Expand tabs to spaces in output
33     -u	Unified diff
34     -U	Output LINES lines of context
35     -w	Ignore all whitespace
36 
37     --color     Color output   --strip-trailing-cr   Strip '\r' from input lines
38     --no-dereference Don't follow symbolic links
39     --TYPE-line-format=FORMAT  Display TYPE (unchanged/old/new) lines using FORMAT
40       FORMAT uses printf integer escapes (ala %-2.4x) followed by LETTER: FELMNn
41     Supported format specifiers are:
42     * %l, the contents of the line, without the trailing newline
43     * %L, the contents of the line, including the trailing newline
44     * %%, the character '%'
45 */
46 
47 #define FOR_diff
48 #include "toys.h"
49 
50 GLOBALS(
51   long U;
52   struct arg_list *L;
53   char *F, *S, *new_line_format, *old_line_format, *unchanged_line_format;
54 
55   int dir_num, size, is_binary, is_symlink, differ, change, len[2], *offset[2];
56   struct stat st[2];
57   struct {
58     char **list;
59     int nr_elm;
60   } dir[2];
61   struct {
62     FILE *fp;
63     int len;
64   } file[2];
65   struct {
66     char *name;
67     int len;
68   } link[2];
69 )
70 
71 #define IS_STDIN(s)     (*(s)=='-' && !(s)[1])
72 
73 struct v_vector {
74   unsigned serial:31;
75   unsigned last:1;
76   union {
77     unsigned hash;
78     unsigned p;
79   };
80 };
81 
82 struct diff {
83   long a, b, c, d, prev, suff;
84 };
85 
86 struct candidate {
87   struct candidate *next, *prev;
88   int a, b;
89 };
90 
91 enum {
92   empty = 1 << 9,
93   eol = 1 << 10,
94   eof = 1 << 11,
95   space = 1 << 12
96 };
97 
xlstat(char * path,struct stat * st)98 void xlstat(char *path, struct stat *st)
99 {
100   if(lstat(path, st)) perror_exit("Can't lstat %s", path);
101 }
102 
comp(void * a,void * b)103 static int comp(void *a, void *b)
104 {
105   int i = ((struct v_vector *)a)->hash - ((struct v_vector *)b)->hash;
106 
107   return i ? : ((struct v_vector *)a)->serial - ((struct v_vector *)b)->serial;
108 }
109 
search(struct candidate ** K,int r,int k,int j)110 static int search(struct candidate **K, int r, int k, int j)
111 {
112   int low = r, upper = k, mid;
113 
114   while (low<=(mid = (low+upper)/2)) {
115     if (K[mid]->b < j && K[mid + 1]->b > j) return mid;
116     if (K[mid]->b < j) low = mid + 1;
117     else if (K[mid]->b > j) upper = mid - 1;
118     else return -1;
119   }
120   return -1;
121 }
122 
new_candidate(int i,int j,struct candidate * prev)123 static struct candidate *new_candidate(int i, int j, struct candidate *prev)
124 {
125   struct candidate *c = xzalloc(sizeof(struct candidate));
126 
127   c->a = i;
128   c->b = j;
129   c->prev = prev;
130   return c;
131 }
132 
133 /* 1. Search K[r: k] for an element K[s] such that K[s]-> b < j and K[s + 1]->b > j
134  * 2. if found do
135  *  2.a. If K[s + 1]->b > j do K[r] = c; r = s+1 and c = candidate(i, j, K[s]) //we have a candidate
136  *  2.b. if s = k (fence reached move it further) do K[k + 2] = K[k + 1], k++
137  * 3. if E[p].last true break i.e we have reached at the end of an equiv class
138  *    else p = p + 1 //keep traversing the equiv class.
139  * 4. K[r] = c //Save the sucessfully filled k-candidate.
140  */
do_merge(struct candidate ** K,int * k,int i,struct v_vector * E,int p)141 static void do_merge(struct candidate **K, int *k, int i,
142     struct v_vector *E, int p)
143 {
144   int r = 0, s, j;
145   struct candidate *pr = 0, *c = K[0];
146 
147   for (;;) {
148     j = E[p].serial;
149     s = search(K, r, *k, j);
150     if (s>=0 && K[s]->b<j && K[s+1]->b>j) {
151       if (K[s+1]->b>j) {
152         pr = K[s];
153         if (r && K[r]) c->next = K[r];
154         K[r] = c;
155         r = s+1;
156         c = new_candidate(i , j, pr);
157       }
158       if (s == *k) {
159         ++*k;
160         K[*k+1] = K[*k];
161         break;
162       }
163     }
164     if (E[p].last) break;
165     else p++;
166   }
167   K[r] = c;
168 }
169 
read_tok(FILE * fp,off_t * off,int tok)170 static int read_tok(FILE *fp, off_t *off, int tok)
171 {
172   int t = 0, is_space;
173 
174   tok |= empty;
175   while (!(tok & eol)) {
176     t = fgetc(fp);
177 
178     if (FLAG(strip_trailing_cr) && t == '\r') {
179       int t2 = fgetc(fp);
180       if (t2 == '\n') {
181         t = t2;
182         if (off) (*off)++;
183       } else {
184         ungetc(t2, fp);
185       }
186     }
187 
188     if (off && t != EOF) *off += 1;
189     is_space = isspace(t) || (t == EOF);
190     tok |= (t & (eof + eol)); //set tok eof+eol when t is eof
191 
192     if (t == '\n') tok |= eol;
193     if (FLAG(i)) if (t >= 'A' && t <= 'Z') t = tolower(t);
194 
195     if (FLAG(w) && is_space) continue;
196 
197     if (FLAG(b)) {
198       if (tok & space) {
199         if (is_space) continue;
200         tok &= ~space;
201       } else if (is_space) t = space + ' ';
202     }
203     tok &= ~(empty + 0xff);  //remove empty and char too.
204     tok |= t; //add most recent char
205     break;
206   }
207 
208   return tok;
209 }
210 
bcomp(void * a,void * b)211 int bcomp(void *a, void *b)
212 {
213   struct v_vector *l = (struct v_vector *)a, *r = (struct v_vector *)b;
214 
215   return (l->hash-r->hash) ? : r[-1].last ? 0 : -1;
216 }
217 
218 /*  file[0] corresponds file 1 and file[1] correspond file 2.
219  * 1. calc hashes for both the files and store them in vector(v[0], v[1])
220  * 2. sort file[1] with hash as primary and serial as sec. key
221  * 3. Form the equivalance class of file[1] stored in e vector. It lists all the equivalence
222  *    classes of lines in file[1], with e.last = true on the last element of each class.
223  *    The elements are ordered by serial within classes.
224  * 4. Form the p vector stored in  p_vector. p_vector[i], if non-zero, now points in e vector
225  *    to the beginning of the equiv class of lines in file[1] equivalent to line
226  *    i in file[0].
227  * 5. Form the k-candidates as discribed in do_merge.
228  * 6. Create a vector J[i] = j, such that i'th line in file[0] is j'th line of
229  *    file[1], i.e J comprises LCS
230  */
create_j_vector()231 static int *create_j_vector()
232 {
233   int tok, i, j, size = 100, k;
234   off_t off;
235   long hash;
236   int *p_vector, *J;
237   struct v_vector *v[2], *e;
238   struct candidate **kcand, *pr;
239 
240   for (i = 0; i < 2; i++) {
241     tok = off = 0;
242     hash = 5831;
243     v[i] = xzalloc(size * sizeof(struct v_vector));
244     TT.offset[i] = xzalloc(size * sizeof(int));
245     TT.file[i].len = 0;
246     if (fseek(TT.file[i].fp, 0, SEEK_SET)) perror_exit("fseek failed");
247 
248     while (1) {
249       tok  = read_tok(TT.file[i].fp, &off, tok);
250       if (!(tok & empty)) {
251         hash = ((hash << 5) + hash) + (tok & 0xff);
252         continue;
253       }
254 
255       if (size == ++TT.file[i].len) {
256         size = size * 11 / 10;
257         v[i] = xrealloc(v[i], size*sizeof(struct v_vector));
258         TT.offset[i] = xrealloc(TT.offset[i], size*sizeof(int));
259       }
260 
261       v[i][TT.file[i].len].hash = hash & INT_MAX;
262       TT.offset[i][TT.file[i].len] = off;
263       if ((tok & eof)) {
264         TT.offset[i][TT.file[i].len] = ++off;
265         break;
266       }
267       hash = 5831;  //next line
268       tok = 0;
269     }
270     if (TT.offset[i][TT.file[i].len]-TT.offset[i][TT.file[i].len-1] == 1)
271       TT.file[i].len--;
272   }
273 
274   for (i = 0; i<=TT.file[1].len; i++) v[1][i].serial = i;
275   qsort(v[1]+1, TT.file[1].len, sizeof(struct v_vector), (void *)comp);
276 
277   e = v[1];
278   e[0].serial = 0;
279   e[0].last = 1;
280   for (i = 1; i<=TT.file[1].len; i++)
281     e[i].last = i==TT.file[1].len || v[1][i].hash!=v[1][i+1].hash;
282 
283   p_vector = xzalloc((TT.file[0].len+2)*sizeof(int));
284   for (i = 1; i<=TT.file[0].len; i++) {
285     void *r = bsearch(&v[0][i], e+1, TT.file[1].len, sizeof(*e), (void *)bcomp);
286     if (r) p_vector[i] = (struct v_vector *)r - e;
287   }
288 
289   for (i = 1; i<=TT.file[0].len; i++) e[i].p = p_vector[i];
290   free(p_vector);
291 
292   size = 100;
293   kcand = xzalloc(size * sizeof(struct candidate *));
294 
295   kcand[0] = new_candidate(0 , 0, 0);
296   kcand[1] = new_candidate(TT.file[0].len+1, TT.file[1].len+1, 0); //the fence
297 
298   k = 0;  //last successfully filled k candidate.
299   for (i = 1; i<=TT.file[0].len; i++) {
300     if (!e[i].p) continue;
301     if ((size - 2) == k) {
302       size = size * 11 / 10;
303       kcand = xrealloc(kcand, (size*sizeof(struct candidate *)));
304     }
305     do_merge(kcand, &k, i, e, e[i].p);
306   }
307   free(v[0]); //no need for v_vector now.
308   free(v[1]);
309 
310   J = xzalloc((TT.file[0].len+2)*sizeof(int));
311 
312   for (pr = kcand[k]; pr; pr = pr->prev) J[pr->a] = pr->b;
313   J[TT.file[0].len+1] = TT.file[1].len+1; //mark boundary
314 
315   for (i = k+1; i>=0; i--) llist_traverse(kcand[i], free);
316   free(kcand);
317 
318   for (i = 1; i<=TT.file[0].len; i++) { // jackpot?
319     if (!J[i]) continue;
320 
321     if (fseek(TT.file[0].fp, TT.offset[0][i-1], SEEK_SET)
322      || fseek(TT.file[1].fp, TT.offset[1][J[i]-1], SEEK_SET))
323        perror_exit("fseek");
324 
325     for (j = J[i]; i<=TT.file[0].len && J[i]==j; i++, j++) {
326       int tok0 = 0, tok1 = 0;
327 
328       do {
329         tok0 = read_tok(TT.file[0].fp, NULL, tok0);
330         tok1 = read_tok(TT.file[1].fp, NULL, tok1);
331         if (((tok0 ^ tok1) & empty) || ((tok0 & 0xff) != (tok1 & 0xff)))
332           J[i] = 0;
333       } while (!(tok0 & tok1 & empty));
334     }
335   }
336   return J;
337 }
338 
diff(char ** files)339 static int *diff(char **files)
340 {
341   size_t i ,j;
342   int s, t;
343   char *bufi, *bufj;
344 
345   TT.is_binary = 0; //loop calls to diff
346   TT.differ = 0;
347 
348   for (i = 0; i < 2; i++) {
349     if ((j = !strcmp(files[i], "-")) || S_ISFIFO(TT.st[i].st_mode)) {
350       char *tmp_name;
351       int srcfd = j ? 0 : open(files[i], O_RDONLY),
352         tmpfd = xtempfile("fifo", &tmp_name);
353 
354       unlink(tmp_name);
355       free(tmp_name);
356 
357       xsendfile(srcfd, tmpfd);
358       if (!j) close(srcfd);
359       TT.file[i].fp = fdopen(tmpfd, "r");
360     } else TT.file[i].fp = fopen(files[i], "r");
361 
362     if (!TT.file[i].fp) {
363       perror_msg("%s", files[i]);
364       TT.differ = 2;
365       return 0; //return SAME
366     }
367   }
368 
369   s = sizeof(toybuf)/2;
370   bufi = toybuf;
371   bufj = toybuf+s;
372 
373   if (fseek(TT.file[0].fp, 0, SEEK_SET) || fseek(TT.file[1].fp, 0, SEEK_SET))
374     perror_exit("fseek");
375 
376   if (FLAG(a)) return create_j_vector();
377 
378   while (1) {
379     i = fread(bufi, 1, s, TT.file[0].fp);
380     j = fread(bufj, 1, s, TT.file[1].fp);
381 
382     if (i != j) TT.differ = 1;
383 
384     for (t = 0; t < i && !TT.is_binary; t++) if (!bufi[t]) TT.is_binary = 1;
385     for (t = 0; t < j && !TT.is_binary; t++) if (!bufj[t]) TT.is_binary = 1;
386 
387     i = minof(i, j);
388     for (t = 0; t < i; t++) if (bufi[t] != bufj[t]) TT.differ = 1;
389 
390     if (!i || !j) break;
391   }
392   if (TT.is_binary || !TT.differ) return 0;
393 
394   return create_j_vector();
395 }
396 
print_line_matching_regex(int a,regex_t * reg,int * off_set,FILE * fp)397 static void print_line_matching_regex(int a, regex_t *reg, int *off_set, FILE *fp) {
398   int i = 0, j = 0, line_buf_size = 100, cc = 0;
399   char* line = xzalloc(line_buf_size * sizeof(char));
400   for (i = a; a > 0; --i) {
401     int line_len = 0;
402     if (fseek(fp, off_set[i - 1], SEEK_SET)) perror_exit("fseek failed");
403     for (j = 0; j < (off_set[i] - off_set[i - 1]); j++) {
404       cc = fgetc(fp);
405       if (cc == EOF || cc == '\n') {
406         break;
407       }
408       ++line_len;
409       if (line_len >= line_buf_size) {
410         line_buf_size = line_buf_size * 11 / 10;
411         line = xrealloc(line, line_buf_size*sizeof(char));
412       }
413       line[j] = cc;
414     }
415     line[line_len] = '\0';
416     if (!regexec0(reg, line, line_len, 0, NULL, 0)) {
417       printf(" %s", line);
418       break;
419     }
420   }
421   free(line);
422 }
423 
print_diff(int a,int b,char c,int * off_set,FILE * fp)424 static void print_diff(int a, int b, char c, int *off_set, FILE *fp)
425 {
426   int i, j, cc, cl;
427   char *reset = 0, *fmt = 0;
428 
429   if (!TT.new_line_format && c!=' ' && FLAG(color)) {
430     printf("\e[%dm", 31+(c=='+'));
431     reset = "\e[0m";
432   }
433 
434   for (i = a; i <= b; i++) {
435     if (fseek(fp, off_set[i - 1], SEEK_SET)) perror_exit("fseek failed");
436     if (TT.new_line_format) {
437       if (c == '+') fmt = TT.new_line_format;
438       else if (c == '-') fmt = TT.old_line_format;
439       else fmt = TT.unchanged_line_format;
440       while (*fmt) {
441         if (*fmt == '%') {
442           fmt++;
443           char f = *fmt++;
444           if (f == '%') putchar('%');
445           else if (f == 'l' || f == 'L') {
446             for (j = 0; j <  (off_set[i] - off_set[i - 1]); j++) {
447               cc = fgetc(fp);
448               if (cc == EOF) break;
449               if (cc != '\n' || f == 'L') putchar(cc);
450             }
451           } else error_exit("Unrecognized format specifier %%%c", f);
452         } else putchar(*fmt++);
453       }
454       continue;
455     }
456     putchar(c);
457     if (FLAG(T)) putchar('\t');
458     for (j = 0, cl = 0; j <  (off_set[i] - off_set[i - 1]); j++) {
459       cc = fgetc(fp);
460       if (cc == EOF) {
461         printf("%s\n\\ No newline at end of file\n", reset ? : "");
462         return;
463       }
464       if ((cc == '\t') && FLAG(t)) do putchar(' '); while (++cl & 7);
465       else {
466         putchar(cc); //xputc has calls to fflush, it hurts performance badly.
467         cl++;
468       }
469     }
470   }
471   if (reset) xputsn(reset);
472 }
473 
concat_file_path(char * path,char * default_path)474 static char *concat_file_path(char *path, char *default_path)
475 {
476   char *final_path;
477 
478   if ('/' == path[strlen(path) - 1]) {
479     while (*default_path == '/') ++default_path;
480     final_path = xmprintf("%s%s", path, default_path);
481   }
482   else if (*default_path != '/')
483     final_path = xmprintf("%s/%s", path, default_path);
484   else final_path = xmprintf("%s%s", path, default_path);
485   return final_path;
486 }
487 
skip(struct dirtree * node)488 static int skip(struct dirtree *node)
489 {
490   int len = strlen(toys.optargs[TT.dir_num]), ret = 0;
491   char *tmp = NULL, *ptr, *f_path = dirtree_path(node, NULL);
492   struct stat st;
493 
494   ptr = f_path;
495   ptr += len;
496   if (ptr[0]) {
497     tmp = concat_file_path(toys.optargs[1 - TT.dir_num], ptr);
498     if (tmp && !stat(tmp, &st)) ret = 0; //it is there on other side
499     else ret = 1; //not present on other side.
500   }
501   free(f_path);
502   if (tmp) free(tmp);
503   return ret; //add otherwise
504 }
505 
add_to_list(struct dirtree * node)506 static void add_to_list(struct dirtree *node)
507 {
508   char *full_path;
509 
510   TT.dir[TT.dir_num].list = xrealloc(TT.dir[TT.dir_num].list,
511       (TT.size + 1)*sizeof(char*));
512   TT.size++;
513   full_path = dirtree_path(node, NULL);
514   TT.dir[TT.dir_num].list[TT.size - 1] = full_path;
515 }
516 
list_dir(struct dirtree * node)517 static int list_dir(struct dirtree *node)
518 {
519   int ret = 0;
520 
521   if (!dirtree_notdotdot(node)) return 0;
522 
523   if (S_ISDIR(node->st.st_mode) && !node->parent) { //add root dirs.
524     add_to_list(node);
525     return (DIRTREE_RECURSE|((FLAG(no_dereference)) ? 0 : DIRTREE_SYMFOLLOW));
526   }
527 
528   if (S_ISDIR(node->st.st_mode) && FLAG(r)) {
529     if (!FLAG(N)) ret = skip(node);
530     if (!ret) return DIRTREE_RECURSE|((FLAG(no_dereference)) ? 0 : DIRTREE_SYMFOLLOW);
531     else {
532       add_to_list(node); //only at one side.
533       return 0;
534     }
535   } else {
536     add_to_list(node);
537     return S_ISDIR(node->st.st_mode) ? 0 : (DIRTREE_RECURSE|((FLAG(no_dereference)) ? 0 : DIRTREE_SYMFOLLOW));
538   }
539 }
540 
cmp(void * p1,void * p2)541 static int cmp(void *p1, void *p2)
542 {
543    return strcmp(*(char **)p1, *(char **)p2);
544 }
545 
546 // quote and escape filenames that have awkward characters
quote_filename(char * filename)547 char *quote_filename(char *filename)
548 {
549   char *to = "abfnrtv\"\\", *from = "\a\b\f\n\r\t\v\"\\", *s, *t=0, *u;
550   int len, quote = 0;
551 
552   for (;;) {
553     // measure escapes on first pass, write on second
554     len = 0;
555     for (s = filename; *s; s++) {
556       if ((u = strchr(from, *s))) {
557         if (t) t[len] = '\\', t[len+1] = to[u-from];
558         len += 2;
559       } else if (*s<0x20 || *s>=0x80)
560         len += snprintf(t+len, 5*!!t, "\\%.3o", *s);
561       else {
562         if (t) t[len] = *s;
563         len++;
564       }
565     }
566     if (t) {
567       if (quote) t[len++] = '"';
568       t[len] = 0;
569 
570       return t-quote;
571     }
572 
573     // construct the new string
574     quote = strlen(filename)!=len || strchr(filename, ' ');
575     t = xmalloc(len+1+2*quote);
576     if (quote) *t++ = '"';
577   }
578 }
579 
show_label(char * prefix,char * filename,struct stat * sb)580 static void show_label(char *prefix, char *filename, struct stat *sb)
581 {
582   char date[36];
583   char *quoted_file;
584 
585   quoted_file = quote_filename(filename);
586   printf("%s %s\t%s\n", prefix, quoted_file,
587     format_iso_time(date, sizeof(date), &sb->st_mtim));
588   free(quoted_file);
589 }
590 
do_symlink_diff(char ** files)591 static void do_symlink_diff(char **files)
592 {
593   size_t i;
594   int s = sizeof(toybuf)/2;
595 
596   TT.is_symlink = 1;
597   TT.differ = 0;
598   TT.link[0].name = TT.link[1].name = NULL;
599   for (i = 0; i < 2; i++) {
600     TT.link[i].name = xreadlink(files[i]);
601     if (TT.link[i].name == 0) {
602       perror_msg("readlink failed");
603       TT.differ = 2;
604       free(TT.link[0].name);
605       return;
606     }
607     TT.link[i].len = strlen(TT.link[i].name);
608   }
609 
610   if (TT.link[0].len != TT.link[1].len) TT.differ = 1;
611   else if (smemcmp(TT.link[0].name, TT.link[1].name, TT.link[0].len))
612     TT.differ = 1;
613   free(TT.link[0].name);
614   free(TT.link[1].name);
615   return;
616 }
617 
do_diff(char ** files)618 static void do_diff(char **files)
619 {
620   long i = 1, size = 1, x = 0, change = 0, ignore_white,
621    start1, end1, start2, end2;
622   struct diff *d;
623   struct arg_list *llist = TT.L;
624   int *J;
625   regex_t reg;
626 
627   TT.offset[0] = TT.offset[1] = NULL;
628   J = diff(files);
629 
630   if (!J) return; //No need to compare, have to status only
631 
632   if (TT.F) {
633     xregcomp(&reg, TT.F, 0);
634   }
635 
636   d = xzalloc(size *sizeof(struct diff));
637   do {
638     ignore_white = 0;
639     for (d[x].a = i; d[x].a<=TT.file[0].len; d[x].a++) {
640       if (J[d[x].a] != (J[d[x].a - 1] + 1)) break;
641       else continue;
642     }
643     d[x].c = (J[d[x].a - 1] + 1);
644 
645     for (d[x].b = (d[x].a - 1); d[x].b<=TT.file[0].len; d[x].b++) {
646       if (J[d[x].b + 1]) break;
647       else continue;
648     }
649     d[x].d = (J[d[x].b + 1] - 1);
650 
651     if (FLAG(B)) {
652       if (d[x].a <= d[x].b) {
653         if ((TT.offset[0][d[x].b] - TT.offset[0][d[x].a - 1])
654             == (d[x].b - d[x].a + 1))
655           ignore_white = 1;
656       } else if (d[x].c <= d[x].d){
657         if ((TT.offset[1][d[x].d] - TT.offset[1][d[x].c - 1])
658             == (d[x].d - d[x].c + 1))
659           ignore_white = 1;
660       }
661     }
662 
663     //is we have diff ?   TODO: lolcat?
664     if ((d[x].a <= d[x].b || d[x].c <= d[x].d) && !ignore_white) change = 1;
665 
666     if (!ignore_white) d = xrealloc(d, (x + 2) *sizeof(struct diff));
667     i = d[x].b + 1;
668     if (i>TT.file[0].len) break;
669     J[d[x].b] = d[x].d;
670     if (!ignore_white) x++;
671   } while (i<=TT.file[0].len);
672 
673   i = x+1;
674   TT.differ = change; //update status, may change bcoz of -w etc.
675 
676   if (!FLAG(q) && change) {
677     if (!TT.new_line_format) {
678       if (FLAG(color)) printf("\e[1m");
679       if (FLAG(L)) printf("--- %s\n", llist->arg);
680       else show_label("---", files[0], &(TT).st[0]);
681       if (!FLAG(L) || !llist->next) show_label("+++", files[1], &(TT).st[1]);
682       else {
683         while (llist->next) llist = llist->next;
684         printf("+++ %s\n", llist->arg);
685       }
686       if (FLAG(color)) printf("\e[0m");
687     }
688 
689     struct diff *t, *ptr1 = d, *ptr2 = d;
690     while (i) {
691       long a,b;
692 
693       // trim context to file len.
694       if (TT.new_line_format || TT.U>TT.file[0].len) TT.U = TT.file[0].len;
695       if (ptr1->b < ptr1->a && ptr1->d < ptr1->c) {
696         i--;
697         continue;
698       }
699       //Handle the context stuff
700       a =  ptr1->a;
701       b = minof(TT.file[0].len, ptr1->b);
702       if (i == x + 1) ptr1->suff = maxof(1, a-TT.U);
703       else if (ptr1[-1].prev >= ptr1->a-TT.U) ptr1->suff = ptr1[-1].prev+1;
704       else ptr1->suff =  ptr1->a-TT.U;
705 calc_ct:
706       if (i > 1) {
707         if ((ptr2->b + TT.U) >= (ptr2  + 1)->a) {
708           ptr2++;
709           i--;
710           goto calc_ct;
711         } else ptr2->prev = ptr2->b + TT.U;
712       } else ptr2->prev = ptr2->b;
713       start1 = (ptr2->prev - ptr1->suff + 1);
714       end1 = (start1 == 1) ? -1 : start1;
715       start2 = maxof(1, ptr1->c - (ptr1->a - ptr1->suff));
716       end2 = ptr2->prev - ptr2->b + ptr2->d;
717 
718       if (!TT.new_line_format) {
719         if (FLAG(color)) printf("\e[36m");
720         printf("@@ -%ld", start1 ? ptr1->suff: (ptr1->suff -1));
721         if (end1 != -1) printf(",%ld ", ptr2->prev-ptr1->suff + 1);
722         else putchar(' ');
723 
724         printf("+%ld", (end2 - start2 + 1) ? start2: (start2 -1));
725         if ((end2 - start2 +1) != 1) printf(",%ld ", (end2 - start2 +1));
726         else putchar(' ');
727         printf("@@");
728         if (FLAG(color)) printf("\e[0m");
729         if (TT.F) {
730           print_line_matching_regex(ptr1->suff-1, &reg, TT.offset[0], TT.file[0].fp);
731         }
732         putchar('\n');
733       }
734 
735       for (t = ptr1; t <= ptr2; t++) {
736         if (t==ptr1) print_diff(t->suff, t->a-1, ' ', TT.offset[0], TT.file[0].fp);
737         print_diff(t->a, t->b, '-', TT.offset[0], TT.file[0].fp);
738         print_diff(t->c, t->d, '+', TT.offset[1], TT.file[1].fp);
739         if (t == ptr2)
740           print_diff(t->b+1, (t)->prev, ' ', TT.offset[0], TT.file[0].fp);
741         else print_diff(t->b+1, (t+1)->a-1, ' ', TT.offset[0], TT.file[0].fp);
742       }
743       ptr2++;
744       ptr1 = ptr2;
745       i--;
746     } //end of while
747   } //End of !FLAG_q
748   free(d);
749   free(J);
750   free(TT.offset[0]);
751   free(TT.offset[1]);
752 }
753 
show_status(char ** files)754 static void show_status(char **files)
755 {
756   if (TT.differ==2) return; // TODO: needed?
757   if (TT.differ ? FLAG(q) || TT.is_binary || TT.is_symlink : FLAG(s))
758     printf("%s %s and %s %s\n", TT.is_symlink ? "Symbolic links" : "Files",
759       files[0], files[1], TT.differ ? "differ" : "are identical");
760 }
761 
create_empty_entry(int l,int r,int j)762 static void create_empty_entry(int l , int r, int j)
763 {
764   struct stat st[2];
765   char *f[2], *path[2];
766   int i;
767 
768   for (i = 0; i < 2; i++) {
769     if (j) {
770       if (!FLAG(N) || i!=(j>0)) continue;
771       path[!i] = concat_file_path(TT.dir[!i].list[0],
772         TT.dir[i].list[i ? r : l]+TT.len[i]);
773       f[!i] = "/dev/null";
774     }
775     path[i] = f[i] = TT.dir[i].list[i ? r : l];
776     (FLAG(no_dereference) ? lstat : stat)(f[i], st+i);
777     if (j) st[!i] = st[i];
778   }
779 
780   for (i = 0; i<2; i++) {
781     if (!S_ISREG(st[i].st_mode) && !S_ISDIR(st[i].st_mode) && !S_ISLNK(st[i].st_mode)) {
782       printf("File %s is not a regular file, symbolic link, or directory and was skipped\n",
783         path[i]);
784       break;
785     }
786   }
787 
788   if (i != 2);
789   else if ((st[0].st_mode & S_IFMT) != (st[1].st_mode & S_IFMT)) {
790     i = S_ISREG(st[0].st_mode) + 2 * S_ISLNK(st[0].st_mode);
791     int k = S_ISREG(st[1].st_mode) + 2 * S_ISLNK(st[1].st_mode);
792     char *fidir[] = {"directory", "regular file", "symbolic link"};
793     printf("File %s is a %s while file %s is a %s\n",
794       path[0], fidir[i], path[1], fidir[k]);
795   } else if (S_ISDIR(st[0].st_mode))
796     printf("Common subdirectories: %s and %s\n", path[0], path[1]);
797   else if (S_ISLNK(st[0].st_mode)) {
798     do_symlink_diff(f);
799     show_status(path);
800   } else {
801     do_diff(f);
802     show_status(path);
803     if (TT.file[0].fp) fclose(TT.file[0].fp);
804     if (TT.file[1].fp) fclose(TT.file[1].fp);
805   }
806 
807   if (FLAG(N) && j) free(path[j<=0]);
808 }
809 
diff_dir(int * start)810 static void diff_dir(int *start)
811 {
812   int l, r, j = 0;
813 
814   l = start[0]; //left side file start
815   r = start[1]; //right side file start
816   while (l < TT.dir[0].nr_elm && r < TT.dir[1].nr_elm) {
817     if ((j = strcmp (TT.dir[0].list[l]+TT.len[0],
818             (TT.dir[1].list[r]+TT.len[1]))) && !FLAG(N)) {
819       if (j > 0) {
820         printf("Only in %s: %s\n", TT.dir[1].list[0], TT.dir[1].list[r]+TT.len[1]);
821         free(TT.dir[1].list[r++]);
822       } else {
823         printf ("Only in %s: %s\n", TT.dir[0].list[0], TT.dir[0].list[l]+TT.len[0]);
824         free(TT.dir[0].list[l++]);
825       }
826       TT.differ = 1;
827     } else {
828       create_empty_entry(l, r, j); //create non empty dirs/files if -N.
829       if (j>=0) free(TT.dir[1].list[r++]);
830       if (j<=0) free(TT.dir[0].list[l++]);
831     }
832   }
833 
834   if (l == TT.dir[0].nr_elm) {
835     while (r<TT.dir[1].nr_elm) {
836       if (!FLAG(N)) {
837         printf ("Only in %s: %s\n", TT.dir[1].list[0], TT.dir[1].list[r]+TT.len[1]);
838         TT.differ = 1;
839       } else create_empty_entry(l, r, 1);
840       free(TT.dir[1].list[r++]);
841     }
842   } else if (r == TT.dir[1].nr_elm) {
843     while (l<TT.dir[0].nr_elm) {
844       if (!FLAG(N)) {
845         printf ("Only in %s: %s\n", TT.dir[0].list[0], TT.dir[0].list[l]+TT.len[0]);
846         TT.differ = 1;
847       } else create_empty_entry(l, r, -1);
848       free(TT.dir[0].list[l++]);
849     }
850   }
851   free(TT.dir[0].list[0]); //we are done, free root nodes too
852   free(TT.dir[0].list);
853   free(TT.dir[1].list[0]);
854   free(TT.dir[1].list);
855 }
856 
diff_main(void)857 void diff_main(void)
858 {
859   int i, j = 0, k = 1, start[2] = {1, 1};
860   char **files = toys.optargs;
861 
862   toys.exitval = 2;
863   if (FLAG(color) && !isatty(1)) toys.optflags ^= FLAG_color;
864 
865   for (j = 0; j < 2; j++) {
866     if (IS_STDIN(files[j])) fstat(0, &TT.st[j]);
867     else if (FLAG(no_dereference)) xlstat(files[j], &TT.st[j]);
868     else xstat(files[j], &TT.st[j]);
869   }
870 
871   if (S_ISDIR(TT.st[0].st_mode) != S_ISDIR(TT.st[1].st_mode))
872     error_exit("can't compare directory to non-directory");
873 
874   if (TT.unchanged_line_format || TT.old_line_format || TT.new_line_format) {
875     if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode))
876       error_exit("can't use line format with directories");
877     if (!TT.unchanged_line_format) TT.unchanged_line_format = "%l\n";
878     if (!TT.old_line_format) TT.old_line_format = "%l\n";
879     if (!TT.new_line_format) TT.new_line_format = "%l\n";
880   }
881 
882   if (same_file(TT.st, TT.st+1)) {
883     toys.exitval = 0;
884     return show_status(files);
885   }
886 
887   if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode)) {
888     for (j = 0; j < 2; j++) {
889       memset(TT.dir+j, 0, sizeof(*TT.dir));
890       dirtree_flagread(files[j], (FLAG(no_dereference)) ? 0 : DIRTREE_SYMFOLLOW, list_dir);
891       TT.dir[j].nr_elm = TT.size; //size updated in list_dir
892       qsort(&TT.dir[j].list[1], TT.size-1, sizeof(char *), (void *)cmp);
893 
894       TT.len[j] = strlen(TT.dir[j].list[0]); //calc root node len
895       TT.len[j] += TT.dir[j].list[0][TT.len[j]-1] != '/';
896 
897       if (FLAG(S)) {
898         while (k<TT.size && strcmp(TT.dir[j].list[k]+TT.len[j], TT.S)<0) {
899           start[j]++;
900           k++;
901         }
902       }
903       TT.dir_num++;
904       TT.size = 0;
905       k = 1;
906     }
907     diff_dir(start);
908   } else {
909     if (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)) {
910       int d = S_ISDIR(TT.st[0].st_mode);
911       char *slash = strrchr(files[d], '/');
912 
913       files[!d] = concat_file_path(files[!d], slash ? slash+1 : files[d]);
914       if ((FLAG(no_dereference) ? lstat : stat)(files[!d], &TT.st[!d]))
915         perror_exit("%s", files[!d]);
916     }
917     if ((S_ISLNK(TT.st[0].st_mode)) != S_ISLNK(TT.st[1].st_mode)) {
918       i = !strcmp(files[0], "-") ? 0 : S_ISREG(TT.st[0].st_mode) + 2 * S_ISLNK(TT.st[0].st_mode);
919       int k = !strcmp(files[0], "-") ? 0 : S_ISREG(TT.st[1].st_mode) + 2 * S_ISLNK(TT.st[1].st_mode);
920       char *fidir[] = {"fifo", "regular file", "symbolic link"};
921       printf("File %s is a %s while file %s is a %s\n",
922         files[0], fidir[i], files[1], fidir[k]);
923       TT.differ = 1;
924     } else {
925       if (S_ISLNK(TT.st[0].st_mode)) do_symlink_diff(files);
926       else do_diff(files);
927       show_status(files);
928     }
929     if (TT.file[0].fp) fclose(TT.file[0].fp);
930     if (TT.file[1].fp) fclose(TT.file[1].fp);
931   }
932   toys.exitval = TT.differ; //exit status will be the status
933 }
934