xref: /aosp_15_r20/external/toybox/toys/pending/tr.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1 /* tr.c - translate or delete characters
2  *
3  * Copyright 2014 Sandeep Sharma <[email protected]>
4  *
5  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html
6  * TODO: -a (ascii)
7 
8 USE_TR(NEWTOY(tr, "^<1>2Ccstd[+cC]", TOYFLAG_USR|TOYFLAG_BIN))
9 
10 config TR
11   bool "tr"
12   default n
13   help
14     usage: tr [-cdst] SET1 [SET2]
15 
16     Translate, squeeze, or delete characters from stdin, writing to stdout
17 
18     -c/-C  Take complement of SET1
19     -d     Delete input characters coded SET1
20     -s     Squeeze multiple output characters of SET2 into one character
21     -t     Truncate SET1 to length of SET2
22 */
23 
24 #define FOR_tr
25 #include "toys.h"
26 
27 GLOBALS(
28   short *map;
29   int len1, len2;
30 )
31 
// Character class identifiers. The order has to match the table of class
// names in find_class(), which returns the index of the matching name, or
// class_invalid when no name matches.
enum {
  class_alpha,
  class_alnum,
  class_digit,
  class_lower,
  class_upper,
  class_space,
  class_blank,
  class_punct,
  class_cntrl,
  class_xdigit,
  class_invalid
};
37 
map_translation(char * set1,char * set2)38 static void map_translation(char *set1 , char *set2)
39 {
40   int i = TT.len1, k = 0;
41 
42   if (FLAG(d))
43     for (; i; i--, k++) TT.map[set1[k]] = set1[k]|0x100; //set delete bit
44 
45   if (FLAG(s)) {
46     for (i = TT.len1, k = 0; i; i--, k++)
47       TT.map[set1[k]] = TT.map[set1[k]]|0x200;
48     for (i = TT.len2, k = 0; i; i--, k++)
49       TT.map[set2[k]] = TT.map[set2[k]]|0x200;
50   }
51   i = k = 0;
52   while (!FLAG(d) && set2 && TT.len1--) { //ignore set2 if -d present
53     TT.map[set1[i]] = ((TT.map[set1[i]] & 0xFF00) | set2[k]);
54     if (set2[k + 1]) k++;
55     i++;
56   }
57 }
58 
// Decode one backslash escape (adapted from printf).
//
// On entry *esc_val points at the character following the backslash.
// Handles up to three octal digits, \x followed by up to two hex digits, and
// the single character escapes known to unescape().
//
// Returns the decoded byte and leaves *esc_val on the last character
// consumed, so the caller steps forward by one to continue. When the escape
// is not recognized (unescape() gives 0, or \x has no hex digit) a literal
// backslash is returned and *esc_val is left on the backslash, so the
// following characters are taken as ordinary input.
static int handle_escape_char(char **esc_val)
{
  char *ptr = *esc_val;
  int esc_length = 0, c;
  unsigned base = 0, num, result = 0, count = 0;

  if (*ptr == 'x') {
    ptr++;
    esc_length++;
    base = 16;
  } else if (isdigit((unsigned char)*ptr)) base = 8;

  while (esc_length < 3 && base) {
    // Decode the digit explicitly: arithmetic on the raw character lets
    // punctuation such as ':' or '[' slip through as digits.
    c = tolower((unsigned char)*ptr);
    if (isdigit(c)) num = c-'0';
    else if (c >= 'a' && c <= 'f') num = c-'a'+10;
    else num = base; // not a digit

    if (num >= base) {
      if (base == 16) {
        esc_length--;
        if (!esc_length) { // Invalid hex value eg. \xvd, keep it as \xvd
          result = '\\';
          ptr--;
        }
      }
      break;
    }
    esc_length++;
    count = (count*base)+num;
    result = (unsigned char)count; // values above 255 are truncated
    ptr++;
  }
  if (base) ptr--;
  else if (!(result = unescape(*ptr))) {
    result = '\\';
    ptr--;
  }
  *esc_val = ptr;

  return result;
}
96 
// Look up the character class name ("[:alpha:]" etc) at the start of
// class_name. Anything may follow the closing "]".
//
// Returns the index of the matching name in the table below, or the number
// of table entries when nothing matches. The table is in the same order as
// the class_* enum, so that "no match" value equals class_invalid.
static int find_class(char *class_name)
{
  static char *class[] = {
    "[:alpha:]","[:alnum:]","[:digit:]", "[:lower:]","[:upper:]","[:space:]",
    "[:blank:]","[:punct:]","[:cntrl:]", "[:xdigit:]"
  };
  int i, count = sizeof(class)/sizeof(*class);

  // strncmp() stops at the terminator of a short argument, and comparing
  // each name at its own length checks the closing "]" of "[:xdigit:]" too.
  for (i = 0; i < count; i++)
    if (!strncmp(class_name, class[i], strlen(class[i]))) break;

  return i;
}
110 
expand_set(char * arg,int * len,size_t until)111 static char *expand_set(char *arg, int *len, size_t until)
112 {
113   int i = 0, j, k, size = 256;
114   char *set = xzalloc(size), *orig = arg;
115 
116   while (*arg) {
117     if (arg-orig >= until) break;
118     if (i >= size) {
119       size += 256;
120       set = xrealloc(set, size);
121     }
122     if (*arg == '\\') {
123       arg++;
124       set[i++] = handle_escape_char(&arg);
125       arg++;
126       continue;
127     }
128     if (arg[1] == '-') {
129       if (!arg[2]) goto save;
130       j = *arg;
131       k = arg[2];
132       if (j > k) perror_exit("reverse colating order");
133       while (j <= k) set[i++] = j++;
134       arg += 3;
135       continue;
136     }
137     if (*arg == '[' && arg[1] == ':') {
138 
139       if ((j = find_class(arg)) == class_invalid) goto save;
140 
141       if ((j == class_alpha) || (j == class_upper) || (j == class_alnum))
142         for (k = 'A'; k <= 'Z'; k++) set[i++] = k;
143       if ((j == class_alpha) || (j == class_lower) || (j == class_alnum))
144         for (k = 'a'; k <= 'z'; k++) set[i++] = k;
145       if ((j == class_alnum) || (j == class_digit) || (j == class_xdigit))
146         for (k = '0'; k <= '9'; k++) set[i++] = k;
147       if (j == class_space || j == class_blank) {
148         set[i++] = '\t';
149         if (j == class_space) {
150           set[i++] = '\n';
151           set[i++] = '\f';
152           set[i++] = '\r';
153           set[i++] = '\v';
154         }
155         set[i++] = ' ';
156       }
157       if (j == class_punct)
158         for (k = 0; k <= 255; k++) if (ispunct(k)) set[i++] = k;
159       if (j == class_cntrl)
160         for (k = 0; k <= 255; k++) if (iscntrl(k)) set[i++] = k;
161       if (j == class_xdigit) {
162         for (k = 'A'; k <= 'F'; k++) {
163           set[i + 6] = k | 0x20;
164           set[i++] = k;
165         }
166         i += 6;
167         arg += 10;
168         continue;
169       }
170 
171       arg += 9; //never here for class_xdigit.
172       continue;
173     }
174     if (*arg == '[' && arg[1] == '=') { //[=char=] only
175       arg += 2;
176       if (*arg) set[i++] = *arg;
177       if (!arg[1] || arg[1] != '=' || arg[2] != ']')
178         error_exit("bad equiv class");
179       continue;
180     }
181 save:
182     set[i++] = *arg++;
183   }
184   *len = i;
185   return set;
186 }
187 
print_map(char * set1,char * set2)188 static void print_map(char *set1, char *set2)
189 {
190   int n, ch, src, dst, prev = -1;
191 
192   while ((n = read(0, toybuf, sizeof(toybuf)))) {
193     if (!FLAG(d) && !FLAG(s))
194       for (dst = 0; dst < n; dst++) toybuf[dst] = TT.map[toybuf[dst]];
195     else for (src = dst = 0; src < n; src++) {
196       ch = TT.map[toybuf[src]];
197       if (FLAG(d) && (ch & 0x100)) continue;
198       if (FLAG(s) && ((ch & 0x200) && prev == ch)) continue;
199       toybuf[dst++] = prev = ch;
200     }
201     xwrite(1, toybuf, dst);
202   }
203 }
204 
do_complement(char ** set)205 static void do_complement(char **set)
206 {
207   int i = 0, j = 0;
208   char *comp = xmalloc(256);
209 
210   for (; i < 256; i++) {
211     if (memchr(*set, i, TT.len1)) continue;
212     else comp[j++] = (char)i;
213   }
214   free(*set);
215   TT.len1 = j;
216   *set = comp;
217 }
218 
tr_main(void)219 void tr_main(void)
220 {
221   char *set1, *set2 = NULL;
222   int i = 0;
223 
224   TT.map = xmalloc(256*sizeof(*TT.map));
225   for (; i < 256; i++) TT.map[i] = i; //init map
226 
227   set1 = expand_set(*toys.optargs, &TT.len1,
228       (FLAG(t) && toys.optargs[1]) ? strlen(toys.optargs[1]) : -1);
229   if (FLAG(c)) do_complement(&set1);
230   if (toys.optargs[1]) {
231     if (!*toys.optargs[1]) error_exit("set2 can't be empty string");
232     set2 = expand_set(toys.optargs[1], &TT.len2, -1);
233   }
234   map_translation(set1, set2);
235 
236   print_map(set1, set2);
237   free(set1);
238   free(set2);
239 }
240