1 /* wc.c - Word count
2 *
3 * Copyright 2011 Rob Landley <[email protected]>
4 *
5 * See http://opengroup.org/onlinepubs/9699919799/utilities/wc.html
6
7 USE_WC(NEWTOY(wc, "Lcmwl", TOYFLAG_USR|TOYFLAG_BIN))
8
9 config WC
10 bool "wc"
11 default y
12 help
13 usage: wc [-Llwcm] [FILE...]
14
15 Count lines, words, and characters in input.
16
17 -L Show max line length
18 -l Show lines
19 -w Show words
20 -c Show bytes
21 -m Show characters
22
23 By default outputs lines, words, bytes, and filename for each
24 argument (or from stdin if none). Displays only either bytes
25 or characters.
26 */
27
28 #define FOR_wc
29 #include "toys.h"
30
GLOBALS(unsigned long totals[5];)31 GLOBALS(
32 unsigned long totals[5];
33 )
34
35 static void show_lengths(unsigned long *lengths, char *name)
36 {
37 int i, space = 0, first = 1;
38
39 // POSIX says there should never be leading spaces, but accepts that
40 // traditional implementations use 7 spaces, unless only one file (or
41 // just stdin) is being counted, when there should be no leading spaces,
42 // *except* for the case where we're going to output multiple numbers.
43 // And, yes, folks have test scripts that rely on all this nonsense :-(
44 // Note: sufficiently modern versions of coreutils wc will use the smallest
45 // column width necessary to have all columns be equal width rather than 0.
46 if (!(!toys.optc && !(toys.optflags & (toys.optflags-1))) && toys.optc!=1)
47 space = 7;
48
49 for (i = 0; i<ARRAY_LEN(TT.totals); i++) {
50 if (toys.optflags&(1<<i)) {
51 printf(" %*ld"+first, space, lengths[i]);
52 first = 0;
53 }
54 if (i==4) TT.totals[i] = maxof(TT.totals[i], lengths[i]);
55 else TT.totals[i] += lengths[i];
56 }
57 if (*toys.optargs) printf(" %s", name);
58 xputc('\n');
59 }
60
do_wc(int fd,char * name)61 static void do_wc(int fd, char *name)
62 {
63 int len = 0, clen = 1, space = 0;
64 unsigned long word = 0, lengths[ARRAY_LEN(TT.totals)] = {0}, line = 0;
65
66 // fast path: wc -c normalfile is file length.
67 if (toys.optflags == FLAG_c) {
68 struct stat st;
69
70 // On Linux, files in /proc often report their size as 0.
71 if (!fstat(fd, &st) && S_ISREG(st.st_mode) && st.st_size) {
72 lengths[3] = st.st_size;
73 goto show;
74 }
75 }
76
77 for (;;) {
78 int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len);
79 unsigned wchar;
80
81 if (len2<0) perror_msg_raw(name);
82 else len += len2;
83 if (len2<1) done++;
84
85 for (pos = 0; pos<len; pos++) {
86 if (toybuf[pos]=='\n') lengths[0]++;
87 lengths[3]++;
88 if (FLAG(m)||FLAG(L)) {
89 // If we've consumed next wide char
90 if (--clen<1) {
91 // next wide size, don't count invalid, fetch more data if necessary
92 clen = utf8towc(&wchar, toybuf+pos, len-pos);
93 if (clen == -1) continue;
94 if (clen == -2 && !done) break;
95
96 lengths[2]++;
97 line += maxof(wcwidth(wchar), 0);
98 if (wchar=='\t') line += 8-(line&7);
99 else if (wchar=='\n' || wchar=='\r') {
100 if (line>lengths[4]) lengths[4] = line;
101 line = 0;
102 }
103
104 space = iswspace(wchar);
105 }
106 } else space = isspace(toybuf[pos]);
107
108 if (space) word=0;
109 else {
110 if (!word) lengths[1]++;
111 word=1;
112 }
113 }
114 if (done) break;
115 if (pos != len) memmove(toybuf, toybuf+pos, len-pos);
116 len -= pos;
117 }
118 if (line>lengths[4]) lengths[4] = line;
119
120 show:
121 show_lengths(lengths, name);
122 }
123
wc_main(void)124 void wc_main(void)
125 {
126 if (!toys.optflags) toys.optflags = FLAG_l|FLAG_w|FLAG_c;
127 loopfiles(toys.optargs, do_wc);
128 if (toys.optc>1) show_lengths(TT.totals, "total");
129 }
130