xref: /aosp_15_r20/external/pcre/src/pcre2grep.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1 /*************************************************
2 *               pcre2grep program                *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16            Copyright (c) 1997-2023 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22     * Redistributions of source code must retain the above copyright notice,
23       this list of conditions and the following disclaimer.
24 
25     * Redistributions in binary form must reproduce the above copyright
26       notice, this list of conditions and the following disclaimer in the
27       documentation and/or other materials provided with the distribution.
28 
29     * Neither the name of the University of Cambridge nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62   && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65 
/* Some CMake builds still define WIN32 under Cygwin; undo that here. */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70 
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76 
77 #ifdef WIN32
78 #include <io.h>                /* For _setmode() */
79 #include <fcntl.h>             /* For _O_BINARY */
80 #endif
81 
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89 
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93 
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97 
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101 
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104 
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108 
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112 
113 /* old VC and older compilers don't support %td or %zu, and even some that claim to
114 be C99 don't support it (hence DISABLE_PERCENT_ZT). */
115 
116 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117   (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118 #ifdef _WIN64
119 #define SIZ_FORM "llu"
120 #else
121 #define SIZ_FORM "lu"
122 #endif
123 #else
124 #define SIZ_FORM "zu"
125 #endif
126 
127 #define FALSE 0
128 #define TRUE 1
129 
130 typedef int BOOL;
131 
132 #define DEFAULT_CAPTURE_MAX 50
133 
134 #if BUFSIZ > 8192
135 #define MAXPATLEN BUFSIZ
136 #else
137 #define MAXPATLEN 8192
138 #endif
139 
140 #define FNBUFSIZ 2048
141 #define ERRBUFSIZ 256
142 
143 /* Values for the "filenames" variable, which specifies options for file name
144 output. The order is important; it is assumed that a file name is wanted for
145 all values greater than FN_DEFAULT. */
146 
147 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
148 
149 /* File reading styles */
150 
151 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
152 
153 /* Actions for the -d and -D options */
154 
155 enum { dee_READ, dee_SKIP, dee_RECURSE };
156 enum { DEE_READ, DEE_SKIP };
157 
158 /* Actions for special processing options (flag bits) */
159 
160 #define PO_WORD_MATCH     0x0001
161 #define PO_LINE_MATCH     0x0002
162 #define PO_FIXED_STRINGS  0x0004
163 
164 /* Binary file options */
165 
166 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
167 
168 /* Return values from decode_dollar_escape() */
169 
170 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171 
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that the macro, followed by its
semicolon, is exactly one statement. A bare "if (...) {}" followed by a
semicolon is two statements, which does not compile when the macro is used as
the body of an if that has an else. */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
179 
180 /* Under Windows, we have to set stdout to be binary, so that it does not
181 convert \r\n at the ends of output lines to \r\r\n. However, that means that
182 any messages written to stdout must have \r\n as their line terminator. This is
183 handled by using STDOUT_NL as the newline string. We also use a normal double
184 quote for the example, as single quotes aren't usually available. */
185 
186 #ifdef WIN32
187 #define STDOUT_NL     "\r\n"
188 #define STDOUT_NL_LEN  2
189 #define QUOT          "\""
190 #else
191 #define STDOUT_NL      "\n"
192 #define STDOUT_NL_LEN  1
193 #define QUOT           "'"
194 #endif
195 
196 /* This code is returned from decode_dollar_escape() when $n is encountered,
197 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198 point. */
199 
200 #define STDOUT_NL_CODE 0x7fffffffu
201 
202 
203 
204 /*************************************************
205 *               Global variables                 *
206 *************************************************/
207 
208 static const char *colour_string = "1;31";
209 static const char *colour_option = NULL;
210 static const char *dee_option = NULL;
211 static const char *DEE_option = NULL;
212 static const char *locale = NULL;
213 static const char *newline_arg = NULL;
214 static const char *group_separator = "--";
215 static const char *om_separator = NULL;
216 static const char *stdin_name = "(standard input)";
217 static const char *output_text = NULL;
218 
219 static char *main_buffer = NULL;
220 
221 static const char *printname_nl = STDOUT_NL;  /* Changed to NULL for -Z */
222 static int printname_colon = ':';             /* Changed to 0 for -Z */
223 static int printname_hyphen = '-';            /* Changed to 0 for -Z */
224 
225 static int after_context = 0;
226 static int before_context = 0;
227 static int binary_files = BIN_BINARY;
228 static int both_context = 0;
229 static int endlinetype;
230 
231 static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
232 static unsigned long int counts_printed = 0;
233 static unsigned long int total_count = 0;
234 
235 static PCRE2_SIZE bufthird = PCRE2GREP_BUFSIZE;
236 static PCRE2_SIZE max_bufthird = PCRE2GREP_MAX_BUFSIZE;
237 static PCRE2_SIZE bufsize = 3*PCRE2GREP_BUFSIZE;
238 
239 #ifdef WIN32
240 static int dee_action = dee_SKIP;
241 #else
242 static int dee_action = dee_READ;
243 #endif
244 
245 static int DEE_action = DEE_READ;
246 static int error_count = 0;
247 static int filenames = FN_DEFAULT;
248 
249 #ifdef SUPPORT_PCRE2GREP_JIT
250 static BOOL use_jit = TRUE;
251 #else
252 static BOOL use_jit = FALSE;
253 #endif
254 
255 static const uint8_t *character_tables = NULL;
256 
257 static uint32_t pcre2_options = 0;
258 static uint32_t extra_options = 0;
259 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
260 static uint32_t match_limit = 0;
261 static uint32_t depth_limit = 0;
262 
263 static pcre2_compile_context *compile_context;
264 static pcre2_match_context *match_context;
265 static pcre2_match_data *match_data, *match_data_pair[2];
266 static PCRE2_SIZE *offsets, *offsets_pair[2];
267 static int match_data_toggle;
268 static uint32_t offset_size;
269 static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
270 
271 static BOOL all_matches = FALSE;
272 static BOOL case_restrict = FALSE;
273 static BOOL count_only = FALSE;
274 static BOOL do_colour = FALSE;
275 #ifdef WIN32
276 static BOOL do_ansi = FALSE;
277 #endif
278 static BOOL file_offsets = FALSE;
279 static BOOL hyphenpending = FALSE;
280 static BOOL invert = FALSE;
281 static BOOL line_buffered = FALSE;
282 static BOOL line_offsets = FALSE;
283 static BOOL multiline = FALSE;
284 static BOOL no_ucp = FALSE;
285 static BOOL number = FALSE;
286 static BOOL omit_zero_count = FALSE;
287 static BOOL resource_error = FALSE;
288 static BOOL quiet = FALSE;
289 static BOOL show_total_count = FALSE;
290 static BOOL silent = FALSE;
291 static BOOL utf = FALSE;
292 static BOOL posix_digit = FALSE;
293 
294 static uint8_t utf8_buffer[8];
295 
296 
297 /* Structure for list of --only-matching capturing numbers. */
298 
299 typedef struct omstr {
300   struct omstr *next;
301   int groupnum;
302 } omstr;
303 
304 static omstr *only_matching = NULL;
305 static omstr *only_matching_last = NULL;
306 static int only_matching_count;
307 
308 /* Structure for holding the two variables that describe a number chain. */
309 
310 typedef struct omdatastr {
311   omstr **anchor;
312   omstr **lastptr;
313 } omdatastr;
314 
315 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
316 
317 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
318 
319 typedef struct fnstr {
320   struct fnstr *next;
321   char *name;
322 } fnstr;
323 
324 static fnstr *exclude_from = NULL;
325 static fnstr *exclude_from_last = NULL;
326 static fnstr *include_from = NULL;
327 static fnstr *include_from_last = NULL;
328 
329 static fnstr *file_lists = NULL;
330 static fnstr *file_lists_last = NULL;
331 static fnstr *pattern_files = NULL;
332 static fnstr *pattern_files_last = NULL;
333 
334 /* Structure for holding the two variables that describe a file name chain. */
335 
336 typedef struct fndatastr {
337   fnstr **anchor;
338   fnstr **lastptr;
339 } fndatastr;
340 
341 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
342 static fndatastr include_from_data = { &include_from, &include_from_last };
343 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
344 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
345 
346 /* Structure for pattern and its compiled form; used for matching patterns and
347 also for include/exclude patterns. */
348 
349 typedef struct patstr {
350   struct patstr *next;
351   char *string;
352   PCRE2_SIZE length;
353   pcre2_code *compiled;
354 } patstr;
355 
356 static patstr *patterns = NULL;
357 static patstr *patterns_last = NULL;
358 static patstr *include_patterns = NULL;
359 static patstr *include_patterns_last = NULL;
360 static patstr *exclude_patterns = NULL;
361 static patstr *exclude_patterns_last = NULL;
362 static patstr *include_dir_patterns = NULL;
363 static patstr *include_dir_patterns_last = NULL;
364 static patstr *exclude_dir_patterns = NULL;
365 static patstr *exclude_dir_patterns_last = NULL;
366 
367 /* Structure holding the two variables that describe a pattern chain. A pointer
368 to such structures is used for each appropriate option. */
369 
370 typedef struct patdatastr {
371   patstr **anchor;
372   patstr **lastptr;
373 } patdatastr;
374 
375 static patdatastr match_patdata = { &patterns, &patterns_last };
376 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
377 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
378 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
379 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
380 
381 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
382                                  &include_dir_patterns, &exclude_dir_patterns };
383 
384 static const char *incexname[4] = { "--include", "--exclude",
385                                     "--include-dir", "--exclude-dir" };
386 
387 /* Structure for options and list of them */
388 
389 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
390        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
391 
392 typedef struct option_item {
393   int type;
394   int one_char;
395   void *dataptr;
396   const char *long_name;
397   const char *help_text;
398 } option_item;
399 
400 /* Options without a single-letter equivalent get a negative value. This can be
401 used to identify them. */
402 
403 #define N_COLOUR       (-1)
404 #define N_EXCLUDE      (-2)
405 #define N_EXCLUDE_DIR  (-3)
406 #define N_HELP         (-4)
407 #define N_INCLUDE      (-5)
408 #define N_INCLUDE_DIR  (-6)
409 #define N_LABEL        (-7)
410 #define N_LOCALE       (-8)
411 #define N_NULL         (-9)
412 #define N_LOFFSETS     (-10)
413 #define N_FOFFSETS     (-11)
414 #define N_LBUFFER      (-12)
415 #define N_H_LIMIT      (-13)
416 #define N_M_LIMIT      (-14)
417 #define N_M_LIMIT_DEP  (-15)
418 #define N_BUFSIZE      (-16)
419 #define N_NOJIT        (-17)
420 #define N_FILE_LIST    (-18)
421 #define N_BINARY_FILES (-19)
422 #define N_EXCLUDE_FROM (-20)
423 #define N_INCLUDE_FROM (-21)
424 #define N_OM_SEPARATOR (-22)
425 #define N_MAX_BUFSIZE  (-23)
426 #define N_OM_CAPTURE   (-24)
427 #define N_ALLABSK      (-25)
428 #define N_POSIX_DIGIT  (-26)
429 #define N_GROUP_SEPARATOR (-27)
430 #define N_NO_GROUP_SEPARATOR (-28)
431 
432 static option_item optionlist[] = {
433   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
434   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
435   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
436   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
437   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
438   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
439   { OP_SIZE,       N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
440   { OP_SIZE,       N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
441   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
442   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
443   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
444   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
445   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
446   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
447   { OP_NODATA, N_POSIX_DIGIT, NULL,             "posix-digit",   "\\d always matches [0-9], even in UTF/UCP mode" },
448   { OP_NODATA,     'E',      NULL,              "case-restrict", "restrict case matching (no mix ASCII/non-ASCII)" },
449   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
450   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
451   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
452   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
453   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
454   { OP_STRING,     N_GROUP_SEPARATOR, &group_separator, "group-separator=text", "set separator between groups of lines" },
455   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
456   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
457   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
458   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
459   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
460   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
461   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
462   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
463   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
464   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
465   { OP_SIZE,       N_H_LIMIT, &heap_limit,      "heap-limit=number",  "set PCRE2 heap limit option (kibibytes)" },
466   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE2 match limit option" },
467   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
468   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
469   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
470   { OP_NUMBER,     'm',      &count_limit,      "max-count=number", "stop after <number> matched lines" },
471   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
472   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
473 #ifdef SUPPORT_PCRE2GREP_JIT
474   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
475 #else
476   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
477 #endif
478   { OP_NODATA,     N_NO_GROUP_SEPARATOR, NULL,   "no-group-separator", "suppress separators between groups of lines" },
479   { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" },
480   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
481   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
482   { OP_U32NUMBER,  N_OM_CAPTURE, &capture_max,  "om-capture=n",  "set capture count for --only-matching" },
483   { OP_NODATA,     'P',      NULL,              "no-ucp",        "do not enable UCP mode with Unicode" },
484   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
485   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
486   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
487   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
488   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
489   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
490   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
491   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
492   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
493   { OP_NODATA,    't',      NULL,              "total-count",   "print total count of matching lines" },
494   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF/Unicode" },
495   { OP_NODATA,    'U',      NULL,              "utf-allow-invalid", "use UTF/Unicode, allow for invalid code units" },
496   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
497   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
498   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
499   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
500   { OP_NODATA,   N_ALLABSK, NULL,              "allow-lookaround-bsk", "allow \\K in lookarounds" },
501   { OP_NODATA,    'Z',      NULL,              "null",          "output 0 byte after file names"  },
502   { OP_NODATA,    0,        NULL,               NULL,            NULL }
503 };
504 
505 /* Table of names for newline types. Must be kept in step with the definitions
506 of PCRE2_NEWLINE_xx in pcre2.h. */
507 
508 static const char *newlines[] = {
509   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
510 
511 /* UTF-8 tables  */
512 
513 const int utf8_table1[] =
514   { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
515 const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
516 
517 const int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
518 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
519 
520 const char utf8_table4[] = {
521   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
522   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
523   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
524   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
525 
526 
527 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
528 /*************************************************
529 *    Emulated memmove() for systems without it   *
530 *************************************************/
531 
/* Copy n bytes from s to d, allowing the two regions to overlap, and return d.
bcopy() does the work when HAVE_BCOPY is defined. Otherwise the bytes are
copied by hand, as there are some non-Unix environments that lack both
memmove() and bcopy(); the direction of the copy is chosen from the relative
positions of the two regions so that every source byte is read before it can
be overwritten. */

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *out = (unsigned char *)d;
const unsigned char *in = (const unsigned char *)s;
size_t k;

if (out > in)
  {
  /* Destination lies above the source: work from the last byte downwards. */
  for (k = n; k > 0; k--) out[k - 1] = in[k - 1];
  }
else
  {
  /* Destination lies at or below the source: work upwards. */
  for (k = 0; k < n; k++) out[k] = in[k];
  }
return d;
#endif   /* not HAVE_BCOPY */
}
560 #undef memmove
561 #define memmove(d,s,n) emulated_memmove(d,s,n)
562 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
563 
564 
565 
566 /*************************************************
567 *           Convert code point to UTF-8          *
568 *************************************************/
569 
570 /* A static buffer is used. Returns the number of bytes. */
571 
572 static int
ord2utf8(uint32_t value)573 ord2utf8(uint32_t value)
574 {
575 int i, j;
576 uint8_t *utf8bytes = utf8_buffer;
577 for (i = 0; i < utf8_table1_size; i++)
578   if (value <= (uint32_t)utf8_table1[i]) break;
579 utf8bytes += i;
580 for (j = i; j > 0; j--)
581   {
582   *utf8bytes-- = 0x80 | (value & 0x3f);
583   value >>= 6;
584   }
585 *utf8bytes = utf8_table2[i] | value;
586 return i + 1;
587 }
588 
589 
590 
591 /*************************************************
592 *         Case-independent string compare        *
593 *************************************************/
594 
/* Compare two strings, ignoring case as defined by tolower().

Arguments:
  str1       first zero-terminated string
  str2       second zero-terminated string

Returns:     0 if the strings are equal when case is ignored; otherwise -1
             or +1 according to whether, at the first difference, the
             lower-cased character of str1 is the smaller or the greater
*/

static int
strcmpic(const char *str1, const char *str2)
{
while (*str1 != '\0' || *str2 != '\0')
  {
  /* Convert to unsigned char before calling tolower(): passing a negative
  plain char value to a <ctype.h> function is undefined behaviour. A string
  that ends early contributes a zero here, which forces a mismatch and hence a
  return before either pointer is used again. */

  int c1 = tolower((unsigned char)*str1++);
  int c2 = tolower((unsigned char)*str2++);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
607 
608 
609 /*************************************************
610 *         Parse GREP_COLORS                      *
611 *************************************************/
612 
/* Pick the match colour out of a GREP_COLORS-style string. The first "ms="
setting is used if there is one, otherwise the first "mt=" setting. The value
runs up to the next colon or the end of the string and is copied into a static
buffer, silently truncated if it does not fit.

Argument:  the string, possibly NULL
Returns:   pointer to the static copy of the ms or mt value, or NULL if the
           argument is NULL or contains neither setting
*/

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *val;
uint32_t n = 0;

if (gc == NULL) return NULL;

val = strstr(gc, "ms=");
if (val == NULL)
  {
  val = strstr(gc, "mt=");
  if (val == NULL) return NULL;
  }

/* Step over the three-character key, then copy the value. */

for (val += 3; n < sizeof(seq)-1 && *val != 0 && *val != ':'; val++)
  seq[n++] = *val;
seq[n] = 0;
return seq;
}
636 
637 
638 /*************************************************
639 *         Exit from the program                  *
640 *************************************************/
641 
642 /* If there has been a resource error, give a suitable message.
643 
644 Argument:  the return code
645 Returns:   does not return
646 */
647 
648 static void
pcre2grep_exit(int rc)649 pcre2grep_exit(int rc)
650 {
651 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
652 status of 1, which is not helpful. To help with this problem, define a symbol
653 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
654 therein. */
655 
656 #ifdef __VMS
657   char val_buf[4];
658   $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
659   $DESCRIPTOR(sym_val, val_buf);
660   sprintf(val_buf, "%d", rc);
661   sym_val.dsc$w_length = strlen(val_buf);
662   lib$set_symbol(&sym_nam, &sym_val);
663 #endif
664 
665 if (resource_error)
666   {
667   fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
668     "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
669     PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
670   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
671   }
672 exit(rc);
673 }
674 
675 
676 /*************************************************
677 *          Add item to chain of patterns         *
678 *************************************************/
679 
680 /* Used to add an item onto a chain, or just return an unconnected item if the
681 "after" argument is NULL.
682 
683 Arguments:
684   s          pattern string to add
685   patlen     length of pattern
686   after      if not NULL points to item to insert after
687 
688 Returns:     new pattern block or NULL on error
689 */
690 
691 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)692 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
693 {
694 patstr *p = (patstr *)malloc(sizeof(patstr));
695 
696 /* LCOV_EXCL_START - These won't be hit in normal testing. */
697 
698 if (p == NULL)
699   {
700   fprintf(stderr, "pcre2grep: malloc failed\n");
701   pcre2grep_exit(2);
702   }
703 if (patlen > MAXPATLEN)
704   {
705   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
706     MAXPATLEN);
707   free(p);
708   return NULL;
709   }
710 
711 /* LCOV_EXCL_STOP */
712 
713 p->next = NULL;
714 p->string = s;
715 p->length = patlen;
716 p->compiled = NULL;
717 
718 if (after != NULL)
719   {
720   p->next = after->next;
721   after->next = p;
722   }
723 return p;
724 }
725 
726 
727 /*************************************************
728 *           Free chain of patterns               *
729 *************************************************/
730 
731 /* Used for several chains of patterns.
732 
733 Argument: pointer to start of chain
734 Returns:  nothing
735 */
736 
737 static void
free_pattern_chain(patstr * pc)738 free_pattern_chain(patstr *pc)
739 {
740 while (pc != NULL)
741   {
742   patstr *p = pc;
743   pc = p->next;
744   if (p->compiled != NULL) pcre2_code_free(p->compiled);
745   free(p);
746   }
747 }
748 
749 
750 /*************************************************
751 *           Free chain of file names             *
752 *************************************************/
753 
754 /*
755 Argument: pointer to start of chain
756 Returns:  nothing
757 */
758 
759 static void
free_file_chain(fnstr * fn)760 free_file_chain(fnstr *fn)
761 {
762 while (fn != NULL)
763   {
764   fnstr *f = fn;
765   fn = f->next;
766   free(f);
767   }
768 }
769 
770 
771 /*************************************************
772 *            OS-specific functions               *
773 *************************************************/
774 
775 /* These definitions are needed in all Windows environments, even those where
776 Unix-style directory scanning can be used (see below). */
777 
778 #ifdef WIN32
779 
780 #ifndef STRICT
781 # define STRICT
782 #endif
783 #ifndef WIN32_LEAN_AND_MEAN
784 # define WIN32_LEAN_AND_MEAN
785 #endif
786 
787 #include <windows.h>
788 
789 #define iswild(name) (strpbrk(name, "*?") != NULL)
790 
791 /* Convert ANSI BGR format to RGB used by Windows */
792 #define BGR_RGB(x) (((x) & 1 ? 4 : 0) | ((x) & 2) | ((x) & 4 ? 1 : 0))
793 
794 static HANDLE hstdout;
795 static CONSOLE_SCREEN_BUFFER_INFO csbi;
796 static WORD match_colour;
797 
798 static WORD
decode_ANSI_colour(const char * cs)799 decode_ANSI_colour(const char *cs)
800 {
801 WORD result = csbi.wAttributes;
802 while (*cs)
803   {
804   if (isdigit((unsigned char)(*cs)))
805     {
806     int code = atoi(cs);
807     if (code == 1) result |= 0x08;
808     else if (code == 4) result |= 0x8000;
809     else if (code == 5) result |= 0x80;
810     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
811     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
812     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
813     else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
814     /* aixterm high intensity colour codes */
815     else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
816     else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
817 
818     while (isdigit((unsigned char)(*cs))) cs++;
819     }
820   if (*cs) cs++;
821   }
822 return result;
823 }
824 
825 
826 static void
init_colour_output()827 init_colour_output()
828 {
829 if (do_colour)
830   {
831   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
832   /* This fails when redirected to con; try again if so. */
833   if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
834     {
835     HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
836       FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
837     GetConsoleScreenBufferInfo(hcon, &csbi);
838     CloseHandle(hcon);
839     }
840   match_colour = decode_ANSI_colour(colour_string);
841   /* No valid colour found - turn off colouring */
842   if (!match_colour) do_colour = FALSE;
843   }
844 }
845 
846 #endif  /* WIN32 */
847 
848 
849 /* The following sets of functions are defined so that they can be made system
850 specific. At present there are versions for Unix-style environments, Windows,
851 native z/OS, and "no support". */
852 
853 
854 /************* Directory scanning Unix-style and z/OS ***********/
855 
856 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
857 #include <sys/types.h>
858 #include <sys/stat.h>
859 #include <dirent.h>
860 
861 #if defined NATIVE_ZOS
862 /************* Directory and PDS/E scanning for z/OS ***********/
863 /************* z/OS looks mostly like Unix with USS ************/
864 /* However, z/OS needs the #include statements in this header */
865 #include "pcrzosfs.h"
866 /* That header is not included in the main PCRE distribution because
867    other apparatus is needed to compile pcre2grep for z/OS. The header
868    can be found in the special z/OS distribution, which is available
869    from www.zaconsultants.net or from www.cbttape.org. */
870 #endif
871 
/* File name separator character for this environment. */
#define FILESEP '/'

/* A directory being scanned is represented directly by a DIR stream. */
typedef DIR directory_type;
874 
/* Test whether a path names a directory. If stat() fails the answer is zero,
in the expectation that opening the path as a file will then fail.

Argument:   filename   the path to test
Returns:    non-zero if the path is a directory, otherwise zero
*/

static int
isdirectory(char *filename)
{
struct stat info;
return (stat(filename, &info) >= 0)? S_ISDIR(info.st_mode) : 0;
}
883 
884 static directory_type *
opendirectory(char * filename)885 opendirectory(char *filename)
886 {
887 return opendir(filename);
888 }
889 
890 static char *
readdirectory(directory_type * dir)891 readdirectory(directory_type *dir)
892 {
893 for (;;)
894   {
895   struct dirent *dent = readdir(dir);
896   if (dent == NULL) return NULL;
897   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
898     return dent->d_name;
899   }
900 /* Control never reaches here */
901 }
902 
903 static void
closedirectory(directory_type * dir)904 closedirectory(directory_type *dir)
905 {
906 closedir(dir);
907 }
908 
909 
910 /************* Test for regular file, Unix-style **********/
911 
/* Test whether a path names a regular file. If stat() fails the path is
reported as regular (1), in the expectation that opening it as a file will
then fail.

Argument:   filename   the path to test
Returns:    non-zero if the path is (or is assumed to be) a regular file
*/

static int
isregfile(char *filename)
{
struct stat info;
return (stat(filename, &info) < 0)? 1 : S_ISREG(info.st_mode);
}
920 
921 
922 #if defined NATIVE_ZOS
923 /************* Test for a terminal in z/OS **********/
924 /* isatty() does not work in a TSO environment, so always give FALSE.*/
925 
926 static BOOL
is_stdout_tty(void)927 is_stdout_tty(void)
928 {
929 return FALSE;
930 }
931 
932 static BOOL
is_file_tty(FILE * f)933 is_file_tty(FILE *f)
934 {
935 return FALSE;
936 }
937 
938 
939 /************* Test for a terminal, Unix-style **********/
940 
941 #else
942 static BOOL
is_stdout_tty(void)943 is_stdout_tty(void)
944 {
945 return isatty(fileno(stdout));
946 }
947 
948 static BOOL
is_file_tty(FILE * f)949 is_file_tty(FILE *f)
950 {
951 return isatty(fileno(f));
952 }
953 #endif
954 
955 
956 /************* Print optionally coloured match Unix-style and z/OS **********/
957 
958 static void
print_match(const void * buf,int length)959 print_match(const void *buf, int length)
960 {
961 if (length == 0) return;
962 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
963 FWRITE_IGNORE(buf, 1, length, stdout);
964 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
965 }
966 
967 /* End of Unix-style or native z/OS environment functions. */
968 
969 
970 /************* Directory scanning in Windows ***********/
971 
972 /* I (Philip Hazel) have no means of testing this code. It was contributed by
973 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
974 when it did not exist. David Byron added a patch that moved the #include of
975 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
976 */
977 
978 #elif defined WIN32
979 
980 #ifndef INVALID_FILE_ATTRIBUTES
981 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
982 #endif
983 
984 typedef struct directory_type
985 {
986 HANDLE handle;
987 BOOL first;
988 WIN32_FIND_DATA data;
989 } directory_type;
990 
991 #define FILESEP '/'
992 
993 int
isdirectory(char * filename)994 isdirectory(char *filename)
995 {
996 DWORD attr = GetFileAttributes(filename);
997 if (attr == INVALID_FILE_ATTRIBUTES)
998   return 0;
999 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
1000 }
1001 
1002 directory_type *
opendirectory(char * filename)1003 opendirectory(char *filename)
1004 {
1005 size_t len;
1006 char *pattern;
1007 directory_type *dir;
1008 DWORD err;
1009 len = strlen(filename);
1010 pattern = (char *)malloc(len + 3);
1011 dir = (directory_type *)malloc(sizeof(*dir));
1012 if ((pattern == NULL) || (dir == NULL))
1013   {
1014   fprintf(stderr, "pcre2grep: malloc failed\n");
1015   pcre2grep_exit(2);
1016   }
1017 memcpy(pattern, filename, len);
1018 if (iswild(filename))
1019   pattern[len] = 0;
1020 else
1021   memcpy(&(pattern[len]), "\\*", 3);
1022 dir->handle = FindFirstFile(pattern, &(dir->data));
1023 if (dir->handle != INVALID_HANDLE_VALUE)
1024   {
1025   free(pattern);
1026   dir->first = TRUE;
1027   return dir;
1028   }
1029 err = GetLastError();
1030 free(pattern);
1031 free(dir);
1032 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1033 return NULL;
1034 }
1035 
1036 char *
readdirectory(directory_type * dir)1037 readdirectory(directory_type *dir)
1038 {
1039 for (;;)
1040   {
1041   if (!dir->first)
1042     {
1043     if (!FindNextFile(dir->handle, &(dir->data)))
1044       return NULL;
1045     }
1046   else
1047     {
1048     dir->first = FALSE;
1049     }
1050   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1051     return dir->data.cFileName;
1052   }
1053 #ifndef _MSC_VER
1054 return NULL;   /* Keep compiler happy; never executed */
1055 #endif
1056 }
1057 
1058 void
closedirectory(directory_type * dir)1059 closedirectory(directory_type *dir)
1060 {
1061 FindClose(dir->handle);
1062 free(dir);
1063 }
1064 
1065 
1066 /************* Test for regular file in Windows **********/
1067 
/* There is no known way of testing for a regular file here, so every path
that is not a directory is taken to be regular.

Argument:   filename   the path to test
Returns:    1 if isdirectory() gives zero for the path, otherwise 0
*/

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
1075 
1076 
1077 /************* Test for a terminal in Windows **********/
1078 
1079 static BOOL
is_stdout_tty(void)1080 is_stdout_tty(void)
1081 {
1082 return _isatty(_fileno(stdout));
1083 }
1084 
1085 static BOOL
is_file_tty(FILE * f)1086 is_file_tty(FILE *f)
1087 {
1088 return _isatty(_fileno(f));
1089 }
1090 
1091 
1092 /************* Print optionally coloured match in Windows **********/
1093 
1094 static void
print_match(const void * buf,int length)1095 print_match(const void *buf, int length)
1096 {
1097 if (length == 0) return;
1098 if (do_colour)
1099   {
1100   if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1101     else SetConsoleTextAttribute(hstdout, match_colour);
1102   }
1103 FWRITE_IGNORE(buf, 1, length, stdout);
1104 if (do_colour)
1105   {
1106   if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1107     else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1108   }
1109 }
1110 
1111 /* End of Windows functions */
1112 
1113 
1114 /************* Directory scanning when we can't do it ***********/
1115 
1116 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1117 
1118 #else
1119 
/* With no directory support the handle type is just void. */
typedef void directory_type;

/* The file name separator is defined as zero in this environment. */
#define FILESEP 0
1122 
/* Without directory support nothing is ever reported as a directory. */
int isdirectory(char *filename)
{
(void)filename;   /* Deliberately unused */
return 0;
}
opendirectory(char * filename)1124 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* Without directory support there are never any entries: always NULL. */
char *readdirectory(directory_type *dir)
{
(void)dir;   /* Deliberately unused */
return (char *)0;
}
/* Without directory support there is nothing to close. */
void closedirectory(directory_type *dir)
{
(void)dir;   /* Deliberately unused */
}
1127 
1128 
1129 /************* Test for regular file when we can't do it **********/
1130 
/* No test is possible here, so every file is assumed to be regular. */

int isregfile(char *filename)
{
(void)filename;   /* Deliberately unused */
return 1;
}
1134 
1135 
1136 /************* Test for a terminal when we can't do it **********/
1137 
1138 static BOOL
is_stdout_tty(void)1139 is_stdout_tty(void)
1140 {
1141 return FALSE;
1142 }
1143 
1144 static BOOL
is_file_tty(FILE * f)1145 is_file_tty(FILE *f)
1146 {
1147 return FALSE;
1148 }
1149 
1150 
1151 /************* Print optionally coloured match when we can't do it **********/
1152 
/* Write matched text to stdout without any colouring. Nothing at all is
written for an empty match.

Arguments:
  buf       the matched bytes
  length    the number of bytes
Returns:    nothing
*/

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1159 
1160 #endif  /* End of system-specific functions */
1161 
1162 
1163 
1164 #ifndef HAVE_STRERROR
1165 /*************************************************
1166 *     Provide strerror() for non-ANSI libraries  *
1167 *************************************************/
1168 
/* Replacement for strerror() on old systems (e.g. SunOS4) whose libraries
lack it but do provide the sys_errlist table of messages.

Argument:   n   an error number
Returns:    the message from sys_errlist, or a fixed text if n is outside
            the range 0 to sys_nerr - 1
*/

extern char *sys_errlist[];
extern int   sys_nerr;

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1182 #endif /* HAVE_STRERROR */
1183 
1184 
1185 
1186 /*************************************************
1187 *                Usage function                  *
1188 *************************************************/
1189 
1190 static int
usage(int rc)1191 usage(int rc)
1192 {
1193 option_item *op;
1194 fprintf(stderr, "Usage: pcre2grep [-");
1195 for (op = optionlist; op->one_char != 0; op++)
1196   {
1197   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1198   }
1199 fprintf(stderr, "] [long options] [pattern] [files]\n");
1200 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1201   "options.\n");
1202 return rc;
1203 }
1204 
1205 
1206 
1207 /*************************************************
1208 *                Help function                   *
1209 *************************************************/
1210 
1211 static void
help(void)1212 help(void)
1213 {
1214 option_item *op;
1215 
1216 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1217 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1218 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1219 
1220 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1221 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1222 printf("All callout scripts in patterns are supported." STDOUT_NL);
1223 #else
1224 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1225 #endif
1226 #else
1227 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1228 #endif
1229 
1230 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1231 
1232 #ifdef SUPPORT_LIBZ
1233 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1234 #endif
1235 
1236 #ifdef SUPPORT_LIBBZ2
1237 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1238 #endif
1239 
1240 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1241 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1242 #else
1243 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1244 #endif
1245 
1246 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1247 printf("Options:" STDOUT_NL);
1248 
1249 for (op = optionlist; op->one_char != 0; op++)
1250   {
1251   int n;
1252   char s[4];
1253 
1254   if (op->one_char > 0 && (op->long_name)[0] == 0)
1255     n = 31 - printf("  -%c", op->one_char);
1256   else
1257     {
1258     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1259       else strcpy(s, "   ");
1260     n = 31 - printf("  %s --%s", s, op->long_name);
1261     }
1262 
1263   if (n < 1) n = 1;
1264   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
1265   }
1266 
1267 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1268 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1269 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1270 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1271 printf("space is removed and blank lines are ignored." STDOUT_NL);
1272 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1273 
1274 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1275 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1276 }
1277 
1278 
1279 
1280 /*************************************************
1281 *            Test exclude/includes               *
1282 *************************************************/
1283 
1284 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1285 there are no includes, the path must match an include pattern.
1286 
1287 Arguments:
1288   path      the path to be matched
1289   ip        the chain of include patterns
1290   ep        the chain of exclude patterns
1291 
1292 Returns:    TRUE if the path is not excluded
1293 */
1294 
1295 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1296 test_incexc(char *path, patstr *ip, patstr *ep)
1297 {
1298 int plen = strlen((const char *)path);
1299 
1300 for (; ep != NULL; ep = ep->next)
1301   {
1302   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1303     return FALSE;
1304   }
1305 
1306 if (ip == NULL) return TRUE;
1307 
1308 for (; ip != NULL; ip = ip->next)
1309   {
1310   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1311     return TRUE;
1312   }
1313 
1314 return FALSE;
1315 }
1316 
1317 
1318 
1319 /*************************************************
1320 *         Decode integer argument value          *
1321 *************************************************/
1322 
1323 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1324 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1325 just keep it simple.
1326 
1327 Arguments:
1328   option_data   the option data string
1329   op            the option item (for error messages)
1330   longop        TRUE if option given in long form
1331 
1332 Returns:        a long integer
1333 */
1334 
1335 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1336 decode_number(char *option_data, option_item *op, BOOL longop)
1337 {
1338 unsigned long int n = 0;
1339 char *endptr = option_data;
1340 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1341 while (isdigit((unsigned char)(*endptr)))
1342   n = n * 10 + (int)(*endptr++ - '0');
1343 if (toupper(*endptr) == 'K')
1344   {
1345   n *= 1024;
1346   endptr++;
1347   }
1348 else if (toupper(*endptr) == 'M')
1349   {
1350   n *= 1024*1024;
1351   endptr++;
1352   }
1353 
1354 if (*endptr != 0)   /* Error */
1355   {
1356   if (longop)
1357     {
1358     char *equals = strchr(op->long_name, '=');
1359     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1360       (int)(equals - op->long_name);
1361     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1362       option_data, nlen, op->long_name);
1363     }
1364   else
1365     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1366       option_data, op->one_char);
1367   pcre2grep_exit(usage(2));
1368   }
1369 
1370 return n;
1371 }
1372 
1373 
1374 
1375 /*************************************************
1376 *       Add item to a chain of numbers           *
1377 *************************************************/
1378 
1379 /* Used to add an item onto a chain, or just return an unconnected item if the
1380 "after" argument is NULL.
1381 
1382 Arguments:
1383   n          the number to add
1384   after      if not NULL points to item to insert after
1385 
1386 Returns:     new number block
1387 */
1388 
1389 static omstr *
add_number(int n,omstr * after)1390 add_number(int n, omstr *after)
1391 {
1392 omstr *om = (omstr *)malloc(sizeof(omstr));
1393 
1394 /* LCOV_EXCL_START - These lines won't be hit in normal testing. */
1395 
1396 if (om == NULL)
1397   {
1398   fprintf(stderr, "pcre2grep: malloc failed\n");
1399   pcre2grep_exit(2);
1400   }
1401 
1402 /* LCOV_EXCL_STOP */
1403 
1404 om->next = NULL;
1405 om->groupnum = n;
1406 
1407 if (after != NULL)
1408   {
1409   om->next = after->next;
1410   after->next = om;
1411   }
1412 return om;
1413 }
1414 
1415 
1416 
1417 /*************************************************
1418 *            Read one line of input              *
1419 *************************************************/
1420 
1421 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1422 BZ2_read) into a large buffer, so many lines may be read at once. However,
1423 doing this for tty input means that no output appears until a lot of input has
1424 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1425 for this, because it does not stop at a binary zero, and therefore there is no
1426 way of telling how many characters it has read, because there may be binary
1427 zeros embedded in the data. This function is also used for reading patterns
1428 from files (the -f option).
1429 
1430 Arguments:
1431   buffer     the buffer to read into
1432   length     the maximum number of characters to read
1433   f          the file
1434 
1435 Returns:     the number of characters read, zero at end of file
1436 */
1437 
1438 static PCRE2_SIZE
read_one_line(char * buffer,PCRE2_SIZE length,FILE * f)1439 read_one_line(char *buffer, PCRE2_SIZE length, FILE *f)
1440 {
1441 int c;
1442 PCRE2_SIZE yield = 0;
1443 while ((c = fgetc(f)) != EOF)
1444   {
1445   buffer[yield++] = c;
1446   if (c == '\n' || yield >= length) break;
1447   }
1448 return yield;
1449 }
1450 
1451 
1452 
1453 /*************************************************
1454 *             Find end of line                   *
1455 *************************************************/
1456 
1457 /* The length of the endline sequence that is found is set via lenptr. This may
1458 be zero at the very end of the file if there is no line-ending sequence there.
1459 
1460 Arguments:
1461   p         current position in line
1462   endptr    end of available data
1463   lenptr    where to put the length of the eol sequence
1464 
1465 Returns:    pointer after the last byte of the line,
1466             including the newline byte(s)
1467 */
1468 
1469 static char *
end_of_line(char * p,char * endptr,int * lenptr)1470 end_of_line(char *p, char *endptr, int *lenptr)
1471 {
1472 switch(endlinetype)
1473   {
1474   default:      /* Just in case */
1475   case PCRE2_NEWLINE_LF:
1476   while (p < endptr && *p != '\n') p++;
1477   if (p < endptr)
1478     {
1479     *lenptr = 1;
1480     return p + 1;
1481     }
1482   *lenptr = 0;
1483   return endptr;
1484 
1485   case PCRE2_NEWLINE_CR:
1486   while (p < endptr && *p != '\r') p++;
1487   if (p < endptr)
1488     {
1489     *lenptr = 1;
1490     return p + 1;
1491     }
1492   *lenptr = 0;
1493   return endptr;
1494 
1495   case PCRE2_NEWLINE_NUL:
1496   while (p < endptr && *p != '\0') p++;
1497   if (p < endptr)
1498     {
1499     *lenptr = 1;
1500     return p + 1;
1501     }
1502   *lenptr = 0;
1503   return endptr;
1504 
1505   case PCRE2_NEWLINE_CRLF:
1506   for (;;)
1507     {
1508     while (p < endptr && *p != '\r') p++;
1509     if (++p >= endptr)
1510       {
1511       *lenptr = 0;
1512       return endptr;
1513       }
1514     if (*p == '\n')
1515       {
1516       *lenptr = 2;
1517       return p + 1;
1518       }
1519     }
1520   break;
1521 
1522   case PCRE2_NEWLINE_ANYCRLF:
1523   while (p < endptr)
1524     {
1525     int extra = 0;
1526     int c = *((unsigned char *)p);
1527 
1528     if (utf && c >= 0xc0)
1529       {
1530       int gcii, gcss;
1531       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1532       gcss = 6*extra;
1533       c = (c & utf8_table3[extra]) << gcss;
1534       for (gcii = 1; gcii <= extra; gcii++)
1535         {
1536         gcss -= 6;
1537         c |= (p[gcii] & 0x3f) << gcss;
1538         }
1539       }
1540 
1541     p += 1 + extra;
1542 
1543     switch (c)
1544       {
1545       case '\n':
1546       *lenptr = 1;
1547       return p;
1548 
1549       case '\r':
1550       if (p < endptr && *p == '\n')
1551         {
1552         *lenptr = 2;
1553         p++;
1554         }
1555       else *lenptr = 1;
1556       return p;
1557 
1558       default:
1559       break;
1560       }
1561     }   /* End of loop for ANYCRLF case */
1562 
1563   *lenptr = 0;  /* Must have hit the end */
1564   return endptr;
1565 
1566   case PCRE2_NEWLINE_ANY:
1567   while (p < endptr)
1568     {
1569     int extra = 0;
1570     int c = *((unsigned char *)p);
1571 
1572     if (utf && c >= 0xc0)
1573       {
1574       int gcii, gcss;
1575       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1576       gcss = 6*extra;
1577       c = (c & utf8_table3[extra]) << gcss;
1578       for (gcii = 1; gcii <= extra; gcii++)
1579         {
1580         gcss -= 6;
1581         c |= (p[gcii] & 0x3f) << gcss;
1582         }
1583       }
1584 
1585     p += 1 + extra;
1586 
1587     switch (c)
1588       {
1589       case '\n':    /* LF */
1590       case '\v':    /* VT */
1591       case '\f':    /* FF */
1592       *lenptr = 1;
1593       return p;
1594 
1595       case '\r':    /* CR */
1596       if (p < endptr && *p == '\n')
1597         {
1598         *lenptr = 2;
1599         p++;
1600         }
1601       else *lenptr = 1;
1602       return p;
1603 
1604 #ifndef EBCDIC
1605       case 0x85:    /* Unicode NEL */
1606       *lenptr = utf? 2 : 1;
1607       return p;
1608 
1609       case 0x2028:  /* Unicode LS */
1610       case 0x2029:  /* Unicode PS */
1611       *lenptr = 3;
1612       return p;
1613 #endif  /* Not EBCDIC */
1614 
1615       default:
1616       break;
1617       }
1618     }   /* End of loop for ANY case */
1619 
1620   *lenptr = 0;  /* Must have hit the end */
1621   return endptr;
1622   }     /* End of overall switch */
1623 }
1624 
1625 
1626 
1627 /*************************************************
1628 *         Find start of previous line            *
1629 *************************************************/
1630 
1631 /* This is called when looking back for before lines to print.
1632 
1633 Arguments:
1634   p         start of the subsequent line
1635   startptr  start of available data
1636 
1637 Returns:    pointer to the start of the previous line
1638 */
1639 
1640 static char *
previous_line(char * p,char * startptr)1641 previous_line(char *p, char *startptr)
1642 {
1643 switch(endlinetype)
1644   {
1645   default:      /* Just in case */
1646   case PCRE2_NEWLINE_LF:
1647   p--;
1648   while (p > startptr && p[-1] != '\n') p--;
1649   return p;
1650 
1651   case PCRE2_NEWLINE_CR:
1652   p--;
1653   while (p > startptr && p[-1] != '\n') p--;
1654   return p;
1655 
1656   case PCRE2_NEWLINE_NUL:
1657   p--;
1658   while (p > startptr && p[-1] != '\0') p--;
1659   return p;
1660 
1661   case PCRE2_NEWLINE_CRLF:
1662   for (;;)
1663     {
1664     p -= 2;
1665     while (p > startptr && p[-1] != '\n') p--;
1666     if (p <= startptr + 1 || p[-2] == '\r') return p;
1667     }
1668   /* Control can never get here */
1669 
1670   case PCRE2_NEWLINE_ANY:
1671   case PCRE2_NEWLINE_ANYCRLF:
1672   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1673   if (utf) while ((*p & 0xc0) == 0x80) p--;
1674 
1675   while (p > startptr)
1676     {
1677     unsigned int c;
1678     char *pp = p - 1;
1679 
1680     if (utf)
1681       {
1682       int extra = 0;
1683       while ((*pp & 0xc0) == 0x80) pp--;
1684       c = *((unsigned char *)pp);
1685       if (c >= 0xc0)
1686         {
1687         int gcii, gcss;
1688         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1689         gcss = 6*extra;
1690         c = (c & utf8_table3[extra]) << gcss;
1691         for (gcii = 1; gcii <= extra; gcii++)
1692           {
1693           gcss -= 6;
1694           c |= (pp[gcii] & 0x3f) << gcss;
1695           }
1696         }
1697       }
1698     else c = *((unsigned char *)pp);
1699 
1700     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1701       {
1702       case '\n':    /* LF */
1703       case '\r':    /* CR */
1704       return p;
1705 
1706       default:
1707       break;
1708       }
1709 
1710     else switch (c)
1711       {
1712       case '\n':    /* LF */
1713       case '\v':    /* VT */
1714       case '\f':    /* FF */
1715       case '\r':    /* CR */
1716 #ifndef EBCDIC
1717       case 0x85:    /* Unicode NEL */
1718       case 0x2028:  /* Unicode LS */
1719       case 0x2029:  /* Unicode PS */
1720 #endif  /* Not EBCDIC */
1721       return p;
1722 
1723       default:
1724       break;
1725       }
1726 
1727     p = pp;  /* Back one character */
1728     }        /* End of loop for ANY case */
1729 
1730   return startptr;  /* Hit start of data */
1731   }     /* End of overall switch */
1732 }
1733 
1734 
1735 
1736 /*************************************************
1737 *              Output newline at end             *
1738 *************************************************/
1739 
1740 /* This function is called if the final line of a file has been written to
1741 stdout, but it does not have a terminating newline.
1742 
1743 Arguments:  none
1744 Returns:    nothing
1745 */
1746 
1747 static void
write_final_newline(void)1748 write_final_newline(void)
1749 {
1750 switch(endlinetype)
1751   {
1752   default:      /* Just in case */
1753   case PCRE2_NEWLINE_LF:
1754   case PCRE2_NEWLINE_ANY:
1755   case PCRE2_NEWLINE_ANYCRLF:
1756   fprintf(stdout, "\n");
1757   break;
1758 
1759   case PCRE2_NEWLINE_CR:
1760   fprintf(stdout, "\r");
1761   break;
1762 
1763   case PCRE2_NEWLINE_CRLF:
1764   fprintf(stdout, "\r\n");
1765   break;
1766 
1767   case PCRE2_NEWLINE_NUL:
1768   fprintf(stdout, "%c", 0);
1769   break;
1770   }
1771 }
1772 
1773 
1774 /*************************************************
1775 *       Print the previous "after" lines         *
1776 *************************************************/
1777 
1778 /* This is called if we are about to lose said lines because of buffer filling,
1779 and at the end of the file. The data in the line is written using fwrite() so
1780 that a binary zero does not terminate it.
1781 
1782 Arguments:
1783   lastmatchnumber   the number of the last matching line, plus one
1784   lastmatchrestart  where we restarted after the last match
1785   endptr            end of available data
1786   printname         filename for printing
1787 
1788 Returns:            nothing
1789 */
1790 
1791 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1792 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1793   char *endptr, const char *printname)
1794 {
1795 if (after_context > 0 && lastmatchnumber > 0)
1796   {
1797   int count = 0;
1798   int ellength = 0;
1799   while (lastmatchrestart < endptr && count < after_context)
1800     {
1801     char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1802     if (ellength == 0 && pp == main_buffer + bufsize) break;
1803     if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
1804     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1805     FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1806     lastmatchrestart = pp;
1807     count++;
1808     }
1809 
1810   /* If we have printed any lines, arrange for a hyphen separator if anything
1811   else follows. Also, if the last line is the final line in the file and it had
1812   no newline, add one. */
1813 
1814   if (count > 0)
1815     {
1816     hyphenpending = TRUE;
1817     if (ellength == 0 && lastmatchrestart >= endptr)
1818       write_final_newline();
1819     }
1820   }
1821 }
1822 
1823 
1824 
1825 /*************************************************
1826 *   Apply patterns to subject till one matches   *
1827 *************************************************/
1828 
1829 /* This function is called to run through all the patterns, looking for a
1830 match. When all possible matches are required, for example, for colouring, it
1831 checks all patterns for matching, and returns the earliest match. Otherwise, it
1832 returns the first pattern that has matched.
1833 
1834 Arguments:
1835   matchptr     the start of the subject
1836   length       the length of the subject to match
1837   options      options for pcre2_match
1838   startoffset  where to start matching
1839   mrc          address of where to put the result of pcre2_match()
1840 
1841 Returns:       TRUE if there was a match, match_data and offsets are set
1842                FALSE if there was no match (but no errors)
1843                invert if there was a non-fatal error
1844 */
1845 
1846 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1847 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1848   PCRE2_SIZE startoffset, int *mrc)
1849 {
1850 PCRE2_SIZE slen = length;
1851 int first = -1;
1852 int firstrc = 0;
1853 patstr *p = patterns;
1854 const char *msg = "this text:\n\n";
1855 
1856 if (slen > 200)
1857   {
1858   slen = 200;
1859   msg = "text that starts:\n\n";
1860   }
1861 
1862 for (int i = 1; p != NULL; p = p->next, i++)
1863   {
1864   int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, length,
1865     startoffset, options, match_data, match_context);
1866   if (rc == PCRE2_ERROR_NOMATCH) continue;
1867 
1868   /* Handle a successful match. When all_matches is false, we are done.
1869   Otherwise we must save the earliest match. */
1870 
1871   if (rc >= 0)
1872     {
1873     if (!all_matches)
1874       {
1875       *mrc = rc;
1876       return TRUE;
1877       }
1878 
1879     if (first < 0 || offsets[0] < offsets_pair[first][0] ||
1880          (offsets[0] == offsets_pair[first][0] &&
1881           offsets[1] > offsets_pair[first][1]))
1882       {
1883       first = match_data_toggle;
1884       firstrc = rc;
1885       match_data_toggle ^= 1;
1886       match_data = match_data_pair[match_data_toggle];
1887       offsets = offsets_pair[match_data_toggle];
1888       }
1889     continue;
1890     }
1891 
1892   /* Deal with PCRE2 error. */
1893 
1894   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", rc);
1895   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1896   fprintf(stderr, "%s", msg);
1897   FWRITE_IGNORE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1898   fprintf(stderr, "\n\n");
1899   if (rc <= PCRE2_ERROR_UTF8_ERR1 &&
1900       rc >= PCRE2_ERROR_UTF8_ERR21)
1901     {
1902     unsigned char mbuffer[256];
1903     PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1904     (void)pcre2_get_error_message(rc, mbuffer, sizeof(mbuffer));
1905     fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1906     }
1907   if (rc == PCRE2_ERROR_MATCHLIMIT || rc == PCRE2_ERROR_DEPTHLIMIT ||
1908       rc == PCRE2_ERROR_HEAPLIMIT || rc == PCRE2_ERROR_JIT_STACKLIMIT)
1909     resource_error = TRUE;
1910   if (error_count++ > 20)
1911     {
1912     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1913     pcre2grep_exit(2);
1914     }
1915   return invert;    /* No more matching; don't show the line again */
1916   }
1917 
1918 /* We get here when all patterns have been tried. If all_matches is false,
1919 this means that none of them matched. If all_matches is true, matched_first
1920 will be non-NULL if there was at least one match, and it will point to the
1921 appropriate match_data block. */
1922 
1923 if (!all_matches || first < 0) return FALSE;
1924 
1925 match_data_toggle = first;
1926 match_data = match_data_pair[first];
1927 offsets = offsets_pair[first];
1928 *mrc = firstrc;
1929 return TRUE;
1930 }
1931 
1932 
1933 
1934 /*************************************************
1935 *          Decode dollar escape sequence         *
1936 *************************************************/
1937 
1938 /* Called from various places to decode $ escapes in output strings. The escape
1939 sequences are as follows:
1940 
1941 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1942 zero is never returned; '0' is substituted.
1943 
1944 $a returns bell.
1945 $b returns backspace.
1946 $e returns escape.
1947 $f returns form feed.
1948 $n returns newline.
1949 $r returns carriage return.
1950 $t returns tab.
1951 $v returns vertical tab.
1952 $o<digits> returns the character represented by the given octal
1953   number; up to three digits are processed.
1954 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1955   code points.
1956 $x<digits> returns the character represented by the given hexadecimal
1957   number; up to two digits are processed.
1958 $x{<digits} does the same, up to 6 digits, but gives an error for mode-invalid
1959   code points.
1960 Any other character is substituted by itself. E.g: $$ is replaced by a single
1961 dollar.
1962 
1963 Arguments:
1964   begin      the start of the whole string
1965   string     points to the $
1966   callout    TRUE if in a callout (inhibits error messages)
1967   value      where to return a value
1968   last       where to return pointer to the last used character
1969 
1970 Returns:     DDE_ERROR    after a syntax error
1971              DDE_CAPTURE  if *value is a capture number
1972              DDE_CHAR     if *value is a character code
1973 */
1974 
1975 static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1976 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1977   uint32_t *value, PCRE2_SPTR *last)
1978 {
1979 uint32_t c = 0;
1980 int base = 10;
1981 int dcount;
1982 int rc = DDE_CHAR;
1983 BOOL brace = FALSE;
1984 
1985 switch (*(++string))
1986   {
1987   case 0:   /* Syntax error: a character must be present after $. */
1988   if (!callout)
1989     fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1990       (int)(string - begin), "no character after $");
1991   *last = string;
1992   return DDE_ERROR;
1993 
1994   case '{':
1995   brace = TRUE;
1996   string++;
1997   if (!isdigit((unsigned char)(*string)))  /* Syntax error: a decimal number required. */
1998     {
1999     if (!callout)
2000       fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2001         (int)(string - begin), "decimal number expected");
2002     rc = DDE_ERROR;
2003     break;
2004     }
2005 
2006   /* Fall through */
2007 
2008   /* The maximum capture number is 65535, so any number greater than that will
2009   always be an unknown capture number. We just stop incrementing, in order to
2010   avoid overflow. */
2011 
2012   case '0': case '1': case '2': case '3': case '4':
2013   case '5': case '6': case '7': case '8': case '9':
2014   do
2015     {
2016     if (c <= 65535) c = c * 10 + (*string - '0');
2017     string++;
2018     }
2019   while (*string >= '0' && *string <= '9');
2020   string--;  /* Point to last digit */
2021 
2022   /* In a callout, capture number 0 is not available. No error can be given,
2023   so just return the character '0'. */
2024 
2025   if (callout && c == 0)
2026     {
2027     *value = '0';
2028     }
2029   else
2030     {
2031     *value = c;
2032     rc = DDE_CAPTURE;
2033     }
2034   break;
2035 
2036   /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
2037   for valid Unicode code points. */
2038 
2039   case 'o':
2040   base = 8;
2041   string++;
2042   if (*string == '{')
2043     {
2044     brace = TRUE;
2045     string++;
2046     dcount = 7;
2047     }
2048   else dcount = 3;
2049   for (; dcount > 0; dcount--)
2050     {
2051     if (*string < '0' || *string > '7') break;
2052     c = c * 8 + (*string++ - '0');
2053     }
2054   *value = c;
2055   string--;  /* Point to last digit */
2056   break;
2057 
2058   /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2059   for valid Unicode code points. */
2060 
2061   case 'x':
2062   base = 16;
2063   string++;
2064   if (*string == '{')
2065     {
2066     brace = TRUE;
2067     string++;
2068     dcount = 6;
2069     }
2070   else dcount = 2;
2071   for (; dcount > 0; dcount--)
2072     {
2073     if (!isxdigit(*string)) break;
2074     if (*string >= '0' && *string <= '9')
2075       c = c *16 + *string++ - '0';
2076     else
2077       c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2078     }
2079   *value = c;
2080   string--;  /* Point to last digit */
2081   break;
2082 
2083   case 'a': *value = '\a'; break;
2084   case 'b': *value = '\b'; break;
2085 #ifndef EBCDIC
2086   case 'e': *value = '\033'; break;
2087 #else
2088   case 'e': *value = '\047'; break;
2089 #endif
2090   case 'f': *value = '\f'; break;
2091   case 'n': *value = STDOUT_NL_CODE; break;
2092   case 'r': *value = '\r'; break;
2093   case 't': *value = '\t'; break;
2094   case 'v': *value = '\v'; break;
2095 
2096   default: *value = *string; break;
2097   }
2098 
2099 if (brace)
2100   {
2101   c = string[1];
2102   if (c != '}')
2103     {
2104     rc = DDE_ERROR;
2105     if (!callout)
2106       {
2107       if ((base == 8 && c >= '0' && c <= '7') ||
2108           (base == 16 && isxdigit(c)))
2109         {
2110         fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2111           "too many %s digits\n", (int)(string - begin),
2112           (base == 8)? "octal" : "hex");
2113         }
2114       else
2115         {
2116         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2117           (int)(string - begin), "missing closing brace");
2118         }
2119       }
2120     }
2121   else string++;
2122   }
2123 
2124 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2125 
2126 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2127   {
2128   uint32_t max = utf? 0x0010ffffu : 0xffu;
2129   if (*value > max)
2130     {
2131     if (!callout)
2132       fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2133         "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2134     rc = DDE_ERROR;
2135     }
2136   }
2137 
2138 *last = string;
2139 return rc;
2140 }
2141 
2142 
2143 
2144 /*************************************************
2145 *          Check output text for errors          *
2146 *************************************************/
2147 
2148 /* Called early, to get errors before doing anything for -O text; also called
2149 from callouts to check before outputting.
2150 
2151 Arguments:
2152   string    an --output text string
2153   callout   TRUE if in a callout (stops printing errors)
2154 
2155 Returns:    TRUE if OK, FALSE on error
2156 */
2157 
2158 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2159 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2160 {
2161 uint32_t value;
2162 PCRE2_SPTR begin = string;
2163 
2164 for (; *string != 0; string++)
2165   {
2166   if (*string == '$' &&
2167     decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2168       return FALSE;
2169   }
2170 
2171 return TRUE;
2172 }
2173 
2174 
2175 /*************************************************
2176 *              Display output text               *
2177 *************************************************/
2178 
2179 /* Display the output text, which is assumed to have already been syntax
2180 checked. Output may contain escape sequences started by the dollar sign.
2181 
2182 Arguments:
2183   string:       the output text
2184   callout:      TRUE for the builtin callout, FALSE for --output
2185   subject       the start of the subject
2186   ovector:      capture offsets
2187   capture_top:  number of captures
2188 
2189 Returns:        TRUE if something was output, other than newline
2190                 FALSE if nothing was output, or newline was last output
2191 */
2192 
2193 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2194 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2195   PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2196 {
2197 uint32_t value;
2198 BOOL printed = FALSE;
2199 PCRE2_SPTR begin = string;
2200 
2201 for (; *string != 0; string++)
2202   {
2203   if (*string == '$')
2204     {
2205     switch(decode_dollar_escape(begin, string, callout, &value, &string))
2206       {
2207       case DDE_CHAR:
2208       if (value == STDOUT_NL_CODE)
2209         {
2210         fprintf(stdout, STDOUT_NL);
2211         printed = FALSE;
2212         continue;
2213         }
2214       break;  /* Will print value */
2215 
2216       case DDE_CAPTURE:
2217       if (value < capture_top)
2218         {
2219         PCRE2_SIZE capturesize;
2220         value *= 2;
2221         capturesize = ovector[value + 1] - ovector[value];
2222         if (capturesize > 0)
2223           {
2224           print_match(subject + ovector[value], capturesize);
2225           printed = TRUE;
2226           }
2227         }
2228       continue;
2229 
2230       /* LCOV_EXCL_START */
2231       default:  /* Should not occur */
2232       break;
2233       /* LCOV_EXCL_STOP */
2234       }
2235     }
2236 
2237   else value = *string;  /* Not a $ escape */
2238 
2239   if (!utf || value <= 127) fprintf(stdout, "%c", value); else
2240     {
2241     int n = ord2utf8(value);
2242     for (int i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2243     }
2244 
2245   printed = TRUE;
2246   }
2247 
2248 return printed;
2249 }
2250 
2251 
2252 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2253 
2254 /*************************************************
2255 *        Parse and execute callout scripts       *
2256 *************************************************/
2257 
2258 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2259 string block and executes the program specified by the string. The string is a
2260 list of substrings separated by pipe characters. The first substring represents
2261 the executable name, and the following substrings specify the arguments:
2262 
2263   program_name|param1|param2|...
2264 
2265 Any substring (including the program name) can contain escape sequences
2266 started by the dollar character. The escape sequences are substituted as
2267 follows:
2268 
2269   $<digits> or ${<digits>} is replaced by the captured substring of the given
2270   decimal number, which must be greater than zero. If the number is greater
2271   than the number of capturing substrings, or if the capture is unset, the
2272   replacement is empty.
2273 
2274   Any other character is substituted by itself. E.g: $$ is replaced by a single
2275   dollar or $| replaced by a pipe character.
2276 
2277 Alternatively, if string starts with pipe, the remainder is taken as an output
2278 string, same as --output. This is the only form that is supported if
2279 SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
2280 separate each callout, defaulting to newline.
2281 
2282 Example:
2283 
2284   echo -e "abcde\n12345" | pcre2grep \
2285     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2286 
2287   Output:
2288 
2289     Arg1: [a] [bcd] [d] Arg2: |a| ()
2290     abcde
2291     Arg1: [1] [234] [4] Arg2: |1| ()
2292     12345
2293 
2294 Arguments:
2295   blockptr     the callout block
2296 
2297 Returns:       currently it always returns with 0
2298 */
2299 
2300 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2301 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2302 {
2303 PCRE2_SIZE length = calloutptr->callout_string_length;
2304 PCRE2_SPTR string = calloutptr->callout_string;
2305 PCRE2_SPTR subject = calloutptr->subject;
2306 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2307 PCRE2_SIZE capture_top = calloutptr->capture_top;
2308 
2309 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2310 PCRE2_SIZE argsvectorlen = 2;
2311 PCRE2_SIZE argslen = 1;
2312 char *args;
2313 char *argsptr;
2314 char **argsvector;
2315 char **argsvectorptr;
2316 #ifndef WIN32
2317 pid_t pid;
2318 #endif
2319 int result = 0;
2320 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2321 
2322 (void)unused;   /* Avoid compiler warning */
2323 
2324 /* Only callouts with strings are supported. */
2325 
2326 if (string == NULL || length == 0) return 0;
2327 
2328 /* If there's no command, output the remainder directly. */
2329 
2330 if (*string == '|')
2331   {
2332   string++;
2333   if (!syntax_check_output_text(string, TRUE)) return 0;
2334   (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2335   return 0;
2336   }
2337 
2338 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2339 return 0;
2340 #else
2341 
2342 /* Checking syntax and compute the number of string fragments. Callout strings
2343 are silently ignored in the event of a syntax error. */
2344 
2345 while (length > 0)
2346   {
2347   if (*string == '|')
2348     {
2349     argsvectorlen++;
2350     if (argsvectorlen > 10000) return 0;  /* Too many args */
2351     }
2352 
2353   else if (*string == '$')
2354     {
2355     uint32_t value;
2356     PCRE2_SPTR begin = string;
2357 
2358     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2359       {
2360       case DDE_CAPTURE:
2361       if (value < capture_top)
2362         {
2363         value *= 2;
2364         argslen += ovector[value + 1] - ovector[value];
2365         }
2366       argslen--;   /* Negate the effect of argslen++ below. */
2367       break;
2368 
2369       case DDE_CHAR:
2370       if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2371         else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2372       break;
2373 
2374       /* LCOV_EXCL_START */
2375       default:         /* Should not occur */
2376       case DDE_ERROR:
2377       return 0;
2378       /* LCOV_EXCL_STOP */
2379       }
2380 
2381     length -= (string - begin);
2382     }
2383 
2384   string++;
2385   length--;
2386   argslen++;
2387   }
2388 
2389 /* Get memory for the argument vector and its strings. */
2390 
2391 args = (char*)malloc(argslen);
2392 if (args == NULL) return 0;
2393 
2394 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2395 if (argsvector == NULL)
2396   {
2397   /* LCOV_EXCL_START */
2398   free(args);
2399   return 0;
2400   /* LCOV_EXCL_STOP */
2401   }
2402 
2403 /* Now reprocess the string and set up the arguments. */
2404 
2405 argsptr = args;
2406 argsvectorptr = argsvector;
2407 *argsvectorptr++ = argsptr;
2408 
2409 length = calloutptr->callout_string_length;
2410 string = calloutptr->callout_string;
2411 
2412 while (length > 0)
2413   {
2414   if (*string == '|')
2415     {
2416     *argsptr++ = '\0';
2417     *argsvectorptr++ = argsptr;
2418     }
2419 
2420   else if (*string == '$')
2421     {
2422     uint32_t value;
2423     PCRE2_SPTR begin = string;
2424 
2425     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2426       {
2427       case DDE_CAPTURE:
2428       if (value < capture_top)
2429         {
2430         PCRE2_SIZE capturesize;
2431         value *= 2;
2432         capturesize = ovector[value + 1] - ovector[value];
2433         memcpy(argsptr, subject + ovector[value], capturesize);
2434         argsptr += capturesize;
2435         }
2436       break;
2437 
2438       case DDE_CHAR:
2439       if (value == STDOUT_NL_CODE)
2440         {
2441         memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2442         argsptr += STDOUT_NL_LEN;
2443         }
2444       else if (utf && value > 127)
2445         {
2446         int n = ord2utf8(value);
2447         memcpy(argsptr, utf8_buffer, n);
2448         argsptr += n;
2449         }
2450       else
2451         {
2452         *argsptr++ = value;
2453         }
2454       break;
2455 
2456       /* LCOV_EXCL_START */
2457       default:         /* Even though this should not occur, the string having */
2458       case DDE_ERROR:  /* been checked above, we need to include the free() */
2459       free(args);      /* calls so that source checkers do not complain. */
2460       free(argsvector);
2461       return 0;
2462       /* LCOV_EXCL_STOP */
2463       }
2464 
2465     length -= (string - begin);
2466     }
2467 
2468   else *argsptr++ = *string;
2469 
2470   /* Advance along the string */
2471 
2472   string++;
2473   length--;
2474   }
2475 
2476 *argsptr++ = '\0';
2477 *argsvectorptr = NULL;
2478 
2479 /* Running an external command is system-dependent. Handle Windows and VMS as
2480 necessary, otherwise assume fork(). */
2481 
2482 #ifdef WIN32
2483 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2484 
2485 #elif defined __VMS
2486   {
2487   char cmdbuf[500];
2488   short i = 0;
2489   int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2490   $DESCRIPTOR(cmd, cmdbuf);
2491 
2492   cmdbuf[0] = 0;
2493   while (argsvector[i])
2494   {
2495     strcat(cmdbuf, argsvector[i]);
2496     strcat(cmdbuf, " ");
2497     i++;
2498   }
2499   cmd.dsc$w_length = strlen(cmdbuf) - 1;
2500   status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2501   if (!(status & 1)) result = 0;
2502   else result = retstat & 1 ? 0 : 1;
2503   }
2504 
2505 #else  /* Neither Windows nor VMS */
2506 pid = fork();
2507 if (pid == 0)
2508   {
2509   (void)execv(argsvector[0], argsvector);
2510   /* Control gets here if there is an error, e.g. a non-existent program */
2511   exit(1);
2512   }
2513 else if (pid > 0)
2514   {
2515   (void)fflush(stdout);
2516   (void)waitpid(pid, &result, 0);
2517   (void)fflush(stdout);
2518   }
2519 #endif  /* End Windows/VMS/other handling */
2520 
2521 free(args);
2522 free(argsvector);
2523 
2524 /* Currently negative return values are not supported, only zero (match
2525 continues) or non-zero (match fails). */
2526 
2527 return result != 0;
2528 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2529 }
2530 #endif  /* SUPPORT_PCRE2GREP_CALLOUT */
2531 
2532 
2533 
2534 /*************************************************
2535 *     Read a portion of the file into buffer     *
2536 *************************************************/
2537 
2538 static PCRE2_SIZE
fill_buffer(void * handle,int frtype,char * buffer,PCRE2_SIZE length,BOOL input_line_buffered)2539 fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length,
2540   BOOL input_line_buffered)
2541 {
2542 (void)frtype;  /* Avoid warning when not used */
2543 
2544 #ifdef SUPPORT_LIBZ
2545 if (frtype == FR_LIBZ)
2546   return gzread((gzFile)handle, buffer, length);
2547 else
2548 #endif
2549 
2550 #ifdef SUPPORT_LIBBZ2
2551 if (frtype == FR_LIBBZ2)
2552   return (PCRE2_SIZE)BZ2_bzread((BZFILE *)handle, buffer, length);
2553 else
2554 #endif
2555 
2556 return (input_line_buffered ?
2557   read_one_line(buffer, length, (FILE *)handle) :
2558   fread(buffer, 1, length, (FILE *)handle));
2559 }
2560 
2561 
2562 
2563 /*************************************************
2564 *            Grep an individual file             *
2565 *************************************************/
2566 
2567 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2568 times the value of bufthird. The matching point is never allowed to stray into
2569 the top third of the buffer, thus keeping more of the file available for
2570 context printing or for multiline scanning. For large files, the pointer will
2571 be in the middle third most of the time, so the bottom third is available for
2572 "before" context printing.
2573 
2574 Arguments:
2575   handle       the fopened FILE stream for a normal file
2576                the gzFile pointer when reading is via libz
2577                the BZFILE pointer when reading is via libbz2
2578   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2579   filename     the file name or NULL (for errors)
2580   printname    the file name if it is to be printed for each match
2581                or NULL if the file name is not to be printed
2582                it cannot be NULL if filenames[_nomatch]_only is set
2583 
2584 Returns:       0 if there was at least one match
2585                1 otherwise (no matches)
2586                2 if an overlong line is encountered
2587                3 if there is a read error on a .bz2 file
2588 */
2589 
2590 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2591 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2592 {
2593 int rc = 1;
2594 int filepos = 0;
2595 unsigned long int linenumber = 1;
2596 unsigned long int lastmatchnumber = 0;
2597 unsigned long int count = 0;
2598 long int count_matched_lines = 0;
2599 char *lastmatchrestart = main_buffer;
2600 char *ptr = main_buffer;
2601 char *endptr;
2602 PCRE2_SIZE bufflength;
2603 BOOL binary = FALSE;
2604 BOOL endhyphenpending = FALSE;
2605 BOOL lines_printed = FALSE;
2606 BOOL input_line_buffered = line_buffered;
2607 FILE *in = NULL;                    /* Ensure initialized */
2608 long stream_start = -1;             /* Only non-negative if relevant */
2609 
2610 /* Do the first read into the start of the buffer and set up the pointer to end
2611 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2612 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2613 fail. */
2614 
2615 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2616   {
2617   in = (FILE *)handle;
2618   if (feof(in)) return 1;
2619   if (is_file_tty(in)) input_line_buffered = TRUE;
2620   else
2621     {
2622     if (count_limit >= 0  && filename == stdin_name)
2623       stream_start = ftell(in);
2624     }
2625   }
2626 else input_line_buffered = FALSE;
2627 
2628 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2629   input_line_buffered);
2630 
2631 #ifdef SUPPORT_LIBBZ2
2632 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 3;   /* Gotcha: bufflength is PCRE2_SIZE */
2633 #endif
2634 
2635 endptr = main_buffer + bufflength;
2636 
2637 /* Unless binary-files=text, see if we have a binary file. This uses the same
2638 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2639 file. However, when the newline convention is binary zero, we can't do this. */
2640 
2641 if (binary_files != BIN_TEXT)
2642   {
2643   if (endlinetype != PCRE2_NEWLINE_NUL)
2644     binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2645       != NULL;
2646   if (binary && binary_files == BIN_NOMATCH) return 1;
2647   }
2648 
2649 /* Loop while the current pointer is not at the end of the file. For large
2650 files, endptr will be at the end of the buffer when we are in the middle of the
2651 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2652 way, the buffer is shifted left and re-filled. */
2653 
2654 while (ptr < endptr)
2655   {
2656   int endlinelength;
2657   int mrc = 0;
2658   unsigned int options = 0;
2659   BOOL match;
2660   BOOL line_matched = FALSE;
2661   char *t = ptr;
2662   PCRE2_SIZE length, linelength;
2663   PCRE2_SIZE startoffset = 0;
2664 
2665   /* If the -m option set a limit for the number of matched or non-matched
2666   lines, check it here. A limit of zero means that no matching is ever done.
2667   For stdin from a file, set the file position. */
2668 
2669   if (count_limit >= 0 && count_matched_lines >= count_limit)
2670     {
2671     if (stream_start >= 0)
2672       (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
2673     rc = (count_limit == 0)? 1 : 0;
2674     break;
2675     }
2676 
2677   /* At this point, ptr is at the start of a line. We need to find the length
2678   of the subject string to pass to pcre2_match(). In multiline mode, it is the
2679   length remainder of the data in the buffer. Otherwise, it is the length of
2680   the next line, excluding the terminating newline. After matching, we always
2681   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2682   option is used for compiling, so that any match is constrained to be in the
2683   first line. */
2684 
2685   t = end_of_line(t, endptr, &endlinelength);
2686   linelength = t - ptr - endlinelength;
2687   length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2688 
2689   /* Check to see if the line we are looking at extends right to the very end
2690   of the buffer without a line terminator. This means the line is too long to
2691   handle at the current buffer size. Until the buffer reaches its maximum size,
2692   try doubling it and reading more data. */
2693 
2694   if (endlinelength == 0 && t == main_buffer + bufsize)
2695     {
2696     if (bufthird < max_bufthird)
2697       {
2698       char *new_buffer;
2699       PCRE2_SIZE new_bufthird = 2*bufthird;
2700 
2701       if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2702       new_buffer = (char *)malloc(3*new_bufthird);
2703 
2704       if (new_buffer == NULL)
2705         {
2706         /* LCOV_EXCL_START */
2707         fprintf(stderr,
2708           "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2709           "pcre2grep: not enough memory to increase the buffer size to %"
2710             SIZ_FORM "\n",
2711           linenumber,
2712           (filename == NULL)? "" : " of file ",
2713           (filename == NULL)? "" : filename,
2714           new_bufthird);
2715         return 2;
2716         /* LCOV_EXCL_STOP */
2717         }
2718 
2719       /* Copy the data and adjust pointers to the new buffer location. */
2720 
2721       memcpy(new_buffer, main_buffer, bufsize);
2722       bufthird = new_bufthird;
2723       bufsize = 3*bufthird;
2724       ptr = new_buffer + (ptr - main_buffer);
2725       lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2726       free(main_buffer);
2727       main_buffer = new_buffer;
2728 
2729       /* Read more data into the buffer and then try to find the line ending
2730       again. */
2731 
2732       bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2733         bufsize - bufflength, input_line_buffered);
2734       endptr = main_buffer + bufflength;
2735       continue;
2736       }
2737     else
2738       {
2739       fprintf(stderr,
2740         "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2741         "pcre2grep: the maximum buffer size is %" SIZ_FORM "\n"
2742         "pcre2grep: use the --max-buffer-size option to change it\n",
2743         linenumber,
2744         (filename == NULL)? "" : " of file ",
2745         (filename == NULL)? "" : filename,
2746         bufthird);
2747       return 2;
2748       }
2749     }
2750 
2751   /* We come back here after a match when only_matching_count is non-zero, in
2752   order to find any further matches in the same line. This applies to
2753   --only-matching, --file-offsets, and --line-offsets. */
2754 
2755   ONLY_MATCHING_RESTART:
2756 
2757   /* Run through all the patterns until one matches or there is an error other
2758   than NOMATCH. This code is in a subroutine so that it can be re-used for
2759   finding subsequent matches when colouring matched lines. After finding one
2760   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2761   this line. */
2762 
2763   match = match_patterns(ptr, length, options, startoffset, &mrc);
2764   options = PCRE2_NOTEMPTY;
2765 
2766   /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2767   only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2768   return code - to output data lines, so that binary zeroes are treated as just
2769   another data character. */
2770 
2771   if (match != invert)
2772     {
2773     BOOL hyphenprinted = FALSE;
2774 
2775     /* We've failed if we want a file that doesn't have any matches. */
2776 
2777     if (filenames == FN_NOMATCH_ONLY) return 1;
2778 
2779     /* Remember that this line matched (for counting matched lines) */
2780 
2781     line_matched = TRUE;
2782 
2783     /* If all we want is a yes/no answer, we can return immediately. */
2784 
2785     if (quiet) return 0;
2786 
2787     /* Just count if just counting is wanted. */
2788 
2789     else if (count_only || show_total_count) count++;
2790 
2791     /* When handling a binary file and binary-files==binary, the "binary"
2792     variable will be set true (it's false in all other cases). In this
2793     situation we just want to output the file name. No need to scan further. */
2794 
2795     else if (binary)
2796       {
2797       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2798       return 0;
2799       }
2800 
2801     /* Likewise, if all we want is a file name, there is no need to scan any
2802     more lines in the file. */
2803 
2804     else if (filenames == FN_MATCH_ONLY)
2805       {
2806       fprintf(stdout, "%s", printname);
2807       if (printname_nl == NULL) fprintf(stdout, "%c", 0);
2808         else fprintf(stdout, "%s", printname_nl);
2809       return 0;
2810       }
2811 
2812     /* The --only-matching option prints just the substring that matched,
2813     and/or one or more captured portions of it, as long as these strings are
2814     not empty. The --file-offsets and --line-offsets options output offsets for
2815     the matching substring (all three set only_matching_count non-zero). None
2816     of these mutually exclusive options prints any context. Afterwards, adjust
2817     the start and then jump back to look for further matches in the same line.
2818     If we are in invert mode, however, nothing is printed and we do not restart
2819     - this could still be useful because the return code is set. */
2820 
2821     else if (only_matching_count != 0)
2822       {
2823       if (!invert)
2824         {
2825         PCRE2_SIZE oldstartoffset;
2826 
2827         if (printname != NULL) fprintf(stdout, "%s%c", printname,
2828           printname_colon);
2829         if (number) fprintf(stdout, "%lu:", linenumber);
2830 
2831         /* Handle --line-offsets */
2832 
2833         if (line_offsets)
2834           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2835             (int)(offsets[1] - offsets[0]));
2836 
2837         /* Handle --file-offsets */
2838 
2839         else if (file_offsets)
2840           fprintf(stdout, "%d,%d" STDOUT_NL,
2841             (int)(filepos + ptr + offsets[0] - ptr),
2842             (int)(offsets[1] - offsets[0]));
2843 
2844         /* Handle --output (which has already been syntax checked) */
2845 
2846         else if (output_text != NULL)
2847           {
2848           (void)display_output_text((PCRE2_SPTR)output_text, FALSE,
2849               (PCRE2_SPTR)ptr, offsets, mrc);
2850           fprintf(stdout, STDOUT_NL);
2851           }
2852 
2853         /* Handle --only-matching, which may occur many times */
2854 
2855         else
2856           {
2857           BOOL printed = FALSE;
2858           omstr *om;
2859 
2860           for (om = only_matching; om != NULL; om = om->next)
2861             {
2862             int n = om->groupnum;
2863             if (n == 0 || n < mrc)
2864               {
2865               int plen = offsets[2*n + 1] - offsets[2*n];
2866               if (plen > 0)
2867                 {
2868                 if (printed && om_separator != NULL)
2869                   fprintf(stdout, "%s", om_separator);
2870                 print_match(ptr + offsets[n*2], plen);
2871                 printed = TRUE;
2872                 }
2873               }
2874             }
2875           if (printed || printname != NULL || number)
2876             fprintf(stdout, STDOUT_NL);
2877           }
2878 
2879         /* Prepare to repeat to find the next match in the line. */
2880 
2881         //match = FALSE;
2882         if (line_buffered) fflush(stdout);
2883         rc = 0;                      /* Had some success */
2884 
2885         /* If the pattern contained a lookbehind that included \K, it is
2886         possible that the end of the match might be at or before the actual
2887         starting offset we have just used. In this case, start one character
2888         further on. */
2889 
2890         startoffset = offsets[1];    /* Restart after the match */
2891         oldstartoffset = pcre2_get_startchar(match_data);
2892         if (startoffset <= oldstartoffset)
2893           {
2894           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2895           startoffset = oldstartoffset + 1;
2896           if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2897           }
2898 
2899         /* If the current match ended past the end of the line (only possible
2900         in multiline mode), we must move on to the line in which it did end
2901         before searching for more matches. */
2902 
2903         while (startoffset > linelength)
2904           {
2905           ptr += linelength + endlinelength;
2906           filepos += (int)(linelength + endlinelength);
2907           linenumber++;
2908           startoffset -= (int)(linelength + endlinelength);
2909           t = end_of_line(ptr, endptr, &endlinelength);
2910           linelength = t - ptr - endlinelength;
2911           length = (PCRE2_SIZE)(endptr - ptr);
2912           }
2913 
2914         goto ONLY_MATCHING_RESTART;
2915         }
2916       }
2917 
2918     /* This is the default case when none of the above options is set. We print
2919     the matching line(s), possibly preceded and/or followed by other lines of
2920     context. */
2921 
2922     else
2923       {
2924       lines_printed = TRUE;
2925 
2926       /* See if there is a requirement to print some "after" lines from a
2927       previous match. We never print any overlaps. */
2928 
2929       if (after_context > 0 && lastmatchnumber > 0)
2930         {
2931         int ellength;
2932         int linecount = 0;
2933         char *p = lastmatchrestart;
2934 
2935         while (p < ptr && linecount < after_context)
2936           {
2937           p = end_of_line(p, ptr, &ellength);
2938           linecount++;
2939           }
2940 
2941         /* It is important to advance lastmatchrestart during this printing so
2942         that it interacts correctly with any "before" printing below. Print
2943         each line's data using fwrite() in case there are binary zeroes. */
2944 
2945         while (lastmatchrestart < p)
2946           {
2947           char *pp = lastmatchrestart;
2948           if (printname != NULL) fprintf(stdout, "%s%c", printname,
2949             printname_hyphen);
2950           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2951           pp = end_of_line(pp, endptr, &ellength);
2952           FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2953           lastmatchrestart = pp;
2954           }
2955         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2956         }
2957 
2958       /* If there were non-contiguous lines printed above, insert hyphens. */
2959 
2960       if (hyphenpending)
2961         {
2962         if (group_separator != NULL)
2963           fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
2964         hyphenpending = FALSE;
2965         hyphenprinted = TRUE;
2966         }
2967 
2968       /* See if there is a requirement to print some "before" lines for this
2969       match. Again, don't print overlaps. */
2970 
2971       if (before_context > 0)
2972         {
2973         int linecount = 0;
2974         char *p = ptr;
2975 
2976         while (p > main_buffer &&
2977                (lastmatchnumber == 0 || p > lastmatchrestart) &&
2978                linecount < before_context)
2979           {
2980           linecount++;
2981           p = previous_line(p, main_buffer);
2982           }
2983 
2984         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted &&
2985             group_separator != NULL)
2986           fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
2987 
2988         while (p < ptr)
2989           {
2990           int ellength;
2991           char *pp = p;
2992           if (printname != NULL) fprintf(stdout, "%s%c", printname,
2993             printname_hyphen);
2994           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2995           pp = end_of_line(pp, endptr, &ellength);
2996           FWRITE_IGNORE(p, 1, pp - p, stdout);
2997           p = pp;
2998           }
2999         }
3000 
3001       /* Now print the matching line(s); ensure we set hyphenpending at the end
3002       of the file if any context lines are being output. */
3003 
3004       if (after_context > 0 || before_context > 0)
3005         endhyphenpending = TRUE;
3006 
3007       if (printname != NULL) fprintf(stdout, "%s%c", printname,
3008         printname_colon);
3009       if (number) fprintf(stdout, "%lu:", linenumber);
3010 
3011       /* In multiline mode, or if colouring, we have to split the line(s) up
3012       and search for further matches, but not of course if the line is a
3013       non-match. In multiline mode this is necessary in case there is another
3014       match that spans the end of the current line. When colouring we want to
3015       colour all matches. */
3016 
3017       if ((multiline || do_colour) && !invert)
3018         {
3019         int plength;
3020         PCRE2_SIZE endprevious;
3021 
3022         /* The use of \K may make the end offset earlier than the start. In
3023         this situation, swap them round. */
3024 
3025         if (offsets[0] > offsets[1])
3026           {
3027           PCRE2_SIZE temp = offsets[0];
3028           offsets[0] = offsets[1];
3029           offsets[1] = temp;
3030           }
3031 
3032         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3033         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3034 
3035         for (;;)
3036           {
3037           PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3038 
3039           endprevious = offsets[1];
3040           startoffset = endprevious;  /* Advance after previous match. */
3041 
3042           /* If the pattern contained a lookbehind that included \K, it is
3043           possible that the end of the match might be at or before the actual
3044           starting offset we have just used. In this case, start one character
3045           further on. */
3046 
3047           if (startoffset <= oldstartoffset)
3048             {
3049             startoffset = oldstartoffset + 1;
3050             if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3051             }
3052 
3053           /* If the current match ended past the end of the line (only possible
3054           in multiline mode), we must move on to the line in which it did end
3055           before searching for more matches. Because the PCRE2_FIRSTLINE option
3056           is set, the start of the match will always be before the first
3057           newline sequence. */
3058 
3059           while (startoffset > linelength + endlinelength)
3060             {
3061             ptr += linelength + endlinelength;
3062             filepos += (int)(linelength + endlinelength);
3063             linenumber++;
3064             startoffset -= (int)(linelength + endlinelength);
3065             endprevious -= (int)(linelength + endlinelength);
3066             t = end_of_line(ptr, endptr, &endlinelength);
3067             linelength = t - ptr - endlinelength;
3068             length = (PCRE2_SIZE)(endptr - ptr);
3069             }
3070 
3071           /* If startoffset is at the exact end of the line it means this
3072           complete line was the final part of the match, so there is nothing
3073           more to do. */
3074 
3075           if (startoffset == linelength + endlinelength) break;
3076 
3077           /* Otherwise, run a match from within the final line, and if found,
3078           loop for any that may follow. */
3079 
3080           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3081 
3082           /* The use of \K may make the end offset earlier than the start. In
3083           this situation, swap them round. */
3084 
3085           if (offsets[0] > offsets[1])
3086             {
3087             PCRE2_SIZE temp = offsets[0];
3088             offsets[0] = offsets[1];
3089             offsets[1] = temp;
3090             }
3091 
3092           FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3093           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3094           }
3095 
3096         /* In multiline mode, we may have already printed the complete line
3097         and its line-ending characters (if they matched the pattern), so there
3098         may be no more to print. */
3099 
3100         plength = (int)((linelength + endlinelength) - endprevious);
3101         if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3102         }
3103 
3104       /* Not colouring or multiline; no need to search for further matches. */
3105 
3106       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3107       }
3108 
3109     /* End of doing what has to be done for a match. If --line-buffered was
3110     given, flush the output. */
3111 
3112     if (line_buffered) fflush(stdout);
3113     rc = 0;    /* Had some success */
3114 
3115     /* Remember where the last match happened for after_context. We remember
3116     where we are about to restart, and that line's number. */
3117 
3118     lastmatchrestart = ptr + linelength + endlinelength;
3119     lastmatchnumber = linenumber + 1;
3120 
3121     /* If a line was printed and we are now at the end of the file and the last
3122     line had no newline, output one. */
3123 
3124     if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3125       write_final_newline();
3126     }
3127 
3128   /* For a match in multiline inverted mode (which of course did not cause
3129   anything to be printed), we have to move on to the end of the match before
3130   proceeding. */
3131 
3132   if (multiline && invert && match)
3133     {
3134     int ellength;
3135     char *endmatch = ptr + offsets[1];
3136     t = ptr;
3137     while (t < endmatch)
3138       {
3139       t = end_of_line(t, endptr, &ellength);
3140       if (t <= endmatch) linenumber++; else break;
3141       }
3142     endmatch = end_of_line(endmatch, endptr, &ellength);
3143     linelength = endmatch - ptr - ellength;
3144     }
3145 
3146   /* Advance to after the newline and increment the line number. The file
3147   offset to the current line is maintained in filepos. */
3148 
3149   END_ONE_MATCH:
3150   ptr += linelength + endlinelength;
3151   filepos += (int)(linelength + endlinelength);
3152   linenumber++;
3153 
3154   /* If there was at least one match (or a non-match, as required) in the line,
3155   increment the count for the -m option. */
3156 
3157   if (line_matched) count_matched_lines++;
3158 
3159   /* If input is line buffered, and the buffer is not yet full, read another
3160   line and add it into the buffer. */
3161 
3162   if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3163     {
3164     PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
3165     bufflength += add;
3166     endptr += add;
3167     }
3168 
3169   /* If we haven't yet reached the end of the file (the buffer is full), and
3170   the current point is in the top 1/3 of the buffer, slide the buffer down by
3171   1/3 and refill it. Before we do this, if some unprinted "after" lines are
3172   about to be lost, print them. */
3173 
3174   if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3175     {
3176     if (after_context > 0 &&
3177         lastmatchnumber > 0 &&
3178         lastmatchrestart < main_buffer + bufthird)
3179       {
3180       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3181       lastmatchnumber = 0;  /* Indicates no after lines pending */
3182       }
3183 
3184     /* Now do the shuffle */
3185 
3186     (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3187     ptr -= bufthird;
3188 
3189     bufflength = 2*bufthird + fill_buffer(handle, frtype,
3190       main_buffer + 2*bufthird, bufthird, input_line_buffered);
3191     endptr = main_buffer + bufflength;
3192 
3193     /* Adjust any last match point */
3194 
3195     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3196     }
3197   }     /* Loop through the whole file */
3198 
3199 /* End of file; print final "after" lines if wanted; do_after_lines sets
3200 hyphenpending if it prints something. */
3201 
3202 if (only_matching_count == 0 && !(count_only|show_total_count))
3203   {
3204   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3205   hyphenpending |= endhyphenpending;
3206   }
3207 
3208 /* Print the file name if we are looking for those without matches and there
3209 were none. If we found a match, we won't have got this far. */
3210 
3211 if (filenames == FN_NOMATCH_ONLY)
3212   {
3213   fprintf(stdout, "%s", printname);
3214   if (printname_nl == NULL) fprintf(stdout, "%c", 0);
3215     else fprintf(stdout, "%s", printname_nl);
3216   return 0;
3217   }
3218 
3219 /* Print the match count if wanted */
3220 
3221 if (count_only && !quiet)
3222   {
3223   if (count > 0 || !omit_zero_count)
3224     {
3225     if (printname != NULL && filenames != FN_NONE)
3226       fprintf(stdout, "%s%c", printname, printname_colon);
3227     fprintf(stdout, "%lu" STDOUT_NL, count);
3228     counts_printed++;
3229     }
3230   }
3231 
3232 total_count += count;   /* Can be set without count_only */
3233 return rc;
3234 }
3235 
3236 
3237 
3238 /*************************************************
3239 *     Grep a file or recurse into a directory    *
3240 *************************************************/
3241 
3242 /* Given a path name, if it's a directory, scan all the files if we are
3243 recursing; if it's a file, grep it.
3244 
3245 Arguments:
3246   pathname          the path to investigate
3247   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
3248   only_one_at_top   TRUE if the path is the only one at toplevel
3249 
3250 Returns:  -1 the file/directory was skipped
3251            0 if there was at least one match
3252            1 if there were no matches
3253            2 there was some kind of error
3254 
3255 However, file opening failures are suppressed if "silent" is set.
3256 */
3257 
3258 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3259 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3260 {
3261 int rc = 1;
3262 int frtype;
3263 void *handle;
3264 char *lastcomp;
3265 FILE *in = NULL;           /* Ensure initialized */
3266 
3267 #ifdef SUPPORT_LIBZ
3268 gzFile ingz = NULL;
3269 #endif
3270 
3271 #ifdef SUPPORT_LIBBZ2
3272 BZFILE *inbz2 = NULL;
3273 #endif
3274 
3275 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3276 int pathlen;
3277 #endif
3278 
3279 #if defined NATIVE_ZOS
3280 int zos_type;
3281 FILE *zos_test_file;
3282 #endif
3283 
3284 /* If the file name is "-" we scan stdin */
3285 
3286 if (strcmp(pathname, "-") == 0)
3287   {
3288   if (count_limit >= 0) setbuf(stdin, NULL);
3289   return pcre2grep(stdin, FR_PLAIN, stdin_name,
3290     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3291       stdin_name : NULL);
3292   }
3293 
3294 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3295 directories, whereas --include and --exclude apply to everything else. The test
3296 is against the final component of the path. */
3297 
3298 lastcomp = strrchr(pathname, FILESEP);
3299 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3300 
3301 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3302 Otherwise, scan the directory and recurse for each path within it. The scanning
3303 code is localized so it can be made system-specific. */
3304 
3305 
3306 /* For z/OS, determine the file type. */
3307 
3308 #if defined NATIVE_ZOS
3309 zos_test_file =  fopen(pathname,"rb");
3310 
3311 if (zos_test_file == NULL)
3312    {
3313    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3314      pathname, strerror(errno));
3315    return -1;
3316    }
3317 zos_type = identifyzosfiletype (zos_test_file);
3318 fclose (zos_test_file);
3319 
3320 /* Handle a PDS in separate code */
3321 
3322 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3323    {
3324    return travelonpdsdir (pathname, only_one_at_top);
3325    }
3326 
3327 /* Deal with regular files in the normal way below. These types are:
3328    zos_type == __ZOS_PDS_MEMBER
3329    zos_type == __ZOS_PS
3330    zos_type == __ZOS_VSAM_KSDS
3331    zos_type == __ZOS_VSAM_ESDS
3332    zos_type == __ZOS_VSAM_RRDS
3333 */
3334 
3335 /* Handle a z/OS directory using common code. */
3336 
3337 else if (zos_type == __ZOS_HFS)
3338  {
3339 #endif  /* NATIVE_ZOS */
3340 
3341 
3342 /* Handle directories: common code for all OS */
3343 
3344 if (isdirectory(pathname))
3345   {
3346   if (dee_action == dee_SKIP ||
3347       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3348     return -1;
3349 
3350   if (dee_action == dee_RECURSE)
3351     {
3352     char childpath[FNBUFSIZ];
3353     char *nextfile;
3354     directory_type *dir = opendirectory(pathname);
3355 
3356     if (dir == NULL)
3357       {
3358       /* LCOV_EXCL_START - this is a "never" event */
3359       if (!silent)
3360         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3361           strerror(errno));
3362       return 2;
3363       /* LCOV_EXCL_STOP */
3364       }
3365 
3366     while ((nextfile = readdirectory(dir)) != NULL)
3367       {
3368       int frc;
3369       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3370       if (fnlength > FNBUFSIZ)
3371         {
3372         /* LCOV_EXCL_START - this is a "never" event */
3373         fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3374         rc = 2;
3375         break;
3376         /* LCOV_EXCL_STOP */
3377         }
3378       sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3379 
3380       /* If the realpath() function is available, we can try to prevent endless
3381       recursion caused by a symlink pointing to a parent directory (GitHub
3382       issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3383       Modified to avoid using strlcat() because that isn't a standard C
3384       function, and also modified not to copy back the fully resolved path,
3385       because that affects the output from pcre2grep. */
3386 
3387 #ifdef HAVE_REALPATH
3388       {
3389       char resolvedpath[PATH_MAX];
3390       BOOL isSame;
3391       size_t rlen;
3392       if (realpath(childpath, resolvedpath) == NULL)
3393         /* LCOV_EXCL_START - this is a "never" event */
3394         continue;     /* This path is invalid - we can skip processing this */
3395         /* LCOV_EXCL_STOP */
3396       isSame = strcmp(pathname, resolvedpath) == 0;
3397       if (isSame) continue;    /* We have a recursion */
3398       rlen = strlen(resolvedpath);
3399       if (rlen++ < sizeof(resolvedpath) - 3)
3400         {
3401         BOOL contained;
3402         strcat(resolvedpath, "/");
3403         contained = strncmp(pathname, resolvedpath, rlen) == 0;
3404         if (contained) continue;    /* We have a recursion */
3405         }
3406       }
3407 #endif  /* HAVE_REALPATH */
3408 
3409       frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3410       if (frc > 1) rc = frc;
3411        else if (frc == 0 && rc == 1) rc = 0;
3412       }
3413 
3414     closedirectory(dir);
3415     return rc;
3416     }
3417   }
3418 
3419 #ifdef WIN32
3420 if (iswild(pathname))
3421   {
3422   char buffer[1024];
3423   char *nextfile;
3424   char *name;
3425   directory_type *dir = opendirectory(pathname);
3426 
3427   if (dir == NULL)
3428     return 0;
3429 
3430   for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3431     if (*nextfile == '/' || *nextfile == '\\')
3432       name = nextfile + 1;
3433   *name = 0;
3434 
3435   while ((nextfile = readdirectory(dir)) != NULL)
3436     {
3437     int frc;
3438     sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3439     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3440     if (frc > 1) rc = frc;
3441      else if (frc == 0 && rc == 1) rc = 0;
3442     }
3443 
3444   closedirectory(dir);
3445   return rc;
3446   }
3447 #endif
3448 
3449 #if defined NATIVE_ZOS
3450  }
3451 #endif
3452 
3453 /* If the file is not a directory, check for a regular file, and if it is not,
3454 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3455 exclusion. */
3456 
3457 else if (
3458 #if defined NATIVE_ZOS
3459         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3460 #else  /* all other OS */
3461         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3462 #endif
3463         !test_incexc(lastcomp, include_patterns, exclude_patterns))
3464   return -1;  /* File skipped */
3465 
3466 /* Control reaches here if we have a regular file, or if we have a directory
3467 and recursion or skipping was not requested, or if we have anything else and
3468 skipping was not requested. The scan proceeds. If this is the first and only
3469 argument at top level, we don't show the file name, unless we are only showing
3470 the file name, or the filename was forced (-H). */
3471 
3472 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3473 pathlen = (int)(strlen(pathname));
3474 #endif
3475 
3476 /* Open using zlib if it is supported and the file name ends with .gz. */
3477 
3478 #ifdef SUPPORT_LIBZ
3479 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3480   {
3481   ingz = gzopen(pathname, "rb");
3482   if (ingz == NULL)
3483     {
3484     /* LCOV_EXCL_START */
3485     if (!silent)
3486       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3487         strerror(errno));
3488     return 2;
3489     /* LCOV_EXCL_STOP */
3490     }
3491   handle = (void *)ingz;
3492   frtype = FR_LIBZ;
3493   }
3494 else
3495 #endif
3496 
3497 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3498 
3499 #ifdef SUPPORT_LIBBZ2
3500 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3501   {
3502   inbz2 = BZ2_bzopen(pathname, "rb");
3503   handle = (void *)inbz2;
3504   frtype = FR_LIBBZ2;
3505   }
3506 else
3507 #endif
3508 
3509 /* Otherwise use plain fopen(). The label is so that we can come back here if
3510 an attempt to read a .bz2 file indicates that it really is a plain file. */
3511 
3512 #ifdef SUPPORT_LIBBZ2
3513 PLAIN_FILE:
3514 #endif
3515   {
3516   in = fopen(pathname, "rb");
3517   handle = (void *)in;
3518   frtype = FR_PLAIN;
3519   }
3520 
3521 /* All the opening methods return errno when they fail. */
3522 
3523 if (handle == NULL)
3524   {
3525   if (!silent)
3526     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3527       strerror(errno));
3528   return 2;
3529   }
3530 
3531 /* Now grep the file */
3532 
3533 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3534   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3535 
3536 /* Close in an appropriate manner. */
3537 
3538 #ifdef SUPPORT_LIBZ
3539 if (frtype == FR_LIBZ)
3540   gzclose(ingz);
3541 else
3542 #endif
3543 
3544 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3545 read failed. If the error indicates that the file isn't in fact bzipped, try
3546 again as a normal file. */
3547 
3548 #ifdef SUPPORT_LIBBZ2
3549 if (frtype == FR_LIBBZ2)
3550   {
3551   if (rc == 3)
3552     {
3553     int errnum;
3554     const char *err = BZ2_bzerror(inbz2, &errnum);
3555     if (errnum == BZ_DATA_ERROR_MAGIC)
3556       {
3557       BZ2_bzclose(inbz2);
3558       goto PLAIN_FILE;
3559       }
3560     /* LCOV_EXCL_START */
3561     else if (!silent)
3562       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3563         pathname, err);
3564     rc = 2;    /* The normal "something went wrong" code */
3565     /* LCOV_EXCL_STOP */
3566     }
3567   BZ2_bzclose(inbz2);
3568   }
3569 else
3570 #endif
3571 
3572 /* Normal file close */
3573 
3574 fclose(in);
3575 
3576 /* Pass back the yield from pcre2grep(). */
3577 
3578 return rc;
3579 }
3580 
3581 
3582 
3583 /*************************************************
3584 *          Handle a no-data option               *
3585 *************************************************/
3586 
3587 /* This is called when a known option has been identified. */
3588 
3589 static int
handle_option(int letter,int options)3590 handle_option(int letter, int options)
3591 {
3592 switch(letter)
3593   {
3594   case N_FOFFSETS: file_offsets = TRUE; break;
3595   case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3596   case N_LBUFFER: line_buffered = TRUE; break;
3597   case N_LOFFSETS: line_offsets = number = TRUE; break;
3598   case N_NOJIT: use_jit = FALSE; break;
3599   case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3600   case N_NO_GROUP_SEPARATOR: group_separator = NULL; break;
3601   case 'a': binary_files = BIN_TEXT; break;
3602   case 'c': count_only = TRUE; break;
3603   case N_POSIX_DIGIT: posix_digit = TRUE; break;
3604   case 'E': case_restrict = TRUE; break;
3605   case 'F': options |= PCRE2_LITERAL; break;
3606   case 'H': filenames = FN_FORCE; break;
3607   case 'I': binary_files = BIN_NOMATCH; break;
3608   case 'h': filenames = FN_NONE; break;
3609   case 'i': options |= PCRE2_CASELESS; break;
3610   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3611   case 'L': filenames = FN_NOMATCH_ONLY; break;
3612   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3613   case 'n': number = TRUE; break;
3614 
3615   case 'o':
3616   only_matching_last = add_number(0, only_matching_last);
3617   if (only_matching == NULL) only_matching = only_matching_last;
3618   break;
3619 
3620   case 'P': no_ucp = TRUE; break;
3621   case 'q': quiet = TRUE; break;
3622   case 'r': dee_action = dee_RECURSE; break;
3623   case 's': silent = TRUE; break;
3624   case 't': show_total_count = TRUE; break;
3625   case 'u': options |= PCRE2_UTF | PCRE2_UCP; utf = TRUE; break;
3626   case 'U': options |= PCRE2_UTF | PCRE2_MATCH_INVALID_UTF | PCRE2_UCP;
3627             utf = TRUE; break;
3628   case 'v': invert = TRUE; break;
3629 
3630   case 'V':
3631     {
3632     unsigned char buffer[128];
3633     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3634     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3635     }
3636   pcre2grep_exit(0);
3637   break;  /* LCOV_EXCL_LINE - statement kept to avoid compiler warning */
3638 
3639   case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3640   case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3641   case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
3642 
3643   /* LCOV_EXCL_START - this is a "never event" */
3644   default:
3645   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3646   pcre2grep_exit(usage(2));
3647   /* LCOV_EXCL_STOP */
3648   }
3649 
3650 return options;
3651 }
3652 
3653 
3654 
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The suffix is chosen from the
last two decimal digits: values ending in 11, 12, or 13 take "th"; otherwise a
final 1, 2, or 3 gives "st", "nd", or "rd", and everything else gives "th".

Argument:   the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
              the next call, so the caller must use or copy it before then
*/

static char *
ordin(int n)
{
/* Room for the longest 32-bit value ("-2147483648", 11 characters), a
two-character suffix, and the terminating zero. */

static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Length of the numeric part */
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (lasttwo % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3679 
3680 
3681 
3682 /*************************************************
3683 *          Compile a single pattern              *
3684 *************************************************/
3685 
3686 /* Do nothing if the pattern has already been compiled. This is the case for
3687 include/exclude patterns read from a file.
3688 
3689 When the -F option has been used, each "pattern" may be a list of strings,
3690 separated by line breaks. They will be matched literally. We split such a
3691 string and compile the first substring, inserting an additional block into the
3692 pattern chain.
3693 
3694 Arguments:
3695   p              points to the pattern block
3696   options        the PCRE options
3697   fromfile       TRUE if the pattern was read from a file
3698   fromtext       file name or identifying text (e.g. "include")
3699   count          0 if this is the only command line pattern, or
3700                  number of the command line pattern, or
3701                  linenumber for a pattern from a file
3702 
3703 Returns:         TRUE on success, FALSE after an error
3704 */
3705 
3706 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3707 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3708   int count)
3709 {
3710 char *ps;
3711 int errcode;
3712 PCRE2_SIZE patlen, erroffset;
3713 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3714 
3715 if (p->compiled != NULL) return TRUE;
3716 ps = p->string;
3717 patlen = p->length;
3718 
3719 if ((options & PCRE2_LITERAL) != 0)
3720   {
3721   int ellength;
3722   char *eop = ps + patlen;
3723   char *pe = end_of_line(ps, eop, &ellength);
3724 
3725   if (ellength != 0)
3726     {
3727     patlen = pe - ps - ellength;
3728     if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3729     }
3730   }
3731 
3732 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3733   &erroffset, compile_context);
3734 
3735 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3736 ignore any JIT compiler errors, relying falling back to interpreting if
3737 anything goes wrong with JIT. */
3738 
3739 if (p->compiled != NULL)
3740   {
3741 #ifdef SUPPORT_PCRE2GREP_JIT
3742   if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3743 #endif
3744   return TRUE;
3745   }
3746 
3747 /* Handle compile errors */
3748 
3749 if (erroffset > patlen) erroffset = patlen;
3750 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3751 
3752 if (fromfile)
3753   {
3754   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3755     "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3756   }
3757 else
3758   {
3759   if (count == 0)
3760     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3761       fromtext, (int)erroffset, errmessbuffer);
3762   else
3763     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3764       ordin(count), fromtext, (int)erroffset, errmessbuffer);
3765   }
3766 
3767 return FALSE;
3768 }
3769 
3770 
3771 
3772 /*************************************************
3773 *     Read and compile a file of patterns        *
3774 *************************************************/
3775 
3776 /* This is used for --filelist, --include-from, and --exclude-from.
3777 
3778 Arguments:
3779   name         the name of the file; "-" is stdin
3780   patptr       pointer to the pattern chain anchor
3781   patlastptr   pointer to the last pattern pointer
3782 
3783 Returns:       TRUE if all went well
3784 */
3785 
3786 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3787 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3788 {
3789 int linenumber = 0;
3790 PCRE2_SIZE patlen;
3791 FILE *f;
3792 const char *filename;
3793 char buffer[MAXPATLEN+20];
3794 
3795 if (strcmp(name, "-") == 0)
3796   {
3797   f = stdin;
3798   filename = stdin_name;
3799   }
3800 else
3801   {
3802   f = fopen(name, "r");
3803   if (f == NULL)
3804     {
3805     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3806     return FALSE;
3807     }
3808   filename = name;
3809   }
3810 
3811 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3812   {
3813   while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3814   linenumber++;
3815   if (patlen == 0) continue;   /* Skip blank lines */
3816 
3817   /* Note: this call to add_pattern() puts a pointer to the local variable
3818   "buffer" into the pattern chain. However, that pointer is used only when
3819   compiling the pattern, which happens immediately below, so we flatten it
3820   afterwards, as a precaution against any later code trying to use it. */
3821 
3822   *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3823   if (*patlastptr == NULL)
3824     {
3825     /* LCOV_EXCL_START - won't happen in testing */
3826     if (f != stdin) fclose(f);
3827     return FALSE;
3828     /* LCOV_EXCL_STOP */
3829     }
3830   if (*patptr == NULL) *patptr = *patlastptr;
3831 
3832   /* This loop is needed because compiling a "pattern" when -F is set may add
3833   on additional literal patterns if the original contains a newline. In the
3834   common case, it never will, because read_one_line() stops at a newline.
3835   However, the -N option can be used to give pcre2grep a different newline
3836   setting. */
3837 
3838   for(;;)
3839     {
3840     if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3841         linenumber))
3842       {
3843       if (f != stdin) fclose(f);
3844       return FALSE;
3845       }
3846     (*patlastptr)->string = NULL;            /* Insurance */
3847     if ((*patlastptr)->next == NULL) break;
3848     *patlastptr = (*patlastptr)->next;
3849     }
3850   }
3851 
3852 if (f != stdin) fclose(f);
3853 return TRUE;
3854 }
3855 
3856 
3857 
3858 /*************************************************
3859 *                Main program                    *
3860 *************************************************/
3861 
3862 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3863 
3864 int
main(int argc,char ** argv)3865 main(int argc, char **argv)
3866 {
3867 int i, j;
3868 int rc = 1;
3869 BOOL only_one_at_top;
3870 patstr *cp;
3871 fnstr *fn;
3872 omstr *om;
3873 const char *locale_from = "--locale";
3874 
3875 #ifdef SUPPORT_PCRE2GREP_JIT
3876 pcre2_jit_stack *jit_stack = NULL;
3877 #endif
3878 
3879 /* In Windows, stdout is set up as a text stream, which means that \n is
3880 converted to \r\n. This causes output lines that are copied from the input to
3881 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3882 that stdout is a binary stream. Note that this means all other output to stdout
3883 must use STDOUT_NL to terminate lines. */
3884 
3885 #ifdef WIN32
3886 _setmode(_fileno(stdout), _O_BINARY);
3887 #endif
3888 
3889 /* Process the options */
3890 
3891 for (i = 1; i < argc; i++)
3892   {
3893   option_item *op = NULL;
3894   char *option_data = (char *)"";    /* default to keep compiler happy */
3895   BOOL longop;
3896   BOOL longopwasequals = FALSE;
3897 
3898   if (argv[i][0] != '-') break;
3899 
3900   /* If we hit an argument that is just "-", it may be a reference to STDIN,
3901   but only if we have previously had -e or -f to define the patterns. */
3902 
3903   if (argv[i][1] == 0)
3904     {
3905     if (pattern_files != NULL || patterns != NULL) break;
3906       else pcre2grep_exit(usage(2));
3907     }
3908 
3909   /* Handle a long name option, or -- to terminate the options */
3910 
3911   if (argv[i][1] == '-')
3912     {
3913     char *arg = argv[i] + 2;
3914     char *argequals = strchr(arg, '=');
3915 
3916     if (*arg == 0)    /* -- terminates options */
3917       {
3918       i++;
3919       break;                /* out of the options-handling loop */
3920       }
3921 
3922     longop = TRUE;
3923 
3924     /* Some long options have data that follows after =, for example file=name.
3925     Some options have variations in the long name spelling: specifically, we
3926     allow "regexp" because GNU grep allows it, though I personally go along
3927     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3928     These options are entered in the table as "regex(p)". Options can be in
3929     both these categories. */
3930 
3931     for (op = optionlist; op->one_char != 0; op++)
3932       {
3933       char *opbra = strchr(op->long_name, '(');
3934       char *equals = strchr(op->long_name, '=');
3935 
3936       /* Handle options with only one spelling of the name */
3937 
3938       if (opbra == NULL)     /* Does not contain '(' */
3939         {
3940         if (equals == NULL)  /* Not thing=data case */
3941           {
3942           if (strcmp(arg, op->long_name) == 0) break;
3943           }
3944         else                 /* Special case xxx=data */
3945           {
3946           int oplen = (int)(equals - op->long_name);
3947           int arglen = (argequals == NULL)?
3948             (int)strlen(arg) : (int)(argequals - arg);
3949           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3950             {
3951             option_data = arg + arglen;
3952             if (*option_data == '=')
3953               {
3954               option_data++;
3955               longopwasequals = TRUE;
3956               }
3957             break;
3958             }
3959           }
3960         }
3961 
3962       /* Handle options with an alternate spelling of the name */
3963 
3964       else
3965         {
3966         char buff1[24];
3967         char buff2[24];
3968         int ret;
3969 
3970         int baselen = (int)(opbra - op->long_name);
3971         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3972         int arglen = (argequals == NULL || equals == NULL)?
3973           (int)strlen(arg) : (int)(argequals - arg);
3974 
3975         if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3976              ret < 0 || ret > (int)sizeof(buff1)) ||
3977             (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3978                      fulllen - baselen - 2, opbra + 1),
3979              ret < 0 || ret > (int)sizeof(buff2)))
3980           {
3981           /* LCOV_EXCL_START - this is a "never" event */
3982           fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3983             op->long_name);
3984           pcre2grep_exit(2);
3985           /* LCOV_EXCL_STOP */
3986           }
3987 
3988         if (strncmp(arg, buff1, arglen) == 0 ||
3989            strncmp(arg, buff2, arglen) == 0)
3990           {
3991           if (equals != NULL && argequals != NULL)
3992             {
3993             option_data = argequals;
3994             if (*option_data == '=')
3995               {
3996               option_data++;
3997               longopwasequals = TRUE;
3998               }
3999             }
4000           break;
4001           }
4002         }
4003       }
4004 
4005     if (op->one_char == 0)
4006       {
4007       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
4008       pcre2grep_exit(usage(2));
4009       }
4010     }
4011 
4012   /* One-char options; many that have no data may be in a single argument; we
4013   continue till we hit the last one or one that needs data. */
4014 
4015   else
4016     {
4017     char *s = argv[i] + 1;
4018     longop = FALSE;
4019 
4020     while (*s != 0)
4021       {
4022       for (op = optionlist; op->one_char != 0; op++)
4023         {
4024         if (*s == op->one_char) break;
4025         }
4026       if (op->one_char == 0)
4027         {
4028         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4029           *s, argv[i]);
4030         pcre2grep_exit(usage(2));
4031         }
4032 
4033       option_data = s+1;
4034 
4035       /* Break out if this is the last character in the string; it's handled
4036       below like a single multi-char option. */
4037 
4038       if (*option_data == 0) break;
4039 
4040       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4041       are used for ones that either have a numerical number or defaults, i.e.
4042       the data is optional. If a digit follows, there is data; if not, carry on
4043       with other single-character options in the same string. */
4044 
4045       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4046         {
4047         if (isdigit((unsigned char)(s[1]))) break;
4048         }
4049       else   /* Check for an option with data */
4050         {
4051         if (op->type != OP_NODATA) break;
4052         }
4053 
4054       /* Handle a single-character option with no data, then loop for the
4055       next character in the string. */
4056 
4057       pcre2_options = handle_option(*s++, pcre2_options);
4058       }
4059     }
4060 
4061   /* At this point we should have op pointing to a matched option. If the type
4062   is NO_DATA, it means that there is no data, and the option might set
4063   something in the PCRE options. */
4064 
4065   if (op->type == OP_NODATA)
4066     {
4067     pcre2_options = handle_option(op->one_char, pcre2_options);
4068     continue;
4069     }
4070 
4071   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4072   either has a value or defaults to something. It cannot have data in a
4073   separate item. At the moment, the only such options are "colo(u)r",
4074   and "only-matching". */
4075 
4076   if (*option_data == 0 &&
4077       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4078        op->type == OP_OP_NUMBERS))
4079     {
4080     switch (op->one_char)
4081       {
4082       case N_COLOUR:
4083       colour_option = "auto";
4084       break;
4085 
4086       case 'o':
4087       only_matching_last = add_number(0, only_matching_last);
4088       if (only_matching == NULL) only_matching = only_matching_last;
4089       break;
4090       }
4091     continue;
4092     }
4093 
4094   /* Otherwise, find the data string for the option. */
4095 
4096   if (*option_data == 0)
4097     {
4098     if (i >= argc - 1 || longopwasequals)
4099       {
4100       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4101       pcre2grep_exit(usage(2));
4102       }
4103     option_data = argv[++i];
4104     }
4105 
4106   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4107   added to a chain of numbers. */
4108 
4109   if (op->type == OP_OP_NUMBERS)
4110     {
4111     unsigned long int n = decode_number(option_data, op, longop);
4112     omdatastr *omd = (omdatastr *)op->dataptr;
4113     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4114     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4115     }
4116 
4117   /* If the option type is OP_PATLIST, it's the -e option, or one of the
4118   include/exclude options, which can be called multiple times to create lists
4119   of patterns. */
4120 
4121   else if (op->type == OP_PATLIST)
4122     {
4123     patdatastr *pd = (patdatastr *)op->dataptr;
4124     *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4125       *(pd->lastptr));
4126     if (*(pd->lastptr) == NULL) goto EXIT2;
4127     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4128     }
4129 
4130   /* If the option type is OP_FILELIST, it's one of the options that names a
4131   file. */
4132 
4133   else if (op->type == OP_FILELIST)
4134     {
4135     fndatastr *fd = (fndatastr *)op->dataptr;
4136     fn = (fnstr *)malloc(sizeof(fnstr));
4137     if (fn == NULL)
4138       {
4139       /* LCOV_EXCL_START */
4140       fprintf(stderr, "pcre2grep: malloc failed\n");
4141       goto EXIT2;
4142       /* LCOV_EXCL_STOP */
4143       }
4144     fn->next = NULL;
4145     fn->name = option_data;
4146     if (*(fd->anchor) == NULL)
4147       *(fd->anchor) = fn;
4148     else
4149       (*(fd->lastptr))->next = fn;
4150     *(fd->lastptr) = fn;
4151     }
4152 
4153   /* Handle OP_BINARY_FILES */
4154 
4155   else if (op->type == OP_BINFILES)
4156     {
4157     if (strcmp(option_data, "binary") == 0)
4158       binary_files = BIN_BINARY;
4159     else if (strcmp(option_data, "without-match") == 0)
4160       binary_files = BIN_NOMATCH;
4161     else if (strcmp(option_data, "text") == 0)
4162       binary_files = BIN_TEXT;
4163     else
4164       {
4165       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4166         option_data);
4167       pcre2grep_exit(usage(2));
4168       }
4169     }
4170 
4171   /* Otherwise, deal with a single string or numeric data value. */
4172 
4173   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4174            op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4175     {
4176     *((char **)op->dataptr) = option_data;
4177     }
4178   else
4179     {
4180     unsigned long int n = decode_number(option_data, op, longop);
4181     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4182       else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4183       else *((int *)op->dataptr) = n;
4184     }
4185   }
4186 
4187 /* Options have been decoded. If -C was used, its value is used as a default
4188 for -A and -B. */
4189 
4190 if (both_context > 0)
4191   {
4192   if (after_context == 0) after_context = both_context;
4193   if (before_context == 0) before_context = both_context;
4194   }
4195 
4196 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4197 permitted. They display, each in their own way, only the data that has matched.
4198 */
4199 
4200 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4201   file_offsets + line_offsets;
4202 
4203 if (only_matching_count > 1)
4204   {
4205   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4206     "--file-offsets and/or --line-offsets\n");
4207   pcre2grep_exit(usage(2));
4208   }
4209 
4210 /* Check that there is a big enough ovector for all -o settings. */
4211 
4212 for (om = only_matching; om != NULL; om = om->next)
4213   {
4214   int n = om->groupnum;
4215   if (n > (int)capture_max)
4216     {
4217     fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4218     fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4219     goto EXIT2;
4220     }
4221   }
4222 
4223 /* Check the text supplied to --output for errors. */
4224 
4225 if (output_text != NULL &&
4226     !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4227   goto EXIT2;
4228 
4229 /* Set up default compile and match contexts and match data blocks. */
4230 
4231 offset_size = capture_max + 1;
4232 compile_context = pcre2_compile_context_create(NULL);
4233 match_context = pcre2_match_context_create(NULL);
4234 match_data_pair[0] = pcre2_match_data_create(offset_size, NULL);
4235 match_data_pair[1] = pcre2_match_data_create(offset_size, NULL);
4236 offsets_pair[0] = pcre2_get_ovector_pointer(match_data_pair[0]);
4237 offsets_pair[1] = pcre2_get_ovector_pointer(match_data_pair[1]);
4238 match_data = match_data_pair[0];
4239 offsets = offsets_pair[0];
4240 match_data_toggle = 0;
4241 
4242 /* If string (script) callouts are supported, set up the callout processing
4243 function in the match context. */
4244 
4245 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4246 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4247 #endif
4248 
4249 /* Put limits into the match context. */
4250 
4251 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4252 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4253 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4254 
4255 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4256 LC_ALL environment variable is set, and if so, use it. */
4257 
4258 if (locale == NULL)
4259   {
4260   locale = getenv("LC_ALL");
4261   locale_from = "LC_ALL";
4262   }
4263 
4264 if (locale == NULL)
4265   {
4266   locale = getenv("LC_CTYPE");
4267   locale_from = "LC_CTYPE";
4268   }
4269 
4270 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4271 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4272 
4273 if (locale != NULL)
4274   {
4275   if (setlocale(LC_CTYPE, locale) == NULL)
4276     {
4277     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4278       locale, locale_from);
4279     goto EXIT2;
4280     }
4281   character_tables = pcre2_maketables(NULL);
4282   pcre2_set_character_tables(compile_context, character_tables);
4283   }
4284 
4285 /* Sort out colouring */
4286 
4287 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4288   {
4289   if (strcmp(colour_option, "always") == 0)
4290 #ifdef WIN32
4291     do_ansi = !is_stdout_tty(),
4292 #endif
4293     do_colour = TRUE;
4294   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4295   else
4296     {
4297     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4298       colour_option);
4299     goto EXIT2;
4300     }
4301   if (do_colour)
4302     {
4303     char *cs = getenv("PCRE2GREP_COLOUR");
4304     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4305     if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4306     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4307     if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4308     if (cs == NULL) cs = getenv("GREP_COLOR");
4309     if (cs != NULL)
4310       {
4311       if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4312       }
4313 #ifdef WIN32
4314     init_colour_output();
4315 #endif
4316     }
4317   }
4318 
4319 /* When colouring or otherwise identifying matching substrings, we need to find
4320 all possible matches when there are multiple patterns. */
4321 
4322 all_matches = do_colour || only_matching_count != 0;
4323 
4324 /* Sort out a newline setting. */
4325 
4326 if (newline_arg != NULL)
4327   {
4328   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4329        endlinetype++)
4330     {
4331     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4332     }
4333   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4334     pcre2_set_newline(compile_context, endlinetype);
4335   else
4336     {
4337     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4338       newline_arg);
4339     goto EXIT2;
4340     }
4341   }
4342 
4343 /* Find default newline convention */
4344 
4345 else
4346   {
4347   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4348   }
4349 
4350 /* Interpret the text values for -d and -D */
4351 
4352 if (dee_option != NULL)
4353   {
4354   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4355   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4356   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4357   else
4358     {
4359     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4360     goto EXIT2;
4361     }
4362   }
4363 
4364 if (DEE_option != NULL)
4365   {
4366   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4367   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4368   else
4369     {
4370     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4371     goto EXIT2;
4372     }
4373   }
4374 
4375 /* If no_ucp is set, remove PCRE2_UCP from the compile options. */
4376 
4377 if (no_ucp) pcre2_options &= ~PCRE2_UCP;
4378 
4379 /* adjust the extra options. */
4380 
4381 if (case_restrict) extra_options |= PCRE2_EXTRA_CASELESS_RESTRICT;
4382 if (posix_digit)
4383   extra_options |= (PCRE2_EXTRA_ASCII_BSD | PCRE2_EXTRA_ASCII_DIGIT);
4384 
4385 /* Set the extra options in the compile context. */
4386 
4387 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4388 
4389 /* If use_jit is set, check whether JIT is available. If not, do not try
4390 to use JIT. */
4391 
4392 if (use_jit)
4393   {
4394   uint32_t answer;
4395   (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4396   if (!answer) use_jit = FALSE;
4397   }
4398 
4399 /* Get memory for the main buffer. */
4400 
4401 if (bufthird <= 0)
4402   {
4403   fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4404   goto EXIT2;
4405   }
4406 
4407 bufsize = 3*bufthird;
4408 main_buffer = (char *)malloc(bufsize);
4409 
4410 if (main_buffer == NULL)
4411   {
4412   /* LCOV_EXCL_START */
4413   fprintf(stderr, "pcre2grep: malloc failed\n");
4414   goto EXIT2;
4415   /* LCOV_EXCL_STOP */
4416   }
4417 
4418 /* If no patterns were provided by -e, and there are no files provided by -f,
4419 the first argument is the one and only pattern, and it must exist. */
4420 
4421 if (patterns == NULL && pattern_files == NULL)
4422   {
4423   if (i >= argc) return usage(2);
4424   patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4425     NULL);
4426   i++;
4427   if (patterns == NULL) goto EXIT2;
4428   }
4429 
4430 /* Compile the patterns that were provided on the command line, either by
4431 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4432 after all the command-line options are read so that we know which PCRE options
4433 to use. When -F is used, compile_pattern() may add another block into the
4434 chain, so we must not access the next pointer till after the compile. */
4435 
4436 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4437   {
4438   if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4439        (j == 1 && patterns->next == NULL)? 0 : j))
4440     goto EXIT2;
4441   }
4442 
4443 /* Read and compile the regular expressions that are provided in files. */
4444 
4445 for (fn = pattern_files; fn != NULL; fn = fn->next)
4446   {
4447   if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4448   }
4449 
4450 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4451 
4452 #ifdef SUPPORT_PCRE2GREP_JIT
4453 if (use_jit)
4454   {
4455   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4456   if (jit_stack != NULL                        )
4457     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4458   }
4459 #endif
4460 
4461 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4462 adjust the options. */
4463 
4464 pcre2_options &= ~PCRE2_LITERAL;
4465 (void)pcre2_set_compile_extra_options(compile_context, 0);
4466 
4467 /* If there are include or exclude patterns read from the command line, compile
4468 them. */
4469 
4470 for (j = 0; j < 4; j++)
4471   {
4472   int k;
4473   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4474     {
4475     if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4476          (k == 1 && cp->next == NULL)? 0 : k))
4477       goto EXIT2;
4478     }
4479   }
4480 
4481 /* Read and compile include/exclude patterns from files. */
4482 
4483 for (fn = include_from; fn != NULL; fn = fn->next)
4484   {
4485   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4486     goto EXIT2;
4487   }
4488 
4489 for (fn = exclude_from; fn != NULL; fn = fn->next)
4490   {
4491   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4492     goto EXIT2;
4493   }
4494 
4495 /* If there are no files that contain lists of files to search, and there are
4496 no file arguments, search stdin, and then exit. */
4497 
4498 if (file_lists == NULL && i >= argc)
4499   {
4500   /* Using a buffered stdin, that then is seek is not portable,
4501      so attempt to remove the buffer, to workaround reported issues
4502      affecting several BSD and AIX */
4503   if (count_limit >= 0)
4504     setbuf(stdin, NULL);
4505   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4506     (filenames > FN_DEFAULT)? stdin_name : NULL);
4507   goto EXIT;
4508   }
4509 
4510 /* If any files that contains a list of files to search have been specified,
4511 read them line by line and search the given files. */
4512 
4513 for (fn = file_lists; fn != NULL; fn = fn->next)
4514   {
4515   char buffer[FNBUFSIZ];
4516   FILE *fl;
4517   if (strcmp(fn->name, "-") == 0) fl = stdin; else
4518     {
4519     fl = fopen(fn->name, "rb");
4520     if (fl == NULL)
4521       {
4522       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4523         strerror(errno));
4524       goto EXIT2;
4525       }
4526     }
4527   while (fgets(buffer, sizeof(buffer), fl) != NULL)
4528     {
4529     int frc;
4530     char *end = buffer + (int)strlen(buffer);
4531     while (end > buffer && isspace((unsigned char)(end[-1]))) end--;
4532     *end = 0;
4533     if (*buffer != 0)
4534       {
4535       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4536       if (frc > 1) rc = frc;
4537         else if (frc == 0 && rc == 1) rc = 0;
4538       }
4539     }
4540   if (fl != stdin) fclose(fl);
4541   }
4542 
4543 /* After handling file-list, work through remaining arguments. Pass in the fact
4544 that there is only one argument at top level - this suppresses the file name if
4545 the argument is not a directory and filenames are not otherwise forced. */
4546 
4547 only_one_at_top = i == argc - 1 && file_lists == NULL;
4548 
4549 for (; i < argc; i++)
4550   {
4551   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4552     only_one_at_top);
4553   if (frc > 1) rc = frc;
4554     else if (frc == 0 && rc == 1) rc = 0;
4555   }
4556 
4557 /* Show the total number of matches if requested, but not if only one file's
4558 count was printed. */
4559 
4560 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4561   {
4562   if (counts_printed != 0 && filenames >= FN_DEFAULT)
4563     fprintf(stdout, "TOTAL:");
4564   fprintf(stdout, "%lu" STDOUT_NL, total_count);
4565   }
4566 
4567 EXIT:
4568 #ifdef SUPPORT_PCRE2GREP_JIT
4569 pcre2_jit_free_unused_memory(NULL);
4570 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4571 #endif
4572 
4573 free(main_buffer);
4574 if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4575 
4576 pcre2_compile_context_free(compile_context);
4577 pcre2_match_context_free(match_context);
4578 pcre2_match_data_free(match_data_pair[0]);
4579 pcre2_match_data_free(match_data_pair[1]);
4580 
4581 free_pattern_chain(patterns);
4582 free_pattern_chain(include_patterns);
4583 free_pattern_chain(include_dir_patterns);
4584 free_pattern_chain(exclude_patterns);
4585 free_pattern_chain(exclude_dir_patterns);
4586 
4587 free_file_chain(exclude_from);
4588 free_file_chain(include_from);
4589 free_file_chain(pattern_files);
4590 free_file_chain(file_lists);
4591 
4592 while (only_matching != NULL)
4593   {
4594   omstr *this = only_matching;
4595   only_matching = this->next;
4596   free(this);
4597   }
4598 
4599 pcre2grep_exit(rc);
4600 
4601 EXIT2:
4602 rc = 2;
4603 goto EXIT;
4604 }
4605 
4606 /* End of pcre2grep */
4607