xref: /aosp_15_r20/external/pcre/src/pcre2grep.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *               pcre2grep program                *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* This is a grep program that uses the 8-bit PCRE regular expression library
6*22dc650dSSadaf Ebrahimi via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7*22dc650dSSadaf Ebrahimi and native z/OS systems it can recurse into directories, and in z/OS it can
8*22dc650dSSadaf Ebrahimi handle PDS files.
9*22dc650dSSadaf Ebrahimi 
10*22dc650dSSadaf Ebrahimi Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11*22dc650dSSadaf Ebrahimi additional header is required. That header is not included in the main PCRE2
12*22dc650dSSadaf Ebrahimi distribution because other apparatus is needed to compile pcre2grep for z/OS.
13*22dc650dSSadaf Ebrahimi The header can be found in the special z/OS distribution, which is available
14*22dc650dSSadaf Ebrahimi from www.zaconsultants.net or from www.cbttape.org.
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi            Copyright (c) 1997-2023 University of Cambridge
17*22dc650dSSadaf Ebrahimi 
18*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
19*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
20*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
21*22dc650dSSadaf Ebrahimi 
22*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
23*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
24*22dc650dSSadaf Ebrahimi 
25*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
26*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
27*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
28*22dc650dSSadaf Ebrahimi 
29*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
30*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
31*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
32*22dc650dSSadaf Ebrahimi 
33*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
44*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
45*22dc650dSSadaf Ebrahimi */
46*22dc650dSSadaf Ebrahimi 
47*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
48*22dc650dSSadaf Ebrahimi #include "config.h"
49*22dc650dSSadaf Ebrahimi #endif
50*22dc650dSSadaf Ebrahimi 
51*22dc650dSSadaf Ebrahimi #include <ctype.h>
52*22dc650dSSadaf Ebrahimi #include <locale.h>
53*22dc650dSSadaf Ebrahimi #include <stdio.h>
54*22dc650dSSadaf Ebrahimi #include <string.h>
55*22dc650dSSadaf Ebrahimi #include <stdlib.h>
56*22dc650dSSadaf Ebrahimi #include <errno.h>
57*22dc650dSSadaf Ebrahimi 
58*22dc650dSSadaf Ebrahimi #include <sys/types.h>
59*22dc650dSSadaf Ebrahimi #include <sys/stat.h>
60*22dc650dSSadaf Ebrahimi 
61*22dc650dSSadaf Ebrahimi #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62*22dc650dSSadaf Ebrahimi   && !defined WIN32 && !defined(__CYGWIN__)
63*22dc650dSSadaf Ebrahimi #define WIN32
64*22dc650dSSadaf Ebrahimi #endif
65*22dc650dSSadaf Ebrahimi 
66*22dc650dSSadaf Ebrahimi /* Some versions of CMake still define WIN32 under Cygwin, so undefine it here. */
67*22dc650dSSadaf Ebrahimi #if defined(__CYGWIN__) && defined(WIN32)
68*22dc650dSSadaf Ebrahimi #undef WIN32
69*22dc650dSSadaf Ebrahimi #endif
70*22dc650dSSadaf Ebrahimi 
71*22dc650dSSadaf Ebrahimi #ifdef __VMS
72*22dc650dSSadaf Ebrahimi #include clidef
73*22dc650dSSadaf Ebrahimi #include descrip
74*22dc650dSSadaf Ebrahimi #include lib$routines
75*22dc650dSSadaf Ebrahimi #endif
76*22dc650dSSadaf Ebrahimi 
77*22dc650dSSadaf Ebrahimi #ifdef WIN32
78*22dc650dSSadaf Ebrahimi #include <io.h>                /* For _setmode() */
79*22dc650dSSadaf Ebrahimi #include <fcntl.h>             /* For _O_BINARY */
80*22dc650dSSadaf Ebrahimi #endif
81*22dc650dSSadaf Ebrahimi 
82*22dc650dSSadaf Ebrahimi #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83*22dc650dSSadaf Ebrahimi #ifdef WIN32
84*22dc650dSSadaf Ebrahimi #include <process.h>
85*22dc650dSSadaf Ebrahimi #else
86*22dc650dSSadaf Ebrahimi #include <sys/wait.h>
87*22dc650dSSadaf Ebrahimi #endif
88*22dc650dSSadaf Ebrahimi #endif
89*22dc650dSSadaf Ebrahimi 
90*22dc650dSSadaf Ebrahimi #ifdef HAVE_UNISTD_H
91*22dc650dSSadaf Ebrahimi #include <unistd.h>
92*22dc650dSSadaf Ebrahimi #endif
93*22dc650dSSadaf Ebrahimi 
94*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
95*22dc650dSSadaf Ebrahimi #include <zlib.h>
96*22dc650dSSadaf Ebrahimi #endif
97*22dc650dSSadaf Ebrahimi 
98*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
99*22dc650dSSadaf Ebrahimi #include <bzlib.h>
100*22dc650dSSadaf Ebrahimi #endif
101*22dc650dSSadaf Ebrahimi 
102*22dc650dSSadaf Ebrahimi #define PCRE2_CODE_UNIT_WIDTH 8
103*22dc650dSSadaf Ebrahimi #include "pcre2.h"
104*22dc650dSSadaf Ebrahimi 
105*22dc650dSSadaf Ebrahimi /* Older versions of MSVC lack snprintf(). This define allows for
106*22dc650dSSadaf Ebrahimi warning/error-free compilation and testing with MSVC compilers back to at least
107*22dc650dSSadaf Ebrahimi MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108*22dc650dSSadaf Ebrahimi 
109*22dc650dSSadaf Ebrahimi #if defined(_MSC_VER) && (_MSC_VER < 1900)
110*22dc650dSSadaf Ebrahimi #define snprintf _snprintf
111*22dc650dSSadaf Ebrahimi #endif
112*22dc650dSSadaf Ebrahimi 
113*22dc650dSSadaf Ebrahimi /* Old versions of MSVC and other older compilers don't support %td or %zu, and
114*22dc650dSSadaf Ebrahimi even some that claim to be C99 don't support them (hence DISABLE_PERCENT_ZT). */
115*22dc650dSSadaf Ebrahimi 
116*22dc650dSSadaf Ebrahimi #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117*22dc650dSSadaf Ebrahimi   (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118*22dc650dSSadaf Ebrahimi #ifdef _WIN64
119*22dc650dSSadaf Ebrahimi #define SIZ_FORM "llu"
120*22dc650dSSadaf Ebrahimi #else
121*22dc650dSSadaf Ebrahimi #define SIZ_FORM "lu"
122*22dc650dSSadaf Ebrahimi #endif
123*22dc650dSSadaf Ebrahimi #else
124*22dc650dSSadaf Ebrahimi #define SIZ_FORM "zu"
125*22dc650dSSadaf Ebrahimi #endif
126*22dc650dSSadaf Ebrahimi 
127*22dc650dSSadaf Ebrahimi #define FALSE 0
128*22dc650dSSadaf Ebrahimi #define TRUE 1
129*22dc650dSSadaf Ebrahimi 
130*22dc650dSSadaf Ebrahimi typedef int BOOL;
131*22dc650dSSadaf Ebrahimi 
132*22dc650dSSadaf Ebrahimi #define DEFAULT_CAPTURE_MAX 50
133*22dc650dSSadaf Ebrahimi 
134*22dc650dSSadaf Ebrahimi #if BUFSIZ > 8192
135*22dc650dSSadaf Ebrahimi #define MAXPATLEN BUFSIZ
136*22dc650dSSadaf Ebrahimi #else
137*22dc650dSSadaf Ebrahimi #define MAXPATLEN 8192
138*22dc650dSSadaf Ebrahimi #endif
139*22dc650dSSadaf Ebrahimi 
140*22dc650dSSadaf Ebrahimi #define FNBUFSIZ 2048
141*22dc650dSSadaf Ebrahimi #define ERRBUFSIZ 256
142*22dc650dSSadaf Ebrahimi 
143*22dc650dSSadaf Ebrahimi /* Values for the "filenames" variable, which specifies options for file name
144*22dc650dSSadaf Ebrahimi output. The order is important; it is assumed that a file name is wanted for
145*22dc650dSSadaf Ebrahimi all values greater than FN_DEFAULT. */
146*22dc650dSSadaf Ebrahimi 
147*22dc650dSSadaf Ebrahimi enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
148*22dc650dSSadaf Ebrahimi 
149*22dc650dSSadaf Ebrahimi /* File reading styles */
150*22dc650dSSadaf Ebrahimi 
151*22dc650dSSadaf Ebrahimi enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
152*22dc650dSSadaf Ebrahimi 
153*22dc650dSSadaf Ebrahimi /* Actions for the -d and -D options */
154*22dc650dSSadaf Ebrahimi 
155*22dc650dSSadaf Ebrahimi enum { dee_READ, dee_SKIP, dee_RECURSE };
156*22dc650dSSadaf Ebrahimi enum { DEE_READ, DEE_SKIP };
157*22dc650dSSadaf Ebrahimi 
158*22dc650dSSadaf Ebrahimi /* Actions for special processing options (flag bits) */
159*22dc650dSSadaf Ebrahimi 
160*22dc650dSSadaf Ebrahimi #define PO_WORD_MATCH     0x0001
161*22dc650dSSadaf Ebrahimi #define PO_LINE_MATCH     0x0002
162*22dc650dSSadaf Ebrahimi #define PO_FIXED_STRINGS  0x0004
163*22dc650dSSadaf Ebrahimi 
164*22dc650dSSadaf Ebrahimi /* Binary file options */
165*22dc650dSSadaf Ebrahimi 
166*22dc650dSSadaf Ebrahimi enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
167*22dc650dSSadaf Ebrahimi 
168*22dc650dSSadaf Ebrahimi /* Return values from decode_dollar_escape() */
169*22dc650dSSadaf Ebrahimi 
170*22dc650dSSadaf Ebrahimi enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171*22dc650dSSadaf Ebrahimi 
172*22dc650dSSadaf Ebrahimi /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
173*22dc650dSSadaf Ebrahimi environments), a warning is issued if the value of fwrite() is ignored, and
174*22dc650dSSadaf Ebrahimi casting to (void) does not suppress it. To get round this, the macro wraps the
175*22dc650dSSadaf Ebrahimi call in an empty "if". Oddly, the warning does not seem to apply to fprintf().
176*22dc650dSSadaf Ebrahimi The expansion is a bare "if", so do not follow a use directly with "else". */
177*22dc650dSSadaf Ebrahimi 
178*22dc650dSSadaf Ebrahimi #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
179*22dc650dSSadaf Ebrahimi 
180*22dc650dSSadaf Ebrahimi /* Under Windows, we have to set stdout to be binary, so that it does not
181*22dc650dSSadaf Ebrahimi convert \r\n at the ends of output lines to \r\r\n. However, that means that
182*22dc650dSSadaf Ebrahimi any messages written to stdout must have \r\n as their line terminator. This is
183*22dc650dSSadaf Ebrahimi handled by using STDOUT_NL as the newline string. We also use a normal double
184*22dc650dSSadaf Ebrahimi quote for the example, as single quotes aren't usually available. */
185*22dc650dSSadaf Ebrahimi 
186*22dc650dSSadaf Ebrahimi #ifdef WIN32
187*22dc650dSSadaf Ebrahimi #define STDOUT_NL     "\r\n"
188*22dc650dSSadaf Ebrahimi #define STDOUT_NL_LEN  2
189*22dc650dSSadaf Ebrahimi #define QUOT          "\""
190*22dc650dSSadaf Ebrahimi #else
191*22dc650dSSadaf Ebrahimi #define STDOUT_NL      "\n"
192*22dc650dSSadaf Ebrahimi #define STDOUT_NL_LEN  1
193*22dc650dSSadaf Ebrahimi #define QUOT           "'"
194*22dc650dSSadaf Ebrahimi #endif
195*22dc650dSSadaf Ebrahimi 
196*22dc650dSSadaf Ebrahimi /* This code is returned from decode_dollar_escape() when $n is encountered,
197*22dc650dSSadaf Ebrahimi and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198*22dc650dSSadaf Ebrahimi point. */
199*22dc650dSSadaf Ebrahimi 
200*22dc650dSSadaf Ebrahimi #define STDOUT_NL_CODE 0x7fffffffu
201*22dc650dSSadaf Ebrahimi 
202*22dc650dSSadaf Ebrahimi 
203*22dc650dSSadaf Ebrahimi 
204*22dc650dSSadaf Ebrahimi /*************************************************
205*22dc650dSSadaf Ebrahimi *               Global variables                 *
206*22dc650dSSadaf Ebrahimi *************************************************/
207*22dc650dSSadaf Ebrahimi 
208*22dc650dSSadaf Ebrahimi static const char *colour_string = "1;31";   /* NOTE(review): presumably the default colour code for matched text - confirm */
209*22dc650dSSadaf Ebrahimi static const char *colour_option = NULL;     /* Value given to --color / --colour */
210*22dc650dSSadaf Ebrahimi static const char *dee_option = NULL;        /* Value given to -d / --directories */
211*22dc650dSSadaf Ebrahimi static const char *DEE_option = NULL;        /* Value given to -D / --devices */
212*22dc650dSSadaf Ebrahimi static const char *locale = NULL;            /* Value given to --locale */
213*22dc650dSSadaf Ebrahimi static const char *newline_arg = NULL;       /* Value given to -N / --newline */
214*22dc650dSSadaf Ebrahimi static const char *group_separator = "--";   /* Default value; replaced by --group-separator */
215*22dc650dSSadaf Ebrahimi static const char *om_separator = NULL;      /* Value given to --om-separator */
216*22dc650dSSadaf Ebrahimi static const char *stdin_name = "(standard input)";  /* Default value; replaced by --label */
217*22dc650dSSadaf Ebrahimi static const char *output_text = NULL;       /* Value given to -O / --output */
218*22dc650dSSadaf Ebrahimi 
219*22dc650dSSadaf Ebrahimi static char *main_buffer = NULL;
220*22dc650dSSadaf Ebrahimi 
221*22dc650dSSadaf Ebrahimi static const char *printname_nl = STDOUT_NL;  /* Changed to NULL for -Z */
222*22dc650dSSadaf Ebrahimi static int printname_colon = ':';             /* Changed to 0 for -Z */
223*22dc650dSSadaf Ebrahimi static int printname_hyphen = '-';            /* Changed to 0 for -Z */
224*22dc650dSSadaf Ebrahimi 
225*22dc650dSSadaf Ebrahimi static int after_context = 0;          /* -A: number of following context lines */
226*22dc650dSSadaf Ebrahimi static int before_context = 0;         /* -B: number of prior context lines */
227*22dc650dSSadaf Ebrahimi static int binary_files = BIN_BINARY;  /* One of the BIN_xxx values */
228*22dc650dSSadaf Ebrahimi static int both_context = 0;           /* -C: number of context lines, before & after */
229*22dc650dSSadaf Ebrahimi static int endlinetype;
230*22dc650dSSadaf Ebrahimi 
231*22dc650dSSadaf Ebrahimi static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
232*22dc650dSSadaf Ebrahimi static unsigned long int counts_printed = 0;
233*22dc650dSSadaf Ebrahimi static unsigned long int total_count = 0;
234*22dc650dSSadaf Ebrahimi 
235*22dc650dSSadaf Ebrahimi static PCRE2_SIZE bufthird = PCRE2GREP_BUFSIZE;          /* Set by --buffer-size (processing buffer starting size) */
236*22dc650dSSadaf Ebrahimi static PCRE2_SIZE max_bufthird = PCRE2GREP_MAX_BUFSIZE;  /* Set by --max-buffer-size (processing buffer maximum size) */
237*22dc650dSSadaf Ebrahimi static PCRE2_SIZE bufsize = 3*PCRE2GREP_BUFSIZE;         /* Starts at three times the default bufthird */
238*22dc650dSSadaf Ebrahimi 
239*22dc650dSSadaf Ebrahimi #ifdef WIN32
240*22dc650dSSadaf Ebrahimi static int dee_action = dee_SKIP;
241*22dc650dSSadaf Ebrahimi #else
242*22dc650dSSadaf Ebrahimi static int dee_action = dee_READ;
243*22dc650dSSadaf Ebrahimi #endif
244*22dc650dSSadaf Ebrahimi 
245*22dc650dSSadaf Ebrahimi static int DEE_action = DEE_READ;
246*22dc650dSSadaf Ebrahimi static int error_count = 0;
247*22dc650dSSadaf Ebrahimi static int filenames = FN_DEFAULT;
248*22dc650dSSadaf Ebrahimi 
249*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
250*22dc650dSSadaf Ebrahimi static BOOL use_jit = TRUE;
251*22dc650dSSadaf Ebrahimi #else
252*22dc650dSSadaf Ebrahimi static BOOL use_jit = FALSE;
253*22dc650dSSadaf Ebrahimi #endif
254*22dc650dSSadaf Ebrahimi 
255*22dc650dSSadaf Ebrahimi static const uint8_t *character_tables = NULL;
256*22dc650dSSadaf Ebrahimi 
257*22dc650dSSadaf Ebrahimi static uint32_t pcre2_options = 0;
258*22dc650dSSadaf Ebrahimi static uint32_t extra_options = 0;
259*22dc650dSSadaf Ebrahimi static PCRE2_SIZE heap_limit = PCRE2_UNSET;  /* Set by --heap-limit (kibibytes) */
260*22dc650dSSadaf Ebrahimi static uint32_t match_limit = 0;             /* Set by --match-limit */
261*22dc650dSSadaf Ebrahimi static uint32_t depth_limit = 0;             /* Set by --depth-limit (or its obsolete synonym --recursion-limit) */
262*22dc650dSSadaf Ebrahimi 
263*22dc650dSSadaf Ebrahimi static pcre2_compile_context *compile_context;
264*22dc650dSSadaf Ebrahimi static pcre2_match_context *match_context;
265*22dc650dSSadaf Ebrahimi static pcre2_match_data *match_data, *match_data_pair[2];
266*22dc650dSSadaf Ebrahimi static PCRE2_SIZE *offsets, *offsets_pair[2];
267*22dc650dSSadaf Ebrahimi static int match_data_toggle;
268*22dc650dSSadaf Ebrahimi static uint32_t offset_size;
269*22dc650dSSadaf Ebrahimi static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
270*22dc650dSSadaf Ebrahimi 
271*22dc650dSSadaf Ebrahimi static BOOL all_matches = FALSE;
272*22dc650dSSadaf Ebrahimi static BOOL case_restrict = FALSE;
273*22dc650dSSadaf Ebrahimi static BOOL count_only = FALSE;
274*22dc650dSSadaf Ebrahimi static BOOL do_colour = FALSE;
275*22dc650dSSadaf Ebrahimi #ifdef WIN32
276*22dc650dSSadaf Ebrahimi static BOOL do_ansi = FALSE;
277*22dc650dSSadaf Ebrahimi #endif
278*22dc650dSSadaf Ebrahimi static BOOL file_offsets = FALSE;
279*22dc650dSSadaf Ebrahimi static BOOL hyphenpending = FALSE;
280*22dc650dSSadaf Ebrahimi static BOOL invert = FALSE;
281*22dc650dSSadaf Ebrahimi static BOOL line_buffered = FALSE;
282*22dc650dSSadaf Ebrahimi static BOOL line_offsets = FALSE;
283*22dc650dSSadaf Ebrahimi static BOOL multiline = FALSE;
284*22dc650dSSadaf Ebrahimi static BOOL no_ucp = FALSE;
285*22dc650dSSadaf Ebrahimi static BOOL number = FALSE;
286*22dc650dSSadaf Ebrahimi static BOOL omit_zero_count = FALSE;
287*22dc650dSSadaf Ebrahimi static BOOL resource_error = FALSE;
288*22dc650dSSadaf Ebrahimi static BOOL quiet = FALSE;
289*22dc650dSSadaf Ebrahimi static BOOL show_total_count = FALSE;
290*22dc650dSSadaf Ebrahimi static BOOL silent = FALSE;
291*22dc650dSSadaf Ebrahimi static BOOL utf = FALSE;
292*22dc650dSSadaf Ebrahimi static BOOL posix_digit = FALSE;
293*22dc650dSSadaf Ebrahimi 
294*22dc650dSSadaf Ebrahimi static uint8_t utf8_buffer[8];
295*22dc650dSSadaf Ebrahimi 
296*22dc650dSSadaf Ebrahimi 
297*22dc650dSSadaf Ebrahimi /* Item in a singly linked list of --only-matching capturing numbers. */
298*22dc650dSSadaf Ebrahimi 
299*22dc650dSSadaf Ebrahimi typedef struct omstr {
300*22dc650dSSadaf Ebrahimi   struct omstr *next;  /* Link to the following item in the list */
301*22dc650dSSadaf Ebrahimi   int groupnum;        /* Capturing number held by this item */
302*22dc650dSSadaf Ebrahimi } omstr;
303*22dc650dSSadaf Ebrahimi 
304*22dc650dSSadaf Ebrahimi static omstr *only_matching = NULL;
305*22dc650dSSadaf Ebrahimi static omstr *only_matching_last = NULL;
306*22dc650dSSadaf Ebrahimi static int only_matching_count;
307*22dc650dSSadaf Ebrahimi 
308*22dc650dSSadaf Ebrahimi /* Structure for holding the two variables that describe a number chain. */
309*22dc650dSSadaf Ebrahimi 
310*22dc650dSSadaf Ebrahimi typedef struct omdatastr {
311*22dc650dSSadaf Ebrahimi   omstr **anchor;   /* Address of the variable anchoring the chain (e.g. &only_matching) */
312*22dc650dSSadaf Ebrahimi   omstr **lastptr;  /* Address of the variable for its last item (e.g. &only_matching_last) */
313*22dc650dSSadaf Ebrahimi } omdatastr;
314*22dc650dSSadaf Ebrahimi 
315*22dc650dSSadaf Ebrahimi static omdatastr only_matching_data = { &only_matching, &only_matching_last };
316*22dc650dSSadaf Ebrahimi 
317*22dc650dSSadaf Ebrahimi /* Structure for list of file names (for -f, --file-list, and --{in,ex}clude-from) */
318*22dc650dSSadaf Ebrahimi 
319*22dc650dSSadaf Ebrahimi typedef struct fnstr {
320*22dc650dSSadaf Ebrahimi   struct fnstr *next;  /* Link to the following item in the list */
321*22dc650dSSadaf Ebrahimi   char *name;          /* File name held by this item */
322*22dc650dSSadaf Ebrahimi } fnstr;
323*22dc650dSSadaf Ebrahimi 
324*22dc650dSSadaf Ebrahimi static fnstr *exclude_from = NULL;
325*22dc650dSSadaf Ebrahimi static fnstr *exclude_from_last = NULL;
326*22dc650dSSadaf Ebrahimi static fnstr *include_from = NULL;
327*22dc650dSSadaf Ebrahimi static fnstr *include_from_last = NULL;
328*22dc650dSSadaf Ebrahimi 
329*22dc650dSSadaf Ebrahimi static fnstr *file_lists = NULL;
330*22dc650dSSadaf Ebrahimi static fnstr *file_lists_last = NULL;
331*22dc650dSSadaf Ebrahimi static fnstr *pattern_files = NULL;
332*22dc650dSSadaf Ebrahimi static fnstr *pattern_files_last = NULL;
333*22dc650dSSadaf Ebrahimi 
334*22dc650dSSadaf Ebrahimi /* Structure for holding the two variables that describe a file name chain. */
335*22dc650dSSadaf Ebrahimi 
336*22dc650dSSadaf Ebrahimi typedef struct fndatastr {
337*22dc650dSSadaf Ebrahimi   fnstr **anchor;   /* Address of the variable anchoring the chain (e.g. &exclude_from) */
338*22dc650dSSadaf Ebrahimi   fnstr **lastptr;  /* Address of the variable for its last item (e.g. &exclude_from_last) */
339*22dc650dSSadaf Ebrahimi } fndatastr;
340*22dc650dSSadaf Ebrahimi 
341*22dc650dSSadaf Ebrahimi static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
342*22dc650dSSadaf Ebrahimi static fndatastr include_from_data = { &include_from, &include_from_last };
343*22dc650dSSadaf Ebrahimi static fndatastr file_lists_data = { &file_lists, &file_lists_last };
344*22dc650dSSadaf Ebrahimi static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
345*22dc650dSSadaf Ebrahimi 
346*22dc650dSSadaf Ebrahimi /* Structure for pattern and its compiled form; used for matching patterns and
347*22dc650dSSadaf Ebrahimi also for include/exclude patterns. Items are chained through "next". */
348*22dc650dSSadaf Ebrahimi 
349*22dc650dSSadaf Ebrahimi typedef struct patstr {
350*22dc650dSSadaf Ebrahimi   struct patstr *next;   /* Link to the following pattern in the chain */
351*22dc650dSSadaf Ebrahimi   char *string;          /* The pattern text */
352*22dc650dSSadaf Ebrahimi   PCRE2_SIZE length;     /* Presumably the length of "string" - confirm at the point of creation */
353*22dc650dSSadaf Ebrahimi   pcre2_code *compiled;  /* Compiled form of the pattern */
354*22dc650dSSadaf Ebrahimi } patstr;
355*22dc650dSSadaf Ebrahimi 
356*22dc650dSSadaf Ebrahimi static patstr *patterns = NULL;
357*22dc650dSSadaf Ebrahimi static patstr *patterns_last = NULL;
358*22dc650dSSadaf Ebrahimi static patstr *include_patterns = NULL;
359*22dc650dSSadaf Ebrahimi static patstr *include_patterns_last = NULL;
360*22dc650dSSadaf Ebrahimi static patstr *exclude_patterns = NULL;
361*22dc650dSSadaf Ebrahimi static patstr *exclude_patterns_last = NULL;
362*22dc650dSSadaf Ebrahimi static patstr *include_dir_patterns = NULL;
363*22dc650dSSadaf Ebrahimi static patstr *include_dir_patterns_last = NULL;
364*22dc650dSSadaf Ebrahimi static patstr *exclude_dir_patterns = NULL;
365*22dc650dSSadaf Ebrahimi static patstr *exclude_dir_patterns_last = NULL;
366*22dc650dSSadaf Ebrahimi 
367*22dc650dSSadaf Ebrahimi /* Structure holding the two variables that describe a pattern chain. A pointer
368*22dc650dSSadaf Ebrahimi to such structures is used for each appropriate option. */
369*22dc650dSSadaf Ebrahimi 
370*22dc650dSSadaf Ebrahimi typedef struct patdatastr {
371*22dc650dSSadaf Ebrahimi   patstr **anchor;   /* Address of the variable anchoring the chain (e.g. &patterns) */
372*22dc650dSSadaf Ebrahimi   patstr **lastptr;  /* Address of the variable for its last item (e.g. &patterns_last) */
373*22dc650dSSadaf Ebrahimi } patdatastr;
374*22dc650dSSadaf Ebrahimi 
375*22dc650dSSadaf Ebrahimi static patdatastr match_patdata = { &patterns, &patterns_last };
376*22dc650dSSadaf Ebrahimi static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
377*22dc650dSSadaf Ebrahimi static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
378*22dc650dSSadaf Ebrahimi static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
379*22dc650dSSadaf Ebrahimi static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
380*22dc650dSSadaf Ebrahimi 
381*22dc650dSSadaf Ebrahimi static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,  /* Same order as incexname[] */
382*22dc650dSSadaf Ebrahimi                                  &include_dir_patterns, &exclude_dir_patterns };
383*22dc650dSSadaf Ebrahimi 
384*22dc650dSSadaf Ebrahimi static const char *incexname[4] = { "--include", "--exclude",  /* Option name for the chain at the same index in incexlist[] */
385*22dc650dSSadaf Ebrahimi                                     "--include-dir", "--exclude-dir" };
386*22dc650dSSadaf Ebrahimi 
387*22dc650dSSadaf Ebrahimi /* Option data types (OP_xxx) and the structure that describes one option */
388*22dc650dSSadaf Ebrahimi 
389*22dc650dSSadaf Ebrahimi enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
390*22dc650dSSadaf Ebrahimi        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
391*22dc650dSSadaf Ebrahimi 
392*22dc650dSSadaf Ebrahimi typedef struct option_item {
393*22dc650dSSadaf Ebrahimi   int type;               /* One of the OP_xxx values above */
394*22dc650dSSadaf Ebrahimi   int one_char;           /* Single-letter option, or a negative N_xxx value if there is none */
395*22dc650dSSadaf Ebrahimi   void *dataptr;          /* Address of the variable or chain descriptor for the option's data, or NULL */
396*22dc650dSSadaf Ebrahimi   const char *long_name;  /* Long option name, optionally followed by "=" and a label for its data */
397*22dc650dSSadaf Ebrahimi   const char *help_text;  /* One-line description of the option */
398*22dc650dSSadaf Ebrahimi } option_item;
399*22dc650dSSadaf Ebrahimi 
400*22dc650dSSadaf Ebrahimi /* Options without a single-letter equivalent get a negative value. This can be
401*22dc650dSSadaf Ebrahimi used to identify them. */
402*22dc650dSSadaf Ebrahimi 
403*22dc650dSSadaf Ebrahimi #define N_COLOUR       (-1)
404*22dc650dSSadaf Ebrahimi #define N_EXCLUDE      (-2)
405*22dc650dSSadaf Ebrahimi #define N_EXCLUDE_DIR  (-3)
406*22dc650dSSadaf Ebrahimi #define N_HELP         (-4)
407*22dc650dSSadaf Ebrahimi #define N_INCLUDE      (-5)
408*22dc650dSSadaf Ebrahimi #define N_INCLUDE_DIR  (-6)
409*22dc650dSSadaf Ebrahimi #define N_LABEL        (-7)
410*22dc650dSSadaf Ebrahimi #define N_LOCALE       (-8)
411*22dc650dSSadaf Ebrahimi #define N_NULL         (-9)
412*22dc650dSSadaf Ebrahimi #define N_LOFFSETS     (-10)
413*22dc650dSSadaf Ebrahimi #define N_FOFFSETS     (-11)
414*22dc650dSSadaf Ebrahimi #define N_LBUFFER      (-12)
415*22dc650dSSadaf Ebrahimi #define N_H_LIMIT      (-13)
416*22dc650dSSadaf Ebrahimi #define N_M_LIMIT      (-14)
417*22dc650dSSadaf Ebrahimi #define N_M_LIMIT_DEP  (-15)
418*22dc650dSSadaf Ebrahimi #define N_BUFSIZE      (-16)
419*22dc650dSSadaf Ebrahimi #define N_NOJIT        (-17)
420*22dc650dSSadaf Ebrahimi #define N_FILE_LIST    (-18)
421*22dc650dSSadaf Ebrahimi #define N_BINARY_FILES (-19)
422*22dc650dSSadaf Ebrahimi #define N_EXCLUDE_FROM (-20)
423*22dc650dSSadaf Ebrahimi #define N_INCLUDE_FROM (-21)
424*22dc650dSSadaf Ebrahimi #define N_OM_SEPARATOR (-22)
425*22dc650dSSadaf Ebrahimi #define N_MAX_BUFSIZE  (-23)
426*22dc650dSSadaf Ebrahimi #define N_OM_CAPTURE   (-24)
427*22dc650dSSadaf Ebrahimi #define N_ALLABSK      (-25)
428*22dc650dSSadaf Ebrahimi #define N_POSIX_DIGIT  (-26)
429*22dc650dSSadaf Ebrahimi #define N_GROUP_SEPARATOR (-27)
430*22dc650dSSadaf Ebrahimi #define N_NO_GROUP_SEPARATOR (-28)
431*22dc650dSSadaf Ebrahimi 
432*22dc650dSSadaf Ebrahimi static option_item optionlist[] = {
433*22dc650dSSadaf Ebrahimi   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
434*22dc650dSSadaf Ebrahimi   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
435*22dc650dSSadaf Ebrahimi   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
436*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
437*22dc650dSSadaf Ebrahimi   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
438*22dc650dSSadaf Ebrahimi   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
439*22dc650dSSadaf Ebrahimi   { OP_SIZE,       N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
440*22dc650dSSadaf Ebrahimi   { OP_SIZE,       N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
441*22dc650dSSadaf Ebrahimi   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
442*22dc650dSSadaf Ebrahimi   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
443*22dc650dSSadaf Ebrahimi   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
444*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
445*22dc650dSSadaf Ebrahimi   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
446*22dc650dSSadaf Ebrahimi   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
447*22dc650dSSadaf Ebrahimi   { OP_NODATA, N_POSIX_DIGIT, NULL,             "posix-digit",   "\\d always matches [0-9], even in UTF/UCP mode" },
448*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'E',      NULL,              "case-restrict", "restrict case matching (no mix ASCII/non-ASCII)" },
449*22dc650dSSadaf Ebrahimi   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
450*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
451*22dc650dSSadaf Ebrahimi   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
452*22dc650dSSadaf Ebrahimi   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
453*22dc650dSSadaf Ebrahimi   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
454*22dc650dSSadaf Ebrahimi   { OP_STRING,     N_GROUP_SEPARATOR, &group_separator, "group-separator=text", "set separator between groups of lines" },
455*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
456*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
457*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
458*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
459*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
460*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
461*22dc650dSSadaf Ebrahimi   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
462*22dc650dSSadaf Ebrahimi   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
463*22dc650dSSadaf Ebrahimi   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
464*22dc650dSSadaf Ebrahimi   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
465*22dc650dSSadaf Ebrahimi   { OP_SIZE,       N_H_LIMIT, &heap_limit,      "heap-limit=number",  "set PCRE2 heap limit option (kibibytes)" },
466*22dc650dSSadaf Ebrahimi   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE2 match limit option" },
467*22dc650dSSadaf Ebrahimi   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
468*22dc650dSSadaf Ebrahimi   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
469*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
470*22dc650dSSadaf Ebrahimi   { OP_NUMBER,     'm',      &count_limit,      "max-count=number", "stop after <number> matched lines" },
471*22dc650dSSadaf Ebrahimi   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
472*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
473*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
474*22dc650dSSadaf Ebrahimi   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
475*22dc650dSSadaf Ebrahimi #else
476*22dc650dSSadaf Ebrahimi   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
477*22dc650dSSadaf Ebrahimi #endif
478*22dc650dSSadaf Ebrahimi   { OP_NODATA,     N_NO_GROUP_SEPARATOR, NULL,   "no-group-separator", "suppress separators between groups of lines" },
479*22dc650dSSadaf Ebrahimi   { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" },
480*22dc650dSSadaf Ebrahimi   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
481*22dc650dSSadaf Ebrahimi   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
482*22dc650dSSadaf Ebrahimi   { OP_U32NUMBER,  N_OM_CAPTURE, &capture_max,  "om-capture=n",  "set capture count for --only-matching" },
483*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'P',      NULL,              "no-ucp",        "do not enable UCP mode with Unicode" },
484*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
485*22dc650dSSadaf Ebrahimi   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
486*22dc650dSSadaf Ebrahimi   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
487*22dc650dSSadaf Ebrahimi   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
488*22dc650dSSadaf Ebrahimi   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
489*22dc650dSSadaf Ebrahimi   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
490*22dc650dSSadaf Ebrahimi   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
491*22dc650dSSadaf Ebrahimi   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
492*22dc650dSSadaf Ebrahimi   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
493*22dc650dSSadaf Ebrahimi   { OP_NODATA,    't',      NULL,              "total-count",   "print total count of matching lines" },
494*22dc650dSSadaf Ebrahimi   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF/Unicode" },
495*22dc650dSSadaf Ebrahimi   { OP_NODATA,    'U',      NULL,              "utf-allow-invalid", "use UTF/Unicode, allow for invalid code units" },
496*22dc650dSSadaf Ebrahimi   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
497*22dc650dSSadaf Ebrahimi   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
498*22dc650dSSadaf Ebrahimi   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
499*22dc650dSSadaf Ebrahimi   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
500*22dc650dSSadaf Ebrahimi   { OP_NODATA,   N_ALLABSK, NULL,              "allow-lookaround-bsk", "allow \\K in lookarounds" },
501*22dc650dSSadaf Ebrahimi   { OP_NODATA,    'Z',      NULL,              "null",          "output 0 byte after file names"  },
502*22dc650dSSadaf Ebrahimi   { OP_NODATA,    0,        NULL,               NULL,            NULL }
503*22dc650dSSadaf Ebrahimi };
504*22dc650dSSadaf Ebrahimi 
505*22dc650dSSadaf Ebrahimi /* Table of names for newline types. Must be kept in step with the definitions
506*22dc650dSSadaf Ebrahimi of PCRE2_NEWLINE_xx in pcre2.h. */
507*22dc650dSSadaf Ebrahimi 
static const char *newlines[] = {
  "DEFAULT",    /* 0 */
  "CR",         /* 1 */
  "LF",         /* 2 */
  "CRLF",       /* 3 */
  "ANY",        /* 4 */
  "ANYCRLF",    /* 5 */
  "NUL"         /* 6 */
};
510*22dc650dSSadaf Ebrahimi 
511*22dc650dSSadaf Ebrahimi /* UTF-8 tables  */
512*22dc650dSSadaf Ebrahimi 
/* Largest value that can be encoded with 1, 2, ... 6 bytes. ord2utf8() scans
this table to decide how many continuation bytes a code point needs. */

const int utf8_table1[] = {
  0x7f,
  0x7ff,
  0xffff,
  0x1fffff,
  0x3ffffff,
  0x7fffffff
};
const int utf8_table1_size = sizeof(utf8_table1) / sizeof(utf8_table1[0]);

/* Marker bits OR-ed into the first byte, indexed by the number of continuation
bytes that follow it (see ord2utf8()). */

const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };

/* Not referenced in this part of the file; presumably the masks for the data
bits of the first byte, indexed the same way as utf8_table2 - TODO confirm. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

/* Not referenced in this part of the file; presumably the number of additional
bytes, indexed by the low six bits of the first byte - TODO confirm. */

const char utf8_table4[] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
525*22dc650dSSadaf Ebrahimi 
526*22dc650dSSadaf Ebrahimi 
527*22dc650dSSadaf Ebrahimi #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
528*22dc650dSSadaf Ebrahimi /*************************************************
529*22dc650dSSadaf Ebrahimi *    Emulated memmove() for systems without it   *
530*22dc650dSSadaf Ebrahimi *************************************************/
531*22dc650dSSadaf Ebrahimi 
/* Stand-in for memmove() on systems that do not provide it. bcopy() does the
work when it is available. Otherwise the bytes are moved one at a time, working
downwards from the end when the destination lies above the source so that
overlapping regions are copied correctly. The destination pointer is returned. */

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t left = n;

if (to > from)
  {
  /* Start one past the end of each region and walk backwards. */
  to += n;
  from += n;
  while (left-- > 0) *(--to) = *(--from);
  }
else
  {
  while (left-- > 0) *to++ = *from++;
  }
return d;
#endif   /* not HAVE_BCOPY */
}
560*22dc650dSSadaf Ebrahimi #undef memmove
561*22dc650dSSadaf Ebrahimi #define memmove(d,s,n) emulated_memmove(d,s,n)
562*22dc650dSSadaf Ebrahimi #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
563*22dc650dSSadaf Ebrahimi 
564*22dc650dSSadaf Ebrahimi 
565*22dc650dSSadaf Ebrahimi 
566*22dc650dSSadaf Ebrahimi /*************************************************
567*22dc650dSSadaf Ebrahimi *           Convert code point to UTF-8          *
568*22dc650dSSadaf Ebrahimi *************************************************/
569*22dc650dSSadaf Ebrahimi 
570*22dc650dSSadaf Ebrahimi /* A static buffer is used. Returns the number of bytes. */
571*22dc650dSSadaf Ebrahimi 
572*22dc650dSSadaf Ebrahimi static int
ord2utf8(uint32_t value)573*22dc650dSSadaf Ebrahimi ord2utf8(uint32_t value)
574*22dc650dSSadaf Ebrahimi {
575*22dc650dSSadaf Ebrahimi int i, j;
576*22dc650dSSadaf Ebrahimi uint8_t *utf8bytes = utf8_buffer;
577*22dc650dSSadaf Ebrahimi for (i = 0; i < utf8_table1_size; i++)
578*22dc650dSSadaf Ebrahimi   if (value <= (uint32_t)utf8_table1[i]) break;
579*22dc650dSSadaf Ebrahimi utf8bytes += i;
580*22dc650dSSadaf Ebrahimi for (j = i; j > 0; j--)
581*22dc650dSSadaf Ebrahimi   {
582*22dc650dSSadaf Ebrahimi   *utf8bytes-- = 0x80 | (value & 0x3f);
583*22dc650dSSadaf Ebrahimi   value >>= 6;
584*22dc650dSSadaf Ebrahimi   }
585*22dc650dSSadaf Ebrahimi *utf8bytes = utf8_table2[i] | value;
586*22dc650dSSadaf Ebrahimi return i + 1;
587*22dc650dSSadaf Ebrahimi }
588*22dc650dSSadaf Ebrahimi 
589*22dc650dSSadaf Ebrahimi 
590*22dc650dSSadaf Ebrahimi 
591*22dc650dSSadaf Ebrahimi /*************************************************
592*22dc650dSSadaf Ebrahimi *         Case-independent string compare        *
593*22dc650dSSadaf Ebrahimi *************************************************/
594*22dc650dSSadaf Ebrahimi 
/* Compares two zero-terminated strings after passing each character through
tolower().

Arguments:
  str1       first string
  str2       second string

Returns:     0 if the strings are equal ignoring case
             1 if str1 has the greater character at the first difference
            -1 if str1 has the smaller character at the first difference
*/

static int
strcmpic(const char *str1, const char *str2)
{
unsigned int c1, c2;
while (*str1 != '\0' || *str2 != '\0')
  {
  /* Go via unsigned char: giving tolower() a negative plain char value is
  undefined behaviour. If one string ends first, its terminating zero compares
  unequal here, so neither pointer is read beyond its terminator. */
  c1 = (unsigned int)tolower((unsigned char)*str1++);
  c2 = (unsigned int)tolower((unsigned char)*str2++);
  if (c1 != c2) return (c1 > c2) ? 1 : -1;
  }
return 0;
}
607*22dc650dSSadaf Ebrahimi 
608*22dc650dSSadaf Ebrahimi 
609*22dc650dSSadaf Ebrahimi /*************************************************
610*22dc650dSSadaf Ebrahimi *         Parse GREP_COLORS                      *
611*22dc650dSSadaf Ebrahimi *************************************************/
612*22dc650dSSadaf Ebrahimi 
/* Pulls the value of "ms" out of a GREP_COLORS string, or the value of "mt"
when "ms=" does not occur. The value is copied into a static buffer, cut short
if it does not fit, so each call overwrites the result of the previous one.

Argument:  the string, possibly NULL
Returns:   the copied value, or NULL if the string is NULL or contains neither
           "ms=" nor "mt="
*/

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *src;
uint32_t n = 0;

if (gc == NULL) return NULL;

src = strstr(gc, "ms=");
if (src == NULL)
  {
  src = strstr(gc, "mt=");
  if (src == NULL) return NULL;
  }

/* Copy what follows the three-character key, stopping at a colon or the end
of the string, and always leaving room for the terminating zero. */

for (src += 3; n < sizeof(seq) - 1 && *src != ':' && *src != 0; src++)
  seq[n++] = *src;
seq[n] = 0;
return seq;
}
636*22dc650dSSadaf Ebrahimi 
637*22dc650dSSadaf Ebrahimi 
638*22dc650dSSadaf Ebrahimi /*************************************************
639*22dc650dSSadaf Ebrahimi *         Exit from the program                  *
640*22dc650dSSadaf Ebrahimi *************************************************/
641*22dc650dSSadaf Ebrahimi 
642*22dc650dSSadaf Ebrahimi /* If there has been a resource error, give a suitable message.
643*22dc650dSSadaf Ebrahimi 
644*22dc650dSSadaf Ebrahimi Argument:  the return code
645*22dc650dSSadaf Ebrahimi Returns:   does not return
646*22dc650dSSadaf Ebrahimi */
647*22dc650dSSadaf Ebrahimi 
648*22dc650dSSadaf Ebrahimi static void
pcre2grep_exit(int rc)649*22dc650dSSadaf Ebrahimi pcre2grep_exit(int rc)
650*22dc650dSSadaf Ebrahimi {
651*22dc650dSSadaf Ebrahimi /* VMS does exit codes differently: both exit(1) and exit(0) return with a
652*22dc650dSSadaf Ebrahimi status of 1, which is not helpful. To help with this problem, define a symbol
653*22dc650dSSadaf Ebrahimi (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
654*22dc650dSSadaf Ebrahimi therein. */
655*22dc650dSSadaf Ebrahimi 
656*22dc650dSSadaf Ebrahimi #ifdef __VMS
657*22dc650dSSadaf Ebrahimi   char val_buf[4];
658*22dc650dSSadaf Ebrahimi   $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
659*22dc650dSSadaf Ebrahimi   $DESCRIPTOR(sym_val, val_buf);
660*22dc650dSSadaf Ebrahimi   sprintf(val_buf, "%d", rc);
661*22dc650dSSadaf Ebrahimi   sym_val.dsc$w_length = strlen(val_buf);
662*22dc650dSSadaf Ebrahimi   lib$set_symbol(&sym_nam, &sym_val);
663*22dc650dSSadaf Ebrahimi #endif
664*22dc650dSSadaf Ebrahimi 
665*22dc650dSSadaf Ebrahimi if (resource_error)
666*22dc650dSSadaf Ebrahimi   {
667*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
668*22dc650dSSadaf Ebrahimi     "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
669*22dc650dSSadaf Ebrahimi     PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
670*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
671*22dc650dSSadaf Ebrahimi   }
672*22dc650dSSadaf Ebrahimi exit(rc);
673*22dc650dSSadaf Ebrahimi }
674*22dc650dSSadaf Ebrahimi 
675*22dc650dSSadaf Ebrahimi 
676*22dc650dSSadaf Ebrahimi /*************************************************
677*22dc650dSSadaf Ebrahimi *          Add item to chain of patterns         *
678*22dc650dSSadaf Ebrahimi *************************************************/
679*22dc650dSSadaf Ebrahimi 
680*22dc650dSSadaf Ebrahimi /* Used to add an item onto a chain, or just return an unconnected item if the
681*22dc650dSSadaf Ebrahimi "after" argument is NULL.
682*22dc650dSSadaf Ebrahimi 
683*22dc650dSSadaf Ebrahimi Arguments:
684*22dc650dSSadaf Ebrahimi   s          pattern string to add
685*22dc650dSSadaf Ebrahimi   patlen     length of pattern
686*22dc650dSSadaf Ebrahimi   after      if not NULL points to item to insert after
687*22dc650dSSadaf Ebrahimi 
688*22dc650dSSadaf Ebrahimi Returns:     new pattern block or NULL on error
689*22dc650dSSadaf Ebrahimi */
690*22dc650dSSadaf Ebrahimi 
691*22dc650dSSadaf Ebrahimi static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)692*22dc650dSSadaf Ebrahimi add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
693*22dc650dSSadaf Ebrahimi {
694*22dc650dSSadaf Ebrahimi patstr *p = (patstr *)malloc(sizeof(patstr));
695*22dc650dSSadaf Ebrahimi 
696*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - These won't be hit in normal testing. */
697*22dc650dSSadaf Ebrahimi 
698*22dc650dSSadaf Ebrahimi if (p == NULL)
699*22dc650dSSadaf Ebrahimi   {
700*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: malloc failed\n");
701*22dc650dSSadaf Ebrahimi   pcre2grep_exit(2);
702*22dc650dSSadaf Ebrahimi   }
703*22dc650dSSadaf Ebrahimi if (patlen > MAXPATLEN)
704*22dc650dSSadaf Ebrahimi   {
705*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
706*22dc650dSSadaf Ebrahimi     MAXPATLEN);
707*22dc650dSSadaf Ebrahimi   free(p);
708*22dc650dSSadaf Ebrahimi   return NULL;
709*22dc650dSSadaf Ebrahimi   }
710*22dc650dSSadaf Ebrahimi 
711*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
712*22dc650dSSadaf Ebrahimi 
713*22dc650dSSadaf Ebrahimi p->next = NULL;
714*22dc650dSSadaf Ebrahimi p->string = s;
715*22dc650dSSadaf Ebrahimi p->length = patlen;
716*22dc650dSSadaf Ebrahimi p->compiled = NULL;
717*22dc650dSSadaf Ebrahimi 
718*22dc650dSSadaf Ebrahimi if (after != NULL)
719*22dc650dSSadaf Ebrahimi   {
720*22dc650dSSadaf Ebrahimi   p->next = after->next;
721*22dc650dSSadaf Ebrahimi   after->next = p;
722*22dc650dSSadaf Ebrahimi   }
723*22dc650dSSadaf Ebrahimi return p;
724*22dc650dSSadaf Ebrahimi }
725*22dc650dSSadaf Ebrahimi 
726*22dc650dSSadaf Ebrahimi 
727*22dc650dSSadaf Ebrahimi /*************************************************
728*22dc650dSSadaf Ebrahimi *           Free chain of patterns               *
729*22dc650dSSadaf Ebrahimi *************************************************/
730*22dc650dSSadaf Ebrahimi 
731*22dc650dSSadaf Ebrahimi /* Used for several chains of patterns.
732*22dc650dSSadaf Ebrahimi 
733*22dc650dSSadaf Ebrahimi Argument: pointer to start of chain
734*22dc650dSSadaf Ebrahimi Returns:  nothing
735*22dc650dSSadaf Ebrahimi */
736*22dc650dSSadaf Ebrahimi 
737*22dc650dSSadaf Ebrahimi static void
free_pattern_chain(patstr * pc)738*22dc650dSSadaf Ebrahimi free_pattern_chain(patstr *pc)
739*22dc650dSSadaf Ebrahimi {
740*22dc650dSSadaf Ebrahimi while (pc != NULL)
741*22dc650dSSadaf Ebrahimi   {
742*22dc650dSSadaf Ebrahimi   patstr *p = pc;
743*22dc650dSSadaf Ebrahimi   pc = p->next;
744*22dc650dSSadaf Ebrahimi   if (p->compiled != NULL) pcre2_code_free(p->compiled);
745*22dc650dSSadaf Ebrahimi   free(p);
746*22dc650dSSadaf Ebrahimi   }
747*22dc650dSSadaf Ebrahimi }
748*22dc650dSSadaf Ebrahimi 
749*22dc650dSSadaf Ebrahimi 
750*22dc650dSSadaf Ebrahimi /*************************************************
751*22dc650dSSadaf Ebrahimi *           Free chain of file names             *
752*22dc650dSSadaf Ebrahimi *************************************************/
753*22dc650dSSadaf Ebrahimi 
754*22dc650dSSadaf Ebrahimi /*
755*22dc650dSSadaf Ebrahimi Argument: pointer to start of chain
756*22dc650dSSadaf Ebrahimi Returns:  nothing
757*22dc650dSSadaf Ebrahimi */
758*22dc650dSSadaf Ebrahimi 
759*22dc650dSSadaf Ebrahimi static void
free_file_chain(fnstr * fn)760*22dc650dSSadaf Ebrahimi free_file_chain(fnstr *fn)
761*22dc650dSSadaf Ebrahimi {
762*22dc650dSSadaf Ebrahimi while (fn != NULL)
763*22dc650dSSadaf Ebrahimi   {
764*22dc650dSSadaf Ebrahimi   fnstr *f = fn;
765*22dc650dSSadaf Ebrahimi   fn = f->next;
766*22dc650dSSadaf Ebrahimi   free(f);
767*22dc650dSSadaf Ebrahimi   }
768*22dc650dSSadaf Ebrahimi }
769*22dc650dSSadaf Ebrahimi 
770*22dc650dSSadaf Ebrahimi 
771*22dc650dSSadaf Ebrahimi /*************************************************
772*22dc650dSSadaf Ebrahimi *            OS-specific functions               *
773*22dc650dSSadaf Ebrahimi *************************************************/
774*22dc650dSSadaf Ebrahimi 
775*22dc650dSSadaf Ebrahimi /* These definitions are needed in all Windows environments, even those where
776*22dc650dSSadaf Ebrahimi Unix-style directory scanning can be used (see below). */
777*22dc650dSSadaf Ebrahimi 
778*22dc650dSSadaf Ebrahimi #ifdef WIN32
779*22dc650dSSadaf Ebrahimi 
780*22dc650dSSadaf Ebrahimi #ifndef STRICT
781*22dc650dSSadaf Ebrahimi # define STRICT
782*22dc650dSSadaf Ebrahimi #endif
783*22dc650dSSadaf Ebrahimi #ifndef WIN32_LEAN_AND_MEAN
784*22dc650dSSadaf Ebrahimi # define WIN32_LEAN_AND_MEAN
785*22dc650dSSadaf Ebrahimi #endif
786*22dc650dSSadaf Ebrahimi 
787*22dc650dSSadaf Ebrahimi #include <windows.h>
788*22dc650dSSadaf Ebrahimi 
/* Non-zero if the name contains a '*' or a '?'. */
#define iswild(name) (strpbrk((name), "*?") != NULL)

/* Convert ANSI BGR format to RGB used by Windows: bits 0 and 2 change places
and bit 1 stays where it is. */
#define BGR_RGB(x) ((((x) & 1) << 2) | ((x) & 2) | (((x) & 4) >> 2))
793*22dc650dSSadaf Ebrahimi 
/* Console state for coloured output under Windows. init_colour_output() sets
hstdout from GetStdHandle(), asks GetConsoleScreenBufferInfo() to fill in csbi,
and stores the result of decode_ANSI_colour() in match_colour. */
794*22dc650dSSadaf Ebrahimi static HANDLE hstdout;
795*22dc650dSSadaf Ebrahimi static CONSOLE_SCREEN_BUFFER_INFO csbi;
796*22dc650dSSadaf Ebrahimi static WORD match_colour;
797*22dc650dSSadaf Ebrahimi 
798*22dc650dSSadaf Ebrahimi static WORD
decode_ANSI_colour(const char * cs)799*22dc650dSSadaf Ebrahimi decode_ANSI_colour(const char *cs)
800*22dc650dSSadaf Ebrahimi {
801*22dc650dSSadaf Ebrahimi WORD result = csbi.wAttributes;
802*22dc650dSSadaf Ebrahimi while (*cs)
803*22dc650dSSadaf Ebrahimi   {
804*22dc650dSSadaf Ebrahimi   if (isdigit((unsigned char)(*cs)))
805*22dc650dSSadaf Ebrahimi     {
806*22dc650dSSadaf Ebrahimi     int code = atoi(cs);
807*22dc650dSSadaf Ebrahimi     if (code == 1) result |= 0x08;
808*22dc650dSSadaf Ebrahimi     else if (code == 4) result |= 0x8000;
809*22dc650dSSadaf Ebrahimi     else if (code == 5) result |= 0x80;
810*22dc650dSSadaf Ebrahimi     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
811*22dc650dSSadaf Ebrahimi     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
812*22dc650dSSadaf Ebrahimi     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
813*22dc650dSSadaf Ebrahimi     else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
814*22dc650dSSadaf Ebrahimi     /* aixterm high intensity colour codes */
815*22dc650dSSadaf Ebrahimi     else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
816*22dc650dSSadaf Ebrahimi     else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
817*22dc650dSSadaf Ebrahimi 
818*22dc650dSSadaf Ebrahimi     while (isdigit((unsigned char)(*cs))) cs++;
819*22dc650dSSadaf Ebrahimi     }
820*22dc650dSSadaf Ebrahimi   if (*cs) cs++;
821*22dc650dSSadaf Ebrahimi   }
822*22dc650dSSadaf Ebrahimi return result;
823*22dc650dSSadaf Ebrahimi }
824*22dc650dSSadaf Ebrahimi 
825*22dc650dSSadaf Ebrahimi 
826*22dc650dSSadaf Ebrahimi static void
init_colour_output()827*22dc650dSSadaf Ebrahimi init_colour_output()
828*22dc650dSSadaf Ebrahimi {
829*22dc650dSSadaf Ebrahimi if (do_colour)
830*22dc650dSSadaf Ebrahimi   {
831*22dc650dSSadaf Ebrahimi   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
832*22dc650dSSadaf Ebrahimi   /* This fails when redirected to con; try again if so. */
833*22dc650dSSadaf Ebrahimi   if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
834*22dc650dSSadaf Ebrahimi     {
835*22dc650dSSadaf Ebrahimi     HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
836*22dc650dSSadaf Ebrahimi       FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
837*22dc650dSSadaf Ebrahimi     GetConsoleScreenBufferInfo(hcon, &csbi);
838*22dc650dSSadaf Ebrahimi     CloseHandle(hcon);
839*22dc650dSSadaf Ebrahimi     }
840*22dc650dSSadaf Ebrahimi   match_colour = decode_ANSI_colour(colour_string);
841*22dc650dSSadaf Ebrahimi   /* No valid colour found - turn off colouring */
842*22dc650dSSadaf Ebrahimi   if (!match_colour) do_colour = FALSE;
843*22dc650dSSadaf Ebrahimi   }
844*22dc650dSSadaf Ebrahimi }
845*22dc650dSSadaf Ebrahimi 
846*22dc650dSSadaf Ebrahimi #endif  /* WIN32 */
847*22dc650dSSadaf Ebrahimi 
848*22dc650dSSadaf Ebrahimi 
849*22dc650dSSadaf Ebrahimi /* The following sets of functions are defined so that they can be made system
850*22dc650dSSadaf Ebrahimi specific. At present there are versions for Unix-style environments, Windows,
851*22dc650dSSadaf Ebrahimi native z/OS, and "no support". */
852*22dc650dSSadaf Ebrahimi 
853*22dc650dSSadaf Ebrahimi 
854*22dc650dSSadaf Ebrahimi /************* Directory scanning Unix-style and z/OS ***********/
855*22dc650dSSadaf Ebrahimi 
856*22dc650dSSadaf Ebrahimi #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
857*22dc650dSSadaf Ebrahimi #include <sys/types.h>
858*22dc650dSSadaf Ebrahimi #include <sys/stat.h>
859*22dc650dSSadaf Ebrahimi #include <dirent.h>
860*22dc650dSSadaf Ebrahimi 
861*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
862*22dc650dSSadaf Ebrahimi /************* Directory and PDS/E scanning for z/OS ***********/
863*22dc650dSSadaf Ebrahimi /************* z/OS looks mostly like Unix with USS ************/
864*22dc650dSSadaf Ebrahimi /* However, z/OS needs the #include statements in this header */
865*22dc650dSSadaf Ebrahimi #include "pcrzosfs.h"
866*22dc650dSSadaf Ebrahimi /* That header is not included in the main PCRE distribution because
867*22dc650dSSadaf Ebrahimi    other apparatus is needed to compile pcre2grep for z/OS. The header
868*22dc650dSSadaf Ebrahimi    can be found in the special z/OS distribution, which is available
869*22dc650dSSadaf Ebrahimi    from www.zaconsultants.net or from www.cbttape.org. */
870*22dc650dSSadaf Ebrahimi #endif
871*22dc650dSSadaf Ebrahimi 
872*22dc650dSSadaf Ebrahimi typedef DIR directory_type;
873*22dc650dSSadaf Ebrahimi #define FILESEP '/'
874*22dc650dSSadaf Ebrahimi 
static int
isdirectory(char *filename)
{
struct stat info;

/* A path that cannot be examined is reported as "not a directory", in the
expectation that opening it as a file will fail. */

return (stat(filename, &info) >= 0) ? S_ISDIR(info.st_mode) : 0;
}
883*22dc650dSSadaf Ebrahimi 
884*22dc650dSSadaf Ebrahimi static directory_type *
opendirectory(char * filename)885*22dc650dSSadaf Ebrahimi opendirectory(char *filename)
886*22dc650dSSadaf Ebrahimi {
887*22dc650dSSadaf Ebrahimi return opendir(filename);
888*22dc650dSSadaf Ebrahimi }
889*22dc650dSSadaf Ebrahimi 
890*22dc650dSSadaf Ebrahimi static char *
readdirectory(directory_type * dir)891*22dc650dSSadaf Ebrahimi readdirectory(directory_type *dir)
892*22dc650dSSadaf Ebrahimi {
893*22dc650dSSadaf Ebrahimi for (;;)
894*22dc650dSSadaf Ebrahimi   {
895*22dc650dSSadaf Ebrahimi   struct dirent *dent = readdir(dir);
896*22dc650dSSadaf Ebrahimi   if (dent == NULL) return NULL;
897*22dc650dSSadaf Ebrahimi   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
898*22dc650dSSadaf Ebrahimi     return dent->d_name;
899*22dc650dSSadaf Ebrahimi   }
900*22dc650dSSadaf Ebrahimi /* Control never reaches here */
901*22dc650dSSadaf Ebrahimi }
902*22dc650dSSadaf Ebrahimi 
903*22dc650dSSadaf Ebrahimi static void
closedirectory(directory_type * dir)904*22dc650dSSadaf Ebrahimi closedirectory(directory_type *dir)
905*22dc650dSSadaf Ebrahimi {
906*22dc650dSSadaf Ebrahimi closedir(dir);
907*22dc650dSSadaf Ebrahimi }
908*22dc650dSSadaf Ebrahimi 
909*22dc650dSSadaf Ebrahimi 
910*22dc650dSSadaf Ebrahimi /************* Test for regular file, Unix-style **********/
911*22dc650dSSadaf Ebrahimi 
static int
isregfile(char *filename)
{
struct stat info;

/* A path that cannot be examined is reported as a regular file, in the
expectation that opening it as a file will fail. */

return (stat(filename, &info) >= 0) ? S_ISREG(info.st_mode) : 1;
}
920*22dc650dSSadaf Ebrahimi 
921*22dc650dSSadaf Ebrahimi 
922*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
923*22dc650dSSadaf Ebrahimi /************* Test for a terminal in z/OS **********/
924*22dc650dSSadaf Ebrahimi /* isatty() does not work in a TSO environment, so always give FALSE.*/
925*22dc650dSSadaf Ebrahimi 
926*22dc650dSSadaf Ebrahimi static BOOL
is_stdout_tty(void)927*22dc650dSSadaf Ebrahimi is_stdout_tty(void)
928*22dc650dSSadaf Ebrahimi {
929*22dc650dSSadaf Ebrahimi return FALSE;
930*22dc650dSSadaf Ebrahimi }
931*22dc650dSSadaf Ebrahimi 
932*22dc650dSSadaf Ebrahimi static BOOL
is_file_tty(FILE * f)933*22dc650dSSadaf Ebrahimi is_file_tty(FILE *f)
934*22dc650dSSadaf Ebrahimi {
935*22dc650dSSadaf Ebrahimi return FALSE;
936*22dc650dSSadaf Ebrahimi }
937*22dc650dSSadaf Ebrahimi 
938*22dc650dSSadaf Ebrahimi 
939*22dc650dSSadaf Ebrahimi /************* Test for a terminal, Unix-style **********/
940*22dc650dSSadaf Ebrahimi 
941*22dc650dSSadaf Ebrahimi #else
942*22dc650dSSadaf Ebrahimi static BOOL
is_stdout_tty(void)943*22dc650dSSadaf Ebrahimi is_stdout_tty(void)
944*22dc650dSSadaf Ebrahimi {
945*22dc650dSSadaf Ebrahimi return isatty(fileno(stdout));
946*22dc650dSSadaf Ebrahimi }
947*22dc650dSSadaf Ebrahimi 
948*22dc650dSSadaf Ebrahimi static BOOL
is_file_tty(FILE * f)949*22dc650dSSadaf Ebrahimi is_file_tty(FILE *f)
950*22dc650dSSadaf Ebrahimi {
951*22dc650dSSadaf Ebrahimi return isatty(fileno(f));
952*22dc650dSSadaf Ebrahimi }
953*22dc650dSSadaf Ebrahimi #endif
954*22dc650dSSadaf Ebrahimi 
955*22dc650dSSadaf Ebrahimi 
956*22dc650dSSadaf Ebrahimi /************* Print optionally coloured match Unix-style and z/OS **********/
957*22dc650dSSadaf Ebrahimi 
958*22dc650dSSadaf Ebrahimi static void
print_match(const void * buf,int length)959*22dc650dSSadaf Ebrahimi print_match(const void *buf, int length)
960*22dc650dSSadaf Ebrahimi {
961*22dc650dSSadaf Ebrahimi if (length == 0) return;
962*22dc650dSSadaf Ebrahimi if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
963*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(buf, 1, length, stdout);
964*22dc650dSSadaf Ebrahimi if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
965*22dc650dSSadaf Ebrahimi }
966*22dc650dSSadaf Ebrahimi 
967*22dc650dSSadaf Ebrahimi /* End of Unix-style or native z/OS environment functions. */
968*22dc650dSSadaf Ebrahimi 
969*22dc650dSSadaf Ebrahimi 
970*22dc650dSSadaf Ebrahimi /************* Directory scanning in Windows ***********/
971*22dc650dSSadaf Ebrahimi 
972*22dc650dSSadaf Ebrahimi /* I (Philip Hazel) have no means of testing this code. It was contributed by
973*22dc650dSSadaf Ebrahimi Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
974*22dc650dSSadaf Ebrahimi when it did not exist. David Byron added a patch that moved the #include of
975*22dc650dSSadaf Ebrahimi <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
976*22dc650dSSadaf Ebrahimi */
977*22dc650dSSadaf Ebrahimi 
978*22dc650dSSadaf Ebrahimi #elif defined WIN32
979*22dc650dSSadaf Ebrahimi 
980*22dc650dSSadaf Ebrahimi #ifndef INVALID_FILE_ATTRIBUTES
981*22dc650dSSadaf Ebrahimi #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
982*22dc650dSSadaf Ebrahimi #endif
983*22dc650dSSadaf Ebrahimi 
/* State of one directory scan under Windows. opendirectory() creates it,
readdirectory() steps through it, and closedirectory() releases it. */
984*22dc650dSSadaf Ebrahimi typedef struct directory_type
985*22dc650dSSadaf Ebrahimi {
/* Search handle returned by FindFirstFile() */
986*22dc650dSSadaf Ebrahimi HANDLE handle;
/* TRUE until readdirectory() has used the entry that FindFirstFile() found */
987*22dc650dSSadaf Ebrahimi BOOL first;
/* The entry most recently found */
988*22dc650dSSadaf Ebrahimi WIN32_FIND_DATA data;
989*22dc650dSSadaf Ebrahimi } directory_type;
990*22dc650dSSadaf Ebrahimi 
991*22dc650dSSadaf Ebrahimi #define FILESEP '/'
992*22dc650dSSadaf Ebrahimi 
993*22dc650dSSadaf Ebrahimi int
isdirectory(char * filename)994*22dc650dSSadaf Ebrahimi isdirectory(char *filename)
995*22dc650dSSadaf Ebrahimi {
996*22dc650dSSadaf Ebrahimi DWORD attr = GetFileAttributes(filename);
997*22dc650dSSadaf Ebrahimi if (attr == INVALID_FILE_ATTRIBUTES)
998*22dc650dSSadaf Ebrahimi   return 0;
999*22dc650dSSadaf Ebrahimi return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
1000*22dc650dSSadaf Ebrahimi }
1001*22dc650dSSadaf Ebrahimi 
1002*22dc650dSSadaf Ebrahimi directory_type *
opendirectory(char * filename)1003*22dc650dSSadaf Ebrahimi opendirectory(char *filename)
1004*22dc650dSSadaf Ebrahimi {
1005*22dc650dSSadaf Ebrahimi size_t len;
1006*22dc650dSSadaf Ebrahimi char *pattern;
1007*22dc650dSSadaf Ebrahimi directory_type *dir;
1008*22dc650dSSadaf Ebrahimi DWORD err;
1009*22dc650dSSadaf Ebrahimi len = strlen(filename);
1010*22dc650dSSadaf Ebrahimi pattern = (char *)malloc(len + 3);
1011*22dc650dSSadaf Ebrahimi dir = (directory_type *)malloc(sizeof(*dir));
1012*22dc650dSSadaf Ebrahimi if ((pattern == NULL) || (dir == NULL))
1013*22dc650dSSadaf Ebrahimi   {
1014*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: malloc failed\n");
1015*22dc650dSSadaf Ebrahimi   pcre2grep_exit(2);
1016*22dc650dSSadaf Ebrahimi   }
1017*22dc650dSSadaf Ebrahimi memcpy(pattern, filename, len);
1018*22dc650dSSadaf Ebrahimi if (iswild(filename))
1019*22dc650dSSadaf Ebrahimi   pattern[len] = 0;
1020*22dc650dSSadaf Ebrahimi else
1021*22dc650dSSadaf Ebrahimi   memcpy(&(pattern[len]), "\\*", 3);
1022*22dc650dSSadaf Ebrahimi dir->handle = FindFirstFile(pattern, &(dir->data));
1023*22dc650dSSadaf Ebrahimi if (dir->handle != INVALID_HANDLE_VALUE)
1024*22dc650dSSadaf Ebrahimi   {
1025*22dc650dSSadaf Ebrahimi   free(pattern);
1026*22dc650dSSadaf Ebrahimi   dir->first = TRUE;
1027*22dc650dSSadaf Ebrahimi   return dir;
1028*22dc650dSSadaf Ebrahimi   }
1029*22dc650dSSadaf Ebrahimi err = GetLastError();
1030*22dc650dSSadaf Ebrahimi free(pattern);
1031*22dc650dSSadaf Ebrahimi free(dir);
1032*22dc650dSSadaf Ebrahimi errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1033*22dc650dSSadaf Ebrahimi return NULL;
1034*22dc650dSSadaf Ebrahimi }
1035*22dc650dSSadaf Ebrahimi 
1036*22dc650dSSadaf Ebrahimi char *
readdirectory(directory_type * dir)1037*22dc650dSSadaf Ebrahimi readdirectory(directory_type *dir)
1038*22dc650dSSadaf Ebrahimi {
1039*22dc650dSSadaf Ebrahimi for (;;)
1040*22dc650dSSadaf Ebrahimi   {
1041*22dc650dSSadaf Ebrahimi   if (!dir->first)
1042*22dc650dSSadaf Ebrahimi     {
1043*22dc650dSSadaf Ebrahimi     if (!FindNextFile(dir->handle, &(dir->data)))
1044*22dc650dSSadaf Ebrahimi       return NULL;
1045*22dc650dSSadaf Ebrahimi     }
1046*22dc650dSSadaf Ebrahimi   else
1047*22dc650dSSadaf Ebrahimi     {
1048*22dc650dSSadaf Ebrahimi     dir->first = FALSE;
1049*22dc650dSSadaf Ebrahimi     }
1050*22dc650dSSadaf Ebrahimi   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1051*22dc650dSSadaf Ebrahimi     return dir->data.cFileName;
1052*22dc650dSSadaf Ebrahimi   }
1053*22dc650dSSadaf Ebrahimi #ifndef _MSC_VER
1054*22dc650dSSadaf Ebrahimi return NULL;   /* Keep compiler happy; never executed */
1055*22dc650dSSadaf Ebrahimi #endif
1056*22dc650dSSadaf Ebrahimi }
1057*22dc650dSSadaf Ebrahimi 
1058*22dc650dSSadaf Ebrahimi void
closedirectory(directory_type * dir)1059*22dc650dSSadaf Ebrahimi closedirectory(directory_type *dir)
1060*22dc650dSSadaf Ebrahimi {
1061*22dc650dSSadaf Ebrahimi FindClose(dir->handle);
1062*22dc650dSSadaf Ebrahimi free(dir);
1063*22dc650dSSadaf Ebrahimi }
1064*22dc650dSSadaf Ebrahimi 
1065*22dc650dSSadaf Ebrahimi 
1066*22dc650dSSadaf Ebrahimi /************* Test for regular file in Windows **********/
1067*22dc650dSSadaf Ebrahimi 
/* No direct test for a regular file is used here; any path that
isdirectory() does not report as a directory is treated as regular. */

int
isregfile(char *filename)
{
return isdirectory(filename) ? 0 : 1;
}
1075*22dc650dSSadaf Ebrahimi 
1076*22dc650dSSadaf Ebrahimi 
1077*22dc650dSSadaf Ebrahimi /************* Test for a terminal in Windows **********/
1078*22dc650dSSadaf Ebrahimi 
1079*22dc650dSSadaf Ebrahimi static BOOL
is_stdout_tty(void)1080*22dc650dSSadaf Ebrahimi is_stdout_tty(void)
1081*22dc650dSSadaf Ebrahimi {
1082*22dc650dSSadaf Ebrahimi return _isatty(_fileno(stdout));
1083*22dc650dSSadaf Ebrahimi }
1084*22dc650dSSadaf Ebrahimi 
1085*22dc650dSSadaf Ebrahimi static BOOL
is_file_tty(FILE * f)1086*22dc650dSSadaf Ebrahimi is_file_tty(FILE *f)
1087*22dc650dSSadaf Ebrahimi {
1088*22dc650dSSadaf Ebrahimi return _isatty(_fileno(f));
1089*22dc650dSSadaf Ebrahimi }
1090*22dc650dSSadaf Ebrahimi 
1091*22dc650dSSadaf Ebrahimi 
1092*22dc650dSSadaf Ebrahimi /************* Print optionally coloured match in Windows **********/
1093*22dc650dSSadaf Ebrahimi 
1094*22dc650dSSadaf Ebrahimi static void
print_match(const void * buf,int length)1095*22dc650dSSadaf Ebrahimi print_match(const void *buf, int length)
1096*22dc650dSSadaf Ebrahimi {
1097*22dc650dSSadaf Ebrahimi if (length == 0) return;
1098*22dc650dSSadaf Ebrahimi if (do_colour)
1099*22dc650dSSadaf Ebrahimi   {
1100*22dc650dSSadaf Ebrahimi   if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1101*22dc650dSSadaf Ebrahimi     else SetConsoleTextAttribute(hstdout, match_colour);
1102*22dc650dSSadaf Ebrahimi   }
1103*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(buf, 1, length, stdout);
1104*22dc650dSSadaf Ebrahimi if (do_colour)
1105*22dc650dSSadaf Ebrahimi   {
1106*22dc650dSSadaf Ebrahimi   if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1107*22dc650dSSadaf Ebrahimi     else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1108*22dc650dSSadaf Ebrahimi   }
1109*22dc650dSSadaf Ebrahimi }
1110*22dc650dSSadaf Ebrahimi 
1111*22dc650dSSadaf Ebrahimi /* End of Windows functions */
1112*22dc650dSSadaf Ebrahimi 
1113*22dc650dSSadaf Ebrahimi 
1114*22dc650dSSadaf Ebrahimi /************* Directory scanning when we can't do it ***********/
1115*22dc650dSSadaf Ebrahimi 
1116*22dc650dSSadaf Ebrahimi /* The type is void, and apart from isdirectory(), the functions do nothing. */
1117*22dc650dSSadaf Ebrahimi 
1118*22dc650dSSadaf Ebrahimi #else
1119*22dc650dSSadaf Ebrahimi 
#define FILESEP 0
typedef void directory_type;

/* Stub: nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Stub: a directory can never be opened, so the result is always NULL. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return NULL;
}

/* Stub: there is never an entry to return. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return NULL;
}

/* Stub: nothing to release. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
1127*22dc650dSSadaf Ebrahimi 
1128*22dc650dSSadaf Ebrahimi 
1129*22dc650dSSadaf Ebrahimi /************* Test for regular file when we can't do it **********/
1130*22dc650dSSadaf Ebrahimi 
/* With no way of testing, every file is taken to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
1134*22dc650dSSadaf Ebrahimi 
1135*22dc650dSSadaf Ebrahimi 
1136*22dc650dSSadaf Ebrahimi /************* Test for a terminal when we can't do it **********/
1137*22dc650dSSadaf Ebrahimi 
1138*22dc650dSSadaf Ebrahimi static BOOL
is_stdout_tty(void)1139*22dc650dSSadaf Ebrahimi is_stdout_tty(void)
1140*22dc650dSSadaf Ebrahimi {
1141*22dc650dSSadaf Ebrahimi return FALSE;
1142*22dc650dSSadaf Ebrahimi }
1143*22dc650dSSadaf Ebrahimi 
1144*22dc650dSSadaf Ebrahimi static BOOL
is_file_tty(FILE * f)1145*22dc650dSSadaf Ebrahimi is_file_tty(FILE *f)
1146*22dc650dSSadaf Ebrahimi {
1147*22dc650dSSadaf Ebrahimi return FALSE;
1148*22dc650dSSadaf Ebrahimi }
1149*22dc650dSSadaf Ebrahimi 
1150*22dc650dSSadaf Ebrahimi 
1151*22dc650dSSadaf Ebrahimi /************* Print optionally coloured match when we can't do it **********/
1152*22dc650dSSadaf Ebrahimi 
/* Write length bytes from buf to stdout with no colouring. A zero length
writes nothing. */

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1159*22dc650dSSadaf Ebrahimi 
1160*22dc650dSSadaf Ebrahimi #endif  /* End of system-specific functions */
1161*22dc650dSSadaf Ebrahimi 
1162*22dc650dSSadaf Ebrahimi 
1163*22dc650dSSadaf Ebrahimi 
1164*22dc650dSSadaf Ebrahimi #ifndef HAVE_STRERROR
1165*22dc650dSSadaf Ebrahimi /*************************************************
1166*22dc650dSSadaf Ebrahimi *     Provide strerror() for non-ANSI libraries  *
1167*22dc650dSSadaf Ebrahimi *************************************************/
1168*22dc650dSSadaf Ebrahimi 
/* Replacement for libraries that lack strerror() (SunOS4 is one example).
The message is looked up in the system's sys_errlist table; a number outside
the range 0 .. sys_nerr-1 yields a fixed "unknown" text instead. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1182*22dc650dSSadaf Ebrahimi #endif /* HAVE_STRERROR */
1183*22dc650dSSadaf Ebrahimi 
1184*22dc650dSSadaf Ebrahimi 
1185*22dc650dSSadaf Ebrahimi 
1186*22dc650dSSadaf Ebrahimi /*************************************************
1187*22dc650dSSadaf Ebrahimi *                Usage function                  *
1188*22dc650dSSadaf Ebrahimi *************************************************/
1189*22dc650dSSadaf Ebrahimi 
1190*22dc650dSSadaf Ebrahimi static int
usage(int rc)1191*22dc650dSSadaf Ebrahimi usage(int rc)
1192*22dc650dSSadaf Ebrahimi {
1193*22dc650dSSadaf Ebrahimi option_item *op;
1194*22dc650dSSadaf Ebrahimi fprintf(stderr, "Usage: pcre2grep [-");
1195*22dc650dSSadaf Ebrahimi for (op = optionlist; op->one_char != 0; op++)
1196*22dc650dSSadaf Ebrahimi   {
1197*22dc650dSSadaf Ebrahimi   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1198*22dc650dSSadaf Ebrahimi   }
1199*22dc650dSSadaf Ebrahimi fprintf(stderr, "] [long options] [pattern] [files]\n");
1200*22dc650dSSadaf Ebrahimi fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1201*22dc650dSSadaf Ebrahimi   "options.\n");
1202*22dc650dSSadaf Ebrahimi return rc;
1203*22dc650dSSadaf Ebrahimi }
1204*22dc650dSSadaf Ebrahimi 
1205*22dc650dSSadaf Ebrahimi 
1206*22dc650dSSadaf Ebrahimi 
1207*22dc650dSSadaf Ebrahimi /*************************************************
1208*22dc650dSSadaf Ebrahimi *                Help function                   *
1209*22dc650dSSadaf Ebrahimi *************************************************/
1210*22dc650dSSadaf Ebrahimi 
1211*22dc650dSSadaf Ebrahimi static void
help(void)1212*22dc650dSSadaf Ebrahimi help(void)
1213*22dc650dSSadaf Ebrahimi {
1214*22dc650dSSadaf Ebrahimi option_item *op;
1215*22dc650dSSadaf Ebrahimi 
1216*22dc650dSSadaf Ebrahimi printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1217*22dc650dSSadaf Ebrahimi printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1218*22dc650dSSadaf Ebrahimi printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1219*22dc650dSSadaf Ebrahimi 
1220*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT
1221*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1222*22dc650dSSadaf Ebrahimi printf("All callout scripts in patterns are supported." STDOUT_NL);
1223*22dc650dSSadaf Ebrahimi #else
1224*22dc650dSSadaf Ebrahimi printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1225*22dc650dSSadaf Ebrahimi #endif
1226*22dc650dSSadaf Ebrahimi #else
1227*22dc650dSSadaf Ebrahimi printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1228*22dc650dSSadaf Ebrahimi #endif
1229*22dc650dSSadaf Ebrahimi 
1230*22dc650dSSadaf Ebrahimi printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1231*22dc650dSSadaf Ebrahimi 
1232*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
1233*22dc650dSSadaf Ebrahimi printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1234*22dc650dSSadaf Ebrahimi #endif
1235*22dc650dSSadaf Ebrahimi 
1236*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
1237*22dc650dSSadaf Ebrahimi printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1238*22dc650dSSadaf Ebrahimi #endif
1239*22dc650dSSadaf Ebrahimi 
1240*22dc650dSSadaf Ebrahimi #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1241*22dc650dSSadaf Ebrahimi printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1242*22dc650dSSadaf Ebrahimi #else
1243*22dc650dSSadaf Ebrahimi printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1244*22dc650dSSadaf Ebrahimi #endif
1245*22dc650dSSadaf Ebrahimi 
1246*22dc650dSSadaf Ebrahimi printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1247*22dc650dSSadaf Ebrahimi printf("Options:" STDOUT_NL);
1248*22dc650dSSadaf Ebrahimi 
1249*22dc650dSSadaf Ebrahimi for (op = optionlist; op->one_char != 0; op++)
1250*22dc650dSSadaf Ebrahimi   {
1251*22dc650dSSadaf Ebrahimi   int n;
1252*22dc650dSSadaf Ebrahimi   char s[4];
1253*22dc650dSSadaf Ebrahimi 
1254*22dc650dSSadaf Ebrahimi   if (op->one_char > 0 && (op->long_name)[0] == 0)
1255*22dc650dSSadaf Ebrahimi     n = 31 - printf("  -%c", op->one_char);
1256*22dc650dSSadaf Ebrahimi   else
1257*22dc650dSSadaf Ebrahimi     {
1258*22dc650dSSadaf Ebrahimi     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1259*22dc650dSSadaf Ebrahimi       else strcpy(s, "   ");
1260*22dc650dSSadaf Ebrahimi     n = 31 - printf("  %s --%s", s, op->long_name);
1261*22dc650dSSadaf Ebrahimi     }
1262*22dc650dSSadaf Ebrahimi 
1263*22dc650dSSadaf Ebrahimi   if (n < 1) n = 1;
1264*22dc650dSSadaf Ebrahimi   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
1265*22dc650dSSadaf Ebrahimi   }
1266*22dc650dSSadaf Ebrahimi 
1267*22dc650dSSadaf Ebrahimi printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1268*22dc650dSSadaf Ebrahimi printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1269*22dc650dSSadaf Ebrahimi printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1270*22dc650dSSadaf Ebrahimi printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1271*22dc650dSSadaf Ebrahimi printf("space is removed and blank lines are ignored." STDOUT_NL);
1272*22dc650dSSadaf Ebrahimi printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1273*22dc650dSSadaf Ebrahimi 
1274*22dc650dSSadaf Ebrahimi printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1275*22dc650dSSadaf Ebrahimi printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1276*22dc650dSSadaf Ebrahimi }
1277*22dc650dSSadaf Ebrahimi 
1278*22dc650dSSadaf Ebrahimi 
1279*22dc650dSSadaf Ebrahimi 
1280*22dc650dSSadaf Ebrahimi /*************************************************
1281*22dc650dSSadaf Ebrahimi *            Test exclude/includes               *
1282*22dc650dSSadaf Ebrahimi *************************************************/
1283*22dc650dSSadaf Ebrahimi 
1284*22dc650dSSadaf Ebrahimi /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1285*22dc650dSSadaf Ebrahimi there are no includes, the path must match an include pattern.
1286*22dc650dSSadaf Ebrahimi 
1287*22dc650dSSadaf Ebrahimi Arguments:
1288*22dc650dSSadaf Ebrahimi   path      the path to be matched
1289*22dc650dSSadaf Ebrahimi   ip        the chain of include patterns
1290*22dc650dSSadaf Ebrahimi   ep        the chain of exclude patterns
1291*22dc650dSSadaf Ebrahimi 
1292*22dc650dSSadaf Ebrahimi Returns:    TRUE if the path is not excluded
1293*22dc650dSSadaf Ebrahimi */
1294*22dc650dSSadaf Ebrahimi 
1295*22dc650dSSadaf Ebrahimi static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1296*22dc650dSSadaf Ebrahimi test_incexc(char *path, patstr *ip, patstr *ep)
1297*22dc650dSSadaf Ebrahimi {
1298*22dc650dSSadaf Ebrahimi int plen = strlen((const char *)path);
1299*22dc650dSSadaf Ebrahimi 
1300*22dc650dSSadaf Ebrahimi for (; ep != NULL; ep = ep->next)
1301*22dc650dSSadaf Ebrahimi   {
1302*22dc650dSSadaf Ebrahimi   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1303*22dc650dSSadaf Ebrahimi     return FALSE;
1304*22dc650dSSadaf Ebrahimi   }
1305*22dc650dSSadaf Ebrahimi 
1306*22dc650dSSadaf Ebrahimi if (ip == NULL) return TRUE;
1307*22dc650dSSadaf Ebrahimi 
1308*22dc650dSSadaf Ebrahimi for (; ip != NULL; ip = ip->next)
1309*22dc650dSSadaf Ebrahimi   {
1310*22dc650dSSadaf Ebrahimi   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1311*22dc650dSSadaf Ebrahimi     return TRUE;
1312*22dc650dSSadaf Ebrahimi   }
1313*22dc650dSSadaf Ebrahimi 
1314*22dc650dSSadaf Ebrahimi return FALSE;
1315*22dc650dSSadaf Ebrahimi }
1316*22dc650dSSadaf Ebrahimi 
1317*22dc650dSSadaf Ebrahimi 
1318*22dc650dSSadaf Ebrahimi 
1319*22dc650dSSadaf Ebrahimi /*************************************************
1320*22dc650dSSadaf Ebrahimi *         Decode integer argument value          *
1321*22dc650dSSadaf Ebrahimi *************************************************/
1322*22dc650dSSadaf Ebrahimi 
1323*22dc650dSSadaf Ebrahimi /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1324*22dc650dSSadaf Ebrahimi because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1325*22dc650dSSadaf Ebrahimi just keep it simple.
1326*22dc650dSSadaf Ebrahimi 
1327*22dc650dSSadaf Ebrahimi Arguments:
1328*22dc650dSSadaf Ebrahimi   option_data   the option data string
1329*22dc650dSSadaf Ebrahimi   op            the option item (for error messages)
1330*22dc650dSSadaf Ebrahimi   longop        TRUE if option given in long form
1331*22dc650dSSadaf Ebrahimi 
1332*22dc650dSSadaf Ebrahimi Returns:        a long integer
1333*22dc650dSSadaf Ebrahimi */
1334*22dc650dSSadaf Ebrahimi 
1335*22dc650dSSadaf Ebrahimi static long int
decode_number(char * option_data,option_item * op,BOOL longop)1336*22dc650dSSadaf Ebrahimi decode_number(char *option_data, option_item *op, BOOL longop)
1337*22dc650dSSadaf Ebrahimi {
1338*22dc650dSSadaf Ebrahimi unsigned long int n = 0;
1339*22dc650dSSadaf Ebrahimi char *endptr = option_data;
1340*22dc650dSSadaf Ebrahimi while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1341*22dc650dSSadaf Ebrahimi while (isdigit((unsigned char)(*endptr)))
1342*22dc650dSSadaf Ebrahimi   n = n * 10 + (int)(*endptr++ - '0');
1343*22dc650dSSadaf Ebrahimi if (toupper(*endptr) == 'K')
1344*22dc650dSSadaf Ebrahimi   {
1345*22dc650dSSadaf Ebrahimi   n *= 1024;
1346*22dc650dSSadaf Ebrahimi   endptr++;
1347*22dc650dSSadaf Ebrahimi   }
1348*22dc650dSSadaf Ebrahimi else if (toupper(*endptr) == 'M')
1349*22dc650dSSadaf Ebrahimi   {
1350*22dc650dSSadaf Ebrahimi   n *= 1024*1024;
1351*22dc650dSSadaf Ebrahimi   endptr++;
1352*22dc650dSSadaf Ebrahimi   }
1353*22dc650dSSadaf Ebrahimi 
1354*22dc650dSSadaf Ebrahimi if (*endptr != 0)   /* Error */
1355*22dc650dSSadaf Ebrahimi   {
1356*22dc650dSSadaf Ebrahimi   if (longop)
1357*22dc650dSSadaf Ebrahimi     {
1358*22dc650dSSadaf Ebrahimi     char *equals = strchr(op->long_name, '=');
1359*22dc650dSSadaf Ebrahimi     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1360*22dc650dSSadaf Ebrahimi       (int)(equals - op->long_name);
1361*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1362*22dc650dSSadaf Ebrahimi       option_data, nlen, op->long_name);
1363*22dc650dSSadaf Ebrahimi     }
1364*22dc650dSSadaf Ebrahimi   else
1365*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1366*22dc650dSSadaf Ebrahimi       option_data, op->one_char);
1367*22dc650dSSadaf Ebrahimi   pcre2grep_exit(usage(2));
1368*22dc650dSSadaf Ebrahimi   }
1369*22dc650dSSadaf Ebrahimi 
1370*22dc650dSSadaf Ebrahimi return n;
1371*22dc650dSSadaf Ebrahimi }
1372*22dc650dSSadaf Ebrahimi 
1373*22dc650dSSadaf Ebrahimi 
1374*22dc650dSSadaf Ebrahimi 
1375*22dc650dSSadaf Ebrahimi /*************************************************
1376*22dc650dSSadaf Ebrahimi *       Add item to a chain of numbers           *
1377*22dc650dSSadaf Ebrahimi *************************************************/
1378*22dc650dSSadaf Ebrahimi 
1379*22dc650dSSadaf Ebrahimi /* Used to add an item onto a chain, or just return an unconnected item if the
1380*22dc650dSSadaf Ebrahimi "after" argument is NULL.
1381*22dc650dSSadaf Ebrahimi 
1382*22dc650dSSadaf Ebrahimi Arguments:
1383*22dc650dSSadaf Ebrahimi   n          the number to add
1384*22dc650dSSadaf Ebrahimi   after      if not NULL points to item to insert after
1385*22dc650dSSadaf Ebrahimi 
1386*22dc650dSSadaf Ebrahimi Returns:     new number block
1387*22dc650dSSadaf Ebrahimi */
1388*22dc650dSSadaf Ebrahimi 
1389*22dc650dSSadaf Ebrahimi static omstr *
add_number(int n,omstr * after)1390*22dc650dSSadaf Ebrahimi add_number(int n, omstr *after)
1391*22dc650dSSadaf Ebrahimi {
1392*22dc650dSSadaf Ebrahimi omstr *om = (omstr *)malloc(sizeof(omstr));
1393*22dc650dSSadaf Ebrahimi 
1394*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - These lines won't be hit in normal testing. */
1395*22dc650dSSadaf Ebrahimi 
1396*22dc650dSSadaf Ebrahimi if (om == NULL)
1397*22dc650dSSadaf Ebrahimi   {
1398*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: malloc failed\n");
1399*22dc650dSSadaf Ebrahimi   pcre2grep_exit(2);
1400*22dc650dSSadaf Ebrahimi   }
1401*22dc650dSSadaf Ebrahimi 
1402*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
1403*22dc650dSSadaf Ebrahimi 
1404*22dc650dSSadaf Ebrahimi om->next = NULL;
1405*22dc650dSSadaf Ebrahimi om->groupnum = n;
1406*22dc650dSSadaf Ebrahimi 
1407*22dc650dSSadaf Ebrahimi if (after != NULL)
1408*22dc650dSSadaf Ebrahimi   {
1409*22dc650dSSadaf Ebrahimi   om->next = after->next;
1410*22dc650dSSadaf Ebrahimi   after->next = om;
1411*22dc650dSSadaf Ebrahimi   }
1412*22dc650dSSadaf Ebrahimi return om;
1413*22dc650dSSadaf Ebrahimi }
1414*22dc650dSSadaf Ebrahimi 
1415*22dc650dSSadaf Ebrahimi 
1416*22dc650dSSadaf Ebrahimi 
1417*22dc650dSSadaf Ebrahimi /*************************************************
1418*22dc650dSSadaf Ebrahimi *            Read one line of input              *
1419*22dc650dSSadaf Ebrahimi *************************************************/
1420*22dc650dSSadaf Ebrahimi 
1421*22dc650dSSadaf Ebrahimi /* Normally, input that is to be scanned is read using fread() (or gzread, or
1422*22dc650dSSadaf Ebrahimi BZ2_read) into a large buffer, so many lines may be read at once. However,
1423*22dc650dSSadaf Ebrahimi doing this for tty input means that no output appears until a lot of input has
1424*22dc650dSSadaf Ebrahimi been typed. Instead, tty input is handled line by line. We cannot use fgets()
1425*22dc650dSSadaf Ebrahimi for this, because it does not stop at a binary zero, and therefore there is no
1426*22dc650dSSadaf Ebrahimi way of telling how many characters it has read, because there may be binary
1427*22dc650dSSadaf Ebrahimi zeros embedded in the data. This function is also used for reading patterns
1428*22dc650dSSadaf Ebrahimi from files (the -f option).
1429*22dc650dSSadaf Ebrahimi 
1430*22dc650dSSadaf Ebrahimi Arguments:
1431*22dc650dSSadaf Ebrahimi   buffer     the buffer to read into
1432*22dc650dSSadaf Ebrahimi   length     the maximum number of characters to read
1433*22dc650dSSadaf Ebrahimi   f          the file
1434*22dc650dSSadaf Ebrahimi 
1435*22dc650dSSadaf Ebrahimi Returns:     the number of characters read, zero at end of file
1436*22dc650dSSadaf Ebrahimi */
1437*22dc650dSSadaf Ebrahimi 
1438*22dc650dSSadaf Ebrahimi static PCRE2_SIZE
read_one_line(char * buffer,PCRE2_SIZE length,FILE * f)1439*22dc650dSSadaf Ebrahimi read_one_line(char *buffer, PCRE2_SIZE length, FILE *f)
1440*22dc650dSSadaf Ebrahimi {
1441*22dc650dSSadaf Ebrahimi int c;
1442*22dc650dSSadaf Ebrahimi PCRE2_SIZE yield = 0;
1443*22dc650dSSadaf Ebrahimi while ((c = fgetc(f)) != EOF)
1444*22dc650dSSadaf Ebrahimi   {
1445*22dc650dSSadaf Ebrahimi   buffer[yield++] = c;
1446*22dc650dSSadaf Ebrahimi   if (c == '\n' || yield >= length) break;
1447*22dc650dSSadaf Ebrahimi   }
1448*22dc650dSSadaf Ebrahimi return yield;
1449*22dc650dSSadaf Ebrahimi }
1450*22dc650dSSadaf Ebrahimi 
1451*22dc650dSSadaf Ebrahimi 
1452*22dc650dSSadaf Ebrahimi 
1453*22dc650dSSadaf Ebrahimi /*************************************************
1454*22dc650dSSadaf Ebrahimi *             Find end of line                   *
1455*22dc650dSSadaf Ebrahimi *************************************************/
1456*22dc650dSSadaf Ebrahimi 
1457*22dc650dSSadaf Ebrahimi /* The length of the endline sequence that is found is set via lenptr. This may
1458*22dc650dSSadaf Ebrahimi be zero at the very end of the file if there is no line-ending sequence there.
1459*22dc650dSSadaf Ebrahimi 
1460*22dc650dSSadaf Ebrahimi Arguments:
1461*22dc650dSSadaf Ebrahimi   p         current position in line
1462*22dc650dSSadaf Ebrahimi   endptr    end of available data
1463*22dc650dSSadaf Ebrahimi   lenptr    where to put the length of the eol sequence
1464*22dc650dSSadaf Ebrahimi 
1465*22dc650dSSadaf Ebrahimi Returns:    pointer after the last byte of the line,
1466*22dc650dSSadaf Ebrahimi             including the newline byte(s)
1467*22dc650dSSadaf Ebrahimi */
1468*22dc650dSSadaf Ebrahimi 
1469*22dc650dSSadaf Ebrahimi static char *
end_of_line(char * p,char * endptr,int * lenptr)1470*22dc650dSSadaf Ebrahimi end_of_line(char *p, char *endptr, int *lenptr)
1471*22dc650dSSadaf Ebrahimi {
1472*22dc650dSSadaf Ebrahimi switch(endlinetype)
1473*22dc650dSSadaf Ebrahimi   {
1474*22dc650dSSadaf Ebrahimi   default:      /* Just in case */
1475*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_LF:
1476*22dc650dSSadaf Ebrahimi   while (p < endptr && *p != '\n') p++;
1477*22dc650dSSadaf Ebrahimi   if (p < endptr)
1478*22dc650dSSadaf Ebrahimi     {
1479*22dc650dSSadaf Ebrahimi     *lenptr = 1;
1480*22dc650dSSadaf Ebrahimi     return p + 1;
1481*22dc650dSSadaf Ebrahimi     }
1482*22dc650dSSadaf Ebrahimi   *lenptr = 0;
1483*22dc650dSSadaf Ebrahimi   return endptr;
1484*22dc650dSSadaf Ebrahimi 
1485*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_CR:
1486*22dc650dSSadaf Ebrahimi   while (p < endptr && *p != '\r') p++;
1487*22dc650dSSadaf Ebrahimi   if (p < endptr)
1488*22dc650dSSadaf Ebrahimi     {
1489*22dc650dSSadaf Ebrahimi     *lenptr = 1;
1490*22dc650dSSadaf Ebrahimi     return p + 1;
1491*22dc650dSSadaf Ebrahimi     }
1492*22dc650dSSadaf Ebrahimi   *lenptr = 0;
1493*22dc650dSSadaf Ebrahimi   return endptr;
1494*22dc650dSSadaf Ebrahimi 
1495*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_NUL:
1496*22dc650dSSadaf Ebrahimi   while (p < endptr && *p != '\0') p++;
1497*22dc650dSSadaf Ebrahimi   if (p < endptr)
1498*22dc650dSSadaf Ebrahimi     {
1499*22dc650dSSadaf Ebrahimi     *lenptr = 1;
1500*22dc650dSSadaf Ebrahimi     return p + 1;
1501*22dc650dSSadaf Ebrahimi     }
1502*22dc650dSSadaf Ebrahimi   *lenptr = 0;
1503*22dc650dSSadaf Ebrahimi   return endptr;
1504*22dc650dSSadaf Ebrahimi 
1505*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_CRLF:
1506*22dc650dSSadaf Ebrahimi   for (;;)
1507*22dc650dSSadaf Ebrahimi     {
1508*22dc650dSSadaf Ebrahimi     while (p < endptr && *p != '\r') p++;
1509*22dc650dSSadaf Ebrahimi     if (++p >= endptr)
1510*22dc650dSSadaf Ebrahimi       {
1511*22dc650dSSadaf Ebrahimi       *lenptr = 0;
1512*22dc650dSSadaf Ebrahimi       return endptr;
1513*22dc650dSSadaf Ebrahimi       }
1514*22dc650dSSadaf Ebrahimi     if (*p == '\n')
1515*22dc650dSSadaf Ebrahimi       {
1516*22dc650dSSadaf Ebrahimi       *lenptr = 2;
1517*22dc650dSSadaf Ebrahimi       return p + 1;
1518*22dc650dSSadaf Ebrahimi       }
1519*22dc650dSSadaf Ebrahimi     }
1520*22dc650dSSadaf Ebrahimi   break;
1521*22dc650dSSadaf Ebrahimi 
1522*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_ANYCRLF:
1523*22dc650dSSadaf Ebrahimi   while (p < endptr)
1524*22dc650dSSadaf Ebrahimi     {
1525*22dc650dSSadaf Ebrahimi     int extra = 0;
1526*22dc650dSSadaf Ebrahimi     int c = *((unsigned char *)p);
1527*22dc650dSSadaf Ebrahimi 
1528*22dc650dSSadaf Ebrahimi     if (utf && c >= 0xc0)
1529*22dc650dSSadaf Ebrahimi       {
1530*22dc650dSSadaf Ebrahimi       int gcii, gcss;
1531*22dc650dSSadaf Ebrahimi       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1532*22dc650dSSadaf Ebrahimi       gcss = 6*extra;
1533*22dc650dSSadaf Ebrahimi       c = (c & utf8_table3[extra]) << gcss;
1534*22dc650dSSadaf Ebrahimi       for (gcii = 1; gcii <= extra; gcii++)
1535*22dc650dSSadaf Ebrahimi         {
1536*22dc650dSSadaf Ebrahimi         gcss -= 6;
1537*22dc650dSSadaf Ebrahimi         c |= (p[gcii] & 0x3f) << gcss;
1538*22dc650dSSadaf Ebrahimi         }
1539*22dc650dSSadaf Ebrahimi       }
1540*22dc650dSSadaf Ebrahimi 
1541*22dc650dSSadaf Ebrahimi     p += 1 + extra;
1542*22dc650dSSadaf Ebrahimi 
1543*22dc650dSSadaf Ebrahimi     switch (c)
1544*22dc650dSSadaf Ebrahimi       {
1545*22dc650dSSadaf Ebrahimi       case '\n':
1546*22dc650dSSadaf Ebrahimi       *lenptr = 1;
1547*22dc650dSSadaf Ebrahimi       return p;
1548*22dc650dSSadaf Ebrahimi 
1549*22dc650dSSadaf Ebrahimi       case '\r':
1550*22dc650dSSadaf Ebrahimi       if (p < endptr && *p == '\n')
1551*22dc650dSSadaf Ebrahimi         {
1552*22dc650dSSadaf Ebrahimi         *lenptr = 2;
1553*22dc650dSSadaf Ebrahimi         p++;
1554*22dc650dSSadaf Ebrahimi         }
1555*22dc650dSSadaf Ebrahimi       else *lenptr = 1;
1556*22dc650dSSadaf Ebrahimi       return p;
1557*22dc650dSSadaf Ebrahimi 
1558*22dc650dSSadaf Ebrahimi       default:
1559*22dc650dSSadaf Ebrahimi       break;
1560*22dc650dSSadaf Ebrahimi       }
1561*22dc650dSSadaf Ebrahimi     }   /* End of loop for ANYCRLF case */
1562*22dc650dSSadaf Ebrahimi 
1563*22dc650dSSadaf Ebrahimi   *lenptr = 0;  /* Must have hit the end */
1564*22dc650dSSadaf Ebrahimi   return endptr;
1565*22dc650dSSadaf Ebrahimi 
1566*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_ANY:
1567*22dc650dSSadaf Ebrahimi   while (p < endptr)
1568*22dc650dSSadaf Ebrahimi     {
1569*22dc650dSSadaf Ebrahimi     int extra = 0;
1570*22dc650dSSadaf Ebrahimi     int c = *((unsigned char *)p);
1571*22dc650dSSadaf Ebrahimi 
1572*22dc650dSSadaf Ebrahimi     if (utf && c >= 0xc0)
1573*22dc650dSSadaf Ebrahimi       {
1574*22dc650dSSadaf Ebrahimi       int gcii, gcss;
1575*22dc650dSSadaf Ebrahimi       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1576*22dc650dSSadaf Ebrahimi       gcss = 6*extra;
1577*22dc650dSSadaf Ebrahimi       c = (c & utf8_table3[extra]) << gcss;
1578*22dc650dSSadaf Ebrahimi       for (gcii = 1; gcii <= extra; gcii++)
1579*22dc650dSSadaf Ebrahimi         {
1580*22dc650dSSadaf Ebrahimi         gcss -= 6;
1581*22dc650dSSadaf Ebrahimi         c |= (p[gcii] & 0x3f) << gcss;
1582*22dc650dSSadaf Ebrahimi         }
1583*22dc650dSSadaf Ebrahimi       }
1584*22dc650dSSadaf Ebrahimi 
1585*22dc650dSSadaf Ebrahimi     p += 1 + extra;
1586*22dc650dSSadaf Ebrahimi 
1587*22dc650dSSadaf Ebrahimi     switch (c)
1588*22dc650dSSadaf Ebrahimi       {
1589*22dc650dSSadaf Ebrahimi       case '\n':    /* LF */
1590*22dc650dSSadaf Ebrahimi       case '\v':    /* VT */
1591*22dc650dSSadaf Ebrahimi       case '\f':    /* FF */
1592*22dc650dSSadaf Ebrahimi       *lenptr = 1;
1593*22dc650dSSadaf Ebrahimi       return p;
1594*22dc650dSSadaf Ebrahimi 
1595*22dc650dSSadaf Ebrahimi       case '\r':    /* CR */
1596*22dc650dSSadaf Ebrahimi       if (p < endptr && *p == '\n')
1597*22dc650dSSadaf Ebrahimi         {
1598*22dc650dSSadaf Ebrahimi         *lenptr = 2;
1599*22dc650dSSadaf Ebrahimi         p++;
1600*22dc650dSSadaf Ebrahimi         }
1601*22dc650dSSadaf Ebrahimi       else *lenptr = 1;
1602*22dc650dSSadaf Ebrahimi       return p;
1603*22dc650dSSadaf Ebrahimi 
1604*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
1605*22dc650dSSadaf Ebrahimi       case 0x85:    /* Unicode NEL */
1606*22dc650dSSadaf Ebrahimi       *lenptr = utf? 2 : 1;
1607*22dc650dSSadaf Ebrahimi       return p;
1608*22dc650dSSadaf Ebrahimi 
1609*22dc650dSSadaf Ebrahimi       case 0x2028:  /* Unicode LS */
1610*22dc650dSSadaf Ebrahimi       case 0x2029:  /* Unicode PS */
1611*22dc650dSSadaf Ebrahimi       *lenptr = 3;
1612*22dc650dSSadaf Ebrahimi       return p;
1613*22dc650dSSadaf Ebrahimi #endif  /* Not EBCDIC */
1614*22dc650dSSadaf Ebrahimi 
1615*22dc650dSSadaf Ebrahimi       default:
1616*22dc650dSSadaf Ebrahimi       break;
1617*22dc650dSSadaf Ebrahimi       }
1618*22dc650dSSadaf Ebrahimi     }   /* End of loop for ANY case */
1619*22dc650dSSadaf Ebrahimi 
1620*22dc650dSSadaf Ebrahimi   *lenptr = 0;  /* Must have hit the end */
1621*22dc650dSSadaf Ebrahimi   return endptr;
1622*22dc650dSSadaf Ebrahimi   }     /* End of overall switch */
1623*22dc650dSSadaf Ebrahimi }
1624*22dc650dSSadaf Ebrahimi 
1625*22dc650dSSadaf Ebrahimi 
1626*22dc650dSSadaf Ebrahimi 
1627*22dc650dSSadaf Ebrahimi /*************************************************
1628*22dc650dSSadaf Ebrahimi *         Find start of previous line            *
1629*22dc650dSSadaf Ebrahimi *************************************************/
1630*22dc650dSSadaf Ebrahimi 
1631*22dc650dSSadaf Ebrahimi /* This is called when looking back for before lines to print.
1632*22dc650dSSadaf Ebrahimi 
1633*22dc650dSSadaf Ebrahimi Arguments:
1634*22dc650dSSadaf Ebrahimi   p         start of the subsequent line
1635*22dc650dSSadaf Ebrahimi   startptr  start of available data
1636*22dc650dSSadaf Ebrahimi 
1637*22dc650dSSadaf Ebrahimi Returns:    pointer to the start of the previous line
1638*22dc650dSSadaf Ebrahimi */
1639*22dc650dSSadaf Ebrahimi 
1640*22dc650dSSadaf Ebrahimi static char *
previous_line(char * p,char * startptr)1641*22dc650dSSadaf Ebrahimi previous_line(char *p, char *startptr)
1642*22dc650dSSadaf Ebrahimi {
1643*22dc650dSSadaf Ebrahimi switch(endlinetype)
1644*22dc650dSSadaf Ebrahimi   {
1645*22dc650dSSadaf Ebrahimi   default:      /* Just in case */
1646*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_LF:
1647*22dc650dSSadaf Ebrahimi   p--;
1648*22dc650dSSadaf Ebrahimi   while (p > startptr && p[-1] != '\n') p--;
1649*22dc650dSSadaf Ebrahimi   return p;
1650*22dc650dSSadaf Ebrahimi 
1651*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_CR:
1652*22dc650dSSadaf Ebrahimi   p--;
1653*22dc650dSSadaf Ebrahimi   while (p > startptr && p[-1] != '\n') p--;
1654*22dc650dSSadaf Ebrahimi   return p;
1655*22dc650dSSadaf Ebrahimi 
1656*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_NUL:
1657*22dc650dSSadaf Ebrahimi   p--;
1658*22dc650dSSadaf Ebrahimi   while (p > startptr && p[-1] != '\0') p--;
1659*22dc650dSSadaf Ebrahimi   return p;
1660*22dc650dSSadaf Ebrahimi 
1661*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_CRLF:
1662*22dc650dSSadaf Ebrahimi   for (;;)
1663*22dc650dSSadaf Ebrahimi     {
1664*22dc650dSSadaf Ebrahimi     p -= 2;
1665*22dc650dSSadaf Ebrahimi     while (p > startptr && p[-1] != '\n') p--;
1666*22dc650dSSadaf Ebrahimi     if (p <= startptr + 1 || p[-2] == '\r') return p;
1667*22dc650dSSadaf Ebrahimi     }
1668*22dc650dSSadaf Ebrahimi   /* Control can never get here */
1669*22dc650dSSadaf Ebrahimi 
1670*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_ANY:
1671*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_ANYCRLF:
1672*22dc650dSSadaf Ebrahimi   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1673*22dc650dSSadaf Ebrahimi   if (utf) while ((*p & 0xc0) == 0x80) p--;
1674*22dc650dSSadaf Ebrahimi 
1675*22dc650dSSadaf Ebrahimi   while (p > startptr)
1676*22dc650dSSadaf Ebrahimi     {
1677*22dc650dSSadaf Ebrahimi     unsigned int c;
1678*22dc650dSSadaf Ebrahimi     char *pp = p - 1;
1679*22dc650dSSadaf Ebrahimi 
1680*22dc650dSSadaf Ebrahimi     if (utf)
1681*22dc650dSSadaf Ebrahimi       {
1682*22dc650dSSadaf Ebrahimi       int extra = 0;
1683*22dc650dSSadaf Ebrahimi       while ((*pp & 0xc0) == 0x80) pp--;
1684*22dc650dSSadaf Ebrahimi       c = *((unsigned char *)pp);
1685*22dc650dSSadaf Ebrahimi       if (c >= 0xc0)
1686*22dc650dSSadaf Ebrahimi         {
1687*22dc650dSSadaf Ebrahimi         int gcii, gcss;
1688*22dc650dSSadaf Ebrahimi         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1689*22dc650dSSadaf Ebrahimi         gcss = 6*extra;
1690*22dc650dSSadaf Ebrahimi         c = (c & utf8_table3[extra]) << gcss;
1691*22dc650dSSadaf Ebrahimi         for (gcii = 1; gcii <= extra; gcii++)
1692*22dc650dSSadaf Ebrahimi           {
1693*22dc650dSSadaf Ebrahimi           gcss -= 6;
1694*22dc650dSSadaf Ebrahimi           c |= (pp[gcii] & 0x3f) << gcss;
1695*22dc650dSSadaf Ebrahimi           }
1696*22dc650dSSadaf Ebrahimi         }
1697*22dc650dSSadaf Ebrahimi       }
1698*22dc650dSSadaf Ebrahimi     else c = *((unsigned char *)pp);
1699*22dc650dSSadaf Ebrahimi 
1700*22dc650dSSadaf Ebrahimi     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1701*22dc650dSSadaf Ebrahimi       {
1702*22dc650dSSadaf Ebrahimi       case '\n':    /* LF */
1703*22dc650dSSadaf Ebrahimi       case '\r':    /* CR */
1704*22dc650dSSadaf Ebrahimi       return p;
1705*22dc650dSSadaf Ebrahimi 
1706*22dc650dSSadaf Ebrahimi       default:
1707*22dc650dSSadaf Ebrahimi       break;
1708*22dc650dSSadaf Ebrahimi       }
1709*22dc650dSSadaf Ebrahimi 
1710*22dc650dSSadaf Ebrahimi     else switch (c)
1711*22dc650dSSadaf Ebrahimi       {
1712*22dc650dSSadaf Ebrahimi       case '\n':    /* LF */
1713*22dc650dSSadaf Ebrahimi       case '\v':    /* VT */
1714*22dc650dSSadaf Ebrahimi       case '\f':    /* FF */
1715*22dc650dSSadaf Ebrahimi       case '\r':    /* CR */
1716*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
1717*22dc650dSSadaf Ebrahimi       case 0x85:    /* Unicode NEL */
1718*22dc650dSSadaf Ebrahimi       case 0x2028:  /* Unicode LS */
1719*22dc650dSSadaf Ebrahimi       case 0x2029:  /* Unicode PS */
1720*22dc650dSSadaf Ebrahimi #endif  /* Not EBCDIC */
1721*22dc650dSSadaf Ebrahimi       return p;
1722*22dc650dSSadaf Ebrahimi 
1723*22dc650dSSadaf Ebrahimi       default:
1724*22dc650dSSadaf Ebrahimi       break;
1725*22dc650dSSadaf Ebrahimi       }
1726*22dc650dSSadaf Ebrahimi 
1727*22dc650dSSadaf Ebrahimi     p = pp;  /* Back one character */
1728*22dc650dSSadaf Ebrahimi     }        /* End of loop for ANY case */
1729*22dc650dSSadaf Ebrahimi 
1730*22dc650dSSadaf Ebrahimi   return startptr;  /* Hit start of data */
1731*22dc650dSSadaf Ebrahimi   }     /* End of overall switch */
1732*22dc650dSSadaf Ebrahimi }
1733*22dc650dSSadaf Ebrahimi 
1734*22dc650dSSadaf Ebrahimi 
1735*22dc650dSSadaf Ebrahimi 
1736*22dc650dSSadaf Ebrahimi /*************************************************
1737*22dc650dSSadaf Ebrahimi *              Output newline at end             *
1738*22dc650dSSadaf Ebrahimi *************************************************/
1739*22dc650dSSadaf Ebrahimi 
1740*22dc650dSSadaf Ebrahimi /* This function is called if the final line of a file has been written to
1741*22dc650dSSadaf Ebrahimi stdout, but it does not have a terminating newline.
1742*22dc650dSSadaf Ebrahimi 
1743*22dc650dSSadaf Ebrahimi Arguments:  none
1744*22dc650dSSadaf Ebrahimi Returns:    nothing
1745*22dc650dSSadaf Ebrahimi */
1746*22dc650dSSadaf Ebrahimi 
1747*22dc650dSSadaf Ebrahimi static void
write_final_newline(void)1748*22dc650dSSadaf Ebrahimi write_final_newline(void)
1749*22dc650dSSadaf Ebrahimi {
1750*22dc650dSSadaf Ebrahimi switch(endlinetype)
1751*22dc650dSSadaf Ebrahimi   {
1752*22dc650dSSadaf Ebrahimi   default:      /* Just in case */
1753*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_LF:
1754*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_ANY:
1755*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_ANYCRLF:
1756*22dc650dSSadaf Ebrahimi   fprintf(stdout, "\n");
1757*22dc650dSSadaf Ebrahimi   break;
1758*22dc650dSSadaf Ebrahimi 
1759*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_CR:
1760*22dc650dSSadaf Ebrahimi   fprintf(stdout, "\r");
1761*22dc650dSSadaf Ebrahimi   break;
1762*22dc650dSSadaf Ebrahimi 
1763*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_CRLF:
1764*22dc650dSSadaf Ebrahimi   fprintf(stdout, "\r\n");
1765*22dc650dSSadaf Ebrahimi   break;
1766*22dc650dSSadaf Ebrahimi 
1767*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_NUL:
1768*22dc650dSSadaf Ebrahimi   fprintf(stdout, "%c", 0);
1769*22dc650dSSadaf Ebrahimi   break;
1770*22dc650dSSadaf Ebrahimi   }
1771*22dc650dSSadaf Ebrahimi }
1772*22dc650dSSadaf Ebrahimi 
1773*22dc650dSSadaf Ebrahimi 
1774*22dc650dSSadaf Ebrahimi /*************************************************
1775*22dc650dSSadaf Ebrahimi *       Print the previous "after" lines         *
1776*22dc650dSSadaf Ebrahimi *************************************************/
1777*22dc650dSSadaf Ebrahimi 
1778*22dc650dSSadaf Ebrahimi /* This is called if we are about to lose said lines because of buffer filling,
1779*22dc650dSSadaf Ebrahimi and at the end of the file. The data in the line is written using fwrite() so
1780*22dc650dSSadaf Ebrahimi that a binary zero does not terminate it.
1781*22dc650dSSadaf Ebrahimi 
1782*22dc650dSSadaf Ebrahimi Arguments:
1783*22dc650dSSadaf Ebrahimi   lastmatchnumber   the number of the last matching line, plus one
1784*22dc650dSSadaf Ebrahimi   lastmatchrestart  where we restarted after the last match
1785*22dc650dSSadaf Ebrahimi   endptr            end of available data
1786*22dc650dSSadaf Ebrahimi   printname         filename for printing
1787*22dc650dSSadaf Ebrahimi 
1788*22dc650dSSadaf Ebrahimi Returns:            nothing
1789*22dc650dSSadaf Ebrahimi */
1790*22dc650dSSadaf Ebrahimi 
1791*22dc650dSSadaf Ebrahimi static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1792*22dc650dSSadaf Ebrahimi do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1793*22dc650dSSadaf Ebrahimi   char *endptr, const char *printname)
1794*22dc650dSSadaf Ebrahimi {
1795*22dc650dSSadaf Ebrahimi if (after_context > 0 && lastmatchnumber > 0)
1796*22dc650dSSadaf Ebrahimi   {
1797*22dc650dSSadaf Ebrahimi   int count = 0;
1798*22dc650dSSadaf Ebrahimi   int ellength = 0;
1799*22dc650dSSadaf Ebrahimi   while (lastmatchrestart < endptr && count < after_context)
1800*22dc650dSSadaf Ebrahimi     {
1801*22dc650dSSadaf Ebrahimi     char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1802*22dc650dSSadaf Ebrahimi     if (ellength == 0 && pp == main_buffer + bufsize) break;
1803*22dc650dSSadaf Ebrahimi     if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
1804*22dc650dSSadaf Ebrahimi     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1805*22dc650dSSadaf Ebrahimi     FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1806*22dc650dSSadaf Ebrahimi     lastmatchrestart = pp;
1807*22dc650dSSadaf Ebrahimi     count++;
1808*22dc650dSSadaf Ebrahimi     }
1809*22dc650dSSadaf Ebrahimi 
1810*22dc650dSSadaf Ebrahimi   /* If we have printed any lines, arrange for a hyphen separator if anything
1811*22dc650dSSadaf Ebrahimi   else follows. Also, if the last line is the final line in the file and it had
1812*22dc650dSSadaf Ebrahimi   no newline, add one. */
1813*22dc650dSSadaf Ebrahimi 
1814*22dc650dSSadaf Ebrahimi   if (count > 0)
1815*22dc650dSSadaf Ebrahimi     {
1816*22dc650dSSadaf Ebrahimi     hyphenpending = TRUE;
1817*22dc650dSSadaf Ebrahimi     if (ellength == 0 && lastmatchrestart >= endptr)
1818*22dc650dSSadaf Ebrahimi       write_final_newline();
1819*22dc650dSSadaf Ebrahimi     }
1820*22dc650dSSadaf Ebrahimi   }
1821*22dc650dSSadaf Ebrahimi }
1822*22dc650dSSadaf Ebrahimi 
1823*22dc650dSSadaf Ebrahimi 
1824*22dc650dSSadaf Ebrahimi 
1825*22dc650dSSadaf Ebrahimi /*************************************************
1826*22dc650dSSadaf Ebrahimi *   Apply patterns to subject till one matches   *
1827*22dc650dSSadaf Ebrahimi *************************************************/
1828*22dc650dSSadaf Ebrahimi 
1829*22dc650dSSadaf Ebrahimi /* This function is called to run through all the patterns, looking for a
1830*22dc650dSSadaf Ebrahimi match. When all possible matches are required, for example, for colouring, it
1831*22dc650dSSadaf Ebrahimi checks all patterns for matching, and returns the earliest match. Otherwise, it
1832*22dc650dSSadaf Ebrahimi returns the first pattern that has matched.
1833*22dc650dSSadaf Ebrahimi 
1834*22dc650dSSadaf Ebrahimi Arguments:
1835*22dc650dSSadaf Ebrahimi   matchptr     the start of the subject
1836*22dc650dSSadaf Ebrahimi   length       the length of the subject to match
1837*22dc650dSSadaf Ebrahimi   options      options for pcre2_match
1838*22dc650dSSadaf Ebrahimi   startoffset  where to start matching
1839*22dc650dSSadaf Ebrahimi   mrc          address of where to put the result of pcre2_match()
1840*22dc650dSSadaf Ebrahimi 
1841*22dc650dSSadaf Ebrahimi Returns:       TRUE if there was a match, match_data and offsets are set
1842*22dc650dSSadaf Ebrahimi                FALSE if there was no match (but no errors)
1843*22dc650dSSadaf Ebrahimi                invert if there was a non-fatal error
1844*22dc650dSSadaf Ebrahimi */
1845*22dc650dSSadaf Ebrahimi 
1846*22dc650dSSadaf Ebrahimi static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1847*22dc650dSSadaf Ebrahimi match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1848*22dc650dSSadaf Ebrahimi   PCRE2_SIZE startoffset, int *mrc)
1849*22dc650dSSadaf Ebrahimi {
1850*22dc650dSSadaf Ebrahimi PCRE2_SIZE slen = length;
1851*22dc650dSSadaf Ebrahimi int first = -1;
1852*22dc650dSSadaf Ebrahimi int firstrc = 0;
1853*22dc650dSSadaf Ebrahimi patstr *p = patterns;
1854*22dc650dSSadaf Ebrahimi const char *msg = "this text:\n\n";
1855*22dc650dSSadaf Ebrahimi 
1856*22dc650dSSadaf Ebrahimi if (slen > 200)
1857*22dc650dSSadaf Ebrahimi   {
1858*22dc650dSSadaf Ebrahimi   slen = 200;
1859*22dc650dSSadaf Ebrahimi   msg = "text that starts:\n\n";
1860*22dc650dSSadaf Ebrahimi   }
1861*22dc650dSSadaf Ebrahimi 
1862*22dc650dSSadaf Ebrahimi for (int i = 1; p != NULL; p = p->next, i++)
1863*22dc650dSSadaf Ebrahimi   {
1864*22dc650dSSadaf Ebrahimi   int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, length,
1865*22dc650dSSadaf Ebrahimi     startoffset, options, match_data, match_context);
1866*22dc650dSSadaf Ebrahimi   if (rc == PCRE2_ERROR_NOMATCH) continue;
1867*22dc650dSSadaf Ebrahimi 
1868*22dc650dSSadaf Ebrahimi   /* Handle a successful match. When all_matches is false, we are done.
1869*22dc650dSSadaf Ebrahimi   Otherwise we must save the earliest match. */
1870*22dc650dSSadaf Ebrahimi 
1871*22dc650dSSadaf Ebrahimi   if (rc >= 0)
1872*22dc650dSSadaf Ebrahimi     {
1873*22dc650dSSadaf Ebrahimi     if (!all_matches)
1874*22dc650dSSadaf Ebrahimi       {
1875*22dc650dSSadaf Ebrahimi       *mrc = rc;
1876*22dc650dSSadaf Ebrahimi       return TRUE;
1877*22dc650dSSadaf Ebrahimi       }
1878*22dc650dSSadaf Ebrahimi 
1879*22dc650dSSadaf Ebrahimi     if (first < 0 || offsets[0] < offsets_pair[first][0] ||
1880*22dc650dSSadaf Ebrahimi          (offsets[0] == offsets_pair[first][0] &&
1881*22dc650dSSadaf Ebrahimi           offsets[1] > offsets_pair[first][1]))
1882*22dc650dSSadaf Ebrahimi       {
1883*22dc650dSSadaf Ebrahimi       first = match_data_toggle;
1884*22dc650dSSadaf Ebrahimi       firstrc = rc;
1885*22dc650dSSadaf Ebrahimi       match_data_toggle ^= 1;
1886*22dc650dSSadaf Ebrahimi       match_data = match_data_pair[match_data_toggle];
1887*22dc650dSSadaf Ebrahimi       offsets = offsets_pair[match_data_toggle];
1888*22dc650dSSadaf Ebrahimi       }
1889*22dc650dSSadaf Ebrahimi     continue;
1890*22dc650dSSadaf Ebrahimi     }
1891*22dc650dSSadaf Ebrahimi 
1892*22dc650dSSadaf Ebrahimi   /* Deal with PCRE2 error. */
1893*22dc650dSSadaf Ebrahimi 
1894*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", rc);
1895*22dc650dSSadaf Ebrahimi   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1896*22dc650dSSadaf Ebrahimi   fprintf(stderr, "%s", msg);
1897*22dc650dSSadaf Ebrahimi   FWRITE_IGNORE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1898*22dc650dSSadaf Ebrahimi   fprintf(stderr, "\n\n");
1899*22dc650dSSadaf Ebrahimi   if (rc <= PCRE2_ERROR_UTF8_ERR1 &&
1900*22dc650dSSadaf Ebrahimi       rc >= PCRE2_ERROR_UTF8_ERR21)
1901*22dc650dSSadaf Ebrahimi     {
1902*22dc650dSSadaf Ebrahimi     unsigned char mbuffer[256];
1903*22dc650dSSadaf Ebrahimi     PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1904*22dc650dSSadaf Ebrahimi     (void)pcre2_get_error_message(rc, mbuffer, sizeof(mbuffer));
1905*22dc650dSSadaf Ebrahimi     fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1906*22dc650dSSadaf Ebrahimi     }
1907*22dc650dSSadaf Ebrahimi   if (rc == PCRE2_ERROR_MATCHLIMIT || rc == PCRE2_ERROR_DEPTHLIMIT ||
1908*22dc650dSSadaf Ebrahimi       rc == PCRE2_ERROR_HEAPLIMIT || rc == PCRE2_ERROR_JIT_STACKLIMIT)
1909*22dc650dSSadaf Ebrahimi     resource_error = TRUE;
1910*22dc650dSSadaf Ebrahimi   if (error_count++ > 20)
1911*22dc650dSSadaf Ebrahimi     {
1912*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1913*22dc650dSSadaf Ebrahimi     pcre2grep_exit(2);
1914*22dc650dSSadaf Ebrahimi     }
1915*22dc650dSSadaf Ebrahimi   return invert;    /* No more matching; don't show the line again */
1916*22dc650dSSadaf Ebrahimi   }
1917*22dc650dSSadaf Ebrahimi 
1918*22dc650dSSadaf Ebrahimi /* We get here when all patterns have been tried. If all_matches is false,
1919*22dc650dSSadaf Ebrahimi this means that none of them matched. If all_matches is true, matched_first
1920*22dc650dSSadaf Ebrahimi will be non-NULL if there was at least one match, and it will point to the
1921*22dc650dSSadaf Ebrahimi appropriate match_data block. */
1922*22dc650dSSadaf Ebrahimi 
1923*22dc650dSSadaf Ebrahimi if (!all_matches || first < 0) return FALSE;
1924*22dc650dSSadaf Ebrahimi 
1925*22dc650dSSadaf Ebrahimi match_data_toggle = first;
1926*22dc650dSSadaf Ebrahimi match_data = match_data_pair[first];
1927*22dc650dSSadaf Ebrahimi offsets = offsets_pair[first];
1928*22dc650dSSadaf Ebrahimi *mrc = firstrc;
1929*22dc650dSSadaf Ebrahimi return TRUE;
1930*22dc650dSSadaf Ebrahimi }
1931*22dc650dSSadaf Ebrahimi 
1932*22dc650dSSadaf Ebrahimi 
1933*22dc650dSSadaf Ebrahimi 
1934*22dc650dSSadaf Ebrahimi /*************************************************
1935*22dc650dSSadaf Ebrahimi *          Decode dollar escape sequence         *
1936*22dc650dSSadaf Ebrahimi *************************************************/
1937*22dc650dSSadaf Ebrahimi 
1938*22dc650dSSadaf Ebrahimi /* Called from various places to decode $ escapes in output strings. The escape
1939*22dc650dSSadaf Ebrahimi sequences are as follows:
1940*22dc650dSSadaf Ebrahimi 
1941*22dc650dSSadaf Ebrahimi $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1942*22dc650dSSadaf Ebrahimi zero is never returned; '0' is substituted.
1943*22dc650dSSadaf Ebrahimi 
1944*22dc650dSSadaf Ebrahimi $a returns bell.
1945*22dc650dSSadaf Ebrahimi $b returns backspace.
1946*22dc650dSSadaf Ebrahimi $e returns escape.
1947*22dc650dSSadaf Ebrahimi $f returns form feed.
1948*22dc650dSSadaf Ebrahimi $n returns newline.
1949*22dc650dSSadaf Ebrahimi $r returns carriage return.
1950*22dc650dSSadaf Ebrahimi $t returns tab.
1951*22dc650dSSadaf Ebrahimi $v returns vertical tab.
1952*22dc650dSSadaf Ebrahimi $o<digits> returns the character represented by the given octal
1953*22dc650dSSadaf Ebrahimi   number; up to three digits are processed.
1954*22dc650dSSadaf Ebrahimi $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1955*22dc650dSSadaf Ebrahimi   code points.
1956*22dc650dSSadaf Ebrahimi $x<digits> returns the character represented by the given hexadecimal
1957*22dc650dSSadaf Ebrahimi   number; up to two digits are processed.
1958*22dc650dSSadaf Ebrahimi $x{<digits} does the same, up to 6 digits, but gives an error for mode-invalid
1959*22dc650dSSadaf Ebrahimi   code points.
1960*22dc650dSSadaf Ebrahimi Any other character is substituted by itself. E.g: $$ is replaced by a single
1961*22dc650dSSadaf Ebrahimi dollar.
1962*22dc650dSSadaf Ebrahimi 
1963*22dc650dSSadaf Ebrahimi Arguments:
1964*22dc650dSSadaf Ebrahimi   begin      the start of the whole string
1965*22dc650dSSadaf Ebrahimi   string     points to the $
1966*22dc650dSSadaf Ebrahimi   callout    TRUE if in a callout (inhibits error messages)
1967*22dc650dSSadaf Ebrahimi   value      where to return a value
1968*22dc650dSSadaf Ebrahimi   last       where to return pointer to the last used character
1969*22dc650dSSadaf Ebrahimi 
1970*22dc650dSSadaf Ebrahimi Returns:     DDE_ERROR    after a syntax error
1971*22dc650dSSadaf Ebrahimi              DDE_CAPTURE  if *value is a capture number
1972*22dc650dSSadaf Ebrahimi              DDE_CHAR     if *value is a character code
1973*22dc650dSSadaf Ebrahimi */
1974*22dc650dSSadaf Ebrahimi 
1975*22dc650dSSadaf Ebrahimi static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1976*22dc650dSSadaf Ebrahimi decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1977*22dc650dSSadaf Ebrahimi   uint32_t *value, PCRE2_SPTR *last)
1978*22dc650dSSadaf Ebrahimi {
1979*22dc650dSSadaf Ebrahimi uint32_t c = 0;
1980*22dc650dSSadaf Ebrahimi int base = 10;
1981*22dc650dSSadaf Ebrahimi int dcount;
1982*22dc650dSSadaf Ebrahimi int rc = DDE_CHAR;
1983*22dc650dSSadaf Ebrahimi BOOL brace = FALSE;
1984*22dc650dSSadaf Ebrahimi 
1985*22dc650dSSadaf Ebrahimi switch (*(++string))
1986*22dc650dSSadaf Ebrahimi   {
1987*22dc650dSSadaf Ebrahimi   case 0:   /* Syntax error: a character must be present after $. */
1988*22dc650dSSadaf Ebrahimi   if (!callout)
1989*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1990*22dc650dSSadaf Ebrahimi       (int)(string - begin), "no character after $");
1991*22dc650dSSadaf Ebrahimi   *last = string;
1992*22dc650dSSadaf Ebrahimi   return DDE_ERROR;
1993*22dc650dSSadaf Ebrahimi 
1994*22dc650dSSadaf Ebrahimi   case '{':
1995*22dc650dSSadaf Ebrahimi   brace = TRUE;
1996*22dc650dSSadaf Ebrahimi   string++;
1997*22dc650dSSadaf Ebrahimi   if (!isdigit((unsigned char)(*string)))  /* Syntax error: a decimal number required. */
1998*22dc650dSSadaf Ebrahimi     {
1999*22dc650dSSadaf Ebrahimi     if (!callout)
2000*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2001*22dc650dSSadaf Ebrahimi         (int)(string - begin), "decimal number expected");
2002*22dc650dSSadaf Ebrahimi     rc = DDE_ERROR;
2003*22dc650dSSadaf Ebrahimi     break;
2004*22dc650dSSadaf Ebrahimi     }
2005*22dc650dSSadaf Ebrahimi 
2006*22dc650dSSadaf Ebrahimi   /* Fall through */
2007*22dc650dSSadaf Ebrahimi 
2008*22dc650dSSadaf Ebrahimi   /* The maximum capture number is 65535, so any number greater than that will
2009*22dc650dSSadaf Ebrahimi   always be an unknown capture number. We just stop incrementing, in order to
2010*22dc650dSSadaf Ebrahimi   avoid overflow. */
2011*22dc650dSSadaf Ebrahimi 
2012*22dc650dSSadaf Ebrahimi   case '0': case '1': case '2': case '3': case '4':
2013*22dc650dSSadaf Ebrahimi   case '5': case '6': case '7': case '8': case '9':
2014*22dc650dSSadaf Ebrahimi   do
2015*22dc650dSSadaf Ebrahimi     {
2016*22dc650dSSadaf Ebrahimi     if (c <= 65535) c = c * 10 + (*string - '0');
2017*22dc650dSSadaf Ebrahimi     string++;
2018*22dc650dSSadaf Ebrahimi     }
2019*22dc650dSSadaf Ebrahimi   while (*string >= '0' && *string <= '9');
2020*22dc650dSSadaf Ebrahimi   string--;  /* Point to last digit */
2021*22dc650dSSadaf Ebrahimi 
2022*22dc650dSSadaf Ebrahimi   /* In a callout, capture number 0 is not available. No error can be given,
2023*22dc650dSSadaf Ebrahimi   so just return the character '0'. */
2024*22dc650dSSadaf Ebrahimi 
2025*22dc650dSSadaf Ebrahimi   if (callout && c == 0)
2026*22dc650dSSadaf Ebrahimi     {
2027*22dc650dSSadaf Ebrahimi     *value = '0';
2028*22dc650dSSadaf Ebrahimi     }
2029*22dc650dSSadaf Ebrahimi   else
2030*22dc650dSSadaf Ebrahimi     {
2031*22dc650dSSadaf Ebrahimi     *value = c;
2032*22dc650dSSadaf Ebrahimi     rc = DDE_CAPTURE;
2033*22dc650dSSadaf Ebrahimi     }
2034*22dc650dSSadaf Ebrahimi   break;
2035*22dc650dSSadaf Ebrahimi 
2036*22dc650dSSadaf Ebrahimi   /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
2037*22dc650dSSadaf Ebrahimi   for valid Unicode code points. */
2038*22dc650dSSadaf Ebrahimi 
2039*22dc650dSSadaf Ebrahimi   case 'o':
2040*22dc650dSSadaf Ebrahimi   base = 8;
2041*22dc650dSSadaf Ebrahimi   string++;
2042*22dc650dSSadaf Ebrahimi   if (*string == '{')
2043*22dc650dSSadaf Ebrahimi     {
2044*22dc650dSSadaf Ebrahimi     brace = TRUE;
2045*22dc650dSSadaf Ebrahimi     string++;
2046*22dc650dSSadaf Ebrahimi     dcount = 7;
2047*22dc650dSSadaf Ebrahimi     }
2048*22dc650dSSadaf Ebrahimi   else dcount = 3;
2049*22dc650dSSadaf Ebrahimi   for (; dcount > 0; dcount--)
2050*22dc650dSSadaf Ebrahimi     {
2051*22dc650dSSadaf Ebrahimi     if (*string < '0' || *string > '7') break;
2052*22dc650dSSadaf Ebrahimi     c = c * 8 + (*string++ - '0');
2053*22dc650dSSadaf Ebrahimi     }
2054*22dc650dSSadaf Ebrahimi   *value = c;
2055*22dc650dSSadaf Ebrahimi   string--;  /* Point to last digit */
2056*22dc650dSSadaf Ebrahimi   break;
2057*22dc650dSSadaf Ebrahimi 
2058*22dc650dSSadaf Ebrahimi   /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2059*22dc650dSSadaf Ebrahimi   for valid Unicode code points. */
2060*22dc650dSSadaf Ebrahimi 
2061*22dc650dSSadaf Ebrahimi   case 'x':
2062*22dc650dSSadaf Ebrahimi   base = 16;
2063*22dc650dSSadaf Ebrahimi   string++;
2064*22dc650dSSadaf Ebrahimi   if (*string == '{')
2065*22dc650dSSadaf Ebrahimi     {
2066*22dc650dSSadaf Ebrahimi     brace = TRUE;
2067*22dc650dSSadaf Ebrahimi     string++;
2068*22dc650dSSadaf Ebrahimi     dcount = 6;
2069*22dc650dSSadaf Ebrahimi     }
2070*22dc650dSSadaf Ebrahimi   else dcount = 2;
2071*22dc650dSSadaf Ebrahimi   for (; dcount > 0; dcount--)
2072*22dc650dSSadaf Ebrahimi     {
2073*22dc650dSSadaf Ebrahimi     if (!isxdigit(*string)) break;
2074*22dc650dSSadaf Ebrahimi     if (*string >= '0' && *string <= '9')
2075*22dc650dSSadaf Ebrahimi       c = c *16 + *string++ - '0';
2076*22dc650dSSadaf Ebrahimi     else
2077*22dc650dSSadaf Ebrahimi       c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2078*22dc650dSSadaf Ebrahimi     }
2079*22dc650dSSadaf Ebrahimi   *value = c;
2080*22dc650dSSadaf Ebrahimi   string--;  /* Point to last digit */
2081*22dc650dSSadaf Ebrahimi   break;
2082*22dc650dSSadaf Ebrahimi 
2083*22dc650dSSadaf Ebrahimi   case 'a': *value = '\a'; break;
2084*22dc650dSSadaf Ebrahimi   case 'b': *value = '\b'; break;
2085*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
2086*22dc650dSSadaf Ebrahimi   case 'e': *value = '\033'; break;
2087*22dc650dSSadaf Ebrahimi #else
2088*22dc650dSSadaf Ebrahimi   case 'e': *value = '\047'; break;
2089*22dc650dSSadaf Ebrahimi #endif
2090*22dc650dSSadaf Ebrahimi   case 'f': *value = '\f'; break;
2091*22dc650dSSadaf Ebrahimi   case 'n': *value = STDOUT_NL_CODE; break;
2092*22dc650dSSadaf Ebrahimi   case 'r': *value = '\r'; break;
2093*22dc650dSSadaf Ebrahimi   case 't': *value = '\t'; break;
2094*22dc650dSSadaf Ebrahimi   case 'v': *value = '\v'; break;
2095*22dc650dSSadaf Ebrahimi 
2096*22dc650dSSadaf Ebrahimi   default: *value = *string; break;
2097*22dc650dSSadaf Ebrahimi   }
2098*22dc650dSSadaf Ebrahimi 
2099*22dc650dSSadaf Ebrahimi if (brace)
2100*22dc650dSSadaf Ebrahimi   {
2101*22dc650dSSadaf Ebrahimi   c = string[1];
2102*22dc650dSSadaf Ebrahimi   if (c != '}')
2103*22dc650dSSadaf Ebrahimi     {
2104*22dc650dSSadaf Ebrahimi     rc = DDE_ERROR;
2105*22dc650dSSadaf Ebrahimi     if (!callout)
2106*22dc650dSSadaf Ebrahimi       {
2107*22dc650dSSadaf Ebrahimi       if ((base == 8 && c >= '0' && c <= '7') ||
2108*22dc650dSSadaf Ebrahimi           (base == 16 && isxdigit(c)))
2109*22dc650dSSadaf Ebrahimi         {
2110*22dc650dSSadaf Ebrahimi         fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2111*22dc650dSSadaf Ebrahimi           "too many %s digits\n", (int)(string - begin),
2112*22dc650dSSadaf Ebrahimi           (base == 8)? "octal" : "hex");
2113*22dc650dSSadaf Ebrahimi         }
2114*22dc650dSSadaf Ebrahimi       else
2115*22dc650dSSadaf Ebrahimi         {
2116*22dc650dSSadaf Ebrahimi         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2117*22dc650dSSadaf Ebrahimi           (int)(string - begin), "missing closing brace");
2118*22dc650dSSadaf Ebrahimi         }
2119*22dc650dSSadaf Ebrahimi       }
2120*22dc650dSSadaf Ebrahimi     }
2121*22dc650dSSadaf Ebrahimi   else string++;
2122*22dc650dSSadaf Ebrahimi   }
2123*22dc650dSSadaf Ebrahimi 
2124*22dc650dSSadaf Ebrahimi /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2125*22dc650dSSadaf Ebrahimi 
2126*22dc650dSSadaf Ebrahimi if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2127*22dc650dSSadaf Ebrahimi   {
2128*22dc650dSSadaf Ebrahimi   uint32_t max = utf? 0x0010ffffu : 0xffu;
2129*22dc650dSSadaf Ebrahimi   if (*value > max)
2130*22dc650dSSadaf Ebrahimi     {
2131*22dc650dSSadaf Ebrahimi     if (!callout)
2132*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2133*22dc650dSSadaf Ebrahimi         "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2134*22dc650dSSadaf Ebrahimi     rc = DDE_ERROR;
2135*22dc650dSSadaf Ebrahimi     }
2136*22dc650dSSadaf Ebrahimi   }
2137*22dc650dSSadaf Ebrahimi 
2138*22dc650dSSadaf Ebrahimi *last = string;
2139*22dc650dSSadaf Ebrahimi return rc;
2140*22dc650dSSadaf Ebrahimi }
2141*22dc650dSSadaf Ebrahimi 
2142*22dc650dSSadaf Ebrahimi 
2143*22dc650dSSadaf Ebrahimi 
2144*22dc650dSSadaf Ebrahimi /*************************************************
2145*22dc650dSSadaf Ebrahimi *          Check output text for errors          *
2146*22dc650dSSadaf Ebrahimi *************************************************/
2147*22dc650dSSadaf Ebrahimi 
2148*22dc650dSSadaf Ebrahimi /* Called early, to get errors before doing anything for -O text; also called
2149*22dc650dSSadaf Ebrahimi from callouts to check before outputting.
2150*22dc650dSSadaf Ebrahimi 
2151*22dc650dSSadaf Ebrahimi Arguments:
2152*22dc650dSSadaf Ebrahimi   string    an --output text string
2153*22dc650dSSadaf Ebrahimi   callout   TRUE if in a callout (stops printing errors)
2154*22dc650dSSadaf Ebrahimi 
2155*22dc650dSSadaf Ebrahimi Returns:    TRUE if OK, FALSE on error
2156*22dc650dSSadaf Ebrahimi */
2157*22dc650dSSadaf Ebrahimi 
2158*22dc650dSSadaf Ebrahimi static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2159*22dc650dSSadaf Ebrahimi syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2160*22dc650dSSadaf Ebrahimi {
2161*22dc650dSSadaf Ebrahimi uint32_t value;
2162*22dc650dSSadaf Ebrahimi PCRE2_SPTR begin = string;
2163*22dc650dSSadaf Ebrahimi 
2164*22dc650dSSadaf Ebrahimi for (; *string != 0; string++)
2165*22dc650dSSadaf Ebrahimi   {
2166*22dc650dSSadaf Ebrahimi   if (*string == '$' &&
2167*22dc650dSSadaf Ebrahimi     decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2168*22dc650dSSadaf Ebrahimi       return FALSE;
2169*22dc650dSSadaf Ebrahimi   }
2170*22dc650dSSadaf Ebrahimi 
2171*22dc650dSSadaf Ebrahimi return TRUE;
2172*22dc650dSSadaf Ebrahimi }
2173*22dc650dSSadaf Ebrahimi 
2174*22dc650dSSadaf Ebrahimi 
2175*22dc650dSSadaf Ebrahimi /*************************************************
2176*22dc650dSSadaf Ebrahimi *              Display output text               *
2177*22dc650dSSadaf Ebrahimi *************************************************/
2178*22dc650dSSadaf Ebrahimi 
2179*22dc650dSSadaf Ebrahimi /* Display the output text, which is assumed to have already been syntax
2180*22dc650dSSadaf Ebrahimi checked. Output may contain escape sequences started by the dollar sign.
2181*22dc650dSSadaf Ebrahimi 
2182*22dc650dSSadaf Ebrahimi Arguments:
2183*22dc650dSSadaf Ebrahimi   string:       the output text
2184*22dc650dSSadaf Ebrahimi   callout:      TRUE for the builtin callout, FALSE for --output
2185*22dc650dSSadaf Ebrahimi   subject       the start of the subject
2186*22dc650dSSadaf Ebrahimi   ovector:      capture offsets
2187*22dc650dSSadaf Ebrahimi   capture_top:  number of captures
2188*22dc650dSSadaf Ebrahimi 
2189*22dc650dSSadaf Ebrahimi Returns:        TRUE if something was output, other than newline
2190*22dc650dSSadaf Ebrahimi                 FALSE if nothing was output, or newline was last output
2191*22dc650dSSadaf Ebrahimi */
2192*22dc650dSSadaf Ebrahimi 
2193*22dc650dSSadaf Ebrahimi static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2194*22dc650dSSadaf Ebrahimi display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2195*22dc650dSSadaf Ebrahimi   PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2196*22dc650dSSadaf Ebrahimi {
2197*22dc650dSSadaf Ebrahimi uint32_t value;
2198*22dc650dSSadaf Ebrahimi BOOL printed = FALSE;
2199*22dc650dSSadaf Ebrahimi PCRE2_SPTR begin = string;
2200*22dc650dSSadaf Ebrahimi 
2201*22dc650dSSadaf Ebrahimi for (; *string != 0; string++)
2202*22dc650dSSadaf Ebrahimi   {
2203*22dc650dSSadaf Ebrahimi   if (*string == '$')
2204*22dc650dSSadaf Ebrahimi     {
2205*22dc650dSSadaf Ebrahimi     switch(decode_dollar_escape(begin, string, callout, &value, &string))
2206*22dc650dSSadaf Ebrahimi       {
2207*22dc650dSSadaf Ebrahimi       case DDE_CHAR:
2208*22dc650dSSadaf Ebrahimi       if (value == STDOUT_NL_CODE)
2209*22dc650dSSadaf Ebrahimi         {
2210*22dc650dSSadaf Ebrahimi         fprintf(stdout, STDOUT_NL);
2211*22dc650dSSadaf Ebrahimi         printed = FALSE;
2212*22dc650dSSadaf Ebrahimi         continue;
2213*22dc650dSSadaf Ebrahimi         }
2214*22dc650dSSadaf Ebrahimi       break;  /* Will print value */
2215*22dc650dSSadaf Ebrahimi 
2216*22dc650dSSadaf Ebrahimi       case DDE_CAPTURE:
2217*22dc650dSSadaf Ebrahimi       if (value < capture_top)
2218*22dc650dSSadaf Ebrahimi         {
2219*22dc650dSSadaf Ebrahimi         PCRE2_SIZE capturesize;
2220*22dc650dSSadaf Ebrahimi         value *= 2;
2221*22dc650dSSadaf Ebrahimi         capturesize = ovector[value + 1] - ovector[value];
2222*22dc650dSSadaf Ebrahimi         if (capturesize > 0)
2223*22dc650dSSadaf Ebrahimi           {
2224*22dc650dSSadaf Ebrahimi           print_match(subject + ovector[value], capturesize);
2225*22dc650dSSadaf Ebrahimi           printed = TRUE;
2226*22dc650dSSadaf Ebrahimi           }
2227*22dc650dSSadaf Ebrahimi         }
2228*22dc650dSSadaf Ebrahimi       continue;
2229*22dc650dSSadaf Ebrahimi 
2230*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_START */
2231*22dc650dSSadaf Ebrahimi       default:  /* Should not occur */
2232*22dc650dSSadaf Ebrahimi       break;
2233*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_STOP */
2234*22dc650dSSadaf Ebrahimi       }
2235*22dc650dSSadaf Ebrahimi     }
2236*22dc650dSSadaf Ebrahimi 
2237*22dc650dSSadaf Ebrahimi   else value = *string;  /* Not a $ escape */
2238*22dc650dSSadaf Ebrahimi 
2239*22dc650dSSadaf Ebrahimi   if (!utf || value <= 127) fprintf(stdout, "%c", value); else
2240*22dc650dSSadaf Ebrahimi     {
2241*22dc650dSSadaf Ebrahimi     int n = ord2utf8(value);
2242*22dc650dSSadaf Ebrahimi     for (int i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2243*22dc650dSSadaf Ebrahimi     }
2244*22dc650dSSadaf Ebrahimi 
2245*22dc650dSSadaf Ebrahimi   printed = TRUE;
2246*22dc650dSSadaf Ebrahimi   }
2247*22dc650dSSadaf Ebrahimi 
2248*22dc650dSSadaf Ebrahimi return printed;
2249*22dc650dSSadaf Ebrahimi }
2250*22dc650dSSadaf Ebrahimi 
2251*22dc650dSSadaf Ebrahimi 
2252*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT
2253*22dc650dSSadaf Ebrahimi 
2254*22dc650dSSadaf Ebrahimi /*************************************************
2255*22dc650dSSadaf Ebrahimi *        Parse and execute callout scripts       *
2256*22dc650dSSadaf Ebrahimi *************************************************/
2257*22dc650dSSadaf Ebrahimi 
2258*22dc650dSSadaf Ebrahimi /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2259*22dc650dSSadaf Ebrahimi string block and executes the program specified by the string. The string is a
2260*22dc650dSSadaf Ebrahimi list of substrings separated by pipe characters. The first substring represents
2261*22dc650dSSadaf Ebrahimi the executable name, and the following substrings specify the arguments:
2262*22dc650dSSadaf Ebrahimi 
2263*22dc650dSSadaf Ebrahimi   program_name|param1|param2|...
2264*22dc650dSSadaf Ebrahimi 
2265*22dc650dSSadaf Ebrahimi Any substring (including the program name) can contain escape sequences
2266*22dc650dSSadaf Ebrahimi started by the dollar character. The escape sequences are substituted as
2267*22dc650dSSadaf Ebrahimi follows:
2268*22dc650dSSadaf Ebrahimi 
2269*22dc650dSSadaf Ebrahimi   $<digits> or ${<digits>} is replaced by the captured substring of the given
2270*22dc650dSSadaf Ebrahimi   decimal number, which must be greater than zero. If the number is greater
2271*22dc650dSSadaf Ebrahimi   than the number of capturing substrings, or if the capture is unset, the
2272*22dc650dSSadaf Ebrahimi   replacement is empty.
2273*22dc650dSSadaf Ebrahimi 
2274*22dc650dSSadaf Ebrahimi   Any other character is substituted by itself. E.g: $$ is replaced by a single
2275*22dc650dSSadaf Ebrahimi   dollar or $| replaced by a pipe character.
2276*22dc650dSSadaf Ebrahimi 
2277*22dc650dSSadaf Ebrahimi Alternatively, if string starts with pipe, the remainder is taken as an output
2278*22dc650dSSadaf Ebrahimi string, same as --output. This is the only form that is supported if
2279*22dc650dSSadaf Ebrahimi SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
2280*22dc650dSSadaf Ebrahimi separate each callout, defaulting to newline.
2281*22dc650dSSadaf Ebrahimi 
2282*22dc650dSSadaf Ebrahimi Example:
2283*22dc650dSSadaf Ebrahimi 
2284*22dc650dSSadaf Ebrahimi   echo -e "abcde\n12345" | pcre2grep \
2285*22dc650dSSadaf Ebrahimi     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2286*22dc650dSSadaf Ebrahimi 
2287*22dc650dSSadaf Ebrahimi   Output:
2288*22dc650dSSadaf Ebrahimi 
2289*22dc650dSSadaf Ebrahimi     Arg1: [a] [bcd] [d] Arg2: |a| ()
2290*22dc650dSSadaf Ebrahimi     abcde
2291*22dc650dSSadaf Ebrahimi     Arg1: [1] [234] [4] Arg2: |1| ()
2292*22dc650dSSadaf Ebrahimi     12345
2293*22dc650dSSadaf Ebrahimi 
2294*22dc650dSSadaf Ebrahimi Arguments:
2295*22dc650dSSadaf Ebrahimi   blockptr     the callout block
2296*22dc650dSSadaf Ebrahimi 
2297*22dc650dSSadaf Ebrahimi Returns:       currently it always returns with 0
2298*22dc650dSSadaf Ebrahimi */
2299*22dc650dSSadaf Ebrahimi 
2300*22dc650dSSadaf Ebrahimi static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2301*22dc650dSSadaf Ebrahimi pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2302*22dc650dSSadaf Ebrahimi {
2303*22dc650dSSadaf Ebrahimi PCRE2_SIZE length = calloutptr->callout_string_length;
2304*22dc650dSSadaf Ebrahimi PCRE2_SPTR string = calloutptr->callout_string;
2305*22dc650dSSadaf Ebrahimi PCRE2_SPTR subject = calloutptr->subject;
2306*22dc650dSSadaf Ebrahimi PCRE2_SIZE *ovector = calloutptr->offset_vector;
2307*22dc650dSSadaf Ebrahimi PCRE2_SIZE capture_top = calloutptr->capture_top;
2308*22dc650dSSadaf Ebrahimi 
2309*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2310*22dc650dSSadaf Ebrahimi PCRE2_SIZE argsvectorlen = 2;
2311*22dc650dSSadaf Ebrahimi PCRE2_SIZE argslen = 1;
2312*22dc650dSSadaf Ebrahimi char *args;
2313*22dc650dSSadaf Ebrahimi char *argsptr;
2314*22dc650dSSadaf Ebrahimi char **argsvector;
2315*22dc650dSSadaf Ebrahimi char **argsvectorptr;
2316*22dc650dSSadaf Ebrahimi #ifndef WIN32
2317*22dc650dSSadaf Ebrahimi pid_t pid;
2318*22dc650dSSadaf Ebrahimi #endif
2319*22dc650dSSadaf Ebrahimi int result = 0;
2320*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2321*22dc650dSSadaf Ebrahimi 
2322*22dc650dSSadaf Ebrahimi (void)unused;   /* Avoid compiler warning */
2323*22dc650dSSadaf Ebrahimi 
2324*22dc650dSSadaf Ebrahimi /* Only callouts with strings are supported. */
2325*22dc650dSSadaf Ebrahimi 
2326*22dc650dSSadaf Ebrahimi if (string == NULL || length == 0) return 0;
2327*22dc650dSSadaf Ebrahimi 
2328*22dc650dSSadaf Ebrahimi /* If there's no command, output the remainder directly. */
2329*22dc650dSSadaf Ebrahimi 
2330*22dc650dSSadaf Ebrahimi if (*string == '|')
2331*22dc650dSSadaf Ebrahimi   {
2332*22dc650dSSadaf Ebrahimi   string++;
2333*22dc650dSSadaf Ebrahimi   if (!syntax_check_output_text(string, TRUE)) return 0;
2334*22dc650dSSadaf Ebrahimi   (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2335*22dc650dSSadaf Ebrahimi   return 0;
2336*22dc650dSSadaf Ebrahimi   }
2337*22dc650dSSadaf Ebrahimi 
2338*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2339*22dc650dSSadaf Ebrahimi return 0;
2340*22dc650dSSadaf Ebrahimi #else
2341*22dc650dSSadaf Ebrahimi 
2342*22dc650dSSadaf Ebrahimi /* Checking syntax and compute the number of string fragments. Callout strings
2343*22dc650dSSadaf Ebrahimi are silently ignored in the event of a syntax error. */
2344*22dc650dSSadaf Ebrahimi 
2345*22dc650dSSadaf Ebrahimi while (length > 0)
2346*22dc650dSSadaf Ebrahimi   {
2347*22dc650dSSadaf Ebrahimi   if (*string == '|')
2348*22dc650dSSadaf Ebrahimi     {
2349*22dc650dSSadaf Ebrahimi     argsvectorlen++;
2350*22dc650dSSadaf Ebrahimi     if (argsvectorlen > 10000) return 0;  /* Too many args */
2351*22dc650dSSadaf Ebrahimi     }
2352*22dc650dSSadaf Ebrahimi 
2353*22dc650dSSadaf Ebrahimi   else if (*string == '$')
2354*22dc650dSSadaf Ebrahimi     {
2355*22dc650dSSadaf Ebrahimi     uint32_t value;
2356*22dc650dSSadaf Ebrahimi     PCRE2_SPTR begin = string;
2357*22dc650dSSadaf Ebrahimi 
2358*22dc650dSSadaf Ebrahimi     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2359*22dc650dSSadaf Ebrahimi       {
2360*22dc650dSSadaf Ebrahimi       case DDE_CAPTURE:
2361*22dc650dSSadaf Ebrahimi       if (value < capture_top)
2362*22dc650dSSadaf Ebrahimi         {
2363*22dc650dSSadaf Ebrahimi         value *= 2;
2364*22dc650dSSadaf Ebrahimi         argslen += ovector[value + 1] - ovector[value];
2365*22dc650dSSadaf Ebrahimi         }
2366*22dc650dSSadaf Ebrahimi       argslen--;   /* Negate the effect of argslen++ below. */
2367*22dc650dSSadaf Ebrahimi       break;
2368*22dc650dSSadaf Ebrahimi 
2369*22dc650dSSadaf Ebrahimi       case DDE_CHAR:
2370*22dc650dSSadaf Ebrahimi       if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2371*22dc650dSSadaf Ebrahimi         else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2372*22dc650dSSadaf Ebrahimi       break;
2373*22dc650dSSadaf Ebrahimi 
2374*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_START */
2375*22dc650dSSadaf Ebrahimi       default:         /* Should not occur */
2376*22dc650dSSadaf Ebrahimi       case DDE_ERROR:
2377*22dc650dSSadaf Ebrahimi       return 0;
2378*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_STOP */
2379*22dc650dSSadaf Ebrahimi       }
2380*22dc650dSSadaf Ebrahimi 
2381*22dc650dSSadaf Ebrahimi     length -= (string - begin);
2382*22dc650dSSadaf Ebrahimi     }
2383*22dc650dSSadaf Ebrahimi 
2384*22dc650dSSadaf Ebrahimi   string++;
2385*22dc650dSSadaf Ebrahimi   length--;
2386*22dc650dSSadaf Ebrahimi   argslen++;
2387*22dc650dSSadaf Ebrahimi   }
2388*22dc650dSSadaf Ebrahimi 
2389*22dc650dSSadaf Ebrahimi /* Get memory for the argument vector and its strings. */
2390*22dc650dSSadaf Ebrahimi 
2391*22dc650dSSadaf Ebrahimi args = (char*)malloc(argslen);
2392*22dc650dSSadaf Ebrahimi if (args == NULL) return 0;
2393*22dc650dSSadaf Ebrahimi 
2394*22dc650dSSadaf Ebrahimi argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2395*22dc650dSSadaf Ebrahimi if (argsvector == NULL)
2396*22dc650dSSadaf Ebrahimi   {
2397*22dc650dSSadaf Ebrahimi   /* LCOV_EXCL_START */
2398*22dc650dSSadaf Ebrahimi   free(args);
2399*22dc650dSSadaf Ebrahimi   return 0;
2400*22dc650dSSadaf Ebrahimi   /* LCOV_EXCL_STOP */
2401*22dc650dSSadaf Ebrahimi   }
2402*22dc650dSSadaf Ebrahimi 
2403*22dc650dSSadaf Ebrahimi /* Now reprocess the string and set up the arguments. */
2404*22dc650dSSadaf Ebrahimi 
2405*22dc650dSSadaf Ebrahimi argsptr = args;
2406*22dc650dSSadaf Ebrahimi argsvectorptr = argsvector;
2407*22dc650dSSadaf Ebrahimi *argsvectorptr++ = argsptr;
2408*22dc650dSSadaf Ebrahimi 
2409*22dc650dSSadaf Ebrahimi length = calloutptr->callout_string_length;
2410*22dc650dSSadaf Ebrahimi string = calloutptr->callout_string;
2411*22dc650dSSadaf Ebrahimi 
2412*22dc650dSSadaf Ebrahimi while (length > 0)
2413*22dc650dSSadaf Ebrahimi   {
2414*22dc650dSSadaf Ebrahimi   if (*string == '|')
2415*22dc650dSSadaf Ebrahimi     {
2416*22dc650dSSadaf Ebrahimi     *argsptr++ = '\0';
2417*22dc650dSSadaf Ebrahimi     *argsvectorptr++ = argsptr;
2418*22dc650dSSadaf Ebrahimi     }
2419*22dc650dSSadaf Ebrahimi 
2420*22dc650dSSadaf Ebrahimi   else if (*string == '$')
2421*22dc650dSSadaf Ebrahimi     {
2422*22dc650dSSadaf Ebrahimi     uint32_t value;
2423*22dc650dSSadaf Ebrahimi     PCRE2_SPTR begin = string;
2424*22dc650dSSadaf Ebrahimi 
2425*22dc650dSSadaf Ebrahimi     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2426*22dc650dSSadaf Ebrahimi       {
2427*22dc650dSSadaf Ebrahimi       case DDE_CAPTURE:
2428*22dc650dSSadaf Ebrahimi       if (value < capture_top)
2429*22dc650dSSadaf Ebrahimi         {
2430*22dc650dSSadaf Ebrahimi         PCRE2_SIZE capturesize;
2431*22dc650dSSadaf Ebrahimi         value *= 2;
2432*22dc650dSSadaf Ebrahimi         capturesize = ovector[value + 1] - ovector[value];
2433*22dc650dSSadaf Ebrahimi         memcpy(argsptr, subject + ovector[value], capturesize);
2434*22dc650dSSadaf Ebrahimi         argsptr += capturesize;
2435*22dc650dSSadaf Ebrahimi         }
2436*22dc650dSSadaf Ebrahimi       break;
2437*22dc650dSSadaf Ebrahimi 
2438*22dc650dSSadaf Ebrahimi       case DDE_CHAR:
2439*22dc650dSSadaf Ebrahimi       if (value == STDOUT_NL_CODE)
2440*22dc650dSSadaf Ebrahimi         {
2441*22dc650dSSadaf Ebrahimi         memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2442*22dc650dSSadaf Ebrahimi         argsptr += STDOUT_NL_LEN;
2443*22dc650dSSadaf Ebrahimi         }
2444*22dc650dSSadaf Ebrahimi       else if (utf && value > 127)
2445*22dc650dSSadaf Ebrahimi         {
2446*22dc650dSSadaf Ebrahimi         int n = ord2utf8(value);
2447*22dc650dSSadaf Ebrahimi         memcpy(argsptr, utf8_buffer, n);
2448*22dc650dSSadaf Ebrahimi         argsptr += n;
2449*22dc650dSSadaf Ebrahimi         }
2450*22dc650dSSadaf Ebrahimi       else
2451*22dc650dSSadaf Ebrahimi         {
2452*22dc650dSSadaf Ebrahimi         *argsptr++ = value;
2453*22dc650dSSadaf Ebrahimi         }
2454*22dc650dSSadaf Ebrahimi       break;
2455*22dc650dSSadaf Ebrahimi 
2456*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_START */
2457*22dc650dSSadaf Ebrahimi       default:         /* Even though this should not occur, the string having */
2458*22dc650dSSadaf Ebrahimi       case DDE_ERROR:  /* been checked above, we need to include the free() */
2459*22dc650dSSadaf Ebrahimi       free(args);      /* calls so that source checkers do not complain. */
2460*22dc650dSSadaf Ebrahimi       free(argsvector);
2461*22dc650dSSadaf Ebrahimi       return 0;
2462*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_STOP */
2463*22dc650dSSadaf Ebrahimi       }
2464*22dc650dSSadaf Ebrahimi 
2465*22dc650dSSadaf Ebrahimi     length -= (string - begin);
2466*22dc650dSSadaf Ebrahimi     }
2467*22dc650dSSadaf Ebrahimi 
2468*22dc650dSSadaf Ebrahimi   else *argsptr++ = *string;
2469*22dc650dSSadaf Ebrahimi 
2470*22dc650dSSadaf Ebrahimi   /* Advance along the string */
2471*22dc650dSSadaf Ebrahimi 
2472*22dc650dSSadaf Ebrahimi   string++;
2473*22dc650dSSadaf Ebrahimi   length--;
2474*22dc650dSSadaf Ebrahimi   }
2475*22dc650dSSadaf Ebrahimi 
2476*22dc650dSSadaf Ebrahimi *argsptr++ = '\0';
2477*22dc650dSSadaf Ebrahimi *argsvectorptr = NULL;
2478*22dc650dSSadaf Ebrahimi 
2479*22dc650dSSadaf Ebrahimi /* Running an external command is system-dependent. Handle Windows and VMS as
2480*22dc650dSSadaf Ebrahimi necessary, otherwise assume fork(). */
2481*22dc650dSSadaf Ebrahimi 
2482*22dc650dSSadaf Ebrahimi #ifdef WIN32
2483*22dc650dSSadaf Ebrahimi result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2484*22dc650dSSadaf Ebrahimi 
2485*22dc650dSSadaf Ebrahimi #elif defined __VMS
2486*22dc650dSSadaf Ebrahimi   {
2487*22dc650dSSadaf Ebrahimi   char cmdbuf[500];
2488*22dc650dSSadaf Ebrahimi   short i = 0;
2489*22dc650dSSadaf Ebrahimi   int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2490*22dc650dSSadaf Ebrahimi   $DESCRIPTOR(cmd, cmdbuf);
2491*22dc650dSSadaf Ebrahimi 
2492*22dc650dSSadaf Ebrahimi   cmdbuf[0] = 0;
2493*22dc650dSSadaf Ebrahimi   while (argsvector[i])
2494*22dc650dSSadaf Ebrahimi   {
2495*22dc650dSSadaf Ebrahimi     strcat(cmdbuf, argsvector[i]);
2496*22dc650dSSadaf Ebrahimi     strcat(cmdbuf, " ");
2497*22dc650dSSadaf Ebrahimi     i++;
2498*22dc650dSSadaf Ebrahimi   }
2499*22dc650dSSadaf Ebrahimi   cmd.dsc$w_length = strlen(cmdbuf) - 1;
2500*22dc650dSSadaf Ebrahimi   status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2501*22dc650dSSadaf Ebrahimi   if (!(status & 1)) result = 0;
2502*22dc650dSSadaf Ebrahimi   else result = retstat & 1 ? 0 : 1;
2503*22dc650dSSadaf Ebrahimi   }
2504*22dc650dSSadaf Ebrahimi 
2505*22dc650dSSadaf Ebrahimi #else  /* Neither Windows nor VMS */
2506*22dc650dSSadaf Ebrahimi pid = fork();
2507*22dc650dSSadaf Ebrahimi if (pid == 0)
2508*22dc650dSSadaf Ebrahimi   {
2509*22dc650dSSadaf Ebrahimi   (void)execv(argsvector[0], argsvector);
2510*22dc650dSSadaf Ebrahimi   /* Control gets here if there is an error, e.g. a non-existent program */
2511*22dc650dSSadaf Ebrahimi   exit(1);
2512*22dc650dSSadaf Ebrahimi   }
2513*22dc650dSSadaf Ebrahimi else if (pid > 0)
2514*22dc650dSSadaf Ebrahimi   {
2515*22dc650dSSadaf Ebrahimi   (void)fflush(stdout);
2516*22dc650dSSadaf Ebrahimi   (void)waitpid(pid, &result, 0);
2517*22dc650dSSadaf Ebrahimi   (void)fflush(stdout);
2518*22dc650dSSadaf Ebrahimi   }
2519*22dc650dSSadaf Ebrahimi #endif  /* End Windows/VMS/other handling */
2520*22dc650dSSadaf Ebrahimi 
2521*22dc650dSSadaf Ebrahimi free(args);
2522*22dc650dSSadaf Ebrahimi free(argsvector);
2523*22dc650dSSadaf Ebrahimi 
2524*22dc650dSSadaf Ebrahimi /* Currently negative return values are not supported, only zero (match
2525*22dc650dSSadaf Ebrahimi continues) or non-zero (match fails). */
2526*22dc650dSSadaf Ebrahimi 
2527*22dc650dSSadaf Ebrahimi return result != 0;
2528*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2529*22dc650dSSadaf Ebrahimi }
2530*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_PCRE2GREP_CALLOUT */
2531*22dc650dSSadaf Ebrahimi 
2532*22dc650dSSadaf Ebrahimi 
2533*22dc650dSSadaf Ebrahimi 
2534*22dc650dSSadaf Ebrahimi /*************************************************
2535*22dc650dSSadaf Ebrahimi *     Read a portion of the file into buffer     *
2536*22dc650dSSadaf Ebrahimi *************************************************/
2537*22dc650dSSadaf Ebrahimi 
2538*22dc650dSSadaf Ebrahimi static PCRE2_SIZE
fill_buffer(void * handle,int frtype,char * buffer,PCRE2_SIZE length,BOOL input_line_buffered)2539*22dc650dSSadaf Ebrahimi fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length,
2540*22dc650dSSadaf Ebrahimi   BOOL input_line_buffered)
2541*22dc650dSSadaf Ebrahimi {
2542*22dc650dSSadaf Ebrahimi (void)frtype;  /* Avoid warning when not used */
2543*22dc650dSSadaf Ebrahimi 
2544*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
2545*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBZ)
2546*22dc650dSSadaf Ebrahimi   return gzread((gzFile)handle, buffer, length);
2547*22dc650dSSadaf Ebrahimi else
2548*22dc650dSSadaf Ebrahimi #endif
2549*22dc650dSSadaf Ebrahimi 
2550*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
2551*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBBZ2)
2552*22dc650dSSadaf Ebrahimi   return (PCRE2_SIZE)BZ2_bzread((BZFILE *)handle, buffer, length);
2553*22dc650dSSadaf Ebrahimi else
2554*22dc650dSSadaf Ebrahimi #endif
2555*22dc650dSSadaf Ebrahimi 
2556*22dc650dSSadaf Ebrahimi return (input_line_buffered ?
2557*22dc650dSSadaf Ebrahimi   read_one_line(buffer, length, (FILE *)handle) :
2558*22dc650dSSadaf Ebrahimi   fread(buffer, 1, length, (FILE *)handle));
2559*22dc650dSSadaf Ebrahimi }
2560*22dc650dSSadaf Ebrahimi 
2561*22dc650dSSadaf Ebrahimi 
2562*22dc650dSSadaf Ebrahimi 
2563*22dc650dSSadaf Ebrahimi /*************************************************
2564*22dc650dSSadaf Ebrahimi *            Grep an individual file             *
2565*22dc650dSSadaf Ebrahimi *************************************************/
2566*22dc650dSSadaf Ebrahimi 
2567*22dc650dSSadaf Ebrahimi /* This is called from grep_or_recurse() below. It uses a buffer that is three
2568*22dc650dSSadaf Ebrahimi times the value of bufthird. The matching point is never allowed to stray into
2569*22dc650dSSadaf Ebrahimi the top third of the buffer, thus keeping more of the file available for
2570*22dc650dSSadaf Ebrahimi context printing or for multiline scanning. For large files, the pointer will
2571*22dc650dSSadaf Ebrahimi be in the middle third most of the time, so the bottom third is available for
2572*22dc650dSSadaf Ebrahimi "before" context printing.
2573*22dc650dSSadaf Ebrahimi 
2574*22dc650dSSadaf Ebrahimi Arguments:
2575*22dc650dSSadaf Ebrahimi   handle       the fopened FILE stream for a normal file
2576*22dc650dSSadaf Ebrahimi                the gzFile pointer when reading is via libz
2577*22dc650dSSadaf Ebrahimi                the BZFILE pointer when reading is via libbz2
2578*22dc650dSSadaf Ebrahimi   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2579*22dc650dSSadaf Ebrahimi   filename     the file name or NULL (for errors)
2580*22dc650dSSadaf Ebrahimi   printname    the file name if it is to be printed for each match
2581*22dc650dSSadaf Ebrahimi                or NULL if the file name is not to be printed
2582*22dc650dSSadaf Ebrahimi                it cannot be NULL if filenames[_nomatch]_only is set
2583*22dc650dSSadaf Ebrahimi 
2584*22dc650dSSadaf Ebrahimi Returns:       0 if there was at least one match
2585*22dc650dSSadaf Ebrahimi                1 otherwise (no matches)
2586*22dc650dSSadaf Ebrahimi                2 if an overlong line is encountered
2587*22dc650dSSadaf Ebrahimi                3 if there is a read error on a .bz2 file
2588*22dc650dSSadaf Ebrahimi */
2589*22dc650dSSadaf Ebrahimi 
2590*22dc650dSSadaf Ebrahimi static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2591*22dc650dSSadaf Ebrahimi pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2592*22dc650dSSadaf Ebrahimi {
2593*22dc650dSSadaf Ebrahimi int rc = 1;
2594*22dc650dSSadaf Ebrahimi int filepos = 0;
2595*22dc650dSSadaf Ebrahimi unsigned long int linenumber = 1;
2596*22dc650dSSadaf Ebrahimi unsigned long int lastmatchnumber = 0;
2597*22dc650dSSadaf Ebrahimi unsigned long int count = 0;
2598*22dc650dSSadaf Ebrahimi long int count_matched_lines = 0;
2599*22dc650dSSadaf Ebrahimi char *lastmatchrestart = main_buffer;
2600*22dc650dSSadaf Ebrahimi char *ptr = main_buffer;
2601*22dc650dSSadaf Ebrahimi char *endptr;
2602*22dc650dSSadaf Ebrahimi PCRE2_SIZE bufflength;
2603*22dc650dSSadaf Ebrahimi BOOL binary = FALSE;
2604*22dc650dSSadaf Ebrahimi BOOL endhyphenpending = FALSE;
2605*22dc650dSSadaf Ebrahimi BOOL lines_printed = FALSE;
2606*22dc650dSSadaf Ebrahimi BOOL input_line_buffered = line_buffered;
2607*22dc650dSSadaf Ebrahimi FILE *in = NULL;                    /* Ensure initialized */
2608*22dc650dSSadaf Ebrahimi long stream_start = -1;             /* Only non-negative if relevant */
2609*22dc650dSSadaf Ebrahimi 
2610*22dc650dSSadaf Ebrahimi /* Do the first read into the start of the buffer and set up the pointer to end
2611*22dc650dSSadaf Ebrahimi of what we have. In the case of libz, a non-zipped .gz file will be read as a
2612*22dc650dSSadaf Ebrahimi plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2613*22dc650dSSadaf Ebrahimi fail. */
2614*22dc650dSSadaf Ebrahimi 
2615*22dc650dSSadaf Ebrahimi if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2616*22dc650dSSadaf Ebrahimi   {
2617*22dc650dSSadaf Ebrahimi   in = (FILE *)handle;
2618*22dc650dSSadaf Ebrahimi   if (feof(in)) return 1;
2619*22dc650dSSadaf Ebrahimi   if (is_file_tty(in)) input_line_buffered = TRUE;
2620*22dc650dSSadaf Ebrahimi   else
2621*22dc650dSSadaf Ebrahimi     {
2622*22dc650dSSadaf Ebrahimi     if (count_limit >= 0  && filename == stdin_name)
2623*22dc650dSSadaf Ebrahimi       stream_start = ftell(in);
2624*22dc650dSSadaf Ebrahimi     }
2625*22dc650dSSadaf Ebrahimi   }
2626*22dc650dSSadaf Ebrahimi else input_line_buffered = FALSE;
2627*22dc650dSSadaf Ebrahimi 
2628*22dc650dSSadaf Ebrahimi bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2629*22dc650dSSadaf Ebrahimi   input_line_buffered);
2630*22dc650dSSadaf Ebrahimi 
2631*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
2632*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 3;   /* Gotcha: bufflength is PCRE2_SIZE */
2633*22dc650dSSadaf Ebrahimi #endif
2634*22dc650dSSadaf Ebrahimi 
2635*22dc650dSSadaf Ebrahimi endptr = main_buffer + bufflength;
2636*22dc650dSSadaf Ebrahimi 
2637*22dc650dSSadaf Ebrahimi /* Unless binary-files=text, see if we have a binary file. This uses the same
2638*22dc650dSSadaf Ebrahimi rule as GNU grep, namely, a search for a binary zero byte near the start of the
2639*22dc650dSSadaf Ebrahimi file. However, when the newline convention is binary zero, we can't do this. */
2640*22dc650dSSadaf Ebrahimi 
2641*22dc650dSSadaf Ebrahimi if (binary_files != BIN_TEXT)
2642*22dc650dSSadaf Ebrahimi   {
2643*22dc650dSSadaf Ebrahimi   if (endlinetype != PCRE2_NEWLINE_NUL)
2644*22dc650dSSadaf Ebrahimi     binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2645*22dc650dSSadaf Ebrahimi       != NULL;
2646*22dc650dSSadaf Ebrahimi   if (binary && binary_files == BIN_NOMATCH) return 1;
2647*22dc650dSSadaf Ebrahimi   }
2648*22dc650dSSadaf Ebrahimi 
2649*22dc650dSSadaf Ebrahimi /* Loop while the current pointer is not at the end of the file. For large
2650*22dc650dSSadaf Ebrahimi files, endptr will be at the end of the buffer when we are in the middle of the
2651*22dc650dSSadaf Ebrahimi file, but ptr will never get there, because as soon as it gets over 2/3 of the
2652*22dc650dSSadaf Ebrahimi way, the buffer is shifted left and re-filled. */
2653*22dc650dSSadaf Ebrahimi 
2654*22dc650dSSadaf Ebrahimi while (ptr < endptr)
2655*22dc650dSSadaf Ebrahimi   {
2656*22dc650dSSadaf Ebrahimi   int endlinelength;
2657*22dc650dSSadaf Ebrahimi   int mrc = 0;
2658*22dc650dSSadaf Ebrahimi   unsigned int options = 0;
2659*22dc650dSSadaf Ebrahimi   BOOL match;
2660*22dc650dSSadaf Ebrahimi   BOOL line_matched = FALSE;
2661*22dc650dSSadaf Ebrahimi   char *t = ptr;
2662*22dc650dSSadaf Ebrahimi   PCRE2_SIZE length, linelength;
2663*22dc650dSSadaf Ebrahimi   PCRE2_SIZE startoffset = 0;
2664*22dc650dSSadaf Ebrahimi 
2665*22dc650dSSadaf Ebrahimi   /* If the -m option set a limit for the number of matched or non-matched
2666*22dc650dSSadaf Ebrahimi   lines, check it here. A limit of zero means that no matching is ever done.
2667*22dc650dSSadaf Ebrahimi   For stdin from a file, set the file position. */
2668*22dc650dSSadaf Ebrahimi 
2669*22dc650dSSadaf Ebrahimi   if (count_limit >= 0 && count_matched_lines >= count_limit)
2670*22dc650dSSadaf Ebrahimi     {
2671*22dc650dSSadaf Ebrahimi     if (stream_start >= 0)
2672*22dc650dSSadaf Ebrahimi       (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
2673*22dc650dSSadaf Ebrahimi     rc = (count_limit == 0)? 1 : 0;
2674*22dc650dSSadaf Ebrahimi     break;
2675*22dc650dSSadaf Ebrahimi     }
2676*22dc650dSSadaf Ebrahimi 
2677*22dc650dSSadaf Ebrahimi   /* At this point, ptr is at the start of a line. We need to find the length
2678*22dc650dSSadaf Ebrahimi   of the subject string to pass to pcre2_match(). In multiline mode, it is the
2679*22dc650dSSadaf Ebrahimi   length remainder of the data in the buffer. Otherwise, it is the length of
2680*22dc650dSSadaf Ebrahimi   the next line, excluding the terminating newline. After matching, we always
2681*22dc650dSSadaf Ebrahimi   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2682*22dc650dSSadaf Ebrahimi   option is used for compiling, so that any match is constrained to be in the
2683*22dc650dSSadaf Ebrahimi   first line. */
2684*22dc650dSSadaf Ebrahimi 
2685*22dc650dSSadaf Ebrahimi   t = end_of_line(t, endptr, &endlinelength);
2686*22dc650dSSadaf Ebrahimi   linelength = t - ptr - endlinelength;
2687*22dc650dSSadaf Ebrahimi   length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2688*22dc650dSSadaf Ebrahimi 
2689*22dc650dSSadaf Ebrahimi   /* Check to see if the line we are looking at extends right to the very end
2690*22dc650dSSadaf Ebrahimi   of the buffer without a line terminator. This means the line is too long to
2691*22dc650dSSadaf Ebrahimi   handle at the current buffer size. Until the buffer reaches its maximum size,
2692*22dc650dSSadaf Ebrahimi   try doubling it and reading more data. */
2693*22dc650dSSadaf Ebrahimi 
2694*22dc650dSSadaf Ebrahimi   if (endlinelength == 0 && t == main_buffer + bufsize)
2695*22dc650dSSadaf Ebrahimi     {
2696*22dc650dSSadaf Ebrahimi     if (bufthird < max_bufthird)
2697*22dc650dSSadaf Ebrahimi       {
2698*22dc650dSSadaf Ebrahimi       char *new_buffer;
2699*22dc650dSSadaf Ebrahimi       PCRE2_SIZE new_bufthird = 2*bufthird;
2700*22dc650dSSadaf Ebrahimi 
2701*22dc650dSSadaf Ebrahimi       if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2702*22dc650dSSadaf Ebrahimi       new_buffer = (char *)malloc(3*new_bufthird);
2703*22dc650dSSadaf Ebrahimi 
2704*22dc650dSSadaf Ebrahimi       if (new_buffer == NULL)
2705*22dc650dSSadaf Ebrahimi         {
2706*22dc650dSSadaf Ebrahimi         /* LCOV_EXCL_START */
2707*22dc650dSSadaf Ebrahimi         fprintf(stderr,
2708*22dc650dSSadaf Ebrahimi           "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2709*22dc650dSSadaf Ebrahimi           "pcre2grep: not enough memory to increase the buffer size to %"
2710*22dc650dSSadaf Ebrahimi             SIZ_FORM "\n",
2711*22dc650dSSadaf Ebrahimi           linenumber,
2712*22dc650dSSadaf Ebrahimi           (filename == NULL)? "" : " of file ",
2713*22dc650dSSadaf Ebrahimi           (filename == NULL)? "" : filename,
2714*22dc650dSSadaf Ebrahimi           new_bufthird);
2715*22dc650dSSadaf Ebrahimi         return 2;
2716*22dc650dSSadaf Ebrahimi         /* LCOV_EXCL_STOP */
2717*22dc650dSSadaf Ebrahimi         }
2718*22dc650dSSadaf Ebrahimi 
2719*22dc650dSSadaf Ebrahimi       /* Copy the data and adjust pointers to the new buffer location. */
2720*22dc650dSSadaf Ebrahimi 
2721*22dc650dSSadaf Ebrahimi       memcpy(new_buffer, main_buffer, bufsize);
2722*22dc650dSSadaf Ebrahimi       bufthird = new_bufthird;
2723*22dc650dSSadaf Ebrahimi       bufsize = 3*bufthird;
2724*22dc650dSSadaf Ebrahimi       ptr = new_buffer + (ptr - main_buffer);
2725*22dc650dSSadaf Ebrahimi       lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2726*22dc650dSSadaf Ebrahimi       free(main_buffer);
2727*22dc650dSSadaf Ebrahimi       main_buffer = new_buffer;
2728*22dc650dSSadaf Ebrahimi 
2729*22dc650dSSadaf Ebrahimi       /* Read more data into the buffer and then try to find the line ending
2730*22dc650dSSadaf Ebrahimi       again. */
2731*22dc650dSSadaf Ebrahimi 
2732*22dc650dSSadaf Ebrahimi       bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2733*22dc650dSSadaf Ebrahimi         bufsize - bufflength, input_line_buffered);
2734*22dc650dSSadaf Ebrahimi       endptr = main_buffer + bufflength;
2735*22dc650dSSadaf Ebrahimi       continue;
2736*22dc650dSSadaf Ebrahimi       }
2737*22dc650dSSadaf Ebrahimi     else
2738*22dc650dSSadaf Ebrahimi       {
2739*22dc650dSSadaf Ebrahimi       fprintf(stderr,
2740*22dc650dSSadaf Ebrahimi         "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2741*22dc650dSSadaf Ebrahimi         "pcre2grep: the maximum buffer size is %" SIZ_FORM "\n"
2742*22dc650dSSadaf Ebrahimi         "pcre2grep: use the --max-buffer-size option to change it\n",
2743*22dc650dSSadaf Ebrahimi         linenumber,
2744*22dc650dSSadaf Ebrahimi         (filename == NULL)? "" : " of file ",
2745*22dc650dSSadaf Ebrahimi         (filename == NULL)? "" : filename,
2746*22dc650dSSadaf Ebrahimi         bufthird);
2747*22dc650dSSadaf Ebrahimi       return 2;
2748*22dc650dSSadaf Ebrahimi       }
2749*22dc650dSSadaf Ebrahimi     }
2750*22dc650dSSadaf Ebrahimi 
2751*22dc650dSSadaf Ebrahimi   /* We come back here after a match when only_matching_count is non-zero, in
2752*22dc650dSSadaf Ebrahimi   order to find any further matches in the same line. This applies to
2753*22dc650dSSadaf Ebrahimi   --only-matching, --file-offsets, and --line-offsets. */
2754*22dc650dSSadaf Ebrahimi 
2755*22dc650dSSadaf Ebrahimi   ONLY_MATCHING_RESTART:
2756*22dc650dSSadaf Ebrahimi 
2757*22dc650dSSadaf Ebrahimi   /* Run through all the patterns until one matches or there is an error other
2758*22dc650dSSadaf Ebrahimi   than NOMATCH. This code is in a subroutine so that it can be re-used for
2759*22dc650dSSadaf Ebrahimi   finding subsequent matches when colouring matched lines. After finding one
2760*22dc650dSSadaf Ebrahimi   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2761*22dc650dSSadaf Ebrahimi   this line. */
2762*22dc650dSSadaf Ebrahimi 
2763*22dc650dSSadaf Ebrahimi   match = match_patterns(ptr, length, options, startoffset, &mrc);
2764*22dc650dSSadaf Ebrahimi   options = PCRE2_NOTEMPTY;
2765*22dc650dSSadaf Ebrahimi 
2766*22dc650dSSadaf Ebrahimi   /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2767*22dc650dSSadaf Ebrahimi   only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2768*22dc650dSSadaf Ebrahimi   return code - to output data lines, so that binary zeroes are treated as just
2769*22dc650dSSadaf Ebrahimi   another data character. */
2770*22dc650dSSadaf Ebrahimi 
2771*22dc650dSSadaf Ebrahimi   if (match != invert)
2772*22dc650dSSadaf Ebrahimi     {
2773*22dc650dSSadaf Ebrahimi     BOOL hyphenprinted = FALSE;
2774*22dc650dSSadaf Ebrahimi 
2775*22dc650dSSadaf Ebrahimi     /* We've failed if we want a file that doesn't have any matches. */
2776*22dc650dSSadaf Ebrahimi 
2777*22dc650dSSadaf Ebrahimi     if (filenames == FN_NOMATCH_ONLY) return 1;
2778*22dc650dSSadaf Ebrahimi 
2779*22dc650dSSadaf Ebrahimi     /* Remember that this line matched (for counting matched lines) */
2780*22dc650dSSadaf Ebrahimi 
2781*22dc650dSSadaf Ebrahimi     line_matched = TRUE;
2782*22dc650dSSadaf Ebrahimi 
2783*22dc650dSSadaf Ebrahimi     /* If all we want is a yes/no answer, we can return immediately. */
2784*22dc650dSSadaf Ebrahimi 
2785*22dc650dSSadaf Ebrahimi     if (quiet) return 0;
2786*22dc650dSSadaf Ebrahimi 
2787*22dc650dSSadaf Ebrahimi     /* Just count if just counting is wanted. */
2788*22dc650dSSadaf Ebrahimi 
2789*22dc650dSSadaf Ebrahimi     else if (count_only || show_total_count) count++;
2790*22dc650dSSadaf Ebrahimi 
2791*22dc650dSSadaf Ebrahimi     /* When handling a binary file and binary-files==binary, the "binary"
2792*22dc650dSSadaf Ebrahimi     variable will be set true (it's false in all other cases). In this
2793*22dc650dSSadaf Ebrahimi     situation we just want to output the file name. No need to scan further. */
2794*22dc650dSSadaf Ebrahimi 
2795*22dc650dSSadaf Ebrahimi     else if (binary)
2796*22dc650dSSadaf Ebrahimi       {
2797*22dc650dSSadaf Ebrahimi       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2798*22dc650dSSadaf Ebrahimi       return 0;
2799*22dc650dSSadaf Ebrahimi       }
2800*22dc650dSSadaf Ebrahimi 
2801*22dc650dSSadaf Ebrahimi     /* Likewise, if all we want is a file name, there is no need to scan any
2802*22dc650dSSadaf Ebrahimi     more lines in the file. */
2803*22dc650dSSadaf Ebrahimi 
2804*22dc650dSSadaf Ebrahimi     else if (filenames == FN_MATCH_ONLY)
2805*22dc650dSSadaf Ebrahimi       {
2806*22dc650dSSadaf Ebrahimi       fprintf(stdout, "%s", printname);
2807*22dc650dSSadaf Ebrahimi       if (printname_nl == NULL) fprintf(stdout, "%c", 0);
2808*22dc650dSSadaf Ebrahimi         else fprintf(stdout, "%s", printname_nl);
2809*22dc650dSSadaf Ebrahimi       return 0;
2810*22dc650dSSadaf Ebrahimi       }
2811*22dc650dSSadaf Ebrahimi 
2812*22dc650dSSadaf Ebrahimi     /* The --only-matching option prints just the substring that matched,
2813*22dc650dSSadaf Ebrahimi     and/or one or more captured portions of it, as long as these strings are
2814*22dc650dSSadaf Ebrahimi     not empty. The --file-offsets and --line-offsets options output offsets for
2815*22dc650dSSadaf Ebrahimi     the matching substring (all three set only_matching_count non-zero). None
2816*22dc650dSSadaf Ebrahimi     of these mutually exclusive options prints any context. Afterwards, adjust
2817*22dc650dSSadaf Ebrahimi     the start and then jump back to look for further matches in the same line.
2818*22dc650dSSadaf Ebrahimi     If we are in invert mode, however, nothing is printed and we do not restart
2819*22dc650dSSadaf Ebrahimi     - this could still be useful because the return code is set. */
2820*22dc650dSSadaf Ebrahimi 
2821*22dc650dSSadaf Ebrahimi     else if (only_matching_count != 0)
2822*22dc650dSSadaf Ebrahimi       {
2823*22dc650dSSadaf Ebrahimi       if (!invert)
2824*22dc650dSSadaf Ebrahimi         {
2825*22dc650dSSadaf Ebrahimi         PCRE2_SIZE oldstartoffset;
2826*22dc650dSSadaf Ebrahimi 
2827*22dc650dSSadaf Ebrahimi         if (printname != NULL) fprintf(stdout, "%s%c", printname,
2828*22dc650dSSadaf Ebrahimi           printname_colon);
2829*22dc650dSSadaf Ebrahimi         if (number) fprintf(stdout, "%lu:", linenumber);
2830*22dc650dSSadaf Ebrahimi 
2831*22dc650dSSadaf Ebrahimi         /* Handle --line-offsets */
2832*22dc650dSSadaf Ebrahimi 
2833*22dc650dSSadaf Ebrahimi         if (line_offsets)
2834*22dc650dSSadaf Ebrahimi           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2835*22dc650dSSadaf Ebrahimi             (int)(offsets[1] - offsets[0]));
2836*22dc650dSSadaf Ebrahimi 
2837*22dc650dSSadaf Ebrahimi         /* Handle --file-offsets */
2838*22dc650dSSadaf Ebrahimi 
2839*22dc650dSSadaf Ebrahimi         else if (file_offsets)
2840*22dc650dSSadaf Ebrahimi           fprintf(stdout, "%d,%d" STDOUT_NL,
2841*22dc650dSSadaf Ebrahimi             (int)(filepos + ptr + offsets[0] - ptr),
2842*22dc650dSSadaf Ebrahimi             (int)(offsets[1] - offsets[0]));
2843*22dc650dSSadaf Ebrahimi 
2844*22dc650dSSadaf Ebrahimi         /* Handle --output (which has already been syntax checked) */
2845*22dc650dSSadaf Ebrahimi 
2846*22dc650dSSadaf Ebrahimi         else if (output_text != NULL)
2847*22dc650dSSadaf Ebrahimi           {
2848*22dc650dSSadaf Ebrahimi           (void)display_output_text((PCRE2_SPTR)output_text, FALSE,
2849*22dc650dSSadaf Ebrahimi               (PCRE2_SPTR)ptr, offsets, mrc);
2850*22dc650dSSadaf Ebrahimi           fprintf(stdout, STDOUT_NL);
2851*22dc650dSSadaf Ebrahimi           }
2852*22dc650dSSadaf Ebrahimi 
2853*22dc650dSSadaf Ebrahimi         /* Handle --only-matching, which may occur many times */
2854*22dc650dSSadaf Ebrahimi 
2855*22dc650dSSadaf Ebrahimi         else
2856*22dc650dSSadaf Ebrahimi           {
2857*22dc650dSSadaf Ebrahimi           BOOL printed = FALSE;
2858*22dc650dSSadaf Ebrahimi           omstr *om;
2859*22dc650dSSadaf Ebrahimi 
2860*22dc650dSSadaf Ebrahimi           for (om = only_matching; om != NULL; om = om->next)
2861*22dc650dSSadaf Ebrahimi             {
2862*22dc650dSSadaf Ebrahimi             int n = om->groupnum;
2863*22dc650dSSadaf Ebrahimi             if (n == 0 || n < mrc)
2864*22dc650dSSadaf Ebrahimi               {
2865*22dc650dSSadaf Ebrahimi               int plen = offsets[2*n + 1] - offsets[2*n];
2866*22dc650dSSadaf Ebrahimi               if (plen > 0)
2867*22dc650dSSadaf Ebrahimi                 {
2868*22dc650dSSadaf Ebrahimi                 if (printed && om_separator != NULL)
2869*22dc650dSSadaf Ebrahimi                   fprintf(stdout, "%s", om_separator);
2870*22dc650dSSadaf Ebrahimi                 print_match(ptr + offsets[n*2], plen);
2871*22dc650dSSadaf Ebrahimi                 printed = TRUE;
2872*22dc650dSSadaf Ebrahimi                 }
2873*22dc650dSSadaf Ebrahimi               }
2874*22dc650dSSadaf Ebrahimi             }
2875*22dc650dSSadaf Ebrahimi           if (printed || printname != NULL || number)
2876*22dc650dSSadaf Ebrahimi             fprintf(stdout, STDOUT_NL);
2877*22dc650dSSadaf Ebrahimi           }
2878*22dc650dSSadaf Ebrahimi 
2879*22dc650dSSadaf Ebrahimi         /* Prepare to repeat to find the next match in the line. */
2880*22dc650dSSadaf Ebrahimi 
2881*22dc650dSSadaf Ebrahimi         //match = FALSE;
2882*22dc650dSSadaf Ebrahimi         if (line_buffered) fflush(stdout);
2883*22dc650dSSadaf Ebrahimi         rc = 0;                      /* Had some success */
2884*22dc650dSSadaf Ebrahimi 
2885*22dc650dSSadaf Ebrahimi         /* If the pattern contained a lookbehind that included \K, it is
2886*22dc650dSSadaf Ebrahimi         possible that the end of the match might be at or before the actual
2887*22dc650dSSadaf Ebrahimi         starting offset we have just used. In this case, start one character
2888*22dc650dSSadaf Ebrahimi         further on. */
2889*22dc650dSSadaf Ebrahimi 
2890*22dc650dSSadaf Ebrahimi         startoffset = offsets[1];    /* Restart after the match */
2891*22dc650dSSadaf Ebrahimi         oldstartoffset = pcre2_get_startchar(match_data);
2892*22dc650dSSadaf Ebrahimi         if (startoffset <= oldstartoffset)
2893*22dc650dSSadaf Ebrahimi           {
2894*22dc650dSSadaf Ebrahimi           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2895*22dc650dSSadaf Ebrahimi           startoffset = oldstartoffset + 1;
2896*22dc650dSSadaf Ebrahimi           if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2897*22dc650dSSadaf Ebrahimi           }
2898*22dc650dSSadaf Ebrahimi 
2899*22dc650dSSadaf Ebrahimi         /* If the current match ended past the end of the line (only possible
2900*22dc650dSSadaf Ebrahimi         in multiline mode), we must move on to the line in which it did end
2901*22dc650dSSadaf Ebrahimi         before searching for more matches. */
2902*22dc650dSSadaf Ebrahimi 
2903*22dc650dSSadaf Ebrahimi         while (startoffset > linelength)
2904*22dc650dSSadaf Ebrahimi           {
2905*22dc650dSSadaf Ebrahimi           ptr += linelength + endlinelength;
2906*22dc650dSSadaf Ebrahimi           filepos += (int)(linelength + endlinelength);
2907*22dc650dSSadaf Ebrahimi           linenumber++;
2908*22dc650dSSadaf Ebrahimi           startoffset -= (int)(linelength + endlinelength);
2909*22dc650dSSadaf Ebrahimi           t = end_of_line(ptr, endptr, &endlinelength);
2910*22dc650dSSadaf Ebrahimi           linelength = t - ptr - endlinelength;
2911*22dc650dSSadaf Ebrahimi           length = (PCRE2_SIZE)(endptr - ptr);
2912*22dc650dSSadaf Ebrahimi           }
2913*22dc650dSSadaf Ebrahimi 
2914*22dc650dSSadaf Ebrahimi         goto ONLY_MATCHING_RESTART;
2915*22dc650dSSadaf Ebrahimi         }
2916*22dc650dSSadaf Ebrahimi       }
2917*22dc650dSSadaf Ebrahimi 
2918*22dc650dSSadaf Ebrahimi     /* This is the default case when none of the above options is set. We print
2919*22dc650dSSadaf Ebrahimi     the matching line(s), possibly preceded and/or followed by other lines of
2920*22dc650dSSadaf Ebrahimi     context. */
2921*22dc650dSSadaf Ebrahimi 
2922*22dc650dSSadaf Ebrahimi     else
2923*22dc650dSSadaf Ebrahimi       {
2924*22dc650dSSadaf Ebrahimi       lines_printed = TRUE;
2925*22dc650dSSadaf Ebrahimi 
2926*22dc650dSSadaf Ebrahimi       /* See if there is a requirement to print some "after" lines from a
2927*22dc650dSSadaf Ebrahimi       previous match. We never print any overlaps. */
2928*22dc650dSSadaf Ebrahimi 
2929*22dc650dSSadaf Ebrahimi       if (after_context > 0 && lastmatchnumber > 0)
2930*22dc650dSSadaf Ebrahimi         {
2931*22dc650dSSadaf Ebrahimi         int ellength;
2932*22dc650dSSadaf Ebrahimi         int linecount = 0;
2933*22dc650dSSadaf Ebrahimi         char *p = lastmatchrestart;
2934*22dc650dSSadaf Ebrahimi 
2935*22dc650dSSadaf Ebrahimi         while (p < ptr && linecount < after_context)
2936*22dc650dSSadaf Ebrahimi           {
2937*22dc650dSSadaf Ebrahimi           p = end_of_line(p, ptr, &ellength);
2938*22dc650dSSadaf Ebrahimi           linecount++;
2939*22dc650dSSadaf Ebrahimi           }
2940*22dc650dSSadaf Ebrahimi 
2941*22dc650dSSadaf Ebrahimi         /* It is important to advance lastmatchrestart during this printing so
2942*22dc650dSSadaf Ebrahimi         that it interacts correctly with any "before" printing below. Print
2943*22dc650dSSadaf Ebrahimi         each line's data using fwrite() in case there are binary zeroes. */
2944*22dc650dSSadaf Ebrahimi 
2945*22dc650dSSadaf Ebrahimi         while (lastmatchrestart < p)
2946*22dc650dSSadaf Ebrahimi           {
2947*22dc650dSSadaf Ebrahimi           char *pp = lastmatchrestart;
2948*22dc650dSSadaf Ebrahimi           if (printname != NULL) fprintf(stdout, "%s%c", printname,
2949*22dc650dSSadaf Ebrahimi             printname_hyphen);
2950*22dc650dSSadaf Ebrahimi           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2951*22dc650dSSadaf Ebrahimi           pp = end_of_line(pp, endptr, &ellength);
2952*22dc650dSSadaf Ebrahimi           FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2953*22dc650dSSadaf Ebrahimi           lastmatchrestart = pp;
2954*22dc650dSSadaf Ebrahimi           }
2955*22dc650dSSadaf Ebrahimi         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2956*22dc650dSSadaf Ebrahimi         }
2957*22dc650dSSadaf Ebrahimi 
2958*22dc650dSSadaf Ebrahimi       /* If there were non-contiguous lines printed above, insert hyphens. */
2959*22dc650dSSadaf Ebrahimi 
2960*22dc650dSSadaf Ebrahimi       if (hyphenpending)
2961*22dc650dSSadaf Ebrahimi         {
2962*22dc650dSSadaf Ebrahimi         if (group_separator != NULL)
2963*22dc650dSSadaf Ebrahimi           fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
2964*22dc650dSSadaf Ebrahimi         hyphenpending = FALSE;
2965*22dc650dSSadaf Ebrahimi         hyphenprinted = TRUE;
2966*22dc650dSSadaf Ebrahimi         }
2967*22dc650dSSadaf Ebrahimi 
2968*22dc650dSSadaf Ebrahimi       /* See if there is a requirement to print some "before" lines for this
2969*22dc650dSSadaf Ebrahimi       match. Again, don't print overlaps. */
2970*22dc650dSSadaf Ebrahimi 
2971*22dc650dSSadaf Ebrahimi       if (before_context > 0)
2972*22dc650dSSadaf Ebrahimi         {
2973*22dc650dSSadaf Ebrahimi         int linecount = 0;
2974*22dc650dSSadaf Ebrahimi         char *p = ptr;
2975*22dc650dSSadaf Ebrahimi 
2976*22dc650dSSadaf Ebrahimi         while (p > main_buffer &&
2977*22dc650dSSadaf Ebrahimi                (lastmatchnumber == 0 || p > lastmatchrestart) &&
2978*22dc650dSSadaf Ebrahimi                linecount < before_context)
2979*22dc650dSSadaf Ebrahimi           {
2980*22dc650dSSadaf Ebrahimi           linecount++;
2981*22dc650dSSadaf Ebrahimi           p = previous_line(p, main_buffer);
2982*22dc650dSSadaf Ebrahimi           }
2983*22dc650dSSadaf Ebrahimi 
2984*22dc650dSSadaf Ebrahimi         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted &&
2985*22dc650dSSadaf Ebrahimi             group_separator != NULL)
2986*22dc650dSSadaf Ebrahimi           fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
2987*22dc650dSSadaf Ebrahimi 
2988*22dc650dSSadaf Ebrahimi         while (p < ptr)
2989*22dc650dSSadaf Ebrahimi           {
2990*22dc650dSSadaf Ebrahimi           int ellength;
2991*22dc650dSSadaf Ebrahimi           char *pp = p;
2992*22dc650dSSadaf Ebrahimi           if (printname != NULL) fprintf(stdout, "%s%c", printname,
2993*22dc650dSSadaf Ebrahimi             printname_hyphen);
2994*22dc650dSSadaf Ebrahimi           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2995*22dc650dSSadaf Ebrahimi           pp = end_of_line(pp, endptr, &ellength);
2996*22dc650dSSadaf Ebrahimi           FWRITE_IGNORE(p, 1, pp - p, stdout);
2997*22dc650dSSadaf Ebrahimi           p = pp;
2998*22dc650dSSadaf Ebrahimi           }
2999*22dc650dSSadaf Ebrahimi         }
3000*22dc650dSSadaf Ebrahimi 
3001*22dc650dSSadaf Ebrahimi       /* Now print the matching line(s); ensure we set hyphenpending at the end
3002*22dc650dSSadaf Ebrahimi       of the file if any context lines are being output. */
3003*22dc650dSSadaf Ebrahimi 
3004*22dc650dSSadaf Ebrahimi       if (after_context > 0 || before_context > 0)
3005*22dc650dSSadaf Ebrahimi         endhyphenpending = TRUE;
3006*22dc650dSSadaf Ebrahimi 
3007*22dc650dSSadaf Ebrahimi       if (printname != NULL) fprintf(stdout, "%s%c", printname,
3008*22dc650dSSadaf Ebrahimi         printname_colon);
3009*22dc650dSSadaf Ebrahimi       if (number) fprintf(stdout, "%lu:", linenumber);
3010*22dc650dSSadaf Ebrahimi 
3011*22dc650dSSadaf Ebrahimi       /* In multiline mode, or if colouring, we have to split the line(s) up
3012*22dc650dSSadaf Ebrahimi       and search for further matches, but not of course if the line is a
3013*22dc650dSSadaf Ebrahimi       non-match. In multiline mode this is necessary in case there is another
3014*22dc650dSSadaf Ebrahimi       match that spans the end of the current line. When colouring we want to
3015*22dc650dSSadaf Ebrahimi       colour all matches. */
3016*22dc650dSSadaf Ebrahimi 
3017*22dc650dSSadaf Ebrahimi       if ((multiline || do_colour) && !invert)
3018*22dc650dSSadaf Ebrahimi         {
3019*22dc650dSSadaf Ebrahimi         int plength;
3020*22dc650dSSadaf Ebrahimi         PCRE2_SIZE endprevious;
3021*22dc650dSSadaf Ebrahimi 
3022*22dc650dSSadaf Ebrahimi         /* The use of \K may make the end offset earlier than the start. In
3023*22dc650dSSadaf Ebrahimi         this situation, swap them round. */
3024*22dc650dSSadaf Ebrahimi 
3025*22dc650dSSadaf Ebrahimi         if (offsets[0] > offsets[1])
3026*22dc650dSSadaf Ebrahimi           {
3027*22dc650dSSadaf Ebrahimi           PCRE2_SIZE temp = offsets[0];
3028*22dc650dSSadaf Ebrahimi           offsets[0] = offsets[1];
3029*22dc650dSSadaf Ebrahimi           offsets[1] = temp;
3030*22dc650dSSadaf Ebrahimi           }
3031*22dc650dSSadaf Ebrahimi 
3032*22dc650dSSadaf Ebrahimi         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3033*22dc650dSSadaf Ebrahimi         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3034*22dc650dSSadaf Ebrahimi 
3035*22dc650dSSadaf Ebrahimi         for (;;)
3036*22dc650dSSadaf Ebrahimi           {
3037*22dc650dSSadaf Ebrahimi           PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3038*22dc650dSSadaf Ebrahimi 
3039*22dc650dSSadaf Ebrahimi           endprevious = offsets[1];
3040*22dc650dSSadaf Ebrahimi           startoffset = endprevious;  /* Advance after previous match. */
3041*22dc650dSSadaf Ebrahimi 
3042*22dc650dSSadaf Ebrahimi           /* If the pattern contained a lookbehind that included \K, it is
3043*22dc650dSSadaf Ebrahimi           possible that the end of the match might be at or before the actual
3044*22dc650dSSadaf Ebrahimi           starting offset we have just used. In this case, start one character
3045*22dc650dSSadaf Ebrahimi           further on. */
3046*22dc650dSSadaf Ebrahimi 
3047*22dc650dSSadaf Ebrahimi           if (startoffset <= oldstartoffset)
3048*22dc650dSSadaf Ebrahimi             {
3049*22dc650dSSadaf Ebrahimi             startoffset = oldstartoffset + 1;
3050*22dc650dSSadaf Ebrahimi             if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3051*22dc650dSSadaf Ebrahimi             }
3052*22dc650dSSadaf Ebrahimi 
3053*22dc650dSSadaf Ebrahimi           /* If the current match ended past the end of the line (only possible
3054*22dc650dSSadaf Ebrahimi           in multiline mode), we must move on to the line in which it did end
3055*22dc650dSSadaf Ebrahimi           before searching for more matches. Because the PCRE2_FIRSTLINE option
3056*22dc650dSSadaf Ebrahimi           is set, the start of the match will always be before the first
3057*22dc650dSSadaf Ebrahimi           newline sequence. */
3058*22dc650dSSadaf Ebrahimi 
3059*22dc650dSSadaf Ebrahimi           while (startoffset > linelength + endlinelength)
3060*22dc650dSSadaf Ebrahimi             {
3061*22dc650dSSadaf Ebrahimi             ptr += linelength + endlinelength;
3062*22dc650dSSadaf Ebrahimi             filepos += (int)(linelength + endlinelength);
3063*22dc650dSSadaf Ebrahimi             linenumber++;
3064*22dc650dSSadaf Ebrahimi             startoffset -= (int)(linelength + endlinelength);
3065*22dc650dSSadaf Ebrahimi             endprevious -= (int)(linelength + endlinelength);
3066*22dc650dSSadaf Ebrahimi             t = end_of_line(ptr, endptr, &endlinelength);
3067*22dc650dSSadaf Ebrahimi             linelength = t - ptr - endlinelength;
3068*22dc650dSSadaf Ebrahimi             length = (PCRE2_SIZE)(endptr - ptr);
3069*22dc650dSSadaf Ebrahimi             }
3070*22dc650dSSadaf Ebrahimi 
3071*22dc650dSSadaf Ebrahimi           /* If startoffset is at the exact end of the line it means this
3072*22dc650dSSadaf Ebrahimi           complete line was the final part of the match, so there is nothing
3073*22dc650dSSadaf Ebrahimi           more to do. */
3074*22dc650dSSadaf Ebrahimi 
3075*22dc650dSSadaf Ebrahimi           if (startoffset == linelength + endlinelength) break;
3076*22dc650dSSadaf Ebrahimi 
3077*22dc650dSSadaf Ebrahimi           /* Otherwise, run a match from within the final line, and if found,
3078*22dc650dSSadaf Ebrahimi           loop for any that may follow. */
3079*22dc650dSSadaf Ebrahimi 
3080*22dc650dSSadaf Ebrahimi           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3081*22dc650dSSadaf Ebrahimi 
3082*22dc650dSSadaf Ebrahimi           /* The use of \K may make the end offset earlier than the start. In
3083*22dc650dSSadaf Ebrahimi           this situation, swap them round. */
3084*22dc650dSSadaf Ebrahimi 
3085*22dc650dSSadaf Ebrahimi           if (offsets[0] > offsets[1])
3086*22dc650dSSadaf Ebrahimi             {
3087*22dc650dSSadaf Ebrahimi             PCRE2_SIZE temp = offsets[0];
3088*22dc650dSSadaf Ebrahimi             offsets[0] = offsets[1];
3089*22dc650dSSadaf Ebrahimi             offsets[1] = temp;
3090*22dc650dSSadaf Ebrahimi             }
3091*22dc650dSSadaf Ebrahimi 
3092*22dc650dSSadaf Ebrahimi           FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3093*22dc650dSSadaf Ebrahimi           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3094*22dc650dSSadaf Ebrahimi           }
3095*22dc650dSSadaf Ebrahimi 
3096*22dc650dSSadaf Ebrahimi         /* In multiline mode, we may have already printed the complete line
3097*22dc650dSSadaf Ebrahimi         and its line-ending characters (if they matched the pattern), so there
3098*22dc650dSSadaf Ebrahimi         may be no more to print. */
3099*22dc650dSSadaf Ebrahimi 
3100*22dc650dSSadaf Ebrahimi         plength = (int)((linelength + endlinelength) - endprevious);
3101*22dc650dSSadaf Ebrahimi         if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3102*22dc650dSSadaf Ebrahimi         }
3103*22dc650dSSadaf Ebrahimi 
3104*22dc650dSSadaf Ebrahimi       /* Not colouring or multiline; no need to search for further matches. */
3105*22dc650dSSadaf Ebrahimi 
3106*22dc650dSSadaf Ebrahimi       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3107*22dc650dSSadaf Ebrahimi       }
3108*22dc650dSSadaf Ebrahimi 
3109*22dc650dSSadaf Ebrahimi     /* End of doing what has to be done for a match. If --line-buffered was
3110*22dc650dSSadaf Ebrahimi     given, flush the output. */
3111*22dc650dSSadaf Ebrahimi 
3112*22dc650dSSadaf Ebrahimi     if (line_buffered) fflush(stdout);
3113*22dc650dSSadaf Ebrahimi     rc = 0;    /* Had some success */
3114*22dc650dSSadaf Ebrahimi 
3115*22dc650dSSadaf Ebrahimi     /* Remember where the last match happened for after_context. We remember
3116*22dc650dSSadaf Ebrahimi     where we are about to restart, and that line's number. */
3117*22dc650dSSadaf Ebrahimi 
3118*22dc650dSSadaf Ebrahimi     lastmatchrestart = ptr + linelength + endlinelength;
3119*22dc650dSSadaf Ebrahimi     lastmatchnumber = linenumber + 1;
3120*22dc650dSSadaf Ebrahimi 
3121*22dc650dSSadaf Ebrahimi     /* If a line was printed and we are now at the end of the file and the last
3122*22dc650dSSadaf Ebrahimi     line had no newline, output one. */
3123*22dc650dSSadaf Ebrahimi 
3124*22dc650dSSadaf Ebrahimi     if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3125*22dc650dSSadaf Ebrahimi       write_final_newline();
3126*22dc650dSSadaf Ebrahimi     }
3127*22dc650dSSadaf Ebrahimi 
3128*22dc650dSSadaf Ebrahimi   /* For a match in multiline inverted mode (which of course did not cause
3129*22dc650dSSadaf Ebrahimi   anything to be printed), we have to move on to the end of the match before
3130*22dc650dSSadaf Ebrahimi   proceeding. */
3131*22dc650dSSadaf Ebrahimi 
3132*22dc650dSSadaf Ebrahimi   if (multiline && invert && match)
3133*22dc650dSSadaf Ebrahimi     {
3134*22dc650dSSadaf Ebrahimi     int ellength;
3135*22dc650dSSadaf Ebrahimi     char *endmatch = ptr + offsets[1];
3136*22dc650dSSadaf Ebrahimi     t = ptr;
3137*22dc650dSSadaf Ebrahimi     while (t < endmatch)
3138*22dc650dSSadaf Ebrahimi       {
3139*22dc650dSSadaf Ebrahimi       t = end_of_line(t, endptr, &ellength);
3140*22dc650dSSadaf Ebrahimi       if (t <= endmatch) linenumber++; else break;
3141*22dc650dSSadaf Ebrahimi       }
3142*22dc650dSSadaf Ebrahimi     endmatch = end_of_line(endmatch, endptr, &ellength);
3143*22dc650dSSadaf Ebrahimi     linelength = endmatch - ptr - ellength;
3144*22dc650dSSadaf Ebrahimi     }
3145*22dc650dSSadaf Ebrahimi 
3146*22dc650dSSadaf Ebrahimi   /* Advance to after the newline and increment the line number. The file
3147*22dc650dSSadaf Ebrahimi   offset to the current line is maintained in filepos. */
3148*22dc650dSSadaf Ebrahimi 
3149*22dc650dSSadaf Ebrahimi   END_ONE_MATCH:
3150*22dc650dSSadaf Ebrahimi   ptr += linelength + endlinelength;
3151*22dc650dSSadaf Ebrahimi   filepos += (int)(linelength + endlinelength);
3152*22dc650dSSadaf Ebrahimi   linenumber++;
3153*22dc650dSSadaf Ebrahimi 
3154*22dc650dSSadaf Ebrahimi   /* If there was at least one match (or a non-match, as required) in the line,
3155*22dc650dSSadaf Ebrahimi   increment the count for the -m option. */
3156*22dc650dSSadaf Ebrahimi 
3157*22dc650dSSadaf Ebrahimi   if (line_matched) count_matched_lines++;
3158*22dc650dSSadaf Ebrahimi 
3159*22dc650dSSadaf Ebrahimi   /* If input is line buffered, and the buffer is not yet full, read another
3160*22dc650dSSadaf Ebrahimi   line and add it into the buffer. */
3161*22dc650dSSadaf Ebrahimi 
3162*22dc650dSSadaf Ebrahimi   if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3163*22dc650dSSadaf Ebrahimi     {
3164*22dc650dSSadaf Ebrahimi     PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
3165*22dc650dSSadaf Ebrahimi     bufflength += add;
3166*22dc650dSSadaf Ebrahimi     endptr += add;
3167*22dc650dSSadaf Ebrahimi     }
3168*22dc650dSSadaf Ebrahimi 
3169*22dc650dSSadaf Ebrahimi   /* If we haven't yet reached the end of the file (the buffer is full), and
3170*22dc650dSSadaf Ebrahimi   the current point is in the top 1/3 of the buffer, slide the buffer down by
3171*22dc650dSSadaf Ebrahimi   1/3 and refill it. Before we do this, if some unprinted "after" lines are
3172*22dc650dSSadaf Ebrahimi   about to be lost, print them. */
3173*22dc650dSSadaf Ebrahimi 
3174*22dc650dSSadaf Ebrahimi   if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3175*22dc650dSSadaf Ebrahimi     {
3176*22dc650dSSadaf Ebrahimi     if (after_context > 0 &&
3177*22dc650dSSadaf Ebrahimi         lastmatchnumber > 0 &&
3178*22dc650dSSadaf Ebrahimi         lastmatchrestart < main_buffer + bufthird)
3179*22dc650dSSadaf Ebrahimi       {
3180*22dc650dSSadaf Ebrahimi       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3181*22dc650dSSadaf Ebrahimi       lastmatchnumber = 0;  /* Indicates no after lines pending */
3182*22dc650dSSadaf Ebrahimi       }
3183*22dc650dSSadaf Ebrahimi 
3184*22dc650dSSadaf Ebrahimi     /* Now do the shuffle */
3185*22dc650dSSadaf Ebrahimi 
3186*22dc650dSSadaf Ebrahimi     (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3187*22dc650dSSadaf Ebrahimi     ptr -= bufthird;
3188*22dc650dSSadaf Ebrahimi 
3189*22dc650dSSadaf Ebrahimi     bufflength = 2*bufthird + fill_buffer(handle, frtype,
3190*22dc650dSSadaf Ebrahimi       main_buffer + 2*bufthird, bufthird, input_line_buffered);
3191*22dc650dSSadaf Ebrahimi     endptr = main_buffer + bufflength;
3192*22dc650dSSadaf Ebrahimi 
3193*22dc650dSSadaf Ebrahimi     /* Adjust any last match point */
3194*22dc650dSSadaf Ebrahimi 
3195*22dc650dSSadaf Ebrahimi     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3196*22dc650dSSadaf Ebrahimi     }
3197*22dc650dSSadaf Ebrahimi   }     /* Loop through the whole file */
3198*22dc650dSSadaf Ebrahimi 
3199*22dc650dSSadaf Ebrahimi /* End of file; print final "after" lines if wanted; do_after_lines sets
3200*22dc650dSSadaf Ebrahimi hyphenpending if it prints something. */
3201*22dc650dSSadaf Ebrahimi 
3202*22dc650dSSadaf Ebrahimi if (only_matching_count == 0 && !(count_only|show_total_count))
3203*22dc650dSSadaf Ebrahimi   {
3204*22dc650dSSadaf Ebrahimi   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3205*22dc650dSSadaf Ebrahimi   hyphenpending |= endhyphenpending;
3206*22dc650dSSadaf Ebrahimi   }
3207*22dc650dSSadaf Ebrahimi 
3208*22dc650dSSadaf Ebrahimi /* Print the file name if we are looking for those without matches and there
3209*22dc650dSSadaf Ebrahimi were none. If we found a match, we won't have got this far. */
3210*22dc650dSSadaf Ebrahimi 
3211*22dc650dSSadaf Ebrahimi if (filenames == FN_NOMATCH_ONLY)
3212*22dc650dSSadaf Ebrahimi   {
3213*22dc650dSSadaf Ebrahimi   fprintf(stdout, "%s", printname);
3214*22dc650dSSadaf Ebrahimi   if (printname_nl == NULL) fprintf(stdout, "%c", 0);
3215*22dc650dSSadaf Ebrahimi     else fprintf(stdout, "%s", printname_nl);
3216*22dc650dSSadaf Ebrahimi   return 0;
3217*22dc650dSSadaf Ebrahimi   }
3218*22dc650dSSadaf Ebrahimi 
3219*22dc650dSSadaf Ebrahimi /* Print the match count if wanted */
3220*22dc650dSSadaf Ebrahimi 
3221*22dc650dSSadaf Ebrahimi if (count_only && !quiet)
3222*22dc650dSSadaf Ebrahimi   {
3223*22dc650dSSadaf Ebrahimi   if (count > 0 || !omit_zero_count)
3224*22dc650dSSadaf Ebrahimi     {
3225*22dc650dSSadaf Ebrahimi     if (printname != NULL && filenames != FN_NONE)
3226*22dc650dSSadaf Ebrahimi       fprintf(stdout, "%s%c", printname, printname_colon);
3227*22dc650dSSadaf Ebrahimi     fprintf(stdout, "%lu" STDOUT_NL, count);
3228*22dc650dSSadaf Ebrahimi     counts_printed++;
3229*22dc650dSSadaf Ebrahimi     }
3230*22dc650dSSadaf Ebrahimi   }
3231*22dc650dSSadaf Ebrahimi 
3232*22dc650dSSadaf Ebrahimi total_count += count;   /* Can be set without count_only */
3233*22dc650dSSadaf Ebrahimi return rc;
3234*22dc650dSSadaf Ebrahimi }
3235*22dc650dSSadaf Ebrahimi 
3236*22dc650dSSadaf Ebrahimi 
3237*22dc650dSSadaf Ebrahimi 
3238*22dc650dSSadaf Ebrahimi /*************************************************
3239*22dc650dSSadaf Ebrahimi *     Grep a file or recurse into a directory    *
3240*22dc650dSSadaf Ebrahimi *************************************************/
3241*22dc650dSSadaf Ebrahimi 
3242*22dc650dSSadaf Ebrahimi /* Given a path name, if it's a directory, scan all the files if we are
3243*22dc650dSSadaf Ebrahimi recursing; if it's a file, grep it.
3244*22dc650dSSadaf Ebrahimi 
3245*22dc650dSSadaf Ebrahimi Arguments:
3246*22dc650dSSadaf Ebrahimi   pathname          the path to investigate
3247*22dc650dSSadaf Ebrahimi   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
3248*22dc650dSSadaf Ebrahimi   only_one_at_top   TRUE if the path is the only one at toplevel
3249*22dc650dSSadaf Ebrahimi 
3250*22dc650dSSadaf Ebrahimi Returns:  -1 if the file/directory was skipped
3251*22dc650dSSadaf Ebrahimi            0 if there was at least one match
3252*22dc650dSSadaf Ebrahimi            1 if there were no matches
3253*22dc650dSSadaf Ebrahimi            2 if there was some kind of error
3254*22dc650dSSadaf Ebrahimi 
3255*22dc650dSSadaf Ebrahimi However, file opening failure messages are suppressed if "silent" is set.
3256*22dc650dSSadaf Ebrahimi */
3257*22dc650dSSadaf Ebrahimi 
3258*22dc650dSSadaf Ebrahimi static int
3259*22dc650dSSadaf Ebrahimi grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3260*22dc650dSSadaf Ebrahimi {
3261*22dc650dSSadaf Ebrahimi int rc = 1;
3262*22dc650dSSadaf Ebrahimi int frtype;
3263*22dc650dSSadaf Ebrahimi void *handle;
3264*22dc650dSSadaf Ebrahimi char *lastcomp;
3265*22dc650dSSadaf Ebrahimi FILE *in = NULL;           /* Ensure initialized */
3266*22dc650dSSadaf Ebrahimi 
3267*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
3268*22dc650dSSadaf Ebrahimi gzFile ingz = NULL;
3269*22dc650dSSadaf Ebrahimi #endif
3270*22dc650dSSadaf Ebrahimi 
3271*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
3272*22dc650dSSadaf Ebrahimi BZFILE *inbz2 = NULL;
3273*22dc650dSSadaf Ebrahimi #endif
3274*22dc650dSSadaf Ebrahimi 
3275*22dc650dSSadaf Ebrahimi #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3276*22dc650dSSadaf Ebrahimi int pathlen;
3277*22dc650dSSadaf Ebrahimi #endif
3278*22dc650dSSadaf Ebrahimi 
3279*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
3280*22dc650dSSadaf Ebrahimi int zos_type;
3281*22dc650dSSadaf Ebrahimi FILE *zos_test_file;
3282*22dc650dSSadaf Ebrahimi #endif
3283*22dc650dSSadaf Ebrahimi 
3284*22dc650dSSadaf Ebrahimi /* If the file name is "-" we scan stdin */
3285*22dc650dSSadaf Ebrahimi 
3286*22dc650dSSadaf Ebrahimi if (strcmp(pathname, "-") == 0)
3287*22dc650dSSadaf Ebrahimi   {
3288*22dc650dSSadaf Ebrahimi   if (count_limit >= 0) setbuf(stdin, NULL);
3289*22dc650dSSadaf Ebrahimi   return pcre2grep(stdin, FR_PLAIN, stdin_name,
3290*22dc650dSSadaf Ebrahimi     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3291*22dc650dSSadaf Ebrahimi       stdin_name : NULL);
3292*22dc650dSSadaf Ebrahimi   }
3293*22dc650dSSadaf Ebrahimi 
3294*22dc650dSSadaf Ebrahimi /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3295*22dc650dSSadaf Ebrahimi directories, whereas --include and --exclude apply to everything else. The test
3296*22dc650dSSadaf Ebrahimi is against the final component of the path. */
3297*22dc650dSSadaf Ebrahimi 
3298*22dc650dSSadaf Ebrahimi lastcomp = strrchr(pathname, FILESEP);
3299*22dc650dSSadaf Ebrahimi lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3300*22dc650dSSadaf Ebrahimi 
3301*22dc650dSSadaf Ebrahimi /* If the file is a directory, skip if not recursing or if explicitly excluded.
3302*22dc650dSSadaf Ebrahimi Otherwise, scan the directory and recurse for each path within it. The scanning
3303*22dc650dSSadaf Ebrahimi code is localized so it can be made system-specific. */
3304*22dc650dSSadaf Ebrahimi 
3305*22dc650dSSadaf Ebrahimi 
3306*22dc650dSSadaf Ebrahimi /* For z/OS, determine the file type. */
3307*22dc650dSSadaf Ebrahimi 
3308*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
3309*22dc650dSSadaf Ebrahimi zos_test_file =  fopen(pathname,"rb");
3310*22dc650dSSadaf Ebrahimi 
3311*22dc650dSSadaf Ebrahimi if (zos_test_file == NULL)
3312*22dc650dSSadaf Ebrahimi    {
3313*22dc650dSSadaf Ebrahimi    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3314*22dc650dSSadaf Ebrahimi      pathname, strerror(errno));
3315*22dc650dSSadaf Ebrahimi    return -1;
3316*22dc650dSSadaf Ebrahimi    }
3317*22dc650dSSadaf Ebrahimi zos_type = identifyzosfiletype (zos_test_file);
3318*22dc650dSSadaf Ebrahimi fclose (zos_test_file);
3319*22dc650dSSadaf Ebrahimi 
3320*22dc650dSSadaf Ebrahimi /* Handle a PDS in separate code */
3321*22dc650dSSadaf Ebrahimi 
3322*22dc650dSSadaf Ebrahimi if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3323*22dc650dSSadaf Ebrahimi    {
3324*22dc650dSSadaf Ebrahimi    return travelonpdsdir (pathname, only_one_at_top);
3325*22dc650dSSadaf Ebrahimi    }
3326*22dc650dSSadaf Ebrahimi 
3327*22dc650dSSadaf Ebrahimi /* Deal with regular files in the normal way below. These types are:
3328*22dc650dSSadaf Ebrahimi    zos_type == __ZOS_PDS_MEMBER
3329*22dc650dSSadaf Ebrahimi    zos_type == __ZOS_PS
3330*22dc650dSSadaf Ebrahimi    zos_type == __ZOS_VSAM_KSDS
3331*22dc650dSSadaf Ebrahimi    zos_type == __ZOS_VSAM_ESDS
3332*22dc650dSSadaf Ebrahimi    zos_type == __ZOS_VSAM_RRDS
3333*22dc650dSSadaf Ebrahimi */
3334*22dc650dSSadaf Ebrahimi 
3335*22dc650dSSadaf Ebrahimi /* Handle a z/OS directory using common code. */
3336*22dc650dSSadaf Ebrahimi 
3337*22dc650dSSadaf Ebrahimi else if (zos_type == __ZOS_HFS)
3338*22dc650dSSadaf Ebrahimi  {
3339*22dc650dSSadaf Ebrahimi #endif  /* NATIVE_ZOS */
3340*22dc650dSSadaf Ebrahimi 
3341*22dc650dSSadaf Ebrahimi 
3342*22dc650dSSadaf Ebrahimi /* Handle directories: common code for all OS */
3343*22dc650dSSadaf Ebrahimi 
3344*22dc650dSSadaf Ebrahimi if (isdirectory(pathname))
3345*22dc650dSSadaf Ebrahimi   {
3346*22dc650dSSadaf Ebrahimi   if (dee_action == dee_SKIP ||
3347*22dc650dSSadaf Ebrahimi       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3348*22dc650dSSadaf Ebrahimi     return -1;
3349*22dc650dSSadaf Ebrahimi 
3350*22dc650dSSadaf Ebrahimi   if (dee_action == dee_RECURSE)
3351*22dc650dSSadaf Ebrahimi     {
3352*22dc650dSSadaf Ebrahimi     char childpath[FNBUFSIZ];
3353*22dc650dSSadaf Ebrahimi     char *nextfile;
3354*22dc650dSSadaf Ebrahimi     directory_type *dir = opendirectory(pathname);
3355*22dc650dSSadaf Ebrahimi 
3356*22dc650dSSadaf Ebrahimi     if (dir == NULL)
3357*22dc650dSSadaf Ebrahimi       {
3358*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_START - this is a "never" event */
3359*22dc650dSSadaf Ebrahimi       if (!silent)
3360*22dc650dSSadaf Ebrahimi         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3361*22dc650dSSadaf Ebrahimi           strerror(errno));
3362*22dc650dSSadaf Ebrahimi       return 2;
3363*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_STOP */
3364*22dc650dSSadaf Ebrahimi       }
3365*22dc650dSSadaf Ebrahimi 
3366*22dc650dSSadaf Ebrahimi     while ((nextfile = readdirectory(dir)) != NULL)
3367*22dc650dSSadaf Ebrahimi       {
3368*22dc650dSSadaf Ebrahimi       int frc;
3369*22dc650dSSadaf Ebrahimi       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3370*22dc650dSSadaf Ebrahimi       if (fnlength > FNBUFSIZ)
3371*22dc650dSSadaf Ebrahimi         {
3372*22dc650dSSadaf Ebrahimi         /* LCOV_EXCL_START - this is a "never" event */
3373*22dc650dSSadaf Ebrahimi         fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3374*22dc650dSSadaf Ebrahimi         rc = 2;
3375*22dc650dSSadaf Ebrahimi         break;
3376*22dc650dSSadaf Ebrahimi         /* LCOV_EXCL_STOP */
3377*22dc650dSSadaf Ebrahimi         }
3378*22dc650dSSadaf Ebrahimi       sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3379*22dc650dSSadaf Ebrahimi 
3380*22dc650dSSadaf Ebrahimi       /* If the realpath() function is available, we can try to prevent endless
3381*22dc650dSSadaf Ebrahimi       recursion caused by a symlink pointing to a parent directory (GitHub
3382*22dc650dSSadaf Ebrahimi       issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3383*22dc650dSSadaf Ebrahimi       Modified to avoid using strlcat() because that isn't a standard C
3384*22dc650dSSadaf Ebrahimi       function, and also modified not to copy back the fully resolved path,
3385*22dc650dSSadaf Ebrahimi       because that affects the output from pcre2grep. */
3386*22dc650dSSadaf Ebrahimi 
3387*22dc650dSSadaf Ebrahimi #ifdef HAVE_REALPATH
3388*22dc650dSSadaf Ebrahimi       {
3389*22dc650dSSadaf Ebrahimi       char resolvedpath[PATH_MAX];
3390*22dc650dSSadaf Ebrahimi       BOOL isSame;
3391*22dc650dSSadaf Ebrahimi       size_t rlen;
3392*22dc650dSSadaf Ebrahimi       if (realpath(childpath, resolvedpath) == NULL)
3393*22dc650dSSadaf Ebrahimi         /* LCOV_EXCL_START - this is a "never" event */
3394*22dc650dSSadaf Ebrahimi         continue;     /* This path is invalid - we can skip processing this */
3395*22dc650dSSadaf Ebrahimi         /* LCOV_EXCL_STOP */
3396*22dc650dSSadaf Ebrahimi       isSame = strcmp(pathname, resolvedpath) == 0;
3397*22dc650dSSadaf Ebrahimi       if (isSame) continue;    /* We have a recursion */
3398*22dc650dSSadaf Ebrahimi       rlen = strlen(resolvedpath);
3399*22dc650dSSadaf Ebrahimi       if (rlen++ < sizeof(resolvedpath) - 3)
3400*22dc650dSSadaf Ebrahimi         {
3401*22dc650dSSadaf Ebrahimi         BOOL contained;
3402*22dc650dSSadaf Ebrahimi         strcat(resolvedpath, "/");
3403*22dc650dSSadaf Ebrahimi         contained = strncmp(pathname, resolvedpath, rlen) == 0;
3404*22dc650dSSadaf Ebrahimi         if (contained) continue;    /* We have a recursion */
3405*22dc650dSSadaf Ebrahimi         }
3406*22dc650dSSadaf Ebrahimi       }
3407*22dc650dSSadaf Ebrahimi #endif  /* HAVE_REALPATH */
3408*22dc650dSSadaf Ebrahimi 
3409*22dc650dSSadaf Ebrahimi       frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3410*22dc650dSSadaf Ebrahimi       if (frc > 1) rc = frc;
3411*22dc650dSSadaf Ebrahimi        else if (frc == 0 && rc == 1) rc = 0;
3412*22dc650dSSadaf Ebrahimi       }
3413*22dc650dSSadaf Ebrahimi 
3414*22dc650dSSadaf Ebrahimi     closedirectory(dir);
3415*22dc650dSSadaf Ebrahimi     return rc;
3416*22dc650dSSadaf Ebrahimi     }
3417*22dc650dSSadaf Ebrahimi   }
3418*22dc650dSSadaf Ebrahimi 
3419*22dc650dSSadaf Ebrahimi #ifdef WIN32
3420*22dc650dSSadaf Ebrahimi if (iswild(pathname))
3421*22dc650dSSadaf Ebrahimi   {
3422*22dc650dSSadaf Ebrahimi   char buffer[1024];
3423*22dc650dSSadaf Ebrahimi   char *nextfile;
3424*22dc650dSSadaf Ebrahimi   char *name;
3425*22dc650dSSadaf Ebrahimi   directory_type *dir = opendirectory(pathname);
3426*22dc650dSSadaf Ebrahimi 
3427*22dc650dSSadaf Ebrahimi   if (dir == NULL)
3428*22dc650dSSadaf Ebrahimi     return 0;
3429*22dc650dSSadaf Ebrahimi 
3430*22dc650dSSadaf Ebrahimi   for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3431*22dc650dSSadaf Ebrahimi     if (*nextfile == '/' || *nextfile == '\\')
3432*22dc650dSSadaf Ebrahimi       name = nextfile + 1;
3433*22dc650dSSadaf Ebrahimi   *name = 0;
3434*22dc650dSSadaf Ebrahimi 
3435*22dc650dSSadaf Ebrahimi   while ((nextfile = readdirectory(dir)) != NULL)
3436*22dc650dSSadaf Ebrahimi     {
3437*22dc650dSSadaf Ebrahimi     int frc;
3438*22dc650dSSadaf Ebrahimi     sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3439*22dc650dSSadaf Ebrahimi     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3440*22dc650dSSadaf Ebrahimi     if (frc > 1) rc = frc;
3441*22dc650dSSadaf Ebrahimi      else if (frc == 0 && rc == 1) rc = 0;
3442*22dc650dSSadaf Ebrahimi     }
3443*22dc650dSSadaf Ebrahimi 
3444*22dc650dSSadaf Ebrahimi   closedirectory(dir);
3445*22dc650dSSadaf Ebrahimi   return rc;
3446*22dc650dSSadaf Ebrahimi   }
3447*22dc650dSSadaf Ebrahimi #endif
3448*22dc650dSSadaf Ebrahimi 
3449*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
3450*22dc650dSSadaf Ebrahimi  }
3451*22dc650dSSadaf Ebrahimi #endif
3452*22dc650dSSadaf Ebrahimi 
3453*22dc650dSSadaf Ebrahimi /* If the file is not a directory, check for a regular file, and if it is not,
3454*22dc650dSSadaf Ebrahimi skip it if that's been requested. Otherwise, check for an explicit inclusion or
3455*22dc650dSSadaf Ebrahimi exclusion. */
3456*22dc650dSSadaf Ebrahimi 
3457*22dc650dSSadaf Ebrahimi else if (
3458*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
3459*22dc650dSSadaf Ebrahimi         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3460*22dc650dSSadaf Ebrahimi #else  /* all other OS */
3461*22dc650dSSadaf Ebrahimi         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3462*22dc650dSSadaf Ebrahimi #endif
3463*22dc650dSSadaf Ebrahimi         !test_incexc(lastcomp, include_patterns, exclude_patterns))
3464*22dc650dSSadaf Ebrahimi   return -1;  /* File skipped */
3465*22dc650dSSadaf Ebrahimi 
3466*22dc650dSSadaf Ebrahimi /* Control reaches here if we have a regular file, or if we have a directory
3467*22dc650dSSadaf Ebrahimi and recursion or skipping was not requested, or if we have anything else and
3468*22dc650dSSadaf Ebrahimi skipping was not requested. The scan proceeds. If this is the first and only
3469*22dc650dSSadaf Ebrahimi argument at top level, we don't show the file name, unless we are only showing
3470*22dc650dSSadaf Ebrahimi the file name, or the filename was forced (-H). */
3471*22dc650dSSadaf Ebrahimi 
3472*22dc650dSSadaf Ebrahimi #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3473*22dc650dSSadaf Ebrahimi pathlen = (int)(strlen(pathname));
3474*22dc650dSSadaf Ebrahimi #endif
3475*22dc650dSSadaf Ebrahimi 
3476*22dc650dSSadaf Ebrahimi /* Open using zlib if it is supported and the file name ends with .gz. */
3477*22dc650dSSadaf Ebrahimi 
3478*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
3479*22dc650dSSadaf Ebrahimi if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3480*22dc650dSSadaf Ebrahimi   {
3481*22dc650dSSadaf Ebrahimi   ingz = gzopen(pathname, "rb");
3482*22dc650dSSadaf Ebrahimi   if (ingz == NULL)
3483*22dc650dSSadaf Ebrahimi     {
3484*22dc650dSSadaf Ebrahimi     /* LCOV_EXCL_START */
3485*22dc650dSSadaf Ebrahimi     if (!silent)
3486*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3487*22dc650dSSadaf Ebrahimi         strerror(errno));
3488*22dc650dSSadaf Ebrahimi     return 2;
3489*22dc650dSSadaf Ebrahimi     /* LCOV_EXCL_STOP */
3490*22dc650dSSadaf Ebrahimi     }
3491*22dc650dSSadaf Ebrahimi   handle = (void *)ingz;
3492*22dc650dSSadaf Ebrahimi   frtype = FR_LIBZ;
3493*22dc650dSSadaf Ebrahimi   }
3494*22dc650dSSadaf Ebrahimi else
3495*22dc650dSSadaf Ebrahimi #endif
3496*22dc650dSSadaf Ebrahimi 
3497*22dc650dSSadaf Ebrahimi /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3498*22dc650dSSadaf Ebrahimi 
3499*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
3500*22dc650dSSadaf Ebrahimi if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3501*22dc650dSSadaf Ebrahimi   {
3502*22dc650dSSadaf Ebrahimi   inbz2 = BZ2_bzopen(pathname, "rb");
3503*22dc650dSSadaf Ebrahimi   handle = (void *)inbz2;
3504*22dc650dSSadaf Ebrahimi   frtype = FR_LIBBZ2;
3505*22dc650dSSadaf Ebrahimi   }
3506*22dc650dSSadaf Ebrahimi else
3507*22dc650dSSadaf Ebrahimi #endif
3508*22dc650dSSadaf Ebrahimi 
3509*22dc650dSSadaf Ebrahimi /* Otherwise use plain fopen(). The label is so that we can come back here if
3510*22dc650dSSadaf Ebrahimi an attempt to read a .bz2 file indicates that it really is a plain file. */
3511*22dc650dSSadaf Ebrahimi 
3512*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
3513*22dc650dSSadaf Ebrahimi PLAIN_FILE:
3514*22dc650dSSadaf Ebrahimi #endif
3515*22dc650dSSadaf Ebrahimi   {
3516*22dc650dSSadaf Ebrahimi   in = fopen(pathname, "rb");
3517*22dc650dSSadaf Ebrahimi   handle = (void *)in;
3518*22dc650dSSadaf Ebrahimi   frtype = FR_PLAIN;
3519*22dc650dSSadaf Ebrahimi   }
3520*22dc650dSSadaf Ebrahimi 
3521*22dc650dSSadaf Ebrahimi /* All the opening methods return errno when they fail. */
3522*22dc650dSSadaf Ebrahimi 
3523*22dc650dSSadaf Ebrahimi if (handle == NULL)
3524*22dc650dSSadaf Ebrahimi   {
3525*22dc650dSSadaf Ebrahimi   if (!silent)
3526*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3527*22dc650dSSadaf Ebrahimi       strerror(errno));
3528*22dc650dSSadaf Ebrahimi   return 2;
3529*22dc650dSSadaf Ebrahimi   }
3530*22dc650dSSadaf Ebrahimi 
3531*22dc650dSSadaf Ebrahimi /* Now grep the file */
3532*22dc650dSSadaf Ebrahimi 
3533*22dc650dSSadaf Ebrahimi rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3534*22dc650dSSadaf Ebrahimi   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3535*22dc650dSSadaf Ebrahimi 
3536*22dc650dSSadaf Ebrahimi /* Close in an appropriate manner. */
3537*22dc650dSSadaf Ebrahimi 
3538*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
3539*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBZ)
3540*22dc650dSSadaf Ebrahimi   gzclose(ingz);
3541*22dc650dSSadaf Ebrahimi else
3542*22dc650dSSadaf Ebrahimi #endif
3543*22dc650dSSadaf Ebrahimi 
3544*22dc650dSSadaf Ebrahimi /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3545*22dc650dSSadaf Ebrahimi read failed. If the error indicates that the file isn't in fact bzipped, try
3546*22dc650dSSadaf Ebrahimi again as a normal file. */
3547*22dc650dSSadaf Ebrahimi 
3548*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
3549*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBBZ2)
3550*22dc650dSSadaf Ebrahimi   {
3551*22dc650dSSadaf Ebrahimi   if (rc == 3)
3552*22dc650dSSadaf Ebrahimi     {
3553*22dc650dSSadaf Ebrahimi     int errnum;
3554*22dc650dSSadaf Ebrahimi     const char *err = BZ2_bzerror(inbz2, &errnum);
3555*22dc650dSSadaf Ebrahimi     if (errnum == BZ_DATA_ERROR_MAGIC)
3556*22dc650dSSadaf Ebrahimi       {
3557*22dc650dSSadaf Ebrahimi       BZ2_bzclose(inbz2);
3558*22dc650dSSadaf Ebrahimi       goto PLAIN_FILE;
3559*22dc650dSSadaf Ebrahimi       }
3560*22dc650dSSadaf Ebrahimi     /* LCOV_EXCL_START */
3561*22dc650dSSadaf Ebrahimi     else if (!silent)
3562*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3563*22dc650dSSadaf Ebrahimi         pathname, err);
3564*22dc650dSSadaf Ebrahimi     rc = 2;    /* The normal "something went wrong" code */
3565*22dc650dSSadaf Ebrahimi     /* LCOV_EXCL_STOP */
3566*22dc650dSSadaf Ebrahimi     }
3567*22dc650dSSadaf Ebrahimi   BZ2_bzclose(inbz2);
3568*22dc650dSSadaf Ebrahimi   }
3569*22dc650dSSadaf Ebrahimi else
3570*22dc650dSSadaf Ebrahimi #endif
3571*22dc650dSSadaf Ebrahimi 
3572*22dc650dSSadaf Ebrahimi /* Normal file close */
3573*22dc650dSSadaf Ebrahimi 
3574*22dc650dSSadaf Ebrahimi fclose(in);
3575*22dc650dSSadaf Ebrahimi 
3576*22dc650dSSadaf Ebrahimi /* Pass back the yield from pcre2grep(). */
3577*22dc650dSSadaf Ebrahimi 
3578*22dc650dSSadaf Ebrahimi return rc;
3579*22dc650dSSadaf Ebrahimi }
3580*22dc650dSSadaf Ebrahimi 
3581*22dc650dSSadaf Ebrahimi 
3582*22dc650dSSadaf Ebrahimi 
3583*22dc650dSSadaf Ebrahimi /*************************************************
3584*22dc650dSSadaf Ebrahimi *          Handle a no-data option               *
3585*22dc650dSSadaf Ebrahimi *************************************************/
3586*22dc650dSSadaf Ebrahimi 
3587*22dc650dSSadaf Ebrahimi /* This is called when a known option has been identified. */
3588*22dc650dSSadaf Ebrahimi 
3589*22dc650dSSadaf Ebrahimi static int
handle_option(int letter,int options)3590*22dc650dSSadaf Ebrahimi handle_option(int letter, int options)
3591*22dc650dSSadaf Ebrahimi {
3592*22dc650dSSadaf Ebrahimi switch(letter)
3593*22dc650dSSadaf Ebrahimi   {
3594*22dc650dSSadaf Ebrahimi   case N_FOFFSETS: file_offsets = TRUE; break;
3595*22dc650dSSadaf Ebrahimi   case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3596*22dc650dSSadaf Ebrahimi   case N_LBUFFER: line_buffered = TRUE; break;
3597*22dc650dSSadaf Ebrahimi   case N_LOFFSETS: line_offsets = number = TRUE; break;
3598*22dc650dSSadaf Ebrahimi   case N_NOJIT: use_jit = FALSE; break;
3599*22dc650dSSadaf Ebrahimi   case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3600*22dc650dSSadaf Ebrahimi   case N_NO_GROUP_SEPARATOR: group_separator = NULL; break;
3601*22dc650dSSadaf Ebrahimi   case 'a': binary_files = BIN_TEXT; break;
3602*22dc650dSSadaf Ebrahimi   case 'c': count_only = TRUE; break;
3603*22dc650dSSadaf Ebrahimi   case N_POSIX_DIGIT: posix_digit = TRUE; break;
3604*22dc650dSSadaf Ebrahimi   case 'E': case_restrict = TRUE; break;
3605*22dc650dSSadaf Ebrahimi   case 'F': options |= PCRE2_LITERAL; break;
3606*22dc650dSSadaf Ebrahimi   case 'H': filenames = FN_FORCE; break;
3607*22dc650dSSadaf Ebrahimi   case 'I': binary_files = BIN_NOMATCH; break;
3608*22dc650dSSadaf Ebrahimi   case 'h': filenames = FN_NONE; break;
3609*22dc650dSSadaf Ebrahimi   case 'i': options |= PCRE2_CASELESS; break;
3610*22dc650dSSadaf Ebrahimi   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3611*22dc650dSSadaf Ebrahimi   case 'L': filenames = FN_NOMATCH_ONLY; break;
3612*22dc650dSSadaf Ebrahimi   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3613*22dc650dSSadaf Ebrahimi   case 'n': number = TRUE; break;
3614*22dc650dSSadaf Ebrahimi 
3615*22dc650dSSadaf Ebrahimi   case 'o':
3616*22dc650dSSadaf Ebrahimi   only_matching_last = add_number(0, only_matching_last);
3617*22dc650dSSadaf Ebrahimi   if (only_matching == NULL) only_matching = only_matching_last;
3618*22dc650dSSadaf Ebrahimi   break;
3619*22dc650dSSadaf Ebrahimi 
3620*22dc650dSSadaf Ebrahimi   case 'P': no_ucp = TRUE; break;
3621*22dc650dSSadaf Ebrahimi   case 'q': quiet = TRUE; break;
3622*22dc650dSSadaf Ebrahimi   case 'r': dee_action = dee_RECURSE; break;
3623*22dc650dSSadaf Ebrahimi   case 's': silent = TRUE; break;
3624*22dc650dSSadaf Ebrahimi   case 't': show_total_count = TRUE; break;
3625*22dc650dSSadaf Ebrahimi   case 'u': options |= PCRE2_UTF | PCRE2_UCP; utf = TRUE; break;
3626*22dc650dSSadaf Ebrahimi   case 'U': options |= PCRE2_UTF | PCRE2_MATCH_INVALID_UTF | PCRE2_UCP;
3627*22dc650dSSadaf Ebrahimi             utf = TRUE; break;
3628*22dc650dSSadaf Ebrahimi   case 'v': invert = TRUE; break;
3629*22dc650dSSadaf Ebrahimi 
3630*22dc650dSSadaf Ebrahimi   case 'V':
3631*22dc650dSSadaf Ebrahimi     {
3632*22dc650dSSadaf Ebrahimi     unsigned char buffer[128];
3633*22dc650dSSadaf Ebrahimi     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3634*22dc650dSSadaf Ebrahimi     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3635*22dc650dSSadaf Ebrahimi     }
3636*22dc650dSSadaf Ebrahimi   pcre2grep_exit(0);
3637*22dc650dSSadaf Ebrahimi   break;  /* LCOV_EXCL_LINE - statement kept to avoid compiler warning */
3638*22dc650dSSadaf Ebrahimi 
3639*22dc650dSSadaf Ebrahimi   case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3640*22dc650dSSadaf Ebrahimi   case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3641*22dc650dSSadaf Ebrahimi   case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
3642*22dc650dSSadaf Ebrahimi 
3643*22dc650dSSadaf Ebrahimi   /* LCOV_EXCL_START - this is a "never event" */
3644*22dc650dSSadaf Ebrahimi   default:
3645*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3646*22dc650dSSadaf Ebrahimi   pcre2grep_exit(usage(2));
3647*22dc650dSSadaf Ebrahimi   /* LCOV_EXCL_STOP */
3648*22dc650dSSadaf Ebrahimi   }
3649*22dc650dSSadaf Ebrahimi 
3650*22dc650dSSadaf Ebrahimi return options;
3651*22dc650dSSadaf Ebrahimi }
3652*22dc650dSSadaf Ebrahimi 
3653*22dc650dSSadaf Ebrahimi 
3654*22dc650dSSadaf Ebrahimi 
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The text is built in a static
buffer, so each call overwrites the result of the previous one.

Argument:   the number
Returns:    pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Number of characters written */
int lasttwo = n % 100;

/* Numbers ending in 11, 12, or 13 take "th"; otherwise the final digit
selects the suffix. */

if (lasttwo < 11 || lasttwo > 13)
  {
  int lastone = lasttwo % 10;
  if (lastone == 1) suffix = "st";
  else if (lastone == 2) suffix = "nd";
  else if (lastone == 3) suffix = "rd";
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3679*22dc650dSSadaf Ebrahimi 
3680*22dc650dSSadaf Ebrahimi 
3681*22dc650dSSadaf Ebrahimi 
3682*22dc650dSSadaf Ebrahimi /*************************************************
3683*22dc650dSSadaf Ebrahimi *          Compile a single pattern              *
3684*22dc650dSSadaf Ebrahimi *************************************************/
3685*22dc650dSSadaf Ebrahimi 
3686*22dc650dSSadaf Ebrahimi /* Do nothing if the pattern has already been compiled. This is the case for
3687*22dc650dSSadaf Ebrahimi include/exclude patterns read from a file.
3688*22dc650dSSadaf Ebrahimi 
3689*22dc650dSSadaf Ebrahimi When the -F option has been used, each "pattern" may be a list of strings,
3690*22dc650dSSadaf Ebrahimi separated by line breaks. They will be matched literally. We split such a
3691*22dc650dSSadaf Ebrahimi string and compile the first substring, inserting an additional block into the
3692*22dc650dSSadaf Ebrahimi pattern chain.
3693*22dc650dSSadaf Ebrahimi 
3694*22dc650dSSadaf Ebrahimi Arguments:
3695*22dc650dSSadaf Ebrahimi   p              points to the pattern block
3696*22dc650dSSadaf Ebrahimi   options        the PCRE options
3697*22dc650dSSadaf Ebrahimi   fromfile       TRUE if the pattern was read from a file
3698*22dc650dSSadaf Ebrahimi   fromtext       file name or identifying text (e.g. "include")
3699*22dc650dSSadaf Ebrahimi   count          0 if this is the only command line pattern, or
3700*22dc650dSSadaf Ebrahimi                  number of the command line pattern, or
3701*22dc650dSSadaf Ebrahimi                  linenumber for a pattern from a file
3702*22dc650dSSadaf Ebrahimi 
3703*22dc650dSSadaf Ebrahimi Returns:         TRUE on success, FALSE after an error
3704*22dc650dSSadaf Ebrahimi */
3705*22dc650dSSadaf Ebrahimi 
3706*22dc650dSSadaf Ebrahimi static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3707*22dc650dSSadaf Ebrahimi compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3708*22dc650dSSadaf Ebrahimi   int count)
3709*22dc650dSSadaf Ebrahimi {
3710*22dc650dSSadaf Ebrahimi char *ps;
3711*22dc650dSSadaf Ebrahimi int errcode;
3712*22dc650dSSadaf Ebrahimi PCRE2_SIZE patlen, erroffset;
3713*22dc650dSSadaf Ebrahimi PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3714*22dc650dSSadaf Ebrahimi 
3715*22dc650dSSadaf Ebrahimi if (p->compiled != NULL) return TRUE;
3716*22dc650dSSadaf Ebrahimi ps = p->string;
3717*22dc650dSSadaf Ebrahimi patlen = p->length;
3718*22dc650dSSadaf Ebrahimi 
3719*22dc650dSSadaf Ebrahimi if ((options & PCRE2_LITERAL) != 0)
3720*22dc650dSSadaf Ebrahimi   {
3721*22dc650dSSadaf Ebrahimi   int ellength;
3722*22dc650dSSadaf Ebrahimi   char *eop = ps + patlen;
3723*22dc650dSSadaf Ebrahimi   char *pe = end_of_line(ps, eop, &ellength);
3724*22dc650dSSadaf Ebrahimi 
3725*22dc650dSSadaf Ebrahimi   if (ellength != 0)
3726*22dc650dSSadaf Ebrahimi     {
3727*22dc650dSSadaf Ebrahimi     patlen = pe - ps - ellength;
3728*22dc650dSSadaf Ebrahimi     if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3729*22dc650dSSadaf Ebrahimi     }
3730*22dc650dSSadaf Ebrahimi   }
3731*22dc650dSSadaf Ebrahimi 
3732*22dc650dSSadaf Ebrahimi p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3733*22dc650dSSadaf Ebrahimi   &erroffset, compile_context);
3734*22dc650dSSadaf Ebrahimi 
3735*22dc650dSSadaf Ebrahimi /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3736*22dc650dSSadaf Ebrahimi ignore any JIT compiler errors, relying falling back to interpreting if
3737*22dc650dSSadaf Ebrahimi anything goes wrong with JIT. */
3738*22dc650dSSadaf Ebrahimi 
3739*22dc650dSSadaf Ebrahimi if (p->compiled != NULL)
3740*22dc650dSSadaf Ebrahimi   {
3741*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
3742*22dc650dSSadaf Ebrahimi   if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3743*22dc650dSSadaf Ebrahimi #endif
3744*22dc650dSSadaf Ebrahimi   return TRUE;
3745*22dc650dSSadaf Ebrahimi   }
3746*22dc650dSSadaf Ebrahimi 
3747*22dc650dSSadaf Ebrahimi /* Handle compile errors */
3748*22dc650dSSadaf Ebrahimi 
3749*22dc650dSSadaf Ebrahimi if (erroffset > patlen) erroffset = patlen;
3750*22dc650dSSadaf Ebrahimi pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3751*22dc650dSSadaf Ebrahimi 
3752*22dc650dSSadaf Ebrahimi if (fromfile)
3753*22dc650dSSadaf Ebrahimi   {
3754*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3755*22dc650dSSadaf Ebrahimi     "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3756*22dc650dSSadaf Ebrahimi   }
3757*22dc650dSSadaf Ebrahimi else
3758*22dc650dSSadaf Ebrahimi   {
3759*22dc650dSSadaf Ebrahimi   if (count == 0)
3760*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3761*22dc650dSSadaf Ebrahimi       fromtext, (int)erroffset, errmessbuffer);
3762*22dc650dSSadaf Ebrahimi   else
3763*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3764*22dc650dSSadaf Ebrahimi       ordin(count), fromtext, (int)erroffset, errmessbuffer);
3765*22dc650dSSadaf Ebrahimi   }
3766*22dc650dSSadaf Ebrahimi 
3767*22dc650dSSadaf Ebrahimi return FALSE;
3768*22dc650dSSadaf Ebrahimi }
3769*22dc650dSSadaf Ebrahimi 
3770*22dc650dSSadaf Ebrahimi 
3771*22dc650dSSadaf Ebrahimi 
3772*22dc650dSSadaf Ebrahimi /*************************************************
3773*22dc650dSSadaf Ebrahimi *     Read and compile a file of patterns        *
3774*22dc650dSSadaf Ebrahimi *************************************************/
3775*22dc650dSSadaf Ebrahimi 
3776*22dc650dSSadaf Ebrahimi /* This is used for --filelist, --include-from, and --exclude-from.
3777*22dc650dSSadaf Ebrahimi 
3778*22dc650dSSadaf Ebrahimi Arguments:
3779*22dc650dSSadaf Ebrahimi   name         the name of the file; "-" is stdin
3780*22dc650dSSadaf Ebrahimi   patptr       pointer to the pattern chain anchor
3781*22dc650dSSadaf Ebrahimi   patlastptr   pointer to the last pattern pointer
3782*22dc650dSSadaf Ebrahimi 
3783*22dc650dSSadaf Ebrahimi Returns:       TRUE if all went well
3784*22dc650dSSadaf Ebrahimi */
3785*22dc650dSSadaf Ebrahimi 
3786*22dc650dSSadaf Ebrahimi static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3787*22dc650dSSadaf Ebrahimi read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3788*22dc650dSSadaf Ebrahimi {
3789*22dc650dSSadaf Ebrahimi int linenumber = 0;
3790*22dc650dSSadaf Ebrahimi PCRE2_SIZE patlen;
3791*22dc650dSSadaf Ebrahimi FILE *f;
3792*22dc650dSSadaf Ebrahimi const char *filename;
3793*22dc650dSSadaf Ebrahimi char buffer[MAXPATLEN+20];
3794*22dc650dSSadaf Ebrahimi 
3795*22dc650dSSadaf Ebrahimi if (strcmp(name, "-") == 0)
3796*22dc650dSSadaf Ebrahimi   {
3797*22dc650dSSadaf Ebrahimi   f = stdin;
3798*22dc650dSSadaf Ebrahimi   filename = stdin_name;
3799*22dc650dSSadaf Ebrahimi   }
3800*22dc650dSSadaf Ebrahimi else
3801*22dc650dSSadaf Ebrahimi   {
3802*22dc650dSSadaf Ebrahimi   f = fopen(name, "r");
3803*22dc650dSSadaf Ebrahimi   if (f == NULL)
3804*22dc650dSSadaf Ebrahimi     {
3805*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3806*22dc650dSSadaf Ebrahimi     return FALSE;
3807*22dc650dSSadaf Ebrahimi     }
3808*22dc650dSSadaf Ebrahimi   filename = name;
3809*22dc650dSSadaf Ebrahimi   }
3810*22dc650dSSadaf Ebrahimi 
3811*22dc650dSSadaf Ebrahimi while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3812*22dc650dSSadaf Ebrahimi   {
3813*22dc650dSSadaf Ebrahimi   while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3814*22dc650dSSadaf Ebrahimi   linenumber++;
3815*22dc650dSSadaf Ebrahimi   if (patlen == 0) continue;   /* Skip blank lines */
3816*22dc650dSSadaf Ebrahimi 
3817*22dc650dSSadaf Ebrahimi   /* Note: this call to add_pattern() puts a pointer to the local variable
3818*22dc650dSSadaf Ebrahimi   "buffer" into the pattern chain. However, that pointer is used only when
3819*22dc650dSSadaf Ebrahimi   compiling the pattern, which happens immediately below, so we flatten it
3820*22dc650dSSadaf Ebrahimi   afterwards, as a precaution against any later code trying to use it. */
3821*22dc650dSSadaf Ebrahimi 
3822*22dc650dSSadaf Ebrahimi   *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3823*22dc650dSSadaf Ebrahimi   if (*patlastptr == NULL)
3824*22dc650dSSadaf Ebrahimi     {
3825*22dc650dSSadaf Ebrahimi     /* LCOV_EXCL_START - won't happen in testing */
3826*22dc650dSSadaf Ebrahimi     if (f != stdin) fclose(f);
3827*22dc650dSSadaf Ebrahimi     return FALSE;
3828*22dc650dSSadaf Ebrahimi     /* LCOV_EXCL_STOP */
3829*22dc650dSSadaf Ebrahimi     }
3830*22dc650dSSadaf Ebrahimi   if (*patptr == NULL) *patptr = *patlastptr;
3831*22dc650dSSadaf Ebrahimi 
3832*22dc650dSSadaf Ebrahimi   /* This loop is needed because compiling a "pattern" when -F is set may add
3833*22dc650dSSadaf Ebrahimi   on additional literal patterns if the original contains a newline. In the
3834*22dc650dSSadaf Ebrahimi   common case, it never will, because read_one_line() stops at a newline.
3835*22dc650dSSadaf Ebrahimi   However, the -N option can be used to give pcre2grep a different newline
3836*22dc650dSSadaf Ebrahimi   setting. */
3837*22dc650dSSadaf Ebrahimi 
3838*22dc650dSSadaf Ebrahimi   for(;;)
3839*22dc650dSSadaf Ebrahimi     {
3840*22dc650dSSadaf Ebrahimi     if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3841*22dc650dSSadaf Ebrahimi         linenumber))
3842*22dc650dSSadaf Ebrahimi       {
3843*22dc650dSSadaf Ebrahimi       if (f != stdin) fclose(f);
3844*22dc650dSSadaf Ebrahimi       return FALSE;
3845*22dc650dSSadaf Ebrahimi       }
3846*22dc650dSSadaf Ebrahimi     (*patlastptr)->string = NULL;            /* Insurance */
3847*22dc650dSSadaf Ebrahimi     if ((*patlastptr)->next == NULL) break;
3848*22dc650dSSadaf Ebrahimi     *patlastptr = (*patlastptr)->next;
3849*22dc650dSSadaf Ebrahimi     }
3850*22dc650dSSadaf Ebrahimi   }
3851*22dc650dSSadaf Ebrahimi 
3852*22dc650dSSadaf Ebrahimi if (f != stdin) fclose(f);
3853*22dc650dSSadaf Ebrahimi return TRUE;
3854*22dc650dSSadaf Ebrahimi }
3855*22dc650dSSadaf Ebrahimi 
3856*22dc650dSSadaf Ebrahimi 
3857*22dc650dSSadaf Ebrahimi 
3858*22dc650dSSadaf Ebrahimi /*************************************************
3859*22dc650dSSadaf Ebrahimi *                Main program                    *
3860*22dc650dSSadaf Ebrahimi *************************************************/
3861*22dc650dSSadaf Ebrahimi 
3862*22dc650dSSadaf Ebrahimi /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3863*22dc650dSSadaf Ebrahimi 
3864*22dc650dSSadaf Ebrahimi int
3865*22dc650dSSadaf Ebrahimi main(int argc, char **argv)
3866*22dc650dSSadaf Ebrahimi {
3867*22dc650dSSadaf Ebrahimi int i, j;
3868*22dc650dSSadaf Ebrahimi int rc = 1;
3869*22dc650dSSadaf Ebrahimi BOOL only_one_at_top;
3870*22dc650dSSadaf Ebrahimi patstr *cp;
3871*22dc650dSSadaf Ebrahimi fnstr *fn;
3872*22dc650dSSadaf Ebrahimi omstr *om;
3873*22dc650dSSadaf Ebrahimi const char *locale_from = "--locale";
3874*22dc650dSSadaf Ebrahimi 
3875*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
3876*22dc650dSSadaf Ebrahimi pcre2_jit_stack *jit_stack = NULL;
3877*22dc650dSSadaf Ebrahimi #endif
3878*22dc650dSSadaf Ebrahimi 
3879*22dc650dSSadaf Ebrahimi /* In Windows, stdout is set up as a text stream, which means that \n is
3880*22dc650dSSadaf Ebrahimi converted to \r\n. This causes output lines that are copied from the input to
3881*22dc650dSSadaf Ebrahimi change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3882*22dc650dSSadaf Ebrahimi that stdout is a binary stream. Note that this means all other output to stdout
3883*22dc650dSSadaf Ebrahimi must use STDOUT_NL to terminate lines. */
3884*22dc650dSSadaf Ebrahimi 
3885*22dc650dSSadaf Ebrahimi #ifdef WIN32
3886*22dc650dSSadaf Ebrahimi _setmode(_fileno(stdout), _O_BINARY);
3887*22dc650dSSadaf Ebrahimi #endif
3888*22dc650dSSadaf Ebrahimi 
3889*22dc650dSSadaf Ebrahimi /* Process the options */
3890*22dc650dSSadaf Ebrahimi 
3891*22dc650dSSadaf Ebrahimi for (i = 1; i < argc; i++)
3892*22dc650dSSadaf Ebrahimi   {
3893*22dc650dSSadaf Ebrahimi   option_item *op = NULL;
3894*22dc650dSSadaf Ebrahimi   char *option_data = (char *)"";    /* default to keep compiler happy */
3895*22dc650dSSadaf Ebrahimi   BOOL longop;
3896*22dc650dSSadaf Ebrahimi   BOOL longopwasequals = FALSE;
3897*22dc650dSSadaf Ebrahimi 
3898*22dc650dSSadaf Ebrahimi   if (argv[i][0] != '-') break;
3899*22dc650dSSadaf Ebrahimi 
3900*22dc650dSSadaf Ebrahimi   /* If we hit an argument that is just "-", it may be a reference to STDIN,
3901*22dc650dSSadaf Ebrahimi   but only if we have previously had -e or -f to define the patterns. */
3902*22dc650dSSadaf Ebrahimi 
3903*22dc650dSSadaf Ebrahimi   if (argv[i][1] == 0)
3904*22dc650dSSadaf Ebrahimi     {
3905*22dc650dSSadaf Ebrahimi     if (pattern_files != NULL || patterns != NULL) break;
3906*22dc650dSSadaf Ebrahimi       else pcre2grep_exit(usage(2));
3907*22dc650dSSadaf Ebrahimi     }
3908*22dc650dSSadaf Ebrahimi 
3909*22dc650dSSadaf Ebrahimi   /* Handle a long name option, or -- to terminate the options */
3910*22dc650dSSadaf Ebrahimi 
3911*22dc650dSSadaf Ebrahimi   if (argv[i][1] == '-')
3912*22dc650dSSadaf Ebrahimi     {
3913*22dc650dSSadaf Ebrahimi     char *arg = argv[i] + 2;
3914*22dc650dSSadaf Ebrahimi     char *argequals = strchr(arg, '=');
3915*22dc650dSSadaf Ebrahimi 
3916*22dc650dSSadaf Ebrahimi     if (*arg == 0)    /* -- terminates options */
3917*22dc650dSSadaf Ebrahimi       {
3918*22dc650dSSadaf Ebrahimi       i++;
3919*22dc650dSSadaf Ebrahimi       break;                /* out of the options-handling loop */
3920*22dc650dSSadaf Ebrahimi       }
3921*22dc650dSSadaf Ebrahimi 
3922*22dc650dSSadaf Ebrahimi     longop = TRUE;
3923*22dc650dSSadaf Ebrahimi 
3924*22dc650dSSadaf Ebrahimi     /* Some long options have data that follows after =, for example file=name.
3925*22dc650dSSadaf Ebrahimi     Some options have variations in the long name spelling: specifically, we
3926*22dc650dSSadaf Ebrahimi     allow "regexp" because GNU grep allows it, though I personally go along
3927*22dc650dSSadaf Ebrahimi     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3928*22dc650dSSadaf Ebrahimi     These options are entered in the table as "regex(p)". Options can be in
3929*22dc650dSSadaf Ebrahimi     both these categories. */
3930*22dc650dSSadaf Ebrahimi 
3931*22dc650dSSadaf Ebrahimi     for (op = optionlist; op->one_char != 0; op++)
3932*22dc650dSSadaf Ebrahimi       {
3933*22dc650dSSadaf Ebrahimi       char *opbra = strchr(op->long_name, '(');
3934*22dc650dSSadaf Ebrahimi       char *equals = strchr(op->long_name, '=');
3935*22dc650dSSadaf Ebrahimi 
3936*22dc650dSSadaf Ebrahimi       /* Handle options with only one spelling of the name */
3937*22dc650dSSadaf Ebrahimi 
3938*22dc650dSSadaf Ebrahimi       if (opbra == NULL)     /* Does not contain '(' */
3939*22dc650dSSadaf Ebrahimi         {
3940*22dc650dSSadaf Ebrahimi         if (equals == NULL)  /* Not thing=data case */
3941*22dc650dSSadaf Ebrahimi           {
3942*22dc650dSSadaf Ebrahimi           if (strcmp(arg, op->long_name) == 0) break;
3943*22dc650dSSadaf Ebrahimi           }
3944*22dc650dSSadaf Ebrahimi         else                 /* Special case xxx=data */
3945*22dc650dSSadaf Ebrahimi           {
3946*22dc650dSSadaf Ebrahimi           int oplen = (int)(equals - op->long_name);
3947*22dc650dSSadaf Ebrahimi           int arglen = (argequals == NULL)?
3948*22dc650dSSadaf Ebrahimi             (int)strlen(arg) : (int)(argequals - arg);
3949*22dc650dSSadaf Ebrahimi           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3950*22dc650dSSadaf Ebrahimi             {
3951*22dc650dSSadaf Ebrahimi             option_data = arg + arglen;
3952*22dc650dSSadaf Ebrahimi             if (*option_data == '=')
3953*22dc650dSSadaf Ebrahimi               {
3954*22dc650dSSadaf Ebrahimi               option_data++;
3955*22dc650dSSadaf Ebrahimi               longopwasequals = TRUE;
3956*22dc650dSSadaf Ebrahimi               }
3957*22dc650dSSadaf Ebrahimi             break;
3958*22dc650dSSadaf Ebrahimi             }
3959*22dc650dSSadaf Ebrahimi           }
3960*22dc650dSSadaf Ebrahimi         }
3961*22dc650dSSadaf Ebrahimi 
3962*22dc650dSSadaf Ebrahimi       /* Handle options with an alternate spelling of the name */
3963*22dc650dSSadaf Ebrahimi 
3964*22dc650dSSadaf Ebrahimi       else
3965*22dc650dSSadaf Ebrahimi         {
3966*22dc650dSSadaf Ebrahimi         char buff1[24];
3967*22dc650dSSadaf Ebrahimi         char buff2[24];
3968*22dc650dSSadaf Ebrahimi         int ret;
3969*22dc650dSSadaf Ebrahimi 
3970*22dc650dSSadaf Ebrahimi         int baselen = (int)(opbra - op->long_name);
3971*22dc650dSSadaf Ebrahimi         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3972*22dc650dSSadaf Ebrahimi         int arglen = (argequals == NULL || equals == NULL)?
3973*22dc650dSSadaf Ebrahimi           (int)strlen(arg) : (int)(argequals - arg);
3974*22dc650dSSadaf Ebrahimi 
3975*22dc650dSSadaf Ebrahimi         if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3976*22dc650dSSadaf Ebrahimi              ret < 0 || ret > (int)sizeof(buff1)) ||
3977*22dc650dSSadaf Ebrahimi             (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3978*22dc650dSSadaf Ebrahimi                      fulllen - baselen - 2, opbra + 1),
3979*22dc650dSSadaf Ebrahimi              ret < 0 || ret > (int)sizeof(buff2)))
3980*22dc650dSSadaf Ebrahimi           {
3981*22dc650dSSadaf Ebrahimi           /* LCOV_EXCL_START - this is a "never" event */
3982*22dc650dSSadaf Ebrahimi           fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3983*22dc650dSSadaf Ebrahimi             op->long_name);
3984*22dc650dSSadaf Ebrahimi           pcre2grep_exit(2);
3985*22dc650dSSadaf Ebrahimi           /* LCOV_EXCL_STOP */
3986*22dc650dSSadaf Ebrahimi           }
3987*22dc650dSSadaf Ebrahimi 
3988*22dc650dSSadaf Ebrahimi         if (strncmp(arg, buff1, arglen) == 0 ||
3989*22dc650dSSadaf Ebrahimi            strncmp(arg, buff2, arglen) == 0)
3990*22dc650dSSadaf Ebrahimi           {
3991*22dc650dSSadaf Ebrahimi           if (equals != NULL && argequals != NULL)
3992*22dc650dSSadaf Ebrahimi             {
3993*22dc650dSSadaf Ebrahimi             option_data = argequals;
3994*22dc650dSSadaf Ebrahimi             if (*option_data == '=')
3995*22dc650dSSadaf Ebrahimi               {
3996*22dc650dSSadaf Ebrahimi               option_data++;
3997*22dc650dSSadaf Ebrahimi               longopwasequals = TRUE;
3998*22dc650dSSadaf Ebrahimi               }
3999*22dc650dSSadaf Ebrahimi             }
4000*22dc650dSSadaf Ebrahimi           break;
4001*22dc650dSSadaf Ebrahimi           }
4002*22dc650dSSadaf Ebrahimi         }
4003*22dc650dSSadaf Ebrahimi       }
4004*22dc650dSSadaf Ebrahimi 
4005*22dc650dSSadaf Ebrahimi     if (op->one_char == 0)
4006*22dc650dSSadaf Ebrahimi       {
4007*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
4008*22dc650dSSadaf Ebrahimi       pcre2grep_exit(usage(2));
4009*22dc650dSSadaf Ebrahimi       }
4010*22dc650dSSadaf Ebrahimi     }
4011*22dc650dSSadaf Ebrahimi 
4012*22dc650dSSadaf Ebrahimi   /* One-char options; many that have no data may be in a single argument; we
4013*22dc650dSSadaf Ebrahimi   continue till we hit the last one or one that needs data. */
4014*22dc650dSSadaf Ebrahimi 
4015*22dc650dSSadaf Ebrahimi   else
4016*22dc650dSSadaf Ebrahimi     {
4017*22dc650dSSadaf Ebrahimi     char *s = argv[i] + 1;
4018*22dc650dSSadaf Ebrahimi     longop = FALSE;
4019*22dc650dSSadaf Ebrahimi 
4020*22dc650dSSadaf Ebrahimi     while (*s != 0)
4021*22dc650dSSadaf Ebrahimi       {
4022*22dc650dSSadaf Ebrahimi       for (op = optionlist; op->one_char != 0; op++)
4023*22dc650dSSadaf Ebrahimi         {
4024*22dc650dSSadaf Ebrahimi         if (*s == op->one_char) break;
4025*22dc650dSSadaf Ebrahimi         }
4026*22dc650dSSadaf Ebrahimi       if (op->one_char == 0)
4027*22dc650dSSadaf Ebrahimi         {
4028*22dc650dSSadaf Ebrahimi         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4029*22dc650dSSadaf Ebrahimi           *s, argv[i]);
4030*22dc650dSSadaf Ebrahimi         pcre2grep_exit(usage(2));
4031*22dc650dSSadaf Ebrahimi         }
4032*22dc650dSSadaf Ebrahimi 
4033*22dc650dSSadaf Ebrahimi       option_data = s+1;
4034*22dc650dSSadaf Ebrahimi 
4035*22dc650dSSadaf Ebrahimi       /* Break out if this is the last character in the string; it's handled
4036*22dc650dSSadaf Ebrahimi       below like a single multi-char option. */
4037*22dc650dSSadaf Ebrahimi 
4038*22dc650dSSadaf Ebrahimi       if (*option_data == 0) break;
4039*22dc650dSSadaf Ebrahimi 
4040*22dc650dSSadaf Ebrahimi       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4041*22dc650dSSadaf Ebrahimi       are used for ones that either have a numeric value or a default, i.e.
4042*22dc650dSSadaf Ebrahimi       the data is optional. If a digit follows, there is data; if not, carry on
4043*22dc650dSSadaf Ebrahimi       with other single-character options in the same string. */
4044*22dc650dSSadaf Ebrahimi 
4045*22dc650dSSadaf Ebrahimi       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4046*22dc650dSSadaf Ebrahimi         {
4047*22dc650dSSadaf Ebrahimi         if (isdigit((unsigned char)(s[1]))) break;
4048*22dc650dSSadaf Ebrahimi         }
4049*22dc650dSSadaf Ebrahimi       else   /* Check for an option with data */
4050*22dc650dSSadaf Ebrahimi         {
4051*22dc650dSSadaf Ebrahimi         if (op->type != OP_NODATA) break;
4052*22dc650dSSadaf Ebrahimi         }
4053*22dc650dSSadaf Ebrahimi 
4054*22dc650dSSadaf Ebrahimi       /* Handle a single-character option with no data, then loop for the
4055*22dc650dSSadaf Ebrahimi       next character in the string. */
4056*22dc650dSSadaf Ebrahimi 
4057*22dc650dSSadaf Ebrahimi       pcre2_options = handle_option(*s++, pcre2_options);
4058*22dc650dSSadaf Ebrahimi       }
4059*22dc650dSSadaf Ebrahimi     }
4060*22dc650dSSadaf Ebrahimi 
4061*22dc650dSSadaf Ebrahimi   /* At this point we should have op pointing to a matched option. If the type
4062*22dc650dSSadaf Ebrahimi   is OP_NODATA, it means that there is no data, and the option might set
4063*22dc650dSSadaf Ebrahimi   something in the PCRE2 options. */
4064*22dc650dSSadaf Ebrahimi 
4065*22dc650dSSadaf Ebrahimi   if (op->type == OP_NODATA)
4066*22dc650dSSadaf Ebrahimi     {
4067*22dc650dSSadaf Ebrahimi     pcre2_options = handle_option(op->one_char, pcre2_options);
4068*22dc650dSSadaf Ebrahimi     continue;
4069*22dc650dSSadaf Ebrahimi     }
4070*22dc650dSSadaf Ebrahimi 
4071*22dc650dSSadaf Ebrahimi   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4072*22dc650dSSadaf Ebrahimi   either has a value or defaults to something. It cannot have data in a
4073*22dc650dSSadaf Ebrahimi   separate item. At the moment, the only such options are "colo(u)r" and
4074*22dc650dSSadaf Ebrahimi   "only-matching". */
4075*22dc650dSSadaf Ebrahimi 
4076*22dc650dSSadaf Ebrahimi   if (*option_data == 0 &&
4077*22dc650dSSadaf Ebrahimi       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4078*22dc650dSSadaf Ebrahimi        op->type == OP_OP_NUMBERS))
4079*22dc650dSSadaf Ebrahimi     {
4080*22dc650dSSadaf Ebrahimi     switch (op->one_char)
4081*22dc650dSSadaf Ebrahimi       {
4082*22dc650dSSadaf Ebrahimi       case N_COLOUR:
4083*22dc650dSSadaf Ebrahimi       colour_option = "auto";
4084*22dc650dSSadaf Ebrahimi       break;
4085*22dc650dSSadaf Ebrahimi 
4086*22dc650dSSadaf Ebrahimi       case 'o':
4087*22dc650dSSadaf Ebrahimi       only_matching_last = add_number(0, only_matching_last);
4088*22dc650dSSadaf Ebrahimi       if (only_matching == NULL) only_matching = only_matching_last;
4089*22dc650dSSadaf Ebrahimi       break;
4090*22dc650dSSadaf Ebrahimi       }
4091*22dc650dSSadaf Ebrahimi     continue;
4092*22dc650dSSadaf Ebrahimi     }
4093*22dc650dSSadaf Ebrahimi 
4094*22dc650dSSadaf Ebrahimi   /* Otherwise, find the data string for the option. */
4095*22dc650dSSadaf Ebrahimi 
4096*22dc650dSSadaf Ebrahimi   if (*option_data == 0)
4097*22dc650dSSadaf Ebrahimi     {
4098*22dc650dSSadaf Ebrahimi     if (i >= argc - 1 || longopwasequals)
4099*22dc650dSSadaf Ebrahimi       {
4100*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4101*22dc650dSSadaf Ebrahimi       pcre2grep_exit(usage(2));
4102*22dc650dSSadaf Ebrahimi       }
4103*22dc650dSSadaf Ebrahimi     option_data = argv[++i];
4104*22dc650dSSadaf Ebrahimi     }
4105*22dc650dSSadaf Ebrahimi 
4106*22dc650dSSadaf Ebrahimi   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4107*22dc650dSSadaf Ebrahimi   added to a chain of numbers. */
4108*22dc650dSSadaf Ebrahimi 
4109*22dc650dSSadaf Ebrahimi   if (op->type == OP_OP_NUMBERS)
4110*22dc650dSSadaf Ebrahimi     {
4111*22dc650dSSadaf Ebrahimi     unsigned long int n = decode_number(option_data, op, longop);
4112*22dc650dSSadaf Ebrahimi     omdatastr *omd = (omdatastr *)op->dataptr;
4113*22dc650dSSadaf Ebrahimi     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4114*22dc650dSSadaf Ebrahimi     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4115*22dc650dSSadaf Ebrahimi     }
4116*22dc650dSSadaf Ebrahimi 
4117*22dc650dSSadaf Ebrahimi   /* If the option type is OP_PATLIST, it's the -e option, or one of the
4118*22dc650dSSadaf Ebrahimi   include/exclude options, which can be called multiple times to create lists
4119*22dc650dSSadaf Ebrahimi   of patterns. */
4120*22dc650dSSadaf Ebrahimi 
4121*22dc650dSSadaf Ebrahimi   else if (op->type == OP_PATLIST)
4122*22dc650dSSadaf Ebrahimi     {
4123*22dc650dSSadaf Ebrahimi     patdatastr *pd = (patdatastr *)op->dataptr;
4124*22dc650dSSadaf Ebrahimi     *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4125*22dc650dSSadaf Ebrahimi       *(pd->lastptr));
4126*22dc650dSSadaf Ebrahimi     if (*(pd->lastptr) == NULL) goto EXIT2;
4127*22dc650dSSadaf Ebrahimi     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4128*22dc650dSSadaf Ebrahimi     }
4129*22dc650dSSadaf Ebrahimi 
4130*22dc650dSSadaf Ebrahimi   /* If the option type is OP_FILELIST, it's one of the options that names a
4131*22dc650dSSadaf Ebrahimi   file. */
4132*22dc650dSSadaf Ebrahimi 
4133*22dc650dSSadaf Ebrahimi   else if (op->type == OP_FILELIST)
4134*22dc650dSSadaf Ebrahimi     {
4135*22dc650dSSadaf Ebrahimi     fndatastr *fd = (fndatastr *)op->dataptr;
4136*22dc650dSSadaf Ebrahimi     fn = (fnstr *)malloc(sizeof(fnstr));
4137*22dc650dSSadaf Ebrahimi     if (fn == NULL)
4138*22dc650dSSadaf Ebrahimi       {
4139*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_START */
4140*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: malloc failed\n");
4141*22dc650dSSadaf Ebrahimi       goto EXIT2;
4142*22dc650dSSadaf Ebrahimi       /* LCOV_EXCL_STOP */
4143*22dc650dSSadaf Ebrahimi       }
4144*22dc650dSSadaf Ebrahimi     fn->next = NULL;
4145*22dc650dSSadaf Ebrahimi     fn->name = option_data;
4146*22dc650dSSadaf Ebrahimi     if (*(fd->anchor) == NULL)
4147*22dc650dSSadaf Ebrahimi       *(fd->anchor) = fn;
4148*22dc650dSSadaf Ebrahimi     else
4149*22dc650dSSadaf Ebrahimi       (*(fd->lastptr))->next = fn;
4150*22dc650dSSadaf Ebrahimi     *(fd->lastptr) = fn;
4151*22dc650dSSadaf Ebrahimi     }
4152*22dc650dSSadaf Ebrahimi 
4153*22dc650dSSadaf Ebrahimi   /* Handle OP_BINFILES: the data must be "binary", "without-match", or "text" */
4154*22dc650dSSadaf Ebrahimi 
4155*22dc650dSSadaf Ebrahimi   else if (op->type == OP_BINFILES)
4156*22dc650dSSadaf Ebrahimi     {
4157*22dc650dSSadaf Ebrahimi     if (strcmp(option_data, "binary") == 0)
4158*22dc650dSSadaf Ebrahimi       binary_files = BIN_BINARY;
4159*22dc650dSSadaf Ebrahimi     else if (strcmp(option_data, "without-match") == 0)
4160*22dc650dSSadaf Ebrahimi       binary_files = BIN_NOMATCH;
4161*22dc650dSSadaf Ebrahimi     else if (strcmp(option_data, "text") == 0)
4162*22dc650dSSadaf Ebrahimi       binary_files = BIN_TEXT;
4163*22dc650dSSadaf Ebrahimi     else
4164*22dc650dSSadaf Ebrahimi       {
4165*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4166*22dc650dSSadaf Ebrahimi         option_data);
4167*22dc650dSSadaf Ebrahimi       pcre2grep_exit(usage(2));
4168*22dc650dSSadaf Ebrahimi       }
4169*22dc650dSSadaf Ebrahimi     }
4170*22dc650dSSadaf Ebrahimi 
4171*22dc650dSSadaf Ebrahimi   /* Otherwise, deal with a single string or numeric data value. */
4172*22dc650dSSadaf Ebrahimi 
4173*22dc650dSSadaf Ebrahimi   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4174*22dc650dSSadaf Ebrahimi            op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4175*22dc650dSSadaf Ebrahimi     {
4176*22dc650dSSadaf Ebrahimi     *((char **)op->dataptr) = option_data;
4177*22dc650dSSadaf Ebrahimi     }
4178*22dc650dSSadaf Ebrahimi   else
4179*22dc650dSSadaf Ebrahimi     {
4180*22dc650dSSadaf Ebrahimi     unsigned long int n = decode_number(option_data, op, longop);
4181*22dc650dSSadaf Ebrahimi     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4182*22dc650dSSadaf Ebrahimi       else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4183*22dc650dSSadaf Ebrahimi       else *((int *)op->dataptr) = n;
4184*22dc650dSSadaf Ebrahimi     }
4185*22dc650dSSadaf Ebrahimi   }
4186*22dc650dSSadaf Ebrahimi 
4187*22dc650dSSadaf Ebrahimi /* Options have been decoded. If -C was used, its value is used as a default
4188*22dc650dSSadaf Ebrahimi for -A and -B. */
4189*22dc650dSSadaf Ebrahimi 
4190*22dc650dSSadaf Ebrahimi if (both_context > 0)
4191*22dc650dSSadaf Ebrahimi   {
4192*22dc650dSSadaf Ebrahimi   if (after_context == 0) after_context = both_context;
4193*22dc650dSSadaf Ebrahimi   if (before_context == 0) before_context = both_context;
4194*22dc650dSSadaf Ebrahimi   }
4195*22dc650dSSadaf Ebrahimi 
4196*22dc650dSSadaf Ebrahimi /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4197*22dc650dSSadaf Ebrahimi permitted. They display, each in their own way, only the data that has matched.
4198*22dc650dSSadaf Ebrahimi */
4199*22dc650dSSadaf Ebrahimi 
4200*22dc650dSSadaf Ebrahimi only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4201*22dc650dSSadaf Ebrahimi   file_offsets + line_offsets;
4202*22dc650dSSadaf Ebrahimi 
4203*22dc650dSSadaf Ebrahimi if (only_matching_count > 1)
4204*22dc650dSSadaf Ebrahimi   {
4205*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4206*22dc650dSSadaf Ebrahimi     "--file-offsets and/or --line-offsets\n");
4207*22dc650dSSadaf Ebrahimi   pcre2grep_exit(usage(2));
4208*22dc650dSSadaf Ebrahimi   }
4209*22dc650dSSadaf Ebrahimi 
4210*22dc650dSSadaf Ebrahimi /* Check that there is a big enough ovector for all -o settings. */
4211*22dc650dSSadaf Ebrahimi 
4212*22dc650dSSadaf Ebrahimi for (om = only_matching; om != NULL; om = om->next)
4213*22dc650dSSadaf Ebrahimi   {
4214*22dc650dSSadaf Ebrahimi   int n = om->groupnum;
4215*22dc650dSSadaf Ebrahimi   if (n > (int)capture_max)
4216*22dc650dSSadaf Ebrahimi     {
4217*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4218*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4219*22dc650dSSadaf Ebrahimi     goto EXIT2;
4220*22dc650dSSadaf Ebrahimi     }
4221*22dc650dSSadaf Ebrahimi   }
4222*22dc650dSSadaf Ebrahimi 
4223*22dc650dSSadaf Ebrahimi /* Check the text supplied to --output for errors. */
4224*22dc650dSSadaf Ebrahimi 
4225*22dc650dSSadaf Ebrahimi if (output_text != NULL &&
4226*22dc650dSSadaf Ebrahimi     !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4227*22dc650dSSadaf Ebrahimi   goto EXIT2;
4228*22dc650dSSadaf Ebrahimi 
4229*22dc650dSSadaf Ebrahimi /* Set up default compile and match contexts and match data blocks. */
4230*22dc650dSSadaf Ebrahimi 
4231*22dc650dSSadaf Ebrahimi offset_size = capture_max + 1;
4232*22dc650dSSadaf Ebrahimi compile_context = pcre2_compile_context_create(NULL);
4233*22dc650dSSadaf Ebrahimi match_context = pcre2_match_context_create(NULL);
4234*22dc650dSSadaf Ebrahimi match_data_pair[0] = pcre2_match_data_create(offset_size, NULL);
4235*22dc650dSSadaf Ebrahimi match_data_pair[1] = pcre2_match_data_create(offset_size, NULL);
4236*22dc650dSSadaf Ebrahimi offsets_pair[0] = pcre2_get_ovector_pointer(match_data_pair[0]);
4237*22dc650dSSadaf Ebrahimi offsets_pair[1] = pcre2_get_ovector_pointer(match_data_pair[1]);
4238*22dc650dSSadaf Ebrahimi match_data = match_data_pair[0];
4239*22dc650dSSadaf Ebrahimi offsets = offsets_pair[0];
4240*22dc650dSSadaf Ebrahimi match_data_toggle = 0;
4241*22dc650dSSadaf Ebrahimi 
4242*22dc650dSSadaf Ebrahimi /* If string (script) callouts are supported, set up the callout processing
4243*22dc650dSSadaf Ebrahimi function in the match context. */
4244*22dc650dSSadaf Ebrahimi 
4245*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT
4246*22dc650dSSadaf Ebrahimi pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4247*22dc650dSSadaf Ebrahimi #endif
4248*22dc650dSSadaf Ebrahimi 
4249*22dc650dSSadaf Ebrahimi /* Put limits into the match context. */
4250*22dc650dSSadaf Ebrahimi 
4251*22dc650dSSadaf Ebrahimi if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4252*22dc650dSSadaf Ebrahimi if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4253*22dc650dSSadaf Ebrahimi if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4254*22dc650dSSadaf Ebrahimi 
4255*22dc650dSSadaf Ebrahimi /* If a locale has not been provided as an option, see if the LC_ALL
4256*22dc650dSSadaf Ebrahimi environment variable is set or, failing that, LC_CTYPE, and if so, use it. */
4257*22dc650dSSadaf Ebrahimi 
4258*22dc650dSSadaf Ebrahimi if (locale == NULL)
4259*22dc650dSSadaf Ebrahimi   {
4260*22dc650dSSadaf Ebrahimi   locale = getenv("LC_ALL");
4261*22dc650dSSadaf Ebrahimi   locale_from = "LC_ALL";
4262*22dc650dSSadaf Ebrahimi   }
4263*22dc650dSSadaf Ebrahimi 
4264*22dc650dSSadaf Ebrahimi if (locale == NULL)
4265*22dc650dSSadaf Ebrahimi   {
4266*22dc650dSSadaf Ebrahimi   locale = getenv("LC_CTYPE");
4267*22dc650dSSadaf Ebrahimi   locale_from = "LC_CTYPE";
4268*22dc650dSSadaf Ebrahimi   }
4269*22dc650dSSadaf Ebrahimi 
4270*22dc650dSSadaf Ebrahimi /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4271*22dc650dSSadaf Ebrahimi NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4272*22dc650dSSadaf Ebrahimi 
4273*22dc650dSSadaf Ebrahimi if (locale != NULL)
4274*22dc650dSSadaf Ebrahimi   {
4275*22dc650dSSadaf Ebrahimi   if (setlocale(LC_CTYPE, locale) == NULL)
4276*22dc650dSSadaf Ebrahimi     {
4277*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4278*22dc650dSSadaf Ebrahimi       locale, locale_from);
4279*22dc650dSSadaf Ebrahimi     goto EXIT2;
4280*22dc650dSSadaf Ebrahimi     }
4281*22dc650dSSadaf Ebrahimi   character_tables = pcre2_maketables(NULL);
4282*22dc650dSSadaf Ebrahimi   pcre2_set_character_tables(compile_context, character_tables);
4283*22dc650dSSadaf Ebrahimi   }
4284*22dc650dSSadaf Ebrahimi 
4285*22dc650dSSadaf Ebrahimi /* Sort out colouring */
4286*22dc650dSSadaf Ebrahimi 
4287*22dc650dSSadaf Ebrahimi if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4288*22dc650dSSadaf Ebrahimi   {
4289*22dc650dSSadaf Ebrahimi   if (strcmp(colour_option, "always") == 0)
4290*22dc650dSSadaf Ebrahimi #ifdef WIN32
4291*22dc650dSSadaf Ebrahimi     do_ansi = !is_stdout_tty(),
4292*22dc650dSSadaf Ebrahimi #endif
4293*22dc650dSSadaf Ebrahimi     do_colour = TRUE;
4294*22dc650dSSadaf Ebrahimi   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4295*22dc650dSSadaf Ebrahimi   else
4296*22dc650dSSadaf Ebrahimi     {
4297*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4298*22dc650dSSadaf Ebrahimi       colour_option);
4299*22dc650dSSadaf Ebrahimi     goto EXIT2;
4300*22dc650dSSadaf Ebrahimi     }
4301*22dc650dSSadaf Ebrahimi   if (do_colour)
4302*22dc650dSSadaf Ebrahimi     {
4303*22dc650dSSadaf Ebrahimi     char *cs = getenv("PCRE2GREP_COLOUR");
4304*22dc650dSSadaf Ebrahimi     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4305*22dc650dSSadaf Ebrahimi     if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4306*22dc650dSSadaf Ebrahimi     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4307*22dc650dSSadaf Ebrahimi     if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4308*22dc650dSSadaf Ebrahimi     if (cs == NULL) cs = getenv("GREP_COLOR");
4309*22dc650dSSadaf Ebrahimi     if (cs != NULL)
4310*22dc650dSSadaf Ebrahimi       {
4311*22dc650dSSadaf Ebrahimi       if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4312*22dc650dSSadaf Ebrahimi       }
4313*22dc650dSSadaf Ebrahimi #ifdef WIN32
4314*22dc650dSSadaf Ebrahimi     init_colour_output();
4315*22dc650dSSadaf Ebrahimi #endif
4316*22dc650dSSadaf Ebrahimi     }
4317*22dc650dSSadaf Ebrahimi   }
4318*22dc650dSSadaf Ebrahimi 
4319*22dc650dSSadaf Ebrahimi /* When colouring or otherwise identifying matching substrings, we need to find
4320*22dc650dSSadaf Ebrahimi all possible matches when there are multiple patterns. */
4321*22dc650dSSadaf Ebrahimi 
4322*22dc650dSSadaf Ebrahimi all_matches = do_colour || only_matching_count != 0;
4323*22dc650dSSadaf Ebrahimi 
4324*22dc650dSSadaf Ebrahimi /* Sort out a newline setting. */
4325*22dc650dSSadaf Ebrahimi 
4326*22dc650dSSadaf Ebrahimi if (newline_arg != NULL)
4327*22dc650dSSadaf Ebrahimi   {
4328*22dc650dSSadaf Ebrahimi   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4329*22dc650dSSadaf Ebrahimi        endlinetype++)
4330*22dc650dSSadaf Ebrahimi     {
4331*22dc650dSSadaf Ebrahimi     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4332*22dc650dSSadaf Ebrahimi     }
4333*22dc650dSSadaf Ebrahimi   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4334*22dc650dSSadaf Ebrahimi     pcre2_set_newline(compile_context, endlinetype);
4335*22dc650dSSadaf Ebrahimi   else
4336*22dc650dSSadaf Ebrahimi     {
4337*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4338*22dc650dSSadaf Ebrahimi       newline_arg);
4339*22dc650dSSadaf Ebrahimi     goto EXIT2;
4340*22dc650dSSadaf Ebrahimi     }
4341*22dc650dSSadaf Ebrahimi   }
4342*22dc650dSSadaf Ebrahimi 
4343*22dc650dSSadaf Ebrahimi /* Find default newline convention */
4344*22dc650dSSadaf Ebrahimi 
4345*22dc650dSSadaf Ebrahimi else
4346*22dc650dSSadaf Ebrahimi   {
4347*22dc650dSSadaf Ebrahimi   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4348*22dc650dSSadaf Ebrahimi   }
4349*22dc650dSSadaf Ebrahimi 
4350*22dc650dSSadaf Ebrahimi /* Interpret the text values for -d and -D */
4351*22dc650dSSadaf Ebrahimi 
4352*22dc650dSSadaf Ebrahimi if (dee_option != NULL)
4353*22dc650dSSadaf Ebrahimi   {
4354*22dc650dSSadaf Ebrahimi   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4355*22dc650dSSadaf Ebrahimi   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4356*22dc650dSSadaf Ebrahimi   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4357*22dc650dSSadaf Ebrahimi   else
4358*22dc650dSSadaf Ebrahimi     {
4359*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4360*22dc650dSSadaf Ebrahimi     goto EXIT2;
4361*22dc650dSSadaf Ebrahimi     }
4362*22dc650dSSadaf Ebrahimi   }
4363*22dc650dSSadaf Ebrahimi 
4364*22dc650dSSadaf Ebrahimi if (DEE_option != NULL)
4365*22dc650dSSadaf Ebrahimi   {
4366*22dc650dSSadaf Ebrahimi   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4367*22dc650dSSadaf Ebrahimi   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4368*22dc650dSSadaf Ebrahimi   else
4369*22dc650dSSadaf Ebrahimi     {
4370*22dc650dSSadaf Ebrahimi     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4371*22dc650dSSadaf Ebrahimi     goto EXIT2;
4372*22dc650dSSadaf Ebrahimi     }
4373*22dc650dSSadaf Ebrahimi   }
4374*22dc650dSSadaf Ebrahimi 
4375*22dc650dSSadaf Ebrahimi /* If no_ucp is set, remove PCRE2_UCP from the compile options. */
4376*22dc650dSSadaf Ebrahimi 
4377*22dc650dSSadaf Ebrahimi if (no_ucp) pcre2_options &= ~PCRE2_UCP;
4378*22dc650dSSadaf Ebrahimi 
4379*22dc650dSSadaf Ebrahimi /* adjust the extra options. */
4380*22dc650dSSadaf Ebrahimi 
4381*22dc650dSSadaf Ebrahimi if (case_restrict) extra_options |= PCRE2_EXTRA_CASELESS_RESTRICT;
4382*22dc650dSSadaf Ebrahimi if (posix_digit)
4383*22dc650dSSadaf Ebrahimi   extra_options |= (PCRE2_EXTRA_ASCII_BSD | PCRE2_EXTRA_ASCII_DIGIT);
4384*22dc650dSSadaf Ebrahimi 
4385*22dc650dSSadaf Ebrahimi /* Set the extra options in the compile context. */
4386*22dc650dSSadaf Ebrahimi 
4387*22dc650dSSadaf Ebrahimi (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4388*22dc650dSSadaf Ebrahimi 
4389*22dc650dSSadaf Ebrahimi /* If use_jit is set, check whether JIT is available. If not, do not try
4390*22dc650dSSadaf Ebrahimi to use JIT. */
4391*22dc650dSSadaf Ebrahimi 
4392*22dc650dSSadaf Ebrahimi if (use_jit)
4393*22dc650dSSadaf Ebrahimi   {
4394*22dc650dSSadaf Ebrahimi   uint32_t answer;
4395*22dc650dSSadaf Ebrahimi   (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4396*22dc650dSSadaf Ebrahimi   if (!answer) use_jit = FALSE;
4397*22dc650dSSadaf Ebrahimi   }
4398*22dc650dSSadaf Ebrahimi 
4399*22dc650dSSadaf Ebrahimi /* Get memory for the main buffer. */
4400*22dc650dSSadaf Ebrahimi 
4401*22dc650dSSadaf Ebrahimi if (bufthird <= 0)
4402*22dc650dSSadaf Ebrahimi   {
4403*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4404*22dc650dSSadaf Ebrahimi   goto EXIT2;
4405*22dc650dSSadaf Ebrahimi   }
4406*22dc650dSSadaf Ebrahimi 
4407*22dc650dSSadaf Ebrahimi bufsize = 3*bufthird;
4408*22dc650dSSadaf Ebrahimi main_buffer = (char *)malloc(bufsize);
4409*22dc650dSSadaf Ebrahimi 
4410*22dc650dSSadaf Ebrahimi if (main_buffer == NULL)
4411*22dc650dSSadaf Ebrahimi   {
4412*22dc650dSSadaf Ebrahimi   /* LCOV_EXCL_START */
4413*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2grep: malloc failed\n");
4414*22dc650dSSadaf Ebrahimi   goto EXIT2;
4415*22dc650dSSadaf Ebrahimi   /* LCOV_EXCL_STOP */
4416*22dc650dSSadaf Ebrahimi   }
4417*22dc650dSSadaf Ebrahimi 
4418*22dc650dSSadaf Ebrahimi /* If no patterns were provided by -e, and there are no files provided by -f,
4419*22dc650dSSadaf Ebrahimi the first argument is the one and only pattern, and it must exist. */
4420*22dc650dSSadaf Ebrahimi 
4421*22dc650dSSadaf Ebrahimi if (patterns == NULL && pattern_files == NULL)
4422*22dc650dSSadaf Ebrahimi   {
4423*22dc650dSSadaf Ebrahimi   if (i >= argc) return usage(2);
4424*22dc650dSSadaf Ebrahimi   patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4425*22dc650dSSadaf Ebrahimi     NULL);
4426*22dc650dSSadaf Ebrahimi   i++;
4427*22dc650dSSadaf Ebrahimi   if (patterns == NULL) goto EXIT2;
4428*22dc650dSSadaf Ebrahimi   }
4429*22dc650dSSadaf Ebrahimi 
4430*22dc650dSSadaf Ebrahimi /* Compile the patterns that were provided on the command line, either by
4431*22dc650dSSadaf Ebrahimi multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4432*22dc650dSSadaf Ebrahimi after all the command-line options are read so that we know which PCRE options
4433*22dc650dSSadaf Ebrahimi to use. When -F is used, compile_pattern() may add another block into the
4434*22dc650dSSadaf Ebrahimi chain, so we must not access the next pointer till after the compile. */
4435*22dc650dSSadaf Ebrahimi 
4436*22dc650dSSadaf Ebrahimi for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4437*22dc650dSSadaf Ebrahimi   {
4438*22dc650dSSadaf Ebrahimi   if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4439*22dc650dSSadaf Ebrahimi        (j == 1 && patterns->next == NULL)? 0 : j))
4440*22dc650dSSadaf Ebrahimi     goto EXIT2;
4441*22dc650dSSadaf Ebrahimi   }
4442*22dc650dSSadaf Ebrahimi 
4443*22dc650dSSadaf Ebrahimi /* Read and compile the regular expressions that are provided in files. */
4444*22dc650dSSadaf Ebrahimi 
4445*22dc650dSSadaf Ebrahimi for (fn = pattern_files; fn != NULL; fn = fn->next)
4446*22dc650dSSadaf Ebrahimi   {
4447*22dc650dSSadaf Ebrahimi   if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4448*22dc650dSSadaf Ebrahimi   }
4449*22dc650dSSadaf Ebrahimi 
4450*22dc650dSSadaf Ebrahimi /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4451*22dc650dSSadaf Ebrahimi 
4452*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
4453*22dc650dSSadaf Ebrahimi if (use_jit)
4454*22dc650dSSadaf Ebrahimi   {
4455*22dc650dSSadaf Ebrahimi   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4456*22dc650dSSadaf Ebrahimi   if (jit_stack != NULL                        )
4457*22dc650dSSadaf Ebrahimi     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4458*22dc650dSSadaf Ebrahimi   }
4459*22dc650dSSadaf Ebrahimi #endif
4460*22dc650dSSadaf Ebrahimi 
4461*22dc650dSSadaf Ebrahimi /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4462*22dc650dSSadaf Ebrahimi adjust the options. */
4463*22dc650dSSadaf Ebrahimi 
4464*22dc650dSSadaf Ebrahimi pcre2_options &= ~PCRE2_LITERAL;
4465*22dc650dSSadaf Ebrahimi (void)pcre2_set_compile_extra_options(compile_context, 0);
4466*22dc650dSSadaf Ebrahimi 
4467*22dc650dSSadaf Ebrahimi /* If there are include or exclude patterns read from the command line, compile
4468*22dc650dSSadaf Ebrahimi them. */
4469*22dc650dSSadaf Ebrahimi 
4470*22dc650dSSadaf Ebrahimi for (j = 0; j < 4; j++)
4471*22dc650dSSadaf Ebrahimi   {
4472*22dc650dSSadaf Ebrahimi   int k;
4473*22dc650dSSadaf Ebrahimi   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4474*22dc650dSSadaf Ebrahimi     {
4475*22dc650dSSadaf Ebrahimi     if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4476*22dc650dSSadaf Ebrahimi          (k == 1 && cp->next == NULL)? 0 : k))
4477*22dc650dSSadaf Ebrahimi       goto EXIT2;
4478*22dc650dSSadaf Ebrahimi     }
4479*22dc650dSSadaf Ebrahimi   }
4480*22dc650dSSadaf Ebrahimi 
4481*22dc650dSSadaf Ebrahimi /* Read and compile include/exclude patterns from files. */
4482*22dc650dSSadaf Ebrahimi 
4483*22dc650dSSadaf Ebrahimi for (fn = include_from; fn != NULL; fn = fn->next)
4484*22dc650dSSadaf Ebrahimi   {
4485*22dc650dSSadaf Ebrahimi   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4486*22dc650dSSadaf Ebrahimi     goto EXIT2;
4487*22dc650dSSadaf Ebrahimi   }
4488*22dc650dSSadaf Ebrahimi 
4489*22dc650dSSadaf Ebrahimi for (fn = exclude_from; fn != NULL; fn = fn->next)
4490*22dc650dSSadaf Ebrahimi   {
4491*22dc650dSSadaf Ebrahimi   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4492*22dc650dSSadaf Ebrahimi     goto EXIT2;
4493*22dc650dSSadaf Ebrahimi   }
4494*22dc650dSSadaf Ebrahimi 
4495*22dc650dSSadaf Ebrahimi /* If there are no files that contain lists of files to search, and there are
4496*22dc650dSSadaf Ebrahimi no file arguments, search stdin, and then exit. */
4497*22dc650dSSadaf Ebrahimi 
4498*22dc650dSSadaf Ebrahimi if (file_lists == NULL && i >= argc)
4499*22dc650dSSadaf Ebrahimi   {
4500*22dc650dSSadaf Ebrahimi   /* Seeking on a buffered stdin is not portable, so attempt to remove
4501*22dc650dSSadaf Ebrahimi      the buffer to work around reported issues affecting several BSD
4502*22dc650dSSadaf Ebrahimi      systems and AIX. */
4503*22dc650dSSadaf Ebrahimi   if (count_limit >= 0)
4504*22dc650dSSadaf Ebrahimi     setbuf(stdin, NULL);
4505*22dc650dSSadaf Ebrahimi   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4506*22dc650dSSadaf Ebrahimi     (filenames > FN_DEFAULT)? stdin_name : NULL);
4507*22dc650dSSadaf Ebrahimi   goto EXIT;
4508*22dc650dSSadaf Ebrahimi   }
4509*22dc650dSSadaf Ebrahimi 
4510*22dc650dSSadaf Ebrahimi /* If any files that contain lists of files to search have been specified,
4511*22dc650dSSadaf Ebrahimi read them line by line and search the given files. */
4512*22dc650dSSadaf Ebrahimi 
4513*22dc650dSSadaf Ebrahimi for (fn = file_lists; fn != NULL; fn = fn->next)
4514*22dc650dSSadaf Ebrahimi   {
4515*22dc650dSSadaf Ebrahimi   char buffer[FNBUFSIZ];
4516*22dc650dSSadaf Ebrahimi   FILE *fl;
4517*22dc650dSSadaf Ebrahimi   if (strcmp(fn->name, "-") == 0) fl = stdin; else
4518*22dc650dSSadaf Ebrahimi     {
4519*22dc650dSSadaf Ebrahimi     fl = fopen(fn->name, "rb");
4520*22dc650dSSadaf Ebrahimi     if (fl == NULL)
4521*22dc650dSSadaf Ebrahimi       {
4522*22dc650dSSadaf Ebrahimi       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4523*22dc650dSSadaf Ebrahimi         strerror(errno));
4524*22dc650dSSadaf Ebrahimi       goto EXIT2;
4525*22dc650dSSadaf Ebrahimi       }
4526*22dc650dSSadaf Ebrahimi     }
4527*22dc650dSSadaf Ebrahimi   while (fgets(buffer, sizeof(buffer), fl) != NULL)
4528*22dc650dSSadaf Ebrahimi     {
4529*22dc650dSSadaf Ebrahimi     int frc;
4530*22dc650dSSadaf Ebrahimi     char *end = buffer + (int)strlen(buffer);
4531*22dc650dSSadaf Ebrahimi     while (end > buffer && isspace((unsigned char)(end[-1]))) end--;
4532*22dc650dSSadaf Ebrahimi     *end = 0;
4533*22dc650dSSadaf Ebrahimi     if (*buffer != 0)
4534*22dc650dSSadaf Ebrahimi       {
4535*22dc650dSSadaf Ebrahimi       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4536*22dc650dSSadaf Ebrahimi       if (frc > 1) rc = frc;
4537*22dc650dSSadaf Ebrahimi         else if (frc == 0 && rc == 1) rc = 0;
4538*22dc650dSSadaf Ebrahimi       }
4539*22dc650dSSadaf Ebrahimi     }
4540*22dc650dSSadaf Ebrahimi   if (fl != stdin) fclose(fl);
4541*22dc650dSSadaf Ebrahimi   }
4542*22dc650dSSadaf Ebrahimi 
4543*22dc650dSSadaf Ebrahimi /* After handling file-list, work through remaining arguments. Pass in the fact
4544*22dc650dSSadaf Ebrahimi that there is only one argument at top level - this suppresses the file name if
4545*22dc650dSSadaf Ebrahimi the argument is not a directory and filenames are not otherwise forced. */
4546*22dc650dSSadaf Ebrahimi 
4547*22dc650dSSadaf Ebrahimi only_one_at_top = i == argc - 1 && file_lists == NULL;
4548*22dc650dSSadaf Ebrahimi 
4549*22dc650dSSadaf Ebrahimi for (; i < argc; i++)
4550*22dc650dSSadaf Ebrahimi   {
4551*22dc650dSSadaf Ebrahimi   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4552*22dc650dSSadaf Ebrahimi     only_one_at_top);
4553*22dc650dSSadaf Ebrahimi   if (frc > 1) rc = frc;
4554*22dc650dSSadaf Ebrahimi     else if (frc == 0 && rc == 1) rc = 0;
4555*22dc650dSSadaf Ebrahimi   }
4556*22dc650dSSadaf Ebrahimi 
4557*22dc650dSSadaf Ebrahimi /* Show the total number of matches if requested, but not if only one file's
4558*22dc650dSSadaf Ebrahimi count was printed. */
4559*22dc650dSSadaf Ebrahimi 
4560*22dc650dSSadaf Ebrahimi if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4561*22dc650dSSadaf Ebrahimi   {
4562*22dc650dSSadaf Ebrahimi   if (counts_printed != 0 && filenames >= FN_DEFAULT)
4563*22dc650dSSadaf Ebrahimi     fprintf(stdout, "TOTAL:");
4564*22dc650dSSadaf Ebrahimi   fprintf(stdout, "%lu" STDOUT_NL, total_count);
4565*22dc650dSSadaf Ebrahimi   }
4566*22dc650dSSadaf Ebrahimi 
4567*22dc650dSSadaf Ebrahimi EXIT:
4568*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
4569*22dc650dSSadaf Ebrahimi pcre2_jit_free_unused_memory(NULL);
4570*22dc650dSSadaf Ebrahimi if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4571*22dc650dSSadaf Ebrahimi #endif
4572*22dc650dSSadaf Ebrahimi 
4573*22dc650dSSadaf Ebrahimi free(main_buffer);
4574*22dc650dSSadaf Ebrahimi if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4575*22dc650dSSadaf Ebrahimi 
4576*22dc650dSSadaf Ebrahimi pcre2_compile_context_free(compile_context);
4577*22dc650dSSadaf Ebrahimi pcre2_match_context_free(match_context);
4578*22dc650dSSadaf Ebrahimi pcre2_match_data_free(match_data_pair[0]);
4579*22dc650dSSadaf Ebrahimi pcre2_match_data_free(match_data_pair[1]);
4580*22dc650dSSadaf Ebrahimi 
4581*22dc650dSSadaf Ebrahimi free_pattern_chain(patterns);
4582*22dc650dSSadaf Ebrahimi free_pattern_chain(include_patterns);
4583*22dc650dSSadaf Ebrahimi free_pattern_chain(include_dir_patterns);
4584*22dc650dSSadaf Ebrahimi free_pattern_chain(exclude_patterns);
4585*22dc650dSSadaf Ebrahimi free_pattern_chain(exclude_dir_patterns);
4586*22dc650dSSadaf Ebrahimi 
4587*22dc650dSSadaf Ebrahimi free_file_chain(exclude_from);
4588*22dc650dSSadaf Ebrahimi free_file_chain(include_from);
4589*22dc650dSSadaf Ebrahimi free_file_chain(pattern_files);
4590*22dc650dSSadaf Ebrahimi free_file_chain(file_lists);
4591*22dc650dSSadaf Ebrahimi 
4592*22dc650dSSadaf Ebrahimi while (only_matching != NULL)
4593*22dc650dSSadaf Ebrahimi   {
4594*22dc650dSSadaf Ebrahimi   omstr *this = only_matching;
4595*22dc650dSSadaf Ebrahimi   only_matching = this->next;
4596*22dc650dSSadaf Ebrahimi   free(this);
4597*22dc650dSSadaf Ebrahimi   }
4598*22dc650dSSadaf Ebrahimi 
4599*22dc650dSSadaf Ebrahimi pcre2grep_exit(rc);
4600*22dc650dSSadaf Ebrahimi 
4601*22dc650dSSadaf Ebrahimi EXIT2:
4602*22dc650dSSadaf Ebrahimi rc = 2;
4603*22dc650dSSadaf Ebrahimi goto EXIT;
4604*22dc650dSSadaf Ebrahimi }
4605*22dc650dSSadaf Ebrahimi 
4606*22dc650dSSadaf Ebrahimi /* End of pcre2grep */
4607