1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi * pcre2grep program *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi
5*22dc650dSSadaf Ebrahimi /* This is a grep program that uses the 8-bit PCRE regular expression library
6*22dc650dSSadaf Ebrahimi via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7*22dc650dSSadaf Ebrahimi and native z/OS systems it can recurse into directories, and in z/OS it can
8*22dc650dSSadaf Ebrahimi handle PDS files.
9*22dc650dSSadaf Ebrahimi
10*22dc650dSSadaf Ebrahimi Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11*22dc650dSSadaf Ebrahimi additional header is required. That header is not included in the main PCRE2
12*22dc650dSSadaf Ebrahimi distribution because other apparatus is needed to compile pcre2grep for z/OS.
13*22dc650dSSadaf Ebrahimi The header can be found in the special z/OS distribution, which is available
14*22dc650dSSadaf Ebrahimi from www.zaconsultants.net or from www.cbttape.org.
15*22dc650dSSadaf Ebrahimi
16*22dc650dSSadaf Ebrahimi Copyright (c) 1997-2023 University of Cambridge
17*22dc650dSSadaf Ebrahimi
18*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
19*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
20*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
21*22dc650dSSadaf Ebrahimi
22*22dc650dSSadaf Ebrahimi * Redistributions of source code must retain the above copyright notice,
23*22dc650dSSadaf Ebrahimi this list of conditions and the following disclaimer.
24*22dc650dSSadaf Ebrahimi
25*22dc650dSSadaf Ebrahimi * Redistributions in binary form must reproduce the above copyright
26*22dc650dSSadaf Ebrahimi notice, this list of conditions and the following disclaimer in the
27*22dc650dSSadaf Ebrahimi documentation and/or other materials provided with the distribution.
28*22dc650dSSadaf Ebrahimi
29*22dc650dSSadaf Ebrahimi * Neither the name of the University of Cambridge nor the names of its
30*22dc650dSSadaf Ebrahimi contributors may be used to endorse or promote products derived from
31*22dc650dSSadaf Ebrahimi this software without specific prior written permission.
32*22dc650dSSadaf Ebrahimi
33*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
44*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
45*22dc650dSSadaf Ebrahimi */
46*22dc650dSSadaf Ebrahimi
47*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
48*22dc650dSSadaf Ebrahimi #include "config.h"
49*22dc650dSSadaf Ebrahimi #endif
50*22dc650dSSadaf Ebrahimi
51*22dc650dSSadaf Ebrahimi #include <ctype.h>
52*22dc650dSSadaf Ebrahimi #include <locale.h>
53*22dc650dSSadaf Ebrahimi #include <stdio.h>
54*22dc650dSSadaf Ebrahimi #include <string.h>
55*22dc650dSSadaf Ebrahimi #include <stdlib.h>
56*22dc650dSSadaf Ebrahimi #include <errno.h>
57*22dc650dSSadaf Ebrahimi
58*22dc650dSSadaf Ebrahimi #include <sys/types.h>
59*22dc650dSSadaf Ebrahimi #include <sys/stat.h>
60*22dc650dSSadaf Ebrahimi
/* Normalise the WIN32 macro. It is defined here when the compiler announces
Windows (_WIN32) or config.h reports a usable <windows.h> (HAVE_WINDOWS_H is
non-zero), provided it is not already defined and this is not a Cygwin build. */

#if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
  && !defined WIN32 && !defined(__CYGWIN__)
#define WIN32
#endif

/* Some versions of CMake still define WIN32 under Cygwin; undo that. */

#if defined(__CYGWIN__) && defined(WIN32)
#undef WIN32
#endif
70*22dc650dSSadaf Ebrahimi
71*22dc650dSSadaf Ebrahimi #ifdef __VMS
72*22dc650dSSadaf Ebrahimi #include clidef
73*22dc650dSSadaf Ebrahimi #include descrip
74*22dc650dSSadaf Ebrahimi #include lib$routines
75*22dc650dSSadaf Ebrahimi #endif
76*22dc650dSSadaf Ebrahimi
77*22dc650dSSadaf Ebrahimi #ifdef WIN32
78*22dc650dSSadaf Ebrahimi #include <io.h> /* For _setmode() */
79*22dc650dSSadaf Ebrahimi #include <fcntl.h> /* For _O_BINARY */
80*22dc650dSSadaf Ebrahimi #endif
81*22dc650dSSadaf Ebrahimi
82*22dc650dSSadaf Ebrahimi #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83*22dc650dSSadaf Ebrahimi #ifdef WIN32
84*22dc650dSSadaf Ebrahimi #include <process.h>
85*22dc650dSSadaf Ebrahimi #else
86*22dc650dSSadaf Ebrahimi #include <sys/wait.h>
87*22dc650dSSadaf Ebrahimi #endif
88*22dc650dSSadaf Ebrahimi #endif
89*22dc650dSSadaf Ebrahimi
90*22dc650dSSadaf Ebrahimi #ifdef HAVE_UNISTD_H
91*22dc650dSSadaf Ebrahimi #include <unistd.h>
92*22dc650dSSadaf Ebrahimi #endif
93*22dc650dSSadaf Ebrahimi
94*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
95*22dc650dSSadaf Ebrahimi #include <zlib.h>
96*22dc650dSSadaf Ebrahimi #endif
97*22dc650dSSadaf Ebrahimi
98*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
99*22dc650dSSadaf Ebrahimi #include <bzlib.h>
100*22dc650dSSadaf Ebrahimi #endif
101*22dc650dSSadaf Ebrahimi
102*22dc650dSSadaf Ebrahimi #define PCRE2_CODE_UNIT_WIDTH 8
103*22dc650dSSadaf Ebrahimi #include "pcre2.h"
104*22dc650dSSadaf Ebrahimi
/* Older versions of MSVC lack snprintf(). This define allows for
warning/error-free compilation and testing with MSVC compilers back to at least
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails).
The mapping is applied only when _MSC_VER is below 1900. */

#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define snprintf _snprintf
#endif
112*22dc650dSSadaf Ebrahimi
/* Old VC and older compilers don't support %td or %zu, and even some that
claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). SIZ_FORM is the
printf conversion (without the leading '%') to use instead: "zu" when %zu is
believed to work, otherwise "llu" under _WIN64 and "lu" elsewhere. */

#if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
  (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
#ifdef _WIN64
#define SIZ_FORM "llu"
#else
#define SIZ_FORM "lu"
#endif
#else
#define SIZ_FORM "zu"
#endif
126*22dc650dSSadaf Ebrahimi
/* Home-grown boolean type and its two values. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* Default for the capture_max setting. */

#define DEFAULT_CAPTURE_MAX 50

/* MAXPATLEN is the larger of BUFSIZ and 8192. */

#if BUFSIZ > 8192
#define MAXPATLEN BUFSIZ
#else
#define MAXPATLEN 8192
#endif

/* Fixed sizes; going by the names, for file name and error message buffers. */

#define FNBUFSIZ 2048
#define ERRBUFSIZ 256
142*22dc650dSSadaf Ebrahimi
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options. The lower-case set is used for
dee_action (-d), the upper-case set for DEE_action (-D). */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Binary file options */

enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };

/* Return values from decode_dollar_escape() */

enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171*22dc650dSSadaf Ebrahimi
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The arguments are passed straight to fwrite(a,b,c,d) and its result is
consumed by an empty "if". The whole thing is wrapped in do { } while (0) so
that the macro behaves as one ordinary statement: it can be followed by a
semicolon anywhere a statement is allowed, including as the unbraced body of
an "if" that has an "else", and it can never capture a following "else". */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
179*22dc650dSSadaf Ebrahimi
/* Under Windows, we have to set stdout to be binary, so that it does not
convert \r\n at the ends of output lines to \r\r\n. However, that means that
any messages written to stdout must have \r\n as their line terminator. This is
handled by using STDOUT_NL as the newline string, with STDOUT_NL_LEN giving its
length. We also use a normal double quote for the example, as single quotes
aren't usually available. */

#ifdef WIN32
#define STDOUT_NL     "\r\n"
#define STDOUT_NL_LEN 2
#define QUOT          "\""
#else
#define STDOUT_NL      "\n"
#define STDOUT_NL_LEN  1
#define QUOT           "'"
#endif

/* This code is returned from decode_dollar_escape() when $n is encountered,
and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
point. */

#define STDOUT_NL_CODE 0x7fffffffu
201*22dc650dSSadaf Ebrahimi
202*22dc650dSSadaf Ebrahimi
203*22dc650dSSadaf Ebrahimi
204*22dc650dSSadaf Ebrahimi /*************************************************
205*22dc650dSSadaf Ebrahimi * Global variables *
206*22dc650dSSadaf Ebrahimi *************************************************/
207*22dc650dSSadaf Ebrahimi
208*22dc650dSSadaf Ebrahimi static const char *colour_string = "1;31";
209*22dc650dSSadaf Ebrahimi static const char *colour_option = NULL;
210*22dc650dSSadaf Ebrahimi static const char *dee_option = NULL;
211*22dc650dSSadaf Ebrahimi static const char *DEE_option = NULL;
212*22dc650dSSadaf Ebrahimi static const char *locale = NULL;
213*22dc650dSSadaf Ebrahimi static const char *newline_arg = NULL;
214*22dc650dSSadaf Ebrahimi static const char *group_separator = "--";
215*22dc650dSSadaf Ebrahimi static const char *om_separator = NULL;
216*22dc650dSSadaf Ebrahimi static const char *stdin_name = "(standard input)";
217*22dc650dSSadaf Ebrahimi static const char *output_text = NULL;
218*22dc650dSSadaf Ebrahimi
219*22dc650dSSadaf Ebrahimi static char *main_buffer = NULL;
220*22dc650dSSadaf Ebrahimi
221*22dc650dSSadaf Ebrahimi static const char *printname_nl = STDOUT_NL; /* Changed to NULL for -Z */
222*22dc650dSSadaf Ebrahimi static int printname_colon = ':'; /* Changed to 0 for -Z */
223*22dc650dSSadaf Ebrahimi static int printname_hyphen = '-'; /* Changed to 0 for -Z */
224*22dc650dSSadaf Ebrahimi
225*22dc650dSSadaf Ebrahimi static int after_context = 0;
226*22dc650dSSadaf Ebrahimi static int before_context = 0;
227*22dc650dSSadaf Ebrahimi static int binary_files = BIN_BINARY;
228*22dc650dSSadaf Ebrahimi static int both_context = 0;
229*22dc650dSSadaf Ebrahimi static int endlinetype;
230*22dc650dSSadaf Ebrahimi
231*22dc650dSSadaf Ebrahimi static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
232*22dc650dSSadaf Ebrahimi static unsigned long int counts_printed = 0;
233*22dc650dSSadaf Ebrahimi static unsigned long int total_count = 0;
234*22dc650dSSadaf Ebrahimi
235*22dc650dSSadaf Ebrahimi static PCRE2_SIZE bufthird = PCRE2GREP_BUFSIZE;
236*22dc650dSSadaf Ebrahimi static PCRE2_SIZE max_bufthird = PCRE2GREP_MAX_BUFSIZE;
237*22dc650dSSadaf Ebrahimi static PCRE2_SIZE bufsize = 3*PCRE2GREP_BUFSIZE;
238*22dc650dSSadaf Ebrahimi
239*22dc650dSSadaf Ebrahimi #ifdef WIN32
240*22dc650dSSadaf Ebrahimi static int dee_action = dee_SKIP;
241*22dc650dSSadaf Ebrahimi #else
242*22dc650dSSadaf Ebrahimi static int dee_action = dee_READ;
243*22dc650dSSadaf Ebrahimi #endif
244*22dc650dSSadaf Ebrahimi
245*22dc650dSSadaf Ebrahimi static int DEE_action = DEE_READ;
246*22dc650dSSadaf Ebrahimi static int error_count = 0;
247*22dc650dSSadaf Ebrahimi static int filenames = FN_DEFAULT;
248*22dc650dSSadaf Ebrahimi
249*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
250*22dc650dSSadaf Ebrahimi static BOOL use_jit = TRUE;
251*22dc650dSSadaf Ebrahimi #else
252*22dc650dSSadaf Ebrahimi static BOOL use_jit = FALSE;
253*22dc650dSSadaf Ebrahimi #endif
254*22dc650dSSadaf Ebrahimi
255*22dc650dSSadaf Ebrahimi static const uint8_t *character_tables = NULL;
256*22dc650dSSadaf Ebrahimi
257*22dc650dSSadaf Ebrahimi static uint32_t pcre2_options = 0;
258*22dc650dSSadaf Ebrahimi static uint32_t extra_options = 0;
259*22dc650dSSadaf Ebrahimi static PCRE2_SIZE heap_limit = PCRE2_UNSET;
260*22dc650dSSadaf Ebrahimi static uint32_t match_limit = 0;
261*22dc650dSSadaf Ebrahimi static uint32_t depth_limit = 0;
262*22dc650dSSadaf Ebrahimi
263*22dc650dSSadaf Ebrahimi static pcre2_compile_context *compile_context;
264*22dc650dSSadaf Ebrahimi static pcre2_match_context *match_context;
265*22dc650dSSadaf Ebrahimi static pcre2_match_data *match_data, *match_data_pair[2];
266*22dc650dSSadaf Ebrahimi static PCRE2_SIZE *offsets, *offsets_pair[2];
267*22dc650dSSadaf Ebrahimi static int match_data_toggle;
268*22dc650dSSadaf Ebrahimi static uint32_t offset_size;
269*22dc650dSSadaf Ebrahimi static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
270*22dc650dSSadaf Ebrahimi
271*22dc650dSSadaf Ebrahimi static BOOL all_matches = FALSE;
272*22dc650dSSadaf Ebrahimi static BOOL case_restrict = FALSE;
273*22dc650dSSadaf Ebrahimi static BOOL count_only = FALSE;
274*22dc650dSSadaf Ebrahimi static BOOL do_colour = FALSE;
275*22dc650dSSadaf Ebrahimi #ifdef WIN32
276*22dc650dSSadaf Ebrahimi static BOOL do_ansi = FALSE;
277*22dc650dSSadaf Ebrahimi #endif
278*22dc650dSSadaf Ebrahimi static BOOL file_offsets = FALSE;
279*22dc650dSSadaf Ebrahimi static BOOL hyphenpending = FALSE;
280*22dc650dSSadaf Ebrahimi static BOOL invert = FALSE;
281*22dc650dSSadaf Ebrahimi static BOOL line_buffered = FALSE;
282*22dc650dSSadaf Ebrahimi static BOOL line_offsets = FALSE;
283*22dc650dSSadaf Ebrahimi static BOOL multiline = FALSE;
284*22dc650dSSadaf Ebrahimi static BOOL no_ucp = FALSE;
285*22dc650dSSadaf Ebrahimi static BOOL number = FALSE;
286*22dc650dSSadaf Ebrahimi static BOOL omit_zero_count = FALSE;
287*22dc650dSSadaf Ebrahimi static BOOL resource_error = FALSE;
288*22dc650dSSadaf Ebrahimi static BOOL quiet = FALSE;
289*22dc650dSSadaf Ebrahimi static BOOL show_total_count = FALSE;
290*22dc650dSSadaf Ebrahimi static BOOL silent = FALSE;
291*22dc650dSSadaf Ebrahimi static BOOL utf = FALSE;
292*22dc650dSSadaf Ebrahimi static BOOL posix_digit = FALSE;
293*22dc650dSSadaf Ebrahimi
294*22dc650dSSadaf Ebrahimi static uint8_t utf8_buffer[8];
295*22dc650dSSadaf Ebrahimi
296*22dc650dSSadaf Ebrahimi
/* Structure for list of --only-matching capturing numbers. Each item holds
one group number and a link to the next item. */

typedef struct omstr {
  struct omstr *next;
  int groupnum;
} omstr;

static omstr *only_matching = NULL;        /* First item in the chain */
static omstr *only_matching_last = NULL;   /* Last item in the chain */
static int only_matching_count;

/* Structure for holding the two variables that describe a number chain: the
address of the chain's anchor pointer and the address of its last-item
pointer. */

typedef struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
} omdatastr;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
316*22dc650dSSadaf Ebrahimi
/* Structure for list of file names (for -f and --{in,ex}clude-from). Each
item holds one name and a link to the next item. */

typedef struct fnstr {
  struct fnstr *next;
  char *name;
} fnstr;

/* One anchor/last pointer pair per chain. */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* Structure for holding the two variables that describe a file name chain:
the address of the chain's anchor pointer and the address of its last-item
pointer. */

typedef struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
} fndatastr;

static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
static fndatastr include_from_data = { &include_from, &include_from_last };
static fndatastr file_lists_data = { &file_lists, &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
345*22dc650dSSadaf Ebrahimi
346*22dc650dSSadaf Ebrahimi /* Structure for pattern and its compiled form; used for matching patterns and
347*22dc650dSSadaf Ebrahimi also for include/exclude patterns. */
348*22dc650dSSadaf Ebrahimi
349*22dc650dSSadaf Ebrahimi typedef struct patstr {
350*22dc650dSSadaf Ebrahimi struct patstr *next;
351*22dc650dSSadaf Ebrahimi char *string;
352*22dc650dSSadaf Ebrahimi PCRE2_SIZE length;
353*22dc650dSSadaf Ebrahimi pcre2_code *compiled;
354*22dc650dSSadaf Ebrahimi } patstr;
355*22dc650dSSadaf Ebrahimi
356*22dc650dSSadaf Ebrahimi static patstr *patterns = NULL;
357*22dc650dSSadaf Ebrahimi static patstr *patterns_last = NULL;
358*22dc650dSSadaf Ebrahimi static patstr *include_patterns = NULL;
359*22dc650dSSadaf Ebrahimi static patstr *include_patterns_last = NULL;
360*22dc650dSSadaf Ebrahimi static patstr *exclude_patterns = NULL;
361*22dc650dSSadaf Ebrahimi static patstr *exclude_patterns_last = NULL;
362*22dc650dSSadaf Ebrahimi static patstr *include_dir_patterns = NULL;
363*22dc650dSSadaf Ebrahimi static patstr *include_dir_patterns_last = NULL;
364*22dc650dSSadaf Ebrahimi static patstr *exclude_dir_patterns = NULL;
365*22dc650dSSadaf Ebrahimi static patstr *exclude_dir_patterns_last = NULL;
366*22dc650dSSadaf Ebrahimi
367*22dc650dSSadaf Ebrahimi /* Structure holding the two variables that describe a pattern chain. A pointer
368*22dc650dSSadaf Ebrahimi to such structures is used for each appropriate option. */
369*22dc650dSSadaf Ebrahimi
370*22dc650dSSadaf Ebrahimi typedef struct patdatastr {
371*22dc650dSSadaf Ebrahimi patstr **anchor;
372*22dc650dSSadaf Ebrahimi patstr **lastptr;
373*22dc650dSSadaf Ebrahimi } patdatastr;
374*22dc650dSSadaf Ebrahimi
375*22dc650dSSadaf Ebrahimi static patdatastr match_patdata = { &patterns, &patterns_last };
376*22dc650dSSadaf Ebrahimi static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
377*22dc650dSSadaf Ebrahimi static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
378*22dc650dSSadaf Ebrahimi static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
379*22dc650dSSadaf Ebrahimi static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
380*22dc650dSSadaf Ebrahimi
381*22dc650dSSadaf Ebrahimi static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
382*22dc650dSSadaf Ebrahimi &include_dir_patterns, &exclude_dir_patterns };
383*22dc650dSSadaf Ebrahimi
/* Option names for the four include/exclude chains, in the same order as the
anchors in incexlist[]. */

static const char *incexname[4] = { "--include", "--exclude",
                                    "--include-dir", "--exclude-dir" };
386*22dc650dSSadaf Ebrahimi
/* Structure for options and list of them. The OP_xxx values go in the "type"
field of an option_item. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
       OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };

typedef struct option_item {
  int type;                /* One of the OP_xxx values */
  int one_char;            /* Single-letter form, or a negative N_xxx code */
  void *dataptr;           /* Variable or chain descriptor for the option, or NULL */
  const char *long_name;   /* Long option name */
  const char *help_text;   /* Descriptive text for the option */
} option_item;
399*22dc650dSSadaf Ebrahimi
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. The values appear in the one_char field of entries in
optionlist[]. */

#define N_COLOUR              (-1)
#define N_EXCLUDE             (-2)
#define N_EXCLUDE_DIR         (-3)
#define N_HELP                (-4)
#define N_INCLUDE             (-5)
#define N_INCLUDE_DIR         (-6)
#define N_LABEL               (-7)
#define N_LOCALE              (-8)
#define N_NULL                (-9)
#define N_LOFFSETS            (-10)
#define N_FOFFSETS            (-11)
#define N_LBUFFER             (-12)
#define N_H_LIMIT             (-13)
#define N_M_LIMIT             (-14)
#define N_M_LIMIT_DEP         (-15)
#define N_BUFSIZE             (-16)
#define N_NOJIT               (-17)
#define N_FILE_LIST           (-18)
#define N_BINARY_FILES        (-19)
#define N_EXCLUDE_FROM        (-20)
#define N_INCLUDE_FROM        (-21)
#define N_OM_SEPARATOR        (-22)
#define N_MAX_BUFSIZE         (-23)
#define N_OM_CAPTURE          (-24)
#define N_ALLABSK             (-25)
#define N_POSIX_DIGIT         (-26)
#define N_GROUP_SEPARATOR     (-27)
#define N_NO_GROUP_SEPARATOR  (-28)
431*22dc650dSSadaf Ebrahimi
432*22dc650dSSadaf Ebrahimi static option_item optionlist[] = {
433*22dc650dSSadaf Ebrahimi { OP_NODATA, N_NULL, NULL, "", "terminate options" },
434*22dc650dSSadaf Ebrahimi { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
435*22dc650dSSadaf Ebrahimi { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
436*22dc650dSSadaf Ebrahimi { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
437*22dc650dSSadaf Ebrahimi { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
438*22dc650dSSadaf Ebrahimi { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
439*22dc650dSSadaf Ebrahimi { OP_SIZE, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
440*22dc650dSSadaf Ebrahimi { OP_SIZE, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
441*22dc650dSSadaf Ebrahimi { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
442*22dc650dSSadaf Ebrahimi { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
443*22dc650dSSadaf Ebrahimi { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
444*22dc650dSSadaf Ebrahimi { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
445*22dc650dSSadaf Ebrahimi { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
446*22dc650dSSadaf Ebrahimi { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
447*22dc650dSSadaf Ebrahimi { OP_NODATA, N_POSIX_DIGIT, NULL, "posix-digit", "\\d always matches [0-9], even in UTF/UCP mode" },
448*22dc650dSSadaf Ebrahimi { OP_NODATA, 'E', NULL, "case-restrict", "restrict case matching (no mix ASCII/non-ASCII)" },
449*22dc650dSSadaf Ebrahimi { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
450*22dc650dSSadaf Ebrahimi { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
451*22dc650dSSadaf Ebrahimi { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
452*22dc650dSSadaf Ebrahimi { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
453*22dc650dSSadaf Ebrahimi { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
454*22dc650dSSadaf Ebrahimi { OP_STRING, N_GROUP_SEPARATOR, &group_separator, "group-separator=text", "set separator between groups of lines" },
455*22dc650dSSadaf Ebrahimi { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
456*22dc650dSSadaf Ebrahimi { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
457*22dc650dSSadaf Ebrahimi { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
458*22dc650dSSadaf Ebrahimi { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
459*22dc650dSSadaf Ebrahimi { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
460*22dc650dSSadaf Ebrahimi { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
461*22dc650dSSadaf Ebrahimi { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
462*22dc650dSSadaf Ebrahimi { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
463*22dc650dSSadaf Ebrahimi { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
464*22dc650dSSadaf Ebrahimi { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
465*22dc650dSSadaf Ebrahimi { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
466*22dc650dSSadaf Ebrahimi { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
467*22dc650dSSadaf Ebrahimi { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
468*22dc650dSSadaf Ebrahimi { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
469*22dc650dSSadaf Ebrahimi { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
470*22dc650dSSadaf Ebrahimi { OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
471*22dc650dSSadaf Ebrahimi { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
472*22dc650dSSadaf Ebrahimi { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
473*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
474*22dc650dSSadaf Ebrahimi { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
475*22dc650dSSadaf Ebrahimi #else
476*22dc650dSSadaf Ebrahimi { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
477*22dc650dSSadaf Ebrahimi #endif
478*22dc650dSSadaf Ebrahimi { OP_NODATA, N_NO_GROUP_SEPARATOR, NULL, "no-group-separator", "suppress separators between groups of lines" },
479*22dc650dSSadaf Ebrahimi { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
480*22dc650dSSadaf Ebrahimi { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
481*22dc650dSSadaf Ebrahimi { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
482*22dc650dSSadaf Ebrahimi { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
483*22dc650dSSadaf Ebrahimi { OP_NODATA, 'P', NULL, "no-ucp", "do not enable UCP mode with Unicode" },
484*22dc650dSSadaf Ebrahimi { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
485*22dc650dSSadaf Ebrahimi { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
486*22dc650dSSadaf Ebrahimi { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
487*22dc650dSSadaf Ebrahimi { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
488*22dc650dSSadaf Ebrahimi { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
489*22dc650dSSadaf Ebrahimi { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
490*22dc650dSSadaf Ebrahimi { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
491*22dc650dSSadaf Ebrahimi { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
492*22dc650dSSadaf Ebrahimi { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
493*22dc650dSSadaf Ebrahimi { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
494*22dc650dSSadaf Ebrahimi { OP_NODATA, 'u', NULL, "utf", "use UTF/Unicode" },
495*22dc650dSSadaf Ebrahimi { OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF/Unicode, allow for invalid code units" },
496*22dc650dSSadaf Ebrahimi { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
497*22dc650dSSadaf Ebrahimi { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
498*22dc650dSSadaf Ebrahimi { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
499*22dc650dSSadaf Ebrahimi { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
500*22dc650dSSadaf Ebrahimi { OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
501*22dc650dSSadaf Ebrahimi { OP_NODATA, 'Z', NULL, "null", "output 0 byte after file names" },
502*22dc650dSSadaf Ebrahimi { OP_NODATA, 0, NULL, NULL, NULL }
503*22dc650dSSadaf Ebrahimi };
504*22dc650dSSadaf Ebrahimi
/* Names of the supported newline conventions. The order of the entries must be
kept in step with the definitions of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF",
  "NUL"
};
510*22dc650dSSadaf Ebrahimi
/* UTF-8 tables */

/* Upper limit of the values that ord2utf8() encodes in 1, 2, ... 6 bytes: the
index of the first entry that is >= the value is the number of extra bytes. */

const int utf8_table1[] = {
  0x7f,
  0x7ff,
  0xffff,
  0x1fffff,
  0x3ffffff,
  0x7fffffff
};
const int utf8_table1_size = sizeof(utf8_table1) / sizeof(utf8_table1[0]);

/* Bits that ord2utf8() ORs into the first byte, indexed by the number of extra
bytes. */

const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };

/* NOTE(review): presumably the mask for the data bits of a first byte, indexed
by the number of extra bytes; not used in this part of the file - confirm. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

/* NOTE(review): presumably the number of extra bytes, indexed by the low 6 bits
of a first byte; not used in this part of the file - confirm. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
525*22dc650dSSadaf Ebrahimi
526*22dc650dSSadaf Ebrahimi
527*22dc650dSSadaf Ebrahimi #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
528*22dc650dSSadaf Ebrahimi /*************************************************
529*22dc650dSSadaf Ebrahimi * Emulated memmove() for systems without it *
530*22dc650dSSadaf Ebrahimi *************************************************/
531*22dc650dSSadaf Ebrahimi
/* Replacement for memmove(). bcopy() is used when HAVE_BCOPY is defined;
otherwise the bytes are copied one at a time, because some non-Unix
environments lack both memmove() and bcopy().

Arguments:
  d         destination
  s         source
  n         number of bytes to copy

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  /* The destination is above the source: copy from the last byte down to the
  first so that an overlapping source is not overwritten before it is read. */
  size_t remaining = n;
  while (remaining-- > 0) to[remaining] = from[remaining];
  }
else
  {
  size_t k;
  for (k = 0; k < n; k++) to[k] = from[k];
  }
return d;
#endif   /* not HAVE_BCOPY */
}
560*22dc650dSSadaf Ebrahimi #undef memmove
561*22dc650dSSadaf Ebrahimi #define memmove(d,s,n) emulated_memmove(d,s,n)
562*22dc650dSSadaf Ebrahimi #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
563*22dc650dSSadaf Ebrahimi
564*22dc650dSSadaf Ebrahimi
565*22dc650dSSadaf Ebrahimi
566*22dc650dSSadaf Ebrahimi /*************************************************
567*22dc650dSSadaf Ebrahimi * Convert code point to UTF-8 *
568*22dc650dSSadaf Ebrahimi *************************************************/
569*22dc650dSSadaf Ebrahimi
570*22dc650dSSadaf Ebrahimi /* A static buffer is used. Returns the number of bytes. */
571*22dc650dSSadaf Ebrahimi
572*22dc650dSSadaf Ebrahimi static int
ord2utf8(uint32_t value)573*22dc650dSSadaf Ebrahimi ord2utf8(uint32_t value)
574*22dc650dSSadaf Ebrahimi {
575*22dc650dSSadaf Ebrahimi int i, j;
576*22dc650dSSadaf Ebrahimi uint8_t *utf8bytes = utf8_buffer;
577*22dc650dSSadaf Ebrahimi for (i = 0; i < utf8_table1_size; i++)
578*22dc650dSSadaf Ebrahimi if (value <= (uint32_t)utf8_table1[i]) break;
579*22dc650dSSadaf Ebrahimi utf8bytes += i;
580*22dc650dSSadaf Ebrahimi for (j = i; j > 0; j--)
581*22dc650dSSadaf Ebrahimi {
582*22dc650dSSadaf Ebrahimi *utf8bytes-- = 0x80 | (value & 0x3f);
583*22dc650dSSadaf Ebrahimi value >>= 6;
584*22dc650dSSadaf Ebrahimi }
585*22dc650dSSadaf Ebrahimi *utf8bytes = utf8_table2[i] | value;
586*22dc650dSSadaf Ebrahimi return i + 1;
587*22dc650dSSadaf Ebrahimi }
588*22dc650dSSadaf Ebrahimi
589*22dc650dSSadaf Ebrahimi
590*22dc650dSSadaf Ebrahimi
591*22dc650dSSadaf Ebrahimi /*************************************************
592*22dc650dSSadaf Ebrahimi * Case-independent string compare *
593*22dc650dSSadaf Ebrahimi *************************************************/
594*22dc650dSSadaf Ebrahimi
/* Compare two zero-terminated strings, ignoring case as defined by tolower().
The bytes are read as unsigned char because passing a negative plain char to
tolower() is undefined behaviour.

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal
            -1 if the first differing character of str1 is the smaller
            +1 if it is the larger
*/

static int
strcmpic(const char *str1, const char *str2)
{
const unsigned char *a = (const unsigned char *)str1;
const unsigned char *b = (const unsigned char *)str2;

/* When only one string has ended, its terminating zero compares unequal to
the other string's character, so the loop returns before reading past it. */

while (*a != '\0' || *b != '\0')
  {
  int ca = tolower(*a++);
  int cb = tolower(*b++);
  if (ca != cb) return (ca > cb)? 1 : -1;
  }
return 0;
}
607*22dc650dSSadaf Ebrahimi
608*22dc650dSSadaf Ebrahimi
609*22dc650dSSadaf Ebrahimi /*************************************************
610*22dc650dSSadaf Ebrahimi * Parse GREP_COLORS *
611*22dc650dSSadaf Ebrahimi *************************************************/
612*22dc650dSSadaf Ebrahimi
/* Extract the value of ms or mt from a GREP_COLORS string. An "ms=" setting is
looked for first, and "mt=" is used only if there is no "ms=". The value runs
up to the next colon or the end of the string, and is truncated if it does not
fit in the static result buffer.

Argument:   the string, possibly NULL
Returns:    the value of ms or mt (in a static buffer), or NULL if neither is
              present
*/

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *from;
size_t used = 0;

if (gc == NULL) return NULL;

from = strstr(gc, "ms=");
if (from == NULL)
  {
  from = strstr(gc, "mt=");
  if (from == NULL) return NULL;
  }

for (from += 3; *from != ':' && *from != 0 && used < sizeof(seq) - 1; from++)
  seq[used++] = *from;
seq[used] = 0;
return seq;
}
636*22dc650dSSadaf Ebrahimi
637*22dc650dSSadaf Ebrahimi
638*22dc650dSSadaf Ebrahimi /*************************************************
639*22dc650dSSadaf Ebrahimi * Exit from the program *
640*22dc650dSSadaf Ebrahimi *************************************************/
641*22dc650dSSadaf Ebrahimi
642*22dc650dSSadaf Ebrahimi /* If there has been a resource error, give a suitable message.
643*22dc650dSSadaf Ebrahimi
644*22dc650dSSadaf Ebrahimi Argument: the return code
645*22dc650dSSadaf Ebrahimi Returns: does not return
646*22dc650dSSadaf Ebrahimi */
647*22dc650dSSadaf Ebrahimi
648*22dc650dSSadaf Ebrahimi static void
pcre2grep_exit(int rc)649*22dc650dSSadaf Ebrahimi pcre2grep_exit(int rc)
650*22dc650dSSadaf Ebrahimi {
651*22dc650dSSadaf Ebrahimi /* VMS does exit codes differently: both exit(1) and exit(0) return with a
652*22dc650dSSadaf Ebrahimi status of 1, which is not helpful. To help with this problem, define a symbol
653*22dc650dSSadaf Ebrahimi (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
654*22dc650dSSadaf Ebrahimi therein. */
655*22dc650dSSadaf Ebrahimi
656*22dc650dSSadaf Ebrahimi #ifdef __VMS
657*22dc650dSSadaf Ebrahimi char val_buf[4];
658*22dc650dSSadaf Ebrahimi $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
659*22dc650dSSadaf Ebrahimi $DESCRIPTOR(sym_val, val_buf);
660*22dc650dSSadaf Ebrahimi sprintf(val_buf, "%d", rc);
661*22dc650dSSadaf Ebrahimi sym_val.dsc$w_length = strlen(val_buf);
662*22dc650dSSadaf Ebrahimi lib$set_symbol(&sym_nam, &sym_val);
663*22dc650dSSadaf Ebrahimi #endif
664*22dc650dSSadaf Ebrahimi
665*22dc650dSSadaf Ebrahimi if (resource_error)
666*22dc650dSSadaf Ebrahimi {
667*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
668*22dc650dSSadaf Ebrahimi "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
669*22dc650dSSadaf Ebrahimi PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
670*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
671*22dc650dSSadaf Ebrahimi }
672*22dc650dSSadaf Ebrahimi exit(rc);
673*22dc650dSSadaf Ebrahimi }
674*22dc650dSSadaf Ebrahimi
675*22dc650dSSadaf Ebrahimi
676*22dc650dSSadaf Ebrahimi /*************************************************
677*22dc650dSSadaf Ebrahimi * Add item to chain of patterns *
678*22dc650dSSadaf Ebrahimi *************************************************/
679*22dc650dSSadaf Ebrahimi
680*22dc650dSSadaf Ebrahimi /* Used to add an item onto a chain, or just return an unconnected item if the
681*22dc650dSSadaf Ebrahimi "after" argument is NULL.
682*22dc650dSSadaf Ebrahimi
683*22dc650dSSadaf Ebrahimi Arguments:
684*22dc650dSSadaf Ebrahimi s pattern string to add
685*22dc650dSSadaf Ebrahimi patlen length of pattern
686*22dc650dSSadaf Ebrahimi after if not NULL points to item to insert after
687*22dc650dSSadaf Ebrahimi
688*22dc650dSSadaf Ebrahimi Returns: new pattern block or NULL on error
689*22dc650dSSadaf Ebrahimi */
690*22dc650dSSadaf Ebrahimi
691*22dc650dSSadaf Ebrahimi static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)692*22dc650dSSadaf Ebrahimi add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
693*22dc650dSSadaf Ebrahimi {
694*22dc650dSSadaf Ebrahimi patstr *p = (patstr *)malloc(sizeof(patstr));
695*22dc650dSSadaf Ebrahimi
696*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - These won't be hit in normal testing. */
697*22dc650dSSadaf Ebrahimi
698*22dc650dSSadaf Ebrahimi if (p == NULL)
699*22dc650dSSadaf Ebrahimi {
700*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: malloc failed\n");
701*22dc650dSSadaf Ebrahimi pcre2grep_exit(2);
702*22dc650dSSadaf Ebrahimi }
703*22dc650dSSadaf Ebrahimi if (patlen > MAXPATLEN)
704*22dc650dSSadaf Ebrahimi {
705*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
706*22dc650dSSadaf Ebrahimi MAXPATLEN);
707*22dc650dSSadaf Ebrahimi free(p);
708*22dc650dSSadaf Ebrahimi return NULL;
709*22dc650dSSadaf Ebrahimi }
710*22dc650dSSadaf Ebrahimi
711*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
712*22dc650dSSadaf Ebrahimi
713*22dc650dSSadaf Ebrahimi p->next = NULL;
714*22dc650dSSadaf Ebrahimi p->string = s;
715*22dc650dSSadaf Ebrahimi p->length = patlen;
716*22dc650dSSadaf Ebrahimi p->compiled = NULL;
717*22dc650dSSadaf Ebrahimi
718*22dc650dSSadaf Ebrahimi if (after != NULL)
719*22dc650dSSadaf Ebrahimi {
720*22dc650dSSadaf Ebrahimi p->next = after->next;
721*22dc650dSSadaf Ebrahimi after->next = p;
722*22dc650dSSadaf Ebrahimi }
723*22dc650dSSadaf Ebrahimi return p;
724*22dc650dSSadaf Ebrahimi }
725*22dc650dSSadaf Ebrahimi
726*22dc650dSSadaf Ebrahimi
727*22dc650dSSadaf Ebrahimi /*************************************************
728*22dc650dSSadaf Ebrahimi * Free chain of patterns *
729*22dc650dSSadaf Ebrahimi *************************************************/
730*22dc650dSSadaf Ebrahimi
731*22dc650dSSadaf Ebrahimi /* Used for several chains of patterns.
732*22dc650dSSadaf Ebrahimi
733*22dc650dSSadaf Ebrahimi Argument: pointer to start of chain
734*22dc650dSSadaf Ebrahimi Returns: nothing
735*22dc650dSSadaf Ebrahimi */
736*22dc650dSSadaf Ebrahimi
737*22dc650dSSadaf Ebrahimi static void
free_pattern_chain(patstr * pc)738*22dc650dSSadaf Ebrahimi free_pattern_chain(patstr *pc)
739*22dc650dSSadaf Ebrahimi {
740*22dc650dSSadaf Ebrahimi while (pc != NULL)
741*22dc650dSSadaf Ebrahimi {
742*22dc650dSSadaf Ebrahimi patstr *p = pc;
743*22dc650dSSadaf Ebrahimi pc = p->next;
744*22dc650dSSadaf Ebrahimi if (p->compiled != NULL) pcre2_code_free(p->compiled);
745*22dc650dSSadaf Ebrahimi free(p);
746*22dc650dSSadaf Ebrahimi }
747*22dc650dSSadaf Ebrahimi }
748*22dc650dSSadaf Ebrahimi
749*22dc650dSSadaf Ebrahimi
750*22dc650dSSadaf Ebrahimi /*************************************************
751*22dc650dSSadaf Ebrahimi * Free chain of file names *
752*22dc650dSSadaf Ebrahimi *************************************************/
753*22dc650dSSadaf Ebrahimi
754*22dc650dSSadaf Ebrahimi /*
755*22dc650dSSadaf Ebrahimi Argument: pointer to start of chain
756*22dc650dSSadaf Ebrahimi Returns: nothing
757*22dc650dSSadaf Ebrahimi */
758*22dc650dSSadaf Ebrahimi
759*22dc650dSSadaf Ebrahimi static void
free_file_chain(fnstr * fn)760*22dc650dSSadaf Ebrahimi free_file_chain(fnstr *fn)
761*22dc650dSSadaf Ebrahimi {
762*22dc650dSSadaf Ebrahimi while (fn != NULL)
763*22dc650dSSadaf Ebrahimi {
764*22dc650dSSadaf Ebrahimi fnstr *f = fn;
765*22dc650dSSadaf Ebrahimi fn = f->next;
766*22dc650dSSadaf Ebrahimi free(f);
767*22dc650dSSadaf Ebrahimi }
768*22dc650dSSadaf Ebrahimi }
769*22dc650dSSadaf Ebrahimi
770*22dc650dSSadaf Ebrahimi
771*22dc650dSSadaf Ebrahimi /*************************************************
772*22dc650dSSadaf Ebrahimi * OS-specific functions *
773*22dc650dSSadaf Ebrahimi *************************************************/
774*22dc650dSSadaf Ebrahimi
775*22dc650dSSadaf Ebrahimi /* These definitions are needed in all Windows environments, even those where
776*22dc650dSSadaf Ebrahimi Unix-style directory scanning can be used (see below). */
777*22dc650dSSadaf Ebrahimi
778*22dc650dSSadaf Ebrahimi #ifdef WIN32
779*22dc650dSSadaf Ebrahimi
780*22dc650dSSadaf Ebrahimi #ifndef STRICT
781*22dc650dSSadaf Ebrahimi # define STRICT
782*22dc650dSSadaf Ebrahimi #endif
783*22dc650dSSadaf Ebrahimi #ifndef WIN32_LEAN_AND_MEAN
784*22dc650dSSadaf Ebrahimi # define WIN32_LEAN_AND_MEAN
785*22dc650dSSadaf Ebrahimi #endif
786*22dc650dSSadaf Ebrahimi
787*22dc650dSSadaf Ebrahimi #include <windows.h>
788*22dc650dSSadaf Ebrahimi
789*22dc650dSSadaf Ebrahimi #define iswild(name) (strpbrk(name, "*?") != NULL)
790*22dc650dSSadaf Ebrahimi
791*22dc650dSSadaf Ebrahimi /* Convert ANSI BGR format to RGB used by Windows */
792*22dc650dSSadaf Ebrahimi #define BGR_RGB(x) (((x) & 1 ? 4 : 0) | ((x) & 2) | ((x) & 4 ? 1 : 0))
793*22dc650dSSadaf Ebrahimi
794*22dc650dSSadaf Ebrahimi static HANDLE hstdout;
795*22dc650dSSadaf Ebrahimi static CONSOLE_SCREEN_BUFFER_INFO csbi;
796*22dc650dSSadaf Ebrahimi static WORD match_colour;
797*22dc650dSSadaf Ebrahimi
798*22dc650dSSadaf Ebrahimi static WORD
decode_ANSI_colour(const char * cs)799*22dc650dSSadaf Ebrahimi decode_ANSI_colour(const char *cs)
800*22dc650dSSadaf Ebrahimi {
801*22dc650dSSadaf Ebrahimi WORD result = csbi.wAttributes;
802*22dc650dSSadaf Ebrahimi while (*cs)
803*22dc650dSSadaf Ebrahimi {
804*22dc650dSSadaf Ebrahimi if (isdigit((unsigned char)(*cs)))
805*22dc650dSSadaf Ebrahimi {
806*22dc650dSSadaf Ebrahimi int code = atoi(cs);
807*22dc650dSSadaf Ebrahimi if (code == 1) result |= 0x08;
808*22dc650dSSadaf Ebrahimi else if (code == 4) result |= 0x8000;
809*22dc650dSSadaf Ebrahimi else if (code == 5) result |= 0x80;
810*22dc650dSSadaf Ebrahimi else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
811*22dc650dSSadaf Ebrahimi else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
812*22dc650dSSadaf Ebrahimi else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
813*22dc650dSSadaf Ebrahimi else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
814*22dc650dSSadaf Ebrahimi /* aixterm high intensity colour codes */
815*22dc650dSSadaf Ebrahimi else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
816*22dc650dSSadaf Ebrahimi else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
817*22dc650dSSadaf Ebrahimi
818*22dc650dSSadaf Ebrahimi while (isdigit((unsigned char)(*cs))) cs++;
819*22dc650dSSadaf Ebrahimi }
820*22dc650dSSadaf Ebrahimi if (*cs) cs++;
821*22dc650dSSadaf Ebrahimi }
822*22dc650dSSadaf Ebrahimi return result;
823*22dc650dSSadaf Ebrahimi }
824*22dc650dSSadaf Ebrahimi
825*22dc650dSSadaf Ebrahimi
826*22dc650dSSadaf Ebrahimi static void
init_colour_output()827*22dc650dSSadaf Ebrahimi init_colour_output()
828*22dc650dSSadaf Ebrahimi {
829*22dc650dSSadaf Ebrahimi if (do_colour)
830*22dc650dSSadaf Ebrahimi {
831*22dc650dSSadaf Ebrahimi hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
832*22dc650dSSadaf Ebrahimi /* This fails when redirected to con; try again if so. */
833*22dc650dSSadaf Ebrahimi if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
834*22dc650dSSadaf Ebrahimi {
835*22dc650dSSadaf Ebrahimi HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
836*22dc650dSSadaf Ebrahimi FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
837*22dc650dSSadaf Ebrahimi GetConsoleScreenBufferInfo(hcon, &csbi);
838*22dc650dSSadaf Ebrahimi CloseHandle(hcon);
839*22dc650dSSadaf Ebrahimi }
840*22dc650dSSadaf Ebrahimi match_colour = decode_ANSI_colour(colour_string);
841*22dc650dSSadaf Ebrahimi /* No valid colour found - turn off colouring */
842*22dc650dSSadaf Ebrahimi if (!match_colour) do_colour = FALSE;
843*22dc650dSSadaf Ebrahimi }
844*22dc650dSSadaf Ebrahimi }
845*22dc650dSSadaf Ebrahimi
846*22dc650dSSadaf Ebrahimi #endif /* WIN32 */
847*22dc650dSSadaf Ebrahimi
848*22dc650dSSadaf Ebrahimi
849*22dc650dSSadaf Ebrahimi /* The following sets of functions are defined so that they can be made system
850*22dc650dSSadaf Ebrahimi specific. At present there are versions for Unix-style environments, Windows,
851*22dc650dSSadaf Ebrahimi native z/OS, and "no support". */
852*22dc650dSSadaf Ebrahimi
853*22dc650dSSadaf Ebrahimi
854*22dc650dSSadaf Ebrahimi /************* Directory scanning Unix-style and z/OS ***********/
855*22dc650dSSadaf Ebrahimi
856*22dc650dSSadaf Ebrahimi #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
857*22dc650dSSadaf Ebrahimi #include <sys/types.h>
858*22dc650dSSadaf Ebrahimi #include <sys/stat.h>
859*22dc650dSSadaf Ebrahimi #include <dirent.h>
860*22dc650dSSadaf Ebrahimi
861*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
862*22dc650dSSadaf Ebrahimi /************* Directory and PDS/E scanning for z/OS ***********/
863*22dc650dSSadaf Ebrahimi /************* z/OS looks mostly like Unix with USS ************/
864*22dc650dSSadaf Ebrahimi /* However, z/OS needs the #include statements in this header */
865*22dc650dSSadaf Ebrahimi #include "pcrzosfs.h"
866*22dc650dSSadaf Ebrahimi /* That header is not included in the main PCRE distribution because
867*22dc650dSSadaf Ebrahimi other apparatus is needed to compile pcre2grep for z/OS. The header
868*22dc650dSSadaf Ebrahimi can be found in the special z/OS distribution, which is available
869*22dc650dSSadaf Ebrahimi from www.zaconsultants.net or from www.cbttape.org. */
870*22dc650dSSadaf Ebrahimi #endif
871*22dc650dSSadaf Ebrahimi
/* Character that separates the components of a path name. */
#define FILESEP '/'

/* Directories are scanned with the <dirent.h> functions. */
typedef DIR directory_type;
874*22dc650dSSadaf Ebrahimi
/* Test whether a path names a directory.

Argument:   the path
Returns:    the value of S_ISDIR() for the path, or 0 if stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat info;

/* A failed stat() is reported as "not a directory", in the expectation that
opening as a file will fail */

return (stat(filename, &info) >= 0)? S_ISDIR(info.st_mode) : 0;
}
883*22dc650dSSadaf Ebrahimi
884*22dc650dSSadaf Ebrahimi static directory_type *
opendirectory(char * filename)885*22dc650dSSadaf Ebrahimi opendirectory(char *filename)
886*22dc650dSSadaf Ebrahimi {
887*22dc650dSSadaf Ebrahimi return opendir(filename);
888*22dc650dSSadaf Ebrahimi }
889*22dc650dSSadaf Ebrahimi
890*22dc650dSSadaf Ebrahimi static char *
readdirectory(directory_type * dir)891*22dc650dSSadaf Ebrahimi readdirectory(directory_type *dir)
892*22dc650dSSadaf Ebrahimi {
893*22dc650dSSadaf Ebrahimi for (;;)
894*22dc650dSSadaf Ebrahimi {
895*22dc650dSSadaf Ebrahimi struct dirent *dent = readdir(dir);
896*22dc650dSSadaf Ebrahimi if (dent == NULL) return NULL;
897*22dc650dSSadaf Ebrahimi if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
898*22dc650dSSadaf Ebrahimi return dent->d_name;
899*22dc650dSSadaf Ebrahimi }
900*22dc650dSSadaf Ebrahimi /* Control never reaches here */
901*22dc650dSSadaf Ebrahimi }
902*22dc650dSSadaf Ebrahimi
903*22dc650dSSadaf Ebrahimi static void
closedirectory(directory_type * dir)904*22dc650dSSadaf Ebrahimi closedirectory(directory_type *dir)
905*22dc650dSSadaf Ebrahimi {
906*22dc650dSSadaf Ebrahimi closedir(dir);
907*22dc650dSSadaf Ebrahimi }
908*22dc650dSSadaf Ebrahimi
909*22dc650dSSadaf Ebrahimi
910*22dc650dSSadaf Ebrahimi /************* Test for regular file, Unix-style **********/
911*22dc650dSSadaf Ebrahimi
/* Test whether a path names a regular file.

Argument:   the path
Returns:    the value of S_ISREG() for the path, or 1 if stat() fails
*/

static int
isregfile(char *filename)
{
struct stat info;

/* A failed stat() is reported as "regular file", in the expectation that
opening as a file will fail */

return (stat(filename, &info) < 0)? 1 : S_ISREG(info.st_mode);
}
920*22dc650dSSadaf Ebrahimi
921*22dc650dSSadaf Ebrahimi
922*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
923*22dc650dSSadaf Ebrahimi /************* Test for a terminal in z/OS **********/
924*22dc650dSSadaf Ebrahimi /* isatty() does not work in a TSO environment, so always give FALSE.*/
925*22dc650dSSadaf Ebrahimi
926*22dc650dSSadaf Ebrahimi static BOOL
is_stdout_tty(void)927*22dc650dSSadaf Ebrahimi is_stdout_tty(void)
928*22dc650dSSadaf Ebrahimi {
929*22dc650dSSadaf Ebrahimi return FALSE;
930*22dc650dSSadaf Ebrahimi }
931*22dc650dSSadaf Ebrahimi
932*22dc650dSSadaf Ebrahimi static BOOL
is_file_tty(FILE * f)933*22dc650dSSadaf Ebrahimi is_file_tty(FILE *f)
934*22dc650dSSadaf Ebrahimi {
935*22dc650dSSadaf Ebrahimi return FALSE;
936*22dc650dSSadaf Ebrahimi }
937*22dc650dSSadaf Ebrahimi
938*22dc650dSSadaf Ebrahimi
939*22dc650dSSadaf Ebrahimi /************* Test for a terminal, Unix-style **********/
940*22dc650dSSadaf Ebrahimi
941*22dc650dSSadaf Ebrahimi #else
942*22dc650dSSadaf Ebrahimi static BOOL
is_stdout_tty(void)943*22dc650dSSadaf Ebrahimi is_stdout_tty(void)
944*22dc650dSSadaf Ebrahimi {
945*22dc650dSSadaf Ebrahimi return isatty(fileno(stdout));
946*22dc650dSSadaf Ebrahimi }
947*22dc650dSSadaf Ebrahimi
948*22dc650dSSadaf Ebrahimi static BOOL
is_file_tty(FILE * f)949*22dc650dSSadaf Ebrahimi is_file_tty(FILE *f)
950*22dc650dSSadaf Ebrahimi {
951*22dc650dSSadaf Ebrahimi return isatty(fileno(f));
952*22dc650dSSadaf Ebrahimi }
953*22dc650dSSadaf Ebrahimi #endif
954*22dc650dSSadaf Ebrahimi
955*22dc650dSSadaf Ebrahimi
956*22dc650dSSadaf Ebrahimi /************* Print optionally coloured match Unix-style and z/OS **********/
957*22dc650dSSadaf Ebrahimi
958*22dc650dSSadaf Ebrahimi static void
print_match(const void * buf,int length)959*22dc650dSSadaf Ebrahimi print_match(const void *buf, int length)
960*22dc650dSSadaf Ebrahimi {
961*22dc650dSSadaf Ebrahimi if (length == 0) return;
962*22dc650dSSadaf Ebrahimi if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
963*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(buf, 1, length, stdout);
964*22dc650dSSadaf Ebrahimi if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
965*22dc650dSSadaf Ebrahimi }
966*22dc650dSSadaf Ebrahimi
967*22dc650dSSadaf Ebrahimi /* End of Unix-style or native z/OS environment functions. */
968*22dc650dSSadaf Ebrahimi
969*22dc650dSSadaf Ebrahimi
970*22dc650dSSadaf Ebrahimi /************* Directory scanning in Windows ***********/
971*22dc650dSSadaf Ebrahimi
972*22dc650dSSadaf Ebrahimi /* I (Philip Hazel) have no means of testing this code. It was contributed by
973*22dc650dSSadaf Ebrahimi Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
974*22dc650dSSadaf Ebrahimi when it did not exist. David Byron added a patch that moved the #include of
975*22dc650dSSadaf Ebrahimi <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
976*22dc650dSSadaf Ebrahimi */
977*22dc650dSSadaf Ebrahimi
978*22dc650dSSadaf Ebrahimi #elif defined WIN32
979*22dc650dSSadaf Ebrahimi
980*22dc650dSSadaf Ebrahimi #ifndef INVALID_FILE_ATTRIBUTES
981*22dc650dSSadaf Ebrahimi #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
982*22dc650dSSadaf Ebrahimi #endif
983*22dc650dSSadaf Ebrahimi
984*22dc650dSSadaf Ebrahimi typedef struct directory_type
985*22dc650dSSadaf Ebrahimi {
986*22dc650dSSadaf Ebrahimi HANDLE handle;
987*22dc650dSSadaf Ebrahimi BOOL first;
988*22dc650dSSadaf Ebrahimi WIN32_FIND_DATA data;
989*22dc650dSSadaf Ebrahimi } directory_type;
990*22dc650dSSadaf Ebrahimi
991*22dc650dSSadaf Ebrahimi #define FILESEP '/'
992*22dc650dSSadaf Ebrahimi
993*22dc650dSSadaf Ebrahimi int
isdirectory(char * filename)994*22dc650dSSadaf Ebrahimi isdirectory(char *filename)
995*22dc650dSSadaf Ebrahimi {
996*22dc650dSSadaf Ebrahimi DWORD attr = GetFileAttributes(filename);
997*22dc650dSSadaf Ebrahimi if (attr == INVALID_FILE_ATTRIBUTES)
998*22dc650dSSadaf Ebrahimi return 0;
999*22dc650dSSadaf Ebrahimi return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
1000*22dc650dSSadaf Ebrahimi }
1001*22dc650dSSadaf Ebrahimi
1002*22dc650dSSadaf Ebrahimi directory_type *
opendirectory(char * filename)1003*22dc650dSSadaf Ebrahimi opendirectory(char *filename)
1004*22dc650dSSadaf Ebrahimi {
1005*22dc650dSSadaf Ebrahimi size_t len;
1006*22dc650dSSadaf Ebrahimi char *pattern;
1007*22dc650dSSadaf Ebrahimi directory_type *dir;
1008*22dc650dSSadaf Ebrahimi DWORD err;
1009*22dc650dSSadaf Ebrahimi len = strlen(filename);
1010*22dc650dSSadaf Ebrahimi pattern = (char *)malloc(len + 3);
1011*22dc650dSSadaf Ebrahimi dir = (directory_type *)malloc(sizeof(*dir));
1012*22dc650dSSadaf Ebrahimi if ((pattern == NULL) || (dir == NULL))
1013*22dc650dSSadaf Ebrahimi {
1014*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: malloc failed\n");
1015*22dc650dSSadaf Ebrahimi pcre2grep_exit(2);
1016*22dc650dSSadaf Ebrahimi }
1017*22dc650dSSadaf Ebrahimi memcpy(pattern, filename, len);
1018*22dc650dSSadaf Ebrahimi if (iswild(filename))
1019*22dc650dSSadaf Ebrahimi pattern[len] = 0;
1020*22dc650dSSadaf Ebrahimi else
1021*22dc650dSSadaf Ebrahimi memcpy(&(pattern[len]), "\\*", 3);
1022*22dc650dSSadaf Ebrahimi dir->handle = FindFirstFile(pattern, &(dir->data));
1023*22dc650dSSadaf Ebrahimi if (dir->handle != INVALID_HANDLE_VALUE)
1024*22dc650dSSadaf Ebrahimi {
1025*22dc650dSSadaf Ebrahimi free(pattern);
1026*22dc650dSSadaf Ebrahimi dir->first = TRUE;
1027*22dc650dSSadaf Ebrahimi return dir;
1028*22dc650dSSadaf Ebrahimi }
1029*22dc650dSSadaf Ebrahimi err = GetLastError();
1030*22dc650dSSadaf Ebrahimi free(pattern);
1031*22dc650dSSadaf Ebrahimi free(dir);
1032*22dc650dSSadaf Ebrahimi errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1033*22dc650dSSadaf Ebrahimi return NULL;
1034*22dc650dSSadaf Ebrahimi }
1035*22dc650dSSadaf Ebrahimi
1036*22dc650dSSadaf Ebrahimi char *
readdirectory(directory_type * dir)1037*22dc650dSSadaf Ebrahimi readdirectory(directory_type *dir)
1038*22dc650dSSadaf Ebrahimi {
1039*22dc650dSSadaf Ebrahimi for (;;)
1040*22dc650dSSadaf Ebrahimi {
1041*22dc650dSSadaf Ebrahimi if (!dir->first)
1042*22dc650dSSadaf Ebrahimi {
1043*22dc650dSSadaf Ebrahimi if (!FindNextFile(dir->handle, &(dir->data)))
1044*22dc650dSSadaf Ebrahimi return NULL;
1045*22dc650dSSadaf Ebrahimi }
1046*22dc650dSSadaf Ebrahimi else
1047*22dc650dSSadaf Ebrahimi {
1048*22dc650dSSadaf Ebrahimi dir->first = FALSE;
1049*22dc650dSSadaf Ebrahimi }
1050*22dc650dSSadaf Ebrahimi if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1051*22dc650dSSadaf Ebrahimi return dir->data.cFileName;
1052*22dc650dSSadaf Ebrahimi }
1053*22dc650dSSadaf Ebrahimi #ifndef _MSC_VER
1054*22dc650dSSadaf Ebrahimi return NULL; /* Keep compiler happy; never executed */
1055*22dc650dSSadaf Ebrahimi #endif
1056*22dc650dSSadaf Ebrahimi }
1057*22dc650dSSadaf Ebrahimi
1058*22dc650dSSadaf Ebrahimi void
closedirectory(directory_type * dir)1059*22dc650dSSadaf Ebrahimi closedirectory(directory_type *dir)
1060*22dc650dSSadaf Ebrahimi {
1061*22dc650dSSadaf Ebrahimi FindClose(dir->handle);
1062*22dc650dSSadaf Ebrahimi free(dir);
1063*22dc650dSSadaf Ebrahimi }
1064*22dc650dSSadaf Ebrahimi
1065*22dc650dSSadaf Ebrahimi
1066*22dc650dSSadaf Ebrahimi /************* Test for regular file in Windows **********/
1067*22dc650dSSadaf Ebrahimi
1068*22dc650dSSadaf Ebrahimi /* I don't know how to do this, or if it can be done; assume all paths are
1069*22dc650dSSadaf Ebrahimi regular if they are not directories. */
1070*22dc650dSSadaf Ebrahimi
int
isregfile(char *filename)
{
/* Anything that is not a directory is taken to be a regular file. */
return isdirectory(filename)? 0 : 1;
}
1075*22dc650dSSadaf Ebrahimi
1076*22dc650dSSadaf Ebrahimi
1077*22dc650dSSadaf Ebrahimi /************* Test for a terminal in Windows **********/
1078*22dc650dSSadaf Ebrahimi
1079*22dc650dSSadaf Ebrahimi static BOOL
is_stdout_tty(void)1080*22dc650dSSadaf Ebrahimi is_stdout_tty(void)
1081*22dc650dSSadaf Ebrahimi {
1082*22dc650dSSadaf Ebrahimi return _isatty(_fileno(stdout));
1083*22dc650dSSadaf Ebrahimi }
1084*22dc650dSSadaf Ebrahimi
1085*22dc650dSSadaf Ebrahimi static BOOL
is_file_tty(FILE * f)1086*22dc650dSSadaf Ebrahimi is_file_tty(FILE *f)
1087*22dc650dSSadaf Ebrahimi {
1088*22dc650dSSadaf Ebrahimi return _isatty(_fileno(f));
1089*22dc650dSSadaf Ebrahimi }
1090*22dc650dSSadaf Ebrahimi
1091*22dc650dSSadaf Ebrahimi
1092*22dc650dSSadaf Ebrahimi /************* Print optionally coloured match in Windows **********/
1093*22dc650dSSadaf Ebrahimi
1094*22dc650dSSadaf Ebrahimi static void
print_match(const void * buf,int length)1095*22dc650dSSadaf Ebrahimi print_match(const void *buf, int length)
1096*22dc650dSSadaf Ebrahimi {
1097*22dc650dSSadaf Ebrahimi if (length == 0) return;
1098*22dc650dSSadaf Ebrahimi if (do_colour)
1099*22dc650dSSadaf Ebrahimi {
1100*22dc650dSSadaf Ebrahimi if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1101*22dc650dSSadaf Ebrahimi else SetConsoleTextAttribute(hstdout, match_colour);
1102*22dc650dSSadaf Ebrahimi }
1103*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(buf, 1, length, stdout);
1104*22dc650dSSadaf Ebrahimi if (do_colour)
1105*22dc650dSSadaf Ebrahimi {
1106*22dc650dSSadaf Ebrahimi if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1107*22dc650dSSadaf Ebrahimi else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1108*22dc650dSSadaf Ebrahimi }
1109*22dc650dSSadaf Ebrahimi }
1110*22dc650dSSadaf Ebrahimi
1111*22dc650dSSadaf Ebrahimi /* End of Windows functions */
1112*22dc650dSSadaf Ebrahimi
1113*22dc650dSSadaf Ebrahimi
1114*22dc650dSSadaf Ebrahimi /************* Directory scanning when we can't do it ***********/
1115*22dc650dSSadaf Ebrahimi
1116*22dc650dSSadaf Ebrahimi /* The type is void, and apart from isdirectory(), the functions do nothing. */
1117*22dc650dSSadaf Ebrahimi
1118*22dc650dSSadaf Ebrahimi #else
1119*22dc650dSSadaf Ebrahimi
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Opening a directory always yields a null handle. */

directory_type * opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Closing does nothing. */

void closedirectory(directory_type *dir)
{
(void)dir;
}
1127*22dc650dSSadaf Ebrahimi
1128*22dc650dSSadaf Ebrahimi
1129*22dc650dSSadaf Ebrahimi /************* Test for regular file when we can't do it **********/
1130*22dc650dSSadaf Ebrahimi
/* With no way of testing, every file is taken to be a regular file. */

int isregfile(char *filename)
{
(void)filename;
return 1;
}
1134*22dc650dSSadaf Ebrahimi
1135*22dc650dSSadaf Ebrahimi
1136*22dc650dSSadaf Ebrahimi /************* Test for a terminal when we can't do it **********/
1137*22dc650dSSadaf Ebrahimi
1138*22dc650dSSadaf Ebrahimi static BOOL
is_stdout_tty(void)1139*22dc650dSSadaf Ebrahimi is_stdout_tty(void)
1140*22dc650dSSadaf Ebrahimi {
1141*22dc650dSSadaf Ebrahimi return FALSE;
1142*22dc650dSSadaf Ebrahimi }
1143*22dc650dSSadaf Ebrahimi
1144*22dc650dSSadaf Ebrahimi static BOOL
is_file_tty(FILE * f)1145*22dc650dSSadaf Ebrahimi is_file_tty(FILE *f)
1146*22dc650dSSadaf Ebrahimi {
1147*22dc650dSSadaf Ebrahimi return FALSE;
1148*22dc650dSSadaf Ebrahimi }
1149*22dc650dSSadaf Ebrahimi
1150*22dc650dSSadaf Ebrahimi
1151*22dc650dSSadaf Ebrahimi /************* Print optionally coloured match when we can't do it **********/
1152*22dc650dSSadaf Ebrahimi
/* Write length bytes from buf to stdout without any colouring. Nothing is
written for an empty match. */

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1159*22dc650dSSadaf Ebrahimi
1160*22dc650dSSadaf Ebrahimi #endif /* End of system-specific functions */
1161*22dc650dSSadaf Ebrahimi
1162*22dc650dSSadaf Ebrahimi
1163*22dc650dSSadaf Ebrahimi
1164*22dc650dSSadaf Ebrahimi #ifndef HAVE_STRERROR
1165*22dc650dSSadaf Ebrahimi /*************************************************
1166*22dc650dSSadaf Ebrahimi * Provide strerror() for non-ANSI libraries *
1167*22dc650dSSadaf Ebrahimi *************************************************/
1168*22dc650dSSadaf Ebrahimi
1169*22dc650dSSadaf Ebrahimi /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1170*22dc650dSSadaf Ebrahimi in their libraries, but can provide the same facility by this simple
1171*22dc650dSSadaf Ebrahimi alternative function. */
1172*22dc650dSSadaf Ebrahimi
extern int sys_nerr;
extern char *sys_errlist[];

/* Look the error number up in sys_errlist; any number outside the range
0 .. sys_nerr-1 gets a fixed fallback text.

Argument:   the error number
Returns:    pointer to the message text
*/

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
1182*22dc650dSSadaf Ebrahimi #endif /* HAVE_STRERROR */
1183*22dc650dSSadaf Ebrahimi
1184*22dc650dSSadaf Ebrahimi
1185*22dc650dSSadaf Ebrahimi
1186*22dc650dSSadaf Ebrahimi /*************************************************
1187*22dc650dSSadaf Ebrahimi * Usage function *
1188*22dc650dSSadaf Ebrahimi *************************************************/
1189*22dc650dSSadaf Ebrahimi
1190*22dc650dSSadaf Ebrahimi static int
usage(int rc)1191*22dc650dSSadaf Ebrahimi usage(int rc)
1192*22dc650dSSadaf Ebrahimi {
1193*22dc650dSSadaf Ebrahimi option_item *op;
1194*22dc650dSSadaf Ebrahimi fprintf(stderr, "Usage: pcre2grep [-");
1195*22dc650dSSadaf Ebrahimi for (op = optionlist; op->one_char != 0; op++)
1196*22dc650dSSadaf Ebrahimi {
1197*22dc650dSSadaf Ebrahimi if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1198*22dc650dSSadaf Ebrahimi }
1199*22dc650dSSadaf Ebrahimi fprintf(stderr, "] [long options] [pattern] [files]\n");
1200*22dc650dSSadaf Ebrahimi fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1201*22dc650dSSadaf Ebrahimi "options.\n");
1202*22dc650dSSadaf Ebrahimi return rc;
1203*22dc650dSSadaf Ebrahimi }
1204*22dc650dSSadaf Ebrahimi
1205*22dc650dSSadaf Ebrahimi
1206*22dc650dSSadaf Ebrahimi
1207*22dc650dSSadaf Ebrahimi /*************************************************
1208*22dc650dSSadaf Ebrahimi * Help function *
1209*22dc650dSSadaf Ebrahimi *************************************************/
1210*22dc650dSSadaf Ebrahimi
1211*22dc650dSSadaf Ebrahimi static void
help(void)1212*22dc650dSSadaf Ebrahimi help(void)
1213*22dc650dSSadaf Ebrahimi {
1214*22dc650dSSadaf Ebrahimi option_item *op;
1215*22dc650dSSadaf Ebrahimi
1216*22dc650dSSadaf Ebrahimi printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1217*22dc650dSSadaf Ebrahimi printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1218*22dc650dSSadaf Ebrahimi printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1219*22dc650dSSadaf Ebrahimi
1220*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT
1221*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1222*22dc650dSSadaf Ebrahimi printf("All callout scripts in patterns are supported." STDOUT_NL);
1223*22dc650dSSadaf Ebrahimi #else
1224*22dc650dSSadaf Ebrahimi printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1225*22dc650dSSadaf Ebrahimi #endif
1226*22dc650dSSadaf Ebrahimi #else
1227*22dc650dSSadaf Ebrahimi printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1228*22dc650dSSadaf Ebrahimi #endif
1229*22dc650dSSadaf Ebrahimi
1230*22dc650dSSadaf Ebrahimi printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1231*22dc650dSSadaf Ebrahimi
1232*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
1233*22dc650dSSadaf Ebrahimi printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1234*22dc650dSSadaf Ebrahimi #endif
1235*22dc650dSSadaf Ebrahimi
1236*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
1237*22dc650dSSadaf Ebrahimi printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1238*22dc650dSSadaf Ebrahimi #endif
1239*22dc650dSSadaf Ebrahimi
1240*22dc650dSSadaf Ebrahimi #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1241*22dc650dSSadaf Ebrahimi printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1242*22dc650dSSadaf Ebrahimi #else
1243*22dc650dSSadaf Ebrahimi printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1244*22dc650dSSadaf Ebrahimi #endif
1245*22dc650dSSadaf Ebrahimi
1246*22dc650dSSadaf Ebrahimi printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1247*22dc650dSSadaf Ebrahimi printf("Options:" STDOUT_NL);
1248*22dc650dSSadaf Ebrahimi
1249*22dc650dSSadaf Ebrahimi for (op = optionlist; op->one_char != 0; op++)
1250*22dc650dSSadaf Ebrahimi {
1251*22dc650dSSadaf Ebrahimi int n;
1252*22dc650dSSadaf Ebrahimi char s[4];
1253*22dc650dSSadaf Ebrahimi
1254*22dc650dSSadaf Ebrahimi if (op->one_char > 0 && (op->long_name)[0] == 0)
1255*22dc650dSSadaf Ebrahimi n = 31 - printf(" -%c", op->one_char);
1256*22dc650dSSadaf Ebrahimi else
1257*22dc650dSSadaf Ebrahimi {
1258*22dc650dSSadaf Ebrahimi if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1259*22dc650dSSadaf Ebrahimi else strcpy(s, " ");
1260*22dc650dSSadaf Ebrahimi n = 31 - printf(" %s --%s", s, op->long_name);
1261*22dc650dSSadaf Ebrahimi }
1262*22dc650dSSadaf Ebrahimi
1263*22dc650dSSadaf Ebrahimi if (n < 1) n = 1;
1264*22dc650dSSadaf Ebrahimi printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1265*22dc650dSSadaf Ebrahimi }
1266*22dc650dSSadaf Ebrahimi
1267*22dc650dSSadaf Ebrahimi printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1268*22dc650dSSadaf Ebrahimi printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1269*22dc650dSSadaf Ebrahimi printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1270*22dc650dSSadaf Ebrahimi printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1271*22dc650dSSadaf Ebrahimi printf("space is removed and blank lines are ignored." STDOUT_NL);
1272*22dc650dSSadaf Ebrahimi printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1273*22dc650dSSadaf Ebrahimi
1274*22dc650dSSadaf Ebrahimi printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1275*22dc650dSSadaf Ebrahimi printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1276*22dc650dSSadaf Ebrahimi }
1277*22dc650dSSadaf Ebrahimi
1278*22dc650dSSadaf Ebrahimi
1279*22dc650dSSadaf Ebrahimi
1280*22dc650dSSadaf Ebrahimi /*************************************************
1281*22dc650dSSadaf Ebrahimi * Test exclude/includes *
1282*22dc650dSSadaf Ebrahimi *************************************************/
1283*22dc650dSSadaf Ebrahimi
1284*22dc650dSSadaf Ebrahimi /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1285*22dc650dSSadaf Ebrahimi there are no includes, the path must match an include pattern.
1286*22dc650dSSadaf Ebrahimi
1287*22dc650dSSadaf Ebrahimi Arguments:
1288*22dc650dSSadaf Ebrahimi path the path to be matched
1289*22dc650dSSadaf Ebrahimi ip the chain of include patterns
1290*22dc650dSSadaf Ebrahimi ep the chain of exclude patterns
1291*22dc650dSSadaf Ebrahimi
1292*22dc650dSSadaf Ebrahimi Returns: TRUE if the path is not excluded
1293*22dc650dSSadaf Ebrahimi */
1294*22dc650dSSadaf Ebrahimi
1295*22dc650dSSadaf Ebrahimi static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1296*22dc650dSSadaf Ebrahimi test_incexc(char *path, patstr *ip, patstr *ep)
1297*22dc650dSSadaf Ebrahimi {
1298*22dc650dSSadaf Ebrahimi int plen = strlen((const char *)path);
1299*22dc650dSSadaf Ebrahimi
1300*22dc650dSSadaf Ebrahimi for (; ep != NULL; ep = ep->next)
1301*22dc650dSSadaf Ebrahimi {
1302*22dc650dSSadaf Ebrahimi if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1303*22dc650dSSadaf Ebrahimi return FALSE;
1304*22dc650dSSadaf Ebrahimi }
1305*22dc650dSSadaf Ebrahimi
1306*22dc650dSSadaf Ebrahimi if (ip == NULL) return TRUE;
1307*22dc650dSSadaf Ebrahimi
1308*22dc650dSSadaf Ebrahimi for (; ip != NULL; ip = ip->next)
1309*22dc650dSSadaf Ebrahimi {
1310*22dc650dSSadaf Ebrahimi if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1311*22dc650dSSadaf Ebrahimi return TRUE;
1312*22dc650dSSadaf Ebrahimi }
1313*22dc650dSSadaf Ebrahimi
1314*22dc650dSSadaf Ebrahimi return FALSE;
1315*22dc650dSSadaf Ebrahimi }
1316*22dc650dSSadaf Ebrahimi
1317*22dc650dSSadaf Ebrahimi
1318*22dc650dSSadaf Ebrahimi
1319*22dc650dSSadaf Ebrahimi /*************************************************
1320*22dc650dSSadaf Ebrahimi * Decode integer argument value *
1321*22dc650dSSadaf Ebrahimi *************************************************/
1322*22dc650dSSadaf Ebrahimi
1323*22dc650dSSadaf Ebrahimi /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1324*22dc650dSSadaf Ebrahimi because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1325*22dc650dSSadaf Ebrahimi just keep it simple.
1326*22dc650dSSadaf Ebrahimi
1327*22dc650dSSadaf Ebrahimi Arguments:
1328*22dc650dSSadaf Ebrahimi option_data the option data string
1329*22dc650dSSadaf Ebrahimi op the option item (for error messages)
1330*22dc650dSSadaf Ebrahimi longop TRUE if option given in long form
1331*22dc650dSSadaf Ebrahimi
1332*22dc650dSSadaf Ebrahimi Returns: a long integer
1333*22dc650dSSadaf Ebrahimi */
1334*22dc650dSSadaf Ebrahimi
1335*22dc650dSSadaf Ebrahimi static long int
decode_number(char * option_data,option_item * op,BOOL longop)1336*22dc650dSSadaf Ebrahimi decode_number(char *option_data, option_item *op, BOOL longop)
1337*22dc650dSSadaf Ebrahimi {
1338*22dc650dSSadaf Ebrahimi unsigned long int n = 0;
1339*22dc650dSSadaf Ebrahimi char *endptr = option_data;
1340*22dc650dSSadaf Ebrahimi while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1341*22dc650dSSadaf Ebrahimi while (isdigit((unsigned char)(*endptr)))
1342*22dc650dSSadaf Ebrahimi n = n * 10 + (int)(*endptr++ - '0');
1343*22dc650dSSadaf Ebrahimi if (toupper(*endptr) == 'K')
1344*22dc650dSSadaf Ebrahimi {
1345*22dc650dSSadaf Ebrahimi n *= 1024;
1346*22dc650dSSadaf Ebrahimi endptr++;
1347*22dc650dSSadaf Ebrahimi }
1348*22dc650dSSadaf Ebrahimi else if (toupper(*endptr) == 'M')
1349*22dc650dSSadaf Ebrahimi {
1350*22dc650dSSadaf Ebrahimi n *= 1024*1024;
1351*22dc650dSSadaf Ebrahimi endptr++;
1352*22dc650dSSadaf Ebrahimi }
1353*22dc650dSSadaf Ebrahimi
1354*22dc650dSSadaf Ebrahimi if (*endptr != 0) /* Error */
1355*22dc650dSSadaf Ebrahimi {
1356*22dc650dSSadaf Ebrahimi if (longop)
1357*22dc650dSSadaf Ebrahimi {
1358*22dc650dSSadaf Ebrahimi char *equals = strchr(op->long_name, '=');
1359*22dc650dSSadaf Ebrahimi int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1360*22dc650dSSadaf Ebrahimi (int)(equals - op->long_name);
1361*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1362*22dc650dSSadaf Ebrahimi option_data, nlen, op->long_name);
1363*22dc650dSSadaf Ebrahimi }
1364*22dc650dSSadaf Ebrahimi else
1365*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1366*22dc650dSSadaf Ebrahimi option_data, op->one_char);
1367*22dc650dSSadaf Ebrahimi pcre2grep_exit(usage(2));
1368*22dc650dSSadaf Ebrahimi }
1369*22dc650dSSadaf Ebrahimi
1370*22dc650dSSadaf Ebrahimi return n;
1371*22dc650dSSadaf Ebrahimi }
1372*22dc650dSSadaf Ebrahimi
1373*22dc650dSSadaf Ebrahimi
1374*22dc650dSSadaf Ebrahimi
1375*22dc650dSSadaf Ebrahimi /*************************************************
1376*22dc650dSSadaf Ebrahimi * Add item to a chain of numbers *
1377*22dc650dSSadaf Ebrahimi *************************************************/
1378*22dc650dSSadaf Ebrahimi
1379*22dc650dSSadaf Ebrahimi /* Used to add an item onto a chain, or just return an unconnected item if the
1380*22dc650dSSadaf Ebrahimi "after" argument is NULL.
1381*22dc650dSSadaf Ebrahimi
1382*22dc650dSSadaf Ebrahimi Arguments:
1383*22dc650dSSadaf Ebrahimi n the number to add
1384*22dc650dSSadaf Ebrahimi after if not NULL points to item to insert after
1385*22dc650dSSadaf Ebrahimi
1386*22dc650dSSadaf Ebrahimi Returns: new number block
1387*22dc650dSSadaf Ebrahimi */
1388*22dc650dSSadaf Ebrahimi
1389*22dc650dSSadaf Ebrahimi static omstr *
add_number(int n,omstr * after)1390*22dc650dSSadaf Ebrahimi add_number(int n, omstr *after)
1391*22dc650dSSadaf Ebrahimi {
1392*22dc650dSSadaf Ebrahimi omstr *om = (omstr *)malloc(sizeof(omstr));
1393*22dc650dSSadaf Ebrahimi
1394*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - These lines won't be hit in normal testing. */
1395*22dc650dSSadaf Ebrahimi
1396*22dc650dSSadaf Ebrahimi if (om == NULL)
1397*22dc650dSSadaf Ebrahimi {
1398*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: malloc failed\n");
1399*22dc650dSSadaf Ebrahimi pcre2grep_exit(2);
1400*22dc650dSSadaf Ebrahimi }
1401*22dc650dSSadaf Ebrahimi
1402*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
1403*22dc650dSSadaf Ebrahimi
1404*22dc650dSSadaf Ebrahimi om->next = NULL;
1405*22dc650dSSadaf Ebrahimi om->groupnum = n;
1406*22dc650dSSadaf Ebrahimi
1407*22dc650dSSadaf Ebrahimi if (after != NULL)
1408*22dc650dSSadaf Ebrahimi {
1409*22dc650dSSadaf Ebrahimi om->next = after->next;
1410*22dc650dSSadaf Ebrahimi after->next = om;
1411*22dc650dSSadaf Ebrahimi }
1412*22dc650dSSadaf Ebrahimi return om;
1413*22dc650dSSadaf Ebrahimi }
1414*22dc650dSSadaf Ebrahimi
1415*22dc650dSSadaf Ebrahimi
1416*22dc650dSSadaf Ebrahimi
1417*22dc650dSSadaf Ebrahimi /*************************************************
1418*22dc650dSSadaf Ebrahimi * Read one line of input *
1419*22dc650dSSadaf Ebrahimi *************************************************/
1420*22dc650dSSadaf Ebrahimi
1421*22dc650dSSadaf Ebrahimi /* Normally, input that is to be scanned is read using fread() (or gzread, or
1422*22dc650dSSadaf Ebrahimi BZ2_read) into a large buffer, so many lines may be read at once. However,
1423*22dc650dSSadaf Ebrahimi doing this for tty input means that no output appears until a lot of input has
1424*22dc650dSSadaf Ebrahimi been typed. Instead, tty input is handled line by line. We cannot use fgets()
1425*22dc650dSSadaf Ebrahimi for this, because it does not stop at a binary zero, and therefore there is no
1426*22dc650dSSadaf Ebrahimi way of telling how many characters it has read, because there may be binary
1427*22dc650dSSadaf Ebrahimi zeros embedded in the data. This function is also used for reading patterns
1428*22dc650dSSadaf Ebrahimi from files (the -f option).
1429*22dc650dSSadaf Ebrahimi
1430*22dc650dSSadaf Ebrahimi Arguments:
1431*22dc650dSSadaf Ebrahimi buffer the buffer to read into
1432*22dc650dSSadaf Ebrahimi length the maximum number of characters to read
1433*22dc650dSSadaf Ebrahimi f the file
1434*22dc650dSSadaf Ebrahimi
1435*22dc650dSSadaf Ebrahimi Returns: the number of characters read, zero at end of file
1436*22dc650dSSadaf Ebrahimi */
1437*22dc650dSSadaf Ebrahimi
1438*22dc650dSSadaf Ebrahimi static PCRE2_SIZE
read_one_line(char * buffer,PCRE2_SIZE length,FILE * f)1439*22dc650dSSadaf Ebrahimi read_one_line(char *buffer, PCRE2_SIZE length, FILE *f)
1440*22dc650dSSadaf Ebrahimi {
1441*22dc650dSSadaf Ebrahimi int c;
1442*22dc650dSSadaf Ebrahimi PCRE2_SIZE yield = 0;
1443*22dc650dSSadaf Ebrahimi while ((c = fgetc(f)) != EOF)
1444*22dc650dSSadaf Ebrahimi {
1445*22dc650dSSadaf Ebrahimi buffer[yield++] = c;
1446*22dc650dSSadaf Ebrahimi if (c == '\n' || yield >= length) break;
1447*22dc650dSSadaf Ebrahimi }
1448*22dc650dSSadaf Ebrahimi return yield;
1449*22dc650dSSadaf Ebrahimi }
1450*22dc650dSSadaf Ebrahimi
1451*22dc650dSSadaf Ebrahimi
1452*22dc650dSSadaf Ebrahimi
1453*22dc650dSSadaf Ebrahimi /*************************************************
1454*22dc650dSSadaf Ebrahimi * Find end of line *
1455*22dc650dSSadaf Ebrahimi *************************************************/
1456*22dc650dSSadaf Ebrahimi
1457*22dc650dSSadaf Ebrahimi /* The length of the endline sequence that is found is set via lenptr. This may
1458*22dc650dSSadaf Ebrahimi be zero at the very end of the file if there is no line-ending sequence there.
1459*22dc650dSSadaf Ebrahimi
1460*22dc650dSSadaf Ebrahimi Arguments:
1461*22dc650dSSadaf Ebrahimi p current position in line
1462*22dc650dSSadaf Ebrahimi endptr end of available data
1463*22dc650dSSadaf Ebrahimi lenptr where to put the length of the eol sequence
1464*22dc650dSSadaf Ebrahimi
1465*22dc650dSSadaf Ebrahimi Returns: pointer after the last byte of the line,
1466*22dc650dSSadaf Ebrahimi including the newline byte(s)
1467*22dc650dSSadaf Ebrahimi */
1468*22dc650dSSadaf Ebrahimi
1469*22dc650dSSadaf Ebrahimi static char *
end_of_line(char * p,char * endptr,int * lenptr)1470*22dc650dSSadaf Ebrahimi end_of_line(char *p, char *endptr, int *lenptr)
1471*22dc650dSSadaf Ebrahimi {
1472*22dc650dSSadaf Ebrahimi switch(endlinetype)
1473*22dc650dSSadaf Ebrahimi {
1474*22dc650dSSadaf Ebrahimi default: /* Just in case */
1475*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_LF:
1476*22dc650dSSadaf Ebrahimi while (p < endptr && *p != '\n') p++;
1477*22dc650dSSadaf Ebrahimi if (p < endptr)
1478*22dc650dSSadaf Ebrahimi {
1479*22dc650dSSadaf Ebrahimi *lenptr = 1;
1480*22dc650dSSadaf Ebrahimi return p + 1;
1481*22dc650dSSadaf Ebrahimi }
1482*22dc650dSSadaf Ebrahimi *lenptr = 0;
1483*22dc650dSSadaf Ebrahimi return endptr;
1484*22dc650dSSadaf Ebrahimi
1485*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_CR:
1486*22dc650dSSadaf Ebrahimi while (p < endptr && *p != '\r') p++;
1487*22dc650dSSadaf Ebrahimi if (p < endptr)
1488*22dc650dSSadaf Ebrahimi {
1489*22dc650dSSadaf Ebrahimi *lenptr = 1;
1490*22dc650dSSadaf Ebrahimi return p + 1;
1491*22dc650dSSadaf Ebrahimi }
1492*22dc650dSSadaf Ebrahimi *lenptr = 0;
1493*22dc650dSSadaf Ebrahimi return endptr;
1494*22dc650dSSadaf Ebrahimi
1495*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_NUL:
1496*22dc650dSSadaf Ebrahimi while (p < endptr && *p != '\0') p++;
1497*22dc650dSSadaf Ebrahimi if (p < endptr)
1498*22dc650dSSadaf Ebrahimi {
1499*22dc650dSSadaf Ebrahimi *lenptr = 1;
1500*22dc650dSSadaf Ebrahimi return p + 1;
1501*22dc650dSSadaf Ebrahimi }
1502*22dc650dSSadaf Ebrahimi *lenptr = 0;
1503*22dc650dSSadaf Ebrahimi return endptr;
1504*22dc650dSSadaf Ebrahimi
1505*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_CRLF:
1506*22dc650dSSadaf Ebrahimi for (;;)
1507*22dc650dSSadaf Ebrahimi {
1508*22dc650dSSadaf Ebrahimi while (p < endptr && *p != '\r') p++;
1509*22dc650dSSadaf Ebrahimi if (++p >= endptr)
1510*22dc650dSSadaf Ebrahimi {
1511*22dc650dSSadaf Ebrahimi *lenptr = 0;
1512*22dc650dSSadaf Ebrahimi return endptr;
1513*22dc650dSSadaf Ebrahimi }
1514*22dc650dSSadaf Ebrahimi if (*p == '\n')
1515*22dc650dSSadaf Ebrahimi {
1516*22dc650dSSadaf Ebrahimi *lenptr = 2;
1517*22dc650dSSadaf Ebrahimi return p + 1;
1518*22dc650dSSadaf Ebrahimi }
1519*22dc650dSSadaf Ebrahimi }
1520*22dc650dSSadaf Ebrahimi break;
1521*22dc650dSSadaf Ebrahimi
1522*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_ANYCRLF:
1523*22dc650dSSadaf Ebrahimi while (p < endptr)
1524*22dc650dSSadaf Ebrahimi {
1525*22dc650dSSadaf Ebrahimi int extra = 0;
1526*22dc650dSSadaf Ebrahimi int c = *((unsigned char *)p);
1527*22dc650dSSadaf Ebrahimi
1528*22dc650dSSadaf Ebrahimi if (utf && c >= 0xc0)
1529*22dc650dSSadaf Ebrahimi {
1530*22dc650dSSadaf Ebrahimi int gcii, gcss;
1531*22dc650dSSadaf Ebrahimi extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1532*22dc650dSSadaf Ebrahimi gcss = 6*extra;
1533*22dc650dSSadaf Ebrahimi c = (c & utf8_table3[extra]) << gcss;
1534*22dc650dSSadaf Ebrahimi for (gcii = 1; gcii <= extra; gcii++)
1535*22dc650dSSadaf Ebrahimi {
1536*22dc650dSSadaf Ebrahimi gcss -= 6;
1537*22dc650dSSadaf Ebrahimi c |= (p[gcii] & 0x3f) << gcss;
1538*22dc650dSSadaf Ebrahimi }
1539*22dc650dSSadaf Ebrahimi }
1540*22dc650dSSadaf Ebrahimi
1541*22dc650dSSadaf Ebrahimi p += 1 + extra;
1542*22dc650dSSadaf Ebrahimi
1543*22dc650dSSadaf Ebrahimi switch (c)
1544*22dc650dSSadaf Ebrahimi {
1545*22dc650dSSadaf Ebrahimi case '\n':
1546*22dc650dSSadaf Ebrahimi *lenptr = 1;
1547*22dc650dSSadaf Ebrahimi return p;
1548*22dc650dSSadaf Ebrahimi
1549*22dc650dSSadaf Ebrahimi case '\r':
1550*22dc650dSSadaf Ebrahimi if (p < endptr && *p == '\n')
1551*22dc650dSSadaf Ebrahimi {
1552*22dc650dSSadaf Ebrahimi *lenptr = 2;
1553*22dc650dSSadaf Ebrahimi p++;
1554*22dc650dSSadaf Ebrahimi }
1555*22dc650dSSadaf Ebrahimi else *lenptr = 1;
1556*22dc650dSSadaf Ebrahimi return p;
1557*22dc650dSSadaf Ebrahimi
1558*22dc650dSSadaf Ebrahimi default:
1559*22dc650dSSadaf Ebrahimi break;
1560*22dc650dSSadaf Ebrahimi }
1561*22dc650dSSadaf Ebrahimi } /* End of loop for ANYCRLF case */
1562*22dc650dSSadaf Ebrahimi
1563*22dc650dSSadaf Ebrahimi *lenptr = 0; /* Must have hit the end */
1564*22dc650dSSadaf Ebrahimi return endptr;
1565*22dc650dSSadaf Ebrahimi
1566*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_ANY:
1567*22dc650dSSadaf Ebrahimi while (p < endptr)
1568*22dc650dSSadaf Ebrahimi {
1569*22dc650dSSadaf Ebrahimi int extra = 0;
1570*22dc650dSSadaf Ebrahimi int c = *((unsigned char *)p);
1571*22dc650dSSadaf Ebrahimi
1572*22dc650dSSadaf Ebrahimi if (utf && c >= 0xc0)
1573*22dc650dSSadaf Ebrahimi {
1574*22dc650dSSadaf Ebrahimi int gcii, gcss;
1575*22dc650dSSadaf Ebrahimi extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1576*22dc650dSSadaf Ebrahimi gcss = 6*extra;
1577*22dc650dSSadaf Ebrahimi c = (c & utf8_table3[extra]) << gcss;
1578*22dc650dSSadaf Ebrahimi for (gcii = 1; gcii <= extra; gcii++)
1579*22dc650dSSadaf Ebrahimi {
1580*22dc650dSSadaf Ebrahimi gcss -= 6;
1581*22dc650dSSadaf Ebrahimi c |= (p[gcii] & 0x3f) << gcss;
1582*22dc650dSSadaf Ebrahimi }
1583*22dc650dSSadaf Ebrahimi }
1584*22dc650dSSadaf Ebrahimi
1585*22dc650dSSadaf Ebrahimi p += 1 + extra;
1586*22dc650dSSadaf Ebrahimi
1587*22dc650dSSadaf Ebrahimi switch (c)
1588*22dc650dSSadaf Ebrahimi {
1589*22dc650dSSadaf Ebrahimi case '\n': /* LF */
1590*22dc650dSSadaf Ebrahimi case '\v': /* VT */
1591*22dc650dSSadaf Ebrahimi case '\f': /* FF */
1592*22dc650dSSadaf Ebrahimi *lenptr = 1;
1593*22dc650dSSadaf Ebrahimi return p;
1594*22dc650dSSadaf Ebrahimi
1595*22dc650dSSadaf Ebrahimi case '\r': /* CR */
1596*22dc650dSSadaf Ebrahimi if (p < endptr && *p == '\n')
1597*22dc650dSSadaf Ebrahimi {
1598*22dc650dSSadaf Ebrahimi *lenptr = 2;
1599*22dc650dSSadaf Ebrahimi p++;
1600*22dc650dSSadaf Ebrahimi }
1601*22dc650dSSadaf Ebrahimi else *lenptr = 1;
1602*22dc650dSSadaf Ebrahimi return p;
1603*22dc650dSSadaf Ebrahimi
1604*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
1605*22dc650dSSadaf Ebrahimi case 0x85: /* Unicode NEL */
1606*22dc650dSSadaf Ebrahimi *lenptr = utf? 2 : 1;
1607*22dc650dSSadaf Ebrahimi return p;
1608*22dc650dSSadaf Ebrahimi
1609*22dc650dSSadaf Ebrahimi case 0x2028: /* Unicode LS */
1610*22dc650dSSadaf Ebrahimi case 0x2029: /* Unicode PS */
1611*22dc650dSSadaf Ebrahimi *lenptr = 3;
1612*22dc650dSSadaf Ebrahimi return p;
1613*22dc650dSSadaf Ebrahimi #endif /* Not EBCDIC */
1614*22dc650dSSadaf Ebrahimi
1615*22dc650dSSadaf Ebrahimi default:
1616*22dc650dSSadaf Ebrahimi break;
1617*22dc650dSSadaf Ebrahimi }
1618*22dc650dSSadaf Ebrahimi } /* End of loop for ANY case */
1619*22dc650dSSadaf Ebrahimi
1620*22dc650dSSadaf Ebrahimi *lenptr = 0; /* Must have hit the end */
1621*22dc650dSSadaf Ebrahimi return endptr;
1622*22dc650dSSadaf Ebrahimi } /* End of overall switch */
1623*22dc650dSSadaf Ebrahimi }
1624*22dc650dSSadaf Ebrahimi
1625*22dc650dSSadaf Ebrahimi
1626*22dc650dSSadaf Ebrahimi
1627*22dc650dSSadaf Ebrahimi /*************************************************
1628*22dc650dSSadaf Ebrahimi * Find start of previous line *
1629*22dc650dSSadaf Ebrahimi *************************************************/
1630*22dc650dSSadaf Ebrahimi
1631*22dc650dSSadaf Ebrahimi /* This is called when looking back for before lines to print.
1632*22dc650dSSadaf Ebrahimi
1633*22dc650dSSadaf Ebrahimi Arguments:
1634*22dc650dSSadaf Ebrahimi p start of the subsequent line
1635*22dc650dSSadaf Ebrahimi startptr start of available data
1636*22dc650dSSadaf Ebrahimi
1637*22dc650dSSadaf Ebrahimi Returns: pointer to the start of the previous line
1638*22dc650dSSadaf Ebrahimi */
1639*22dc650dSSadaf Ebrahimi
1640*22dc650dSSadaf Ebrahimi static char *
previous_line(char * p,char * startptr)1641*22dc650dSSadaf Ebrahimi previous_line(char *p, char *startptr)
1642*22dc650dSSadaf Ebrahimi {
1643*22dc650dSSadaf Ebrahimi switch(endlinetype)
1644*22dc650dSSadaf Ebrahimi {
1645*22dc650dSSadaf Ebrahimi default: /* Just in case */
1646*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_LF:
1647*22dc650dSSadaf Ebrahimi p--;
1648*22dc650dSSadaf Ebrahimi while (p > startptr && p[-1] != '\n') p--;
1649*22dc650dSSadaf Ebrahimi return p;
1650*22dc650dSSadaf Ebrahimi
1651*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_CR:
1652*22dc650dSSadaf Ebrahimi p--;
1653*22dc650dSSadaf Ebrahimi while (p > startptr && p[-1] != '\n') p--;
1654*22dc650dSSadaf Ebrahimi return p;
1655*22dc650dSSadaf Ebrahimi
1656*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_NUL:
1657*22dc650dSSadaf Ebrahimi p--;
1658*22dc650dSSadaf Ebrahimi while (p > startptr && p[-1] != '\0') p--;
1659*22dc650dSSadaf Ebrahimi return p;
1660*22dc650dSSadaf Ebrahimi
1661*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_CRLF:
1662*22dc650dSSadaf Ebrahimi for (;;)
1663*22dc650dSSadaf Ebrahimi {
1664*22dc650dSSadaf Ebrahimi p -= 2;
1665*22dc650dSSadaf Ebrahimi while (p > startptr && p[-1] != '\n') p--;
1666*22dc650dSSadaf Ebrahimi if (p <= startptr + 1 || p[-2] == '\r') return p;
1667*22dc650dSSadaf Ebrahimi }
1668*22dc650dSSadaf Ebrahimi /* Control can never get here */
1669*22dc650dSSadaf Ebrahimi
1670*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_ANY:
1671*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_ANYCRLF:
1672*22dc650dSSadaf Ebrahimi if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1673*22dc650dSSadaf Ebrahimi if (utf) while ((*p & 0xc0) == 0x80) p--;
1674*22dc650dSSadaf Ebrahimi
1675*22dc650dSSadaf Ebrahimi while (p > startptr)
1676*22dc650dSSadaf Ebrahimi {
1677*22dc650dSSadaf Ebrahimi unsigned int c;
1678*22dc650dSSadaf Ebrahimi char *pp = p - 1;
1679*22dc650dSSadaf Ebrahimi
1680*22dc650dSSadaf Ebrahimi if (utf)
1681*22dc650dSSadaf Ebrahimi {
1682*22dc650dSSadaf Ebrahimi int extra = 0;
1683*22dc650dSSadaf Ebrahimi while ((*pp & 0xc0) == 0x80) pp--;
1684*22dc650dSSadaf Ebrahimi c = *((unsigned char *)pp);
1685*22dc650dSSadaf Ebrahimi if (c >= 0xc0)
1686*22dc650dSSadaf Ebrahimi {
1687*22dc650dSSadaf Ebrahimi int gcii, gcss;
1688*22dc650dSSadaf Ebrahimi extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1689*22dc650dSSadaf Ebrahimi gcss = 6*extra;
1690*22dc650dSSadaf Ebrahimi c = (c & utf8_table3[extra]) << gcss;
1691*22dc650dSSadaf Ebrahimi for (gcii = 1; gcii <= extra; gcii++)
1692*22dc650dSSadaf Ebrahimi {
1693*22dc650dSSadaf Ebrahimi gcss -= 6;
1694*22dc650dSSadaf Ebrahimi c |= (pp[gcii] & 0x3f) << gcss;
1695*22dc650dSSadaf Ebrahimi }
1696*22dc650dSSadaf Ebrahimi }
1697*22dc650dSSadaf Ebrahimi }
1698*22dc650dSSadaf Ebrahimi else c = *((unsigned char *)pp);
1699*22dc650dSSadaf Ebrahimi
1700*22dc650dSSadaf Ebrahimi if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1701*22dc650dSSadaf Ebrahimi {
1702*22dc650dSSadaf Ebrahimi case '\n': /* LF */
1703*22dc650dSSadaf Ebrahimi case '\r': /* CR */
1704*22dc650dSSadaf Ebrahimi return p;
1705*22dc650dSSadaf Ebrahimi
1706*22dc650dSSadaf Ebrahimi default:
1707*22dc650dSSadaf Ebrahimi break;
1708*22dc650dSSadaf Ebrahimi }
1709*22dc650dSSadaf Ebrahimi
1710*22dc650dSSadaf Ebrahimi else switch (c)
1711*22dc650dSSadaf Ebrahimi {
1712*22dc650dSSadaf Ebrahimi case '\n': /* LF */
1713*22dc650dSSadaf Ebrahimi case '\v': /* VT */
1714*22dc650dSSadaf Ebrahimi case '\f': /* FF */
1715*22dc650dSSadaf Ebrahimi case '\r': /* CR */
1716*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
1717*22dc650dSSadaf Ebrahimi case 0x85: /* Unicode NEL */
1718*22dc650dSSadaf Ebrahimi case 0x2028: /* Unicode LS */
1719*22dc650dSSadaf Ebrahimi case 0x2029: /* Unicode PS */
1720*22dc650dSSadaf Ebrahimi #endif /* Not EBCDIC */
1721*22dc650dSSadaf Ebrahimi return p;
1722*22dc650dSSadaf Ebrahimi
1723*22dc650dSSadaf Ebrahimi default:
1724*22dc650dSSadaf Ebrahimi break;
1725*22dc650dSSadaf Ebrahimi }
1726*22dc650dSSadaf Ebrahimi
1727*22dc650dSSadaf Ebrahimi p = pp; /* Back one character */
1728*22dc650dSSadaf Ebrahimi } /* End of loop for ANY case */
1729*22dc650dSSadaf Ebrahimi
1730*22dc650dSSadaf Ebrahimi return startptr; /* Hit start of data */
1731*22dc650dSSadaf Ebrahimi } /* End of overall switch */
1732*22dc650dSSadaf Ebrahimi }
1733*22dc650dSSadaf Ebrahimi
1734*22dc650dSSadaf Ebrahimi
1735*22dc650dSSadaf Ebrahimi
1736*22dc650dSSadaf Ebrahimi /*************************************************
1737*22dc650dSSadaf Ebrahimi * Output newline at end *
1738*22dc650dSSadaf Ebrahimi *************************************************/
1739*22dc650dSSadaf Ebrahimi
1740*22dc650dSSadaf Ebrahimi /* This function is called if the final line of a file has been written to
1741*22dc650dSSadaf Ebrahimi stdout, but it does not have a terminating newline.
1742*22dc650dSSadaf Ebrahimi
1743*22dc650dSSadaf Ebrahimi Arguments: none
1744*22dc650dSSadaf Ebrahimi Returns: nothing
1745*22dc650dSSadaf Ebrahimi */
1746*22dc650dSSadaf Ebrahimi
1747*22dc650dSSadaf Ebrahimi static void
write_final_newline(void)1748*22dc650dSSadaf Ebrahimi write_final_newline(void)
1749*22dc650dSSadaf Ebrahimi {
1750*22dc650dSSadaf Ebrahimi switch(endlinetype)
1751*22dc650dSSadaf Ebrahimi {
1752*22dc650dSSadaf Ebrahimi default: /* Just in case */
1753*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_LF:
1754*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_ANY:
1755*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_ANYCRLF:
1756*22dc650dSSadaf Ebrahimi fprintf(stdout, "\n");
1757*22dc650dSSadaf Ebrahimi break;
1758*22dc650dSSadaf Ebrahimi
1759*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_CR:
1760*22dc650dSSadaf Ebrahimi fprintf(stdout, "\r");
1761*22dc650dSSadaf Ebrahimi break;
1762*22dc650dSSadaf Ebrahimi
1763*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_CRLF:
1764*22dc650dSSadaf Ebrahimi fprintf(stdout, "\r\n");
1765*22dc650dSSadaf Ebrahimi break;
1766*22dc650dSSadaf Ebrahimi
1767*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_NUL:
1768*22dc650dSSadaf Ebrahimi fprintf(stdout, "%c", 0);
1769*22dc650dSSadaf Ebrahimi break;
1770*22dc650dSSadaf Ebrahimi }
1771*22dc650dSSadaf Ebrahimi }
1772*22dc650dSSadaf Ebrahimi
1773*22dc650dSSadaf Ebrahimi
1774*22dc650dSSadaf Ebrahimi /*************************************************
1775*22dc650dSSadaf Ebrahimi * Print the previous "after" lines *
1776*22dc650dSSadaf Ebrahimi *************************************************/
1777*22dc650dSSadaf Ebrahimi
1778*22dc650dSSadaf Ebrahimi /* This is called if we are about to lose said lines because of buffer filling,
1779*22dc650dSSadaf Ebrahimi and at the end of the file. The data in the line is written using fwrite() so
1780*22dc650dSSadaf Ebrahimi that a binary zero does not terminate it.
1781*22dc650dSSadaf Ebrahimi
1782*22dc650dSSadaf Ebrahimi Arguments:
1783*22dc650dSSadaf Ebrahimi lastmatchnumber the number of the last matching line, plus one
1784*22dc650dSSadaf Ebrahimi lastmatchrestart where we restarted after the last match
1785*22dc650dSSadaf Ebrahimi endptr end of available data
1786*22dc650dSSadaf Ebrahimi printname filename for printing
1787*22dc650dSSadaf Ebrahimi
1788*22dc650dSSadaf Ebrahimi Returns: nothing
1789*22dc650dSSadaf Ebrahimi */
1790*22dc650dSSadaf Ebrahimi
1791*22dc650dSSadaf Ebrahimi static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1792*22dc650dSSadaf Ebrahimi do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1793*22dc650dSSadaf Ebrahimi char *endptr, const char *printname)
1794*22dc650dSSadaf Ebrahimi {
1795*22dc650dSSadaf Ebrahimi if (after_context > 0 && lastmatchnumber > 0)
1796*22dc650dSSadaf Ebrahimi {
1797*22dc650dSSadaf Ebrahimi int count = 0;
1798*22dc650dSSadaf Ebrahimi int ellength = 0;
1799*22dc650dSSadaf Ebrahimi while (lastmatchrestart < endptr && count < after_context)
1800*22dc650dSSadaf Ebrahimi {
1801*22dc650dSSadaf Ebrahimi char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1802*22dc650dSSadaf Ebrahimi if (ellength == 0 && pp == main_buffer + bufsize) break;
1803*22dc650dSSadaf Ebrahimi if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
1804*22dc650dSSadaf Ebrahimi if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1805*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1806*22dc650dSSadaf Ebrahimi lastmatchrestart = pp;
1807*22dc650dSSadaf Ebrahimi count++;
1808*22dc650dSSadaf Ebrahimi }
1809*22dc650dSSadaf Ebrahimi
1810*22dc650dSSadaf Ebrahimi /* If we have printed any lines, arrange for a hyphen separator if anything
1811*22dc650dSSadaf Ebrahimi else follows. Also, if the last line is the final line in the file and it had
1812*22dc650dSSadaf Ebrahimi no newline, add one. */
1813*22dc650dSSadaf Ebrahimi
1814*22dc650dSSadaf Ebrahimi if (count > 0)
1815*22dc650dSSadaf Ebrahimi {
1816*22dc650dSSadaf Ebrahimi hyphenpending = TRUE;
1817*22dc650dSSadaf Ebrahimi if (ellength == 0 && lastmatchrestart >= endptr)
1818*22dc650dSSadaf Ebrahimi write_final_newline();
1819*22dc650dSSadaf Ebrahimi }
1820*22dc650dSSadaf Ebrahimi }
1821*22dc650dSSadaf Ebrahimi }
1822*22dc650dSSadaf Ebrahimi
1823*22dc650dSSadaf Ebrahimi
1824*22dc650dSSadaf Ebrahimi
1825*22dc650dSSadaf Ebrahimi /*************************************************
1826*22dc650dSSadaf Ebrahimi * Apply patterns to subject till one matches *
1827*22dc650dSSadaf Ebrahimi *************************************************/
1828*22dc650dSSadaf Ebrahimi
1829*22dc650dSSadaf Ebrahimi /* This function is called to run through all the patterns, looking for a
1830*22dc650dSSadaf Ebrahimi match. When all possible matches are required, for example, for colouring, it
1831*22dc650dSSadaf Ebrahimi checks all patterns for matching, and returns the earliest match. Otherwise, it
1832*22dc650dSSadaf Ebrahimi returns the first pattern that has matched.
1833*22dc650dSSadaf Ebrahimi
1834*22dc650dSSadaf Ebrahimi Arguments:
1835*22dc650dSSadaf Ebrahimi matchptr the start of the subject
1836*22dc650dSSadaf Ebrahimi length the length of the subject to match
1837*22dc650dSSadaf Ebrahimi options options for pcre2_match
1838*22dc650dSSadaf Ebrahimi startoffset where to start matching
1839*22dc650dSSadaf Ebrahimi mrc address of where to put the result of pcre2_match()
1840*22dc650dSSadaf Ebrahimi
1841*22dc650dSSadaf Ebrahimi Returns: TRUE if there was a match, match_data and offsets are set
1842*22dc650dSSadaf Ebrahimi FALSE if there was no match (but no errors)
1843*22dc650dSSadaf Ebrahimi invert if there was a non-fatal error
1844*22dc650dSSadaf Ebrahimi */
1845*22dc650dSSadaf Ebrahimi
1846*22dc650dSSadaf Ebrahimi static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1847*22dc650dSSadaf Ebrahimi match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1848*22dc650dSSadaf Ebrahimi PCRE2_SIZE startoffset, int *mrc)
1849*22dc650dSSadaf Ebrahimi {
1850*22dc650dSSadaf Ebrahimi PCRE2_SIZE slen = length;
1851*22dc650dSSadaf Ebrahimi int first = -1;
1852*22dc650dSSadaf Ebrahimi int firstrc = 0;
1853*22dc650dSSadaf Ebrahimi patstr *p = patterns;
1854*22dc650dSSadaf Ebrahimi const char *msg = "this text:\n\n";
1855*22dc650dSSadaf Ebrahimi
1856*22dc650dSSadaf Ebrahimi if (slen > 200)
1857*22dc650dSSadaf Ebrahimi {
1858*22dc650dSSadaf Ebrahimi slen = 200;
1859*22dc650dSSadaf Ebrahimi msg = "text that starts:\n\n";
1860*22dc650dSSadaf Ebrahimi }
1861*22dc650dSSadaf Ebrahimi
1862*22dc650dSSadaf Ebrahimi for (int i = 1; p != NULL; p = p->next, i++)
1863*22dc650dSSadaf Ebrahimi {
1864*22dc650dSSadaf Ebrahimi int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, length,
1865*22dc650dSSadaf Ebrahimi startoffset, options, match_data, match_context);
1866*22dc650dSSadaf Ebrahimi if (rc == PCRE2_ERROR_NOMATCH) continue;
1867*22dc650dSSadaf Ebrahimi
1868*22dc650dSSadaf Ebrahimi /* Handle a successful match. When all_matches is false, we are done.
1869*22dc650dSSadaf Ebrahimi Otherwise we must save the earliest match. */
1870*22dc650dSSadaf Ebrahimi
1871*22dc650dSSadaf Ebrahimi if (rc >= 0)
1872*22dc650dSSadaf Ebrahimi {
1873*22dc650dSSadaf Ebrahimi if (!all_matches)
1874*22dc650dSSadaf Ebrahimi {
1875*22dc650dSSadaf Ebrahimi *mrc = rc;
1876*22dc650dSSadaf Ebrahimi return TRUE;
1877*22dc650dSSadaf Ebrahimi }
1878*22dc650dSSadaf Ebrahimi
1879*22dc650dSSadaf Ebrahimi if (first < 0 || offsets[0] < offsets_pair[first][0] ||
1880*22dc650dSSadaf Ebrahimi (offsets[0] == offsets_pair[first][0] &&
1881*22dc650dSSadaf Ebrahimi offsets[1] > offsets_pair[first][1]))
1882*22dc650dSSadaf Ebrahimi {
1883*22dc650dSSadaf Ebrahimi first = match_data_toggle;
1884*22dc650dSSadaf Ebrahimi firstrc = rc;
1885*22dc650dSSadaf Ebrahimi match_data_toggle ^= 1;
1886*22dc650dSSadaf Ebrahimi match_data = match_data_pair[match_data_toggle];
1887*22dc650dSSadaf Ebrahimi offsets = offsets_pair[match_data_toggle];
1888*22dc650dSSadaf Ebrahimi }
1889*22dc650dSSadaf Ebrahimi continue;
1890*22dc650dSSadaf Ebrahimi }
1891*22dc650dSSadaf Ebrahimi
1892*22dc650dSSadaf Ebrahimi /* Deal with PCRE2 error. */
1893*22dc650dSSadaf Ebrahimi
1894*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", rc);
1895*22dc650dSSadaf Ebrahimi if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1896*22dc650dSSadaf Ebrahimi fprintf(stderr, "%s", msg);
1897*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1898*22dc650dSSadaf Ebrahimi fprintf(stderr, "\n\n");
1899*22dc650dSSadaf Ebrahimi if (rc <= PCRE2_ERROR_UTF8_ERR1 &&
1900*22dc650dSSadaf Ebrahimi rc >= PCRE2_ERROR_UTF8_ERR21)
1901*22dc650dSSadaf Ebrahimi {
1902*22dc650dSSadaf Ebrahimi unsigned char mbuffer[256];
1903*22dc650dSSadaf Ebrahimi PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1904*22dc650dSSadaf Ebrahimi (void)pcre2_get_error_message(rc, mbuffer, sizeof(mbuffer));
1905*22dc650dSSadaf Ebrahimi fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1906*22dc650dSSadaf Ebrahimi }
1907*22dc650dSSadaf Ebrahimi if (rc == PCRE2_ERROR_MATCHLIMIT || rc == PCRE2_ERROR_DEPTHLIMIT ||
1908*22dc650dSSadaf Ebrahimi rc == PCRE2_ERROR_HEAPLIMIT || rc == PCRE2_ERROR_JIT_STACKLIMIT)
1909*22dc650dSSadaf Ebrahimi resource_error = TRUE;
1910*22dc650dSSadaf Ebrahimi if (error_count++ > 20)
1911*22dc650dSSadaf Ebrahimi {
1912*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1913*22dc650dSSadaf Ebrahimi pcre2grep_exit(2);
1914*22dc650dSSadaf Ebrahimi }
1915*22dc650dSSadaf Ebrahimi return invert; /* No more matching; don't show the line again */
1916*22dc650dSSadaf Ebrahimi }
1917*22dc650dSSadaf Ebrahimi
1918*22dc650dSSadaf Ebrahimi /* We get here when all patterns have been tried. If all_matches is false,
1919*22dc650dSSadaf Ebrahimi this means that none of them matched. If all_matches is true, matched_first
1920*22dc650dSSadaf Ebrahimi will be non-NULL if there was at least one match, and it will point to the
1921*22dc650dSSadaf Ebrahimi appropriate match_data block. */
1922*22dc650dSSadaf Ebrahimi
1923*22dc650dSSadaf Ebrahimi if (!all_matches || first < 0) return FALSE;
1924*22dc650dSSadaf Ebrahimi
1925*22dc650dSSadaf Ebrahimi match_data_toggle = first;
1926*22dc650dSSadaf Ebrahimi match_data = match_data_pair[first];
1927*22dc650dSSadaf Ebrahimi offsets = offsets_pair[first];
1928*22dc650dSSadaf Ebrahimi *mrc = firstrc;
1929*22dc650dSSadaf Ebrahimi return TRUE;
1930*22dc650dSSadaf Ebrahimi }
1931*22dc650dSSadaf Ebrahimi
1932*22dc650dSSadaf Ebrahimi
1933*22dc650dSSadaf Ebrahimi
1934*22dc650dSSadaf Ebrahimi /*************************************************
1935*22dc650dSSadaf Ebrahimi * Decode dollar escape sequence *
1936*22dc650dSSadaf Ebrahimi *************************************************/
1937*22dc650dSSadaf Ebrahimi
1938*22dc650dSSadaf Ebrahimi /* Called from various places to decode $ escapes in output strings. The escape
1939*22dc650dSSadaf Ebrahimi sequences are as follows:
1940*22dc650dSSadaf Ebrahimi
1941*22dc650dSSadaf Ebrahimi $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1942*22dc650dSSadaf Ebrahimi zero is never returned; '0' is substituted.
1943*22dc650dSSadaf Ebrahimi
1944*22dc650dSSadaf Ebrahimi $a returns bell.
1945*22dc650dSSadaf Ebrahimi $b returns backspace.
1946*22dc650dSSadaf Ebrahimi $e returns escape.
1947*22dc650dSSadaf Ebrahimi $f returns form feed.
1948*22dc650dSSadaf Ebrahimi $n returns newline.
1949*22dc650dSSadaf Ebrahimi $r returns carriage return.
1950*22dc650dSSadaf Ebrahimi $t returns tab.
1951*22dc650dSSadaf Ebrahimi $v returns vertical tab.
1952*22dc650dSSadaf Ebrahimi $o<digits> returns the character represented by the given octal
1953*22dc650dSSadaf Ebrahimi number; up to three digits are processed.
1954*22dc650dSSadaf Ebrahimi $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1955*22dc650dSSadaf Ebrahimi code points.
1956*22dc650dSSadaf Ebrahimi $x<digits> returns the character represented by the given hexadecimal
1957*22dc650dSSadaf Ebrahimi number; up to two digits are processed.
1958*22dc650dSSadaf Ebrahimi $x{<digits} does the same, up to 6 digits, but gives an error for mode-invalid
1959*22dc650dSSadaf Ebrahimi code points.
1960*22dc650dSSadaf Ebrahimi Any other character is substituted by itself. E.g: $$ is replaced by a single
1961*22dc650dSSadaf Ebrahimi dollar.
1962*22dc650dSSadaf Ebrahimi
1963*22dc650dSSadaf Ebrahimi Arguments:
1964*22dc650dSSadaf Ebrahimi begin the start of the whole string
1965*22dc650dSSadaf Ebrahimi string points to the $
1966*22dc650dSSadaf Ebrahimi callout TRUE if in a callout (inhibits error messages)
1967*22dc650dSSadaf Ebrahimi value where to return a value
1968*22dc650dSSadaf Ebrahimi last where to return pointer to the last used character
1969*22dc650dSSadaf Ebrahimi
1970*22dc650dSSadaf Ebrahimi Returns: DDE_ERROR after a syntax error
1971*22dc650dSSadaf Ebrahimi DDE_CAPTURE if *value is a capture number
1972*22dc650dSSadaf Ebrahimi DDE_CHAR if *value is a character code
1973*22dc650dSSadaf Ebrahimi */
1974*22dc650dSSadaf Ebrahimi
1975*22dc650dSSadaf Ebrahimi static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1976*22dc650dSSadaf Ebrahimi decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1977*22dc650dSSadaf Ebrahimi uint32_t *value, PCRE2_SPTR *last)
1978*22dc650dSSadaf Ebrahimi {
1979*22dc650dSSadaf Ebrahimi uint32_t c = 0;
1980*22dc650dSSadaf Ebrahimi int base = 10;
1981*22dc650dSSadaf Ebrahimi int dcount;
1982*22dc650dSSadaf Ebrahimi int rc = DDE_CHAR;
1983*22dc650dSSadaf Ebrahimi BOOL brace = FALSE;
1984*22dc650dSSadaf Ebrahimi
1985*22dc650dSSadaf Ebrahimi switch (*(++string))
1986*22dc650dSSadaf Ebrahimi {
1987*22dc650dSSadaf Ebrahimi case 0: /* Syntax error: a character must be present after $. */
1988*22dc650dSSadaf Ebrahimi if (!callout)
1989*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1990*22dc650dSSadaf Ebrahimi (int)(string - begin), "no character after $");
1991*22dc650dSSadaf Ebrahimi *last = string;
1992*22dc650dSSadaf Ebrahimi return DDE_ERROR;
1993*22dc650dSSadaf Ebrahimi
1994*22dc650dSSadaf Ebrahimi case '{':
1995*22dc650dSSadaf Ebrahimi brace = TRUE;
1996*22dc650dSSadaf Ebrahimi string++;
1997*22dc650dSSadaf Ebrahimi if (!isdigit((unsigned char)(*string))) /* Syntax error: a decimal number required. */
1998*22dc650dSSadaf Ebrahimi {
1999*22dc650dSSadaf Ebrahimi if (!callout)
2000*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2001*22dc650dSSadaf Ebrahimi (int)(string - begin), "decimal number expected");
2002*22dc650dSSadaf Ebrahimi rc = DDE_ERROR;
2003*22dc650dSSadaf Ebrahimi break;
2004*22dc650dSSadaf Ebrahimi }
2005*22dc650dSSadaf Ebrahimi
2006*22dc650dSSadaf Ebrahimi /* Fall through */
2007*22dc650dSSadaf Ebrahimi
2008*22dc650dSSadaf Ebrahimi /* The maximum capture number is 65535, so any number greater than that will
2009*22dc650dSSadaf Ebrahimi always be an unknown capture number. We just stop incrementing, in order to
2010*22dc650dSSadaf Ebrahimi avoid overflow. */
2011*22dc650dSSadaf Ebrahimi
2012*22dc650dSSadaf Ebrahimi case '0': case '1': case '2': case '3': case '4':
2013*22dc650dSSadaf Ebrahimi case '5': case '6': case '7': case '8': case '9':
2014*22dc650dSSadaf Ebrahimi do
2015*22dc650dSSadaf Ebrahimi {
2016*22dc650dSSadaf Ebrahimi if (c <= 65535) c = c * 10 + (*string - '0');
2017*22dc650dSSadaf Ebrahimi string++;
2018*22dc650dSSadaf Ebrahimi }
2019*22dc650dSSadaf Ebrahimi while (*string >= '0' && *string <= '9');
2020*22dc650dSSadaf Ebrahimi string--; /* Point to last digit */
2021*22dc650dSSadaf Ebrahimi
2022*22dc650dSSadaf Ebrahimi /* In a callout, capture number 0 is not available. No error can be given,
2023*22dc650dSSadaf Ebrahimi so just return the character '0'. */
2024*22dc650dSSadaf Ebrahimi
2025*22dc650dSSadaf Ebrahimi if (callout && c == 0)
2026*22dc650dSSadaf Ebrahimi {
2027*22dc650dSSadaf Ebrahimi *value = '0';
2028*22dc650dSSadaf Ebrahimi }
2029*22dc650dSSadaf Ebrahimi else
2030*22dc650dSSadaf Ebrahimi {
2031*22dc650dSSadaf Ebrahimi *value = c;
2032*22dc650dSSadaf Ebrahimi rc = DDE_CAPTURE;
2033*22dc650dSSadaf Ebrahimi }
2034*22dc650dSSadaf Ebrahimi break;
2035*22dc650dSSadaf Ebrahimi
2036*22dc650dSSadaf Ebrahimi /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
2037*22dc650dSSadaf Ebrahimi for valid Unicode code points. */
2038*22dc650dSSadaf Ebrahimi
2039*22dc650dSSadaf Ebrahimi case 'o':
2040*22dc650dSSadaf Ebrahimi base = 8;
2041*22dc650dSSadaf Ebrahimi string++;
2042*22dc650dSSadaf Ebrahimi if (*string == '{')
2043*22dc650dSSadaf Ebrahimi {
2044*22dc650dSSadaf Ebrahimi brace = TRUE;
2045*22dc650dSSadaf Ebrahimi string++;
2046*22dc650dSSadaf Ebrahimi dcount = 7;
2047*22dc650dSSadaf Ebrahimi }
2048*22dc650dSSadaf Ebrahimi else dcount = 3;
2049*22dc650dSSadaf Ebrahimi for (; dcount > 0; dcount--)
2050*22dc650dSSadaf Ebrahimi {
2051*22dc650dSSadaf Ebrahimi if (*string < '0' || *string > '7') break;
2052*22dc650dSSadaf Ebrahimi c = c * 8 + (*string++ - '0');
2053*22dc650dSSadaf Ebrahimi }
2054*22dc650dSSadaf Ebrahimi *value = c;
2055*22dc650dSSadaf Ebrahimi string--; /* Point to last digit */
2056*22dc650dSSadaf Ebrahimi break;
2057*22dc650dSSadaf Ebrahimi
2058*22dc650dSSadaf Ebrahimi /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2059*22dc650dSSadaf Ebrahimi for valid Unicode code points. */
2060*22dc650dSSadaf Ebrahimi
2061*22dc650dSSadaf Ebrahimi case 'x':
2062*22dc650dSSadaf Ebrahimi base = 16;
2063*22dc650dSSadaf Ebrahimi string++;
2064*22dc650dSSadaf Ebrahimi if (*string == '{')
2065*22dc650dSSadaf Ebrahimi {
2066*22dc650dSSadaf Ebrahimi brace = TRUE;
2067*22dc650dSSadaf Ebrahimi string++;
2068*22dc650dSSadaf Ebrahimi dcount = 6;
2069*22dc650dSSadaf Ebrahimi }
2070*22dc650dSSadaf Ebrahimi else dcount = 2;
2071*22dc650dSSadaf Ebrahimi for (; dcount > 0; dcount--)
2072*22dc650dSSadaf Ebrahimi {
2073*22dc650dSSadaf Ebrahimi if (!isxdigit(*string)) break;
2074*22dc650dSSadaf Ebrahimi if (*string >= '0' && *string <= '9')
2075*22dc650dSSadaf Ebrahimi c = c *16 + *string++ - '0';
2076*22dc650dSSadaf Ebrahimi else
2077*22dc650dSSadaf Ebrahimi c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2078*22dc650dSSadaf Ebrahimi }
2079*22dc650dSSadaf Ebrahimi *value = c;
2080*22dc650dSSadaf Ebrahimi string--; /* Point to last digit */
2081*22dc650dSSadaf Ebrahimi break;
2082*22dc650dSSadaf Ebrahimi
2083*22dc650dSSadaf Ebrahimi case 'a': *value = '\a'; break;
2084*22dc650dSSadaf Ebrahimi case 'b': *value = '\b'; break;
2085*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
2086*22dc650dSSadaf Ebrahimi case 'e': *value = '\033'; break;
2087*22dc650dSSadaf Ebrahimi #else
2088*22dc650dSSadaf Ebrahimi case 'e': *value = '\047'; break;
2089*22dc650dSSadaf Ebrahimi #endif
2090*22dc650dSSadaf Ebrahimi case 'f': *value = '\f'; break;
2091*22dc650dSSadaf Ebrahimi case 'n': *value = STDOUT_NL_CODE; break;
2092*22dc650dSSadaf Ebrahimi case 'r': *value = '\r'; break;
2093*22dc650dSSadaf Ebrahimi case 't': *value = '\t'; break;
2094*22dc650dSSadaf Ebrahimi case 'v': *value = '\v'; break;
2095*22dc650dSSadaf Ebrahimi
2096*22dc650dSSadaf Ebrahimi default: *value = *string; break;
2097*22dc650dSSadaf Ebrahimi }
2098*22dc650dSSadaf Ebrahimi
2099*22dc650dSSadaf Ebrahimi if (brace)
2100*22dc650dSSadaf Ebrahimi {
2101*22dc650dSSadaf Ebrahimi c = string[1];
2102*22dc650dSSadaf Ebrahimi if (c != '}')
2103*22dc650dSSadaf Ebrahimi {
2104*22dc650dSSadaf Ebrahimi rc = DDE_ERROR;
2105*22dc650dSSadaf Ebrahimi if (!callout)
2106*22dc650dSSadaf Ebrahimi {
2107*22dc650dSSadaf Ebrahimi if ((base == 8 && c >= '0' && c <= '7') ||
2108*22dc650dSSadaf Ebrahimi (base == 16 && isxdigit(c)))
2109*22dc650dSSadaf Ebrahimi {
2110*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2111*22dc650dSSadaf Ebrahimi "too many %s digits\n", (int)(string - begin),
2112*22dc650dSSadaf Ebrahimi (base == 8)? "octal" : "hex");
2113*22dc650dSSadaf Ebrahimi }
2114*22dc650dSSadaf Ebrahimi else
2115*22dc650dSSadaf Ebrahimi {
2116*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2117*22dc650dSSadaf Ebrahimi (int)(string - begin), "missing closing brace");
2118*22dc650dSSadaf Ebrahimi }
2119*22dc650dSSadaf Ebrahimi }
2120*22dc650dSSadaf Ebrahimi }
2121*22dc650dSSadaf Ebrahimi else string++;
2122*22dc650dSSadaf Ebrahimi }
2123*22dc650dSSadaf Ebrahimi
2124*22dc650dSSadaf Ebrahimi /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2125*22dc650dSSadaf Ebrahimi
2126*22dc650dSSadaf Ebrahimi if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2127*22dc650dSSadaf Ebrahimi {
2128*22dc650dSSadaf Ebrahimi uint32_t max = utf? 0x0010ffffu : 0xffu;
2129*22dc650dSSadaf Ebrahimi if (*value > max)
2130*22dc650dSSadaf Ebrahimi {
2131*22dc650dSSadaf Ebrahimi if (!callout)
2132*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2133*22dc650dSSadaf Ebrahimi "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2134*22dc650dSSadaf Ebrahimi rc = DDE_ERROR;
2135*22dc650dSSadaf Ebrahimi }
2136*22dc650dSSadaf Ebrahimi }
2137*22dc650dSSadaf Ebrahimi
2138*22dc650dSSadaf Ebrahimi *last = string;
2139*22dc650dSSadaf Ebrahimi return rc;
2140*22dc650dSSadaf Ebrahimi }
2141*22dc650dSSadaf Ebrahimi
2142*22dc650dSSadaf Ebrahimi
2143*22dc650dSSadaf Ebrahimi
2144*22dc650dSSadaf Ebrahimi /*************************************************
2145*22dc650dSSadaf Ebrahimi * Check output text for errors *
2146*22dc650dSSadaf Ebrahimi *************************************************/
2147*22dc650dSSadaf Ebrahimi
2148*22dc650dSSadaf Ebrahimi /* Called early, to get errors before doing anything for -O text; also called
2149*22dc650dSSadaf Ebrahimi from callouts to check before outputting.
2150*22dc650dSSadaf Ebrahimi
2151*22dc650dSSadaf Ebrahimi Arguments:
2152*22dc650dSSadaf Ebrahimi string an --output text string
2153*22dc650dSSadaf Ebrahimi callout TRUE if in a callout (stops printing errors)
2154*22dc650dSSadaf Ebrahimi
2155*22dc650dSSadaf Ebrahimi Returns: TRUE if OK, FALSE on error
2156*22dc650dSSadaf Ebrahimi */
2157*22dc650dSSadaf Ebrahimi
2158*22dc650dSSadaf Ebrahimi static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2159*22dc650dSSadaf Ebrahimi syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2160*22dc650dSSadaf Ebrahimi {
2161*22dc650dSSadaf Ebrahimi uint32_t value;
2162*22dc650dSSadaf Ebrahimi PCRE2_SPTR begin = string;
2163*22dc650dSSadaf Ebrahimi
2164*22dc650dSSadaf Ebrahimi for (; *string != 0; string++)
2165*22dc650dSSadaf Ebrahimi {
2166*22dc650dSSadaf Ebrahimi if (*string == '$' &&
2167*22dc650dSSadaf Ebrahimi decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2168*22dc650dSSadaf Ebrahimi return FALSE;
2169*22dc650dSSadaf Ebrahimi }
2170*22dc650dSSadaf Ebrahimi
2171*22dc650dSSadaf Ebrahimi return TRUE;
2172*22dc650dSSadaf Ebrahimi }
2173*22dc650dSSadaf Ebrahimi
2174*22dc650dSSadaf Ebrahimi
2175*22dc650dSSadaf Ebrahimi /*************************************************
2176*22dc650dSSadaf Ebrahimi * Display output text *
2177*22dc650dSSadaf Ebrahimi *************************************************/
2178*22dc650dSSadaf Ebrahimi
2179*22dc650dSSadaf Ebrahimi /* Display the output text, which is assumed to have already been syntax
2180*22dc650dSSadaf Ebrahimi checked. Output may contain escape sequences started by the dollar sign.
2181*22dc650dSSadaf Ebrahimi
2182*22dc650dSSadaf Ebrahimi Arguments:
2183*22dc650dSSadaf Ebrahimi string: the output text
2184*22dc650dSSadaf Ebrahimi callout: TRUE for the builtin callout, FALSE for --output
2185*22dc650dSSadaf Ebrahimi subject the start of the subject
2186*22dc650dSSadaf Ebrahimi ovector: capture offsets
2187*22dc650dSSadaf Ebrahimi capture_top: number of captures
2188*22dc650dSSadaf Ebrahimi
2189*22dc650dSSadaf Ebrahimi Returns: TRUE if something was output, other than newline
2190*22dc650dSSadaf Ebrahimi FALSE if nothing was output, or newline was last output
2191*22dc650dSSadaf Ebrahimi */
2192*22dc650dSSadaf Ebrahimi
2193*22dc650dSSadaf Ebrahimi static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2194*22dc650dSSadaf Ebrahimi display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2195*22dc650dSSadaf Ebrahimi PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2196*22dc650dSSadaf Ebrahimi {
2197*22dc650dSSadaf Ebrahimi uint32_t value;
2198*22dc650dSSadaf Ebrahimi BOOL printed = FALSE;
2199*22dc650dSSadaf Ebrahimi PCRE2_SPTR begin = string;
2200*22dc650dSSadaf Ebrahimi
2201*22dc650dSSadaf Ebrahimi for (; *string != 0; string++)
2202*22dc650dSSadaf Ebrahimi {
2203*22dc650dSSadaf Ebrahimi if (*string == '$')
2204*22dc650dSSadaf Ebrahimi {
2205*22dc650dSSadaf Ebrahimi switch(decode_dollar_escape(begin, string, callout, &value, &string))
2206*22dc650dSSadaf Ebrahimi {
2207*22dc650dSSadaf Ebrahimi case DDE_CHAR:
2208*22dc650dSSadaf Ebrahimi if (value == STDOUT_NL_CODE)
2209*22dc650dSSadaf Ebrahimi {
2210*22dc650dSSadaf Ebrahimi fprintf(stdout, STDOUT_NL);
2211*22dc650dSSadaf Ebrahimi printed = FALSE;
2212*22dc650dSSadaf Ebrahimi continue;
2213*22dc650dSSadaf Ebrahimi }
2214*22dc650dSSadaf Ebrahimi break; /* Will print value */
2215*22dc650dSSadaf Ebrahimi
2216*22dc650dSSadaf Ebrahimi case DDE_CAPTURE:
2217*22dc650dSSadaf Ebrahimi if (value < capture_top)
2218*22dc650dSSadaf Ebrahimi {
2219*22dc650dSSadaf Ebrahimi PCRE2_SIZE capturesize;
2220*22dc650dSSadaf Ebrahimi value *= 2;
2221*22dc650dSSadaf Ebrahimi capturesize = ovector[value + 1] - ovector[value];
2222*22dc650dSSadaf Ebrahimi if (capturesize > 0)
2223*22dc650dSSadaf Ebrahimi {
2224*22dc650dSSadaf Ebrahimi print_match(subject + ovector[value], capturesize);
2225*22dc650dSSadaf Ebrahimi printed = TRUE;
2226*22dc650dSSadaf Ebrahimi }
2227*22dc650dSSadaf Ebrahimi }
2228*22dc650dSSadaf Ebrahimi continue;
2229*22dc650dSSadaf Ebrahimi
2230*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
2231*22dc650dSSadaf Ebrahimi default: /* Should not occur */
2232*22dc650dSSadaf Ebrahimi break;
2233*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
2234*22dc650dSSadaf Ebrahimi }
2235*22dc650dSSadaf Ebrahimi }
2236*22dc650dSSadaf Ebrahimi
2237*22dc650dSSadaf Ebrahimi else value = *string; /* Not a $ escape */
2238*22dc650dSSadaf Ebrahimi
2239*22dc650dSSadaf Ebrahimi if (!utf || value <= 127) fprintf(stdout, "%c", value); else
2240*22dc650dSSadaf Ebrahimi {
2241*22dc650dSSadaf Ebrahimi int n = ord2utf8(value);
2242*22dc650dSSadaf Ebrahimi for (int i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2243*22dc650dSSadaf Ebrahimi }
2244*22dc650dSSadaf Ebrahimi
2245*22dc650dSSadaf Ebrahimi printed = TRUE;
2246*22dc650dSSadaf Ebrahimi }
2247*22dc650dSSadaf Ebrahimi
2248*22dc650dSSadaf Ebrahimi return printed;
2249*22dc650dSSadaf Ebrahimi }
2250*22dc650dSSadaf Ebrahimi
2251*22dc650dSSadaf Ebrahimi
2252*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT
2253*22dc650dSSadaf Ebrahimi
2254*22dc650dSSadaf Ebrahimi /*************************************************
2255*22dc650dSSadaf Ebrahimi * Parse and execute callout scripts *
2256*22dc650dSSadaf Ebrahimi *************************************************/
2257*22dc650dSSadaf Ebrahimi
2258*22dc650dSSadaf Ebrahimi /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2259*22dc650dSSadaf Ebrahimi string block and executes the program specified by the string. The string is a
2260*22dc650dSSadaf Ebrahimi list of substrings separated by pipe characters. The first substring represents
2261*22dc650dSSadaf Ebrahimi the executable name, and the following substrings specify the arguments:
2262*22dc650dSSadaf Ebrahimi
2263*22dc650dSSadaf Ebrahimi program_name|param1|param2|...
2264*22dc650dSSadaf Ebrahimi
2265*22dc650dSSadaf Ebrahimi Any substring (including the program name) can contain escape sequences
2266*22dc650dSSadaf Ebrahimi started by the dollar character. The escape sequences are substituted as
2267*22dc650dSSadaf Ebrahimi follows:
2268*22dc650dSSadaf Ebrahimi
2269*22dc650dSSadaf Ebrahimi $<digits> or ${<digits>} is replaced by the captured substring of the given
2270*22dc650dSSadaf Ebrahimi decimal number, which must be greater than zero. If the number is greater
2271*22dc650dSSadaf Ebrahimi than the number of capturing substrings, or if the capture is unset, the
2272*22dc650dSSadaf Ebrahimi replacement is empty.
2273*22dc650dSSadaf Ebrahimi
2274*22dc650dSSadaf Ebrahimi Any other character is substituted by itself. E.g: $$ is replaced by a single
2275*22dc650dSSadaf Ebrahimi dollar or $| replaced by a pipe character.
2276*22dc650dSSadaf Ebrahimi
2277*22dc650dSSadaf Ebrahimi Alternatively, if string starts with pipe, the remainder is taken as an output
2278*22dc650dSSadaf Ebrahimi string, same as --output. This is the only form that is supported if
SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is used to
2280*22dc650dSSadaf Ebrahimi separate each callout, defaulting to newline.
2281*22dc650dSSadaf Ebrahimi
2282*22dc650dSSadaf Ebrahimi Example:
2283*22dc650dSSadaf Ebrahimi
2284*22dc650dSSadaf Ebrahimi echo -e "abcde\n12345" | pcre2grep \
2285*22dc650dSSadaf Ebrahimi '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2286*22dc650dSSadaf Ebrahimi
2287*22dc650dSSadaf Ebrahimi Output:
2288*22dc650dSSadaf Ebrahimi
2289*22dc650dSSadaf Ebrahimi Arg1: [a] [bcd] [d] Arg2: |a| ()
2290*22dc650dSSadaf Ebrahimi abcde
2291*22dc650dSSadaf Ebrahimi Arg1: [1] [234] [4] Arg2: |1| ()
2292*22dc650dSSadaf Ebrahimi 12345
2293*22dc650dSSadaf Ebrahimi
2294*22dc650dSSadaf Ebrahimi Arguments:
  calloutptr   the callout block
  unused       ignored
2296*22dc650dSSadaf Ebrahimi
2297*22dc650dSSadaf Ebrahimi Returns: currently it always returns with 0
2298*22dc650dSSadaf Ebrahimi */
2299*22dc650dSSadaf Ebrahimi
2300*22dc650dSSadaf Ebrahimi static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2301*22dc650dSSadaf Ebrahimi pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2302*22dc650dSSadaf Ebrahimi {
2303*22dc650dSSadaf Ebrahimi PCRE2_SIZE length = calloutptr->callout_string_length;
2304*22dc650dSSadaf Ebrahimi PCRE2_SPTR string = calloutptr->callout_string;
2305*22dc650dSSadaf Ebrahimi PCRE2_SPTR subject = calloutptr->subject;
2306*22dc650dSSadaf Ebrahimi PCRE2_SIZE *ovector = calloutptr->offset_vector;
2307*22dc650dSSadaf Ebrahimi PCRE2_SIZE capture_top = calloutptr->capture_top;
2308*22dc650dSSadaf Ebrahimi
2309*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2310*22dc650dSSadaf Ebrahimi PCRE2_SIZE argsvectorlen = 2;
2311*22dc650dSSadaf Ebrahimi PCRE2_SIZE argslen = 1;
2312*22dc650dSSadaf Ebrahimi char *args;
2313*22dc650dSSadaf Ebrahimi char *argsptr;
2314*22dc650dSSadaf Ebrahimi char **argsvector;
2315*22dc650dSSadaf Ebrahimi char **argsvectorptr;
2316*22dc650dSSadaf Ebrahimi #ifndef WIN32
2317*22dc650dSSadaf Ebrahimi pid_t pid;
2318*22dc650dSSadaf Ebrahimi #endif
2319*22dc650dSSadaf Ebrahimi int result = 0;
2320*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2321*22dc650dSSadaf Ebrahimi
2322*22dc650dSSadaf Ebrahimi (void)unused; /* Avoid compiler warning */
2323*22dc650dSSadaf Ebrahimi
2324*22dc650dSSadaf Ebrahimi /* Only callouts with strings are supported. */
2325*22dc650dSSadaf Ebrahimi
2326*22dc650dSSadaf Ebrahimi if (string == NULL || length == 0) return 0;
2327*22dc650dSSadaf Ebrahimi
2328*22dc650dSSadaf Ebrahimi /* If there's no command, output the remainder directly. */
2329*22dc650dSSadaf Ebrahimi
2330*22dc650dSSadaf Ebrahimi if (*string == '|')
2331*22dc650dSSadaf Ebrahimi {
2332*22dc650dSSadaf Ebrahimi string++;
2333*22dc650dSSadaf Ebrahimi if (!syntax_check_output_text(string, TRUE)) return 0;
2334*22dc650dSSadaf Ebrahimi (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2335*22dc650dSSadaf Ebrahimi return 0;
2336*22dc650dSSadaf Ebrahimi }
2337*22dc650dSSadaf Ebrahimi
2338*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2339*22dc650dSSadaf Ebrahimi return 0;
2340*22dc650dSSadaf Ebrahimi #else
2341*22dc650dSSadaf Ebrahimi
2342*22dc650dSSadaf Ebrahimi /* Checking syntax and compute the number of string fragments. Callout strings
2343*22dc650dSSadaf Ebrahimi are silently ignored in the event of a syntax error. */
2344*22dc650dSSadaf Ebrahimi
2345*22dc650dSSadaf Ebrahimi while (length > 0)
2346*22dc650dSSadaf Ebrahimi {
2347*22dc650dSSadaf Ebrahimi if (*string == '|')
2348*22dc650dSSadaf Ebrahimi {
2349*22dc650dSSadaf Ebrahimi argsvectorlen++;
2350*22dc650dSSadaf Ebrahimi if (argsvectorlen > 10000) return 0; /* Too many args */
2351*22dc650dSSadaf Ebrahimi }
2352*22dc650dSSadaf Ebrahimi
2353*22dc650dSSadaf Ebrahimi else if (*string == '$')
2354*22dc650dSSadaf Ebrahimi {
2355*22dc650dSSadaf Ebrahimi uint32_t value;
2356*22dc650dSSadaf Ebrahimi PCRE2_SPTR begin = string;
2357*22dc650dSSadaf Ebrahimi
2358*22dc650dSSadaf Ebrahimi switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2359*22dc650dSSadaf Ebrahimi {
2360*22dc650dSSadaf Ebrahimi case DDE_CAPTURE:
2361*22dc650dSSadaf Ebrahimi if (value < capture_top)
2362*22dc650dSSadaf Ebrahimi {
2363*22dc650dSSadaf Ebrahimi value *= 2;
2364*22dc650dSSadaf Ebrahimi argslen += ovector[value + 1] - ovector[value];
2365*22dc650dSSadaf Ebrahimi }
2366*22dc650dSSadaf Ebrahimi argslen--; /* Negate the effect of argslen++ below. */
2367*22dc650dSSadaf Ebrahimi break;
2368*22dc650dSSadaf Ebrahimi
2369*22dc650dSSadaf Ebrahimi case DDE_CHAR:
2370*22dc650dSSadaf Ebrahimi if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2371*22dc650dSSadaf Ebrahimi else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2372*22dc650dSSadaf Ebrahimi break;
2373*22dc650dSSadaf Ebrahimi
2374*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
2375*22dc650dSSadaf Ebrahimi default: /* Should not occur */
2376*22dc650dSSadaf Ebrahimi case DDE_ERROR:
2377*22dc650dSSadaf Ebrahimi return 0;
2378*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
2379*22dc650dSSadaf Ebrahimi }
2380*22dc650dSSadaf Ebrahimi
2381*22dc650dSSadaf Ebrahimi length -= (string - begin);
2382*22dc650dSSadaf Ebrahimi }
2383*22dc650dSSadaf Ebrahimi
2384*22dc650dSSadaf Ebrahimi string++;
2385*22dc650dSSadaf Ebrahimi length--;
2386*22dc650dSSadaf Ebrahimi argslen++;
2387*22dc650dSSadaf Ebrahimi }
2388*22dc650dSSadaf Ebrahimi
2389*22dc650dSSadaf Ebrahimi /* Get memory for the argument vector and its strings. */
2390*22dc650dSSadaf Ebrahimi
2391*22dc650dSSadaf Ebrahimi args = (char*)malloc(argslen);
2392*22dc650dSSadaf Ebrahimi if (args == NULL) return 0;
2393*22dc650dSSadaf Ebrahimi
2394*22dc650dSSadaf Ebrahimi argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2395*22dc650dSSadaf Ebrahimi if (argsvector == NULL)
2396*22dc650dSSadaf Ebrahimi {
2397*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
2398*22dc650dSSadaf Ebrahimi free(args);
2399*22dc650dSSadaf Ebrahimi return 0;
2400*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
2401*22dc650dSSadaf Ebrahimi }
2402*22dc650dSSadaf Ebrahimi
2403*22dc650dSSadaf Ebrahimi /* Now reprocess the string and set up the arguments. */
2404*22dc650dSSadaf Ebrahimi
2405*22dc650dSSadaf Ebrahimi argsptr = args;
2406*22dc650dSSadaf Ebrahimi argsvectorptr = argsvector;
2407*22dc650dSSadaf Ebrahimi *argsvectorptr++ = argsptr;
2408*22dc650dSSadaf Ebrahimi
2409*22dc650dSSadaf Ebrahimi length = calloutptr->callout_string_length;
2410*22dc650dSSadaf Ebrahimi string = calloutptr->callout_string;
2411*22dc650dSSadaf Ebrahimi
2412*22dc650dSSadaf Ebrahimi while (length > 0)
2413*22dc650dSSadaf Ebrahimi {
2414*22dc650dSSadaf Ebrahimi if (*string == '|')
2415*22dc650dSSadaf Ebrahimi {
2416*22dc650dSSadaf Ebrahimi *argsptr++ = '\0';
2417*22dc650dSSadaf Ebrahimi *argsvectorptr++ = argsptr;
2418*22dc650dSSadaf Ebrahimi }
2419*22dc650dSSadaf Ebrahimi
2420*22dc650dSSadaf Ebrahimi else if (*string == '$')
2421*22dc650dSSadaf Ebrahimi {
2422*22dc650dSSadaf Ebrahimi uint32_t value;
2423*22dc650dSSadaf Ebrahimi PCRE2_SPTR begin = string;
2424*22dc650dSSadaf Ebrahimi
2425*22dc650dSSadaf Ebrahimi switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2426*22dc650dSSadaf Ebrahimi {
2427*22dc650dSSadaf Ebrahimi case DDE_CAPTURE:
2428*22dc650dSSadaf Ebrahimi if (value < capture_top)
2429*22dc650dSSadaf Ebrahimi {
2430*22dc650dSSadaf Ebrahimi PCRE2_SIZE capturesize;
2431*22dc650dSSadaf Ebrahimi value *= 2;
2432*22dc650dSSadaf Ebrahimi capturesize = ovector[value + 1] - ovector[value];
2433*22dc650dSSadaf Ebrahimi memcpy(argsptr, subject + ovector[value], capturesize);
2434*22dc650dSSadaf Ebrahimi argsptr += capturesize;
2435*22dc650dSSadaf Ebrahimi }
2436*22dc650dSSadaf Ebrahimi break;
2437*22dc650dSSadaf Ebrahimi
2438*22dc650dSSadaf Ebrahimi case DDE_CHAR:
2439*22dc650dSSadaf Ebrahimi if (value == STDOUT_NL_CODE)
2440*22dc650dSSadaf Ebrahimi {
2441*22dc650dSSadaf Ebrahimi memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2442*22dc650dSSadaf Ebrahimi argsptr += STDOUT_NL_LEN;
2443*22dc650dSSadaf Ebrahimi }
2444*22dc650dSSadaf Ebrahimi else if (utf && value > 127)
2445*22dc650dSSadaf Ebrahimi {
2446*22dc650dSSadaf Ebrahimi int n = ord2utf8(value);
2447*22dc650dSSadaf Ebrahimi memcpy(argsptr, utf8_buffer, n);
2448*22dc650dSSadaf Ebrahimi argsptr += n;
2449*22dc650dSSadaf Ebrahimi }
2450*22dc650dSSadaf Ebrahimi else
2451*22dc650dSSadaf Ebrahimi {
2452*22dc650dSSadaf Ebrahimi *argsptr++ = value;
2453*22dc650dSSadaf Ebrahimi }
2454*22dc650dSSadaf Ebrahimi break;
2455*22dc650dSSadaf Ebrahimi
2456*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
2457*22dc650dSSadaf Ebrahimi default: /* Even though this should not occur, the string having */
2458*22dc650dSSadaf Ebrahimi case DDE_ERROR: /* been checked above, we need to include the free() */
2459*22dc650dSSadaf Ebrahimi free(args); /* calls so that source checkers do not complain. */
2460*22dc650dSSadaf Ebrahimi free(argsvector);
2461*22dc650dSSadaf Ebrahimi return 0;
2462*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
2463*22dc650dSSadaf Ebrahimi }
2464*22dc650dSSadaf Ebrahimi
2465*22dc650dSSadaf Ebrahimi length -= (string - begin);
2466*22dc650dSSadaf Ebrahimi }
2467*22dc650dSSadaf Ebrahimi
2468*22dc650dSSadaf Ebrahimi else *argsptr++ = *string;
2469*22dc650dSSadaf Ebrahimi
2470*22dc650dSSadaf Ebrahimi /* Advance along the string */
2471*22dc650dSSadaf Ebrahimi
2472*22dc650dSSadaf Ebrahimi string++;
2473*22dc650dSSadaf Ebrahimi length--;
2474*22dc650dSSadaf Ebrahimi }
2475*22dc650dSSadaf Ebrahimi
2476*22dc650dSSadaf Ebrahimi *argsptr++ = '\0';
2477*22dc650dSSadaf Ebrahimi *argsvectorptr = NULL;
2478*22dc650dSSadaf Ebrahimi
2479*22dc650dSSadaf Ebrahimi /* Running an external command is system-dependent. Handle Windows and VMS as
2480*22dc650dSSadaf Ebrahimi necessary, otherwise assume fork(). */
2481*22dc650dSSadaf Ebrahimi
2482*22dc650dSSadaf Ebrahimi #ifdef WIN32
2483*22dc650dSSadaf Ebrahimi result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2484*22dc650dSSadaf Ebrahimi
2485*22dc650dSSadaf Ebrahimi #elif defined __VMS
2486*22dc650dSSadaf Ebrahimi {
2487*22dc650dSSadaf Ebrahimi char cmdbuf[500];
2488*22dc650dSSadaf Ebrahimi short i = 0;
2489*22dc650dSSadaf Ebrahimi int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2490*22dc650dSSadaf Ebrahimi $DESCRIPTOR(cmd, cmdbuf);
2491*22dc650dSSadaf Ebrahimi
2492*22dc650dSSadaf Ebrahimi cmdbuf[0] = 0;
2493*22dc650dSSadaf Ebrahimi while (argsvector[i])
2494*22dc650dSSadaf Ebrahimi {
2495*22dc650dSSadaf Ebrahimi strcat(cmdbuf, argsvector[i]);
2496*22dc650dSSadaf Ebrahimi strcat(cmdbuf, " ");
2497*22dc650dSSadaf Ebrahimi i++;
2498*22dc650dSSadaf Ebrahimi }
2499*22dc650dSSadaf Ebrahimi cmd.dsc$w_length = strlen(cmdbuf) - 1;
2500*22dc650dSSadaf Ebrahimi status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2501*22dc650dSSadaf Ebrahimi if (!(status & 1)) result = 0;
2502*22dc650dSSadaf Ebrahimi else result = retstat & 1 ? 0 : 1;
2503*22dc650dSSadaf Ebrahimi }
2504*22dc650dSSadaf Ebrahimi
2505*22dc650dSSadaf Ebrahimi #else /* Neither Windows nor VMS */
2506*22dc650dSSadaf Ebrahimi pid = fork();
2507*22dc650dSSadaf Ebrahimi if (pid == 0)
2508*22dc650dSSadaf Ebrahimi {
2509*22dc650dSSadaf Ebrahimi (void)execv(argsvector[0], argsvector);
2510*22dc650dSSadaf Ebrahimi /* Control gets here if there is an error, e.g. a non-existent program */
2511*22dc650dSSadaf Ebrahimi exit(1);
2512*22dc650dSSadaf Ebrahimi }
2513*22dc650dSSadaf Ebrahimi else if (pid > 0)
2514*22dc650dSSadaf Ebrahimi {
2515*22dc650dSSadaf Ebrahimi (void)fflush(stdout);
2516*22dc650dSSadaf Ebrahimi (void)waitpid(pid, &result, 0);
2517*22dc650dSSadaf Ebrahimi (void)fflush(stdout);
2518*22dc650dSSadaf Ebrahimi }
2519*22dc650dSSadaf Ebrahimi #endif /* End Windows/VMS/other handling */
2520*22dc650dSSadaf Ebrahimi
2521*22dc650dSSadaf Ebrahimi free(args);
2522*22dc650dSSadaf Ebrahimi free(argsvector);
2523*22dc650dSSadaf Ebrahimi
2524*22dc650dSSadaf Ebrahimi /* Currently negative return values are not supported, only zero (match
2525*22dc650dSSadaf Ebrahimi continues) or non-zero (match fails). */
2526*22dc650dSSadaf Ebrahimi
2527*22dc650dSSadaf Ebrahimi return result != 0;
2528*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2529*22dc650dSSadaf Ebrahimi }
2530*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_PCRE2GREP_CALLOUT */
2531*22dc650dSSadaf Ebrahimi
2532*22dc650dSSadaf Ebrahimi
2533*22dc650dSSadaf Ebrahimi
2534*22dc650dSSadaf Ebrahimi /*************************************************
2535*22dc650dSSadaf Ebrahimi * Read a portion of the file into buffer *
2536*22dc650dSSadaf Ebrahimi *************************************************/
2537*22dc650dSSadaf Ebrahimi
2538*22dc650dSSadaf Ebrahimi static PCRE2_SIZE
fill_buffer(void * handle,int frtype,char * buffer,PCRE2_SIZE length,BOOL input_line_buffered)2539*22dc650dSSadaf Ebrahimi fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length,
2540*22dc650dSSadaf Ebrahimi BOOL input_line_buffered)
2541*22dc650dSSadaf Ebrahimi {
2542*22dc650dSSadaf Ebrahimi (void)frtype; /* Avoid warning when not used */
2543*22dc650dSSadaf Ebrahimi
2544*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
2545*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBZ)
2546*22dc650dSSadaf Ebrahimi return gzread((gzFile)handle, buffer, length);
2547*22dc650dSSadaf Ebrahimi else
2548*22dc650dSSadaf Ebrahimi #endif
2549*22dc650dSSadaf Ebrahimi
2550*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
2551*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBBZ2)
2552*22dc650dSSadaf Ebrahimi return (PCRE2_SIZE)BZ2_bzread((BZFILE *)handle, buffer, length);
2553*22dc650dSSadaf Ebrahimi else
2554*22dc650dSSadaf Ebrahimi #endif
2555*22dc650dSSadaf Ebrahimi
2556*22dc650dSSadaf Ebrahimi return (input_line_buffered ?
2557*22dc650dSSadaf Ebrahimi read_one_line(buffer, length, (FILE *)handle) :
2558*22dc650dSSadaf Ebrahimi fread(buffer, 1, length, (FILE *)handle));
2559*22dc650dSSadaf Ebrahimi }
2560*22dc650dSSadaf Ebrahimi
2561*22dc650dSSadaf Ebrahimi
2562*22dc650dSSadaf Ebrahimi
2563*22dc650dSSadaf Ebrahimi /*************************************************
2564*22dc650dSSadaf Ebrahimi * Grep an individual file *
2565*22dc650dSSadaf Ebrahimi *************************************************/
2566*22dc650dSSadaf Ebrahimi
2567*22dc650dSSadaf Ebrahimi /* This is called from grep_or_recurse() below. It uses a buffer that is three
2568*22dc650dSSadaf Ebrahimi times the value of bufthird. The matching point is never allowed to stray into
2569*22dc650dSSadaf Ebrahimi the top third of the buffer, thus keeping more of the file available for
2570*22dc650dSSadaf Ebrahimi context printing or for multiline scanning. For large files, the pointer will
2571*22dc650dSSadaf Ebrahimi be in the middle third most of the time, so the bottom third is available for
2572*22dc650dSSadaf Ebrahimi "before" context printing.
2573*22dc650dSSadaf Ebrahimi
2574*22dc650dSSadaf Ebrahimi Arguments:
2575*22dc650dSSadaf Ebrahimi handle the fopened FILE stream for a normal file
2576*22dc650dSSadaf Ebrahimi the gzFile pointer when reading is via libz
2577*22dc650dSSadaf Ebrahimi the BZFILE pointer when reading is via libbz2
2578*22dc650dSSadaf Ebrahimi frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2579*22dc650dSSadaf Ebrahimi filename the file name or NULL (for errors)
2580*22dc650dSSadaf Ebrahimi printname the file name if it is to be printed for each match
2581*22dc650dSSadaf Ebrahimi or NULL if the file name is not to be printed
2582*22dc650dSSadaf Ebrahimi it cannot be NULL if filenames[_nomatch]_only is set
2583*22dc650dSSadaf Ebrahimi
2584*22dc650dSSadaf Ebrahimi Returns: 0 if there was at least one match
2585*22dc650dSSadaf Ebrahimi 1 otherwise (no matches)
2586*22dc650dSSadaf Ebrahimi 2 if an overlong line is encountered
2587*22dc650dSSadaf Ebrahimi 3 if there is a read error on a .bz2 file
2588*22dc650dSSadaf Ebrahimi */
2589*22dc650dSSadaf Ebrahimi
2590*22dc650dSSadaf Ebrahimi static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2591*22dc650dSSadaf Ebrahimi pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2592*22dc650dSSadaf Ebrahimi {
2593*22dc650dSSadaf Ebrahimi int rc = 1;
2594*22dc650dSSadaf Ebrahimi int filepos = 0;
2595*22dc650dSSadaf Ebrahimi unsigned long int linenumber = 1;
2596*22dc650dSSadaf Ebrahimi unsigned long int lastmatchnumber = 0;
2597*22dc650dSSadaf Ebrahimi unsigned long int count = 0;
2598*22dc650dSSadaf Ebrahimi long int count_matched_lines = 0;
2599*22dc650dSSadaf Ebrahimi char *lastmatchrestart = main_buffer;
2600*22dc650dSSadaf Ebrahimi char *ptr = main_buffer;
2601*22dc650dSSadaf Ebrahimi char *endptr;
2602*22dc650dSSadaf Ebrahimi PCRE2_SIZE bufflength;
2603*22dc650dSSadaf Ebrahimi BOOL binary = FALSE;
2604*22dc650dSSadaf Ebrahimi BOOL endhyphenpending = FALSE;
2605*22dc650dSSadaf Ebrahimi BOOL lines_printed = FALSE;
2606*22dc650dSSadaf Ebrahimi BOOL input_line_buffered = line_buffered;
2607*22dc650dSSadaf Ebrahimi FILE *in = NULL; /* Ensure initialized */
2608*22dc650dSSadaf Ebrahimi long stream_start = -1; /* Only non-negative if relevant */
2609*22dc650dSSadaf Ebrahimi
2610*22dc650dSSadaf Ebrahimi /* Do the first read into the start of the buffer and set up the pointer to end
2611*22dc650dSSadaf Ebrahimi of what we have. In the case of libz, a non-zipped .gz file will be read as a
2612*22dc650dSSadaf Ebrahimi plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2613*22dc650dSSadaf Ebrahimi fail. */
2614*22dc650dSSadaf Ebrahimi
2615*22dc650dSSadaf Ebrahimi if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2616*22dc650dSSadaf Ebrahimi {
2617*22dc650dSSadaf Ebrahimi in = (FILE *)handle;
2618*22dc650dSSadaf Ebrahimi if (feof(in)) return 1;
2619*22dc650dSSadaf Ebrahimi if (is_file_tty(in)) input_line_buffered = TRUE;
2620*22dc650dSSadaf Ebrahimi else
2621*22dc650dSSadaf Ebrahimi {
2622*22dc650dSSadaf Ebrahimi if (count_limit >= 0 && filename == stdin_name)
2623*22dc650dSSadaf Ebrahimi stream_start = ftell(in);
2624*22dc650dSSadaf Ebrahimi }
2625*22dc650dSSadaf Ebrahimi }
2626*22dc650dSSadaf Ebrahimi else input_line_buffered = FALSE;
2627*22dc650dSSadaf Ebrahimi
2628*22dc650dSSadaf Ebrahimi bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2629*22dc650dSSadaf Ebrahimi input_line_buffered);
2630*22dc650dSSadaf Ebrahimi
2631*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
2632*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 3; /* Gotcha: bufflength is PCRE2_SIZE */
2633*22dc650dSSadaf Ebrahimi #endif
2634*22dc650dSSadaf Ebrahimi
2635*22dc650dSSadaf Ebrahimi endptr = main_buffer + bufflength;
2636*22dc650dSSadaf Ebrahimi
2637*22dc650dSSadaf Ebrahimi /* Unless binary-files=text, see if we have a binary file. This uses the same
2638*22dc650dSSadaf Ebrahimi rule as GNU grep, namely, a search for a binary zero byte near the start of the
2639*22dc650dSSadaf Ebrahimi file. However, when the newline convention is binary zero, we can't do this. */
2640*22dc650dSSadaf Ebrahimi
2641*22dc650dSSadaf Ebrahimi if (binary_files != BIN_TEXT)
2642*22dc650dSSadaf Ebrahimi {
2643*22dc650dSSadaf Ebrahimi if (endlinetype != PCRE2_NEWLINE_NUL)
2644*22dc650dSSadaf Ebrahimi binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2645*22dc650dSSadaf Ebrahimi != NULL;
2646*22dc650dSSadaf Ebrahimi if (binary && binary_files == BIN_NOMATCH) return 1;
2647*22dc650dSSadaf Ebrahimi }
2648*22dc650dSSadaf Ebrahimi
2649*22dc650dSSadaf Ebrahimi /* Loop while the current pointer is not at the end of the file. For large
2650*22dc650dSSadaf Ebrahimi files, endptr will be at the end of the buffer when we are in the middle of the
2651*22dc650dSSadaf Ebrahimi file, but ptr will never get there, because as soon as it gets over 2/3 of the
2652*22dc650dSSadaf Ebrahimi way, the buffer is shifted left and re-filled. */
2653*22dc650dSSadaf Ebrahimi
2654*22dc650dSSadaf Ebrahimi while (ptr < endptr)
2655*22dc650dSSadaf Ebrahimi {
2656*22dc650dSSadaf Ebrahimi int endlinelength;
2657*22dc650dSSadaf Ebrahimi int mrc = 0;
2658*22dc650dSSadaf Ebrahimi unsigned int options = 0;
2659*22dc650dSSadaf Ebrahimi BOOL match;
2660*22dc650dSSadaf Ebrahimi BOOL line_matched = FALSE;
2661*22dc650dSSadaf Ebrahimi char *t = ptr;
2662*22dc650dSSadaf Ebrahimi PCRE2_SIZE length, linelength;
2663*22dc650dSSadaf Ebrahimi PCRE2_SIZE startoffset = 0;
2664*22dc650dSSadaf Ebrahimi
2665*22dc650dSSadaf Ebrahimi /* If the -m option set a limit for the number of matched or non-matched
2666*22dc650dSSadaf Ebrahimi lines, check it here. A limit of zero means that no matching is ever done.
2667*22dc650dSSadaf Ebrahimi For stdin from a file, set the file position. */
2668*22dc650dSSadaf Ebrahimi
2669*22dc650dSSadaf Ebrahimi if (count_limit >= 0 && count_matched_lines >= count_limit)
2670*22dc650dSSadaf Ebrahimi {
2671*22dc650dSSadaf Ebrahimi if (stream_start >= 0)
2672*22dc650dSSadaf Ebrahimi (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
2673*22dc650dSSadaf Ebrahimi rc = (count_limit == 0)? 1 : 0;
2674*22dc650dSSadaf Ebrahimi break;
2675*22dc650dSSadaf Ebrahimi }
2676*22dc650dSSadaf Ebrahimi
2677*22dc650dSSadaf Ebrahimi /* At this point, ptr is at the start of a line. We need to find the length
2678*22dc650dSSadaf Ebrahimi of the subject string to pass to pcre2_match(). In multiline mode, it is the
2679*22dc650dSSadaf Ebrahimi length remainder of the data in the buffer. Otherwise, it is the length of
2680*22dc650dSSadaf Ebrahimi the next line, excluding the terminating newline. After matching, we always
2681*22dc650dSSadaf Ebrahimi advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2682*22dc650dSSadaf Ebrahimi option is used for compiling, so that any match is constrained to be in the
2683*22dc650dSSadaf Ebrahimi first line. */
2684*22dc650dSSadaf Ebrahimi
2685*22dc650dSSadaf Ebrahimi t = end_of_line(t, endptr, &endlinelength);
2686*22dc650dSSadaf Ebrahimi linelength = t - ptr - endlinelength;
2687*22dc650dSSadaf Ebrahimi length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2688*22dc650dSSadaf Ebrahimi
2689*22dc650dSSadaf Ebrahimi /* Check to see if the line we are looking at extends right to the very end
2690*22dc650dSSadaf Ebrahimi of the buffer without a line terminator. This means the line is too long to
2691*22dc650dSSadaf Ebrahimi handle at the current buffer size. Until the buffer reaches its maximum size,
2692*22dc650dSSadaf Ebrahimi try doubling it and reading more data. */
2693*22dc650dSSadaf Ebrahimi
2694*22dc650dSSadaf Ebrahimi if (endlinelength == 0 && t == main_buffer + bufsize)
2695*22dc650dSSadaf Ebrahimi {
2696*22dc650dSSadaf Ebrahimi if (bufthird < max_bufthird)
2697*22dc650dSSadaf Ebrahimi {
2698*22dc650dSSadaf Ebrahimi char *new_buffer;
2699*22dc650dSSadaf Ebrahimi PCRE2_SIZE new_bufthird = 2*bufthird;
2700*22dc650dSSadaf Ebrahimi
2701*22dc650dSSadaf Ebrahimi if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2702*22dc650dSSadaf Ebrahimi new_buffer = (char *)malloc(3*new_bufthird);
2703*22dc650dSSadaf Ebrahimi
2704*22dc650dSSadaf Ebrahimi if (new_buffer == NULL)
2705*22dc650dSSadaf Ebrahimi {
2706*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
2707*22dc650dSSadaf Ebrahimi fprintf(stderr,
2708*22dc650dSSadaf Ebrahimi "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2709*22dc650dSSadaf Ebrahimi "pcre2grep: not enough memory to increase the buffer size to %"
2710*22dc650dSSadaf Ebrahimi SIZ_FORM "\n",
2711*22dc650dSSadaf Ebrahimi linenumber,
2712*22dc650dSSadaf Ebrahimi (filename == NULL)? "" : " of file ",
2713*22dc650dSSadaf Ebrahimi (filename == NULL)? "" : filename,
2714*22dc650dSSadaf Ebrahimi new_bufthird);
2715*22dc650dSSadaf Ebrahimi return 2;
2716*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
2717*22dc650dSSadaf Ebrahimi }
2718*22dc650dSSadaf Ebrahimi
2719*22dc650dSSadaf Ebrahimi /* Copy the data and adjust pointers to the new buffer location. */
2720*22dc650dSSadaf Ebrahimi
2721*22dc650dSSadaf Ebrahimi memcpy(new_buffer, main_buffer, bufsize);
2722*22dc650dSSadaf Ebrahimi bufthird = new_bufthird;
2723*22dc650dSSadaf Ebrahimi bufsize = 3*bufthird;
2724*22dc650dSSadaf Ebrahimi ptr = new_buffer + (ptr - main_buffer);
2725*22dc650dSSadaf Ebrahimi lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2726*22dc650dSSadaf Ebrahimi free(main_buffer);
2727*22dc650dSSadaf Ebrahimi main_buffer = new_buffer;
2728*22dc650dSSadaf Ebrahimi
2729*22dc650dSSadaf Ebrahimi /* Read more data into the buffer and then try to find the line ending
2730*22dc650dSSadaf Ebrahimi again. */
2731*22dc650dSSadaf Ebrahimi
2732*22dc650dSSadaf Ebrahimi bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2733*22dc650dSSadaf Ebrahimi bufsize - bufflength, input_line_buffered);
2734*22dc650dSSadaf Ebrahimi endptr = main_buffer + bufflength;
2735*22dc650dSSadaf Ebrahimi continue;
2736*22dc650dSSadaf Ebrahimi }
2737*22dc650dSSadaf Ebrahimi else
2738*22dc650dSSadaf Ebrahimi {
2739*22dc650dSSadaf Ebrahimi fprintf(stderr,
2740*22dc650dSSadaf Ebrahimi "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2741*22dc650dSSadaf Ebrahimi "pcre2grep: the maximum buffer size is %" SIZ_FORM "\n"
2742*22dc650dSSadaf Ebrahimi "pcre2grep: use the --max-buffer-size option to change it\n",
2743*22dc650dSSadaf Ebrahimi linenumber,
2744*22dc650dSSadaf Ebrahimi (filename == NULL)? "" : " of file ",
2745*22dc650dSSadaf Ebrahimi (filename == NULL)? "" : filename,
2746*22dc650dSSadaf Ebrahimi bufthird);
2747*22dc650dSSadaf Ebrahimi return 2;
2748*22dc650dSSadaf Ebrahimi }
2749*22dc650dSSadaf Ebrahimi }
2750*22dc650dSSadaf Ebrahimi
2751*22dc650dSSadaf Ebrahimi /* We come back here after a match when only_matching_count is non-zero, in
2752*22dc650dSSadaf Ebrahimi order to find any further matches in the same line. This applies to
2753*22dc650dSSadaf Ebrahimi --only-matching, --file-offsets, and --line-offsets. */
2754*22dc650dSSadaf Ebrahimi
2755*22dc650dSSadaf Ebrahimi ONLY_MATCHING_RESTART:
2756*22dc650dSSadaf Ebrahimi
2757*22dc650dSSadaf Ebrahimi /* Run through all the patterns until one matches or there is an error other
2758*22dc650dSSadaf Ebrahimi than NOMATCH. This code is in a subroutine so that it can be re-used for
2759*22dc650dSSadaf Ebrahimi finding subsequent matches when colouring matched lines. After finding one
2760*22dc650dSSadaf Ebrahimi match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2761*22dc650dSSadaf Ebrahimi this line. */
2762*22dc650dSSadaf Ebrahimi
2763*22dc650dSSadaf Ebrahimi match = match_patterns(ptr, length, options, startoffset, &mrc);
2764*22dc650dSSadaf Ebrahimi options = PCRE2_NOTEMPTY;
2765*22dc650dSSadaf Ebrahimi
2766*22dc650dSSadaf Ebrahimi /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2767*22dc650dSSadaf Ebrahimi only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2768*22dc650dSSadaf Ebrahimi return code - to output data lines, so that binary zeroes are treated as just
2769*22dc650dSSadaf Ebrahimi another data character. */
2770*22dc650dSSadaf Ebrahimi
2771*22dc650dSSadaf Ebrahimi if (match != invert)
2772*22dc650dSSadaf Ebrahimi {
2773*22dc650dSSadaf Ebrahimi BOOL hyphenprinted = FALSE;
2774*22dc650dSSadaf Ebrahimi
2775*22dc650dSSadaf Ebrahimi /* We've failed if we want a file that doesn't have any matches. */
2776*22dc650dSSadaf Ebrahimi
2777*22dc650dSSadaf Ebrahimi if (filenames == FN_NOMATCH_ONLY) return 1;
2778*22dc650dSSadaf Ebrahimi
2779*22dc650dSSadaf Ebrahimi /* Remember that this line matched (for counting matched lines) */
2780*22dc650dSSadaf Ebrahimi
2781*22dc650dSSadaf Ebrahimi line_matched = TRUE;
2782*22dc650dSSadaf Ebrahimi
2783*22dc650dSSadaf Ebrahimi /* If all we want is a yes/no answer, we can return immediately. */
2784*22dc650dSSadaf Ebrahimi
2785*22dc650dSSadaf Ebrahimi if (quiet) return 0;
2786*22dc650dSSadaf Ebrahimi
2787*22dc650dSSadaf Ebrahimi /* Just count if just counting is wanted. */
2788*22dc650dSSadaf Ebrahimi
2789*22dc650dSSadaf Ebrahimi else if (count_only || show_total_count) count++;
2790*22dc650dSSadaf Ebrahimi
2791*22dc650dSSadaf Ebrahimi /* When handling a binary file and binary-files==binary, the "binary"
2792*22dc650dSSadaf Ebrahimi variable will be set true (it's false in all other cases). In this
2793*22dc650dSSadaf Ebrahimi situation we just want to output the file name. No need to scan further. */
2794*22dc650dSSadaf Ebrahimi
2795*22dc650dSSadaf Ebrahimi else if (binary)
2796*22dc650dSSadaf Ebrahimi {
2797*22dc650dSSadaf Ebrahimi fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2798*22dc650dSSadaf Ebrahimi return 0;
2799*22dc650dSSadaf Ebrahimi }
2800*22dc650dSSadaf Ebrahimi
2801*22dc650dSSadaf Ebrahimi /* Likewise, if all we want is a file name, there is no need to scan any
2802*22dc650dSSadaf Ebrahimi more lines in the file. */
2803*22dc650dSSadaf Ebrahimi
2804*22dc650dSSadaf Ebrahimi else if (filenames == FN_MATCH_ONLY)
2805*22dc650dSSadaf Ebrahimi {
2806*22dc650dSSadaf Ebrahimi fprintf(stdout, "%s", printname);
2807*22dc650dSSadaf Ebrahimi if (printname_nl == NULL) fprintf(stdout, "%c", 0);
2808*22dc650dSSadaf Ebrahimi else fprintf(stdout, "%s", printname_nl);
2809*22dc650dSSadaf Ebrahimi return 0;
2810*22dc650dSSadaf Ebrahimi }
2811*22dc650dSSadaf Ebrahimi
2812*22dc650dSSadaf Ebrahimi /* The --only-matching option prints just the substring that matched,
2813*22dc650dSSadaf Ebrahimi and/or one or more captured portions of it, as long as these strings are
2814*22dc650dSSadaf Ebrahimi not empty. The --file-offsets and --line-offsets options output offsets for
2815*22dc650dSSadaf Ebrahimi the matching substring (all three set only_matching_count non-zero). None
2816*22dc650dSSadaf Ebrahimi of these mutually exclusive options prints any context. Afterwards, adjust
2817*22dc650dSSadaf Ebrahimi the start and then jump back to look for further matches in the same line.
2818*22dc650dSSadaf Ebrahimi If we are in invert mode, however, nothing is printed and we do not restart
2819*22dc650dSSadaf Ebrahimi - this could still be useful because the return code is set. */
2820*22dc650dSSadaf Ebrahimi
2821*22dc650dSSadaf Ebrahimi else if (only_matching_count != 0)
2822*22dc650dSSadaf Ebrahimi {
2823*22dc650dSSadaf Ebrahimi if (!invert)
2824*22dc650dSSadaf Ebrahimi {
2825*22dc650dSSadaf Ebrahimi PCRE2_SIZE oldstartoffset;
2826*22dc650dSSadaf Ebrahimi
2827*22dc650dSSadaf Ebrahimi if (printname != NULL) fprintf(stdout, "%s%c", printname,
2828*22dc650dSSadaf Ebrahimi printname_colon);
2829*22dc650dSSadaf Ebrahimi if (number) fprintf(stdout, "%lu:", linenumber);
2830*22dc650dSSadaf Ebrahimi
2831*22dc650dSSadaf Ebrahimi /* Handle --line-offsets */
2832*22dc650dSSadaf Ebrahimi
2833*22dc650dSSadaf Ebrahimi if (line_offsets)
2834*22dc650dSSadaf Ebrahimi fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2835*22dc650dSSadaf Ebrahimi (int)(offsets[1] - offsets[0]));
2836*22dc650dSSadaf Ebrahimi
2837*22dc650dSSadaf Ebrahimi /* Handle --file-offsets */
2838*22dc650dSSadaf Ebrahimi
2839*22dc650dSSadaf Ebrahimi else if (file_offsets)
2840*22dc650dSSadaf Ebrahimi fprintf(stdout, "%d,%d" STDOUT_NL,
2841*22dc650dSSadaf Ebrahimi (int)(filepos + ptr + offsets[0] - ptr),
2842*22dc650dSSadaf Ebrahimi (int)(offsets[1] - offsets[0]));
2843*22dc650dSSadaf Ebrahimi
2844*22dc650dSSadaf Ebrahimi /* Handle --output (which has already been syntax checked) */
2845*22dc650dSSadaf Ebrahimi
2846*22dc650dSSadaf Ebrahimi else if (output_text != NULL)
2847*22dc650dSSadaf Ebrahimi {
2848*22dc650dSSadaf Ebrahimi (void)display_output_text((PCRE2_SPTR)output_text, FALSE,
2849*22dc650dSSadaf Ebrahimi (PCRE2_SPTR)ptr, offsets, mrc);
2850*22dc650dSSadaf Ebrahimi fprintf(stdout, STDOUT_NL);
2851*22dc650dSSadaf Ebrahimi }
2852*22dc650dSSadaf Ebrahimi
2853*22dc650dSSadaf Ebrahimi /* Handle --only-matching, which may occur many times */
2854*22dc650dSSadaf Ebrahimi
2855*22dc650dSSadaf Ebrahimi else
2856*22dc650dSSadaf Ebrahimi {
2857*22dc650dSSadaf Ebrahimi BOOL printed = FALSE;
2858*22dc650dSSadaf Ebrahimi omstr *om;
2859*22dc650dSSadaf Ebrahimi
2860*22dc650dSSadaf Ebrahimi for (om = only_matching; om != NULL; om = om->next)
2861*22dc650dSSadaf Ebrahimi {
2862*22dc650dSSadaf Ebrahimi int n = om->groupnum;
2863*22dc650dSSadaf Ebrahimi if (n == 0 || n < mrc)
2864*22dc650dSSadaf Ebrahimi {
2865*22dc650dSSadaf Ebrahimi int plen = offsets[2*n + 1] - offsets[2*n];
2866*22dc650dSSadaf Ebrahimi if (plen > 0)
2867*22dc650dSSadaf Ebrahimi {
2868*22dc650dSSadaf Ebrahimi if (printed && om_separator != NULL)
2869*22dc650dSSadaf Ebrahimi fprintf(stdout, "%s", om_separator);
2870*22dc650dSSadaf Ebrahimi print_match(ptr + offsets[n*2], plen);
2871*22dc650dSSadaf Ebrahimi printed = TRUE;
2872*22dc650dSSadaf Ebrahimi }
2873*22dc650dSSadaf Ebrahimi }
2874*22dc650dSSadaf Ebrahimi }
2875*22dc650dSSadaf Ebrahimi if (printed || printname != NULL || number)
2876*22dc650dSSadaf Ebrahimi fprintf(stdout, STDOUT_NL);
2877*22dc650dSSadaf Ebrahimi }
2878*22dc650dSSadaf Ebrahimi
2879*22dc650dSSadaf Ebrahimi /* Prepare to repeat to find the next match in the line. */
2880*22dc650dSSadaf Ebrahimi
2881*22dc650dSSadaf Ebrahimi //match = FALSE;
2882*22dc650dSSadaf Ebrahimi if (line_buffered) fflush(stdout);
2883*22dc650dSSadaf Ebrahimi rc = 0; /* Had some success */
2884*22dc650dSSadaf Ebrahimi
2885*22dc650dSSadaf Ebrahimi /* If the pattern contained a lookbehind that included \K, it is
2886*22dc650dSSadaf Ebrahimi possible that the end of the match might be at or before the actual
2887*22dc650dSSadaf Ebrahimi starting offset we have just used. In this case, start one character
2888*22dc650dSSadaf Ebrahimi further on. */
2889*22dc650dSSadaf Ebrahimi
2890*22dc650dSSadaf Ebrahimi startoffset = offsets[1]; /* Restart after the match */
2891*22dc650dSSadaf Ebrahimi oldstartoffset = pcre2_get_startchar(match_data);
2892*22dc650dSSadaf Ebrahimi if (startoffset <= oldstartoffset)
2893*22dc650dSSadaf Ebrahimi {
2894*22dc650dSSadaf Ebrahimi if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2895*22dc650dSSadaf Ebrahimi startoffset = oldstartoffset + 1;
2896*22dc650dSSadaf Ebrahimi if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2897*22dc650dSSadaf Ebrahimi }
2898*22dc650dSSadaf Ebrahimi
2899*22dc650dSSadaf Ebrahimi /* If the current match ended past the end of the line (only possible
2900*22dc650dSSadaf Ebrahimi in multiline mode), we must move on to the line in which it did end
2901*22dc650dSSadaf Ebrahimi before searching for more matches. */
2902*22dc650dSSadaf Ebrahimi
2903*22dc650dSSadaf Ebrahimi while (startoffset > linelength)
2904*22dc650dSSadaf Ebrahimi {
2905*22dc650dSSadaf Ebrahimi ptr += linelength + endlinelength;
2906*22dc650dSSadaf Ebrahimi filepos += (int)(linelength + endlinelength);
2907*22dc650dSSadaf Ebrahimi linenumber++;
2908*22dc650dSSadaf Ebrahimi startoffset -= (int)(linelength + endlinelength);
2909*22dc650dSSadaf Ebrahimi t = end_of_line(ptr, endptr, &endlinelength);
2910*22dc650dSSadaf Ebrahimi linelength = t - ptr - endlinelength;
2911*22dc650dSSadaf Ebrahimi length = (PCRE2_SIZE)(endptr - ptr);
2912*22dc650dSSadaf Ebrahimi }
2913*22dc650dSSadaf Ebrahimi
2914*22dc650dSSadaf Ebrahimi goto ONLY_MATCHING_RESTART;
2915*22dc650dSSadaf Ebrahimi }
2916*22dc650dSSadaf Ebrahimi }
2917*22dc650dSSadaf Ebrahimi
2918*22dc650dSSadaf Ebrahimi /* This is the default case when none of the above options is set. We print
2919*22dc650dSSadaf Ebrahimi the matching line(s), possibly preceded and/or followed by other lines of
2920*22dc650dSSadaf Ebrahimi context. */
2921*22dc650dSSadaf Ebrahimi
2922*22dc650dSSadaf Ebrahimi else
2923*22dc650dSSadaf Ebrahimi {
2924*22dc650dSSadaf Ebrahimi lines_printed = TRUE;
2925*22dc650dSSadaf Ebrahimi
2926*22dc650dSSadaf Ebrahimi /* See if there is a requirement to print some "after" lines from a
2927*22dc650dSSadaf Ebrahimi previous match. We never print any overlaps. */
2928*22dc650dSSadaf Ebrahimi
2929*22dc650dSSadaf Ebrahimi if (after_context > 0 && lastmatchnumber > 0)
2930*22dc650dSSadaf Ebrahimi {
2931*22dc650dSSadaf Ebrahimi int ellength;
2932*22dc650dSSadaf Ebrahimi int linecount = 0;
2933*22dc650dSSadaf Ebrahimi char *p = lastmatchrestart;
2934*22dc650dSSadaf Ebrahimi
2935*22dc650dSSadaf Ebrahimi while (p < ptr && linecount < after_context)
2936*22dc650dSSadaf Ebrahimi {
2937*22dc650dSSadaf Ebrahimi p = end_of_line(p, ptr, &ellength);
2938*22dc650dSSadaf Ebrahimi linecount++;
2939*22dc650dSSadaf Ebrahimi }
2940*22dc650dSSadaf Ebrahimi
2941*22dc650dSSadaf Ebrahimi /* It is important to advance lastmatchrestart during this printing so
2942*22dc650dSSadaf Ebrahimi that it interacts correctly with any "before" printing below. Print
2943*22dc650dSSadaf Ebrahimi each line's data using fwrite() in case there are binary zeroes. */
2944*22dc650dSSadaf Ebrahimi
2945*22dc650dSSadaf Ebrahimi while (lastmatchrestart < p)
2946*22dc650dSSadaf Ebrahimi {
2947*22dc650dSSadaf Ebrahimi char *pp = lastmatchrestart;
2948*22dc650dSSadaf Ebrahimi if (printname != NULL) fprintf(stdout, "%s%c", printname,
2949*22dc650dSSadaf Ebrahimi printname_hyphen);
2950*22dc650dSSadaf Ebrahimi if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2951*22dc650dSSadaf Ebrahimi pp = end_of_line(pp, endptr, &ellength);
2952*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2953*22dc650dSSadaf Ebrahimi lastmatchrestart = pp;
2954*22dc650dSSadaf Ebrahimi }
2955*22dc650dSSadaf Ebrahimi if (lastmatchrestart != ptr) hyphenpending = TRUE;
2956*22dc650dSSadaf Ebrahimi }
2957*22dc650dSSadaf Ebrahimi
2958*22dc650dSSadaf Ebrahimi /* If there were non-contiguous lines printed above, insert hyphens. */
2959*22dc650dSSadaf Ebrahimi
2960*22dc650dSSadaf Ebrahimi if (hyphenpending)
2961*22dc650dSSadaf Ebrahimi {
2962*22dc650dSSadaf Ebrahimi if (group_separator != NULL)
2963*22dc650dSSadaf Ebrahimi fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
2964*22dc650dSSadaf Ebrahimi hyphenpending = FALSE;
2965*22dc650dSSadaf Ebrahimi hyphenprinted = TRUE;
2966*22dc650dSSadaf Ebrahimi }
2967*22dc650dSSadaf Ebrahimi
2968*22dc650dSSadaf Ebrahimi /* See if there is a requirement to print some "before" lines for this
2969*22dc650dSSadaf Ebrahimi match. Again, don't print overlaps. */
2970*22dc650dSSadaf Ebrahimi
2971*22dc650dSSadaf Ebrahimi if (before_context > 0)
2972*22dc650dSSadaf Ebrahimi {
2973*22dc650dSSadaf Ebrahimi int linecount = 0;
2974*22dc650dSSadaf Ebrahimi char *p = ptr;
2975*22dc650dSSadaf Ebrahimi
2976*22dc650dSSadaf Ebrahimi while (p > main_buffer &&
2977*22dc650dSSadaf Ebrahimi (lastmatchnumber == 0 || p > lastmatchrestart) &&
2978*22dc650dSSadaf Ebrahimi linecount < before_context)
2979*22dc650dSSadaf Ebrahimi {
2980*22dc650dSSadaf Ebrahimi linecount++;
2981*22dc650dSSadaf Ebrahimi p = previous_line(p, main_buffer);
2982*22dc650dSSadaf Ebrahimi }
2983*22dc650dSSadaf Ebrahimi
2984*22dc650dSSadaf Ebrahimi if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted &&
2985*22dc650dSSadaf Ebrahimi group_separator != NULL)
2986*22dc650dSSadaf Ebrahimi fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
2987*22dc650dSSadaf Ebrahimi
2988*22dc650dSSadaf Ebrahimi while (p < ptr)
2989*22dc650dSSadaf Ebrahimi {
2990*22dc650dSSadaf Ebrahimi int ellength;
2991*22dc650dSSadaf Ebrahimi char *pp = p;
2992*22dc650dSSadaf Ebrahimi if (printname != NULL) fprintf(stdout, "%s%c", printname,
2993*22dc650dSSadaf Ebrahimi printname_hyphen);
2994*22dc650dSSadaf Ebrahimi if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2995*22dc650dSSadaf Ebrahimi pp = end_of_line(pp, endptr, &ellength);
2996*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(p, 1, pp - p, stdout);
2997*22dc650dSSadaf Ebrahimi p = pp;
2998*22dc650dSSadaf Ebrahimi }
2999*22dc650dSSadaf Ebrahimi }
3000*22dc650dSSadaf Ebrahimi
3001*22dc650dSSadaf Ebrahimi /* Now print the matching line(s); ensure we set hyphenpending at the end
3002*22dc650dSSadaf Ebrahimi of the file if any context lines are being output. */
3003*22dc650dSSadaf Ebrahimi
3004*22dc650dSSadaf Ebrahimi if (after_context > 0 || before_context > 0)
3005*22dc650dSSadaf Ebrahimi endhyphenpending = TRUE;
3006*22dc650dSSadaf Ebrahimi
3007*22dc650dSSadaf Ebrahimi if (printname != NULL) fprintf(stdout, "%s%c", printname,
3008*22dc650dSSadaf Ebrahimi printname_colon);
3009*22dc650dSSadaf Ebrahimi if (number) fprintf(stdout, "%lu:", linenumber);
3010*22dc650dSSadaf Ebrahimi
3011*22dc650dSSadaf Ebrahimi /* In multiline mode, or if colouring, we have to split the line(s) up
3012*22dc650dSSadaf Ebrahimi and search for further matches, but not of course if the line is a
3013*22dc650dSSadaf Ebrahimi non-match. In multiline mode this is necessary in case there is another
3014*22dc650dSSadaf Ebrahimi match that spans the end of the current line. When colouring we want to
3015*22dc650dSSadaf Ebrahimi colour all matches. */
3016*22dc650dSSadaf Ebrahimi
3017*22dc650dSSadaf Ebrahimi if ((multiline || do_colour) && !invert)
3018*22dc650dSSadaf Ebrahimi {
3019*22dc650dSSadaf Ebrahimi int plength;
3020*22dc650dSSadaf Ebrahimi PCRE2_SIZE endprevious;
3021*22dc650dSSadaf Ebrahimi
3022*22dc650dSSadaf Ebrahimi /* The use of \K may make the end offset earlier than the start. In
3023*22dc650dSSadaf Ebrahimi this situation, swap them round. */
3024*22dc650dSSadaf Ebrahimi
3025*22dc650dSSadaf Ebrahimi if (offsets[0] > offsets[1])
3026*22dc650dSSadaf Ebrahimi {
3027*22dc650dSSadaf Ebrahimi PCRE2_SIZE temp = offsets[0];
3028*22dc650dSSadaf Ebrahimi offsets[0] = offsets[1];
3029*22dc650dSSadaf Ebrahimi offsets[1] = temp;
3030*22dc650dSSadaf Ebrahimi }
3031*22dc650dSSadaf Ebrahimi
3032*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3033*22dc650dSSadaf Ebrahimi print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3034*22dc650dSSadaf Ebrahimi
3035*22dc650dSSadaf Ebrahimi for (;;)
3036*22dc650dSSadaf Ebrahimi {
3037*22dc650dSSadaf Ebrahimi PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3038*22dc650dSSadaf Ebrahimi
3039*22dc650dSSadaf Ebrahimi endprevious = offsets[1];
3040*22dc650dSSadaf Ebrahimi startoffset = endprevious; /* Advance after previous match. */
3041*22dc650dSSadaf Ebrahimi
3042*22dc650dSSadaf Ebrahimi /* If the pattern contained a lookbehind that included \K, it is
3043*22dc650dSSadaf Ebrahimi possible that the end of the match might be at or before the actual
3044*22dc650dSSadaf Ebrahimi starting offset we have just used. In this case, start one character
3045*22dc650dSSadaf Ebrahimi further on. */
3046*22dc650dSSadaf Ebrahimi
3047*22dc650dSSadaf Ebrahimi if (startoffset <= oldstartoffset)
3048*22dc650dSSadaf Ebrahimi {
3049*22dc650dSSadaf Ebrahimi startoffset = oldstartoffset + 1;
3050*22dc650dSSadaf Ebrahimi if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3051*22dc650dSSadaf Ebrahimi }
3052*22dc650dSSadaf Ebrahimi
3053*22dc650dSSadaf Ebrahimi /* If the current match ended past the end of the line (only possible
3054*22dc650dSSadaf Ebrahimi in multiline mode), we must move on to the line in which it did end
3055*22dc650dSSadaf Ebrahimi before searching for more matches. Because the PCRE2_FIRSTLINE option
3056*22dc650dSSadaf Ebrahimi is set, the start of the match will always be before the first
3057*22dc650dSSadaf Ebrahimi newline sequence. */
3058*22dc650dSSadaf Ebrahimi
3059*22dc650dSSadaf Ebrahimi while (startoffset > linelength + endlinelength)
3060*22dc650dSSadaf Ebrahimi {
3061*22dc650dSSadaf Ebrahimi ptr += linelength + endlinelength;
3062*22dc650dSSadaf Ebrahimi filepos += (int)(linelength + endlinelength);
3063*22dc650dSSadaf Ebrahimi linenumber++;
3064*22dc650dSSadaf Ebrahimi startoffset -= (int)(linelength + endlinelength);
3065*22dc650dSSadaf Ebrahimi endprevious -= (int)(linelength + endlinelength);
3066*22dc650dSSadaf Ebrahimi t = end_of_line(ptr, endptr, &endlinelength);
3067*22dc650dSSadaf Ebrahimi linelength = t - ptr - endlinelength;
3068*22dc650dSSadaf Ebrahimi length = (PCRE2_SIZE)(endptr - ptr);
3069*22dc650dSSadaf Ebrahimi }
3070*22dc650dSSadaf Ebrahimi
3071*22dc650dSSadaf Ebrahimi /* If startoffset is at the exact end of the line it means this
3072*22dc650dSSadaf Ebrahimi complete line was the final part of the match, so there is nothing
3073*22dc650dSSadaf Ebrahimi more to do. */
3074*22dc650dSSadaf Ebrahimi
3075*22dc650dSSadaf Ebrahimi if (startoffset == linelength + endlinelength) break;
3076*22dc650dSSadaf Ebrahimi
3077*22dc650dSSadaf Ebrahimi /* Otherwise, run a match from within the final line, and if found,
3078*22dc650dSSadaf Ebrahimi loop for any that may follow. */
3079*22dc650dSSadaf Ebrahimi
3080*22dc650dSSadaf Ebrahimi if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3081*22dc650dSSadaf Ebrahimi
3082*22dc650dSSadaf Ebrahimi /* The use of \K may make the end offset earlier than the start. In
3083*22dc650dSSadaf Ebrahimi this situation, swap them round. */
3084*22dc650dSSadaf Ebrahimi
3085*22dc650dSSadaf Ebrahimi if (offsets[0] > offsets[1])
3086*22dc650dSSadaf Ebrahimi {
3087*22dc650dSSadaf Ebrahimi PCRE2_SIZE temp = offsets[0];
3088*22dc650dSSadaf Ebrahimi offsets[0] = offsets[1];
3089*22dc650dSSadaf Ebrahimi offsets[1] = temp;
3090*22dc650dSSadaf Ebrahimi }
3091*22dc650dSSadaf Ebrahimi
3092*22dc650dSSadaf Ebrahimi FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3093*22dc650dSSadaf Ebrahimi print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3094*22dc650dSSadaf Ebrahimi }
3095*22dc650dSSadaf Ebrahimi
3096*22dc650dSSadaf Ebrahimi /* In multiline mode, we may have already printed the complete line
3097*22dc650dSSadaf Ebrahimi and its line-ending characters (if they matched the pattern), so there
3098*22dc650dSSadaf Ebrahimi may be no more to print. */
3099*22dc650dSSadaf Ebrahimi
3100*22dc650dSSadaf Ebrahimi plength = (int)((linelength + endlinelength) - endprevious);
3101*22dc650dSSadaf Ebrahimi if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3102*22dc650dSSadaf Ebrahimi }
3103*22dc650dSSadaf Ebrahimi
3104*22dc650dSSadaf Ebrahimi /* Not colouring or multiline; no need to search for further matches. */
3105*22dc650dSSadaf Ebrahimi
3106*22dc650dSSadaf Ebrahimi else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3107*22dc650dSSadaf Ebrahimi }
3108*22dc650dSSadaf Ebrahimi
3109*22dc650dSSadaf Ebrahimi /* End of doing what has to be done for a match. If --line-buffered was
3110*22dc650dSSadaf Ebrahimi given, flush the output. */
3111*22dc650dSSadaf Ebrahimi
3112*22dc650dSSadaf Ebrahimi if (line_buffered) fflush(stdout);
3113*22dc650dSSadaf Ebrahimi rc = 0; /* Had some success */
3114*22dc650dSSadaf Ebrahimi
3115*22dc650dSSadaf Ebrahimi /* Remember where the last match happened for after_context. We remember
3116*22dc650dSSadaf Ebrahimi where we are about to restart, and that line's number. */
3117*22dc650dSSadaf Ebrahimi
3118*22dc650dSSadaf Ebrahimi lastmatchrestart = ptr + linelength + endlinelength;
3119*22dc650dSSadaf Ebrahimi lastmatchnumber = linenumber + 1;
3120*22dc650dSSadaf Ebrahimi
3121*22dc650dSSadaf Ebrahimi /* If a line was printed and we are now at the end of the file and the last
3122*22dc650dSSadaf Ebrahimi line had no newline, output one. */
3123*22dc650dSSadaf Ebrahimi
3124*22dc650dSSadaf Ebrahimi if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3125*22dc650dSSadaf Ebrahimi write_final_newline();
3126*22dc650dSSadaf Ebrahimi }
3127*22dc650dSSadaf Ebrahimi
3128*22dc650dSSadaf Ebrahimi /* For a match in multiline inverted mode (which of course did not cause
3129*22dc650dSSadaf Ebrahimi anything to be printed), we have to move on to the end of the match before
3130*22dc650dSSadaf Ebrahimi proceeding. */
3131*22dc650dSSadaf Ebrahimi
3132*22dc650dSSadaf Ebrahimi if (multiline && invert && match)
3133*22dc650dSSadaf Ebrahimi {
3134*22dc650dSSadaf Ebrahimi int ellength;
3135*22dc650dSSadaf Ebrahimi char *endmatch = ptr + offsets[1];
3136*22dc650dSSadaf Ebrahimi t = ptr;
3137*22dc650dSSadaf Ebrahimi while (t < endmatch)
3138*22dc650dSSadaf Ebrahimi {
3139*22dc650dSSadaf Ebrahimi t = end_of_line(t, endptr, &ellength);
3140*22dc650dSSadaf Ebrahimi if (t <= endmatch) linenumber++; else break;
3141*22dc650dSSadaf Ebrahimi }
3142*22dc650dSSadaf Ebrahimi endmatch = end_of_line(endmatch, endptr, &ellength);
3143*22dc650dSSadaf Ebrahimi linelength = endmatch - ptr - ellength;
3144*22dc650dSSadaf Ebrahimi }
3145*22dc650dSSadaf Ebrahimi
3146*22dc650dSSadaf Ebrahimi /* Advance to after the newline and increment the line number. The file
3147*22dc650dSSadaf Ebrahimi offset to the current line is maintained in filepos. */
3148*22dc650dSSadaf Ebrahimi
3149*22dc650dSSadaf Ebrahimi END_ONE_MATCH:
3150*22dc650dSSadaf Ebrahimi ptr += linelength + endlinelength;
3151*22dc650dSSadaf Ebrahimi filepos += (int)(linelength + endlinelength);
3152*22dc650dSSadaf Ebrahimi linenumber++;
3153*22dc650dSSadaf Ebrahimi
3154*22dc650dSSadaf Ebrahimi /* If there was at least one match (or a non-match, as required) in the line,
3155*22dc650dSSadaf Ebrahimi increment the count for the -m option. */
3156*22dc650dSSadaf Ebrahimi
3157*22dc650dSSadaf Ebrahimi if (line_matched) count_matched_lines++;
3158*22dc650dSSadaf Ebrahimi
3159*22dc650dSSadaf Ebrahimi /* If input is line buffered, and the buffer is not yet full, read another
3160*22dc650dSSadaf Ebrahimi line and add it into the buffer. */
3161*22dc650dSSadaf Ebrahimi
3162*22dc650dSSadaf Ebrahimi if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3163*22dc650dSSadaf Ebrahimi {
3164*22dc650dSSadaf Ebrahimi PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
3165*22dc650dSSadaf Ebrahimi bufflength += add;
3166*22dc650dSSadaf Ebrahimi endptr += add;
3167*22dc650dSSadaf Ebrahimi }
3168*22dc650dSSadaf Ebrahimi
3169*22dc650dSSadaf Ebrahimi /* If we haven't yet reached the end of the file (the buffer is full), and
3170*22dc650dSSadaf Ebrahimi the current point is in the top 1/3 of the buffer, slide the buffer down by
3171*22dc650dSSadaf Ebrahimi 1/3 and refill it. Before we do this, if some unprinted "after" lines are
3172*22dc650dSSadaf Ebrahimi about to be lost, print them. */
3173*22dc650dSSadaf Ebrahimi
3174*22dc650dSSadaf Ebrahimi if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3175*22dc650dSSadaf Ebrahimi {
3176*22dc650dSSadaf Ebrahimi if (after_context > 0 &&
3177*22dc650dSSadaf Ebrahimi lastmatchnumber > 0 &&
3178*22dc650dSSadaf Ebrahimi lastmatchrestart < main_buffer + bufthird)
3179*22dc650dSSadaf Ebrahimi {
3180*22dc650dSSadaf Ebrahimi do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3181*22dc650dSSadaf Ebrahimi lastmatchnumber = 0; /* Indicates no after lines pending */
3182*22dc650dSSadaf Ebrahimi }
3183*22dc650dSSadaf Ebrahimi
3184*22dc650dSSadaf Ebrahimi /* Now do the shuffle */
3185*22dc650dSSadaf Ebrahimi
3186*22dc650dSSadaf Ebrahimi (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3187*22dc650dSSadaf Ebrahimi ptr -= bufthird;
3188*22dc650dSSadaf Ebrahimi
3189*22dc650dSSadaf Ebrahimi bufflength = 2*bufthird + fill_buffer(handle, frtype,
3190*22dc650dSSadaf Ebrahimi main_buffer + 2*bufthird, bufthird, input_line_buffered);
3191*22dc650dSSadaf Ebrahimi endptr = main_buffer + bufflength;
3192*22dc650dSSadaf Ebrahimi
3193*22dc650dSSadaf Ebrahimi /* Adjust any last match point */
3194*22dc650dSSadaf Ebrahimi
3195*22dc650dSSadaf Ebrahimi if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3196*22dc650dSSadaf Ebrahimi }
3197*22dc650dSSadaf Ebrahimi } /* Loop through the whole file */
3198*22dc650dSSadaf Ebrahimi
3199*22dc650dSSadaf Ebrahimi /* End of file; print final "after" lines if wanted; do_after_lines sets
3200*22dc650dSSadaf Ebrahimi hyphenpending if it prints something. */
3201*22dc650dSSadaf Ebrahimi
3202*22dc650dSSadaf Ebrahimi if (only_matching_count == 0 && !(count_only|show_total_count))
3203*22dc650dSSadaf Ebrahimi {
3204*22dc650dSSadaf Ebrahimi do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3205*22dc650dSSadaf Ebrahimi hyphenpending |= endhyphenpending;
3206*22dc650dSSadaf Ebrahimi }
3207*22dc650dSSadaf Ebrahimi
3208*22dc650dSSadaf Ebrahimi /* Print the file name if we are looking for those without matches and there
3209*22dc650dSSadaf Ebrahimi were none. If we found a match, we won't have got this far. */
3210*22dc650dSSadaf Ebrahimi
3211*22dc650dSSadaf Ebrahimi if (filenames == FN_NOMATCH_ONLY)
3212*22dc650dSSadaf Ebrahimi {
3213*22dc650dSSadaf Ebrahimi fprintf(stdout, "%s", printname);
3214*22dc650dSSadaf Ebrahimi if (printname_nl == NULL) fprintf(stdout, "%c", 0);
3215*22dc650dSSadaf Ebrahimi else fprintf(stdout, "%s", printname_nl);
3216*22dc650dSSadaf Ebrahimi return 0;
3217*22dc650dSSadaf Ebrahimi }
3218*22dc650dSSadaf Ebrahimi
3219*22dc650dSSadaf Ebrahimi /* Print the match count if wanted */
3220*22dc650dSSadaf Ebrahimi
3221*22dc650dSSadaf Ebrahimi if (count_only && !quiet)
3222*22dc650dSSadaf Ebrahimi {
3223*22dc650dSSadaf Ebrahimi if (count > 0 || !omit_zero_count)
3224*22dc650dSSadaf Ebrahimi {
3225*22dc650dSSadaf Ebrahimi if (printname != NULL && filenames != FN_NONE)
3226*22dc650dSSadaf Ebrahimi fprintf(stdout, "%s%c", printname, printname_colon);
3227*22dc650dSSadaf Ebrahimi fprintf(stdout, "%lu" STDOUT_NL, count);
3228*22dc650dSSadaf Ebrahimi counts_printed++;
3229*22dc650dSSadaf Ebrahimi }
3230*22dc650dSSadaf Ebrahimi }
3231*22dc650dSSadaf Ebrahimi
3232*22dc650dSSadaf Ebrahimi total_count += count; /* Can be set without count_only */
3233*22dc650dSSadaf Ebrahimi return rc;
3234*22dc650dSSadaf Ebrahimi }
3235*22dc650dSSadaf Ebrahimi
3236*22dc650dSSadaf Ebrahimi
3237*22dc650dSSadaf Ebrahimi
3238*22dc650dSSadaf Ebrahimi /*************************************************
3239*22dc650dSSadaf Ebrahimi * Grep a file or recurse into a directory *
3240*22dc650dSSadaf Ebrahimi *************************************************/
3241*22dc650dSSadaf Ebrahimi
3242*22dc650dSSadaf Ebrahimi /* Given a path name, if it's a directory, scan all the files if we are
3243*22dc650dSSadaf Ebrahimi recursing; if it's a file, grep it.
3244*22dc650dSSadaf Ebrahimi
3245*22dc650dSSadaf Ebrahimi Arguments:
3246*22dc650dSSadaf Ebrahimi pathname the path to investigate
3247*22dc650dSSadaf Ebrahimi dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3248*22dc650dSSadaf Ebrahimi only_one_at_top TRUE if the path is the only one at toplevel
3249*22dc650dSSadaf Ebrahimi
3250*22dc650dSSadaf Ebrahimi Returns: -1 the file/directory was skipped
3251*22dc650dSSadaf Ebrahimi 0 if there was at least one match
3252*22dc650dSSadaf Ebrahimi 1 if there were no matches
3253*22dc650dSSadaf Ebrahimi 2 there was some kind of error
3254*22dc650dSSadaf Ebrahimi
3255*22dc650dSSadaf Ebrahimi However, the error messages for file opening failures are suppressed if "silent" is set.
3256*22dc650dSSadaf Ebrahimi */
3257*22dc650dSSadaf Ebrahimi
3258*22dc650dSSadaf Ebrahimi static int
3259*22dc650dSSadaf Ebrahimi grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3260*22dc650dSSadaf Ebrahimi {
3261*22dc650dSSadaf Ebrahimi int rc = 1;
3262*22dc650dSSadaf Ebrahimi int frtype;
3263*22dc650dSSadaf Ebrahimi void *handle;
3264*22dc650dSSadaf Ebrahimi char *lastcomp;
3265*22dc650dSSadaf Ebrahimi FILE *in = NULL; /* Ensure initialized */
3266*22dc650dSSadaf Ebrahimi
3267*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
3268*22dc650dSSadaf Ebrahimi gzFile ingz = NULL;
3269*22dc650dSSadaf Ebrahimi #endif
3270*22dc650dSSadaf Ebrahimi
3271*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
3272*22dc650dSSadaf Ebrahimi BZFILE *inbz2 = NULL;
3273*22dc650dSSadaf Ebrahimi #endif
3274*22dc650dSSadaf Ebrahimi
3275*22dc650dSSadaf Ebrahimi #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3276*22dc650dSSadaf Ebrahimi int pathlen;
3277*22dc650dSSadaf Ebrahimi #endif
3278*22dc650dSSadaf Ebrahimi
3279*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
3280*22dc650dSSadaf Ebrahimi int zos_type;
3281*22dc650dSSadaf Ebrahimi FILE *zos_test_file;
3282*22dc650dSSadaf Ebrahimi #endif
3283*22dc650dSSadaf Ebrahimi
3284*22dc650dSSadaf Ebrahimi /* If the file name is "-" we scan stdin */
3285*22dc650dSSadaf Ebrahimi
3286*22dc650dSSadaf Ebrahimi if (strcmp(pathname, "-") == 0)
3287*22dc650dSSadaf Ebrahimi {
3288*22dc650dSSadaf Ebrahimi if (count_limit >= 0) setbuf(stdin, NULL);
3289*22dc650dSSadaf Ebrahimi return pcre2grep(stdin, FR_PLAIN, stdin_name,
3290*22dc650dSSadaf Ebrahimi (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3291*22dc650dSSadaf Ebrahimi stdin_name : NULL);
3292*22dc650dSSadaf Ebrahimi }
3293*22dc650dSSadaf Ebrahimi
3294*22dc650dSSadaf Ebrahimi /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3295*22dc650dSSadaf Ebrahimi directories, whereas --include and --exclude apply to everything else. The test
3296*22dc650dSSadaf Ebrahimi is against the final component of the path. */
3297*22dc650dSSadaf Ebrahimi
3298*22dc650dSSadaf Ebrahimi lastcomp = strrchr(pathname, FILESEP);
3299*22dc650dSSadaf Ebrahimi lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3300*22dc650dSSadaf Ebrahimi
3301*22dc650dSSadaf Ebrahimi /* If the file is a directory, skip if not recursing or if explicitly excluded.
3302*22dc650dSSadaf Ebrahimi Otherwise, scan the directory and recurse for each path within it. The scanning
3303*22dc650dSSadaf Ebrahimi code is localized so it can be made system-specific. */
3304*22dc650dSSadaf Ebrahimi
3305*22dc650dSSadaf Ebrahimi
3306*22dc650dSSadaf Ebrahimi /* For z/OS, determine the file type. */
3307*22dc650dSSadaf Ebrahimi
3308*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
3309*22dc650dSSadaf Ebrahimi zos_test_file = fopen(pathname,"rb");
3310*22dc650dSSadaf Ebrahimi
3311*22dc650dSSadaf Ebrahimi if (zos_test_file == NULL)
3312*22dc650dSSadaf Ebrahimi {
3313*22dc650dSSadaf Ebrahimi if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3314*22dc650dSSadaf Ebrahimi pathname, strerror(errno));
3315*22dc650dSSadaf Ebrahimi return -1;
3316*22dc650dSSadaf Ebrahimi }
3317*22dc650dSSadaf Ebrahimi zos_type = identifyzosfiletype (zos_test_file);
3318*22dc650dSSadaf Ebrahimi fclose (zos_test_file);
3319*22dc650dSSadaf Ebrahimi
3320*22dc650dSSadaf Ebrahimi /* Handle a PDS in separate code */
3321*22dc650dSSadaf Ebrahimi
3322*22dc650dSSadaf Ebrahimi if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3323*22dc650dSSadaf Ebrahimi {
3324*22dc650dSSadaf Ebrahimi return travelonpdsdir (pathname, only_one_at_top);
3325*22dc650dSSadaf Ebrahimi }
3326*22dc650dSSadaf Ebrahimi
3327*22dc650dSSadaf Ebrahimi /* Deal with regular files in the normal way below. These types are:
3328*22dc650dSSadaf Ebrahimi zos_type == __ZOS_PDS_MEMBER
3329*22dc650dSSadaf Ebrahimi zos_type == __ZOS_PS
3330*22dc650dSSadaf Ebrahimi zos_type == __ZOS_VSAM_KSDS
3331*22dc650dSSadaf Ebrahimi zos_type == __ZOS_VSAM_ESDS
3332*22dc650dSSadaf Ebrahimi zos_type == __ZOS_VSAM_RRDS
3333*22dc650dSSadaf Ebrahimi */
3334*22dc650dSSadaf Ebrahimi
3335*22dc650dSSadaf Ebrahimi /* Handle a z/OS directory using common code. */
3336*22dc650dSSadaf Ebrahimi
3337*22dc650dSSadaf Ebrahimi else if (zos_type == __ZOS_HFS)
3338*22dc650dSSadaf Ebrahimi {
3339*22dc650dSSadaf Ebrahimi #endif /* NATIVE_ZOS */
3340*22dc650dSSadaf Ebrahimi
3341*22dc650dSSadaf Ebrahimi
3342*22dc650dSSadaf Ebrahimi /* Handle directories: common code for all OS */
3343*22dc650dSSadaf Ebrahimi
3344*22dc650dSSadaf Ebrahimi if (isdirectory(pathname))
3345*22dc650dSSadaf Ebrahimi {
3346*22dc650dSSadaf Ebrahimi if (dee_action == dee_SKIP ||
3347*22dc650dSSadaf Ebrahimi !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3348*22dc650dSSadaf Ebrahimi return -1;
3349*22dc650dSSadaf Ebrahimi
3350*22dc650dSSadaf Ebrahimi if (dee_action == dee_RECURSE)
3351*22dc650dSSadaf Ebrahimi {
3352*22dc650dSSadaf Ebrahimi char childpath[FNBUFSIZ];
3353*22dc650dSSadaf Ebrahimi char *nextfile;
3354*22dc650dSSadaf Ebrahimi directory_type *dir = opendirectory(pathname);
3355*22dc650dSSadaf Ebrahimi
3356*22dc650dSSadaf Ebrahimi if (dir == NULL)
3357*22dc650dSSadaf Ebrahimi {
3358*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - this is a "never" event */
3359*22dc650dSSadaf Ebrahimi if (!silent)
3360*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3361*22dc650dSSadaf Ebrahimi strerror(errno));
3362*22dc650dSSadaf Ebrahimi return 2;
3363*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
3364*22dc650dSSadaf Ebrahimi }
3365*22dc650dSSadaf Ebrahimi
3366*22dc650dSSadaf Ebrahimi while ((nextfile = readdirectory(dir)) != NULL)
3367*22dc650dSSadaf Ebrahimi {
3368*22dc650dSSadaf Ebrahimi int frc;
3369*22dc650dSSadaf Ebrahimi int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3370*22dc650dSSadaf Ebrahimi if (fnlength > FNBUFSIZ)
3371*22dc650dSSadaf Ebrahimi {
3372*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - this is a "never" event */
3373*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3374*22dc650dSSadaf Ebrahimi rc = 2;
3375*22dc650dSSadaf Ebrahimi break;
3376*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
3377*22dc650dSSadaf Ebrahimi }
3378*22dc650dSSadaf Ebrahimi sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3379*22dc650dSSadaf Ebrahimi
3380*22dc650dSSadaf Ebrahimi /* If the realpath() function is available, we can try to prevent endless
3381*22dc650dSSadaf Ebrahimi recursion caused by a symlink pointing to a parent directory (GitHub
3382*22dc650dSSadaf Ebrahimi issue #2, old Bugzilla #2794). Original patch from Thomas Tempelmann.
3383*22dc650dSSadaf Ebrahimi Modified to avoid using strlcat() because that isn't a standard C
3384*22dc650dSSadaf Ebrahimi function, and also modified not to copy back the fully resolved path,
3385*22dc650dSSadaf Ebrahimi because that affects the output from pcre2grep. */
3386*22dc650dSSadaf Ebrahimi
3387*22dc650dSSadaf Ebrahimi #ifdef HAVE_REALPATH
3388*22dc650dSSadaf Ebrahimi {
3389*22dc650dSSadaf Ebrahimi char resolvedpath[PATH_MAX];
3390*22dc650dSSadaf Ebrahimi BOOL isSame;
3391*22dc650dSSadaf Ebrahimi size_t rlen;
3392*22dc650dSSadaf Ebrahimi if (realpath(childpath, resolvedpath) == NULL)
3393*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - this is a "never" event */
3394*22dc650dSSadaf Ebrahimi continue; /* This path is invalid - we can skip processing this */
3395*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
3396*22dc650dSSadaf Ebrahimi isSame = strcmp(pathname, resolvedpath) == 0;
3397*22dc650dSSadaf Ebrahimi if (isSame) continue; /* We have a recursion */
3398*22dc650dSSadaf Ebrahimi rlen = strlen(resolvedpath);
3399*22dc650dSSadaf Ebrahimi if (rlen++ < sizeof(resolvedpath) - 3)
3400*22dc650dSSadaf Ebrahimi {
3401*22dc650dSSadaf Ebrahimi BOOL contained;
3402*22dc650dSSadaf Ebrahimi strcat(resolvedpath, "/");
3403*22dc650dSSadaf Ebrahimi contained = strncmp(pathname, resolvedpath, rlen) == 0;
3404*22dc650dSSadaf Ebrahimi if (contained) continue; /* We have a recursion */
3405*22dc650dSSadaf Ebrahimi }
3406*22dc650dSSadaf Ebrahimi }
3407*22dc650dSSadaf Ebrahimi #endif /* HAVE_REALPATH */
3408*22dc650dSSadaf Ebrahimi
3409*22dc650dSSadaf Ebrahimi frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3410*22dc650dSSadaf Ebrahimi if (frc > 1) rc = frc;
3411*22dc650dSSadaf Ebrahimi else if (frc == 0 && rc == 1) rc = 0;
3412*22dc650dSSadaf Ebrahimi }
3413*22dc650dSSadaf Ebrahimi
3414*22dc650dSSadaf Ebrahimi closedirectory(dir);
3415*22dc650dSSadaf Ebrahimi return rc;
3416*22dc650dSSadaf Ebrahimi }
3417*22dc650dSSadaf Ebrahimi }
3418*22dc650dSSadaf Ebrahimi
3419*22dc650dSSadaf Ebrahimi #ifdef WIN32
3420*22dc650dSSadaf Ebrahimi if (iswild(pathname))
3421*22dc650dSSadaf Ebrahimi {
3422*22dc650dSSadaf Ebrahimi char buffer[1024];
3423*22dc650dSSadaf Ebrahimi char *nextfile;
3424*22dc650dSSadaf Ebrahimi char *name;
3425*22dc650dSSadaf Ebrahimi directory_type *dir = opendirectory(pathname);
3426*22dc650dSSadaf Ebrahimi
3427*22dc650dSSadaf Ebrahimi if (dir == NULL)
3428*22dc650dSSadaf Ebrahimi return 0;
3429*22dc650dSSadaf Ebrahimi
3430*22dc650dSSadaf Ebrahimi for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3431*22dc650dSSadaf Ebrahimi if (*nextfile == '/' || *nextfile == '\\')
3432*22dc650dSSadaf Ebrahimi name = nextfile + 1;
3433*22dc650dSSadaf Ebrahimi *name = 0;
3434*22dc650dSSadaf Ebrahimi
3435*22dc650dSSadaf Ebrahimi while ((nextfile = readdirectory(dir)) != NULL)
3436*22dc650dSSadaf Ebrahimi {
3437*22dc650dSSadaf Ebrahimi int frc;
3438*22dc650dSSadaf Ebrahimi sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3439*22dc650dSSadaf Ebrahimi frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3440*22dc650dSSadaf Ebrahimi if (frc > 1) rc = frc;
3441*22dc650dSSadaf Ebrahimi else if (frc == 0 && rc == 1) rc = 0;
3442*22dc650dSSadaf Ebrahimi }
3443*22dc650dSSadaf Ebrahimi
3444*22dc650dSSadaf Ebrahimi closedirectory(dir);
3445*22dc650dSSadaf Ebrahimi return rc;
3446*22dc650dSSadaf Ebrahimi }
3447*22dc650dSSadaf Ebrahimi #endif
3448*22dc650dSSadaf Ebrahimi
3449*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
3450*22dc650dSSadaf Ebrahimi }
3451*22dc650dSSadaf Ebrahimi #endif
3452*22dc650dSSadaf Ebrahimi
3453*22dc650dSSadaf Ebrahimi /* If the file is not a directory, check for a regular file, and if it is not,
3454*22dc650dSSadaf Ebrahimi skip it if that's been requested. Otherwise, check for an explicit inclusion or
3455*22dc650dSSadaf Ebrahimi exclusion. */
3456*22dc650dSSadaf Ebrahimi
3457*22dc650dSSadaf Ebrahimi else if (
3458*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
3459*22dc650dSSadaf Ebrahimi (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3460*22dc650dSSadaf Ebrahimi #else /* all other OS */
3461*22dc650dSSadaf Ebrahimi (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3462*22dc650dSSadaf Ebrahimi #endif
3463*22dc650dSSadaf Ebrahimi !test_incexc(lastcomp, include_patterns, exclude_patterns))
3464*22dc650dSSadaf Ebrahimi return -1; /* File skipped */
3465*22dc650dSSadaf Ebrahimi
3466*22dc650dSSadaf Ebrahimi /* Control reaches here if we have a regular file, or if we have a directory
3467*22dc650dSSadaf Ebrahimi and recursion or skipping was not requested, or if we have anything else and
3468*22dc650dSSadaf Ebrahimi skipping was not requested. The scan proceeds. If this is the first and only
3469*22dc650dSSadaf Ebrahimi argument at top level, we don't show the file name, unless we are only showing
3470*22dc650dSSadaf Ebrahimi the file name, or the filename was forced (-H). */
3471*22dc650dSSadaf Ebrahimi
3472*22dc650dSSadaf Ebrahimi #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3473*22dc650dSSadaf Ebrahimi pathlen = (int)(strlen(pathname));
3474*22dc650dSSadaf Ebrahimi #endif
3475*22dc650dSSadaf Ebrahimi
3476*22dc650dSSadaf Ebrahimi /* Open using zlib if it is supported and the file name ends with .gz. */
3477*22dc650dSSadaf Ebrahimi
3478*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
3479*22dc650dSSadaf Ebrahimi if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3480*22dc650dSSadaf Ebrahimi {
3481*22dc650dSSadaf Ebrahimi ingz = gzopen(pathname, "rb");
3482*22dc650dSSadaf Ebrahimi if (ingz == NULL)
3483*22dc650dSSadaf Ebrahimi {
3484*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
3485*22dc650dSSadaf Ebrahimi if (!silent)
3486*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3487*22dc650dSSadaf Ebrahimi strerror(errno));
3488*22dc650dSSadaf Ebrahimi return 2;
3489*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
3490*22dc650dSSadaf Ebrahimi }
3491*22dc650dSSadaf Ebrahimi handle = (void *)ingz;
3492*22dc650dSSadaf Ebrahimi frtype = FR_LIBZ;
3493*22dc650dSSadaf Ebrahimi }
3494*22dc650dSSadaf Ebrahimi else
3495*22dc650dSSadaf Ebrahimi #endif
3496*22dc650dSSadaf Ebrahimi
3497*22dc650dSSadaf Ebrahimi /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3498*22dc650dSSadaf Ebrahimi
3499*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
3500*22dc650dSSadaf Ebrahimi if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3501*22dc650dSSadaf Ebrahimi {
3502*22dc650dSSadaf Ebrahimi inbz2 = BZ2_bzopen(pathname, "rb");
3503*22dc650dSSadaf Ebrahimi handle = (void *)inbz2;
3504*22dc650dSSadaf Ebrahimi frtype = FR_LIBBZ2;
3505*22dc650dSSadaf Ebrahimi }
3506*22dc650dSSadaf Ebrahimi else
3507*22dc650dSSadaf Ebrahimi #endif
3508*22dc650dSSadaf Ebrahimi
3509*22dc650dSSadaf Ebrahimi /* Otherwise use plain fopen(). The label is so that we can come back here if
3510*22dc650dSSadaf Ebrahimi an attempt to read a .bz2 file indicates that it really is a plain file. */
3511*22dc650dSSadaf Ebrahimi
3512*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
3513*22dc650dSSadaf Ebrahimi PLAIN_FILE:
3514*22dc650dSSadaf Ebrahimi #endif
3515*22dc650dSSadaf Ebrahimi {
3516*22dc650dSSadaf Ebrahimi in = fopen(pathname, "rb");
3517*22dc650dSSadaf Ebrahimi handle = (void *)in;
3518*22dc650dSSadaf Ebrahimi frtype = FR_PLAIN;
3519*22dc650dSSadaf Ebrahimi }
3520*22dc650dSSadaf Ebrahimi
3521*22dc650dSSadaf Ebrahimi /* All the opening methods return errno when they fail. */
3522*22dc650dSSadaf Ebrahimi
3523*22dc650dSSadaf Ebrahimi if (handle == NULL)
3524*22dc650dSSadaf Ebrahimi {
3525*22dc650dSSadaf Ebrahimi if (!silent)
3526*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3527*22dc650dSSadaf Ebrahimi strerror(errno));
3528*22dc650dSSadaf Ebrahimi return 2;
3529*22dc650dSSadaf Ebrahimi }
3530*22dc650dSSadaf Ebrahimi
3531*22dc650dSSadaf Ebrahimi /* Now grep the file */
3532*22dc650dSSadaf Ebrahimi
3533*22dc650dSSadaf Ebrahimi rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3534*22dc650dSSadaf Ebrahimi (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3535*22dc650dSSadaf Ebrahimi
3536*22dc650dSSadaf Ebrahimi /* Close in an appropriate manner. */
3537*22dc650dSSadaf Ebrahimi
3538*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBZ
3539*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBZ)
3540*22dc650dSSadaf Ebrahimi gzclose(ingz);
3541*22dc650dSSadaf Ebrahimi else
3542*22dc650dSSadaf Ebrahimi #endif
3543*22dc650dSSadaf Ebrahimi
3544*22dc650dSSadaf Ebrahimi /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3545*22dc650dSSadaf Ebrahimi read failed. If the error indicates that the file isn't in fact bzipped, try
3546*22dc650dSSadaf Ebrahimi again as a normal file. */
3547*22dc650dSSadaf Ebrahimi
3548*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_LIBBZ2
3549*22dc650dSSadaf Ebrahimi if (frtype == FR_LIBBZ2)
3550*22dc650dSSadaf Ebrahimi {
3551*22dc650dSSadaf Ebrahimi if (rc == 3)
3552*22dc650dSSadaf Ebrahimi {
3553*22dc650dSSadaf Ebrahimi int errnum;
3554*22dc650dSSadaf Ebrahimi const char *err = BZ2_bzerror(inbz2, &errnum);
3555*22dc650dSSadaf Ebrahimi if (errnum == BZ_DATA_ERROR_MAGIC)
3556*22dc650dSSadaf Ebrahimi {
3557*22dc650dSSadaf Ebrahimi BZ2_bzclose(inbz2);
3558*22dc650dSSadaf Ebrahimi goto PLAIN_FILE;
3559*22dc650dSSadaf Ebrahimi }
3560*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
3561*22dc650dSSadaf Ebrahimi else if (!silent)
3562*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3563*22dc650dSSadaf Ebrahimi pathname, err);
3564*22dc650dSSadaf Ebrahimi rc = 2; /* The normal "something went wrong" code */
3565*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
3566*22dc650dSSadaf Ebrahimi }
3567*22dc650dSSadaf Ebrahimi BZ2_bzclose(inbz2);
3568*22dc650dSSadaf Ebrahimi }
3569*22dc650dSSadaf Ebrahimi else
3570*22dc650dSSadaf Ebrahimi #endif
3571*22dc650dSSadaf Ebrahimi
3572*22dc650dSSadaf Ebrahimi /* Normal file close */
3573*22dc650dSSadaf Ebrahimi
3574*22dc650dSSadaf Ebrahimi fclose(in);
3575*22dc650dSSadaf Ebrahimi
3576*22dc650dSSadaf Ebrahimi /* Pass back the yield from pcre2grep(). */
3577*22dc650dSSadaf Ebrahimi
3578*22dc650dSSadaf Ebrahimi return rc;
3579*22dc650dSSadaf Ebrahimi }
3580*22dc650dSSadaf Ebrahimi
3581*22dc650dSSadaf Ebrahimi
3582*22dc650dSSadaf Ebrahimi
/*************************************************
*            Handle a no-data option             *
*************************************************/

/* This is called when a known option has been identified. */
3588*22dc650dSSadaf Ebrahimi
3589*22dc650dSSadaf Ebrahimi static int
handle_option(int letter,int options)3590*22dc650dSSadaf Ebrahimi handle_option(int letter, int options)
3591*22dc650dSSadaf Ebrahimi {
3592*22dc650dSSadaf Ebrahimi switch(letter)
3593*22dc650dSSadaf Ebrahimi {
3594*22dc650dSSadaf Ebrahimi case N_FOFFSETS: file_offsets = TRUE; break;
3595*22dc650dSSadaf Ebrahimi case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3596*22dc650dSSadaf Ebrahimi case N_LBUFFER: line_buffered = TRUE; break;
3597*22dc650dSSadaf Ebrahimi case N_LOFFSETS: line_offsets = number = TRUE; break;
3598*22dc650dSSadaf Ebrahimi case N_NOJIT: use_jit = FALSE; break;
3599*22dc650dSSadaf Ebrahimi case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3600*22dc650dSSadaf Ebrahimi case N_NO_GROUP_SEPARATOR: group_separator = NULL; break;
3601*22dc650dSSadaf Ebrahimi case 'a': binary_files = BIN_TEXT; break;
3602*22dc650dSSadaf Ebrahimi case 'c': count_only = TRUE; break;
3603*22dc650dSSadaf Ebrahimi case N_POSIX_DIGIT: posix_digit = TRUE; break;
3604*22dc650dSSadaf Ebrahimi case 'E': case_restrict = TRUE; break;
3605*22dc650dSSadaf Ebrahimi case 'F': options |= PCRE2_LITERAL; break;
3606*22dc650dSSadaf Ebrahimi case 'H': filenames = FN_FORCE; break;
3607*22dc650dSSadaf Ebrahimi case 'I': binary_files = BIN_NOMATCH; break;
3608*22dc650dSSadaf Ebrahimi case 'h': filenames = FN_NONE; break;
3609*22dc650dSSadaf Ebrahimi case 'i': options |= PCRE2_CASELESS; break;
3610*22dc650dSSadaf Ebrahimi case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3611*22dc650dSSadaf Ebrahimi case 'L': filenames = FN_NOMATCH_ONLY; break;
3612*22dc650dSSadaf Ebrahimi case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3613*22dc650dSSadaf Ebrahimi case 'n': number = TRUE; break;
3614*22dc650dSSadaf Ebrahimi
3615*22dc650dSSadaf Ebrahimi case 'o':
3616*22dc650dSSadaf Ebrahimi only_matching_last = add_number(0, only_matching_last);
3617*22dc650dSSadaf Ebrahimi if (only_matching == NULL) only_matching = only_matching_last;
3618*22dc650dSSadaf Ebrahimi break;
3619*22dc650dSSadaf Ebrahimi
3620*22dc650dSSadaf Ebrahimi case 'P': no_ucp = TRUE; break;
3621*22dc650dSSadaf Ebrahimi case 'q': quiet = TRUE; break;
3622*22dc650dSSadaf Ebrahimi case 'r': dee_action = dee_RECURSE; break;
3623*22dc650dSSadaf Ebrahimi case 's': silent = TRUE; break;
3624*22dc650dSSadaf Ebrahimi case 't': show_total_count = TRUE; break;
3625*22dc650dSSadaf Ebrahimi case 'u': options |= PCRE2_UTF | PCRE2_UCP; utf = TRUE; break;
3626*22dc650dSSadaf Ebrahimi case 'U': options |= PCRE2_UTF | PCRE2_MATCH_INVALID_UTF | PCRE2_UCP;
3627*22dc650dSSadaf Ebrahimi utf = TRUE; break;
3628*22dc650dSSadaf Ebrahimi case 'v': invert = TRUE; break;
3629*22dc650dSSadaf Ebrahimi
3630*22dc650dSSadaf Ebrahimi case 'V':
3631*22dc650dSSadaf Ebrahimi {
3632*22dc650dSSadaf Ebrahimi unsigned char buffer[128];
3633*22dc650dSSadaf Ebrahimi (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3634*22dc650dSSadaf Ebrahimi fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3635*22dc650dSSadaf Ebrahimi }
3636*22dc650dSSadaf Ebrahimi pcre2grep_exit(0);
3637*22dc650dSSadaf Ebrahimi break; /* LCOV_EXCL_LINE - statement kept to avoid compiler warning */
3638*22dc650dSSadaf Ebrahimi
3639*22dc650dSSadaf Ebrahimi case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3640*22dc650dSSadaf Ebrahimi case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3641*22dc650dSSadaf Ebrahimi case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
3642*22dc650dSSadaf Ebrahimi
3643*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - this is a "never event" */
3644*22dc650dSSadaf Ebrahimi default:
3645*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3646*22dc650dSSadaf Ebrahimi pcre2grep_exit(usage(2));
3647*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
3648*22dc650dSSadaf Ebrahimi }
3649*22dc650dSSadaf Ebrahimi
3650*22dc650dSSadaf Ebrahimi return options;
3651*22dc650dSSadaf Ebrahimi }
3652*22dc650dSSadaf Ebrahimi
3653*22dc650dSSadaf Ebrahimi
3654*22dc650dSSadaf Ebrahimi
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. */
3660*22dc650dSSadaf Ebrahimi
/* Format a number as an English ordinal ("1st", "2nd", "3rd", "4th", "11th").

The result lives in a static buffer, so it is overwritten by the next call and
must be used (or copied) before calling ordin() again.

Argument:   n     the number to format
Returns:          pointer to the static, NUL-terminated ordinal string
*/

static char *
ordin(int n)
{
static char buffer[14];         /* "-2147483648" + two-letter suffix + NUL */
const char *suffix = "th";
int len;

/* Bound the digits so that the suffix and terminator always fit, and use the
returned length instead of scanning for the end of the string. */

len = snprintf(buffer, sizeof(buffer) - 2, "%d", n);
if (len < 0) len = 0;
if (len > (int)sizeof(buffer) - 3) len = (int)sizeof(buffer) - 3;

/* Only the last two digits decide the suffix; 11, 12 and 13 are irregular
and take "th" like everything that does not end in 1, 2 or 3. */

n %= 100;
if (n < 11 || n > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

memcpy(buffer + len, suffix, 3);   /* two letters plus the terminating NUL */
return buffer;
}
3679*22dc650dSSadaf Ebrahimi
3680*22dc650dSSadaf Ebrahimi
3681*22dc650dSSadaf Ebrahimi
/*************************************************
*          Compile a single pattern              *
*************************************************/

/* Do nothing if the pattern has already been compiled. This is the case for
include/exclude patterns read from a file.

When the -F option has been used, each "pattern" may be a list of strings,
separated by line breaks. They will be matched literally. We split such a
string and compile the first substring, inserting an additional block into the
pattern chain.

Arguments:
  p              points to the pattern block
  options        the PCRE options
  fromfile       TRUE if the pattern was read from a file
  fromtext       file name or identifying text (e.g. "include")
  count          0 if this is the only command line pattern, or
                 number of the command line pattern, or
                 linenumber for a pattern from a file

Returns:         TRUE on success, FALSE after an error
*/
3705*22dc650dSSadaf Ebrahimi
3706*22dc650dSSadaf Ebrahimi static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3707*22dc650dSSadaf Ebrahimi compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3708*22dc650dSSadaf Ebrahimi int count)
3709*22dc650dSSadaf Ebrahimi {
3710*22dc650dSSadaf Ebrahimi char *ps;
3711*22dc650dSSadaf Ebrahimi int errcode;
3712*22dc650dSSadaf Ebrahimi PCRE2_SIZE patlen, erroffset;
3713*22dc650dSSadaf Ebrahimi PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3714*22dc650dSSadaf Ebrahimi
3715*22dc650dSSadaf Ebrahimi if (p->compiled != NULL) return TRUE;
3716*22dc650dSSadaf Ebrahimi ps = p->string;
3717*22dc650dSSadaf Ebrahimi patlen = p->length;
3718*22dc650dSSadaf Ebrahimi
3719*22dc650dSSadaf Ebrahimi if ((options & PCRE2_LITERAL) != 0)
3720*22dc650dSSadaf Ebrahimi {
3721*22dc650dSSadaf Ebrahimi int ellength;
3722*22dc650dSSadaf Ebrahimi char *eop = ps + patlen;
3723*22dc650dSSadaf Ebrahimi char *pe = end_of_line(ps, eop, &ellength);
3724*22dc650dSSadaf Ebrahimi
3725*22dc650dSSadaf Ebrahimi if (ellength != 0)
3726*22dc650dSSadaf Ebrahimi {
3727*22dc650dSSadaf Ebrahimi patlen = pe - ps - ellength;
3728*22dc650dSSadaf Ebrahimi if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3729*22dc650dSSadaf Ebrahimi }
3730*22dc650dSSadaf Ebrahimi }
3731*22dc650dSSadaf Ebrahimi
3732*22dc650dSSadaf Ebrahimi p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3733*22dc650dSSadaf Ebrahimi &erroffset, compile_context);
3734*22dc650dSSadaf Ebrahimi
3735*22dc650dSSadaf Ebrahimi /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3736*22dc650dSSadaf Ebrahimi ignore any JIT compiler errors, relying falling back to interpreting if
3737*22dc650dSSadaf Ebrahimi anything goes wrong with JIT. */
3738*22dc650dSSadaf Ebrahimi
3739*22dc650dSSadaf Ebrahimi if (p->compiled != NULL)
3740*22dc650dSSadaf Ebrahimi {
3741*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
3742*22dc650dSSadaf Ebrahimi if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3743*22dc650dSSadaf Ebrahimi #endif
3744*22dc650dSSadaf Ebrahimi return TRUE;
3745*22dc650dSSadaf Ebrahimi }
3746*22dc650dSSadaf Ebrahimi
3747*22dc650dSSadaf Ebrahimi /* Handle compile errors */
3748*22dc650dSSadaf Ebrahimi
3749*22dc650dSSadaf Ebrahimi if (erroffset > patlen) erroffset = patlen;
3750*22dc650dSSadaf Ebrahimi pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3751*22dc650dSSadaf Ebrahimi
3752*22dc650dSSadaf Ebrahimi if (fromfile)
3753*22dc650dSSadaf Ebrahimi {
3754*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3755*22dc650dSSadaf Ebrahimi "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3756*22dc650dSSadaf Ebrahimi }
3757*22dc650dSSadaf Ebrahimi else
3758*22dc650dSSadaf Ebrahimi {
3759*22dc650dSSadaf Ebrahimi if (count == 0)
3760*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3761*22dc650dSSadaf Ebrahimi fromtext, (int)erroffset, errmessbuffer);
3762*22dc650dSSadaf Ebrahimi else
3763*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3764*22dc650dSSadaf Ebrahimi ordin(count), fromtext, (int)erroffset, errmessbuffer);
3765*22dc650dSSadaf Ebrahimi }
3766*22dc650dSSadaf Ebrahimi
3767*22dc650dSSadaf Ebrahimi return FALSE;
3768*22dc650dSSadaf Ebrahimi }
3769*22dc650dSSadaf Ebrahimi
3770*22dc650dSSadaf Ebrahimi
3771*22dc650dSSadaf Ebrahimi
/*************************************************
*     Read and compile a file of patterns        *
*************************************************/

/* This is used for --filelist, --include-from, and --exclude-from.

Arguments:
  name         the name of the file; "-" is stdin
  patptr       pointer to the pattern chain anchor
  patlastptr   pointer to the last pattern pointer

Returns:       TRUE if all went well
*/
3785*22dc650dSSadaf Ebrahimi
3786*22dc650dSSadaf Ebrahimi static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3787*22dc650dSSadaf Ebrahimi read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3788*22dc650dSSadaf Ebrahimi {
3789*22dc650dSSadaf Ebrahimi int linenumber = 0;
3790*22dc650dSSadaf Ebrahimi PCRE2_SIZE patlen;
3791*22dc650dSSadaf Ebrahimi FILE *f;
3792*22dc650dSSadaf Ebrahimi const char *filename;
3793*22dc650dSSadaf Ebrahimi char buffer[MAXPATLEN+20];
3794*22dc650dSSadaf Ebrahimi
3795*22dc650dSSadaf Ebrahimi if (strcmp(name, "-") == 0)
3796*22dc650dSSadaf Ebrahimi {
3797*22dc650dSSadaf Ebrahimi f = stdin;
3798*22dc650dSSadaf Ebrahimi filename = stdin_name;
3799*22dc650dSSadaf Ebrahimi }
3800*22dc650dSSadaf Ebrahimi else
3801*22dc650dSSadaf Ebrahimi {
3802*22dc650dSSadaf Ebrahimi f = fopen(name, "r");
3803*22dc650dSSadaf Ebrahimi if (f == NULL)
3804*22dc650dSSadaf Ebrahimi {
3805*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3806*22dc650dSSadaf Ebrahimi return FALSE;
3807*22dc650dSSadaf Ebrahimi }
3808*22dc650dSSadaf Ebrahimi filename = name;
3809*22dc650dSSadaf Ebrahimi }
3810*22dc650dSSadaf Ebrahimi
3811*22dc650dSSadaf Ebrahimi while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3812*22dc650dSSadaf Ebrahimi {
3813*22dc650dSSadaf Ebrahimi while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3814*22dc650dSSadaf Ebrahimi linenumber++;
3815*22dc650dSSadaf Ebrahimi if (patlen == 0) continue; /* Skip blank lines */
3816*22dc650dSSadaf Ebrahimi
3817*22dc650dSSadaf Ebrahimi /* Note: this call to add_pattern() puts a pointer to the local variable
3818*22dc650dSSadaf Ebrahimi "buffer" into the pattern chain. However, that pointer is used only when
3819*22dc650dSSadaf Ebrahimi compiling the pattern, which happens immediately below, so we flatten it
3820*22dc650dSSadaf Ebrahimi afterwards, as a precaution against any later code trying to use it. */
3821*22dc650dSSadaf Ebrahimi
3822*22dc650dSSadaf Ebrahimi *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3823*22dc650dSSadaf Ebrahimi if (*patlastptr == NULL)
3824*22dc650dSSadaf Ebrahimi {
3825*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - won't happen in testing */
3826*22dc650dSSadaf Ebrahimi if (f != stdin) fclose(f);
3827*22dc650dSSadaf Ebrahimi return FALSE;
3828*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
3829*22dc650dSSadaf Ebrahimi }
3830*22dc650dSSadaf Ebrahimi if (*patptr == NULL) *patptr = *patlastptr;
3831*22dc650dSSadaf Ebrahimi
3832*22dc650dSSadaf Ebrahimi /* This loop is needed because compiling a "pattern" when -F is set may add
3833*22dc650dSSadaf Ebrahimi on additional literal patterns if the original contains a newline. In the
3834*22dc650dSSadaf Ebrahimi common case, it never will, because read_one_line() stops at a newline.
3835*22dc650dSSadaf Ebrahimi However, the -N option can be used to give pcre2grep a different newline
3836*22dc650dSSadaf Ebrahimi setting. */
3837*22dc650dSSadaf Ebrahimi
3838*22dc650dSSadaf Ebrahimi for(;;)
3839*22dc650dSSadaf Ebrahimi {
3840*22dc650dSSadaf Ebrahimi if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3841*22dc650dSSadaf Ebrahimi linenumber))
3842*22dc650dSSadaf Ebrahimi {
3843*22dc650dSSadaf Ebrahimi if (f != stdin) fclose(f);
3844*22dc650dSSadaf Ebrahimi return FALSE;
3845*22dc650dSSadaf Ebrahimi }
3846*22dc650dSSadaf Ebrahimi (*patlastptr)->string = NULL; /* Insurance */
3847*22dc650dSSadaf Ebrahimi if ((*patlastptr)->next == NULL) break;
3848*22dc650dSSadaf Ebrahimi *patlastptr = (*patlastptr)->next;
3849*22dc650dSSadaf Ebrahimi }
3850*22dc650dSSadaf Ebrahimi }
3851*22dc650dSSadaf Ebrahimi
3852*22dc650dSSadaf Ebrahimi if (f != stdin) fclose(f);
3853*22dc650dSSadaf Ebrahimi return TRUE;
3854*22dc650dSSadaf Ebrahimi }
3855*22dc650dSSadaf Ebrahimi
3856*22dc650dSSadaf Ebrahimi
3857*22dc650dSSadaf Ebrahimi
/*************************************************
*                Main program                    *
*************************************************/

/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3863*22dc650dSSadaf Ebrahimi
3864*22dc650dSSadaf Ebrahimi int
main(int argc,char ** argv)3865*22dc650dSSadaf Ebrahimi main(int argc, char **argv)
3866*22dc650dSSadaf Ebrahimi {
3867*22dc650dSSadaf Ebrahimi int i, j;
3868*22dc650dSSadaf Ebrahimi int rc = 1;
3869*22dc650dSSadaf Ebrahimi BOOL only_one_at_top;
3870*22dc650dSSadaf Ebrahimi patstr *cp;
3871*22dc650dSSadaf Ebrahimi fnstr *fn;
3872*22dc650dSSadaf Ebrahimi omstr *om;
3873*22dc650dSSadaf Ebrahimi const char *locale_from = "--locale";
3874*22dc650dSSadaf Ebrahimi
3875*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
3876*22dc650dSSadaf Ebrahimi pcre2_jit_stack *jit_stack = NULL;
3877*22dc650dSSadaf Ebrahimi #endif
3878*22dc650dSSadaf Ebrahimi
3879*22dc650dSSadaf Ebrahimi /* In Windows, stdout is set up as a text stream, which means that \n is
3880*22dc650dSSadaf Ebrahimi converted to \r\n. This causes output lines that are copied from the input to
3881*22dc650dSSadaf Ebrahimi change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3882*22dc650dSSadaf Ebrahimi that stdout is a binary stream. Note that this means all other output to stdout
3883*22dc650dSSadaf Ebrahimi must use STDOUT_NL to terminate lines. */
3884*22dc650dSSadaf Ebrahimi
3885*22dc650dSSadaf Ebrahimi #ifdef WIN32
3886*22dc650dSSadaf Ebrahimi _setmode(_fileno(stdout), _O_BINARY);
3887*22dc650dSSadaf Ebrahimi #endif
3888*22dc650dSSadaf Ebrahimi
3889*22dc650dSSadaf Ebrahimi /* Process the options */
3890*22dc650dSSadaf Ebrahimi
3891*22dc650dSSadaf Ebrahimi for (i = 1; i < argc; i++)
3892*22dc650dSSadaf Ebrahimi {
3893*22dc650dSSadaf Ebrahimi option_item *op = NULL;
3894*22dc650dSSadaf Ebrahimi char *option_data = (char *)""; /* default to keep compiler happy */
3895*22dc650dSSadaf Ebrahimi BOOL longop;
3896*22dc650dSSadaf Ebrahimi BOOL longopwasequals = FALSE;
3897*22dc650dSSadaf Ebrahimi
3898*22dc650dSSadaf Ebrahimi if (argv[i][0] != '-') break;
3899*22dc650dSSadaf Ebrahimi
3900*22dc650dSSadaf Ebrahimi /* If we hit an argument that is just "-", it may be a reference to STDIN,
3901*22dc650dSSadaf Ebrahimi but only if we have previously had -e or -f to define the patterns. */
3902*22dc650dSSadaf Ebrahimi
3903*22dc650dSSadaf Ebrahimi if (argv[i][1] == 0)
3904*22dc650dSSadaf Ebrahimi {
3905*22dc650dSSadaf Ebrahimi if (pattern_files != NULL || patterns != NULL) break;
3906*22dc650dSSadaf Ebrahimi else pcre2grep_exit(usage(2));
3907*22dc650dSSadaf Ebrahimi }
3908*22dc650dSSadaf Ebrahimi
3909*22dc650dSSadaf Ebrahimi /* Handle a long name option, or -- to terminate the options */
3910*22dc650dSSadaf Ebrahimi
3911*22dc650dSSadaf Ebrahimi if (argv[i][1] == '-')
3912*22dc650dSSadaf Ebrahimi {
3913*22dc650dSSadaf Ebrahimi char *arg = argv[i] + 2;
3914*22dc650dSSadaf Ebrahimi char *argequals = strchr(arg, '=');
3915*22dc650dSSadaf Ebrahimi
3916*22dc650dSSadaf Ebrahimi if (*arg == 0) /* -- terminates options */
3917*22dc650dSSadaf Ebrahimi {
3918*22dc650dSSadaf Ebrahimi i++;
3919*22dc650dSSadaf Ebrahimi break; /* out of the options-handling loop */
3920*22dc650dSSadaf Ebrahimi }
3921*22dc650dSSadaf Ebrahimi
3922*22dc650dSSadaf Ebrahimi longop = TRUE;
3923*22dc650dSSadaf Ebrahimi
3924*22dc650dSSadaf Ebrahimi /* Some long options have data that follows after =, for example file=name.
3925*22dc650dSSadaf Ebrahimi Some options have variations in the long name spelling: specifically, we
3926*22dc650dSSadaf Ebrahimi allow "regexp" because GNU grep allows it, though I personally go along
3927*22dc650dSSadaf Ebrahimi with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3928*22dc650dSSadaf Ebrahimi These options are entered in the table as "regex(p)". Options can be in
3929*22dc650dSSadaf Ebrahimi both these categories. */
3930*22dc650dSSadaf Ebrahimi
3931*22dc650dSSadaf Ebrahimi for (op = optionlist; op->one_char != 0; op++)
3932*22dc650dSSadaf Ebrahimi {
3933*22dc650dSSadaf Ebrahimi char *opbra = strchr(op->long_name, '(');
3934*22dc650dSSadaf Ebrahimi char *equals = strchr(op->long_name, '=');
3935*22dc650dSSadaf Ebrahimi
3936*22dc650dSSadaf Ebrahimi /* Handle options with only one spelling of the name */
3937*22dc650dSSadaf Ebrahimi
3938*22dc650dSSadaf Ebrahimi if (opbra == NULL) /* Does not contain '(' */
3939*22dc650dSSadaf Ebrahimi {
3940*22dc650dSSadaf Ebrahimi if (equals == NULL) /* Not thing=data case */
3941*22dc650dSSadaf Ebrahimi {
3942*22dc650dSSadaf Ebrahimi if (strcmp(arg, op->long_name) == 0) break;
3943*22dc650dSSadaf Ebrahimi }
3944*22dc650dSSadaf Ebrahimi else /* Special case xxx=data */
3945*22dc650dSSadaf Ebrahimi {
3946*22dc650dSSadaf Ebrahimi int oplen = (int)(equals - op->long_name);
3947*22dc650dSSadaf Ebrahimi int arglen = (argequals == NULL)?
3948*22dc650dSSadaf Ebrahimi (int)strlen(arg) : (int)(argequals - arg);
3949*22dc650dSSadaf Ebrahimi if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3950*22dc650dSSadaf Ebrahimi {
3951*22dc650dSSadaf Ebrahimi option_data = arg + arglen;
3952*22dc650dSSadaf Ebrahimi if (*option_data == '=')
3953*22dc650dSSadaf Ebrahimi {
3954*22dc650dSSadaf Ebrahimi option_data++;
3955*22dc650dSSadaf Ebrahimi longopwasequals = TRUE;
3956*22dc650dSSadaf Ebrahimi }
3957*22dc650dSSadaf Ebrahimi break;
3958*22dc650dSSadaf Ebrahimi }
3959*22dc650dSSadaf Ebrahimi }
3960*22dc650dSSadaf Ebrahimi }
3961*22dc650dSSadaf Ebrahimi
3962*22dc650dSSadaf Ebrahimi /* Handle options with an alternate spelling of the name */
3963*22dc650dSSadaf Ebrahimi
3964*22dc650dSSadaf Ebrahimi else
3965*22dc650dSSadaf Ebrahimi {
3966*22dc650dSSadaf Ebrahimi char buff1[24];
3967*22dc650dSSadaf Ebrahimi char buff2[24];
3968*22dc650dSSadaf Ebrahimi int ret;
3969*22dc650dSSadaf Ebrahimi
3970*22dc650dSSadaf Ebrahimi int baselen = (int)(opbra - op->long_name);
3971*22dc650dSSadaf Ebrahimi int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3972*22dc650dSSadaf Ebrahimi int arglen = (argequals == NULL || equals == NULL)?
3973*22dc650dSSadaf Ebrahimi (int)strlen(arg) : (int)(argequals - arg);
3974*22dc650dSSadaf Ebrahimi
3975*22dc650dSSadaf Ebrahimi if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3976*22dc650dSSadaf Ebrahimi ret < 0 || ret > (int)sizeof(buff1)) ||
3977*22dc650dSSadaf Ebrahimi (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3978*22dc650dSSadaf Ebrahimi fulllen - baselen - 2, opbra + 1),
3979*22dc650dSSadaf Ebrahimi ret < 0 || ret > (int)sizeof(buff2)))
3980*22dc650dSSadaf Ebrahimi {
3981*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START - this is a "never" event */
3982*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3983*22dc650dSSadaf Ebrahimi op->long_name);
3984*22dc650dSSadaf Ebrahimi pcre2grep_exit(2);
3985*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
3986*22dc650dSSadaf Ebrahimi }
3987*22dc650dSSadaf Ebrahimi
3988*22dc650dSSadaf Ebrahimi if (strncmp(arg, buff1, arglen) == 0 ||
3989*22dc650dSSadaf Ebrahimi strncmp(arg, buff2, arglen) == 0)
3990*22dc650dSSadaf Ebrahimi {
3991*22dc650dSSadaf Ebrahimi if (equals != NULL && argequals != NULL)
3992*22dc650dSSadaf Ebrahimi {
3993*22dc650dSSadaf Ebrahimi option_data = argequals;
3994*22dc650dSSadaf Ebrahimi if (*option_data == '=')
3995*22dc650dSSadaf Ebrahimi {
3996*22dc650dSSadaf Ebrahimi option_data++;
3997*22dc650dSSadaf Ebrahimi longopwasequals = TRUE;
3998*22dc650dSSadaf Ebrahimi }
3999*22dc650dSSadaf Ebrahimi }
4000*22dc650dSSadaf Ebrahimi break;
4001*22dc650dSSadaf Ebrahimi }
4002*22dc650dSSadaf Ebrahimi }
4003*22dc650dSSadaf Ebrahimi }
4004*22dc650dSSadaf Ebrahimi
4005*22dc650dSSadaf Ebrahimi if (op->one_char == 0)
4006*22dc650dSSadaf Ebrahimi {
4007*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
4008*22dc650dSSadaf Ebrahimi pcre2grep_exit(usage(2));
4009*22dc650dSSadaf Ebrahimi }
4010*22dc650dSSadaf Ebrahimi }
4011*22dc650dSSadaf Ebrahimi
4012*22dc650dSSadaf Ebrahimi /* One-char options; many that have no data may be in a single argument; we
4013*22dc650dSSadaf Ebrahimi continue till we hit the last one or one that needs data. */
4014*22dc650dSSadaf Ebrahimi
4015*22dc650dSSadaf Ebrahimi else
4016*22dc650dSSadaf Ebrahimi {
4017*22dc650dSSadaf Ebrahimi char *s = argv[i] + 1;
4018*22dc650dSSadaf Ebrahimi longop = FALSE;
4019*22dc650dSSadaf Ebrahimi
4020*22dc650dSSadaf Ebrahimi while (*s != 0)
4021*22dc650dSSadaf Ebrahimi {
4022*22dc650dSSadaf Ebrahimi for (op = optionlist; op->one_char != 0; op++)
4023*22dc650dSSadaf Ebrahimi {
4024*22dc650dSSadaf Ebrahimi if (*s == op->one_char) break;
4025*22dc650dSSadaf Ebrahimi }
4026*22dc650dSSadaf Ebrahimi if (op->one_char == 0)
4027*22dc650dSSadaf Ebrahimi {
4028*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4029*22dc650dSSadaf Ebrahimi *s, argv[i]);
4030*22dc650dSSadaf Ebrahimi pcre2grep_exit(usage(2));
4031*22dc650dSSadaf Ebrahimi }
4032*22dc650dSSadaf Ebrahimi
4033*22dc650dSSadaf Ebrahimi option_data = s+1;
4034*22dc650dSSadaf Ebrahimi
4035*22dc650dSSadaf Ebrahimi /* Break out if this is the last character in the string; it's handled
4036*22dc650dSSadaf Ebrahimi below like a single multi-char option. */
4037*22dc650dSSadaf Ebrahimi
4038*22dc650dSSadaf Ebrahimi if (*option_data == 0) break;
4039*22dc650dSSadaf Ebrahimi
4040*22dc650dSSadaf Ebrahimi /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4041*22dc650dSSadaf Ebrahimi are used for ones that either have a numerical number or defaults, i.e.
4042*22dc650dSSadaf Ebrahimi the data is optional. If a digit follows, there is data; if not, carry on
4043*22dc650dSSadaf Ebrahimi with other single-character options in the same string. */
4044*22dc650dSSadaf Ebrahimi
4045*22dc650dSSadaf Ebrahimi if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4046*22dc650dSSadaf Ebrahimi {
4047*22dc650dSSadaf Ebrahimi if (isdigit((unsigned char)(s[1]))) break;
4048*22dc650dSSadaf Ebrahimi }
4049*22dc650dSSadaf Ebrahimi else /* Check for an option with data */
4050*22dc650dSSadaf Ebrahimi {
4051*22dc650dSSadaf Ebrahimi if (op->type != OP_NODATA) break;
4052*22dc650dSSadaf Ebrahimi }
4053*22dc650dSSadaf Ebrahimi
4054*22dc650dSSadaf Ebrahimi /* Handle a single-character option with no data, then loop for the
4055*22dc650dSSadaf Ebrahimi next character in the string. */
4056*22dc650dSSadaf Ebrahimi
4057*22dc650dSSadaf Ebrahimi pcre2_options = handle_option(*s++, pcre2_options);
4058*22dc650dSSadaf Ebrahimi }
4059*22dc650dSSadaf Ebrahimi }
4060*22dc650dSSadaf Ebrahimi
4061*22dc650dSSadaf Ebrahimi /* At this point we should have op pointing to a matched option. If the type
4062*22dc650dSSadaf Ebrahimi is NO_DATA, it means that there is no data, and the option might set
4063*22dc650dSSadaf Ebrahimi something in the PCRE options. */
4064*22dc650dSSadaf Ebrahimi
4065*22dc650dSSadaf Ebrahimi if (op->type == OP_NODATA)
4066*22dc650dSSadaf Ebrahimi {
4067*22dc650dSSadaf Ebrahimi pcre2_options = handle_option(op->one_char, pcre2_options);
4068*22dc650dSSadaf Ebrahimi continue;
4069*22dc650dSSadaf Ebrahimi }
4070*22dc650dSSadaf Ebrahimi
4071*22dc650dSSadaf Ebrahimi /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4072*22dc650dSSadaf Ebrahimi either has a value or defaults to something. It cannot have data in a
4073*22dc650dSSadaf Ebrahimi separate item. At the moment, the only such options are "colo(u)r",
4074*22dc650dSSadaf Ebrahimi and "only-matching". */
4075*22dc650dSSadaf Ebrahimi
4076*22dc650dSSadaf Ebrahimi if (*option_data == 0 &&
4077*22dc650dSSadaf Ebrahimi (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4078*22dc650dSSadaf Ebrahimi op->type == OP_OP_NUMBERS))
4079*22dc650dSSadaf Ebrahimi {
4080*22dc650dSSadaf Ebrahimi switch (op->one_char)
4081*22dc650dSSadaf Ebrahimi {
4082*22dc650dSSadaf Ebrahimi case N_COLOUR:
4083*22dc650dSSadaf Ebrahimi colour_option = "auto";
4084*22dc650dSSadaf Ebrahimi break;
4085*22dc650dSSadaf Ebrahimi
4086*22dc650dSSadaf Ebrahimi case 'o':
4087*22dc650dSSadaf Ebrahimi only_matching_last = add_number(0, only_matching_last);
4088*22dc650dSSadaf Ebrahimi if (only_matching == NULL) only_matching = only_matching_last;
4089*22dc650dSSadaf Ebrahimi break;
4090*22dc650dSSadaf Ebrahimi }
4091*22dc650dSSadaf Ebrahimi continue;
4092*22dc650dSSadaf Ebrahimi }
4093*22dc650dSSadaf Ebrahimi
4094*22dc650dSSadaf Ebrahimi /* Otherwise, find the data string for the option. */
4095*22dc650dSSadaf Ebrahimi
4096*22dc650dSSadaf Ebrahimi if (*option_data == 0)
4097*22dc650dSSadaf Ebrahimi {
4098*22dc650dSSadaf Ebrahimi if (i >= argc - 1 || longopwasequals)
4099*22dc650dSSadaf Ebrahimi {
4100*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4101*22dc650dSSadaf Ebrahimi pcre2grep_exit(usage(2));
4102*22dc650dSSadaf Ebrahimi }
4103*22dc650dSSadaf Ebrahimi option_data = argv[++i];
4104*22dc650dSSadaf Ebrahimi }
4105*22dc650dSSadaf Ebrahimi
4106*22dc650dSSadaf Ebrahimi /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4107*22dc650dSSadaf Ebrahimi added to a chain of numbers. */
4108*22dc650dSSadaf Ebrahimi
4109*22dc650dSSadaf Ebrahimi if (op->type == OP_OP_NUMBERS)
4110*22dc650dSSadaf Ebrahimi {
4111*22dc650dSSadaf Ebrahimi unsigned long int n = decode_number(option_data, op, longop);
4112*22dc650dSSadaf Ebrahimi omdatastr *omd = (omdatastr *)op->dataptr;
4113*22dc650dSSadaf Ebrahimi *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4114*22dc650dSSadaf Ebrahimi if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4115*22dc650dSSadaf Ebrahimi }
4116*22dc650dSSadaf Ebrahimi
4117*22dc650dSSadaf Ebrahimi /* If the option type is OP_PATLIST, it's the -e option, or one of the
4118*22dc650dSSadaf Ebrahimi include/exclude options, which can be called multiple times to create lists
4119*22dc650dSSadaf Ebrahimi of patterns. */
4120*22dc650dSSadaf Ebrahimi
4121*22dc650dSSadaf Ebrahimi else if (op->type == OP_PATLIST)
4122*22dc650dSSadaf Ebrahimi {
4123*22dc650dSSadaf Ebrahimi patdatastr *pd = (patdatastr *)op->dataptr;
4124*22dc650dSSadaf Ebrahimi *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4125*22dc650dSSadaf Ebrahimi *(pd->lastptr));
4126*22dc650dSSadaf Ebrahimi if (*(pd->lastptr) == NULL) goto EXIT2;
4127*22dc650dSSadaf Ebrahimi if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4128*22dc650dSSadaf Ebrahimi }
4129*22dc650dSSadaf Ebrahimi
4130*22dc650dSSadaf Ebrahimi /* If the option type is OP_FILELIST, it's one of the options that names a
4131*22dc650dSSadaf Ebrahimi file. */
4132*22dc650dSSadaf Ebrahimi
4133*22dc650dSSadaf Ebrahimi else if (op->type == OP_FILELIST)
4134*22dc650dSSadaf Ebrahimi {
4135*22dc650dSSadaf Ebrahimi fndatastr *fd = (fndatastr *)op->dataptr;
4136*22dc650dSSadaf Ebrahimi fn = (fnstr *)malloc(sizeof(fnstr));
4137*22dc650dSSadaf Ebrahimi if (fn == NULL)
4138*22dc650dSSadaf Ebrahimi {
4139*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
4140*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: malloc failed\n");
4141*22dc650dSSadaf Ebrahimi goto EXIT2;
4142*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
4143*22dc650dSSadaf Ebrahimi }
4144*22dc650dSSadaf Ebrahimi fn->next = NULL;
4145*22dc650dSSadaf Ebrahimi fn->name = option_data;
4146*22dc650dSSadaf Ebrahimi if (*(fd->anchor) == NULL)
4147*22dc650dSSadaf Ebrahimi *(fd->anchor) = fn;
4148*22dc650dSSadaf Ebrahimi else
4149*22dc650dSSadaf Ebrahimi (*(fd->lastptr))->next = fn;
4150*22dc650dSSadaf Ebrahimi *(fd->lastptr) = fn;
4151*22dc650dSSadaf Ebrahimi }
4152*22dc650dSSadaf Ebrahimi
4153*22dc650dSSadaf Ebrahimi /* Handle OP_BINFILES */
4154*22dc650dSSadaf Ebrahimi
4155*22dc650dSSadaf Ebrahimi else if (op->type == OP_BINFILES)
4156*22dc650dSSadaf Ebrahimi {
4157*22dc650dSSadaf Ebrahimi if (strcmp(option_data, "binary") == 0)
4158*22dc650dSSadaf Ebrahimi binary_files = BIN_BINARY;
4159*22dc650dSSadaf Ebrahimi else if (strcmp(option_data, "without-match") == 0)
4160*22dc650dSSadaf Ebrahimi binary_files = BIN_NOMATCH;
4161*22dc650dSSadaf Ebrahimi else if (strcmp(option_data, "text") == 0)
4162*22dc650dSSadaf Ebrahimi binary_files = BIN_TEXT;
4163*22dc650dSSadaf Ebrahimi else
4164*22dc650dSSadaf Ebrahimi {
4165*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4166*22dc650dSSadaf Ebrahimi option_data);
4167*22dc650dSSadaf Ebrahimi pcre2grep_exit(usage(2));
4168*22dc650dSSadaf Ebrahimi }
4169*22dc650dSSadaf Ebrahimi }
4170*22dc650dSSadaf Ebrahimi
4171*22dc650dSSadaf Ebrahimi /* Otherwise, deal with a single string or numeric data value. */
4172*22dc650dSSadaf Ebrahimi
4173*22dc650dSSadaf Ebrahimi else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4174*22dc650dSSadaf Ebrahimi op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4175*22dc650dSSadaf Ebrahimi {
4176*22dc650dSSadaf Ebrahimi *((char **)op->dataptr) = option_data;
4177*22dc650dSSadaf Ebrahimi }
4178*22dc650dSSadaf Ebrahimi else
4179*22dc650dSSadaf Ebrahimi {
4180*22dc650dSSadaf Ebrahimi unsigned long int n = decode_number(option_data, op, longop);
4181*22dc650dSSadaf Ebrahimi if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4182*22dc650dSSadaf Ebrahimi else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4183*22dc650dSSadaf Ebrahimi else *((int *)op->dataptr) = n;
4184*22dc650dSSadaf Ebrahimi }
4185*22dc650dSSadaf Ebrahimi }
4186*22dc650dSSadaf Ebrahimi
4187*22dc650dSSadaf Ebrahimi /* Options have been decoded. If -C was used, its value is used as a default
4188*22dc650dSSadaf Ebrahimi for -A and -B. */
4189*22dc650dSSadaf Ebrahimi
4190*22dc650dSSadaf Ebrahimi if (both_context > 0)
4191*22dc650dSSadaf Ebrahimi {
4192*22dc650dSSadaf Ebrahimi if (after_context == 0) after_context = both_context;
4193*22dc650dSSadaf Ebrahimi if (before_context == 0) before_context = both_context;
4194*22dc650dSSadaf Ebrahimi }
4195*22dc650dSSadaf Ebrahimi
4196*22dc650dSSadaf Ebrahimi /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4197*22dc650dSSadaf Ebrahimi permitted. They display, each in their own way, only the data that has matched.
4198*22dc650dSSadaf Ebrahimi */
4199*22dc650dSSadaf Ebrahimi
4200*22dc650dSSadaf Ebrahimi only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4201*22dc650dSSadaf Ebrahimi file_offsets + line_offsets;
4202*22dc650dSSadaf Ebrahimi
4203*22dc650dSSadaf Ebrahimi if (only_matching_count > 1)
4204*22dc650dSSadaf Ebrahimi {
4205*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4206*22dc650dSSadaf Ebrahimi "--file-offsets and/or --line-offsets\n");
4207*22dc650dSSadaf Ebrahimi pcre2grep_exit(usage(2));
4208*22dc650dSSadaf Ebrahimi }
4209*22dc650dSSadaf Ebrahimi
4210*22dc650dSSadaf Ebrahimi /* Check that there is a big enough ovector for all -o settings. */
4211*22dc650dSSadaf Ebrahimi
4212*22dc650dSSadaf Ebrahimi for (om = only_matching; om != NULL; om = om->next)
4213*22dc650dSSadaf Ebrahimi {
4214*22dc650dSSadaf Ebrahimi int n = om->groupnum;
4215*22dc650dSSadaf Ebrahimi if (n > (int)capture_max)
4216*22dc650dSSadaf Ebrahimi {
4217*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4218*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4219*22dc650dSSadaf Ebrahimi goto EXIT2;
4220*22dc650dSSadaf Ebrahimi }
4221*22dc650dSSadaf Ebrahimi }
4222*22dc650dSSadaf Ebrahimi
4223*22dc650dSSadaf Ebrahimi /* Check the text supplied to --output for errors. */
4224*22dc650dSSadaf Ebrahimi
4225*22dc650dSSadaf Ebrahimi if (output_text != NULL &&
4226*22dc650dSSadaf Ebrahimi !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4227*22dc650dSSadaf Ebrahimi goto EXIT2;
4228*22dc650dSSadaf Ebrahimi
4229*22dc650dSSadaf Ebrahimi /* Set up default compile and match contexts and match data blocks. */
4230*22dc650dSSadaf Ebrahimi
4231*22dc650dSSadaf Ebrahimi offset_size = capture_max + 1;
4232*22dc650dSSadaf Ebrahimi compile_context = pcre2_compile_context_create(NULL);
4233*22dc650dSSadaf Ebrahimi match_context = pcre2_match_context_create(NULL);
4234*22dc650dSSadaf Ebrahimi match_data_pair[0] = pcre2_match_data_create(offset_size, NULL);
4235*22dc650dSSadaf Ebrahimi match_data_pair[1] = pcre2_match_data_create(offset_size, NULL);
4236*22dc650dSSadaf Ebrahimi offsets_pair[0] = pcre2_get_ovector_pointer(match_data_pair[0]);
4237*22dc650dSSadaf Ebrahimi offsets_pair[1] = pcre2_get_ovector_pointer(match_data_pair[1]);
4238*22dc650dSSadaf Ebrahimi match_data = match_data_pair[0];
4239*22dc650dSSadaf Ebrahimi offsets = offsets_pair[0];
4240*22dc650dSSadaf Ebrahimi match_data_toggle = 0;
4241*22dc650dSSadaf Ebrahimi
4242*22dc650dSSadaf Ebrahimi /* If string (script) callouts are supported, set up the callout processing
4243*22dc650dSSadaf Ebrahimi function in the match context. */
4244*22dc650dSSadaf Ebrahimi
4245*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_CALLOUT
4246*22dc650dSSadaf Ebrahimi pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4247*22dc650dSSadaf Ebrahimi #endif
4248*22dc650dSSadaf Ebrahimi
4249*22dc650dSSadaf Ebrahimi /* Put limits into the match context. */
4250*22dc650dSSadaf Ebrahimi
4251*22dc650dSSadaf Ebrahimi if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4252*22dc650dSSadaf Ebrahimi if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4253*22dc650dSSadaf Ebrahimi if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4254*22dc650dSSadaf Ebrahimi
4255*22dc650dSSadaf Ebrahimi /* If a locale has not been provided as an option, see if the LC_CTYPE or
4256*22dc650dSSadaf Ebrahimi LC_ALL environment variable is set, and if so, use it. */
4257*22dc650dSSadaf Ebrahimi
4258*22dc650dSSadaf Ebrahimi if (locale == NULL)
4259*22dc650dSSadaf Ebrahimi {
4260*22dc650dSSadaf Ebrahimi locale = getenv("LC_ALL");
4261*22dc650dSSadaf Ebrahimi locale_from = "LC_ALL";
4262*22dc650dSSadaf Ebrahimi }
4263*22dc650dSSadaf Ebrahimi
4264*22dc650dSSadaf Ebrahimi if (locale == NULL)
4265*22dc650dSSadaf Ebrahimi {
4266*22dc650dSSadaf Ebrahimi locale = getenv("LC_CTYPE");
4267*22dc650dSSadaf Ebrahimi locale_from = "LC_CTYPE";
4268*22dc650dSSadaf Ebrahimi }
4269*22dc650dSSadaf Ebrahimi
4270*22dc650dSSadaf Ebrahimi /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4271*22dc650dSSadaf Ebrahimi NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4272*22dc650dSSadaf Ebrahimi
4273*22dc650dSSadaf Ebrahimi if (locale != NULL)
4274*22dc650dSSadaf Ebrahimi {
4275*22dc650dSSadaf Ebrahimi if (setlocale(LC_CTYPE, locale) == NULL)
4276*22dc650dSSadaf Ebrahimi {
4277*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4278*22dc650dSSadaf Ebrahimi locale, locale_from);
4279*22dc650dSSadaf Ebrahimi goto EXIT2;
4280*22dc650dSSadaf Ebrahimi }
4281*22dc650dSSadaf Ebrahimi character_tables = pcre2_maketables(NULL);
4282*22dc650dSSadaf Ebrahimi pcre2_set_character_tables(compile_context, character_tables);
4283*22dc650dSSadaf Ebrahimi }
4284*22dc650dSSadaf Ebrahimi
4285*22dc650dSSadaf Ebrahimi /* Sort out colouring */
4286*22dc650dSSadaf Ebrahimi
4287*22dc650dSSadaf Ebrahimi if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4288*22dc650dSSadaf Ebrahimi {
4289*22dc650dSSadaf Ebrahimi if (strcmp(colour_option, "always") == 0)
4290*22dc650dSSadaf Ebrahimi #ifdef WIN32
4291*22dc650dSSadaf Ebrahimi do_ansi = !is_stdout_tty(),
4292*22dc650dSSadaf Ebrahimi #endif
4293*22dc650dSSadaf Ebrahimi do_colour = TRUE;
4294*22dc650dSSadaf Ebrahimi else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4295*22dc650dSSadaf Ebrahimi else
4296*22dc650dSSadaf Ebrahimi {
4297*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4298*22dc650dSSadaf Ebrahimi colour_option);
4299*22dc650dSSadaf Ebrahimi goto EXIT2;
4300*22dc650dSSadaf Ebrahimi }
4301*22dc650dSSadaf Ebrahimi if (do_colour)
4302*22dc650dSSadaf Ebrahimi {
4303*22dc650dSSadaf Ebrahimi char *cs = getenv("PCRE2GREP_COLOUR");
4304*22dc650dSSadaf Ebrahimi if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4305*22dc650dSSadaf Ebrahimi if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4306*22dc650dSSadaf Ebrahimi if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4307*22dc650dSSadaf Ebrahimi if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4308*22dc650dSSadaf Ebrahimi if (cs == NULL) cs = getenv("GREP_COLOR");
4309*22dc650dSSadaf Ebrahimi if (cs != NULL)
4310*22dc650dSSadaf Ebrahimi {
4311*22dc650dSSadaf Ebrahimi if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4312*22dc650dSSadaf Ebrahimi }
4313*22dc650dSSadaf Ebrahimi #ifdef WIN32
4314*22dc650dSSadaf Ebrahimi init_colour_output();
4315*22dc650dSSadaf Ebrahimi #endif
4316*22dc650dSSadaf Ebrahimi }
4317*22dc650dSSadaf Ebrahimi }
4318*22dc650dSSadaf Ebrahimi
4319*22dc650dSSadaf Ebrahimi /* When colouring or otherwise identifying matching substrings, we need to find
4320*22dc650dSSadaf Ebrahimi all possible matches when there are multiple patterns. */
4321*22dc650dSSadaf Ebrahimi
4322*22dc650dSSadaf Ebrahimi all_matches = do_colour || only_matching_count != 0;
4323*22dc650dSSadaf Ebrahimi
4324*22dc650dSSadaf Ebrahimi /* Sort out a newline setting. */
4325*22dc650dSSadaf Ebrahimi
4326*22dc650dSSadaf Ebrahimi if (newline_arg != NULL)
4327*22dc650dSSadaf Ebrahimi {
4328*22dc650dSSadaf Ebrahimi for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4329*22dc650dSSadaf Ebrahimi endlinetype++)
4330*22dc650dSSadaf Ebrahimi {
4331*22dc650dSSadaf Ebrahimi if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4332*22dc650dSSadaf Ebrahimi }
4333*22dc650dSSadaf Ebrahimi if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4334*22dc650dSSadaf Ebrahimi pcre2_set_newline(compile_context, endlinetype);
4335*22dc650dSSadaf Ebrahimi else
4336*22dc650dSSadaf Ebrahimi {
4337*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4338*22dc650dSSadaf Ebrahimi newline_arg);
4339*22dc650dSSadaf Ebrahimi goto EXIT2;
4340*22dc650dSSadaf Ebrahimi }
4341*22dc650dSSadaf Ebrahimi }
4342*22dc650dSSadaf Ebrahimi
4343*22dc650dSSadaf Ebrahimi /* Find default newline convention */
4344*22dc650dSSadaf Ebrahimi
4345*22dc650dSSadaf Ebrahimi else
4346*22dc650dSSadaf Ebrahimi {
4347*22dc650dSSadaf Ebrahimi (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4348*22dc650dSSadaf Ebrahimi }
4349*22dc650dSSadaf Ebrahimi
4350*22dc650dSSadaf Ebrahimi /* Interpret the text values for -d and -D */
4351*22dc650dSSadaf Ebrahimi
4352*22dc650dSSadaf Ebrahimi if (dee_option != NULL)
4353*22dc650dSSadaf Ebrahimi {
4354*22dc650dSSadaf Ebrahimi if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4355*22dc650dSSadaf Ebrahimi else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4356*22dc650dSSadaf Ebrahimi else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4357*22dc650dSSadaf Ebrahimi else
4358*22dc650dSSadaf Ebrahimi {
4359*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4360*22dc650dSSadaf Ebrahimi goto EXIT2;
4361*22dc650dSSadaf Ebrahimi }
4362*22dc650dSSadaf Ebrahimi }
4363*22dc650dSSadaf Ebrahimi
4364*22dc650dSSadaf Ebrahimi if (DEE_option != NULL)
4365*22dc650dSSadaf Ebrahimi {
4366*22dc650dSSadaf Ebrahimi if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4367*22dc650dSSadaf Ebrahimi else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4368*22dc650dSSadaf Ebrahimi else
4369*22dc650dSSadaf Ebrahimi {
4370*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4371*22dc650dSSadaf Ebrahimi goto EXIT2;
4372*22dc650dSSadaf Ebrahimi }
4373*22dc650dSSadaf Ebrahimi }
4374*22dc650dSSadaf Ebrahimi
4375*22dc650dSSadaf Ebrahimi /* If no_ucp is set, remove PCRE2_UCP from the compile options. */
4376*22dc650dSSadaf Ebrahimi
4377*22dc650dSSadaf Ebrahimi if (no_ucp) pcre2_options &= ~PCRE2_UCP;
4378*22dc650dSSadaf Ebrahimi
4379*22dc650dSSadaf Ebrahimi /* Adjust the extra options. */
4380*22dc650dSSadaf Ebrahimi
4381*22dc650dSSadaf Ebrahimi if (case_restrict) extra_options |= PCRE2_EXTRA_CASELESS_RESTRICT;
4382*22dc650dSSadaf Ebrahimi if (posix_digit)
4383*22dc650dSSadaf Ebrahimi extra_options |= (PCRE2_EXTRA_ASCII_BSD | PCRE2_EXTRA_ASCII_DIGIT);
4384*22dc650dSSadaf Ebrahimi
4385*22dc650dSSadaf Ebrahimi /* Set the extra options in the compile context. */
4386*22dc650dSSadaf Ebrahimi
4387*22dc650dSSadaf Ebrahimi (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4388*22dc650dSSadaf Ebrahimi
4389*22dc650dSSadaf Ebrahimi /* If use_jit is set, check whether JIT is available. If not, do not try
4390*22dc650dSSadaf Ebrahimi to use JIT. */
4391*22dc650dSSadaf Ebrahimi
4392*22dc650dSSadaf Ebrahimi if (use_jit)
4393*22dc650dSSadaf Ebrahimi {
4394*22dc650dSSadaf Ebrahimi uint32_t answer;
4395*22dc650dSSadaf Ebrahimi (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4396*22dc650dSSadaf Ebrahimi if (!answer) use_jit = FALSE;
4397*22dc650dSSadaf Ebrahimi }
4398*22dc650dSSadaf Ebrahimi
4399*22dc650dSSadaf Ebrahimi /* Get memory for the main buffer. */
4400*22dc650dSSadaf Ebrahimi
4401*22dc650dSSadaf Ebrahimi if (bufthird <= 0)
4402*22dc650dSSadaf Ebrahimi {
4403*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4404*22dc650dSSadaf Ebrahimi goto EXIT2;
4405*22dc650dSSadaf Ebrahimi }
4406*22dc650dSSadaf Ebrahimi
4407*22dc650dSSadaf Ebrahimi bufsize = 3*bufthird;
4408*22dc650dSSadaf Ebrahimi main_buffer = (char *)malloc(bufsize);
4409*22dc650dSSadaf Ebrahimi
4410*22dc650dSSadaf Ebrahimi if (main_buffer == NULL)
4411*22dc650dSSadaf Ebrahimi {
4412*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_START */
4413*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: malloc failed\n");
4414*22dc650dSSadaf Ebrahimi goto EXIT2;
4415*22dc650dSSadaf Ebrahimi /* LCOV_EXCL_STOP */
4416*22dc650dSSadaf Ebrahimi }
4417*22dc650dSSadaf Ebrahimi
4418*22dc650dSSadaf Ebrahimi /* If no patterns were provided by -e, and there are no files provided by -f,
4419*22dc650dSSadaf Ebrahimi the first argument is the one and only pattern, and it must exist. */
4420*22dc650dSSadaf Ebrahimi
4421*22dc650dSSadaf Ebrahimi if (patterns == NULL && pattern_files == NULL)
4422*22dc650dSSadaf Ebrahimi {
4423*22dc650dSSadaf Ebrahimi if (i >= argc) return usage(2);
4424*22dc650dSSadaf Ebrahimi patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4425*22dc650dSSadaf Ebrahimi NULL);
4426*22dc650dSSadaf Ebrahimi i++;
4427*22dc650dSSadaf Ebrahimi if (patterns == NULL) goto EXIT2;
4428*22dc650dSSadaf Ebrahimi }
4429*22dc650dSSadaf Ebrahimi
4430*22dc650dSSadaf Ebrahimi /* Compile the patterns that were provided on the command line, either by
4431*22dc650dSSadaf Ebrahimi multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4432*22dc650dSSadaf Ebrahimi after all the command-line options are read so that we know which PCRE options
4433*22dc650dSSadaf Ebrahimi to use. When -F is used, compile_pattern() may add another block into the
4434*22dc650dSSadaf Ebrahimi chain, so we must not access the next pointer till after the compile. */
4435*22dc650dSSadaf Ebrahimi
4436*22dc650dSSadaf Ebrahimi for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4437*22dc650dSSadaf Ebrahimi {
4438*22dc650dSSadaf Ebrahimi if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4439*22dc650dSSadaf Ebrahimi (j == 1 && patterns->next == NULL)? 0 : j))
4440*22dc650dSSadaf Ebrahimi goto EXIT2;
4441*22dc650dSSadaf Ebrahimi }
4442*22dc650dSSadaf Ebrahimi
4443*22dc650dSSadaf Ebrahimi /* Read and compile the regular expressions that are provided in files. */
4444*22dc650dSSadaf Ebrahimi
4445*22dc650dSSadaf Ebrahimi for (fn = pattern_files; fn != NULL; fn = fn->next)
4446*22dc650dSSadaf Ebrahimi {
4447*22dc650dSSadaf Ebrahimi if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4448*22dc650dSSadaf Ebrahimi }
4449*22dc650dSSadaf Ebrahimi
4450*22dc650dSSadaf Ebrahimi /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4451*22dc650dSSadaf Ebrahimi
4452*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
4453*22dc650dSSadaf Ebrahimi if (use_jit)
4454*22dc650dSSadaf Ebrahimi {
4455*22dc650dSSadaf Ebrahimi jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4456*22dc650dSSadaf Ebrahimi if (jit_stack != NULL )
4457*22dc650dSSadaf Ebrahimi pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4458*22dc650dSSadaf Ebrahimi }
4459*22dc650dSSadaf Ebrahimi #endif
4460*22dc650dSSadaf Ebrahimi
4461*22dc650dSSadaf Ebrahimi /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4462*22dc650dSSadaf Ebrahimi adjust the options. */
4463*22dc650dSSadaf Ebrahimi
4464*22dc650dSSadaf Ebrahimi pcre2_options &= ~PCRE2_LITERAL;
4465*22dc650dSSadaf Ebrahimi (void)pcre2_set_compile_extra_options(compile_context, 0);
4466*22dc650dSSadaf Ebrahimi
4467*22dc650dSSadaf Ebrahimi /* If there are include or exclude patterns read from the command line, compile
4468*22dc650dSSadaf Ebrahimi them. */
4469*22dc650dSSadaf Ebrahimi
4470*22dc650dSSadaf Ebrahimi for (j = 0; j < 4; j++)
4471*22dc650dSSadaf Ebrahimi {
4472*22dc650dSSadaf Ebrahimi int k;
4473*22dc650dSSadaf Ebrahimi for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4474*22dc650dSSadaf Ebrahimi {
4475*22dc650dSSadaf Ebrahimi if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4476*22dc650dSSadaf Ebrahimi (k == 1 && cp->next == NULL)? 0 : k))
4477*22dc650dSSadaf Ebrahimi goto EXIT2;
4478*22dc650dSSadaf Ebrahimi }
4479*22dc650dSSadaf Ebrahimi }
4480*22dc650dSSadaf Ebrahimi
4481*22dc650dSSadaf Ebrahimi /* Read and compile include/exclude patterns from files. */
4482*22dc650dSSadaf Ebrahimi
4483*22dc650dSSadaf Ebrahimi for (fn = include_from; fn != NULL; fn = fn->next)
4484*22dc650dSSadaf Ebrahimi {
4485*22dc650dSSadaf Ebrahimi if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4486*22dc650dSSadaf Ebrahimi goto EXIT2;
4487*22dc650dSSadaf Ebrahimi }
4488*22dc650dSSadaf Ebrahimi
4489*22dc650dSSadaf Ebrahimi for (fn = exclude_from; fn != NULL; fn = fn->next)
4490*22dc650dSSadaf Ebrahimi {
4491*22dc650dSSadaf Ebrahimi if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4492*22dc650dSSadaf Ebrahimi goto EXIT2;
4493*22dc650dSSadaf Ebrahimi }
4494*22dc650dSSadaf Ebrahimi
4495*22dc650dSSadaf Ebrahimi /* If there are no files that contain lists of files to search, and there are
4496*22dc650dSSadaf Ebrahimi no file arguments, search stdin, and then exit. */
4497*22dc650dSSadaf Ebrahimi
4498*22dc650dSSadaf Ebrahimi if (file_lists == NULL && i >= argc)
4499*22dc650dSSadaf Ebrahimi {
4500*22dc650dSSadaf Ebrahimi /* Seeking on a buffered stdin is not portable, so attempt to remove the
4501*22dc650dSSadaf Ebrahimi buffer in order to work around reported issues affecting several BSD
4502*22dc650dSSadaf Ebrahimi systems and AIX. */
4503*22dc650dSSadaf Ebrahimi if (count_limit >= 0)
4504*22dc650dSSadaf Ebrahimi setbuf(stdin, NULL);
4505*22dc650dSSadaf Ebrahimi rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4506*22dc650dSSadaf Ebrahimi (filenames > FN_DEFAULT)? stdin_name : NULL);
4507*22dc650dSSadaf Ebrahimi goto EXIT;
4508*22dc650dSSadaf Ebrahimi }
4509*22dc650dSSadaf Ebrahimi
4510*22dc650dSSadaf Ebrahimi /* If any files that contain lists of files to search have been specified,
4511*22dc650dSSadaf Ebrahimi read them line by line and search the given files. */
4512*22dc650dSSadaf Ebrahimi
4513*22dc650dSSadaf Ebrahimi for (fn = file_lists; fn != NULL; fn = fn->next)
4514*22dc650dSSadaf Ebrahimi {
4515*22dc650dSSadaf Ebrahimi char buffer[FNBUFSIZ];
4516*22dc650dSSadaf Ebrahimi FILE *fl;
4517*22dc650dSSadaf Ebrahimi if (strcmp(fn->name, "-") == 0) fl = stdin; else
4518*22dc650dSSadaf Ebrahimi {
4519*22dc650dSSadaf Ebrahimi fl = fopen(fn->name, "rb");
4520*22dc650dSSadaf Ebrahimi if (fl == NULL)
4521*22dc650dSSadaf Ebrahimi {
4522*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4523*22dc650dSSadaf Ebrahimi strerror(errno));
4524*22dc650dSSadaf Ebrahimi goto EXIT2;
4525*22dc650dSSadaf Ebrahimi }
4526*22dc650dSSadaf Ebrahimi }
4527*22dc650dSSadaf Ebrahimi while (fgets(buffer, sizeof(buffer), fl) != NULL)
4528*22dc650dSSadaf Ebrahimi {
4529*22dc650dSSadaf Ebrahimi int frc;
4530*22dc650dSSadaf Ebrahimi char *end = buffer + (int)strlen(buffer);
4531*22dc650dSSadaf Ebrahimi while (end > buffer && isspace((unsigned char)(end[-1]))) end--;
4532*22dc650dSSadaf Ebrahimi *end = 0;
4533*22dc650dSSadaf Ebrahimi if (*buffer != 0)
4534*22dc650dSSadaf Ebrahimi {
4535*22dc650dSSadaf Ebrahimi frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4536*22dc650dSSadaf Ebrahimi if (frc > 1) rc = frc;
4537*22dc650dSSadaf Ebrahimi else if (frc == 0 && rc == 1) rc = 0;
4538*22dc650dSSadaf Ebrahimi }
4539*22dc650dSSadaf Ebrahimi }
4540*22dc650dSSadaf Ebrahimi if (fl != stdin) fclose(fl);
4541*22dc650dSSadaf Ebrahimi }
4542*22dc650dSSadaf Ebrahimi
4543*22dc650dSSadaf Ebrahimi /* After handling file-list, work through remaining arguments. Pass in the fact
4544*22dc650dSSadaf Ebrahimi that there is only one argument at top level - this suppresses the file name if
4545*22dc650dSSadaf Ebrahimi the argument is not a directory and filenames are not otherwise forced. */
4546*22dc650dSSadaf Ebrahimi
4547*22dc650dSSadaf Ebrahimi only_one_at_top = i == argc - 1 && file_lists == NULL;
4548*22dc650dSSadaf Ebrahimi
4549*22dc650dSSadaf Ebrahimi for (; i < argc; i++)
4550*22dc650dSSadaf Ebrahimi {
4551*22dc650dSSadaf Ebrahimi int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4552*22dc650dSSadaf Ebrahimi only_one_at_top);
4553*22dc650dSSadaf Ebrahimi if (frc > 1) rc = frc;
4554*22dc650dSSadaf Ebrahimi else if (frc == 0 && rc == 1) rc = 0;
4555*22dc650dSSadaf Ebrahimi }
4556*22dc650dSSadaf Ebrahimi
4557*22dc650dSSadaf Ebrahimi /* Show the total number of matches if requested, but not if only one file's
4558*22dc650dSSadaf Ebrahimi count was printed. */
4559*22dc650dSSadaf Ebrahimi
4560*22dc650dSSadaf Ebrahimi if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4561*22dc650dSSadaf Ebrahimi {
4562*22dc650dSSadaf Ebrahimi if (counts_printed != 0 && filenames >= FN_DEFAULT)
4563*22dc650dSSadaf Ebrahimi fprintf(stdout, "TOTAL:");
4564*22dc650dSSadaf Ebrahimi fprintf(stdout, "%lu" STDOUT_NL, total_count);
4565*22dc650dSSadaf Ebrahimi }
4566*22dc650dSSadaf Ebrahimi
4567*22dc650dSSadaf Ebrahimi EXIT:
4568*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_PCRE2GREP_JIT
4569*22dc650dSSadaf Ebrahimi pcre2_jit_free_unused_memory(NULL);
4570*22dc650dSSadaf Ebrahimi if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4571*22dc650dSSadaf Ebrahimi #endif
4572*22dc650dSSadaf Ebrahimi
4573*22dc650dSSadaf Ebrahimi free(main_buffer);
4574*22dc650dSSadaf Ebrahimi if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4575*22dc650dSSadaf Ebrahimi
4576*22dc650dSSadaf Ebrahimi pcre2_compile_context_free(compile_context);
4577*22dc650dSSadaf Ebrahimi pcre2_match_context_free(match_context);
4578*22dc650dSSadaf Ebrahimi pcre2_match_data_free(match_data_pair[0]);
4579*22dc650dSSadaf Ebrahimi pcre2_match_data_free(match_data_pair[1]);
4580*22dc650dSSadaf Ebrahimi
4581*22dc650dSSadaf Ebrahimi free_pattern_chain(patterns);
4582*22dc650dSSadaf Ebrahimi free_pattern_chain(include_patterns);
4583*22dc650dSSadaf Ebrahimi free_pattern_chain(include_dir_patterns);
4584*22dc650dSSadaf Ebrahimi free_pattern_chain(exclude_patterns);
4585*22dc650dSSadaf Ebrahimi free_pattern_chain(exclude_dir_patterns);
4586*22dc650dSSadaf Ebrahimi
4587*22dc650dSSadaf Ebrahimi free_file_chain(exclude_from);
4588*22dc650dSSadaf Ebrahimi free_file_chain(include_from);
4589*22dc650dSSadaf Ebrahimi free_file_chain(pattern_files);
4590*22dc650dSSadaf Ebrahimi free_file_chain(file_lists);
4591*22dc650dSSadaf Ebrahimi
4592*22dc650dSSadaf Ebrahimi while (only_matching != NULL)
4593*22dc650dSSadaf Ebrahimi {
4594*22dc650dSSadaf Ebrahimi omstr *this = only_matching;
4595*22dc650dSSadaf Ebrahimi only_matching = this->next;
4596*22dc650dSSadaf Ebrahimi free(this);
4597*22dc650dSSadaf Ebrahimi }
4598*22dc650dSSadaf Ebrahimi
4599*22dc650dSSadaf Ebrahimi pcre2grep_exit(rc);
4600*22dc650dSSadaf Ebrahimi
4601*22dc650dSSadaf Ebrahimi EXIT2:
4602*22dc650dSSadaf Ebrahimi rc = 2;
4603*22dc650dSSadaf Ebrahimi goto EXIT;
4604*22dc650dSSadaf Ebrahimi }
4605*22dc650dSSadaf Ebrahimi
4606*22dc650dSSadaf Ebrahimi /* End of pcre2grep */
4607