xref: /aosp_15_r20/external/minijail/tools/parser.py (revision 4b9c6d91573e8b3a96609339b46361b5476dd0f9)
1*4b9c6d91SCole Faust#!/usr/bin/env python3
2*4b9c6d91SCole Faust# -*- coding: utf-8 -*-
3*4b9c6d91SCole Faust#
4*4b9c6d91SCole Faust# Copyright (C) 2018 The Android Open Source Project
5*4b9c6d91SCole Faust#
6*4b9c6d91SCole Faust# Licensed under the Apache License, Version 2.0 (the "License");
7*4b9c6d91SCole Faust# you may not use this file except in compliance with the License.
8*4b9c6d91SCole Faust# You may obtain a copy of the License at
9*4b9c6d91SCole Faust#
10*4b9c6d91SCole Faust#      http://www.apache.org/licenses/LICENSE-2.0
11*4b9c6d91SCole Faust#
12*4b9c6d91SCole Faust# Unless required by applicable law or agreed to in writing, software
13*4b9c6d91SCole Faust# distributed under the License is distributed on an "AS IS" BASIS,
14*4b9c6d91SCole Faust# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15*4b9c6d91SCole Faust# See the License for the specific language governing permissions and
16*4b9c6d91SCole Faust# limitations under the License.
17*4b9c6d91SCole Faust"""A parser for the Minijail policy file."""
18*4b9c6d91SCole Faust
19*4b9c6d91SCole Faustfrom __future__ import absolute_import
20*4b9c6d91SCole Faustfrom __future__ import division
21*4b9c6d91SCole Faustfrom __future__ import print_function
22*4b9c6d91SCole Faust
23*4b9c6d91SCole Faustimport collections
24*4b9c6d91SCole Faustimport itertools
25*4b9c6d91SCole Faustimport os.path
26*4b9c6d91SCole Faustimport re
27*4b9c6d91SCole Faust
28*4b9c6d91SCole Fausttry:
29*4b9c6d91SCole Faust    import bpf
30*4b9c6d91SCole Faustexcept ImportError:
31*4b9c6d91SCole Faust    from minijail import bpf
32*4b9c6d91SCole Faust
33*4b9c6d91SCole Faust
34*4b9c6d91SCole Faust# Representations of numbers with different radix (base) in C.
35*4b9c6d91SCole FaustHEX_REGEX = r'-?0[xX][0-9a-fA-F]+'
36*4b9c6d91SCole FaustOCTAL_REGEX = r'-?0[0-7]+'
37*4b9c6d91SCole FaustDECIMAL_REGEX = r'-?[0-9]+'
38*4b9c6d91SCole Faust
39*4b9c6d91SCole Faust
40*4b9c6d91SCole FaustToken = collections.namedtuple(
41*4b9c6d91SCole Faust    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])
42*4b9c6d91SCole Faust
43*4b9c6d91SCole Faust# A regex that can tokenize a Minijail policy file line.
44*4b9c6d91SCole Faust_TOKEN_SPECIFICATION = (
45*4b9c6d91SCole Faust    ('COMMENT', r'#.*$'),
46*4b9c6d91SCole Faust    ('WHITESPACE', r'\s+'),
47*4b9c6d91SCole Faust    ('CONTINUATION', r'\\$'),
48*4b9c6d91SCole Faust    ('DEFAULT', r'@default\b'),
49*4b9c6d91SCole Faust    ('INCLUDE', r'@include\b'),
50*4b9c6d91SCole Faust    ('FREQUENCY', r'@frequency\b'),
51*4b9c6d91SCole Faust    ('DENYLIST', r'@denylist$'),
52*4b9c6d91SCole Faust    ('PATH', r'(?:\.)?/\S+'),
53*4b9c6d91SCole Faust    ('NUMERIC_CONSTANT', f'{HEX_REGEX}|{OCTAL_REGEX}|{DECIMAL_REGEX}'),
54*4b9c6d91SCole Faust    ('COLON', r':'),
55*4b9c6d91SCole Faust    ('SEMICOLON', r';'),
56*4b9c6d91SCole Faust    ('COMMA', r','),
57*4b9c6d91SCole Faust    ('BITWISE_COMPLEMENT', r'~'),
58*4b9c6d91SCole Faust    ('LPAREN', r'\('),
59*4b9c6d91SCole Faust    ('RPAREN', r'\)'),
60*4b9c6d91SCole Faust    ('LBRACE', r'\{'),
61*4b9c6d91SCole Faust    ('RBRACE', r'\}'),
62*4b9c6d91SCole Faust    ('RBRACKET', r'\]'),
63*4b9c6d91SCole Faust    ('LBRACKET', r'\['),
64*4b9c6d91SCole Faust    ('OR', r'\|\|'),
65*4b9c6d91SCole Faust    ('AND', r'&&'),
66*4b9c6d91SCole Faust    ('BITWISE_OR', r'\|'),
67*4b9c6d91SCole Faust    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
68*4b9c6d91SCole Faust    ('EQUAL', r'='),
69*4b9c6d91SCole Faust    ('ARGUMENT', r'\barg[0-9]+\b'),
70*4b9c6d91SCole Faust    ('RETURN', r'\breturn\b'),
71*4b9c6d91SCole Faust    ('ACTION',
72*4b9c6d91SCole Faust     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
73*4b9c6d91SCole Faust     r'\btrace\b|\blog\b|\buser-notify\b'
74*4b9c6d91SCole Faust    ),
75*4b9c6d91SCole Faust    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
76*4b9c6d91SCole Faust)
77*4b9c6d91SCole Faust_TOKEN_RE = re.compile('|'.join(
78*4b9c6d91SCole Faust    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
79*4b9c6d91SCole Faust
80*4b9c6d91SCole Faust
81*4b9c6d91SCole Faustclass ParseException(Exception):
82*4b9c6d91SCole Faust    """An exception that is raised when parsing fails."""
83*4b9c6d91SCole Faust
84*4b9c6d91SCole Faust    # pylint: disable=too-many-arguments
85*4b9c6d91SCole Faust    def __init__(self,
86*4b9c6d91SCole Faust                 message,
87*4b9c6d91SCole Faust                 filename,
88*4b9c6d91SCole Faust                 *,
89*4b9c6d91SCole Faust                 line='',
90*4b9c6d91SCole Faust                 line_number=1,
91*4b9c6d91SCole Faust                 token=None):
92*4b9c6d91SCole Faust        if token:
93*4b9c6d91SCole Faust            line = token.line
94*4b9c6d91SCole Faust            line_number = token.line_number
95*4b9c6d91SCole Faust            column = token.column
96*4b9c6d91SCole Faust            length = len(token.value)
97*4b9c6d91SCole Faust        else:
98*4b9c6d91SCole Faust            column = len(line)
99*4b9c6d91SCole Faust            length = 1
100*4b9c6d91SCole Faust
101*4b9c6d91SCole Faust        message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
102*4b9c6d91SCole Faust                                       message)
103*4b9c6d91SCole Faust        message += '\n    %s' % line
104*4b9c6d91SCole Faust        message += '\n    %s%s' % (' ' * column, '^' * length)
105*4b9c6d91SCole Faust        super().__init__(message)
106*4b9c6d91SCole Faust
107*4b9c6d91SCole Faust
108*4b9c6d91SCole Faustclass ParserState:
109*4b9c6d91SCole Faust    """Stores the state of the Parser to provide better diagnostics."""
110*4b9c6d91SCole Faust
111*4b9c6d91SCole Faust    def __init__(self, filename):
112*4b9c6d91SCole Faust        self._filename = filename
113*4b9c6d91SCole Faust        self._line = ''
114*4b9c6d91SCole Faust        self._line_number = 0
115*4b9c6d91SCole Faust
116*4b9c6d91SCole Faust    @property
117*4b9c6d91SCole Faust    def filename(self):
118*4b9c6d91SCole Faust        """Return the name of the file being processed."""
119*4b9c6d91SCole Faust        return self._filename
120*4b9c6d91SCole Faust
121*4b9c6d91SCole Faust    @property
122*4b9c6d91SCole Faust    def line(self):
123*4b9c6d91SCole Faust        """Return the current line being processed."""
124*4b9c6d91SCole Faust        return self._line
125*4b9c6d91SCole Faust
126*4b9c6d91SCole Faust    @property
127*4b9c6d91SCole Faust    def line_number(self):
128*4b9c6d91SCole Faust        """Return the current line number being processed."""
129*4b9c6d91SCole Faust        return self._line_number
130*4b9c6d91SCole Faust
131*4b9c6d91SCole Faust    def error(self, message, token=None):
132*4b9c6d91SCole Faust        """Raise a ParserException with the provided message."""
133*4b9c6d91SCole Faust        raise ParseException(
134*4b9c6d91SCole Faust            message,
135*4b9c6d91SCole Faust            self.filename,
136*4b9c6d91SCole Faust            line=self._line,
137*4b9c6d91SCole Faust            line_number=self._line_number,
138*4b9c6d91SCole Faust            token=token)
139*4b9c6d91SCole Faust
140*4b9c6d91SCole Faust    def tokenize(self, lines):
141*4b9c6d91SCole Faust        """Return a list of tokens for the current line."""
142*4b9c6d91SCole Faust        tokens = []
143*4b9c6d91SCole Faust
144*4b9c6d91SCole Faust        for line_number, line in enumerate(lines):
145*4b9c6d91SCole Faust            self._line_number = line_number + 1
146*4b9c6d91SCole Faust            self._line = line.rstrip('\r\n')
147*4b9c6d91SCole Faust
148*4b9c6d91SCole Faust            last_end = 0
149*4b9c6d91SCole Faust            for token in _TOKEN_RE.finditer(self._line):
150*4b9c6d91SCole Faust                if token.start() != last_end:
151*4b9c6d91SCole Faust                    self.error(
152*4b9c6d91SCole Faust                        'invalid token',
153*4b9c6d91SCole Faust                        token=Token('INVALID',
154*4b9c6d91SCole Faust                                    self._line[last_end:token.start()],
155*4b9c6d91SCole Faust                                    self.filename, self._line,
156*4b9c6d91SCole Faust                                    self._line_number, last_end))
157*4b9c6d91SCole Faust                last_end = token.end()
158*4b9c6d91SCole Faust
159*4b9c6d91SCole Faust                # Omit whitespace and comments now to avoid sprinkling this logic
160*4b9c6d91SCole Faust                # elsewhere.
161*4b9c6d91SCole Faust                if token.lastgroup in ('WHITESPACE', 'COMMENT',
162*4b9c6d91SCole Faust                                       'CONTINUATION'):
163*4b9c6d91SCole Faust                    continue
164*4b9c6d91SCole Faust                tokens.append(
165*4b9c6d91SCole Faust                    Token(token.lastgroup, token.group(), self.filename,
166*4b9c6d91SCole Faust                          self._line, self._line_number, token.start()))
167*4b9c6d91SCole Faust            if last_end != len(self._line):
168*4b9c6d91SCole Faust                self.error(
169*4b9c6d91SCole Faust                    'invalid token',
170*4b9c6d91SCole Faust                    token=Token('INVALID', self._line[last_end:],
171*4b9c6d91SCole Faust                                self.filename, self._line, self._line_number,
172*4b9c6d91SCole Faust                                last_end))
173*4b9c6d91SCole Faust
174*4b9c6d91SCole Faust            if self._line.endswith('\\'):
175*4b9c6d91SCole Faust                # This line is not finished yet.
176*4b9c6d91SCole Faust                continue
177*4b9c6d91SCole Faust
178*4b9c6d91SCole Faust            if tokens:
179*4b9c6d91SCole Faust                # Return a copy of the token list so that the caller can be free
180*4b9c6d91SCole Faust                # to modify it.
181*4b9c6d91SCole Faust                yield tokens[::]
182*4b9c6d91SCole Faust            tokens.clear()
183*4b9c6d91SCole Faust
184*4b9c6d91SCole Faust
185*4b9c6d91SCole FaustAtom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
186*4b9c6d91SCole Faust"""A single boolean comparison within a filter expression."""
187*4b9c6d91SCole Faust
188*4b9c6d91SCole FaustFilter = collections.namedtuple('Filter', ['expression', 'action'])
189*4b9c6d91SCole Faust"""The result of parsing a DNF filter expression, with its action.
190*4b9c6d91SCole Faust
191*4b9c6d91SCole FaustSince the expression is in Disjunctive Normal Form, it is composed of two levels
192*4b9c6d91SCole Faustof lists, one for disjunctions and the inner one for conjunctions. The elements
193*4b9c6d91SCole Faustof the inner list are Atoms.
194*4b9c6d91SCole Faust"""
195*4b9c6d91SCole Faust
196*4b9c6d91SCole FaustSyscall = collections.namedtuple('Syscall', ['name', 'number'])
197*4b9c6d91SCole Faust"""A system call."""
198*4b9c6d91SCole Faust
199*4b9c6d91SCole FaustParsedFilterStatement = collections.namedtuple(
200*4b9c6d91SCole Faust    'ParsedFilterStatement', ['syscalls', 'filters', 'token'])
201*4b9c6d91SCole Faust"""The result of parsing a filter statement.
202*4b9c6d91SCole Faust
203*4b9c6d91SCole FaustStatements have a list of syscalls, and an associated list of filters that will
204*4b9c6d91SCole Faustbe evaluated sequentially when any of the syscalls is invoked.
205*4b9c6d91SCole Faust"""
206*4b9c6d91SCole Faust
207*4b9c6d91SCole FaustFilterStatement = collections.namedtuple('FilterStatement',
208*4b9c6d91SCole Faust                                         ['syscall', 'frequency', 'filters'])
209*4b9c6d91SCole Faust"""The filter list for a particular syscall.
210*4b9c6d91SCole Faust
211*4b9c6d91SCole FaustThis is a mapping from one syscall to a list of filters that are evaluated
212*4b9c6d91SCole Faustsequentially. The last filter is always an unconditional action.
213*4b9c6d91SCole Faust"""
214*4b9c6d91SCole Faust
215*4b9c6d91SCole FaustParsedPolicy = collections.namedtuple('ParsedPolicy',
216*4b9c6d91SCole Faust                                      ['default_action', 'filter_statements'])
217*4b9c6d91SCole Faust"""The result of parsing a minijail .policy file."""
218*4b9c6d91SCole Faust
219*4b9c6d91SCole Faust
220*4b9c6d91SCole Faust# pylint: disable=too-few-public-methods
221*4b9c6d91SCole Faustclass PolicyParser:
222*4b9c6d91SCole Faust    """A parser for the Minijail seccomp policy file format."""
223*4b9c6d91SCole Faust
224*4b9c6d91SCole Faust    def __init__(self,
225*4b9c6d91SCole Faust                 arch,
226*4b9c6d91SCole Faust                 *,
227*4b9c6d91SCole Faust                 kill_action,
228*4b9c6d91SCole Faust                 include_depth_limit=10,
229*4b9c6d91SCole Faust                 override_default_action=None,
230*4b9c6d91SCole Faust                 denylist=False,
231*4b9c6d91SCole Faust                 ret_log=False):
232*4b9c6d91SCole Faust        self._parser_states = [ParserState("<memory>")]
233*4b9c6d91SCole Faust        self._kill_action = kill_action
234*4b9c6d91SCole Faust        self._include_depth_limit = include_depth_limit
235*4b9c6d91SCole Faust        if denylist:
236*4b9c6d91SCole Faust            self._default_action = bpf.Allow()
237*4b9c6d91SCole Faust        else:
238*4b9c6d91SCole Faust            self._default_action = self._kill_action
239*4b9c6d91SCole Faust        self._override_default_action = override_default_action
240*4b9c6d91SCole Faust        self._frequency_mapping = collections.defaultdict(int)
241*4b9c6d91SCole Faust        self._arch = arch
242*4b9c6d91SCole Faust        self._denylist = denylist
243*4b9c6d91SCole Faust        self._ret_log = ret_log
244*4b9c6d91SCole Faust
245*4b9c6d91SCole Faust    @property
246*4b9c6d91SCole Faust    def _parser_state(self):
247*4b9c6d91SCole Faust        return self._parser_states[-1]
248*4b9c6d91SCole Faust
249*4b9c6d91SCole Faust    # single-constant = identifier
250*4b9c6d91SCole Faust    #                 | numeric-constant
251*4b9c6d91SCole Faust    #                 ;
252*4b9c6d91SCole Faust    def _parse_single_constant(self, token):
253*4b9c6d91SCole Faust        if token.type == 'IDENTIFIER':
254*4b9c6d91SCole Faust            if token.value not in self._arch.constants:
255*4b9c6d91SCole Faust                self._parser_state.error('invalid constant', token=token)
256*4b9c6d91SCole Faust            single_constant = self._arch.constants[token.value]
257*4b9c6d91SCole Faust        elif token.type == 'NUMERIC_CONSTANT':
258*4b9c6d91SCole Faust            # As `int(_, 0)` in Python != `strtol(_, _, 0)` in C, to make sure
259*4b9c6d91SCole Faust            # the number parsing behaves exactly in C, instead of using `int()`
260*4b9c6d91SCole Faust            # directly, we list out all the possible formats for octal, decimal
261*4b9c6d91SCole Faust            # and hex numbers, and determine the corresponding base by regex.
262*4b9c6d91SCole Faust            try:
263*4b9c6d91SCole Faust                if re.match(HEX_REGEX, token.value):
264*4b9c6d91SCole Faust                    base = 16
265*4b9c6d91SCole Faust                elif re.match(OCTAL_REGEX, token.value):
266*4b9c6d91SCole Faust                    base = 8
267*4b9c6d91SCole Faust                elif re.match(DECIMAL_REGEX, token.value):
268*4b9c6d91SCole Faust                    base = 10
269*4b9c6d91SCole Faust                else:
270*4b9c6d91SCole Faust                    # This should never happen.
271*4b9c6d91SCole Faust                    raise ValueError
272*4b9c6d91SCole Faust                single_constant = int(token.value, base=base)
273*4b9c6d91SCole Faust            except ValueError:
274*4b9c6d91SCole Faust                self._parser_state.error('invalid constant', token=token)
275*4b9c6d91SCole Faust        else:
276*4b9c6d91SCole Faust            self._parser_state.error('invalid constant', token=token)
277*4b9c6d91SCole Faust        if single_constant > self._arch.max_unsigned:
278*4b9c6d91SCole Faust            self._parser_state.error('unsigned overflow', token=token)
279*4b9c6d91SCole Faust        elif single_constant < self._arch.min_signed:
280*4b9c6d91SCole Faust            self._parser_state.error('signed underflow', token=token)
281*4b9c6d91SCole Faust        elif single_constant < 0:
282*4b9c6d91SCole Faust            # This converts the constant to an unsigned representation of the
283*4b9c6d91SCole Faust            # same value, since BPF only uses unsigned values.
284*4b9c6d91SCole Faust            single_constant = self._arch.truncate_word(single_constant)
285*4b9c6d91SCole Faust        return single_constant
286*4b9c6d91SCole Faust
287*4b9c6d91SCole Faust    # constant = [ '~' ] , '(' , value , ')'
288*4b9c6d91SCole Faust    #          | [ '~' ] , single-constant
289*4b9c6d91SCole Faust    #          ;
290*4b9c6d91SCole Faust    def _parse_constant(self, tokens):
291*4b9c6d91SCole Faust        negate = False
292*4b9c6d91SCole Faust        if tokens[0].type == 'BITWISE_COMPLEMENT':
293*4b9c6d91SCole Faust            negate = True
294*4b9c6d91SCole Faust            tokens.pop(0)
295*4b9c6d91SCole Faust            if not tokens:
296*4b9c6d91SCole Faust                self._parser_state.error('empty complement')
297*4b9c6d91SCole Faust            if tokens[0].type == 'BITWISE_COMPLEMENT':
298*4b9c6d91SCole Faust                self._parser_state.error(
299*4b9c6d91SCole Faust                    'invalid double complement', token=tokens[0])
300*4b9c6d91SCole Faust        if tokens[0].type == 'LPAREN':
301*4b9c6d91SCole Faust            last_open_paren = tokens.pop(0)
302*4b9c6d91SCole Faust            single_value = self.parse_value(tokens)
303*4b9c6d91SCole Faust            if not tokens or tokens[0].type != 'RPAREN':
304*4b9c6d91SCole Faust                self._parser_state.error(
305*4b9c6d91SCole Faust                    'unclosed parenthesis', token=last_open_paren)
306*4b9c6d91SCole Faust        else:
307*4b9c6d91SCole Faust            single_value = self._parse_single_constant(tokens[0])
308*4b9c6d91SCole Faust        tokens.pop(0)
309*4b9c6d91SCole Faust        if negate:
310*4b9c6d91SCole Faust            single_value = self._arch.truncate_word(~single_value)
311*4b9c6d91SCole Faust        return single_value
312*4b9c6d91SCole Faust
313*4b9c6d91SCole Faust    # value = constant , [ { '|' , constant } ]
314*4b9c6d91SCole Faust    #       ;
315*4b9c6d91SCole Faust    def parse_value(self, tokens):
316*4b9c6d91SCole Faust        """Parse constants separated bitwise OR operator |.
317*4b9c6d91SCole Faust
318*4b9c6d91SCole Faust        Constants can be:
319*4b9c6d91SCole Faust
320*4b9c6d91SCole Faust        - A number that can be parsed with strtol() in C.
321*4b9c6d91SCole Faust        - A named constant expression.
322*4b9c6d91SCole Faust        - A parenthesized, valid constant expression.
323*4b9c6d91SCole Faust        - A valid constant expression prefixed with the unary bitwise
324*4b9c6d91SCole Faust          complement operator ~.
325*4b9c6d91SCole Faust        - A series of valid constant expressions separated by bitwise
326*4b9c6d91SCole Faust          OR operator |.
327*4b9c6d91SCole Faust
328*4b9c6d91SCole Faust        If there is an error parsing any of the constants, the whole process
329*4b9c6d91SCole Faust        fails.
330*4b9c6d91SCole Faust        """
331*4b9c6d91SCole Faust
332*4b9c6d91SCole Faust        value = 0
333*4b9c6d91SCole Faust        while tokens:
334*4b9c6d91SCole Faust            value |= self._parse_constant(tokens)
335*4b9c6d91SCole Faust            if not tokens or tokens[0].type != 'BITWISE_OR':
336*4b9c6d91SCole Faust                break
337*4b9c6d91SCole Faust            tokens.pop(0)
338*4b9c6d91SCole Faust        else:
339*4b9c6d91SCole Faust            self._parser_state.error('empty constant')
340*4b9c6d91SCole Faust        return value
341*4b9c6d91SCole Faust
342*4b9c6d91SCole Faust    # atom = argument , op , value
343*4b9c6d91SCole Faust    #      ;
344*4b9c6d91SCole Faust    def _parse_atom(self, tokens):
345*4b9c6d91SCole Faust        if not tokens:
346*4b9c6d91SCole Faust            self._parser_state.error('missing argument')
347*4b9c6d91SCole Faust        argument = tokens.pop(0)
348*4b9c6d91SCole Faust        if argument.type != 'ARGUMENT':
349*4b9c6d91SCole Faust            self._parser_state.error('invalid argument', token=argument)
350*4b9c6d91SCole Faust
351*4b9c6d91SCole Faust        if not tokens:
352*4b9c6d91SCole Faust            self._parser_state.error('missing operator')
353*4b9c6d91SCole Faust        operator = tokens.pop(0)
354*4b9c6d91SCole Faust        if operator.type != 'OP':
355*4b9c6d91SCole Faust            self._parser_state.error('invalid operator', token=operator)
356*4b9c6d91SCole Faust
357*4b9c6d91SCole Faust        value = self.parse_value(tokens)
358*4b9c6d91SCole Faust        argument_index = int(argument.value[3:])
359*4b9c6d91SCole Faust        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
360*4b9c6d91SCole Faust            self._parser_state.error('invalid argument', token=argument)
361*4b9c6d91SCole Faust        return Atom(argument_index, operator.value, value)
362*4b9c6d91SCole Faust
363*4b9c6d91SCole Faust    # clause = atom , [ { '&&' , atom } ]
364*4b9c6d91SCole Faust    #        ;
365*4b9c6d91SCole Faust    def _parse_clause(self, tokens):
366*4b9c6d91SCole Faust        atoms = []
367*4b9c6d91SCole Faust        while tokens:
368*4b9c6d91SCole Faust            atoms.append(self._parse_atom(tokens))
369*4b9c6d91SCole Faust            if not tokens or tokens[0].type != 'AND':
370*4b9c6d91SCole Faust                break
371*4b9c6d91SCole Faust            tokens.pop(0)
372*4b9c6d91SCole Faust        else:
373*4b9c6d91SCole Faust            self._parser_state.error('empty clause')
374*4b9c6d91SCole Faust        return atoms
375*4b9c6d91SCole Faust
376*4b9c6d91SCole Faust    # argument-expression = clause , [ { '||' , clause } ]
377*4b9c6d91SCole Faust    #                   ;
378*4b9c6d91SCole Faust    def parse_argument_expression(self, tokens):
379*4b9c6d91SCole Faust        """Parse a argument expression in Disjunctive Normal Form.
380*4b9c6d91SCole Faust
381*4b9c6d91SCole Faust        Since BPF disallows back jumps, we build the basic blocks in reverse
382*4b9c6d91SCole Faust        order so that all the jump targets are known by the time we need to
383*4b9c6d91SCole Faust        reference them.
384*4b9c6d91SCole Faust        """
385*4b9c6d91SCole Faust
386*4b9c6d91SCole Faust        clauses = []
387*4b9c6d91SCole Faust        while tokens:
388*4b9c6d91SCole Faust            clauses.append(self._parse_clause(tokens))
389*4b9c6d91SCole Faust            if not tokens or tokens[0].type != 'OR':
390*4b9c6d91SCole Faust                break
391*4b9c6d91SCole Faust            tokens.pop(0)
392*4b9c6d91SCole Faust        else:
393*4b9c6d91SCole Faust            self._parser_state.error('empty argument expression')
394*4b9c6d91SCole Faust        return clauses
395*4b9c6d91SCole Faust
396*4b9c6d91SCole Faust    # default-action = 'kill-process'
397*4b9c6d91SCole Faust    #                | 'kill-thread'
398*4b9c6d91SCole Faust    #                | 'kill'
399*4b9c6d91SCole Faust    #                | 'trap'
400*4b9c6d91SCole Faust    #                | 'user-notify'
401*4b9c6d91SCole Faust    #                ;
402*4b9c6d91SCole Faust    def _parse_default_action(self, tokens):
403*4b9c6d91SCole Faust        if not tokens:
404*4b9c6d91SCole Faust            self._parser_state.error('missing default action')
405*4b9c6d91SCole Faust        action_token = tokens.pop(0)
406*4b9c6d91SCole Faust        if action_token.type != 'ACTION':
407*4b9c6d91SCole Faust            return self._parser_state.error(
408*4b9c6d91SCole Faust                'invalid default action', token=action_token)
409*4b9c6d91SCole Faust        if action_token.value == 'kill-process':
410*4b9c6d91SCole Faust            return bpf.KillProcess()
411*4b9c6d91SCole Faust        if action_token.value == 'kill-thread':
412*4b9c6d91SCole Faust            return bpf.KillThread()
413*4b9c6d91SCole Faust        if action_token.value == 'kill':
414*4b9c6d91SCole Faust            return self._kill_action
415*4b9c6d91SCole Faust        if action_token.value == 'trap':
416*4b9c6d91SCole Faust            return bpf.Trap()
417*4b9c6d91SCole Faust        if action_token.value == 'user-notify':
418*4b9c6d91SCole Faust            return bpf.UserNotify()
419*4b9c6d91SCole Faust        return self._parser_state.error(
420*4b9c6d91SCole Faust            'invalid permissive default action', token=action_token)
421*4b9c6d91SCole Faust
422*4b9c6d91SCole Faust    # action = 'allow' | '1'
423*4b9c6d91SCole Faust    #        | 'kill-process'
424*4b9c6d91SCole Faust    #        | 'kill-thread'
425*4b9c6d91SCole Faust    #        | 'kill'
426*4b9c6d91SCole Faust    #        | 'trap'
427*4b9c6d91SCole Faust    #        | 'trace'
428*4b9c6d91SCole Faust    #        | 'log'
429*4b9c6d91SCole Faust    #        | 'user-notify'
430*4b9c6d91SCole Faust    #        | 'return' , single-constant
431*4b9c6d91SCole Faust    #        ;
432*4b9c6d91SCole Faust    def parse_action(self, tokens):
433*4b9c6d91SCole Faust        if not tokens:
434*4b9c6d91SCole Faust            self._parser_state.error('missing action')
435*4b9c6d91SCole Faust        action_token = tokens.pop(0)
436*4b9c6d91SCole Faust        # denylist policies must specify a return for every line.
437*4b9c6d91SCole Faust        if self._denylist:
438*4b9c6d91SCole Faust            if action_token.type != 'RETURN':
439*4b9c6d91SCole Faust                self._parser_state.error('invalid denylist policy')
440*4b9c6d91SCole Faust
441*4b9c6d91SCole Faust        if action_token.type == 'ACTION':
442*4b9c6d91SCole Faust            if action_token.value == 'allow':
443*4b9c6d91SCole Faust                return bpf.Allow()
444*4b9c6d91SCole Faust            if action_token.value == 'kill':
445*4b9c6d91SCole Faust                return self._kill_action
446*4b9c6d91SCole Faust            if action_token.value == 'kill-process':
447*4b9c6d91SCole Faust                return bpf.KillProcess()
448*4b9c6d91SCole Faust            if action_token.value == 'kill-thread':
449*4b9c6d91SCole Faust                return bpf.KillThread()
450*4b9c6d91SCole Faust            if action_token.value == 'trap':
451*4b9c6d91SCole Faust                return bpf.Trap()
452*4b9c6d91SCole Faust            if action_token.value == 'trace':
453*4b9c6d91SCole Faust                return bpf.Trace()
454*4b9c6d91SCole Faust            if action_token.value == 'user-notify':
455*4b9c6d91SCole Faust                return bpf.UserNotify()
456*4b9c6d91SCole Faust            if action_token.value == 'log':
457*4b9c6d91SCole Faust                return bpf.Log()
458*4b9c6d91SCole Faust        elif action_token.type == 'NUMERIC_CONSTANT':
459*4b9c6d91SCole Faust            constant = self._parse_single_constant(action_token)
460*4b9c6d91SCole Faust            if constant == 1:
461*4b9c6d91SCole Faust                return bpf.Allow()
462*4b9c6d91SCole Faust        elif action_token.type == 'RETURN':
463*4b9c6d91SCole Faust            if not tokens:
464*4b9c6d91SCole Faust                self._parser_state.error('missing return value')
465*4b9c6d91SCole Faust            if self._ret_log:
466*4b9c6d91SCole Faust                tokens.pop(0)
467*4b9c6d91SCole Faust                return bpf.Log()
468*4b9c6d91SCole Faust            else:
469*4b9c6d91SCole Faust                return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
470*4b9c6d91SCole Faust        return self._parser_state.error('invalid action', token=action_token)
471*4b9c6d91SCole Faust
472*4b9c6d91SCole Faust    # single-filter = action
473*4b9c6d91SCole Faust    #               | argument-expression , [ ';' , action ]
474*4b9c6d91SCole Faust    #               | '!','(', argument-expression, [ ';', action ], ')'
475*4b9c6d91SCole Faust    #               ;
476*4b9c6d91SCole Faust    def _parse_single_filter(self, tokens):
477*4b9c6d91SCole Faust        if not tokens:
478*4b9c6d91SCole Faust            self._parser_state.error('missing filter')
479*4b9c6d91SCole Faust        if tokens[0].type == 'ARGUMENT':
480*4b9c6d91SCole Faust	    # Only argument expressions can start with an ARGUMENT token.
481*4b9c6d91SCole Faust            argument_expression = self.parse_argument_expression(tokens)
482*4b9c6d91SCole Faust            if tokens and tokens[0].type == 'SEMICOLON':
483*4b9c6d91SCole Faust                tokens.pop(0)
484*4b9c6d91SCole Faust                action = self.parse_action(tokens)
485*4b9c6d91SCole Faust            else:
486*4b9c6d91SCole Faust                action = bpf.Allow()
487*4b9c6d91SCole Faust            return Filter(argument_expression, action)
488*4b9c6d91SCole Faust        else:
489*4b9c6d91SCole Faust            return Filter(None, self.parse_action(tokens))
490*4b9c6d91SCole Faust
491*4b9c6d91SCole Faust    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
492*4b9c6d91SCole Faust    #        | single-filter
493*4b9c6d91SCole Faust    #        ;
494*4b9c6d91SCole Faust    def parse_filter(self, tokens):
495*4b9c6d91SCole Faust        """Parse a filter and return a list of Filter objects."""
496*4b9c6d91SCole Faust        if not tokens:
497*4b9c6d91SCole Faust            self._parser_state.error('missing filter')
498*4b9c6d91SCole Faust        filters = []
499*4b9c6d91SCole Faust        if tokens[0].type == 'LBRACE':
500*4b9c6d91SCole Faust            opening_brace = tokens.pop(0)
501*4b9c6d91SCole Faust            while tokens:
502*4b9c6d91SCole Faust                filters.append(self._parse_single_filter(tokens))
503*4b9c6d91SCole Faust                if not tokens or tokens[0].type != 'COMMA':
504*4b9c6d91SCole Faust                    break
505*4b9c6d91SCole Faust                tokens.pop(0)
506*4b9c6d91SCole Faust            if not tokens or tokens[0].type != 'RBRACE':
507*4b9c6d91SCole Faust                self._parser_state.error('unclosed brace', token=opening_brace)
508*4b9c6d91SCole Faust            tokens.pop(0)
509*4b9c6d91SCole Faust        else:
510*4b9c6d91SCole Faust            filters.append(self._parse_single_filter(tokens))
511*4b9c6d91SCole Faust        return filters
512*4b9c6d91SCole Faust
513*4b9c6d91SCole Faust    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
514*4b9c6d91SCole Faust    #                ;
515*4b9c6d91SCole Faust    def _parse_key_value_pair(self, tokens):
516*4b9c6d91SCole Faust        if not tokens:
517*4b9c6d91SCole Faust            self._parser_state.error('missing key')
518*4b9c6d91SCole Faust        key = tokens.pop(0)
519*4b9c6d91SCole Faust        if key.type != 'IDENTIFIER':
520*4b9c6d91SCole Faust            self._parser_state.error('invalid key', token=key)
521*4b9c6d91SCole Faust        if not tokens:
522*4b9c6d91SCole Faust            self._parser_state.error('missing equal')
523*4b9c6d91SCole Faust        if tokens[0].type != 'EQUAL':
524*4b9c6d91SCole Faust            self._parser_state.error('invalid equal', token=tokens[0])
525*4b9c6d91SCole Faust        tokens.pop(0)
526*4b9c6d91SCole Faust        value_list = []
527*4b9c6d91SCole Faust        while tokens:
528*4b9c6d91SCole Faust            value = tokens.pop(0)
529*4b9c6d91SCole Faust            if value.type != 'IDENTIFIER':
530*4b9c6d91SCole Faust                self._parser_state.error('invalid value', token=value)
531*4b9c6d91SCole Faust            value_list.append(value.value)
532*4b9c6d91SCole Faust            if not tokens or tokens[0].type != 'COMMA':
533*4b9c6d91SCole Faust                break
534*4b9c6d91SCole Faust            tokens.pop(0)
535*4b9c6d91SCole Faust        else:
536*4b9c6d91SCole Faust            self._parser_state.error('empty value')
537*4b9c6d91SCole Faust        return (key.value, value_list)
538*4b9c6d91SCole Faust
539*4b9c6d91SCole Faust    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
540*4b9c6d91SCole Faust    #          ;
541*4b9c6d91SCole Faust    def _parse_metadata(self, tokens):
542*4b9c6d91SCole Faust        if not tokens:
543*4b9c6d91SCole Faust            self._parser_state.error('missing opening bracket')
544*4b9c6d91SCole Faust        opening_bracket = tokens.pop(0)
545*4b9c6d91SCole Faust        if opening_bracket.type != 'LBRACKET':
546*4b9c6d91SCole Faust            self._parser_state.error(
547*4b9c6d91SCole Faust                'invalid opening bracket', token=opening_bracket)
548*4b9c6d91SCole Faust        metadata = {}
549*4b9c6d91SCole Faust        while tokens:
550*4b9c6d91SCole Faust            first_token = tokens[0]
551*4b9c6d91SCole Faust            key, value = self._parse_key_value_pair(tokens)
552*4b9c6d91SCole Faust            if key in metadata:
553*4b9c6d91SCole Faust                self._parser_state.error(
554*4b9c6d91SCole Faust                    'duplicate metadata key: "%s"' % key, token=first_token)
555*4b9c6d91SCole Faust            metadata[key] = value
556*4b9c6d91SCole Faust            if not tokens or tokens[0].type != 'SEMICOLON':
557*4b9c6d91SCole Faust                break
558*4b9c6d91SCole Faust            tokens.pop(0)
559*4b9c6d91SCole Faust        if not tokens or tokens[0].type != 'RBRACKET':
560*4b9c6d91SCole Faust            self._parser_state.error('unclosed bracket', token=opening_bracket)
561*4b9c6d91SCole Faust        tokens.pop(0)
562*4b9c6d91SCole Faust        return metadata
563*4b9c6d91SCole Faust
564*4b9c6d91SCole Faust    # syscall-descriptor = syscall-name , [ metadata ]
565*4b9c6d91SCole Faust    #                    | syscall-group-name , [ metadata ]
566*4b9c6d91SCole Faust    #                    ;
567*4b9c6d91SCole Faust    def _parse_syscall_descriptor(self, tokens):
568*4b9c6d91SCole Faust        if not tokens:
569*4b9c6d91SCole Faust            self._parser_state.error('missing syscall descriptor')
570*4b9c6d91SCole Faust        syscall_descriptor = tokens.pop(0)
571*4b9c6d91SCole Faust        # `kill` as a syscall name is a special case since kill is also a valid
572*4b9c6d91SCole Faust        # action and actions have precendence over identifiers.
573*4b9c6d91SCole Faust        if (syscall_descriptor.type != 'IDENTIFIER' and
574*4b9c6d91SCole Faust            syscall_descriptor.value != 'kill'):
575*4b9c6d91SCole Faust            self._parser_state.error(
576*4b9c6d91SCole Faust                'invalid syscall descriptor', token=syscall_descriptor)
577*4b9c6d91SCole Faust        if tokens and tokens[0].type == 'LBRACKET':
578*4b9c6d91SCole Faust            metadata = self._parse_metadata(tokens)
579*4b9c6d91SCole Faust            if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
580*4b9c6d91SCole Faust                return ()
581*4b9c6d91SCole Faust        if '@' in syscall_descriptor.value:
582*4b9c6d91SCole Faust            # This is a syscall group.
583*4b9c6d91SCole Faust            subtokens = syscall_descriptor.value.split('@')
584*4b9c6d91SCole Faust            if len(subtokens) != 2:
585*4b9c6d91SCole Faust                self._parser_state.error(
586*4b9c6d91SCole Faust                    'invalid syscall group name', token=syscall_descriptor)
587*4b9c6d91SCole Faust            syscall_group_name, syscall_namespace_name = subtokens
588*4b9c6d91SCole Faust            if syscall_namespace_name not in self._arch.syscall_groups:
589*4b9c6d91SCole Faust                self._parser_state.error(
590*4b9c6d91SCole Faust                    'nonexistent syscall group namespace',
591*4b9c6d91SCole Faust                    token=syscall_descriptor)
592*4b9c6d91SCole Faust            syscall_namespace = self._arch.syscall_groups[
593*4b9c6d91SCole Faust                syscall_namespace_name]
594*4b9c6d91SCole Faust            if syscall_group_name not in syscall_namespace:
595*4b9c6d91SCole Faust                self._parser_state.error(
596*4b9c6d91SCole Faust                    'nonexistent syscall group', token=syscall_descriptor)
597*4b9c6d91SCole Faust            return (Syscall(name, self._arch.syscalls[name])
598*4b9c6d91SCole Faust                    for name in syscall_namespace[syscall_group_name])
599*4b9c6d91SCole Faust        if syscall_descriptor.value not in self._arch.syscalls:
600*4b9c6d91SCole Faust            self._parser_state.error(
601*4b9c6d91SCole Faust                'nonexistent syscall', token=syscall_descriptor)
602*4b9c6d91SCole Faust        return (Syscall(syscall_descriptor.value,
603*4b9c6d91SCole Faust                        self._arch.syscalls[syscall_descriptor.value]), )
604*4b9c6d91SCole Faust
605*4b9c6d91SCole Faust    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
606*4b9c6d91SCole Faust    #                       ':' , filter
607*4b9c6d91SCole Faust    #                  | syscall-descriptor , ':' , filter
608*4b9c6d91SCole Faust    #                  ;
609*4b9c6d91SCole Faust    def parse_filter_statement(self, tokens):
610*4b9c6d91SCole Faust        """Parse a filter statement and return a ParsedFilterStatement."""
611*4b9c6d91SCole Faust        if not tokens:
612*4b9c6d91SCole Faust            self._parser_state.error('empty filter statement')
613*4b9c6d91SCole Faust        syscall_descriptors = []
614*4b9c6d91SCole Faust        if tokens[0].type == 'LBRACE':
615*4b9c6d91SCole Faust            opening_brace = tokens.pop(0)
616*4b9c6d91SCole Faust            while tokens:
617*4b9c6d91SCole Faust                syscall_descriptors.extend(
618*4b9c6d91SCole Faust                    self._parse_syscall_descriptor(tokens))
619*4b9c6d91SCole Faust                if not tokens or tokens[0].type != 'COMMA':
620*4b9c6d91SCole Faust                    break
621*4b9c6d91SCole Faust                tokens.pop(0)
622*4b9c6d91SCole Faust            if not tokens or tokens[0].type != 'RBRACE':
623*4b9c6d91SCole Faust                self._parser_state.error('unclosed brace', token=opening_brace)
624*4b9c6d91SCole Faust            tokens.pop(0)
625*4b9c6d91SCole Faust        else:
626*4b9c6d91SCole Faust            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
627*4b9c6d91SCole Faust        if not tokens:
628*4b9c6d91SCole Faust            self._parser_state.error('missing colon')
629*4b9c6d91SCole Faust        if tokens[0].type != 'COLON':
630*4b9c6d91SCole Faust            self._parser_state.error('invalid colon', token=tokens[0])
631*4b9c6d91SCole Faust        # Given that there can be multiple syscalls and filters in a single
632*4b9c6d91SCole Faust        # filter statement, use the colon token as the anchor for error location
633*4b9c6d91SCole Faust        # purposes.
634*4b9c6d91SCole Faust        colon_token = tokens.pop(0)
635*4b9c6d91SCole Faust        parsed_filter = self.parse_filter(tokens)
636*4b9c6d91SCole Faust        if not syscall_descriptors:
637*4b9c6d91SCole Faust            return None
638*4b9c6d91SCole Faust        return ParsedFilterStatement(
639*4b9c6d91SCole Faust            tuple(syscall_descriptors), parsed_filter, colon_token)
640*4b9c6d91SCole Faust
641*4b9c6d91SCole Faust    # include-statement = '@include' , posix-path
642*4b9c6d91SCole Faust    #                   ;
643*4b9c6d91SCole Faust    def _parse_include_statement(self, tokens):
644*4b9c6d91SCole Faust        if not tokens:
645*4b9c6d91SCole Faust            self._parser_state.error('empty filter statement')
646*4b9c6d91SCole Faust        if tokens[0].type != 'INCLUDE':
647*4b9c6d91SCole Faust            self._parser_state.error('invalid include', token=tokens[0])
648*4b9c6d91SCole Faust        tokens.pop(0)
649*4b9c6d91SCole Faust        if not tokens:
650*4b9c6d91SCole Faust            self._parser_state.error('empty include path')
651*4b9c6d91SCole Faust        include_path = tokens.pop(0)
652*4b9c6d91SCole Faust        if include_path.type != 'PATH':
653*4b9c6d91SCole Faust            self._parser_state.error(
654*4b9c6d91SCole Faust                'invalid include path', token=include_path)
655*4b9c6d91SCole Faust        if len(self._parser_states) == self._include_depth_limit:
656*4b9c6d91SCole Faust            self._parser_state.error('@include statement nested too deep')
657*4b9c6d91SCole Faust        include_filename = os.path.normpath(
658*4b9c6d91SCole Faust            os.path.join(
659*4b9c6d91SCole Faust                os.path.dirname(self._parser_state.filename),
660*4b9c6d91SCole Faust                include_path.value))
661*4b9c6d91SCole Faust        if not os.path.isfile(include_filename):
662*4b9c6d91SCole Faust            self._parser_state.error(
663*4b9c6d91SCole Faust                'Could not @include %s' % include_filename, token=include_path)
664*4b9c6d91SCole Faust        return self._parse_policy_file(include_filename)
665*4b9c6d91SCole Faust
666*4b9c6d91SCole Faust    def _parse_frequency_file(self, filename):
667*4b9c6d91SCole Faust        self._parser_states.append(ParserState(filename))
668*4b9c6d91SCole Faust        try:
669*4b9c6d91SCole Faust            frequency_mapping = collections.defaultdict(int)
670*4b9c6d91SCole Faust            with open(filename) as frequency_file:
671*4b9c6d91SCole Faust                for tokens in self._parser_state.tokenize(frequency_file):
672*4b9c6d91SCole Faust                    syscall_numbers = self._parse_syscall_descriptor(tokens)
673*4b9c6d91SCole Faust                    if not tokens:
674*4b9c6d91SCole Faust                        self._parser_state.error('missing colon')
675*4b9c6d91SCole Faust                    if tokens[0].type != 'COLON':
676*4b9c6d91SCole Faust                        self._parser_state.error(
677*4b9c6d91SCole Faust                            'invalid colon', token=tokens[0])
678*4b9c6d91SCole Faust                    tokens.pop(0)
679*4b9c6d91SCole Faust
680*4b9c6d91SCole Faust                    if not tokens:
681*4b9c6d91SCole Faust                        self._parser_state.error('missing number')
682*4b9c6d91SCole Faust                    number = tokens.pop(0)
683*4b9c6d91SCole Faust                    if number.type != 'NUMERIC_CONSTANT':
684*4b9c6d91SCole Faust                        self._parser_state.error(
685*4b9c6d91SCole Faust                            'invalid number', token=number)
686*4b9c6d91SCole Faust                    number_value = int(number.value, base=0)
687*4b9c6d91SCole Faust                    if number_value < 0:
688*4b9c6d91SCole Faust                        self._parser_state.error(
689*4b9c6d91SCole Faust                            'invalid number', token=number)
690*4b9c6d91SCole Faust
691*4b9c6d91SCole Faust                    for syscall_number in syscall_numbers:
692*4b9c6d91SCole Faust                        frequency_mapping[syscall_number] += number_value
693*4b9c6d91SCole Faust            return frequency_mapping
694*4b9c6d91SCole Faust        finally:
695*4b9c6d91SCole Faust            self._parser_states.pop()
696*4b9c6d91SCole Faust
697*4b9c6d91SCole Faust    # frequency-statement = '@frequency' , posix-path
698*4b9c6d91SCole Faust    #                      ;
699*4b9c6d91SCole Faust    def _parse_frequency_statement(self, tokens):
700*4b9c6d91SCole Faust        if not tokens:
701*4b9c6d91SCole Faust            self._parser_state.error('empty frequency statement')
702*4b9c6d91SCole Faust        if tokens[0].type != 'FREQUENCY':
703*4b9c6d91SCole Faust            self._parser_state.error('invalid frequency', token=tokens[0])
704*4b9c6d91SCole Faust        tokens.pop(0)
705*4b9c6d91SCole Faust        if not tokens:
706*4b9c6d91SCole Faust            self._parser_state.error('empty frequency path')
707*4b9c6d91SCole Faust        frequency_path = tokens.pop(0)
708*4b9c6d91SCole Faust        if frequency_path.type != 'PATH':
709*4b9c6d91SCole Faust            self._parser_state.error(
710*4b9c6d91SCole Faust                'invalid frequency path', token=frequency_path)
711*4b9c6d91SCole Faust        frequency_filename = os.path.normpath(
712*4b9c6d91SCole Faust            os.path.join(
713*4b9c6d91SCole Faust                os.path.dirname(self._parser_state.filename),
714*4b9c6d91SCole Faust                frequency_path.value))
715*4b9c6d91SCole Faust        if not os.path.isfile(frequency_filename):
716*4b9c6d91SCole Faust            self._parser_state.error(
717*4b9c6d91SCole Faust                'Could not open frequency file %s' % frequency_filename,
718*4b9c6d91SCole Faust                token=frequency_path)
719*4b9c6d91SCole Faust        return self._parse_frequency_file(frequency_filename)
720*4b9c6d91SCole Faust
721*4b9c6d91SCole Faust    # default-statement = '@default' , default-action
722*4b9c6d91SCole Faust    #                   ;
723*4b9c6d91SCole Faust    def _parse_default_statement(self, tokens):
724*4b9c6d91SCole Faust        if not tokens:
725*4b9c6d91SCole Faust            self._parser_state.error('empty default statement')
726*4b9c6d91SCole Faust        if tokens[0].type != 'DEFAULT':
727*4b9c6d91SCole Faust            self._parser_state.error('invalid default', token=tokens[0])
728*4b9c6d91SCole Faust        tokens.pop(0)
729*4b9c6d91SCole Faust        if not tokens:
730*4b9c6d91SCole Faust            self._parser_state.error('empty action')
731*4b9c6d91SCole Faust        return self._parse_default_action(tokens)
732*4b9c6d91SCole Faust
733*4b9c6d91SCole Faust    def _parse_policy_file(self, filename):
734*4b9c6d91SCole Faust        self._parser_states.append(ParserState(filename))
735*4b9c6d91SCole Faust        try:
736*4b9c6d91SCole Faust            statements = []
737*4b9c6d91SCole Faust            denylist_header = False
738*4b9c6d91SCole Faust            with open(filename) as policy_file:
739*4b9c6d91SCole Faust                for tokens in self._parser_state.tokenize(policy_file):
740*4b9c6d91SCole Faust                    if tokens[0].type == 'INCLUDE':
741*4b9c6d91SCole Faust                        statements.extend(
742*4b9c6d91SCole Faust                            self._parse_include_statement(tokens))
743*4b9c6d91SCole Faust                    elif tokens[0].type == 'FREQUENCY':
744*4b9c6d91SCole Faust                        for syscall_number, frequency in self._parse_frequency_statement(
745*4b9c6d91SCole Faust                                tokens).items():
746*4b9c6d91SCole Faust                            self._frequency_mapping[
747*4b9c6d91SCole Faust                                syscall_number] += frequency
748*4b9c6d91SCole Faust                    elif tokens[0].type == 'DEFAULT':
749*4b9c6d91SCole Faust                        self._default_action = self._parse_default_statement(
750*4b9c6d91SCole Faust                            tokens)
751*4b9c6d91SCole Faust                    elif tokens[0].type == 'DENYLIST':
752*4b9c6d91SCole Faust                        tokens.pop()
753*4b9c6d91SCole Faust                        if not self._denylist:
754*4b9c6d91SCole Faust                            self._parser_state.error('policy is denylist, but '
755*4b9c6d91SCole Faust                                                     'flag --denylist not '
756*4b9c6d91SCole Faust                                                     'passed in.')
757*4b9c6d91SCole Faust                        else:
758*4b9c6d91SCole Faust                            denylist_header = True
759*4b9c6d91SCole Faust                    else:
760*4b9c6d91SCole Faust                        statement = self.parse_filter_statement(tokens)
761*4b9c6d91SCole Faust                        if statement is None:
762*4b9c6d91SCole Faust                            # If all the syscalls in the statement are for
763*4b9c6d91SCole Faust                            # another arch, skip the whole statement.
764*4b9c6d91SCole Faust                            continue
765*4b9c6d91SCole Faust                        statements.append(statement)
766*4b9c6d91SCole Faust
767*4b9c6d91SCole Faust                    if tokens:
768*4b9c6d91SCole Faust                        self._parser_state.error(
769*4b9c6d91SCole Faust                            'extra tokens', token=tokens[0])
770*4b9c6d91SCole Faust            if self._denylist and not denylist_header:
771*4b9c6d91SCole Faust                self._parser_state.error('policy must contain @denylist flag to'
772*4b9c6d91SCole Faust                                         ' be compiled with --denylist flag.')
773*4b9c6d91SCole Faust            return statements
774*4b9c6d91SCole Faust        finally:
775*4b9c6d91SCole Faust            self._parser_states.pop()
776*4b9c6d91SCole Faust
777*4b9c6d91SCole Faust    def parse_file(self, filename):
778*4b9c6d91SCole Faust        """Parse a file and return the list of FilterStatements."""
779*4b9c6d91SCole Faust        self._frequency_mapping = collections.defaultdict(int)
780*4b9c6d91SCole Faust        try:
781*4b9c6d91SCole Faust            statements = [x for x in self._parse_policy_file(filename)]
782*4b9c6d91SCole Faust        except RecursionError:
783*4b9c6d91SCole Faust            raise ParseException(
784*4b9c6d91SCole Faust                'recursion limit exceeded',
785*4b9c6d91SCole Faust                filename,
786*4b9c6d91SCole Faust                line=self._parser_states[-1].line)
787*4b9c6d91SCole Faust
788*4b9c6d91SCole Faust        # Collapse statements into a single syscall-to-filter-list, remembering
789*4b9c6d91SCole Faust        # the token for each filter for better diagnostics.
790*4b9c6d91SCole Faust        syscall_filter_mapping = {}
791*4b9c6d91SCole Faust        syscall_filter_definitions = {}
792*4b9c6d91SCole Faust        filter_statements = []
793*4b9c6d91SCole Faust        for syscalls, filters, token in statements:
794*4b9c6d91SCole Faust            for syscall in syscalls:
795*4b9c6d91SCole Faust                if syscall not in syscall_filter_mapping:
796*4b9c6d91SCole Faust                    filter_statements.append(
797*4b9c6d91SCole Faust                        FilterStatement(
798*4b9c6d91SCole Faust                            syscall, self._frequency_mapping.get(syscall, 1),
799*4b9c6d91SCole Faust                            []))
800*4b9c6d91SCole Faust                    syscall_filter_mapping[syscall] = filter_statements[-1]
801*4b9c6d91SCole Faust                    syscall_filter_definitions[syscall] = []
802*4b9c6d91SCole Faust                for filt in filters:
803*4b9c6d91SCole Faust                    syscall_filter_mapping[syscall].filters.append(filt)
804*4b9c6d91SCole Faust                    syscall_filter_definitions[syscall].append(token)
805*4b9c6d91SCole Faust        default_action = self._override_default_action or self._default_action
806*4b9c6d91SCole Faust        for filter_statement in filter_statements:
807*4b9c6d91SCole Faust            unconditional_actions_suffix = list(
808*4b9c6d91SCole Faust                itertools.dropwhile(lambda filt: filt.expression is not None,
809*4b9c6d91SCole Faust                                    filter_statement.filters))
810*4b9c6d91SCole Faust            if len(unconditional_actions_suffix) == 1:
811*4b9c6d91SCole Faust                # The last filter already has an unconditional action, no need
812*4b9c6d91SCole Faust                # to add another one.
813*4b9c6d91SCole Faust                continue
814*4b9c6d91SCole Faust            if len(unconditional_actions_suffix) > 1:
815*4b9c6d91SCole Faust                previous_definition_token = syscall_filter_definitions[
816*4b9c6d91SCole Faust                    filter_statement.syscall][
817*4b9c6d91SCole Faust                        -len(unconditional_actions_suffix)]
818*4b9c6d91SCole Faust                current_definition_token = syscall_filter_definitions[
819*4b9c6d91SCole Faust                    filter_statement.syscall][
820*4b9c6d91SCole Faust                        -len(unconditional_actions_suffix) + 1]
821*4b9c6d91SCole Faust                raise ParseException(
822*4b9c6d91SCole Faust                    ('Syscall %s (number %d) already had '
823*4b9c6d91SCole Faust                     'an unconditional action applied') %
824*4b9c6d91SCole Faust                    (filter_statement.syscall.name,
825*4b9c6d91SCole Faust                     filter_statement.syscall.number),
826*4b9c6d91SCole Faust                    filename=current_definition_token.filename,
827*4b9c6d91SCole Faust                    token=current_definition_token) from ParseException(
828*4b9c6d91SCole Faust                        'Previous definition',
829*4b9c6d91SCole Faust                        filename=previous_definition_token.filename,
830*4b9c6d91SCole Faust                        token=previous_definition_token)
831*4b9c6d91SCole Faust            assert not unconditional_actions_suffix
832*4b9c6d91SCole Faust            filter_statement.filters.append(
833*4b9c6d91SCole Faust                Filter(expression=None, action=default_action))
834*4b9c6d91SCole Faust        return ParsedPolicy(default_action, filter_statements)
835