xref: /aosp_15_r20/external/perfetto/python/generators/sql_processing/docs_parse.py (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1#!/usr/bin/env python3
2# Copyright (C) 2022 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16from abc import ABC
17from dataclasses import dataclass
18import re
19import sys
20from typing import Dict, List, Optional, Set, NamedTuple
21
22from python.generators.sql_processing.docs_extractor import DocsExtractor
23from python.generators.sql_processing.utils import ObjKind
24from python.generators.sql_processing.utils import COLUMN_TYPES, MACRO_ARG_TYPES
25
26from python.generators.sql_processing.utils import ALLOWED_PREFIXES
27from python.generators.sql_processing.utils import OBJECT_NAME_ALLOWLIST
28
29from python.generators.sql_processing.utils import ANY_PATTERN
30from python.generators.sql_processing.utils import ARG_DEFINITION_PATTERN
31
32
33def _is_internal(name: str) -> bool:
34  return re.match(r'^_.*', name, re.IGNORECASE) is not None
35
36
37def _is_snake_case(s: str) -> bool:
38  return re.fullmatch(r'^[a-z_0-9]*$', s) is not None
39
40
def parse_comment(comment: str) -> str:
  """
  Collapse a (possibly multi-line) SQL comment into a single line of text.

  Every line is trimmed, loses its leading dashes and the whitespace that
  follows them, and the resulting pieces are joined with single spaces, so a
  comment made of the lines "-- Foo" and " -- bar." becomes "Foo bar.".
  """
  pieces = []
  for raw_line in comment.strip().split('\n'):
    without_dashes = raw_line.strip().lstrip('--')
    pieces.append(without_dashes.lstrip())
  return ' '.join(pieces)
47
48
def get_module_prefix_error(name: str, path: str, module: str) -> Optional[str]:
  """Returns error message if the name is not correct, None otherwise.

  Args:
    name: Name of the table/view/function being checked.
    path: Path of the SQL file defining the object; matched against the keys
      of ALLOWED_PREFIXES (as a path prefix) and of OBJECT_NAME_ALLOWLIST
      (exact match).
    module: Module the file belongs to.

  Returns:
    None if the name is acceptable, otherwise a human readable description
    of the naming rule which was violated.
  """
  if module in ("common", "prelude", "deprecated"):
    # Objects in these modules must NOT carry the module name as a prefix.
    if name.startswith(module):
      return (f'Names of tables/views/functions in the "{module}" module '
              f'should not start with {module}')
    return None
  if name.startswith(module):
    # Module prefix is always allowed.
    return None
  # The explicit per-file allowlist does not depend on ALLOWED_PREFIXES, so it
  # is checked once up front; this way it is honoured even when
  # ALLOWED_PREFIXES has no entries at all.
  if path in OBJECT_NAME_ALLOWLIST and name in OBJECT_NAME_ALLOWLIST[path]:
    return None
  allowed_prefixes = [module]
  for path_prefix, allowed_name_prefixes in ALLOWED_PREFIXES.items():
    if not path.startswith(path_prefix):
      continue
    if any(name.startswith(prefix) for prefix in allowed_name_prefixes):
      return None
    # Remember the extra prefixes so the error message can list them.
    allowed_prefixes.extend(allowed_name_prefixes)
  return (
      f'Names of tables/views/functions at path "{path}" should be prefixed '
      f'with one of following names: {", ".join(allowed_prefixes)}')
71
72
class Arg(NamedTuple):
  """A single parsed argument or column definition."""
  # Type of the definition; the literal 'JOINID' when it was declared as
  # JOINID(...).
  type: str
  # Type exactly as it was captured from the SQL definition.
  long_type: str
  # Text of the comment attached to the definition ('' when there is none).
  description: str
  # Text captured between the parentheses of JOINID(...); None for every
  # other type.
  joinid_column: Optional[str]
78
79
class AbstractDocParser(ABC):
  """Base class with the validation helpers shared by the doc parsers.

  A parser is created for a SQL file (`path`) and its `module`. Subclasses
  assign `self.name` before calling the `_parse_*` helpers. Validation
  problems are not raised: they are collected as strings in `self.errors`.
  """

  @dataclass
  class Column:
    pass

  def __init__(self, path: str, module: str):
    self.path = path
    self.module = module
    # Name of the object currently being parsed; set by subclasses.
    self.name: Optional[str] = None
    # Human readable validation errors accumulated through _error().
    self.errors: List[str] = []

  def _parse_name(self, upper: bool = False):
    """Checks the module prefix of `self.name` and returns the stripped name.

    A prefix violation is recorded as an error; the name is returned either
    way. `upper` is currently unused.
    """
    assert self.name
    assert isinstance(self.name, str)
    module_prefix_error = get_module_prefix_error(self.name, self.path,
                                                  self.module)
    if module_prefix_error is not None:
      self._error(module_prefix_error)
    return self.name.strip()

  def _parse_desc_not_empty(self, desc: str):
    """Returns `desc` stripped, recording an error if nothing is left.

    None, empty and whitespace-only descriptions are all treated as missing
    and yield ''.
    """
    stripped = desc.strip() if desc else ''
    if not stripped:
      self._error('Description of the table/view/function/macro is missing')
    return stripped

  def _parse_columns(self, schema: str, kind: ObjKind) -> Dict[str, Arg]:
    """Parses a column schema and validates every column.

    Args:
      schema: Column definitions; a falsy value yields no columns.
      kind: Kind of the object owning the columns. Only table functions and
        tables/views support columns.

    Returns:
      Mapping from column name to its parsed definition. Columns failing
      validation are still part of the result.
    """
    columns = self._parse_args_definition(schema) if schema else {}
    for column_name, properties in columns.items():
      if not properties.description:
        self._error(f'Column "{column_name}" is missing a description. Please add a '
                    'comment in front of the column definition')
        # The type of an undocumented column is not checked.
        continue

      upper_arg_type = properties.type.upper()
      if kind is ObjKind.table_function:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Table function column "{column_name}" has unsupported type "{properties.type}".')
      elif kind is ObjKind.table_view:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Table/view column "{column_name}" has unsupported type "{properties.type}".')
      else:
        self._error("This Perfetto SQL object doesn't support columns.")

    return columns

  def _parse_args(self, sql_args_str: str, kind: ObjKind) -> Dict[str, Arg]:
    """Parses an argument list and validates every argument.

    Args:
      sql_args_str: Argument definitions.
      kind: Kind of the object owning the arguments. Functions and table
        functions are checked against COLUMN_TYPES, macros against
        MACRO_ARG_TYPES.

    Returns:
      Mapping from argument name to its parsed definition. Arguments failing
      validation are still part of the result.
    """
    args = self._parse_args_definition(sql_args_str)

    for arg_name, arg in args.items():
      if not arg.description:
        self._error(f'Arg "{arg_name}" is missing a description. '
                    'Please add a comment in front of the arg definition.')

      upper_arg_type = arg.type.upper()
      if kind is ObjKind.function or kind is ObjKind.table_function:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Function arg "{arg_name}" has unsupported type "{arg.type}".')
      elif kind is ObjKind.macro:
        if upper_arg_type not in MACRO_ARG_TYPES:
          self._error(
              f'Macro arg "{arg_name}" has unsupported type "{arg.type}".')
      else:
        self._error("This Perfetto SQL object doesn't support types.")

    return args

  # Parse function argument definition list or a table schema, e.g.
  # arg1 INT, arg2 STRING, including their comments.
  def _parse_args_definition(self, args_str: str) -> Dict[str, Arg]:
    """Splits `args_str` into individual definitions.

    Definitions are consumed one at a time from the front of the string. If
    the remainder does not match the expected format, an error is recorded
    and the definitions parsed so far are returned.
    """
    result = {}
    remaining_args = args_str.strip()
    while remaining_args:
      m = re.match(fr'^{ARG_DEFINITION_PATTERN}({ANY_PATTERN})', remaining_args)
      if not m:
        # Every fragment which interpolates a value needs its own f prefix.
        self._error(f'Expected "{args_str}" to correspond to '
                    '"-- Comment\n arg_name TYPE" format '
                    f'({ARG_DEFINITION_PATTERN})')
        return result
      groups = m.groups()
      comment = '' if groups[0] is None else parse_comment(groups[0])
      # The last three groups are the name, the type and the unparsed rest.
      name = groups[-3]
      arg_type = groups[-2]

      joinid_match = re.match(r'JOINID\(([_A-Za-z\.]*)\)', arg_type)
      if joinid_match:
        result[name] = Arg('JOINID', arg_type, comment,
                           joinid_match.groups()[0])
      else:
        result[name] = Arg(arg_type, arg_type, comment, None)
      # Strip whitespace and comma and parse the next arg.
      remaining_args = groups[-1].lstrip().lstrip(',').lstrip()

    return result

  def _error(self, error: str):
    """Records `error`, prefixed with the object name and the file path."""
    self.errors.append(
        f'Error while parsing documentation for "{self.name}" in {self.path}: '
        f'{error}')
181
182
class TableOrView:
  """Documentation extracted for a single table or view."""
  # Name of the table/view.
  name: str
  # Object type as captured from the CREATE statement.
  type: str
  # Description of the table/view.
  desc: str
  # Column name -> parsed column definition.
  cols: Dict[str, Arg]
  # Names of the columns whose type is "ID".
  id_columns: List[str]
  # Column name -> definition for the columns whose type is "JOINID".
  joinid_cols: Dict[str, Arg]

  def __init__(self, name: str, type: str, desc: str, cols: Dict[str, Arg],
               id_columns: List[str], joinid_columns: Dict[str, Arg]):
    """Stores the arguments; `joinid_columns` is kept as `joinid_cols`."""
    self.name = name
    self.type = type
    self.desc = desc
    self.cols = cols
    self.id_columns = id_columns
    self.joinid_cols = joinid_columns
198
199
class TableViewDocParser(AbstractDocParser):
  """Parses documentation for CREATE TABLE and CREATE VIEW statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableOrView]:
    """Validates `doc` and converts it into a TableOrView.

    Returns None, with an error recorded, when the statement uses
    CREATE OR REPLACE, has no schema, is a table which is neither PERFETTO
    nor VIRTUAL, or is a virtual table. The object named "window" is exempt
    from the schema and virtual table checks. Internal (underscore prefixed)
    objects are skipped silently.
    """
    assert doc.obj_kind == ObjKind.table_view

    (or_replace, perfetto_or_virtual, obj_type, self.name,
     schema) = doc.obj_match

    if or_replace is not None:
      self._error(
          f'{obj_type} "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')
      return None

    if _is_internal(self.name):
      return None

    is_window = self.name.lower() == "window"

    if not schema and not is_window:
      self._error(
          f'{obj_type} "{self.name}": schema is missing for a non-internal stdlib'
          f' perfetto table or view')
      return None

    is_table = obj_type.lower() == "table"

    if is_table and not perfetto_or_virtual:
      self._error(
          f'{obj_type} "{self.name}": Can only expose CREATE PERFETTO tables')
      return None

    # perfetto_or_virtual is known to be set here whenever is_table holds.
    is_virtual_table = is_table and perfetto_or_virtual.lower() == "virtual"
    if is_virtual_table and not is_window:
      self._error(f'{obj_type} "{self.name}": Virtual tables cannot be exposed.')
      return None

    # Parse the schema exactly once: every call appends the column errors to
    # self.errors, so a second call would report each of them twice.
    cols = self._parse_columns(schema, ObjKind.table_view)
    id_columns = [
        col_name for col_name, arg in cols.items() if arg.type == "ID"
    ]
    joinid_cols = {
        col_name: arg for col_name, arg in cols.items() if arg.type == "JOINID"
    }

    return TableOrView(
        name=self._parse_name(),
        type=obj_type,
        desc=self._parse_desc_not_empty(doc.description),
        cols=cols,
        id_columns=id_columns,
        joinid_columns=joinid_cols)
255
256
class Function:
  """Documentation extracted for a single function."""
  # Name of the function.
  name: str
  # Description of the function.
  desc: str
  # Argument name -> parsed argument definition.
  args: Dict[str, Arg]
  # Return type as captured from the statement.
  return_type: str
  # Description of the returned value.
  return_desc: str

  def __init__(self, name: str, desc: str, args: Dict[str, Arg],
               return_type: str, return_desc: str):
    """Stores every argument in the attribute of the same name."""
    self.name = name
    self.desc = desc
    self.args = args
    self.return_type = return_type
    self.return_desc = return_desc
270
271
class FunctionDocParser(AbstractDocParser):
  """Parses documentation for CREATE_FUNCTION statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Function]:
    """Validates `doc` and converts it into a Function.

    Internal (underscore prefixed) functions yield None. All validation
    problems are recorded in `self.errors`; a Function is still returned for
    non-internal functions.
    """
    or_replace, self.name, sql_args, ret_comment, ret_type = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Internal functions are not documented.
    if _is_internal(self.name):
      return None

    name = self._parse_name()
    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be {name.casefold()})')

    if ret_comment is None:
      ret_desc = None
    else:
      ret_desc = parse_comment(ret_comment)
    if not ret_desc:
      self._error(f'Function "{name}": return description is missing')

    # The description is validated before the arguments so that the recorded
    # errors keep their order.
    desc = self._parse_desc_not_empty(doc.description)
    parsed_args = self._parse_args(sql_args, ObjKind.function)
    return Function(
        name=name,
        desc=desc,
        args=parsed_args,
        return_type=ret_type,
        return_desc=ret_desc,
    )
308
309
class TableFunction:
  """Documentation extracted for a single table function."""
  # Name of the table function.
  name: str
  # Description of the table function.
  desc: str
  # Column name -> parsed definition of a returned column.
  cols: Dict[str, Arg]
  # Argument name -> parsed argument definition.
  args: Dict[str, Arg]

  def __init__(self, name: str, desc: str, cols: Dict[str, Arg],
               args: Dict[str, Arg]):
    """Stores every argument in the attribute of the same name."""
    self.name = name
    self.desc = desc
    self.cols = cols
    self.args = args
321
322
class TableFunctionDocParser(AbstractDocParser):
  """Parses documentation for table function statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableFunction]:
    """Validates `doc` and converts it into a TableFunction.

    Returns None when the statement uses CREATE OR REPLACE (an error is
    recorded) or when the function is internal (underscore prefixed).
    """
    or_replace, self.name, sql_args, _ret_comment, sql_columns = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')
      return None

    # Internal functions are not documented.
    if _is_internal(self.name):
      return None

    name = self._parse_name()
    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    # Validate in the order description, columns, arguments so that the
    # recorded errors keep their order.
    desc = self._parse_desc_not_empty(doc.description)
    cols = self._parse_columns(sql_columns, ObjKind.table_function)
    parsed_args = self._parse_args(sql_args, ObjKind.table_function)
    return TableFunction(name=name, desc=desc, cols=cols, args=parsed_args)
355
356
class Macro:
  """Documentation extracted for a single macro."""
  # Name of the macro.
  name: str
  # Description of the macro.
  desc: str
  # Description of what the macro returns.
  return_desc: str
  # Return type as captured from the statement.
  return_type: str
  # Argument name -> parsed argument definition.
  args: Dict[str, Arg]

  def __init__(self, name: str, desc: str, return_desc: str, return_type: str,
               args: Dict[str, Arg]) -> None:
    """Stores every argument in the attribute of the same name."""
    self.name = name
    self.desc = desc
    self.return_desc = return_desc
    self.return_type = return_type
    self.args = args
371
372
class MacroDocParser(AbstractDocParser):
  """Parses documentation for macro statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Macro]:
    """Validates `doc` and converts it into a Macro.

    Internal (underscore prefixed) macros yield None. All validation problems
    are recorded in `self.errors`; a Macro is still returned for non-internal
    macros. A missing return comment results in an empty return description.
    """
    or_replace, self.name, args, return_desc, return_type = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Macro "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal macros.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Macro name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    desc = self._parse_desc_not_empty(doc.description)
    # parse_comment() cannot handle None, so guard against an absent comment.
    parsed_return_desc = ('' if return_desc is None else
                          parse_comment(return_desc))

    return Macro(
        name=name,
        desc=desc,
        return_desc=parsed_return_desc,
        return_type=return_type,
        args=self._parse_args(args, ObjKind.macro),
    )
405
406
class Include:
  """A single include of another module."""
  # Package of the included module.
  package: str
  # Full name of the included module.
  module: str
  # Components of the module name, as a list.
  module_as_list: List[str]

  def __init__(self, package: str, module: str,
               module_as_list: List[str]) -> None:
    """Stores every argument in the attribute of the same name."""
    self.package = package
    self.module = module
    self.module_as_list = module_as_list
416
417
class IncludeParser(AbstractDocParser):
  """Parses the includes of module."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Include]:
    """Builds an Include from the first element of `doc.obj_match`.

    The included module name also replaces `self.module`.
    """
    included_module = list(doc.obj_match)[0]
    self.module = included_module
    # The package is the part of the module name before the first dot.
    parts = included_module.split('.')
    return Include(
        package=parts[0],
        module=included_module,
        module_as_list=parts,
    )
433
434
class ParsedModule:
  """Data class containing all of the documentation of single SQL file"""
  # Every attribute below is assigned per instance in __init__. The list
  # attributes deliberately have no class-level default: a class-level list
  # would be one object shared by the class and all of its instances.
  package_name: str = ""
  # Components of the module name; `module` is these joined with '.'.
  module_as_list: List[str]
  module: str
  # Validation errors collected while extracting/parsing the file.
  errors: List[str]
  table_views: List[TableOrView]
  functions: List[Function]
  table_functions: List[TableFunction]
  macros: List[Macro]
  includes: List[Include]
  # Table/view name -> names of its ID columns.
  id_columns: Dict[str, List[str]]

  def __init__(self, package_name: str, module_as_list: List[str],
               errors: List[str], table_views: List[TableOrView],
               functions: List[Function], table_functions: List[TableFunction],
               macros: List[Macro], includes: List[Include]):
    """Stores the arguments and derives `module` and `id_columns` from them."""
    self.package_name = package_name
    self.module_as_list = module_as_list
    self.module = ".".join(module_as_list)
    self.errors = errors
    self.table_views = table_views
    self.functions = functions
    self.table_functions = table_functions
    self.macros = macros
    self.includes = includes
    self.id_columns = {o.name: o.id_columns for o in table_views}
462
463
def parse_file(path: str, sql: str) -> Optional[ParsedModule]:
  """Extracts and validates the documentation of one stdlib SQL file.

  Args:
    path: Path of the SQL file. The module name is derived from the part
      following '/stdlib/', without the '.sql' suffix.
    sql: Contents of the file.

  Returns:
    None for files of the "deprecated" package. Otherwise a ParsedModule:
    when the extractor reports errors it carries only those errors, else it
    carries every parsed object together with the validation errors.
  """
  if sys.platform.startswith('win'):
    path = path.replace('\\', '/')

  relative_path = path.split('/stdlib/')[-1]
  module_as_list: List[str] = relative_path.split(".sql")[0].split('/')

  # The package is the first component of the module name.
  package_name = module_as_list[0]

  # Disable support for `deprecated` package
  if package_name == "deprecated":
    return None

  # Extract all the docs from the SQL.
  extractor = DocsExtractor(path, package_name, sql)
  docs = extractor.extract()
  if extractor.errors:
    return ParsedModule(package_name, module_as_list, extractor.errors, [], [],
                        [], [], [])

  # Parse the extracted docs.
  errors: List[str] = []
  table_views: List[TableOrView] = []
  functions: List[Function] = []
  table_functions: List[TableFunction] = []
  macros: List[Macro] = []
  includes: List[Include] = []

  # (object kind, parser class, list collecting the parsed objects)
  dispatch = (
      (ObjKind.table_view, TableViewDocParser, table_views),
      (ObjKind.function, FunctionDocParser, functions),
      (ObjKind.table_function, TableFunctionDocParser, table_functions),
      (ObjKind.macro, MacroDocParser, macros),
      (ObjKind.include, IncludeParser, includes),
  )
  for doc in docs:
    for kind, parser_cls, parsed_objects in dispatch:
      if doc.obj_kind == kind:
        # A fresh parser per doc: parsers accumulate their own errors.
        parser = parser_cls(path, package_name)
        res = parser.parse(doc)
        if res:
          parsed_objects.append(res)
        errors += parser.errors

  return ParsedModule(package_name, module_as_list, errors, table_views,
                      functions, table_functions, macros, includes)
528