xref: /aosp_15_r20/external/perfetto/python/perfetto/common/query_result_iterator.py (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1# Copyright (C) 2024 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from perfetto.common.exceptions import PerfettoException
16
17
18# Provides a Python interface to operate on the contents of QueryResult protos
class QueryResultIterator:
  # Values of these constants correspond to the QueryResponse message at
  # protos/perfetto/trace_processor/trace_processor.proto
  QUERY_CELL_INVALID_FIELD_ID = 0
  QUERY_CELL_NULL_FIELD_ID = 1
  QUERY_CELL_VARINT_FIELD_ID = 2
  QUERY_CELL_FLOAT64_FIELD_ID = 3
  QUERY_CELL_STRING_FIELD_ID = 4
  QUERY_CELL_BLOB_FIELD_ID = 5

  # This is the class returned to the user and contains one row of the
  # resultant query. Each column name is stored as an attribute of this
  # class, with the value corresponding to the column name and row in
  # the query results table.
  class Row(object):
    # Required for pytype to correctly infer attributes from Row objects
    _HAS_DYNAMIC_ATTRIBUTES = True

    def __str__(self):
      return str(self.__dict__)

    # __repr__ has to return a str: handing back the attribute dict itself
    # makes repr(row) raise TypeError.
    def __repr__(self):
      return repr(self.__dict__)

  # Collects the contents of |batches| into flat per-type value lists.
  #
  # column_names: iterable with the name of every column of the result.
  # batches: indexable sequence of batch objects exposing string_cells,
  #   varint_cells, float64_cells, blob_cells, cells and is_last_batch.
  #   Batches are read in order up to and including the first one whose
  #   is_last_batch is set; indexing past the end of |batches| before such
  #   a batch is found raises IndexError.
  #
  # Raises PerfettoException if the number of cells is not a multiple of
  # the number of columns.
  def __init__(self, column_names, batches):
    self.__column_names = list(column_names)
    self.__column_count = 0
    self.__count = 0
    # Type tag (one of the QUERY_CELL_*_FIELD_ID constants) of every cell,
    # in row-major order.
    self.__cells = []
    # Values grouped by cell type; the list position is the type tag.
    self.__data_lists = [[], [], [], [], [], []]
    # Next unread position in each of |__data_lists|; advanced by __next__.
    self.__data_lists_index = [0, 0, 0, 0, 0, 0]
    self.__current_index = 0

    # Iterate over all the batches and collect their
    # contents into lists based on the type of the batch
    batch_index = 0
    while True:
      batch = batches[batch_index]

      # It's possible on some occasions that there are non UTF-8 characters
      # in the string_cells field. If this is the case, string_cells is
      # a bytestring which needs to be decoded (but passing ignore so that
      # we don't fail in decoding).
      strings_batch_str = batch.string_cells
      try:
        strings_batch_str = strings_batch_str.decode('utf-8', 'ignore')
      except AttributeError:
        # AttributeError can occur when |strings_batch_str| is an str which
        # happens when everything in it is UTF-8 (protobuf automatically
        # does the conversion if it can).
        pass

      # Null-terminated strings in a batch are concatenated
      # into a single large byte array, so we split on the
      # null-terminator to get the individual strings. The piece after
      # the final terminator is dropped.
      strings_batch = strings_batch_str.split('\0')[:-1]
      self.__data_lists[QueryResultIterator.QUERY_CELL_STRING_FIELD_ID].extend(
          strings_batch)
      self.__data_lists[QueryResultIterator.QUERY_CELL_VARINT_FIELD_ID].extend(
          batch.varint_cells)
      self.__data_lists[QueryResultIterator.QUERY_CELL_FLOAT64_FIELD_ID].extend(
          batch.float64_cells)
      self.__data_lists[QueryResultIterator.QUERY_CELL_BLOB_FIELD_ID].extend(
          batch.blob_cells)
      self.__cells.extend(batch.cells)

      if batch.is_last_batch:
        break

      batch_index += 1

    # If there are no rows in the query result, leave the counts at zero.
    cell_count = len(self.__cells)
    if cell_count == 0:
      return

    # Data integrity check - see that we have the expected amount of cells
    # for the number of rows that we need to return. This runs before the
    # division below so that a result with cells but no columns is reported
    # as a PerfettoException instead of a ZeroDivisionError.
    column_count = len(self.__column_names)
    if column_count == 0 or cell_count % column_count != 0:
      raise PerfettoException("Cell count " + str(cell_count) +
                              " is not a multiple of column count " +
                              str(column_count))

    # The count we collected so far was a count of all individual cells
    # in the query result, so we divide by the number of columns in a row
    # to get the number of rows
    self.__column_count = column_count
    self.__count = cell_count // column_count

  # Decodes row |row_index| into a list with one value per column.
  #
  # |cursors| holds the next unread position of every per-type value list
  # and is advanced in place for each non-null cell. Rows therefore have to
  # be decoded in order, starting from row 0, for a given |cursors| list.
  #
  # Raises PerfettoException if the row contains a cell of invalid type.
  def __read_row(self, row_index, cursors):
    values = []
    first_cell = row_index * self.__column_count
    for cell_type in self.__cells[first_cell:first_cell + self.__column_count]:
      if cell_type == QueryResultIterator.QUERY_CELL_INVALID_FIELD_ID:
        raise PerfettoException('Invalid cell type')

      if cell_type == QueryResultIterator.QUERY_CELL_NULL_FIELD_ID:
        # Null cells carry no payload in any of the value lists.
        values.append(None)
      else:
        position = cursors[cell_type]
        cursors[cell_type] = position + 1
        values.append(self.__data_lists[cell_type][position])
    return values

  # Returns the whole query result as a Pandas dataframe with one column
  # per column name, object dtype and None in place of missing values.
  #
  # The rows are decoded with cursors local to this call, so the function
  # neither depends on nor disturbs the iteration state: it can be called
  # before, during or after iterating, and more than once.
  #
  # Raises PerfettoException if pandas is not installed or if the result
  # contains a cell of invalid type.
  def as_pandas_dataframe(self):
    # Only the import is guarded, so that an unrelated ModuleNotFoundError
    # is never misreported as a missing dependency.
    try:
      import pandas as pd
    except ModuleNotFoundError:
      raise PerfettoException(
          'Python dependencies missing. Please pip3 install pandas numpy')

    # Populate the dataframe with the query results
    cursors = [0] * len(self.__data_lists)
    rows = [self.__read_row(i, cursors) for i in range(self.__count)]

    df = pd.DataFrame(rows, columns=self.__column_names)
    return df.astype(object).where(df.notnull(), None).reset_index(drop=True)

  # Total number of rows in the query result (not the number remaining).
  def __len__(self):
    return self.__count

  # The object is its own iterator, so the rows can be walked only once.
  def __iter__(self):
    return self

  # Returns the next row as a Row whose attributes are named after the
  # columns. Raises StopIteration once every row has been returned and
  # PerfettoException if the row contains a cell of invalid type.
  def __next__(self):
    if self.__current_index == self.__count:
      raise StopIteration

    values = self.__read_row(self.__current_index, self.__data_lists_index)
    result = QueryResultIterator.Row()
    for column_name, value in zip(self.__column_names, values):
      setattr(result, column_name, value)

    self.__current_index += 1
    return result
161    return result
162