# Copyright (C) 2024 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from perfetto.common.exceptions import PerfettoException


# Provides a Python interface to operate on the contents of QueryResult protos.
#
# The constructor flattens every batch into one list per cell type plus a
# single list of per-cell type tags. Rows are then materialised lazily, either
# one at a time through the iterator protocol (yielding Row objects) or all at
# once through as_pandas_dataframe().
class QueryResultIterator:
  # Values of these constants correspond to the QueryResponse message at
  # protos/perfetto/trace_processor/trace_processor.proto
  QUERY_CELL_INVALID_FIELD_ID = 0
  QUERY_CELL_NULL_FIELD_ID = 1
  QUERY_CELL_VARINT_FIELD_ID = 2
  QUERY_CELL_FLOAT64_FIELD_ID = 3
  QUERY_CELL_STRING_FIELD_ID = 4
  QUERY_CELL_BLOB_FIELD_ID = 5

  # This is the class returned to the user and contains one row of the
  # resultant query. Each column name is stored as an attribute of this
  # class, with the value corresponding to the column name and row in
  # the query results table.
  class Row(object):
    # Required for pytype to correctly infer attributes from Row objects
    _HAS_DYNAMIC_ATTRIBUTES = True

    def __str__(self):
      return str(self.__dict__)

    def __repr__(self):
      # __repr__ must return a str. Returning the attribute dict itself makes
      # repr(row) (and therefore printing a list of rows) raise TypeError.
      return str(self.__dict__)

  # Collects the contents of |batches| into per-type value lists.
  #
  # Args:
  #   column_names: iterable of column names; copied into a list.
  #   batches: indexable sequence of batch messages. Each batch is read for
  #     its string_cells, varint_cells, float64_cells, blob_cells, cells and
  #     is_last_batch fields. Batches are consumed in order up to and
  #     including the first one with is_last_batch set.
  #
  # Raises:
  #   PerfettoException: if the total number of cells is not a multiple of
  #     the number of columns (including the case of cells with no columns).
  #
  # Note: if no batch has is_last_batch set, indexing runs past the end of
  # |batches| and whatever that container raises (IndexError for a list)
  # propagates to the caller.
  def __init__(self, column_names, batches):
    self.__column_names = list(column_names)
    self.__column_count = 0
    self.__count = 0
    self.__cells = []
    # One value list and one read cursor per cell type, indexed by the
    # QUERY_CELL_*_FIELD_ID constants. The cursors in __data_lists_index
    # belong to the iterator protocol (__next__) only.
    self.__data_lists = [[], [], [], [], [], []]
    self.__data_lists_index = [0, 0, 0, 0, 0, 0]
    self.__current_index = 0

    # Iterate over all the batches and collect their
    # contents into lists based on the type of the batch
    batch_index = 0
    while True:
      batch = batches[batch_index]

      # It's possible on some occasions that there are non UTF-8 characters
      # in the string_cells field. If this is the case, string_cells is
      # a bytestring which needs to be decoded (but passing ignore so that
      # we don't fail in decoding).
      strings_batch_str = batch.string_cells
      try:
        strings_batch_str = strings_batch_str.decode('utf-8', 'ignore')
      except AttributeError:
        # AttributeError can occur when |strings_batch_str| is an str which
        # happens when everything in it is UTF-8 (protobuf automatically
        # does the conversion if it can).
        pass

      # Null-terminated strings in a batch are concatenated
      # into a single large byte array, so we split on the
      # null-terminator to get the individual strings. The final element of
      # the split is the empty remainder after the last terminator.
      strings_batch = strings_batch_str.split('\0')[:-1]
      self.__data_lists[QueryResultIterator.QUERY_CELL_STRING_FIELD_ID].extend(
          strings_batch)
      self.__data_lists[QueryResultIterator.QUERY_CELL_VARINT_FIELD_ID].extend(
          batch.varint_cells)
      self.__data_lists[QueryResultIterator.QUERY_CELL_FLOAT64_FIELD_ID].extend(
          batch.float64_cells)
      self.__data_lists[QueryResultIterator.QUERY_CELL_BLOB_FIELD_ID].extend(
          batch.blob_cells)
      self.__cells.extend(batch.cells)

      if batch.is_last_batch:
        break

      batch_index += 1

    # If there are no rows in the query result, leave the counts at zero.
    if not self.__cells:
      return

    cell_count = len(self.__cells)
    column_count = len(self.__column_names)

    # Data integrity check - see that we have the expected amount of cells
    # for the number of rows that we need to return. This runs before the
    # division below so that cells without any columns are reported as a
    # PerfettoException rather than a ZeroDivisionError.
    if column_count == 0 or cell_count % column_count != 0:
      raise PerfettoException("Cell count " + str(cell_count) +
                              " is not a multiple of column count " +
                              str(column_count))

    # |cell_count| counts every individual cell in the query result, so we
    # divide by the number of columns in a row to get the number of rows.
    self.__column_count = column_count
    self.__count = cell_count // column_count

  # Decodes the row at |row_index| into a list of values in column order.
  #
  # |cursors| holds one read position per cell type and is advanced in place
  # for every non-null cell, so rows must be requested in increasing order
  # starting from row 0 for a given |cursors| list.
  #
  # Raises:
  #   PerfettoException: if a cell is tagged QUERY_CELL_INVALID_FIELD_ID.
  def _read_row(self, row_index, cursors):
    values = []
    base_cell_index = row_index * self.__column_count
    for offset in range(self.__column_count):
      col_type = self.__cells[base_cell_index + offset]
      if col_type == QueryResultIterator.QUERY_CELL_INVALID_FIELD_ID:
        raise PerfettoException('Invalid cell type')

      if col_type == QueryResultIterator.QUERY_CELL_NULL_FIELD_ID:
        # Null cells have no entry in any of the value lists.
        values.append(None)
        continue

      col_index = cursors[col_type]
      cursors[col_type] += 1
      values.append(self.__data_lists[col_type][col_index])
    return values

  # Returns the whole query result as a Pandas dataframe with one column per
  # column name, object dtype, and nulls represented as None.
  #
  # This reads the data with its own cursors, so it always returns every row
  # and does not disturb (and is not disturbed by) iteration over this object.
  #
  # Raises:
  #   PerfettoException: if pandas cannot be imported, or if a cell has an
  #     invalid type.
  def as_pandas_dataframe(self):
    # Only the import can legitimately raise ModuleNotFoundError, so keep the
    # try block limited to it.
    try:
      import pandas as pd
    except ModuleNotFoundError:
      raise PerfettoException(
          'Python dependencies missing. Please pip3 install pandas numpy')

    # Populate the dataframe with the query results
    cursors = [0] * len(self.__data_lists)
    rows = [self._read_row(i, cursors) for i in range(self.__count)]

    df = pd.DataFrame(rows, columns=self.__column_names)
    return df.astype(object).where(df.notnull(), None).reset_index(drop=True)

  # Number of rows in the query result.
  def __len__(self):
    return self.__count

  def __iter__(self):
    return self

  # Returns the next row as a Row object whose attributes are named after the
  # columns; raises StopIteration once every row has been returned.
  def __next__(self):
    if self.__current_index == self.__count:
      raise StopIteration

    values = self._read_row(self.__current_index, self.__data_lists_index)
    result = QueryResultIterator.Row()
    for column_name, value in zip(self.__column_names, values):
      setattr(result, column_name, value)

    self.__current_index += 1
    return result