1 /* 2 * Copyright (C) 2022 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_ 18 #define SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_ 19 20 #include <functional> 21 #include <vector> 22 23 #include "perfetto/ext/base/string_view.h" 24 25 namespace perfetto { 26 namespace trace_processor { 27 namespace util { 28 29 // A streaming line tokenizer for efficiently processing large text files on a 30 // line-by-line basis. It's designed to be used in conjunction with ZipReader to 31 // stream lines out of a compressed file (think of a bugreport) without having 32 // to decompress the whole file in memory upfront. 33 // Internally it deals with the necessary buffering and line-merging across 34 // different chunks. 35 // Usage: 36 // - The caller should pass a callback into the ctor. The callback is invoked 37 // whenever a batch of lines has been tokenized. This happens after calls to 38 // either BeginWrite()+EndWrite() or Tokenize(). In order to avoid too much 39 // virtual dispatch overhead, the callback argument is a vector of lines, not 40 // a single line. 41 // - The caller can call either: 42 // - Tokenize(whole input): this exist to avoid a copy in the case of 43 // non-compressed (STORE) files in zip archive. 44 // - A sequence of BeginWrite() + EndWrite() as follows: 45 // - BeginWrite(n) guarantees that the caller can write at least `n` char. 46 // `n` is typically the decompression buffer passed to zlib. 47 // - The caller writes at most `n` bytes into the pointer returned above. 48 // - The caller calls EndWrite(m) passing the number of bytes actually 49 // written (`m` <= `n`); 50 // NOTE: 51 // This implementation slightly diverges from base::StringSplitter as follows: 52 // 1. It does NOT skip empty lines. SS coalesces empty tokens, this doesn't. 53 // 2. it won't output the last line unless it terminates with a \n. SS doesn't 54 // tell the difference between "foo\nbar" and "foo\nbar\n". This is 55 // fundamental for streaming, where we cannot tell upfront if we got the end. 56 class StreamingLineReader { 57 public: 58 // Note: the lifetime of the lines passed in the vector argument is valid only 59 // for the duration of the callback. Don't retain the StringView(s) passed. 60 using LinesCallback = 61 std::function<void(const std::vector<base::StringView>&)>; 62 63 explicit StreamingLineReader(LinesCallback); 64 ~StreamingLineReader(); 65 66 // This can be used when the whole input is known upfront and we just need 67 // splitting. This exist mostly for convenience when processing uncompressed 68 // (STORE) files in zip archives. If you just need a tokenizer outside of the 69 // context of a zip file, you are better off just using base::StringSplitter. 70 size_t Tokenize(base::StringView input); 71 72 // Reserves `write_buf_size` bytes into the internal buffer. The caller is 73 // expected to write at most `write_buf_size` on the returned pointer and 74 // then call EndWrite(). 75 char* BeginWrite(size_t write_buf_size); 76 77 // Finishes the write reporting the number of bytes actually written, which 78 // must be <= `write_buf_size`. If one or more lines can be tokenized, this 79 // will cause one or more calls to the LinesCallback. 80 void EndWrite(size_t size_written); 81 82 private: 83 std::vector<char> buf_; 84 LinesCallback lines_callback_; 85 size_t size_before_write_ = 0; 86 }; 87 88 } // namespace util 89 } // namespace trace_processor 90 } // namespace perfetto 91 92 #endif // SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_ 93