xref: /aosp_15_r20/external/perfetto/src/trace_processor/util/streaming_line_reader.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_
18 #define SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_
19 
20 #include <functional>
21 #include <vector>
22 
23 #include "perfetto/ext/base/string_view.h"
24 
25 namespace perfetto {
26 namespace trace_processor {
27 namespace util {
28 
29 // A streaming line tokenizer for efficiently processing large text files on a
30 // line-by-line basis. It's designed to be used in conjunction with ZipReader to
31 // stream lines out of a compressed file (think of a bugreport) without having
32 // to decompress the whole file in memory upfront.
33 // Internally it deals with the necessary buffering and line-merging across
34 // different chunks.
35 // Usage:
36 // - The caller should pass a callback into the ctor. The callback is invoked
37 //   whenever a batch of lines has been tokenized. This happens after calls to
38 //   either BeginWrite()+EndWrite() or Tokenize(). In order to avoid too much
39 //   virtual dispatch overhead, the callback argument is a vector of lines, not
40 //   a single line.
41 // - The caller can call either:
42 //   - Tokenize(whole input): this exist to avoid a copy in the case of
43 //     non-compressed (STORE) files in zip archive.
44 //   - A sequence of BeginWrite() + EndWrite() as follows:
45 //     - BeginWrite(n) guarantees that the caller can write at least `n` char.
46 //       `n` is typically the decompression buffer passed to zlib.
47 //     - The caller writes at most `n` bytes into the pointer returned above.
48 //     - The caller calls EndWrite(m) passing the number of bytes actually
49 //       written (`m` <= `n`);
50 // NOTE:
51 // This implementation slightly diverges from base::StringSplitter as follows:
52 // 1. It does NOT skip empty lines. SS coalesces empty tokens, this doesn't.
53 // 2. it won't output the last line unless it terminates with a \n. SS doesn't
54 //    tell the difference between "foo\nbar" and "foo\nbar\n". This is
55 //    fundamental for streaming, where we cannot tell upfront if we got the end.
56 class StreamingLineReader {
57  public:
58   // Note: the lifetime of the lines passed in the vector argument is valid only
59   // for the duration of the callback. Don't retain the StringView(s) passed.
60   using LinesCallback =
61       std::function<void(const std::vector<base::StringView>&)>;
62 
63   explicit StreamingLineReader(LinesCallback);
64   ~StreamingLineReader();
65 
66   // This can be used when the whole input is known upfront and we just need
67   // splitting. This exist mostly for convenience when processing uncompressed
68   // (STORE) files in zip archives. If you just need a tokenizer outside of the
69   // context of a zip file, you are better off just using base::StringSplitter.
70   size_t Tokenize(base::StringView input);
71 
72   // Reserves `write_buf_size` bytes into the internal buffer. The caller is
73   // expected to write at most `write_buf_size` on the returned pointer and
74   // then call EndWrite().
75   char* BeginWrite(size_t write_buf_size);
76 
77   // Finishes the write reporting the number of bytes actually written, which
78   // must be <= `write_buf_size`. If one or more lines can be tokenized, this
79   // will cause one or more calls to the LinesCallback.
80   void EndWrite(size_t size_written);
81 
82  private:
83   std::vector<char> buf_;
84   LinesCallback lines_callback_;
85   size_t size_before_write_ = 0;
86 };
87 
88 }  // namespace util
89 }  // namespace trace_processor
90 }  // namespace perfetto
91 
92 #endif  // SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_
93