xref: /aosp_15_r20/external/perfetto/src/trace_processor/util/gzip_utils.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_UTIL_GZIP_UTILS_H_
18 #define SRC_TRACE_PROCESSOR_UTIL_GZIP_UTILS_H_
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <memory>
23 #include <vector>
24 
25 #include "perfetto/base/build_config.h"
26 
27 struct z_stream_s;
28 
29 namespace perfetto::trace_processor::util {
30 
// Returns whether gzip related functionality is supported with the current
// build flags.
IsGzipSupported()33 constexpr bool IsGzipSupported() {
34 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
35   return true;
36 #else
37   return false;
38 #endif
39 }
40 
// Usage: To decompress in a streaming way, there are two ways of using it:
// 1. [Commonly used] - Feed the sequence of mem-blocks in 'FeedAndExtract' one
//    by one. Output will be produced in given output_consumer, which is simply
//    a callback. On each 'FeedAndExtract', output_consumer could get invoked
//    any number of times, based on how much partial output is available.
//
// 2. [Uncommon ; Discouraged] - Feed the sequence of mem-blocks one by one, by
//    calling 'Feed'. Each time 'Feed' is called, client should call
//    'ExtractOutput' again and again to extract the partially available output,
//    until there is no more output to extract. Also see 'ResultCode' enum.
51 class GzipDecompressor {
52  public:
53   enum class ResultCode {
54     // 'kOk' means nothing bad happened so far, but continue doing what you
55     // were doing.
56     kOk,
57     // While calling 'ExtractOutput' repeatedly, if we get 'kEof', it means
58     // we have extracted all the partially available data and we are also
59     // done, i.e. there is no need to feed more input.
60     kEof,
61     // Some error. Possibly invalid compressed stream or corrupted data.
62     kError,
63     // While calling 'ExtractOutput' repeatedly, if we get 'kNeedsMoreInput',
64     // it means we have extracted all the partially available data, but we are
65     // not done yet. We need to call the 'Feed' to feed the next input
66     // mem-block and go through the ExtractOutput loop again.
67     kNeedsMoreInput,
68   };
69   struct Result {
70     // The return code of the decompression.
71     ResultCode ret;
72 
73     // The amount of bytes written to output.
74     // Valid in all cases except |ResultCode::kError|.
75     size_t bytes_written;
76   };
77   enum class InputMode {
78     // The input stream contains a gzip header. This is for the common case of
79     // decompressing .gz files.
80     kGzip = 0,
81 
82     // A raw deflate stream. This is for the case of uncompressing files from
83     // a .zip archive, where the compression type is specified in the zip file
84     // entry, rather than in the stream header.
85     kRawDeflate = 1,
86   };
87 
88   explicit GzipDecompressor(InputMode = InputMode::kGzip);
89 
90   // Feed the next mem-block.
91   void Feed(const uint8_t* data, size_t size);
92 
93   // Feed the next mem-block and extract output in the callback consumer.
94   // callback can get invoked multiple times if there are multiple
95   // mem-blocks to output.
96   //
97   // Note the output of this function is guaranteed *not* to be kOk.
98   template <typename Callback = void(const uint8_t* ptr, size_t size)>
FeedAndExtract(const uint8_t * data,size_t size,const Callback & output_consumer)99   ResultCode FeedAndExtract(const uint8_t* data,
100                             size_t size,
101                             const Callback& output_consumer) {
102     Feed(data, size);
103     uint8_t buffer[4096];
104     Result result;
105     do {
106       result = ExtractOutput(buffer, sizeof(buffer));
107       if (result.ret != ResultCode::kError && result.bytes_written > 0) {
108         output_consumer(buffer, result.bytes_written);
109       }
110     } while (result.ret == ResultCode::kOk);
111     return result.ret;
112   }
113 
114   // Extract the newly available partial output. On each 'Feed', this method
115   // should be called repeatedly until there is no more data to output
116   // i.e. (either 'kEof' or 'kNeedsMoreInput').
117   Result ExtractOutput(uint8_t* out, size_t out_capacity);
118 
119   // Sets the state of the decompressor to reuse with other gzip streams.
120   // This is almost like constructing a new 'GzipDecompressor' object
121   // but without paying the cost of internal memory allocation.
122   void Reset();
123 
124   // Decompress the entire mem-block and return decompressed mem-block.
125   // This is used for decompressing small strings or small files
126   // which doesn't require streaming decompression.
127   static std::vector<uint8_t> DecompressFully(const uint8_t* data, size_t len);
128 
129   // Returns the amount of input bytes left unprocessed.
130   size_t AvailIn() const;
131 
132  private:
133   struct Deleter {
134     void operator()(z_stream_s*) const;
135   };
136   std::unique_ptr<z_stream_s, Deleter> z_stream_;
137 };
138 
139 }  // namespace perfetto::trace_processor::util
140 
141 #endif  // SRC_TRACE_PROCESSOR_UTIL_GZIP_UTILS_H_
142