xref: /aosp_15_r20/external/icing/icing/util/crc32.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_UTIL_CRC32_H_
16 #define ICING_UTIL_CRC32_H_
17 
18 #include <cstdint>
19 #include <string_view>
20 
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 
23 namespace icing {
24 namespace lib {
25 
26 // Efficient mechanism to incrementally compute checksum of a file and keep it
27 // updated when its content changes. Internally uses zlib based crc32()
28 // implementation.
29 //
30 // See https://www.zlib.net/manual.html#Checksum for more details.
31 class Crc32 {
32  public:
33   // Default to the checksum of an empty string, that is "0".
Crc32()34   Crc32() : crc_(0) {}
35 
Crc32(uint32_t init_crc)36   explicit Crc32(uint32_t init_crc) : crc_(init_crc) {}
37 
Crc32(std::string_view str)38   explicit Crc32(std::string_view str) : crc_(0) { Append(str); }
39 
40   inline bool operator==(const Crc32& other) const {
41     return crc_ == other.Get();
42   }
43 
44   inline bool operator!=(const Crc32& other) const {
45     return crc_ != other.Get();
46   }
47 
48   // Returns the checksum of all the data that has been processed till now.
49   uint32_t Get() const;
50 
51   // Incrementally update the current checksum to reflect the fact that the
52   // underlying data has been appended with 'str'. It calculates a new crc32
53   // based on the current crc value and the newly appended string.
54   //
55   // NOTE: As this method accepts incremental appends, all these 3 will lead to
56   // the same checksum:
57   // 1) crc32.Append("AAA"); crc32.Append("BBB");
58   // 2) crc32.Append("AAABBB");
59   // 3) crc32.Append("AA"); crc32.Append("AB"); crc32.Append("BB");
60   //
61   // NOTE: While this class internally uses zlib's crc32(),
62   // Crc32(base_crc).Append(str) is not the same as zlib::crc32(base_crc, str);
63   uint32_t Append(std::string_view str);
64 
65   // Update a string's rolling crc when some content is modified in the middle
66   // at an offset. We need the xored_str, which is the new value xored with the
67   // original value.
68   //
69   // Original string:
70   //   string(original_start | original_mid | original_end)
71   //          -------------------------------------------> full_data_size
72   //                         ^ offset position
73   //
74   // Modified string:
75   //   string(original_start | changed_mid | original_end)
76   //                         ^ offset position
77   //
78   // And where
79   //   xored_str = changed_mid ^ original_mid
80   //   xored_len = length(xored_str)
81   //   full_data_size = the length of all the strings that have been Appended to
82   //                    generate the current checksum
83   //
84   // REQUIRES: offset position + xored_len <= full_data_size.
85   //
86   // E.g.
87   // Old data: ABCDEF; New data: ABXYZF
88   //
89   // Crc32 crc32; crc32.Append("ABCDEF");
90   // crc32.UpdateWithXor("CDE" xor "XYZ", 6, 2);
91   //
92   // This is the same as
93   // Crc32 crc32; crc32.Append("ABXYZF");
94   //
95   // See .cc file for implementation notes.
96   //
97   // Returns:
98   //   Updated crc on success
99   //   INVALID_ARGUMENT if offset position + xored_len > full_data_size
100   libtextclassifier3::StatusOr<uint32_t> UpdateWithXor(
101       std::string_view xored_str, int full_data_size, int position);
102 
103  private:
104   uint32_t crc_;
105 };
106 
107 }  // namespace lib
108 }  // namespace icing
109 
110 #endif  // ICING_UTIL_CRC32_H_
111