1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_UTIL_CRC32_H_ 16 #define ICING_UTIL_CRC32_H_ 17 18 #include <cstdint> 19 #include <string_view> 20 21 #include "icing/text_classifier/lib3/utils/base/statusor.h" 22 23 namespace icing { 24 namespace lib { 25 26 // Efficient mechanism to incrementally compute checksum of a file and keep it 27 // updated when its content changes. Internally uses zlib based crc32() 28 // implementation. 29 // 30 // See https://www.zlib.net/manual.html#Checksum for more details. 31 class Crc32 { 32 public: 33 // Default to the checksum of an empty string, that is "0". Crc32()34 Crc32() : crc_(0) {} 35 Crc32(uint32_t init_crc)36 explicit Crc32(uint32_t init_crc) : crc_(init_crc) {} 37 Crc32(std::string_view str)38 explicit Crc32(std::string_view str) : crc_(0) { Append(str); } 39 40 inline bool operator==(const Crc32& other) const { 41 return crc_ == other.Get(); 42 } 43 44 inline bool operator!=(const Crc32& other) const { 45 return crc_ != other.Get(); 46 } 47 48 // Returns the checksum of all the data that has been processed till now. 49 uint32_t Get() const; 50 51 // Incrementally update the current checksum to reflect the fact that the 52 // underlying data has been appended with 'str'. It calculates a new crc32 53 // based on the current crc value and the newly appended string. 54 // 55 // NOTE: As this method accepts incremental appends, all these 3 will lead to 56 // the same checksum: 57 // 1) crc32.Append("AAA"); crc32.Append("BBB"); 58 // 2) crc32.Append("AAABBB"); 59 // 3) crc32.Append("AA"); crc32.Append("AB"); crc32.Append("BB"); 60 // 61 // NOTE: While this class internally uses zlib's crc32(), 62 // Crc32(base_crc).Append(str) is not the same as zlib::crc32(base_crc, str); 63 uint32_t Append(std::string_view str); 64 65 // Update a string's rolling crc when some content is modified in the middle 66 // at an offset. We need the xored_str, which is the new value xored with the 67 // original value. 68 // 69 // Original string: 70 // string(original_start | original_mid | original_end) 71 // -------------------------------------------> full_data_size 72 // ^ offset position 73 // 74 // Modified string: 75 // string(original_start | changed_mid | original_end) 76 // ^ offset position 77 // 78 // And where 79 // xored_str = changed_mid ^ original_mid 80 // xored_len = length(xored_str) 81 // full_data_size = the length of all the strings that have been Appended to 82 // generate the current checksum 83 // 84 // REQUIRES: offset position + xored_len <= full_data_size. 85 // 86 // E.g. 87 // Old data: ABCDEF; New data: ABXYZF 88 // 89 // Crc32 crc32; crc32.Append("ABCDEF"); 90 // crc32.UpdateWithXor("CDE" xor "XYZ", 6, 2); 91 // 92 // This is the same as 93 // Crc32 crc32; crc32.Append("ABXYZF"); 94 // 95 // See .cc file for implementation notes. 96 // 97 // Returns: 98 // Updated crc on success 99 // INVALID_ARGUMENT if offset position + xored_len > full_data_size 100 libtextclassifier3::StatusOr<uint32_t> UpdateWithXor( 101 std::string_view xored_str, int full_data_size, int position); 102 103 private: 104 uint32_t crc_; 105 }; 106 107 } // namespace lib 108 } // namespace icing 109 110 #endif // ICING_UTIL_CRC32_H_ 111