1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/version-util.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <unordered_set>
22 #include <utility>
23
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/absl_ports/str_cat.h"
28 #include "icing/file/file-backed-proto.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/index/index.h"
31 #include "icing/proto/initialize.pb.h"
32 #include "icing/util/status-macros.h"
33
34 namespace icing {
35 namespace lib {
36
37 namespace version_util {
38
39 namespace {
40
ReadV1VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)41 libtextclassifier3::StatusOr<VersionInfo> ReadV1VersionInfo(
42 const Filesystem& filesystem, const std::string& version_file_dir,
43 const std::string& index_base_dir) {
44 // 1. Read the version info.
45 const std::string v1_version_filepath =
46 MakeVersionFilePath(version_file_dir, kVersionFilenameV1);
47 VersionInfo existing_version_info(-1, -1);
48 if (filesystem.FileExists(v1_version_filepath.c_str()) &&
49 !filesystem.PRead(v1_version_filepath.c_str(), &existing_version_info,
50 sizeof(VersionInfo), /*offset=*/0)) {
51 return absl_ports::InternalError("Failed to read v1 version file");
52 }
53
54 // 2. Check the Index magic to see if we're actually on version 0.
55 libtextclassifier3::StatusOr<int> existing_flash_index_magic =
56 Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
57 if (!existing_flash_index_magic.ok()) {
58 if (absl_ports::IsNotFound(existing_flash_index_magic.status())) {
59 // Flash index magic doesn't exist. In this case, we're unable to
60 // determine the version change state correctly (regardless of the
61 // existence of the version file), so invalidate VersionInfo by setting
62 // version to -1, but still keep the max_version value read in step 1.
63 existing_version_info.version = -1;
64 return existing_version_info;
65 }
66 // Real error.
67 return std::move(existing_flash_index_magic).status();
68 }
69 if (existing_flash_index_magic.ValueOrDie() == kVersionZeroFlashIndexMagic) {
70 existing_version_info.version = 0;
71 if (existing_version_info.max_version == -1) {
72 existing_version_info.max_version = 0;
73 }
74 }
75
76 return existing_version_info;
77 }
78
ReadV2VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir)79 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadV2VersionInfo(
80 const Filesystem& filesystem, const std::string& version_file_dir) {
81 // Read the v2 version file. V2 version file stores the
82 // IcingSearchEngineVersionProto as a file-backed proto.
83 const std::string v2_version_filepath =
84 MakeVersionFilePath(version_file_dir, kVersionFilenameV2);
85 FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
86 filesystem, v2_version_filepath);
87 ICING_ASSIGN_OR_RETURN(const IcingSearchEngineVersionProto* v2_version_proto,
88 v2_version_file.Read());
89
90 return *v2_version_proto;
91 }
92
93 } // namespace
94
ReadVersion(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)95 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion(
96 const Filesystem& filesystem, const std::string& version_file_dir,
97 const std::string& index_base_dir) {
98 // 1. Read the v1 version file
99 ICING_ASSIGN_OR_RETURN(
100 VersionInfo v1_version_info,
101 ReadV1VersionInfo(filesystem, version_file_dir, index_base_dir));
102 if (!v1_version_info.IsValid()) {
103 // This happens if IcingLib's state is invalid (e.g. flash index header file
104 // is missing). Return the invalid version numbers in this case.
105 IcingSearchEngineVersionProto version_proto;
106 version_proto.set_version(v1_version_info.version);
107 version_proto.set_max_version(v1_version_info.max_version);
108 return version_proto;
109 }
110
111 // 2. Read the v2 version file
112 auto v2_version_proto = ReadV2VersionInfo(filesystem, version_file_dir);
113 if (!v2_version_proto.ok()) {
114 if (!absl_ports::IsNotFound(v2_version_proto.status())) {
115 // Real error.
116 return std::move(v2_version_proto).status();
117 }
118 // The v2 version file has not been written
119 IcingSearchEngineVersionProto version_proto;
120 if (v1_version_info.version < kFirstV2Version) {
121 // There are two scenarios for this case:
122 // 1. It's the first time that we're upgrading from a lower version to a
123 // version >= kFirstV2Version.
124 // - It's expected that the v2 version file has not been written yet in
125 // this case and we return the v1 version numbers instead.
126 // 2. We're rolling forward from a version < kFirstV2Version, after
127 // rolling back from a previous version >= kFirstV2Version, and for
128 // some unknown reason we lost the v2 version file in the previous
129 // version.
130 // - e.g. version #4 -> version #1 -> version #4, but we lost the v2
131 // file during version #1.
132 // - This is a rollforward case, but it's still fine to return the v1
133 // version number here as ShouldRebuildDerivedFiles can handle
134 // rollforwards correctly.
135 version_proto.set_version(v1_version_info.version);
136 version_proto.set_max_version(v1_version_info.max_version);
137 } else {
138 // Something weird has happened. During last initialization we were
139 // already on a version >= kFirstV2Version, so the v2 version file
140 // should have been written.
141 // Return an invalid version number in this case and trigger rebuilding
142 // everything.
143 version_proto.set_version(-1);
144 version_proto.set_max_version(v1_version_info.max_version);
145 }
146 return version_proto;
147 }
148
149 // 3. Check if versions match. If not, it means that we're rolling forward
150 // from a version < kFirstV2Version. In order to trigger rebuilding
151 // everything, we return an invalid version number in this case.
152 IcingSearchEngineVersionProto v2_version_proto_value =
153 std::move(v2_version_proto).ValueOrDie();
154 if (v1_version_info.version != v2_version_proto_value.version()) {
155 v2_version_proto_value.set_version(-1);
156 v2_version_proto_value.mutable_enabled_features()->Clear();
157 }
158
159 return v2_version_proto_value;
160 }
161
WriteV1Version(const Filesystem & filesystem,const std::string & version_file_dir,const VersionInfo & version_info)162 libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem,
163 const std::string& version_file_dir,
164 const VersionInfo& version_info) {
165 ScopedFd scoped_fd(filesystem.OpenForWrite(
166 MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()));
167 if (!scoped_fd.is_valid() ||
168 !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
169 sizeof(VersionInfo)) ||
170 !filesystem.DataSync(scoped_fd.get())) {
171 return absl_ports::InternalError("Failed to write v1 version file");
172 }
173 return libtextclassifier3::Status::OK;
174 }
175
WriteV2Version(const Filesystem & filesystem,const std::string & version_file_dir,std::unique_ptr<IcingSearchEngineVersionProto> version_proto)176 libtextclassifier3::Status WriteV2Version(
177 const Filesystem& filesystem, const std::string& version_file_dir,
178 std::unique_ptr<IcingSearchEngineVersionProto> version_proto) {
179 FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
180 filesystem, MakeVersionFilePath(version_file_dir, kVersionFilenameV2));
181 libtextclassifier3::Status v2_write_status =
182 v2_version_file.Write(std::move(version_proto));
183 if (!v2_write_status.ok()) {
184 return absl_ports::InternalError(absl_ports::StrCat(
185 "Failed to write v2 version file: ", v2_write_status.error_message()));
186 }
187 return libtextclassifier3::Status::OK;
188 }
189
DiscardVersionFiles(const Filesystem & filesystem,std::string_view version_file_dir)190 libtextclassifier3::Status DiscardVersionFiles(
191 const Filesystem& filesystem, std::string_view version_file_dir) {
192 if (!filesystem.DeleteFile(
193 MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()) ||
194 !filesystem.DeleteFile(
195 MakeVersionFilePath(version_file_dir, kVersionFilenameV2).c_str())) {
196 return absl_ports::InternalError("Failed to discard version files");
197 }
198 return libtextclassifier3::Status::OK;
199 }
200
GetVersionStateChange(const VersionInfo & existing_version_info,int32_t curr_version)201 StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
202 int32_t curr_version) {
203 if (!existing_version_info.IsValid()) {
204 return StateChange::kUndetermined;
205 }
206
207 if (existing_version_info.version == 0) {
208 return (existing_version_info.max_version == existing_version_info.version)
209 ? StateChange::kVersionZeroUpgrade
210 : StateChange::kVersionZeroRollForward;
211 }
212
213 if (existing_version_info.version == curr_version) {
214 return StateChange::kCompatible;
215 } else if (existing_version_info.version > curr_version) {
216 return StateChange::kRollBack;
217 } else { // existing_version_info.version < curr_version
218 return (existing_version_info.max_version == existing_version_info.version)
219 ? StateChange::kUpgrade
220 : StateChange::kRollForward;
221 }
222 }
223
CalculateRequiredDerivedFilesRebuild(const IcingSearchEngineVersionProto & prev_version_proto,const IcingSearchEngineVersionProto & curr_version_proto)224 DerivedFilesRebuildResult CalculateRequiredDerivedFilesRebuild(
225 const IcingSearchEngineVersionProto& prev_version_proto,
226 const IcingSearchEngineVersionProto& curr_version_proto) {
227 // 1. Do version check using version and max_version numbers
228 if (ShouldRebuildDerivedFiles(GetVersionInfoFromProto(prev_version_proto),
229 curr_version_proto.version())) {
230 return DerivedFilesRebuildResult(
231 /*needs_document_store_derived_files_rebuild=*/true,
232 /*needs_schema_store_derived_files_rebuild=*/true,
233 /*needs_term_index_rebuild=*/true,
234 /*needs_integer_index_rebuild=*/true,
235 /*needs_qualified_id_join_index_rebuild=*/true,
236 /*needs_embedding_index_rebuild=*/true);
237 }
238
239 // 2. Compare the previous enabled features with the current enabled features
240 // and rebuild if there are differences.
241 std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
242 prev_features;
243 for (const auto& feature : prev_version_proto.enabled_features()) {
244 prev_features.insert(feature.feature_type());
245 }
246 std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
247 curr_features;
248 for (const auto& feature : curr_version_proto.enabled_features()) {
249 curr_features.insert(feature.feature_type());
250 }
251 DerivedFilesRebuildResult result;
252 for (const auto& prev_feature : prev_features) {
253 // If there is an UNKNOWN feature in the previous feature set (note that we
254 // never use UNKNOWN when writing the version proto), it means that:
255 // - The previous version proto contains a feature enum that is only defined
256 // in a newer version.
257 // - We've now rolled back to an old version that doesn't understand this
258 // new enum value, and proto serialization defaults it to 0 (UNKNOWN).
259 // - In this case we need to rebuild everything.
260 if (prev_feature == IcingSearchEngineFeatureInfoProto::UNKNOWN) {
261 return DerivedFilesRebuildResult(
262 /*needs_document_store_derived_files_rebuild=*/true,
263 /*needs_schema_store_derived_files_rebuild=*/true,
264 /*needs_term_index_rebuild=*/true,
265 /*needs_integer_index_rebuild=*/true,
266 /*needs_qualified_id_join_index_rebuild=*/true,
267 /*needs_embedding_index_rebuild=*/true);
268 }
269 if (curr_features.find(prev_feature) == curr_features.end()) {
270 DerivedFilesRebuildResult required_rebuilds =
271 GetFeatureDerivedFilesRebuildResult(prev_feature);
272 result.CombineWithOtherRebuildResultOr(required_rebuilds);
273 }
274 }
275 for (const auto& curr_feature : curr_features) {
276 if (prev_features.find(curr_feature) == prev_features.end()) {
277 DerivedFilesRebuildResult required_rebuilds =
278 GetFeatureDerivedFilesRebuildResult(curr_feature);
279 result.CombineWithOtherRebuildResultOr(required_rebuilds);
280 }
281 }
282 return result;
283 }
284
ShouldRebuildDerivedFiles(const VersionInfo & existing_version_info,int32_t curr_version)285 bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
286 int32_t curr_version) {
287 StateChange state_change =
288 GetVersionStateChange(existing_version_info, curr_version);
289 switch (state_change) {
290 case StateChange::kCompatible:
291 return false;
292 case StateChange::kUndetermined:
293 [[fallthrough]];
294 case StateChange::kRollBack:
295 [[fallthrough]];
296 case StateChange::kRollForward:
297 [[fallthrough]];
298 case StateChange::kVersionZeroRollForward:
299 [[fallthrough]];
300 case StateChange::kVersionZeroUpgrade:
301 return true;
302 case StateChange::kUpgrade:
303 break;
304 }
305
306 bool should_rebuild = false;
307 int32_t existing_version = existing_version_info.version;
308 while (existing_version < curr_version) {
309 switch (existing_version) {
310 case 1: {
311 // version 1 -> version 2 upgrade, no need to rebuild
312 break;
313 }
314 case 2: {
315 // version 2 -> version 3 upgrade, no need to rebuild
316 break;
317 }
318 case 3: {
319 // version 3 -> version 4 upgrade, no need to rebuild
320 break;
321 }
322 case 4: {
323 // version 4 -> version 5 upgrade, no need to rebuild
324 break;
325 }
326 default:
327 // This should not happen. Rebuild anyway if unsure.
328 should_rebuild |= true;
329 }
330 ++existing_version;
331 }
332 return should_rebuild;
333 }
334
GetFeatureDerivedFilesRebuildResult(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)335 DerivedFilesRebuildResult GetFeatureDerivedFilesRebuildResult(
336 IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
337 switch (feature) {
338 case IcingSearchEngineFeatureInfoProto::FEATURE_SCORABLE_PROPERTIES: {
339 return DerivedFilesRebuildResult(
340 /*needs_document_store_derived_files_rebuild=*/true,
341 /*needs_schema_store_derived_files_rebuild=*/false,
342 /*needs_term_index_rebuild=*/false,
343 /*needs_integer_index_rebuild=*/false,
344 /*needs_qualified_id_join_index_rebuild=*/false,
345 /*needs_embedding_index_rebuild=*/false);
346 }
347 case IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR: {
348 return DerivedFilesRebuildResult(
349 /*needs_document_store_derived_files_rebuild=*/false,
350 /*needs_schema_store_derived_files_rebuild=*/false,
351 /*needs_term_index_rebuild=*/true,
352 /*needs_integer_index_rebuild=*/false,
353 /*needs_qualified_id_join_index_rebuild=*/false,
354 /*needs_embedding_index_rebuild=*/false);
355 }
356 case IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_INDEX: {
357 return DerivedFilesRebuildResult(
358 /*needs_document_store_derived_files_rebuild=*/false,
359 /*needs_schema_store_derived_files_rebuild=*/false,
360 /*needs_term_index_rebuild=*/false,
361 /*needs_integer_index_rebuild=*/false,
362 /*needs_qualified_id_join_index_rebuild=*/false,
363 /*needs_embedding_index_rebuild=*/true);
364 }
365 case IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_QUANTIZATION: {
366 return DerivedFilesRebuildResult(
367 /*needs_document_store_derived_files_rebuild=*/false,
368 /*needs_schema_store_derived_files_rebuild=*/false,
369 /*needs_term_index_rebuild=*/false,
370 /*needs_integer_index_rebuild=*/false,
371 /*needs_qualified_id_join_index_rebuild=*/false,
372 /*needs_embedding_index_rebuild=*/true);
373 }
374 case IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE: {
375 // The schema database feature requires schema-store migration, which is
376 // done separately from derived files rebuild.
377 return DerivedFilesRebuildResult(
378 /*needs_document_store_derived_files_rebuild=*/false,
379 /*needs_schema_store_derived_files_rebuild=*/false,
380 /*needs_term_index_rebuild=*/false,
381 /*needs_integer_index_rebuild=*/false,
382 /*needs_qualified_id_join_index_rebuild=*/false,
383 /*needs_embedding_index_rebuild=*/false);
384 }
385 case IcingSearchEngineFeatureInfoProto::
386 FEATURE_QUALIFIED_ID_JOIN_INDEX_V3_AND_DELETE_PROPAGATE_FROM: {
387 return DerivedFilesRebuildResult(
388 /*needs_document_store_derived_files_rebuild=*/false,
389 /*needs_schema_store_derived_files_rebuild=*/false,
390 /*needs_term_index_rebuild=*/false,
391 /*needs_integer_index_rebuild=*/false,
392 /*needs_qualified_id_join_index_rebuild=*/true,
393 /*needs_embedding_index_rebuild=*/false);
394 }
395 case IcingSearchEngineFeatureInfoProto::UNKNOWN:
396 return DerivedFilesRebuildResult(
397 /*needs_document_store_derived_files_rebuild=*/true,
398 /*needs_schema_store_derived_files_rebuild=*/true,
399 /*needs_term_index_rebuild=*/true,
400 /*needs_integer_index_rebuild=*/true,
401 /*needs_qualified_id_join_index_rebuild=*/true,
402 /*needs_embedding_index_rebuild=*/true);
403 }
404 }
405
SchemaDatabaseMigrationRequired(const IcingSearchEngineVersionProto & prev_version_proto)406 bool SchemaDatabaseMigrationRequired(
407 const IcingSearchEngineVersionProto& prev_version_proto) {
408 if (prev_version_proto.version() < kSchemaDatabaseVersion) {
409 return true;
410 }
411 for (const auto& feature : prev_version_proto.enabled_features()) {
412 // The schema database feature was enabled in the previous version, so no
413 // need to migrate.
414 if (feature.feature_type() ==
415 IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE) {
416 return false;
417 }
418 }
419 return true;
420 }
421
GetFeatureInfoProto(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)422 IcingSearchEngineFeatureInfoProto GetFeatureInfoProto(
423 IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
424 IcingSearchEngineFeatureInfoProto info;
425 info.set_feature_type(feature);
426
427 DerivedFilesRebuildResult result =
428 GetFeatureDerivedFilesRebuildResult(feature);
429 info.set_needs_document_store_rebuild(
430 result.needs_document_store_derived_files_rebuild);
431 info.set_needs_schema_store_rebuild(
432 result.needs_schema_store_derived_files_rebuild);
433 info.set_needs_term_index_rebuild(result.needs_term_index_rebuild);
434 info.set_needs_integer_index_rebuild(result.needs_integer_index_rebuild);
435 info.set_needs_qualified_id_join_index_rebuild(
436 result.needs_qualified_id_join_index_rebuild);
437 info.set_needs_embedding_index_rebuild(result.needs_embedding_index_rebuild);
438
439 return info;
440 }
441
AddEnabledFeatures(const IcingSearchEngineOptions & options,IcingSearchEngineVersionProto * version_proto)442 void AddEnabledFeatures(const IcingSearchEngineOptions& options,
443 IcingSearchEngineVersionProto* version_proto) {
444 auto* enabled_features = version_proto->mutable_enabled_features();
445 // HasPropertyOperator feature
446 if (options.build_property_existence_metadata_hits()) {
447 enabled_features->Add(GetFeatureInfoProto(
448 IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR));
449 }
450 // EmbeddingIndex feature
451 if (options.enable_embedding_index()) {
452 enabled_features->Add(GetFeatureInfoProto(
453 IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_INDEX));
454 }
455 if (options.enable_scorable_properties()) {
456 enabled_features->Add(GetFeatureInfoProto(
457 IcingSearchEngineFeatureInfoProto::FEATURE_SCORABLE_PROPERTIES));
458 }
459 // EmbeddingQuantization feature
460 if (options.enable_embedding_quantization()) {
461 enabled_features->Add(GetFeatureInfoProto(
462 IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_QUANTIZATION));
463 }
464 // SchemaDatabase feature
465 if (options.enable_schema_database()) {
466 enabled_features->Add(GetFeatureInfoProto(
467 IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE));
468 }
469 // QualifiedIdJoinIndex V3 and delete propagation type PROPAGATE_FROM feature
470 if (options.enable_qualified_id_join_index_v3_and_delete_propagate_from()) {
471 enabled_features->Add(GetFeatureInfoProto(
472 IcingSearchEngineFeatureInfoProto::
473 FEATURE_QUALIFIED_ID_JOIN_INDEX_V3_AND_DELETE_PROPAGATE_FROM));
474 }
475 }
476
477 } // namespace version_util
478
479 } // namespace lib
480 } // namespace icing
481