xref: /aosp_15_r20/external/icing/icing/file/version-util.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/version-util.h"
16 
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <unordered_set>
22 #include <utility>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/absl_ports/str_cat.h"
28 #include "icing/file/file-backed-proto.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/index/index.h"
31 #include "icing/proto/initialize.pb.h"
32 #include "icing/util/status-macros.h"
33 
34 namespace icing {
35 namespace lib {
36 
37 namespace version_util {
38 
39 namespace {
40 
ReadV1VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)41 libtextclassifier3::StatusOr<VersionInfo> ReadV1VersionInfo(
42     const Filesystem& filesystem, const std::string& version_file_dir,
43     const std::string& index_base_dir) {
44   // 1. Read the version info.
45   const std::string v1_version_filepath =
46       MakeVersionFilePath(version_file_dir, kVersionFilenameV1);
47   VersionInfo existing_version_info(-1, -1);
48   if (filesystem.FileExists(v1_version_filepath.c_str()) &&
49       !filesystem.PRead(v1_version_filepath.c_str(), &existing_version_info,
50                         sizeof(VersionInfo), /*offset=*/0)) {
51     return absl_ports::InternalError("Failed to read v1 version file");
52   }
53 
54   // 2. Check the Index magic to see if we're actually on version 0.
55   libtextclassifier3::StatusOr<int> existing_flash_index_magic =
56       Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
57   if (!existing_flash_index_magic.ok()) {
58     if (absl_ports::IsNotFound(existing_flash_index_magic.status())) {
59       // Flash index magic doesn't exist. In this case, we're unable to
60       // determine the version change state correctly (regardless of the
61       // existence of the version file), so invalidate VersionInfo by setting
62       // version to -1, but still keep the max_version value read in step 1.
63       existing_version_info.version = -1;
64       return existing_version_info;
65     }
66     // Real error.
67     return std::move(existing_flash_index_magic).status();
68   }
69   if (existing_flash_index_magic.ValueOrDie() == kVersionZeroFlashIndexMagic) {
70     existing_version_info.version = 0;
71     if (existing_version_info.max_version == -1) {
72       existing_version_info.max_version = 0;
73     }
74   }
75 
76   return existing_version_info;
77 }
78 
ReadV2VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir)79 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadV2VersionInfo(
80     const Filesystem& filesystem, const std::string& version_file_dir) {
81   // Read the v2 version file. V2 version file stores the
82   // IcingSearchEngineVersionProto as a file-backed proto.
83   const std::string v2_version_filepath =
84       MakeVersionFilePath(version_file_dir, kVersionFilenameV2);
85   FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
86       filesystem, v2_version_filepath);
87   ICING_ASSIGN_OR_RETURN(const IcingSearchEngineVersionProto* v2_version_proto,
88                          v2_version_file.Read());
89 
90   return *v2_version_proto;
91 }
92 
93 }  // namespace
94 
ReadVersion(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)95 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion(
96     const Filesystem& filesystem, const std::string& version_file_dir,
97     const std::string& index_base_dir) {
98   // 1. Read the v1 version file
99   ICING_ASSIGN_OR_RETURN(
100       VersionInfo v1_version_info,
101       ReadV1VersionInfo(filesystem, version_file_dir, index_base_dir));
102   if (!v1_version_info.IsValid()) {
103     // This happens if IcingLib's state is invalid (e.g. flash index header file
104     // is missing). Return the invalid version numbers in this case.
105     IcingSearchEngineVersionProto version_proto;
106     version_proto.set_version(v1_version_info.version);
107     version_proto.set_max_version(v1_version_info.max_version);
108     return version_proto;
109   }
110 
111   // 2. Read the v2 version file
112   auto v2_version_proto = ReadV2VersionInfo(filesystem, version_file_dir);
113   if (!v2_version_proto.ok()) {
114     if (!absl_ports::IsNotFound(v2_version_proto.status())) {
115       // Real error.
116       return std::move(v2_version_proto).status();
117     }
118     // The v2 version file has not been written
119     IcingSearchEngineVersionProto version_proto;
120     if (v1_version_info.version < kFirstV2Version) {
121       // There are two scenarios for this case:
122       // 1. It's the first time that we're upgrading from a lower version to a
123       //    version >= kFirstV2Version.
124       //    - It's expected that the v2 version file has not been written yet in
125       //      this case and we return the v1 version numbers instead.
126       // 2. We're rolling forward from a version < kFirstV2Version, after
127       //    rolling back from a previous version >= kFirstV2Version, and for
128       //    some unknown reason we lost the v2 version file in the previous
129       //    version.
130       //    - e.g. version #4 -> version #1 -> version #4, but we lost the v2
131       //      file during version #1.
132       //    - This is a rollforward case, but it's still fine to return the v1
133       //      version number here as ShouldRebuildDerivedFiles can handle
134       //      rollforwards correctly.
135       version_proto.set_version(v1_version_info.version);
136       version_proto.set_max_version(v1_version_info.max_version);
137     } else {
138       // Something weird has happened. During last initialization we were
139       // already on a version >= kFirstV2Version, so the v2 version file
140       // should have been written.
141       // Return an invalid version number in this case and trigger rebuilding
142       // everything.
143       version_proto.set_version(-1);
144       version_proto.set_max_version(v1_version_info.max_version);
145     }
146     return version_proto;
147   }
148 
149   // 3. Check if versions match. If not, it means that we're rolling forward
150   // from a version < kFirstV2Version. In order to trigger rebuilding
151   // everything, we return an invalid version number in this case.
152   IcingSearchEngineVersionProto v2_version_proto_value =
153       std::move(v2_version_proto).ValueOrDie();
154   if (v1_version_info.version != v2_version_proto_value.version()) {
155     v2_version_proto_value.set_version(-1);
156     v2_version_proto_value.mutable_enabled_features()->Clear();
157   }
158 
159   return v2_version_proto_value;
160 }
161 
WriteV1Version(const Filesystem & filesystem,const std::string & version_file_dir,const VersionInfo & version_info)162 libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem,
163                                           const std::string& version_file_dir,
164                                           const VersionInfo& version_info) {
165   ScopedFd scoped_fd(filesystem.OpenForWrite(
166       MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()));
167   if (!scoped_fd.is_valid() ||
168       !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
169                          sizeof(VersionInfo)) ||
170       !filesystem.DataSync(scoped_fd.get())) {
171     return absl_ports::InternalError("Failed to write v1 version file");
172   }
173   return libtextclassifier3::Status::OK;
174 }
175 
WriteV2Version(const Filesystem & filesystem,const std::string & version_file_dir,std::unique_ptr<IcingSearchEngineVersionProto> version_proto)176 libtextclassifier3::Status WriteV2Version(
177     const Filesystem& filesystem, const std::string& version_file_dir,
178     std::unique_ptr<IcingSearchEngineVersionProto> version_proto) {
179   FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
180       filesystem, MakeVersionFilePath(version_file_dir, kVersionFilenameV2));
181   libtextclassifier3::Status v2_write_status =
182       v2_version_file.Write(std::move(version_proto));
183   if (!v2_write_status.ok()) {
184     return absl_ports::InternalError(absl_ports::StrCat(
185         "Failed to write v2 version file: ", v2_write_status.error_message()));
186   }
187   return libtextclassifier3::Status::OK;
188 }
189 
DiscardVersionFiles(const Filesystem & filesystem,std::string_view version_file_dir)190 libtextclassifier3::Status DiscardVersionFiles(
191     const Filesystem& filesystem, std::string_view version_file_dir) {
192   if (!filesystem.DeleteFile(
193           MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()) ||
194       !filesystem.DeleteFile(
195           MakeVersionFilePath(version_file_dir, kVersionFilenameV2).c_str())) {
196     return absl_ports::InternalError("Failed to discard version files");
197   }
198   return libtextclassifier3::Status::OK;
199 }
200 
GetVersionStateChange(const VersionInfo & existing_version_info,int32_t curr_version)201 StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
202                                   int32_t curr_version) {
203   if (!existing_version_info.IsValid()) {
204     return StateChange::kUndetermined;
205   }
206 
207   if (existing_version_info.version == 0) {
208     return (existing_version_info.max_version == existing_version_info.version)
209                ? StateChange::kVersionZeroUpgrade
210                : StateChange::kVersionZeroRollForward;
211   }
212 
213   if (existing_version_info.version == curr_version) {
214     return StateChange::kCompatible;
215   } else if (existing_version_info.version > curr_version) {
216     return StateChange::kRollBack;
217   } else {  // existing_version_info.version < curr_version
218     return (existing_version_info.max_version == existing_version_info.version)
219                ? StateChange::kUpgrade
220                : StateChange::kRollForward;
221   }
222 }
223 
CalculateRequiredDerivedFilesRebuild(const IcingSearchEngineVersionProto & prev_version_proto,const IcingSearchEngineVersionProto & curr_version_proto)224 DerivedFilesRebuildResult CalculateRequiredDerivedFilesRebuild(
225     const IcingSearchEngineVersionProto& prev_version_proto,
226     const IcingSearchEngineVersionProto& curr_version_proto) {
227   // 1. Do version check using version and max_version numbers
228   if (ShouldRebuildDerivedFiles(GetVersionInfoFromProto(prev_version_proto),
229                                 curr_version_proto.version())) {
230     return DerivedFilesRebuildResult(
231         /*needs_document_store_derived_files_rebuild=*/true,
232         /*needs_schema_store_derived_files_rebuild=*/true,
233         /*needs_term_index_rebuild=*/true,
234         /*needs_integer_index_rebuild=*/true,
235         /*needs_qualified_id_join_index_rebuild=*/true,
236         /*needs_embedding_index_rebuild=*/true);
237   }
238 
239   // 2. Compare the previous enabled features with the current enabled features
240   // and rebuild if there are differences.
241   std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
242       prev_features;
243   for (const auto& feature : prev_version_proto.enabled_features()) {
244     prev_features.insert(feature.feature_type());
245   }
246   std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
247       curr_features;
248   for (const auto& feature : curr_version_proto.enabled_features()) {
249     curr_features.insert(feature.feature_type());
250   }
251   DerivedFilesRebuildResult result;
252   for (const auto& prev_feature : prev_features) {
253     // If there is an UNKNOWN feature in the previous feature set (note that we
254     // never use UNKNOWN  when writing the version proto), it means that:
255     // - The previous version proto contains a feature enum that is only defined
256     //   in a newer version.
257     // - We've now rolled back to an old version that doesn't understand this
258     //   new enum value, and proto serialization defaults it to 0 (UNKNOWN).
259     // - In this case we need to rebuild everything.
260     if (prev_feature == IcingSearchEngineFeatureInfoProto::UNKNOWN) {
261       return DerivedFilesRebuildResult(
262           /*needs_document_store_derived_files_rebuild=*/true,
263           /*needs_schema_store_derived_files_rebuild=*/true,
264           /*needs_term_index_rebuild=*/true,
265           /*needs_integer_index_rebuild=*/true,
266           /*needs_qualified_id_join_index_rebuild=*/true,
267           /*needs_embedding_index_rebuild=*/true);
268     }
269     if (curr_features.find(prev_feature) == curr_features.end()) {
270       DerivedFilesRebuildResult required_rebuilds =
271           GetFeatureDerivedFilesRebuildResult(prev_feature);
272       result.CombineWithOtherRebuildResultOr(required_rebuilds);
273     }
274   }
275   for (const auto& curr_feature : curr_features) {
276     if (prev_features.find(curr_feature) == prev_features.end()) {
277       DerivedFilesRebuildResult required_rebuilds =
278           GetFeatureDerivedFilesRebuildResult(curr_feature);
279       result.CombineWithOtherRebuildResultOr(required_rebuilds);
280     }
281   }
282   return result;
283 }
284 
ShouldRebuildDerivedFiles(const VersionInfo & existing_version_info,int32_t curr_version)285 bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
286                                int32_t curr_version) {
287   StateChange state_change =
288       GetVersionStateChange(existing_version_info, curr_version);
289   switch (state_change) {
290     case StateChange::kCompatible:
291       return false;
292     case StateChange::kUndetermined:
293       [[fallthrough]];
294     case StateChange::kRollBack:
295       [[fallthrough]];
296     case StateChange::kRollForward:
297       [[fallthrough]];
298     case StateChange::kVersionZeroRollForward:
299       [[fallthrough]];
300     case StateChange::kVersionZeroUpgrade:
301       return true;
302     case StateChange::kUpgrade:
303       break;
304   }
305 
306   bool should_rebuild = false;
307   int32_t existing_version = existing_version_info.version;
308   while (existing_version < curr_version) {
309     switch (existing_version) {
310       case 1: {
311         // version 1 -> version 2 upgrade, no need to rebuild
312         break;
313       }
314       case 2: {
315         // version 2 -> version 3 upgrade, no need to rebuild
316         break;
317       }
318       case 3: {
319         // version 3 -> version 4 upgrade, no need to rebuild
320         break;
321       }
322       case 4: {
323         // version 4 -> version 5 upgrade, no need to rebuild
324         break;
325       }
326       default:
327         // This should not happen. Rebuild anyway if unsure.
328         should_rebuild |= true;
329     }
330     ++existing_version;
331   }
332   return should_rebuild;
333 }
334 
GetFeatureDerivedFilesRebuildResult(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)335 DerivedFilesRebuildResult GetFeatureDerivedFilesRebuildResult(
336     IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
337   switch (feature) {
338     case IcingSearchEngineFeatureInfoProto::FEATURE_SCORABLE_PROPERTIES: {
339       return DerivedFilesRebuildResult(
340           /*needs_document_store_derived_files_rebuild=*/true,
341           /*needs_schema_store_derived_files_rebuild=*/false,
342           /*needs_term_index_rebuild=*/false,
343           /*needs_integer_index_rebuild=*/false,
344           /*needs_qualified_id_join_index_rebuild=*/false,
345           /*needs_embedding_index_rebuild=*/false);
346     }
347     case IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR: {
348       return DerivedFilesRebuildResult(
349           /*needs_document_store_derived_files_rebuild=*/false,
350           /*needs_schema_store_derived_files_rebuild=*/false,
351           /*needs_term_index_rebuild=*/true,
352           /*needs_integer_index_rebuild=*/false,
353           /*needs_qualified_id_join_index_rebuild=*/false,
354           /*needs_embedding_index_rebuild=*/false);
355     }
356     case IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_INDEX: {
357       return DerivedFilesRebuildResult(
358           /*needs_document_store_derived_files_rebuild=*/false,
359           /*needs_schema_store_derived_files_rebuild=*/false,
360           /*needs_term_index_rebuild=*/false,
361           /*needs_integer_index_rebuild=*/false,
362           /*needs_qualified_id_join_index_rebuild=*/false,
363           /*needs_embedding_index_rebuild=*/true);
364     }
365     case IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_QUANTIZATION: {
366       return DerivedFilesRebuildResult(
367           /*needs_document_store_derived_files_rebuild=*/false,
368           /*needs_schema_store_derived_files_rebuild=*/false,
369           /*needs_term_index_rebuild=*/false,
370           /*needs_integer_index_rebuild=*/false,
371           /*needs_qualified_id_join_index_rebuild=*/false,
372           /*needs_embedding_index_rebuild=*/true);
373     }
374     case IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE: {
375       // The schema database feature requires schema-store migration, which is
376       // done separately from derived files rebuild.
377       return DerivedFilesRebuildResult(
378           /*needs_document_store_derived_files_rebuild=*/false,
379           /*needs_schema_store_derived_files_rebuild=*/false,
380           /*needs_term_index_rebuild=*/false,
381           /*needs_integer_index_rebuild=*/false,
382           /*needs_qualified_id_join_index_rebuild=*/false,
383           /*needs_embedding_index_rebuild=*/false);
384     }
385     case IcingSearchEngineFeatureInfoProto::
386         FEATURE_QUALIFIED_ID_JOIN_INDEX_V3_AND_DELETE_PROPAGATE_FROM: {
387       return DerivedFilesRebuildResult(
388           /*needs_document_store_derived_files_rebuild=*/false,
389           /*needs_schema_store_derived_files_rebuild=*/false,
390           /*needs_term_index_rebuild=*/false,
391           /*needs_integer_index_rebuild=*/false,
392           /*needs_qualified_id_join_index_rebuild=*/true,
393           /*needs_embedding_index_rebuild=*/false);
394     }
395     case IcingSearchEngineFeatureInfoProto::UNKNOWN:
396       return DerivedFilesRebuildResult(
397           /*needs_document_store_derived_files_rebuild=*/true,
398           /*needs_schema_store_derived_files_rebuild=*/true,
399           /*needs_term_index_rebuild=*/true,
400           /*needs_integer_index_rebuild=*/true,
401           /*needs_qualified_id_join_index_rebuild=*/true,
402           /*needs_embedding_index_rebuild=*/true);
403   }
404 }
405 
SchemaDatabaseMigrationRequired(const IcingSearchEngineVersionProto & prev_version_proto)406 bool SchemaDatabaseMigrationRequired(
407     const IcingSearchEngineVersionProto& prev_version_proto) {
408   if (prev_version_proto.version() < kSchemaDatabaseVersion) {
409     return true;
410   }
411   for (const auto& feature : prev_version_proto.enabled_features()) {
412     // The schema database feature was enabled in the previous version, so no
413     // need to migrate.
414     if (feature.feature_type() ==
415         IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE) {
416       return false;
417     }
418   }
419   return true;
420 }
421 
GetFeatureInfoProto(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)422 IcingSearchEngineFeatureInfoProto GetFeatureInfoProto(
423     IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
424   IcingSearchEngineFeatureInfoProto info;
425   info.set_feature_type(feature);
426 
427   DerivedFilesRebuildResult result =
428       GetFeatureDerivedFilesRebuildResult(feature);
429   info.set_needs_document_store_rebuild(
430       result.needs_document_store_derived_files_rebuild);
431   info.set_needs_schema_store_rebuild(
432       result.needs_schema_store_derived_files_rebuild);
433   info.set_needs_term_index_rebuild(result.needs_term_index_rebuild);
434   info.set_needs_integer_index_rebuild(result.needs_integer_index_rebuild);
435   info.set_needs_qualified_id_join_index_rebuild(
436       result.needs_qualified_id_join_index_rebuild);
437   info.set_needs_embedding_index_rebuild(result.needs_embedding_index_rebuild);
438 
439   return info;
440 }
441 
AddEnabledFeatures(const IcingSearchEngineOptions & options,IcingSearchEngineVersionProto * version_proto)442 void AddEnabledFeatures(const IcingSearchEngineOptions& options,
443                         IcingSearchEngineVersionProto* version_proto) {
444   auto* enabled_features = version_proto->mutable_enabled_features();
445   // HasPropertyOperator feature
446   if (options.build_property_existence_metadata_hits()) {
447     enabled_features->Add(GetFeatureInfoProto(
448         IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR));
449   }
450   // EmbeddingIndex feature
451   if (options.enable_embedding_index()) {
452     enabled_features->Add(GetFeatureInfoProto(
453         IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_INDEX));
454   }
455   if (options.enable_scorable_properties()) {
456     enabled_features->Add(GetFeatureInfoProto(
457         IcingSearchEngineFeatureInfoProto::FEATURE_SCORABLE_PROPERTIES));
458   }
459   // EmbeddingQuantization feature
460   if (options.enable_embedding_quantization()) {
461     enabled_features->Add(GetFeatureInfoProto(
462         IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_QUANTIZATION));
463   }
464   // SchemaDatabase feature
465   if (options.enable_schema_database()) {
466     enabled_features->Add(GetFeatureInfoProto(
467         IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE));
468   }
469   // QualifiedIdJoinIndex V3 and delete propagation type PROPAGATE_FROM feature
470   if (options.enable_qualified_id_join_index_v3_and_delete_propagate_from()) {
471     enabled_features->Add(GetFeatureInfoProto(
472         IcingSearchEngineFeatureInfoProto::
473             FEATURE_QUALIFIED_ID_JOIN_INDEX_V3_AND_DELETE_PROPAGATE_FROM));
474   }
475 }
476 
477 }  // namespace version_util
478 
479 }  // namespace lib
480 }  // namespace icing
481