xref: /aosp_15_r20/external/icing/icing/query/query-processor_benchmark.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <cstdint>
16 #include <limits>
17 #include <memory>
18 #include <string>
19 #include <utility>
20 
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "testing/base/public/benchmark.h"
23 #include "third_party/absl/flags/flag.h"
24 #include "icing/absl_ports/str_cat.h"
25 #include "icing/document-builder.h"
26 #include "icing/feature-flags.h"
27 #include "icing/file/filesystem.h"
28 #include "icing/file/portable-file-backed-proto-log.h"
29 #include "icing/index/embed/embedding-index.h"
30 #include "icing/index/index.h"
31 #include "icing/index/numeric/dummy-numeric-index.h"
32 #include "icing/legacy/index/icing-filesystem.h"
33 #include "icing/proto/schema.pb.h"
34 #include "icing/proto/search.pb.h"
35 #include "icing/proto/term.pb.h"
36 #include "icing/query/query-processor.h"
37 #include "icing/query/query-results.h"
38 #include "icing/schema/schema-store.h"
39 #include "icing/schema/section.h"
40 #include "icing/store/document-id.h"
41 #include "icing/store/document-store.h"
42 #include "icing/testing/common-matchers.h"
43 #include "icing/testing/test-data.h"
44 #include "icing/testing/test-feature-flags.h"
45 #include "icing/testing/tmp-directory.h"
46 #include "icing/tokenization/language-segmenter-factory.h"
47 #include "icing/tokenization/language-segmenter.h"
48 #include "icing/transform/normalizer-factory.h"
49 #include "icing/transform/normalizer.h"
50 #include "icing/util/clock.h"
51 #include "icing/util/icu-data-file-helper.h"
52 #include "icing/util/logging.h"
53 #include "unicode/uloc.h"
54 
55 // Run on a Linux workstation:
56 //    $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
57 //    //icing/query:query-processor_benchmark
58 //
59 //    $ blaze-bin/icing/query/query-processor_benchmark
60 //    --benchmark_filter=all
61 //
62 // Run on an Android device:
63 //    Make target //icing/tokenization:language-segmenter depend on
64 //    //third_party/icu
65 //
66 //    Make target //icing/transform:normalizer depend on
67 //    //third_party/icu
68 //
69 //    $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
70 //    --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
71 //    //icing/query:query-processor_benchmark
72 //
73 //    $ adb push blaze-bin/icing/query/query-processor_benchmark
74 //    /data/local/tmp/
75 //
76 //    $ adb shell /data/local/tmp/query-processor_benchmark
77 //    --benchmark_filter=all --adb
78 
// Command-line flag indicating that the benchmark is being run on an Android
// device through adb. Every benchmark in this file reads it: when it is false
// (the default), the ICU data file is set up from the test data path before
// the benchmark body runs; when it is true, that setup step is skipped.
ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
82 
83 namespace icing {
84 namespace lib {
85 
86 namespace {
87 
AddTokenToIndex(Index * index,DocumentId document_id,SectionId section_id,TermMatchType::Code term_match_type,const std::string & token)88 void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id,
89                      TermMatchType::Code term_match_type,
90                      const std::string& token) {
91   Index::Editor editor =
92       index->Edit(document_id, section_id, /*namespace_id=*/0);
93   ICING_ASSERT_OK(editor.BufferTerm(token, term_match_type));
94   ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
95 }
96 
CreateIndex(const IcingFilesystem & icing_filesystem,const Filesystem & filesystem,const std::string & index_dir)97 std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
98                                    const Filesystem& filesystem,
99                                    const std::string& index_dir) {
100   Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10,
101                          /*lite_index_sort_at_indexing=*/true,
102                          /*lite_index_sort_size=*/1024 * 8);
103   return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
104 }
105 
CreateNormalizer()106 std::unique_ptr<Normalizer> CreateNormalizer() {
107   return normalizer_factory::Create(
108 
109              /*max_term_byte_size=*/std::numeric_limits<int>::max())
110       .ValueOrDie();
111 }
112 
CreateDocumentStore(const Filesystem * filesystem,const std::string & base_dir,const Clock * clock,const SchemaStore * schema_store,const FeatureFlags & feature_flags)113 libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
114     const Filesystem* filesystem, const std::string& base_dir,
115     const Clock* clock, const SchemaStore* schema_store,
116     const FeatureFlags& feature_flags) {
117   return DocumentStore::Create(
118       filesystem, base_dir, clock, schema_store, &feature_flags,
119       /*force_recovery_and_revalidate_documents=*/false,
120       /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/true,
121       PortableFileBackedProtoLog<DocumentWrapper>::kDefaultCompressionLevel,
122       /*initialize_stats=*/nullptr);
123 }
124 
BM_QueryOneTerm(benchmark::State & state)125 void BM_QueryOneTerm(benchmark::State& state) {
126   bool run_via_adb = absl::GetFlag(FLAGS_adb);
127   if (!run_via_adb) {
128     ICING_ASSERT_OK(icu_data_file_helper::SetUpIcuDataFile(
129         GetTestFilePath("icing/icu.dat")));
130   }
131 
132   FeatureFlags feature_flags = GetTestFeatureFlags();
133   IcingFilesystem icing_filesystem;
134   Filesystem filesystem;
135   const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
136   const std::string index_dir = base_dir + "/index";
137   const std::string numeric_index_dir = base_dir + "/numeric_index";
138   const std::string embedding_index_dir = base_dir + "/embedding_index";
139   const std::string schema_dir = base_dir + "/schema";
140   const std::string doc_store_dir = base_dir + "/store";
141 
142   filesystem.DeleteDirectoryRecursively(base_dir.c_str());
143   if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
144       !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
145       !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
146     ICING_LOG(ERROR) << "Failed to create test directories";
147   }
148 
149   std::unique_ptr<Index> index =
150       CreateIndex(icing_filesystem, filesystem, index_dir);
151   // TODO(b/249829533): switch to use persistent numeric index.
152   ICING_ASSERT_OK_AND_ASSIGN(
153       auto numeric_index,
154       DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
155 
156   language_segmenter_factory::SegmenterOptions options(ULOC_US);
157   std::unique_ptr<LanguageSegmenter> language_segmenter =
158       language_segmenter_factory::Create(std::move(options)).ValueOrDie();
159   std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
160 
161   SchemaProto schema;
162   auto type_config = schema.add_types();
163   type_config->set_schema_type("type1");
164   Clock clock;
165   ICING_ASSERT_OK_AND_ASSIGN(
166       std::unique_ptr<SchemaStore> schema_store,
167       SchemaStore::Create(&filesystem, schema_dir, &clock, &feature_flags));
168   ICING_ASSERT_OK(schema_store->SetSchema(
169       schema, /*ignore_errors_and_delete_documents=*/false,
170       /*allow_circular_schema_definitions=*/false));
171 
172   DocumentStore::CreateResult create_result =
173       CreateDocumentStore(&filesystem, doc_store_dir, &clock,
174                           schema_store.get(), feature_flags)
175           .ValueOrDie();
176   std::unique_ptr<DocumentStore> document_store =
177       std::move(create_result.document_store);
178 
179   ICING_ASSERT_OK_AND_ASSIGN(
180       auto embedding_index,
181       EmbeddingIndex::Create(&filesystem, embedding_index_dir, &clock,
182                              &feature_flags));
183 
184   DocumentId document_id = document_store
185                                ->Put(DocumentBuilder()
186                                          .SetKey("icing", "type1")
187                                          .SetSchema("type1")
188                                          .Build())
189                                .ValueOrDie()
190                                .new_document_id;
191 
192   const std::string input_string(state.range(0), 'A');
193   AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
194                   TermMatchType::EXACT_ONLY, input_string);
195 
196   ICING_ASSERT_OK_AND_ASSIGN(
197       std::unique_ptr<QueryProcessor> query_processor,
198       QueryProcessor::Create(
199           index.get(), numeric_index.get(), embedding_index.get(),
200           language_segmenter.get(), normalizer.get(), document_store.get(),
201           schema_store.get(), /*join_children_fetcher=*/nullptr, &clock,
202           &feature_flags));
203 
204   SearchSpecProto search_spec;
205   search_spec.set_query(input_string);
206   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
207 
208   for (auto _ : state) {
209     QueryResults results =
210         query_processor
211             ->ParseSearch(search_spec,
212                           ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
213                           clock.GetSystemTimeMilliseconds())
214             .ValueOrDie();
215     while (results.root_iterator->Advance().ok()) {
216       results.root_iterator->doc_hit_info();
217     }
218   }
219 
220   // Destroy document store and schema store before the whole directory is
221   // removed because they persist data in destructor.
222   document_store.reset();
223   schema_store.reset();
224   filesystem.DeleteDirectoryRecursively(base_dir.c_str());
225 }
226 BENCHMARK(BM_QueryOneTerm)
227     // The testing numbers are in an ascending order with a fixed interval, that
228     // way we can tell if the performance increments are linear, exponential, or
229     // something else.
230     ->Arg(1000)
231     ->Arg(3000)
232     ->Arg(5000)
233     ->Arg(7000)
234     ->Arg(9000)
235     ->Arg(11000)
236     ->Arg(13000)
237     ->Arg(15000)
238     ->Arg(17000)
239     ->Arg(19000)
240     ->Arg(21000)
241     ->Arg(23000)
242     ->Arg(25000)
243     ->Arg(27000)
244     ->Arg(29000)
245     ->Arg(31000)
246     ->Arg(33000)
247     ->Arg(35000)
248     ->Arg(37000)
249     ->Arg(39000)
250     ->Arg(41000)
251     ->Arg(43000)
252     ->Arg(45000)
253     ->Arg(47000)
254     ->Arg(49000)
255     ->Arg(64000)
256     ->Arg(128000)
257     ->Arg(256000)
258     ->Arg(384000)
259     ->Arg(512000)
260     ->Arg(1024000)
261     ->Arg(2048000)
262     ->Arg(4096000);
263 
BM_QueryFiveTerms(benchmark::State & state)264 void BM_QueryFiveTerms(benchmark::State& state) {
265   bool run_via_adb = absl::GetFlag(FLAGS_adb);
266   if (!run_via_adb) {
267     ICING_ASSERT_OK(icu_data_file_helper::SetUpIcuDataFile(
268         GetTestFilePath("icing/icu.dat")));
269   }
270 
271   FeatureFlags feature_flags = GetTestFeatureFlags();
272   IcingFilesystem icing_filesystem;
273   Filesystem filesystem;
274   const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
275   const std::string index_dir = base_dir + "/index";
276   const std::string numeric_index_dir = base_dir + "/numeric_index";
277   const std::string embedding_index_dir = base_dir + "/embedding_index";
278   const std::string schema_dir = base_dir + "/schema";
279   const std::string doc_store_dir = base_dir + "/store";
280 
281   filesystem.DeleteDirectoryRecursively(base_dir.c_str());
282   if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
283       !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
284       !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
285     ICING_LOG(ERROR) << "Failed to create test directories";
286   }
287 
288   std::unique_ptr<Index> index =
289       CreateIndex(icing_filesystem, filesystem, index_dir);
290   // TODO(b/249829533): switch to use persistent numeric index.
291   ICING_ASSERT_OK_AND_ASSIGN(
292       auto numeric_index,
293       DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
294 
295   language_segmenter_factory::SegmenterOptions options(ULOC_US);
296   std::unique_ptr<LanguageSegmenter> language_segmenter =
297       language_segmenter_factory::Create(std::move(options)).ValueOrDie();
298   std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
299 
300   SchemaProto schema;
301   auto type_config = schema.add_types();
302   type_config->set_schema_type("type1");
303   Clock clock;
304   ICING_ASSERT_OK_AND_ASSIGN(
305       std::unique_ptr<SchemaStore> schema_store,
306       SchemaStore::Create(&filesystem, schema_dir, &clock, &feature_flags));
307   ICING_ASSERT_OK(schema_store->SetSchema(
308       schema, /*ignore_errors_and_delete_documents=*/false,
309       /*allow_circular_schema_definitions=*/false));
310 
311   DocumentStore::CreateResult create_result =
312       CreateDocumentStore(&filesystem, doc_store_dir, &clock,
313                           schema_store.get(), feature_flags)
314           .ValueOrDie();
315   std::unique_ptr<DocumentStore> document_store =
316       std::move(create_result.document_store);
317 
318   ICING_ASSERT_OK_AND_ASSIGN(
319       auto embedding_index,
320       EmbeddingIndex::Create(&filesystem, embedding_index_dir, &clock,
321                              &feature_flags));
322 
323   DocumentId document_id = document_store
324                                ->Put(DocumentBuilder()
325                                          .SetKey("icing", "type1")
326                                          .SetSchema("type1")
327                                          .Build())
328                                .ValueOrDie()
329                                .new_document_id;
330 
331   int term_length = state.range(0) / 5;
332 
333   const std::string input_string_a(term_length, 'A');
334   const std::string input_string_b(term_length, 'B');
335   const std::string input_string_c(term_length, 'C');
336   const std::string input_string_d(term_length, 'D');
337   const std::string input_string_e(term_length, 'E');
338   AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
339                   TermMatchType::EXACT_ONLY, input_string_a);
340   AddTokenToIndex(index.get(), document_id, /*section_id=*/1,
341                   TermMatchType::EXACT_ONLY, input_string_b);
342   AddTokenToIndex(index.get(), document_id, /*section_id=*/2,
343                   TermMatchType::EXACT_ONLY, input_string_c);
344   AddTokenToIndex(index.get(), document_id, /*section_id=*/3,
345                   TermMatchType::EXACT_ONLY, input_string_d);
346   AddTokenToIndex(index.get(), document_id, /*section_id=*/4,
347                   TermMatchType::EXACT_ONLY, input_string_e);
348 
349   ICING_ASSERT_OK_AND_ASSIGN(
350       std::unique_ptr<QueryProcessor> query_processor,
351       QueryProcessor::Create(
352           index.get(), numeric_index.get(), embedding_index.get(),
353           language_segmenter.get(), normalizer.get(), document_store.get(),
354           schema_store.get(), /*join_children_fetcher=*/nullptr, &clock,
355           &feature_flags));
356 
357   const std::string query_string = absl_ports::StrCat(
358       input_string_a, " ", input_string_b, " ", input_string_c, " ",
359       input_string_d, " ", input_string_e);
360 
361   SearchSpecProto search_spec;
362   search_spec.set_query(query_string);
363   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
364 
365   for (auto _ : state) {
366     QueryResults results =
367         query_processor
368             ->ParseSearch(search_spec,
369                           ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
370                           clock.GetSystemTimeMilliseconds())
371             .ValueOrDie();
372     while (results.root_iterator->Advance().ok()) {
373       results.root_iterator->doc_hit_info();
374     }
375   }
376 
377   // Destroy document store and schema store before the whole directory is
378   // removed because they persist data in destructor.
379   document_store.reset();
380   schema_store.reset();
381   filesystem.DeleteDirectoryRecursively(base_dir.c_str());
382 }
383 BENCHMARK(BM_QueryFiveTerms)
384     // The testing numbers are in an ascending order with a fixed interval, that
385     // way we can tell if the performance increments are linear, exponential, or
386     // something else.
387     ->Arg(1000)
388     ->Arg(3000)
389     ->Arg(5000)
390     ->Arg(7000)
391     ->Arg(9000)
392     ->Arg(11000)
393     ->Arg(13000)
394     ->Arg(15000)
395     ->Arg(17000)
396     ->Arg(19000)
397     ->Arg(21000)
398     ->Arg(23000)
399     ->Arg(25000)
400     ->Arg(27000)
401     ->Arg(29000)
402     ->Arg(31000)
403     ->Arg(33000)
404     ->Arg(35000)
405     ->Arg(37000)
406     ->Arg(39000)
407     ->Arg(41000)
408     ->Arg(43000)
409     ->Arg(45000)
410     ->Arg(47000)
411     ->Arg(49000)
412     ->Arg(64000)
413     ->Arg(128000)
414     ->Arg(256000)
415     ->Arg(384000)
416     ->Arg(512000)
417     ->Arg(1024000)
418     ->Arg(2048000)
419     ->Arg(4096000);
420 
BM_QueryDiacriticTerm(benchmark::State & state)421 void BM_QueryDiacriticTerm(benchmark::State& state) {
422   bool run_via_adb = absl::GetFlag(FLAGS_adb);
423   if (!run_via_adb) {
424     ICING_ASSERT_OK(icu_data_file_helper::SetUpIcuDataFile(
425         GetTestFilePath("icing/icu.dat")));
426   }
427 
428   FeatureFlags feature_flags = GetTestFeatureFlags();
429   IcingFilesystem icing_filesystem;
430   Filesystem filesystem;
431   const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
432   const std::string index_dir = base_dir + "/index";
433   const std::string numeric_index_dir = base_dir + "/numeric_index";
434   const std::string embedding_index_dir = base_dir + "/embedding_index";
435   const std::string schema_dir = base_dir + "/schema";
436   const std::string doc_store_dir = base_dir + "/store";
437 
438   filesystem.DeleteDirectoryRecursively(base_dir.c_str());
439   if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
440       !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
441       !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
442     ICING_LOG(ERROR) << "Failed to create test directories";
443   }
444 
445   std::unique_ptr<Index> index =
446       CreateIndex(icing_filesystem, filesystem, index_dir);
447   // TODO(b/249829533): switch to use persistent numeric index.
448   ICING_ASSERT_OK_AND_ASSIGN(
449       auto numeric_index,
450       DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
451 
452   language_segmenter_factory::SegmenterOptions options(ULOC_US);
453   std::unique_ptr<LanguageSegmenter> language_segmenter =
454       language_segmenter_factory::Create(std::move(options)).ValueOrDie();
455   std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
456 
457   SchemaProto schema;
458   auto type_config = schema.add_types();
459   type_config->set_schema_type("type1");
460   Clock clock;
461   ICING_ASSERT_OK_AND_ASSIGN(
462       std::unique_ptr<SchemaStore> schema_store,
463       SchemaStore::Create(&filesystem, schema_dir, &clock, &feature_flags));
464   ICING_ASSERT_OK(schema_store->SetSchema(
465       schema, /*ignore_errors_and_delete_documents=*/false,
466       /*allow_circular_schema_definitions=*/false));
467 
468   DocumentStore::CreateResult create_result =
469       CreateDocumentStore(&filesystem, doc_store_dir, &clock,
470                           schema_store.get(), feature_flags)
471           .ValueOrDie();
472   std::unique_ptr<DocumentStore> document_store =
473       std::move(create_result.document_store);
474 
475   ICING_ASSERT_OK_AND_ASSIGN(
476       auto embedding_index,
477       EmbeddingIndex::Create(&filesystem, embedding_index_dir, &clock,
478                              &feature_flags));
479 
480   DocumentId document_id = document_store
481                                ->Put(DocumentBuilder()
482                                          .SetKey("icing", "type1")
483                                          .SetSchema("type1")
484                                          .Build())
485                                .ValueOrDie()
486                                .new_document_id;
487 
488   std::string input_string;
489   while (input_string.length() < state.range(0)) {
490     input_string.append("àáâãā");
491   }
492   AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
493                   TermMatchType::EXACT_ONLY, input_string);
494 
495   ICING_ASSERT_OK_AND_ASSIGN(
496       std::unique_ptr<QueryProcessor> query_processor,
497       QueryProcessor::Create(
498           index.get(), numeric_index.get(), embedding_index.get(),
499           language_segmenter.get(), normalizer.get(), document_store.get(),
500           schema_store.get(), /*join_children_fetcher=*/nullptr, &clock,
501           &feature_flags));
502 
503   SearchSpecProto search_spec;
504   search_spec.set_query(input_string);
505   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
506 
507   for (auto _ : state) {
508     QueryResults results =
509         query_processor
510             ->ParseSearch(search_spec,
511                           ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
512                           clock.GetSystemTimeMilliseconds())
513             .ValueOrDie();
514     while (results.root_iterator->Advance().ok()) {
515       results.root_iterator->doc_hit_info();
516     }
517   }
518 
519   // Destroy document store and schema store before the whole directory is
520   // removed because they persist data in destructor.
521   document_store.reset();
522   schema_store.reset();
523   filesystem.DeleteDirectoryRecursively(base_dir.c_str());
524 }
525 BENCHMARK(BM_QueryDiacriticTerm)
526     // The testing numbers are in an ascending order with a fixed interval, that
527     // way we can tell if the performance increments are linear, exponential, or
528     // something else.
529     ->Arg(1000)
530     ->Arg(3000)
531     ->Arg(5000)
532     ->Arg(7000)
533     ->Arg(9000)
534     ->Arg(11000)
535     ->Arg(13000)
536     ->Arg(15000)
537     ->Arg(17000)
538     ->Arg(19000)
539     ->Arg(21000)
540     ->Arg(23000)
541     ->Arg(25000)
542     ->Arg(27000)
543     ->Arg(29000)
544     ->Arg(31000)
545     ->Arg(33000)
546     ->Arg(35000)
547     ->Arg(37000)
548     ->Arg(39000)
549     ->Arg(41000)
550     ->Arg(43000)
551     ->Arg(45000)
552     ->Arg(47000)
553     ->Arg(49000)
554     ->Arg(64000)
555     ->Arg(128000)
556     ->Arg(256000)
557     ->Arg(384000)
558     ->Arg(512000)
559     ->Arg(1024000)
560     ->Arg(2048000)
561     ->Arg(4096000);
562 
BM_QueryHiragana(benchmark::State & state)563 void BM_QueryHiragana(benchmark::State& state) {
564   bool run_via_adb = absl::GetFlag(FLAGS_adb);
565   if (!run_via_adb) {
566     ICING_ASSERT_OK(icu_data_file_helper::SetUpIcuDataFile(
567         GetTestFilePath("icing/icu.dat")));
568   }
569 
570   FeatureFlags feature_flags = GetTestFeatureFlags();
571   IcingFilesystem icing_filesystem;
572   Filesystem filesystem;
573   const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
574   const std::string index_dir = base_dir + "/index";
575   const std::string numeric_index_dir = base_dir + "/numeric_index";
576   const std::string embedding_index_dir = base_dir + "/embedding_index";
577   const std::string schema_dir = base_dir + "/schema";
578   const std::string doc_store_dir = base_dir + "/store";
579 
580   filesystem.DeleteDirectoryRecursively(base_dir.c_str());
581   if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
582       !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
583       !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
584     ICING_LOG(ERROR) << "Failed to create test directories";
585   }
586 
587   std::unique_ptr<Index> index =
588       CreateIndex(icing_filesystem, filesystem, index_dir);
589   // TODO(b/249829533): switch to use persistent numeric index.
590   ICING_ASSERT_OK_AND_ASSIGN(
591       auto numeric_index,
592       DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
593 
594   language_segmenter_factory::SegmenterOptions options(ULOC_US);
595   std::unique_ptr<LanguageSegmenter> language_segmenter =
596       language_segmenter_factory::Create(std::move(options)).ValueOrDie();
597   std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
598 
599   SchemaProto schema;
600   auto type_config = schema.add_types();
601   type_config->set_schema_type("type1");
602   Clock clock;
603   ICING_ASSERT_OK_AND_ASSIGN(
604       std::unique_ptr<SchemaStore> schema_store,
605       SchemaStore::Create(&filesystem, schema_dir, &clock, &feature_flags));
606   ICING_ASSERT_OK(schema_store->SetSchema(
607       schema, /*ignore_errors_and_delete_documents=*/false,
608       /*allow_circular_schema_definitions=*/false));
609 
610   DocumentStore::CreateResult create_result =
611       CreateDocumentStore(&filesystem, doc_store_dir, &clock,
612                           schema_store.get(), feature_flags)
613           .ValueOrDie();
614   std::unique_ptr<DocumentStore> document_store =
615       std::move(create_result.document_store);
616 
617   ICING_ASSERT_OK_AND_ASSIGN(
618       auto embedding_index,
619       EmbeddingIndex::Create(&filesystem, embedding_index_dir, &clock,
620                              &feature_flags));
621 
622   DocumentId document_id = document_store
623                                ->Put(DocumentBuilder()
624                                          .SetKey("icing", "type1")
625                                          .SetSchema("type1")
626                                          .Build())
627                                .ValueOrDie()
628                                .new_document_id;
629 
630   std::string input_string;
631   while (input_string.length() < state.range(0)) {
632     input_string.append("あいうえお");
633   }
634   AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
635                   TermMatchType::EXACT_ONLY, input_string);
636 
637   ICING_ASSERT_OK_AND_ASSIGN(
638       std::unique_ptr<QueryProcessor> query_processor,
639       QueryProcessor::Create(
640           index.get(), numeric_index.get(), embedding_index.get(),
641           language_segmenter.get(), normalizer.get(), document_store.get(),
642           schema_store.get(), /*join_children_fetcher=*/nullptr, &clock,
643           &feature_flags));
644 
645   SearchSpecProto search_spec;
646   search_spec.set_query(input_string);
647   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
648 
649   for (auto _ : state) {
650     QueryResults results =
651         query_processor
652             ->ParseSearch(search_spec,
653                           ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
654                           clock.GetSystemTimeMilliseconds())
655             .ValueOrDie();
656     while (results.root_iterator->Advance().ok()) {
657       results.root_iterator->doc_hit_info();
658     }
659   }
660 
661   // Destroy document store and schema store before the whole directory is
662   // removed because they persist data in destructor.
663   document_store.reset();
664   schema_store.reset();
665   filesystem.DeleteDirectoryRecursively(base_dir.c_str());
666 }
667 BENCHMARK(BM_QueryHiragana)
668     // The testing numbers are in an ascending order with a fixed interval, that
669     // way we can tell if the performance increments are linear, exponential, or
670     // something else.
671     ->Arg(1000)
672     ->Arg(3000)
673     ->Arg(5000)
674     ->Arg(7000)
675     ->Arg(9000)
676     ->Arg(11000)
677     ->Arg(13000)
678     ->Arg(15000)
679     ->Arg(17000)
680     ->Arg(19000)
681     ->Arg(21000)
682     ->Arg(23000)
683     ->Arg(25000)
684     ->Arg(27000)
685     ->Arg(29000)
686     ->Arg(31000)
687     ->Arg(33000)
688     ->Arg(35000)
689     ->Arg(37000)
690     ->Arg(39000)
691     ->Arg(41000)
692     ->Arg(43000)
693     ->Arg(45000)
694     ->Arg(47000)
695     ->Arg(49000)
696     ->Arg(64000)
697     ->Arg(128000)
698     ->Arg(256000)
699     ->Arg(384000)
700     ->Arg(512000)
701     ->Arg(1024000)
702     ->Arg(2048000)
703     ->Arg(4096000);
704 }  // namespace
705 
706 }  // namespace lib
707 }  // namespace icing
708