1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <cstdint>
16 #include <limits>
17 #include <memory>
18 #include <string>
19 #include <utility>
20
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "testing/base/public/benchmark.h"
23 #include "third_party/absl/flags/flag.h"
24 #include "icing/absl_ports/str_cat.h"
25 #include "icing/document-builder.h"
26 #include "icing/feature-flags.h"
27 #include "icing/file/filesystem.h"
28 #include "icing/file/portable-file-backed-proto-log.h"
29 #include "icing/index/embed/embedding-index.h"
30 #include "icing/index/index.h"
31 #include "icing/index/numeric/dummy-numeric-index.h"
32 #include "icing/legacy/index/icing-filesystem.h"
33 #include "icing/proto/schema.pb.h"
34 #include "icing/proto/search.pb.h"
35 #include "icing/proto/term.pb.h"
36 #include "icing/query/query-processor.h"
37 #include "icing/query/query-results.h"
38 #include "icing/schema/schema-store.h"
39 #include "icing/schema/section.h"
40 #include "icing/store/document-id.h"
41 #include "icing/store/document-store.h"
42 #include "icing/testing/common-matchers.h"
43 #include "icing/testing/test-data.h"
44 #include "icing/testing/test-feature-flags.h"
45 #include "icing/testing/tmp-directory.h"
46 #include "icing/tokenization/language-segmenter-factory.h"
47 #include "icing/tokenization/language-segmenter.h"
48 #include "icing/transform/normalizer-factory.h"
49 #include "icing/transform/normalizer.h"
50 #include "icing/util/clock.h"
51 #include "icing/util/icu-data-file-helper.h"
52 #include "icing/util/logging.h"
53 #include "unicode/uloc.h"
54
55 // Run on a Linux workstation:
56 // $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
57 // //icing/query:query-processor_benchmark
58 //
59 // $ blaze-bin/icing/query/query-processor_benchmark
60 // --benchmark_filter=all
61 //
62 // Run on an Android device:
63 // Make target //icing/tokenization:language-segmenter depend on
64 // //third_party/icu
65 //
66 // Make target //icing/transform:normalizer depend on
67 // //third_party/icu
68 //
69 // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
70 // --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
71 // //icing/query:query-processor_benchmark
72 //
73 // $ adb push blaze-bin/icing/query/query-processor_benchmark
74 // /data/local/tmp/
75 //
76 // $ adb shell /data/local/tmp/query-processor_benchmark
77 // --benchmark_filter=all --adb
78
// Flag to tell the benchmark that it will be run on an Android device via adb.
// When it is set, the benchmarks skip setting up the ICU data file from the
// test data path.
81 ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
82
83 namespace icing {
84 namespace lib {
85
86 namespace {
87
AddTokenToIndex(Index * index,DocumentId document_id,SectionId section_id,TermMatchType::Code term_match_type,const std::string & token)88 void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id,
89 TermMatchType::Code term_match_type,
90 const std::string& token) {
91 Index::Editor editor =
92 index->Edit(document_id, section_id, /*namespace_id=*/0);
93 ICING_ASSERT_OK(editor.BufferTerm(token, term_match_type));
94 ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
95 }
96
CreateIndex(const IcingFilesystem & icing_filesystem,const Filesystem & filesystem,const std::string & index_dir)97 std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
98 const Filesystem& filesystem,
99 const std::string& index_dir) {
100 Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10,
101 /*lite_index_sort_at_indexing=*/true,
102 /*lite_index_sort_size=*/1024 * 8);
103 return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
104 }
105
CreateNormalizer()106 std::unique_ptr<Normalizer> CreateNormalizer() {
107 return normalizer_factory::Create(
108
109 /*max_term_byte_size=*/std::numeric_limits<int>::max())
110 .ValueOrDie();
111 }
112
CreateDocumentStore(const Filesystem * filesystem,const std::string & base_dir,const Clock * clock,const SchemaStore * schema_store,const FeatureFlags & feature_flags)113 libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
114 const Filesystem* filesystem, const std::string& base_dir,
115 const Clock* clock, const SchemaStore* schema_store,
116 const FeatureFlags& feature_flags) {
117 return DocumentStore::Create(
118 filesystem, base_dir, clock, schema_store, &feature_flags,
119 /*force_recovery_and_revalidate_documents=*/false,
120 /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/true,
121 PortableFileBackedProtoLog<DocumentWrapper>::kDefaultCompressionLevel,
122 /*initialize_stats=*/nullptr);
123 }
124
BM_QueryOneTerm(benchmark::State & state)125 void BM_QueryOneTerm(benchmark::State& state) {
126 bool run_via_adb = absl::GetFlag(FLAGS_adb);
127 if (!run_via_adb) {
128 ICING_ASSERT_OK(icu_data_file_helper::SetUpIcuDataFile(
129 GetTestFilePath("icing/icu.dat")));
130 }
131
132 FeatureFlags feature_flags = GetTestFeatureFlags();
133 IcingFilesystem icing_filesystem;
134 Filesystem filesystem;
135 const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
136 const std::string index_dir = base_dir + "/index";
137 const std::string numeric_index_dir = base_dir + "/numeric_index";
138 const std::string embedding_index_dir = base_dir + "/embedding_index";
139 const std::string schema_dir = base_dir + "/schema";
140 const std::string doc_store_dir = base_dir + "/store";
141
142 filesystem.DeleteDirectoryRecursively(base_dir.c_str());
143 if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
144 !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
145 !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
146 ICING_LOG(ERROR) << "Failed to create test directories";
147 }
148
149 std::unique_ptr<Index> index =
150 CreateIndex(icing_filesystem, filesystem, index_dir);
151 // TODO(b/249829533): switch to use persistent numeric index.
152 ICING_ASSERT_OK_AND_ASSIGN(
153 auto numeric_index,
154 DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
155
156 language_segmenter_factory::SegmenterOptions options(ULOC_US);
157 std::unique_ptr<LanguageSegmenter> language_segmenter =
158 language_segmenter_factory::Create(std::move(options)).ValueOrDie();
159 std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
160
161 SchemaProto schema;
162 auto type_config = schema.add_types();
163 type_config->set_schema_type("type1");
164 Clock clock;
165 ICING_ASSERT_OK_AND_ASSIGN(
166 std::unique_ptr<SchemaStore> schema_store,
167 SchemaStore::Create(&filesystem, schema_dir, &clock, &feature_flags));
168 ICING_ASSERT_OK(schema_store->SetSchema(
169 schema, /*ignore_errors_and_delete_documents=*/false,
170 /*allow_circular_schema_definitions=*/false));
171
172 DocumentStore::CreateResult create_result =
173 CreateDocumentStore(&filesystem, doc_store_dir, &clock,
174 schema_store.get(), feature_flags)
175 .ValueOrDie();
176 std::unique_ptr<DocumentStore> document_store =
177 std::move(create_result.document_store);
178
179 ICING_ASSERT_OK_AND_ASSIGN(
180 auto embedding_index,
181 EmbeddingIndex::Create(&filesystem, embedding_index_dir, &clock,
182 &feature_flags));
183
184 DocumentId document_id = document_store
185 ->Put(DocumentBuilder()
186 .SetKey("icing", "type1")
187 .SetSchema("type1")
188 .Build())
189 .ValueOrDie()
190 .new_document_id;
191
192 const std::string input_string(state.range(0), 'A');
193 AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
194 TermMatchType::EXACT_ONLY, input_string);
195
196 ICING_ASSERT_OK_AND_ASSIGN(
197 std::unique_ptr<QueryProcessor> query_processor,
198 QueryProcessor::Create(
199 index.get(), numeric_index.get(), embedding_index.get(),
200 language_segmenter.get(), normalizer.get(), document_store.get(),
201 schema_store.get(), /*join_children_fetcher=*/nullptr, &clock,
202 &feature_flags));
203
204 SearchSpecProto search_spec;
205 search_spec.set_query(input_string);
206 search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
207
208 for (auto _ : state) {
209 QueryResults results =
210 query_processor
211 ->ParseSearch(search_spec,
212 ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
213 clock.GetSystemTimeMilliseconds())
214 .ValueOrDie();
215 while (results.root_iterator->Advance().ok()) {
216 results.root_iterator->doc_hit_info();
217 }
218 }
219
220 // Destroy document store and schema store before the whole directory is
221 // removed because they persist data in destructor.
222 document_store.reset();
223 schema_store.reset();
224 filesystem.DeleteDirectoryRecursively(base_dir.c_str());
225 }
226 BENCHMARK(BM_QueryOneTerm)
227 // The testing numbers are in an ascending order with a fixed interval, that
228 // way we can tell if the performance increments are linear, exponential, or
229 // something else.
230 ->Arg(1000)
231 ->Arg(3000)
232 ->Arg(5000)
233 ->Arg(7000)
234 ->Arg(9000)
235 ->Arg(11000)
236 ->Arg(13000)
237 ->Arg(15000)
238 ->Arg(17000)
239 ->Arg(19000)
240 ->Arg(21000)
241 ->Arg(23000)
242 ->Arg(25000)
243 ->Arg(27000)
244 ->Arg(29000)
245 ->Arg(31000)
246 ->Arg(33000)
247 ->Arg(35000)
248 ->Arg(37000)
249 ->Arg(39000)
250 ->Arg(41000)
251 ->Arg(43000)
252 ->Arg(45000)
253 ->Arg(47000)
254 ->Arg(49000)
255 ->Arg(64000)
256 ->Arg(128000)
257 ->Arg(256000)
258 ->Arg(384000)
259 ->Arg(512000)
260 ->Arg(1024000)
261 ->Arg(2048000)
262 ->Arg(4096000);
263
BM_QueryFiveTerms(benchmark::State & state)264 void BM_QueryFiveTerms(benchmark::State& state) {
265 bool run_via_adb = absl::GetFlag(FLAGS_adb);
266 if (!run_via_adb) {
267 ICING_ASSERT_OK(icu_data_file_helper::SetUpIcuDataFile(
268 GetTestFilePath("icing/icu.dat")));
269 }
270
271 FeatureFlags feature_flags = GetTestFeatureFlags();
272 IcingFilesystem icing_filesystem;
273 Filesystem filesystem;
274 const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
275 const std::string index_dir = base_dir + "/index";
276 const std::string numeric_index_dir = base_dir + "/numeric_index";
277 const std::string embedding_index_dir = base_dir + "/embedding_index";
278 const std::string schema_dir = base_dir + "/schema";
279 const std::string doc_store_dir = base_dir + "/store";
280
281 filesystem.DeleteDirectoryRecursively(base_dir.c_str());
282 if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
283 !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
284 !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
285 ICING_LOG(ERROR) << "Failed to create test directories";
286 }
287
288 std::unique_ptr<Index> index =
289 CreateIndex(icing_filesystem, filesystem, index_dir);
290 // TODO(b/249829533): switch to use persistent numeric index.
291 ICING_ASSERT_OK_AND_ASSIGN(
292 auto numeric_index,
293 DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
294
295 language_segmenter_factory::SegmenterOptions options(ULOC_US);
296 std::unique_ptr<LanguageSegmenter> language_segmenter =
297 language_segmenter_factory::Create(std::move(options)).ValueOrDie();
298 std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
299
300 SchemaProto schema;
301 auto type_config = schema.add_types();
302 type_config->set_schema_type("type1");
303 Clock clock;
304 ICING_ASSERT_OK_AND_ASSIGN(
305 std::unique_ptr<SchemaStore> schema_store,
306 SchemaStore::Create(&filesystem, schema_dir, &clock, &feature_flags));
307 ICING_ASSERT_OK(schema_store->SetSchema(
308 schema, /*ignore_errors_and_delete_documents=*/false,
309 /*allow_circular_schema_definitions=*/false));
310
311 DocumentStore::CreateResult create_result =
312 CreateDocumentStore(&filesystem, doc_store_dir, &clock,
313 schema_store.get(), feature_flags)
314 .ValueOrDie();
315 std::unique_ptr<DocumentStore> document_store =
316 std::move(create_result.document_store);
317
318 ICING_ASSERT_OK_AND_ASSIGN(
319 auto embedding_index,
320 EmbeddingIndex::Create(&filesystem, embedding_index_dir, &clock,
321 &feature_flags));
322
323 DocumentId document_id = document_store
324 ->Put(DocumentBuilder()
325 .SetKey("icing", "type1")
326 .SetSchema("type1")
327 .Build())
328 .ValueOrDie()
329 .new_document_id;
330
331 int term_length = state.range(0) / 5;
332
333 const std::string input_string_a(term_length, 'A');
334 const std::string input_string_b(term_length, 'B');
335 const std::string input_string_c(term_length, 'C');
336 const std::string input_string_d(term_length, 'D');
337 const std::string input_string_e(term_length, 'E');
338 AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
339 TermMatchType::EXACT_ONLY, input_string_a);
340 AddTokenToIndex(index.get(), document_id, /*section_id=*/1,
341 TermMatchType::EXACT_ONLY, input_string_b);
342 AddTokenToIndex(index.get(), document_id, /*section_id=*/2,
343 TermMatchType::EXACT_ONLY, input_string_c);
344 AddTokenToIndex(index.get(), document_id, /*section_id=*/3,
345 TermMatchType::EXACT_ONLY, input_string_d);
346 AddTokenToIndex(index.get(), document_id, /*section_id=*/4,
347 TermMatchType::EXACT_ONLY, input_string_e);
348
349 ICING_ASSERT_OK_AND_ASSIGN(
350 std::unique_ptr<QueryProcessor> query_processor,
351 QueryProcessor::Create(
352 index.get(), numeric_index.get(), embedding_index.get(),
353 language_segmenter.get(), normalizer.get(), document_store.get(),
354 schema_store.get(), /*join_children_fetcher=*/nullptr, &clock,
355 &feature_flags));
356
357 const std::string query_string = absl_ports::StrCat(
358 input_string_a, " ", input_string_b, " ", input_string_c, " ",
359 input_string_d, " ", input_string_e);
360
361 SearchSpecProto search_spec;
362 search_spec.set_query(query_string);
363 search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
364
365 for (auto _ : state) {
366 QueryResults results =
367 query_processor
368 ->ParseSearch(search_spec,
369 ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
370 clock.GetSystemTimeMilliseconds())
371 .ValueOrDie();
372 while (results.root_iterator->Advance().ok()) {
373 results.root_iterator->doc_hit_info();
374 }
375 }
376
377 // Destroy document store and schema store before the whole directory is
378 // removed because they persist data in destructor.
379 document_store.reset();
380 schema_store.reset();
381 filesystem.DeleteDirectoryRecursively(base_dir.c_str());
382 }
383 BENCHMARK(BM_QueryFiveTerms)
384 // The testing numbers are in an ascending order with a fixed interval, that
385 // way we can tell if the performance increments are linear, exponential, or
386 // something else.
387 ->Arg(1000)
388 ->Arg(3000)
389 ->Arg(5000)
390 ->Arg(7000)
391 ->Arg(9000)
392 ->Arg(11000)
393 ->Arg(13000)
394 ->Arg(15000)
395 ->Arg(17000)
396 ->Arg(19000)
397 ->Arg(21000)
398 ->Arg(23000)
399 ->Arg(25000)
400 ->Arg(27000)
401 ->Arg(29000)
402 ->Arg(31000)
403 ->Arg(33000)
404 ->Arg(35000)
405 ->Arg(37000)
406 ->Arg(39000)
407 ->Arg(41000)
408 ->Arg(43000)
409 ->Arg(45000)
410 ->Arg(47000)
411 ->Arg(49000)
412 ->Arg(64000)
413 ->Arg(128000)
414 ->Arg(256000)
415 ->Arg(384000)
416 ->Arg(512000)
417 ->Arg(1024000)
418 ->Arg(2048000)
419 ->Arg(4096000);
420
BM_QueryDiacriticTerm(benchmark::State & state)421 void BM_QueryDiacriticTerm(benchmark::State& state) {
422 bool run_via_adb = absl::GetFlag(FLAGS_adb);
423 if (!run_via_adb) {
424 ICING_ASSERT_OK(icu_data_file_helper::SetUpIcuDataFile(
425 GetTestFilePath("icing/icu.dat")));
426 }
427
428 FeatureFlags feature_flags = GetTestFeatureFlags();
429 IcingFilesystem icing_filesystem;
430 Filesystem filesystem;
431 const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
432 const std::string index_dir = base_dir + "/index";
433 const std::string numeric_index_dir = base_dir + "/numeric_index";
434 const std::string embedding_index_dir = base_dir + "/embedding_index";
435 const std::string schema_dir = base_dir + "/schema";
436 const std::string doc_store_dir = base_dir + "/store";
437
438 filesystem.DeleteDirectoryRecursively(base_dir.c_str());
439 if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
440 !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
441 !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
442 ICING_LOG(ERROR) << "Failed to create test directories";
443 }
444
445 std::unique_ptr<Index> index =
446 CreateIndex(icing_filesystem, filesystem, index_dir);
447 // TODO(b/249829533): switch to use persistent numeric index.
448 ICING_ASSERT_OK_AND_ASSIGN(
449 auto numeric_index,
450 DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
451
452 language_segmenter_factory::SegmenterOptions options(ULOC_US);
453 std::unique_ptr<LanguageSegmenter> language_segmenter =
454 language_segmenter_factory::Create(std::move(options)).ValueOrDie();
455 std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
456
457 SchemaProto schema;
458 auto type_config = schema.add_types();
459 type_config->set_schema_type("type1");
460 Clock clock;
461 ICING_ASSERT_OK_AND_ASSIGN(
462 std::unique_ptr<SchemaStore> schema_store,
463 SchemaStore::Create(&filesystem, schema_dir, &clock, &feature_flags));
464 ICING_ASSERT_OK(schema_store->SetSchema(
465 schema, /*ignore_errors_and_delete_documents=*/false,
466 /*allow_circular_schema_definitions=*/false));
467
468 DocumentStore::CreateResult create_result =
469 CreateDocumentStore(&filesystem, doc_store_dir, &clock,
470 schema_store.get(), feature_flags)
471 .ValueOrDie();
472 std::unique_ptr<DocumentStore> document_store =
473 std::move(create_result.document_store);
474
475 ICING_ASSERT_OK_AND_ASSIGN(
476 auto embedding_index,
477 EmbeddingIndex::Create(&filesystem, embedding_index_dir, &clock,
478 &feature_flags));
479
480 DocumentId document_id = document_store
481 ->Put(DocumentBuilder()
482 .SetKey("icing", "type1")
483 .SetSchema("type1")
484 .Build())
485 .ValueOrDie()
486 .new_document_id;
487
488 std::string input_string;
489 while (input_string.length() < state.range(0)) {
490 input_string.append("àáâãā");
491 }
492 AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
493 TermMatchType::EXACT_ONLY, input_string);
494
495 ICING_ASSERT_OK_AND_ASSIGN(
496 std::unique_ptr<QueryProcessor> query_processor,
497 QueryProcessor::Create(
498 index.get(), numeric_index.get(), embedding_index.get(),
499 language_segmenter.get(), normalizer.get(), document_store.get(),
500 schema_store.get(), /*join_children_fetcher=*/nullptr, &clock,
501 &feature_flags));
502
503 SearchSpecProto search_spec;
504 search_spec.set_query(input_string);
505 search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
506
507 for (auto _ : state) {
508 QueryResults results =
509 query_processor
510 ->ParseSearch(search_spec,
511 ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
512 clock.GetSystemTimeMilliseconds())
513 .ValueOrDie();
514 while (results.root_iterator->Advance().ok()) {
515 results.root_iterator->doc_hit_info();
516 }
517 }
518
519 // Destroy document store and schema store before the whole directory is
520 // removed because they persist data in destructor.
521 document_store.reset();
522 schema_store.reset();
523 filesystem.DeleteDirectoryRecursively(base_dir.c_str());
524 }
525 BENCHMARK(BM_QueryDiacriticTerm)
526 // The testing numbers are in an ascending order with a fixed interval, that
527 // way we can tell if the performance increments are linear, exponential, or
528 // something else.
529 ->Arg(1000)
530 ->Arg(3000)
531 ->Arg(5000)
532 ->Arg(7000)
533 ->Arg(9000)
534 ->Arg(11000)
535 ->Arg(13000)
536 ->Arg(15000)
537 ->Arg(17000)
538 ->Arg(19000)
539 ->Arg(21000)
540 ->Arg(23000)
541 ->Arg(25000)
542 ->Arg(27000)
543 ->Arg(29000)
544 ->Arg(31000)
545 ->Arg(33000)
546 ->Arg(35000)
547 ->Arg(37000)
548 ->Arg(39000)
549 ->Arg(41000)
550 ->Arg(43000)
551 ->Arg(45000)
552 ->Arg(47000)
553 ->Arg(49000)
554 ->Arg(64000)
555 ->Arg(128000)
556 ->Arg(256000)
557 ->Arg(384000)
558 ->Arg(512000)
559 ->Arg(1024000)
560 ->Arg(2048000)
561 ->Arg(4096000);
562
BM_QueryHiragana(benchmark::State & state)563 void BM_QueryHiragana(benchmark::State& state) {
564 bool run_via_adb = absl::GetFlag(FLAGS_adb);
565 if (!run_via_adb) {
566 ICING_ASSERT_OK(icu_data_file_helper::SetUpIcuDataFile(
567 GetTestFilePath("icing/icu.dat")));
568 }
569
570 FeatureFlags feature_flags = GetTestFeatureFlags();
571 IcingFilesystem icing_filesystem;
572 Filesystem filesystem;
573 const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
574 const std::string index_dir = base_dir + "/index";
575 const std::string numeric_index_dir = base_dir + "/numeric_index";
576 const std::string embedding_index_dir = base_dir + "/embedding_index";
577 const std::string schema_dir = base_dir + "/schema";
578 const std::string doc_store_dir = base_dir + "/store";
579
580 filesystem.DeleteDirectoryRecursively(base_dir.c_str());
581 if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
582 !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
583 !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
584 ICING_LOG(ERROR) << "Failed to create test directories";
585 }
586
587 std::unique_ptr<Index> index =
588 CreateIndex(icing_filesystem, filesystem, index_dir);
589 // TODO(b/249829533): switch to use persistent numeric index.
590 ICING_ASSERT_OK_AND_ASSIGN(
591 auto numeric_index,
592 DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
593
594 language_segmenter_factory::SegmenterOptions options(ULOC_US);
595 std::unique_ptr<LanguageSegmenter> language_segmenter =
596 language_segmenter_factory::Create(std::move(options)).ValueOrDie();
597 std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
598
599 SchemaProto schema;
600 auto type_config = schema.add_types();
601 type_config->set_schema_type("type1");
602 Clock clock;
603 ICING_ASSERT_OK_AND_ASSIGN(
604 std::unique_ptr<SchemaStore> schema_store,
605 SchemaStore::Create(&filesystem, schema_dir, &clock, &feature_flags));
606 ICING_ASSERT_OK(schema_store->SetSchema(
607 schema, /*ignore_errors_and_delete_documents=*/false,
608 /*allow_circular_schema_definitions=*/false));
609
610 DocumentStore::CreateResult create_result =
611 CreateDocumentStore(&filesystem, doc_store_dir, &clock,
612 schema_store.get(), feature_flags)
613 .ValueOrDie();
614 std::unique_ptr<DocumentStore> document_store =
615 std::move(create_result.document_store);
616
617 ICING_ASSERT_OK_AND_ASSIGN(
618 auto embedding_index,
619 EmbeddingIndex::Create(&filesystem, embedding_index_dir, &clock,
620 &feature_flags));
621
622 DocumentId document_id = document_store
623 ->Put(DocumentBuilder()
624 .SetKey("icing", "type1")
625 .SetSchema("type1")
626 .Build())
627 .ValueOrDie()
628 .new_document_id;
629
630 std::string input_string;
631 while (input_string.length() < state.range(0)) {
632 input_string.append("あいうえお");
633 }
634 AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
635 TermMatchType::EXACT_ONLY, input_string);
636
637 ICING_ASSERT_OK_AND_ASSIGN(
638 std::unique_ptr<QueryProcessor> query_processor,
639 QueryProcessor::Create(
640 index.get(), numeric_index.get(), embedding_index.get(),
641 language_segmenter.get(), normalizer.get(), document_store.get(),
642 schema_store.get(), /*join_children_fetcher=*/nullptr, &clock,
643 &feature_flags));
644
645 SearchSpecProto search_spec;
646 search_spec.set_query(input_string);
647 search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
648
649 for (auto _ : state) {
650 QueryResults results =
651 query_processor
652 ->ParseSearch(search_spec,
653 ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
654 clock.GetSystemTimeMilliseconds())
655 .ValueOrDie();
656 while (results.root_iterator->Advance().ok()) {
657 results.root_iterator->doc_hit_info();
658 }
659 }
660
661 // Destroy document store and schema store before the whole directory is
662 // removed because they persist data in destructor.
663 document_store.reset();
664 schema_store.reset();
665 filesystem.DeleteDirectoryRecursively(base_dir.c_str());
666 }
667 BENCHMARK(BM_QueryHiragana)
668 // The testing numbers are in an ascending order with a fixed interval, that
669 // way we can tell if the performance increments are linear, exponential, or
670 // something else.
671 ->Arg(1000)
672 ->Arg(3000)
673 ->Arg(5000)
674 ->Arg(7000)
675 ->Arg(9000)
676 ->Arg(11000)
677 ->Arg(13000)
678 ->Arg(15000)
679 ->Arg(17000)
680 ->Arg(19000)
681 ->Arg(21000)
682 ->Arg(23000)
683 ->Arg(25000)
684 ->Arg(27000)
685 ->Arg(29000)
686 ->Arg(31000)
687 ->Arg(33000)
688 ->Arg(35000)
689 ->Arg(37000)
690 ->Arg(39000)
691 ->Arg(41000)
692 ->Arg(43000)
693 ->Arg(45000)
694 ->Arg(47000)
695 ->Arg(49000)
696 ->Arg(64000)
697 ->Arg(128000)
698 ->Arg(256000)
699 ->Arg(384000)
700 ->Arg(512000)
701 ->Arg(1024000)
702 ->Arg(2048000)
703 ->Arg(4096000);
704 } // namespace
705
706 } // namespace lib
707 } // namespace icing
708