1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/embed/embedding-scorer.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <vector>
20
21 #include "gtest/gtest.h"
22 #include "icing/index/embed/quantizer.h"
23 #include "icing/testing/common-matchers.h"
24
25 namespace icing {
26 namespace lib {
27
28 namespace {
29
QuantizeVector(std::vector<float> v,const Quantizer & quantizer)30 std::vector<uint8_t> QuantizeVector(std::vector<float> v,
31 const Quantizer& quantizer) {
32 std::vector<uint8_t> quantized;
33 quantized.reserve(v.size());
34 for (float value : v) {
35 quantized.push_back(quantizer.Quantize(value));
36 }
37 return quantized;
38 }
39
// Checks DOT_PRODUCT scoring against a hand-expanded dot product, once with
// two float vectors and once with the second vector supplied in quantized
// form.
TEST(EmbeddingScorerTest, DotProduct) {
  // Tolerance for the quantized path, where the second operand is passed as
  // uint8_t values instead of floats.
  constexpr float kQuantizedTolerance = 0.01f;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingScorer> scorer,
      EmbeddingScorer::Create(
          SearchSpecProto::EmbeddingQueryMetricType::DOT_PRODUCT));
  ICING_ASSERT_OK_AND_ASSIGN(
      Quantizer quantizer,
      Quantizer::Create(/*float_min=*/-1.0f, /*float_max=*/1.0f));

  std::vector<float> lhs = {0.1f, 0.2f, 0.3f};
  std::vector<float> rhs = {0.5f, 0.5f, 0.6f};
  std::vector<uint8_t> rhs_quantized = QuantizeVector(rhs, quantizer);
  // Both vectors have three components.
  const int dimension = static_cast<int>(lhs.size());
  // Element-wise products of `lhs` and `rhs`, summed.
  float expected_score = 0.1f * 0.5f + 0.2f * 0.5f + 0.3f * 0.6f;

  // Float path: both operands are plain float arrays.
  const auto float_score = scorer->Score(dimension, lhs.data(), rhs.data());
  EXPECT_FLOAT_EQ(float_score, expected_score);

  // Quantized path: second operand is the quantized copy of `rhs`.
  const auto quantized_score =
      scorer->Score(dimension, lhs.data(), rhs_quantized.data(), quantizer);
  EXPECT_NEAR(quantized_score, expected_score, kQuantizedTolerance);
}
66
// Checks COSINE scoring against a precomputed reference value, once with two
// float vectors and once with the second vector supplied in quantized form.
TEST(EmbeddingScorerTest, Cosine) {
  // The reference value below is a literal rounded to four decimals, so the
  // float path is compared with a small tolerance rather than exactly.
  constexpr float kFloatTolerance = 0.001f;
  // Tolerance for the quantized path, where the second operand is passed as
  // uint8_t values instead of floats.
  constexpr float kQuantizedTolerance = 0.01f;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingScorer> scorer,
      EmbeddingScorer::Create(
          SearchSpecProto::EmbeddingQueryMetricType::COSINE));
  ICING_ASSERT_OK_AND_ASSIGN(
      Quantizer quantizer,
      Quantizer::Create(/*float_min=*/-1.0f, /*float_max=*/1.0f));

  std::vector<float> lhs = {0.7f, -0.3f, -0.6f};
  std::vector<float> rhs = {-0.5f, 0.1f, -0.2f};
  std::vector<uint8_t> rhs_quantized = QuantizeVector(rhs, quantizer);
  // Both vectors have three components.
  const int dimension = static_cast<int>(lhs.size());
  // dot(lhs, rhs) = -0.26; |lhs|^2 = 0.94; |rhs|^2 = 0.30;
  // -0.26 / sqrt(0.94 * 0.30) ~= -0.4896.
  float expected_score = -0.4896f;

  // Float path: both operands are plain float arrays.
  const auto float_score = scorer->Score(dimension, lhs.data(), rhs.data());
  EXPECT_NEAR(float_score, expected_score, kFloatTolerance);

  // Quantized path: second operand is the quantized copy of `rhs`.
  const auto quantized_score =
      scorer->Score(dimension, lhs.data(), rhs_quantized.data(), quantizer);
  EXPECT_NEAR(quantized_score, expected_score, kQuantizedTolerance);
}
94
// Checks EUCLIDEAN scoring against a precomputed reference value, once with
// two float vectors and once with the second vector supplied in quantized
// form.
TEST(EmbeddingScorerTest, Euclidean) {
  // The reference value below is a literal rounded to four decimals, so the
  // float path is compared with a small tolerance rather than exactly.
  constexpr float kFloatTolerance = 0.001f;
  // Tolerance for the quantized path, where the second operand is passed as
  // uint8_t values instead of floats.
  constexpr float kQuantizedTolerance = 0.01f;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingScorer> scorer,
      EmbeddingScorer::Create(
          SearchSpecProto::EmbeddingQueryMetricType::EUCLIDEAN));
  ICING_ASSERT_OK_AND_ASSIGN(
      Quantizer quantizer,
      Quantizer::Create(/*float_min=*/-1.0f, /*float_max=*/1.0f));

  std::vector<float> lhs = {0.6f, -0.2f, 0.9f};
  std::vector<float> rhs = {-0.8f, -0.4f, 0.2f};
  std::vector<uint8_t> rhs_quantized = QuantizeVector(rhs, quantizer);
  // Both vectors have three components.
  const int dimension = static_cast<int>(lhs.size());
  // lhs - rhs = (1.4, 0.2, 0.7); squared components sum to 2.49;
  // sqrt(2.49) ~= 1.5780.
  float expected_score = 1.5780f;

  // Float path: both operands are plain float arrays.
  const auto float_score = scorer->Score(dimension, lhs.data(), rhs.data());
  EXPECT_NEAR(float_score, expected_score, kFloatTolerance);

  // Quantized path: second operand is the quantized copy of `rhs`.
  const auto quantized_score =
      scorer->Score(dimension, lhs.data(), rhs_quantized.data(), quantizer);
  EXPECT_NEAR(quantized_score, expected_score, kQuantizedTolerance);
}
122
123 } // namespace
124
125 } // namespace lib
126 } // namespace icing
127