xref: /aosp_15_r20/external/grpc-grpc/third_party/utf8_range/utf8_validity_test.cc (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1 #include "utf8_validity.h"
2 
3 #include <gtest/gtest.h>
4 #include "absl/strings/string_view.h"
5 
6 namespace utf8_range {
7 
// Checks the value SpanStructurallyValid returns for well-formed inputs (the
// full input length) and for ill-formed inputs (the offset at which the first
// bad byte sequence starts).
TEST(Utf8Validity, SpanStructurallyValid) {
  // Test simple good strings: every expectation equals the input length.
  EXPECT_EQ(4, SpanStructurallyValid("abcd"));
  // The explicit length keeps the embedded NUL byte inside the view.
  EXPECT_EQ(4, SpanStructurallyValid(absl::string_view("a\0cd", 4)));  // NUL
  EXPECT_EQ(4, SpanStructurallyValid("ab\xc2\x81"));                   // 2-byte
  EXPECT_EQ(4, SpanStructurallyValid("a\xe2\x81\x81"));                // 3-byte
  EXPECT_EQ(4, SpanStructurallyValid("\xf2\x81\x81\x81"));             // 4-byte

  // Test simple bad strings: the expectation is the number of leading bytes
  // that precede the first bad sequence.
  EXPECT_EQ(3, SpanStructurallyValid("abc\x80"));        // lone continuation
  EXPECT_EQ(3, SpanStructurallyValid("abc\xc2"));        // truncated 2-byte
  EXPECT_EQ(2, SpanStructurallyValid("ab\xe2\x81"));     // truncated 3-byte
  EXPECT_EQ(1, SpanStructurallyValid("a\xf2\x81\x81"));  // truncated 4-byte
  // Overlong encodings: more bytes than the code point needs.
  EXPECT_EQ(2, SpanStructurallyValid("ab\xc0\x81"));        // U+0001 in 2 bytes
  EXPECT_EQ(1, SpanStructurallyValid("a\xe0\x81\x81"));     // U+0041 in 3 bytes
  EXPECT_EQ(0, SpanStructurallyValid("\xf0\x81\x81\x81"));  // U+1041 in 4 bytes
  // Decodes to 0x13FFFF, which is above the Unicode maximum U+10FFFF.
  EXPECT_EQ(0, SpanStructurallyValid("\xf4\xbf\xbf\xbf"));
  // surrogate min, max
  EXPECT_EQ(0, SpanStructurallyValid("\xED\xA0\x80"));  // U+D800
  EXPECT_EQ(0, SpanStructurallyValid("\xED\xBF\xBF"));  // U+DFFF

  // Non-shortest forms at the boundaries of each length: the bad sequence
  // starts at offset 0, so the expected span is 0.
  EXPECT_EQ(0, SpanStructurallyValid("\xc0\x80"));
  EXPECT_EQ(0, SpanStructurallyValid("\xc1\xbf"));
  EXPECT_EQ(0, SpanStructurallyValid("\xe0\x80\x80"));
  EXPECT_EQ(0, SpanStructurallyValid("\xe0\x9f\xbf"));
  EXPECT_EQ(0, SpanStructurallyValid("\xf0\x80\x80\x80"));
  EXPECT_EQ(0, SpanStructurallyValid("\xf0\x83\xbf\xbf"));

  // This string unchecked caused GWS to crash 7/2006:
  // invalid sequence 0xc7 0xc8 0xcd 0xcb
  // (four lead bytes in a row, none followed by a continuation byte)
  EXPECT_EQ(0, SpanStructurallyValid("\xc7\xc8\xcd\xcb"));
}
41 
// Checks that IsStructurallyValid yields true for well-formed inputs and
// false for truncated, overlong, surrogate, and out-of-range byte sequences.
TEST(Utf8Validity, IsStructurallyValid) {
  // Test simple good strings
  EXPECT_TRUE(IsStructurallyValid("abcd"));
  // The explicit length keeps the embedded NUL byte inside the view.
  EXPECT_TRUE(IsStructurallyValid(absl::string_view("a\0cd", 4)));  // NUL
  EXPECT_TRUE(IsStructurallyValid("ab\xc2\x81"));                   // 2-byte
  EXPECT_TRUE(IsStructurallyValid("a\xe2\x81\x81"));                // 3-byte
  EXPECT_TRUE(IsStructurallyValid("\xf2\x81\x81\x81"));             // 4-byte

  // Test simple bad strings
  EXPECT_FALSE(IsStructurallyValid("abc\x80"));        // lone continuation
  EXPECT_FALSE(IsStructurallyValid("abc\xc2"));        // truncated 2-byte
  EXPECT_FALSE(IsStructurallyValid("ab\xe2\x81"));     // truncated 3-byte
  EXPECT_FALSE(IsStructurallyValid("a\xf2\x81\x81"));  // truncated 4-byte
  // Overlong encodings: more bytes than the code point needs.
  EXPECT_FALSE(IsStructurallyValid("ab\xc0\x81"));        // U+0001 in 2 bytes
  EXPECT_FALSE(IsStructurallyValid("a\xe0\x81\x81"));     // U+0041 in 3 bytes
  EXPECT_FALSE(IsStructurallyValid("\xf0\x81\x81\x81"));  // U+1041 in 4 bytes
  // Decodes to 0x13FFFF, which is above the Unicode maximum U+10FFFF.
  EXPECT_FALSE(IsStructurallyValid("\xf4\xbf\xbf\xbf"));
  // surrogate min, max
  EXPECT_FALSE(IsStructurallyValid("\xED\xA0\x80"));  // U+D800
  EXPECT_FALSE(IsStructurallyValid("\xED\xBF\xBF"));  // U+DFFF

  // Non-shortest forms at the boundaries of each length should all be
  // reported as invalid.
  EXPECT_FALSE(IsStructurallyValid("\xc0\x80"));
  EXPECT_FALSE(IsStructurallyValid("\xc1\xbf"));
  EXPECT_FALSE(IsStructurallyValid("\xe0\x80\x80"));
  EXPECT_FALSE(IsStructurallyValid("\xe0\x9f\xbf"));
  EXPECT_FALSE(IsStructurallyValid("\xf0\x80\x80\x80"));
  EXPECT_FALSE(IsStructurallyValid("\xf0\x83\xbf\xbf"));

  // This string unchecked caused GWS to crash 7/2006:
  // invalid sequence 0xc7 0xc8 0xcd 0xcb
  // (four lead bytes in a row, none followed by a continuation byte)
  EXPECT_FALSE(IsStructurallyValid("\xc7\xc8\xcd\xcb"));
}
75 
76 }  // namespace utf8_range
77