xref: /aosp_15_r20/external/pigweed/pw_tokenizer/py/tokens_test.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1*61c4878aSAndroid Build Coastguard Worker#!/usr/bin/env python3
2*61c4878aSAndroid Build Coastguard Worker# Copyright 2020 The Pigweed Authors
3*61c4878aSAndroid Build Coastguard Worker#
4*61c4878aSAndroid Build Coastguard Worker# Licensed under the Apache License, Version 2.0 (the "License"); you may not
5*61c4878aSAndroid Build Coastguard Worker# use this file except in compliance with the License. You may obtain a copy of
6*61c4878aSAndroid Build Coastguard Worker# the License at
7*61c4878aSAndroid Build Coastguard Worker#
8*61c4878aSAndroid Build Coastguard Worker#     https://www.apache.org/licenses/LICENSE-2.0
9*61c4878aSAndroid Build Coastguard Worker#
10*61c4878aSAndroid Build Coastguard Worker# Unless required by applicable law or agreed to in writing, software
11*61c4878aSAndroid Build Coastguard Worker# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12*61c4878aSAndroid Build Coastguard Worker# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13*61c4878aSAndroid Build Coastguard Worker# License for the specific language governing permissions and limitations under
14*61c4878aSAndroid Build Coastguard Worker# the License.
15*61c4878aSAndroid Build Coastguard Worker"""Tests for the tokens module."""
16*61c4878aSAndroid Build Coastguard Worker
17*61c4878aSAndroid Build Coastguard Workerfrom datetime import datetime
18*61c4878aSAndroid Build Coastguard Workerimport io
19*61c4878aSAndroid Build Coastguard Workerimport logging
20*61c4878aSAndroid Build Coastguard Workerfrom pathlib import Path
21*61c4878aSAndroid Build Coastguard Workerimport shutil
22*61c4878aSAndroid Build Coastguard Workerimport tempfile
23*61c4878aSAndroid Build Coastguard Workerfrom typing import Iterator
24*61c4878aSAndroid Build Coastguard Workerimport unittest
25*61c4878aSAndroid Build Coastguard Worker
26*61c4878aSAndroid Build Coastguard Workerfrom pw_tokenizer import tokens
27*61c4878aSAndroid Build Coastguard Workerfrom pw_tokenizer.tokens import c_hash, DIR_DB_SUFFIX, _LOG
28*61c4878aSAndroid Build Coastguard Worker
# Primary CSV fixture. Each row is: token (8 hex digits), removal date
# (space-padded blank when the entry has no removal date), domain (empty for
# every row here), and the quoted string. test_csv expects str(db) to reproduce
# this text exactly; test_lookup and test_purge read entries from it.
CSV_DATABASE = '''\
00000000,2019-06-10,"",""
141c35d5,          ,"","The answer: ""%s"""
2db1515f,          ,"","%u%d%02x%X%hu%hhu%d%ld%lu%lld%llu%c%c%c"
2e668cd6,2019-06-11,"","Jello, world!"
31631781,          ,"","%d"
61fd1e26,          ,"","%ld"
68ab92da,          ,"","%s there are %x (%.2f) of them%c"
7b940e2a,          ,"","Hello %s! %hd %e"
851beeb6,          ,"","%u %d"
881436a0,          ,"","The answer is: %s"
ad002c97,          ,"","%llx"
b3653e13,2019-06-12,"","Jello!"
b912567b,          ,"","%x%lld%1.2f%s"
cc6d3131,2020-01-01,"","Jello?"
e13b0f94,          ,"","%llu"
e65aefef,2019-06-10,"","Won't fit : %s%d"
'''
47*61c4878aSAndroid Build Coastguard Worker
# Binary counterpart of CSV_DATABASE: a header, then sixteen 8-byte records
# (token bytes followed by date bytes, least-significant byte first), then the
# NUL-terminated strings in the same order as the records. Records whose CSV
# row has a blank date carry ff ff ff ff in the date bytes.
#
# The date 2019-06-10 is 07E3-06-0A in hex. In database order, it's 0A 06 E3 07.
BINARY_DATABASE = (
    b'TOKENS\x00\x00\x10\x00\x00\x00\0\0\0\0'  # header (0x10 entries)
    b'\x00\x00\x00\x00\x0a\x06\xe3\x07'  # 0x01
    b'\xd5\x35\x1c\x14\xff\xff\xff\xff'  # 0x02
    b'\x5f\x51\xb1\x2d\xff\xff\xff\xff'  # 0x03
    b'\xd6\x8c\x66\x2e\x0b\x06\xe3\x07'  # 0x04
    b'\x81\x17\x63\x31\xff\xff\xff\xff'  # 0x05
    b'\x26\x1e\xfd\x61\xff\xff\xff\xff'  # 0x06
    b'\xda\x92\xab\x68\xff\xff\xff\xff'  # 0x07
    b'\x2a\x0e\x94\x7b\xff\xff\xff\xff'  # 0x08
    b'\xb6\xee\x1b\x85\xff\xff\xff\xff'  # 0x09
    b'\xa0\x36\x14\x88\xff\xff\xff\xff'  # 0x0a
    b'\x97\x2c\x00\xad\xff\xff\xff\xff'  # 0x0b
    b'\x13\x3e\x65\xb3\x0c\x06\xe3\x07'  # 0x0c
    b'\x7b\x56\x12\xb9\xff\xff\xff\xff'  # 0x0d
    b'\x31\x31\x6d\xcc\x01\x01\xe4\x07'  # 0x0e
    b'\x94\x0f\x3b\xe1\xff\xff\xff\xff'  # 0x0f
    b'\xef\xef\x5a\xe6\x0a\x06\xe3\x07'  # 0x10
    b'\x00'
    b'The answer: "%s"\x00'
    b'%u%d%02x%X%hu%hhu%d%ld%lu%lld%llu%c%c%c\x00'
    b'Jello, world!\x00'
    b'%d\x00'
    b'%ld\x00'
    b'%s there are %x (%.2f) of them%c\x00'
    b'Hello %s! %hd %e\x00'
    b'%u %d\x00'
    b'The answer is: %s\x00'
    b'%llx\x00'
    b'Jello!\x00'
    b'%x%lld%1.2f%s\x00'
    b'Jello?\x00'
    b'%llu\x00'
    b'Won\'t fit : %s%d\x00'
)
84*61c4878aSAndroid Build Coastguard Worker
# CSV mixing well-formed and malformed rows. test_bad_csv expects only tokens
# 1, 3 and 5 to load from it, with at least three errors logged for the rest
# (an unparseable date, a missing token, and a row with too few fields).
INVALID_CSV = """\
1,,"Whoa there!"
2,this is totally invalid,"Whoa there!"
3,,"This one's OK"
,,"Also broken"
5,1845-02-02,"I'm %s fine"
6,"Missing fields"
"""
93*61c4878aSAndroid Build Coastguard Worker
# CSV fixture with 22 rows, all with a blank removal date and an empty domain.
# The same rows form the leading part of CSV_DATABASE_4.
# NOTE(review): the tests that consume this constant are outside this view.
CSV_DATABASE_2 = '''\
00000000,          ,"",""
141c35d5,          ,"","The answer: ""%s"""
29aef586,          ,"","1234"
2b78825f,          ,"","[:-)"
2e668cd6,          ,"","Jello, world!"
31631781,          ,"","%d"
61fd1e26,          ,"","%ld"
68ab92da,          ,"","%s there are %x (%.2f) of them%c"
7b940e2a,          ,"","Hello %s! %hd %e"
7da55d52,          ,"",">:-[]"
7f35a9a5,          ,"","TestName"
851beeb6,          ,"","%u %d"
881436a0,          ,"","The answer is: %s"
88808930,          ,"","%u%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c"
92723f44,          ,"","???"
a09d6698,          ,"","won-won-won-wonderful"
aa9ffa66,          ,"","void pw::tokenizer::{anonymous}::TestName()"
ad002c97,          ,"","%llx"
b3653e13,          ,"","Jello!"
cc6d3131,          ,"","Jello?"
e13b0f94,          ,"","%llu"
e65aefef,          ,"","Won't fit : %s%d"
'''
118*61c4878aSAndroid Build Coastguard Worker
# Five rows that all belong to the "TEST_DOMAIN" domain and have a blank
# removal date. The same rows form the trailing part of CSV_DATABASE_4.
# NOTE(review): the tests that consume this constant are outside this view.
CSV_DATABASE_3 = """\
17fa86d3,          ,"TEST_DOMAIN","hello"
18c5017c,          ,"TEST_DOMAIN","yes"
59b2701c,          ,"TEST_DOMAIN","The answer was: %s"
881436a0,          ,"TEST_DOMAIN","The answer is: %s"
d18ada0f,          ,"TEST_DOMAIN","something"
"""
126*61c4878aSAndroid Build Coastguard Worker
# The rows of CSV_DATABASE_2 (empty domain) followed by the rows of
# CSV_DATABASE_3 ("TEST_DOMAIN"). Token 881436a0 appears once in each domain.
# test_csv expects str(db) to reproduce this text exactly.
CSV_DATABASE_4 = '''\
00000000,          ,"",""
141c35d5,          ,"","The answer: ""%s"""
29aef586,          ,"","1234"
2b78825f,          ,"","[:-)"
2e668cd6,          ,"","Jello, world!"
31631781,          ,"","%d"
61fd1e26,          ,"","%ld"
68ab92da,          ,"","%s there are %x (%.2f) of them%c"
7b940e2a,          ,"","Hello %s! %hd %e"
7da55d52,          ,"",">:-[]"
7f35a9a5,          ,"","TestName"
851beeb6,          ,"","%u %d"
881436a0,          ,"","The answer is: %s"
88808930,          ,"","%u%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c"
92723f44,          ,"","???"
a09d6698,          ,"","won-won-won-wonderful"
aa9ffa66,          ,"","void pw::tokenizer::{anonymous}::TestName()"
ad002c97,          ,"","%llx"
b3653e13,          ,"","Jello!"
cc6d3131,          ,"","Jello?"
e13b0f94,          ,"","%llu"
e65aefef,          ,"","Won't fit : %s%d"
17fa86d3,          ,"TEST_DOMAIN","hello"
18c5017c,          ,"TEST_DOMAIN","yes"
59b2701c,          ,"TEST_DOMAIN","The answer was: %s"
881436a0,          ,"TEST_DOMAIN","The answer is: %s"
d18ada0f,          ,"TEST_DOMAIN","something"
'''
156*61c4878aSAndroid Build Coastguard Worker
# Small four-column fixture for domain loading: token 2 appears both in the
# empty domain and in "Domain". Read by test_csv_loads_domains.
CSV_DATABASE_5 = """\
00000001,1998-09-04,"Domain","hello"
00000002,          ,"","yes"
00000002,          ,"Domain","No!"
00000004,          ,"?","The answer is: %s"
"""
163*61c4878aSAndroid Build Coastguard Worker
# The tokens, dates and strings of CSV_DATABASE_5 in a three-column layout with
# no domain column. test_csv_legacy_no_domain_database expects every entry to
# load with domain ''.
CSV_DATABASE_5_NO_DOMAIN = """\
00000001,1998-09-04,"hello"
00000002,          ,"yes"
00000002,          ,"No!"
00000004,          ,"The answer is: %s"
"""
170*61c4878aSAndroid Build Coastguard Worker
# Four-column fixture whose domain fields contain whitespace. This is not a raw
# string, so the \t and \n escapes below become real tab and newline characters
# inside the quoted domain fields.
# NOTE(review): the tests that consume this constant are outside this view.
CSV_DATABASE_6_DOMAIN_WHITESPACE = """\
00000001,2001-09-04,"Domain 1","hello"
00000002,          ,"\t","yes"
00000002,          ,"  Domain\t20\n","No!"
00000004,          ,"  ?   ","The answer is: %s"
"""
177*61c4878aSAndroid Build Coastguard Worker
178*61c4878aSAndroid Build Coastguard Worker
def read_db_from_csv(csv_str: str) -> tokens.Database:
    """Builds a tokens.Database from CSV text held in memory.

    Args:
        csv_str: contents of a CSV token database.

    Returns:
        A Database created from the entries tokens.parse_csv produces.
    """
    # The database is fully constructed before the in-memory file is closed.
    with io.StringIO(csv_str) as csv_file:
        parsed_entries = tokens.parse_csv(csv_file)
        database = tokens.Database(parsed_entries)
    return database
182*61c4878aSAndroid Build Coastguard Worker
183*61c4878aSAndroid Build Coastguard Worker
def _entries(*strings: str) -> Iterator[tokens.TokenizedStringEntry]:
    """Yields one TokenizedStringEntry per string, using c_hash as its token."""
    yield from (
        tokens.TokenizedStringEntry(c_hash(text), text) for text in strings
    )
187*61c4878aSAndroid Build Coastguard Worker
188*61c4878aSAndroid Build Coastguard Worker
189*61c4878aSAndroid Build Coastguard Workerclass TokenDatabaseTest(unittest.TestCase):
190*61c4878aSAndroid Build Coastguard Worker    """Tests the token database class."""
191*61c4878aSAndroid Build Coastguard Worker
192*61c4878aSAndroid Build Coastguard Worker    def test_csv(self) -> None:
193*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE)
194*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(str(db), CSV_DATABASE)
195*61c4878aSAndroid Build Coastguard Worker
196*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE_4)
197*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(str(db), CSV_DATABASE_4)
198*61c4878aSAndroid Build Coastguard Worker
199*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv('')
200*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(str(db), '')
201*61c4878aSAndroid Build Coastguard Worker
202*61c4878aSAndroid Build Coastguard Worker    def test_csv_loads_domains(self) -> None:
203*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE_5)
204*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
205*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[1],
206*61c4878aSAndroid Build Coastguard Worker            [
207*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(
208*61c4878aSAndroid Build Coastguard Worker                    token=1,
209*61c4878aSAndroid Build Coastguard Worker                    string='hello',
210*61c4878aSAndroid Build Coastguard Worker                    domain='Domain',
211*61c4878aSAndroid Build Coastguard Worker                    date_removed=datetime(1998, 9, 4),
212*61c4878aSAndroid Build Coastguard Worker                )
213*61c4878aSAndroid Build Coastguard Worker            ],
214*61c4878aSAndroid Build Coastguard Worker        )
215*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
216*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[2],
217*61c4878aSAndroid Build Coastguard Worker            [
218*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(token=2, string='yes', domain=''),
219*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(
220*61c4878aSAndroid Build Coastguard Worker                    token=2, string='No!', domain='Domain'
221*61c4878aSAndroid Build Coastguard Worker                ),
222*61c4878aSAndroid Build Coastguard Worker            ],
223*61c4878aSAndroid Build Coastguard Worker        )
224*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
225*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[4],
226*61c4878aSAndroid Build Coastguard Worker            [
227*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(
228*61c4878aSAndroid Build Coastguard Worker                    token=4, string='The answer is: %s', domain='?'
229*61c4878aSAndroid Build Coastguard Worker                ),
230*61c4878aSAndroid Build Coastguard Worker            ],
231*61c4878aSAndroid Build Coastguard Worker        )
232*61c4878aSAndroid Build Coastguard Worker
233*61c4878aSAndroid Build Coastguard Worker    def test_csv_legacy_no_domain_database(self) -> None:
234*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE_5_NO_DOMAIN)
235*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
236*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[1],
237*61c4878aSAndroid Build Coastguard Worker            [
238*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(
239*61c4878aSAndroid Build Coastguard Worker                    token=1,
240*61c4878aSAndroid Build Coastguard Worker                    string='hello',
241*61c4878aSAndroid Build Coastguard Worker                    domain='',
242*61c4878aSAndroid Build Coastguard Worker                    date_removed=datetime(1998, 9, 4),
243*61c4878aSAndroid Build Coastguard Worker                )
244*61c4878aSAndroid Build Coastguard Worker            ],
245*61c4878aSAndroid Build Coastguard Worker        )
246*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
247*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[2],
248*61c4878aSAndroid Build Coastguard Worker            [
249*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(token=2, string='No!', domain=''),
250*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(token=2, string='yes', domain=''),
251*61c4878aSAndroid Build Coastguard Worker            ],
252*61c4878aSAndroid Build Coastguard Worker        )
253*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
254*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[4],
255*61c4878aSAndroid Build Coastguard Worker            [
256*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(
257*61c4878aSAndroid Build Coastguard Worker                    token=4, string='The answer is: %s', domain=''
258*61c4878aSAndroid Build Coastguard Worker                ),
259*61c4878aSAndroid Build Coastguard Worker            ],
260*61c4878aSAndroid Build Coastguard Worker        )
261*61c4878aSAndroid Build Coastguard Worker
262*61c4878aSAndroid Build Coastguard Worker    def test_csv_formatting(self) -> None:
263*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv('')
264*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(str(db), '')
265*61c4878aSAndroid Build Coastguard Worker
266*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv('abc123,2048-04-01,Fake string\n')
267*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(str(db), '00abc123,2048-04-01,"","Fake string"\n')
268*61c4878aSAndroid Build Coastguard Worker
269*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(
270*61c4878aSAndroid Build Coastguard Worker            '1,1990-01-01,"","Quotes"""\n' '0,1990-02-01,"Commas,"",,"\n'
271*61c4878aSAndroid Build Coastguard Worker        )
272*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
273*61c4878aSAndroid Build Coastguard Worker            str(db),
274*61c4878aSAndroid Build Coastguard Worker            (
275*61c4878aSAndroid Build Coastguard Worker                '00000000,1990-02-01,"","Commas,"",,"\n'
276*61c4878aSAndroid Build Coastguard Worker                '00000001,1990-01-01,"","Quotes"""\n'
277*61c4878aSAndroid Build Coastguard Worker            ),
278*61c4878aSAndroid Build Coastguard Worker        )
279*61c4878aSAndroid Build Coastguard Worker
280*61c4878aSAndroid Build Coastguard Worker    def test_bad_csv(self) -> None:
281*61c4878aSAndroid Build Coastguard Worker        with self.assertLogs(_LOG, logging.ERROR) as logs:
282*61c4878aSAndroid Build Coastguard Worker            db = read_db_from_csv(INVALID_CSV)
283*61c4878aSAndroid Build Coastguard Worker
284*61c4878aSAndroid Build Coastguard Worker        self.assertGreaterEqual(len(logs.output), 3)
285*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.token_to_entries), 3)
286*61c4878aSAndroid Build Coastguard Worker
287*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[1][0].string, 'Whoa there!')
288*61c4878aSAndroid Build Coastguard Worker        self.assertFalse(db.token_to_entries[2])
289*61c4878aSAndroid Build Coastguard Worker        self.assertNotIn(2, db.token_to_entries)
290*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[3][0].string, "This one's OK")
291*61c4878aSAndroid Build Coastguard Worker        self.assertFalse(db.token_to_entries[4])
292*61c4878aSAndroid Build Coastguard Worker        self.assertNotIn(4, db.token_to_entries)
293*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[5][0].string, "I'm %s fine")
294*61c4878aSAndroid Build Coastguard Worker        self.assertFalse(db.token_to_entries[6])
295*61c4878aSAndroid Build Coastguard Worker        self.assertNotIn(6, db.token_to_entries)
296*61c4878aSAndroid Build Coastguard Worker
297*61c4878aSAndroid Build Coastguard Worker    def test_lookup(self) -> None:
298*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE)
299*61c4878aSAndroid Build Coastguard Worker        self.assertSequenceEqual(db.token_to_entries[0x9999], [])
300*61c4878aSAndroid Build Coastguard Worker        self.assertNotIn(0x9999, db.token_to_entries)
301*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.token_to_entries.get(0x9999))
302*61c4878aSAndroid Build Coastguard Worker
303*61c4878aSAndroid Build Coastguard Worker        matches = db.token_to_entries[0x2E668CD6]
304*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(matches), 1)
305*61c4878aSAndroid Build Coastguard Worker        jello = matches[0]
306*61c4878aSAndroid Build Coastguard Worker
307*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(jello.token, 0x2E668CD6)
308*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(jello.string, 'Jello, world!')
309*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(jello.date_removed, datetime(2019, 6, 11))
310*61c4878aSAndroid Build Coastguard Worker
311*61c4878aSAndroid Build Coastguard Worker        matches = db.token_to_entries[0xE13B0F94]
312*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(matches), 1)
313*61c4878aSAndroid Build Coastguard Worker        llu = matches[0]
314*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(llu.token, 0xE13B0F94)
315*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(llu.string, '%llu')
316*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(llu.date_removed)
317*61c4878aSAndroid Build Coastguard Worker
318*61c4878aSAndroid Build Coastguard Worker        (answer,) = db.token_to_entries[0x141C35D5]
319*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(answer.string, 'The answer: "%s"')
320*61c4878aSAndroid Build Coastguard Worker
321*61c4878aSAndroid Build Coastguard Worker    def test_domains(self) -> None:
322*61c4878aSAndroid Build Coastguard Worker        """Tests the domains mapping."""
323*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database(
324*61c4878aSAndroid Build Coastguard Worker            [
325*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'one', 'D1'),
326*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(2, 'two', 'D1'),
327*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'one', 'D2'),
328*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'one!', 'D3', datetime.min),
329*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(3, 'zzz', 'D1'),
330*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(3, 'three', 'D1', datetime.min),
331*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(3, 'three', 'D1'),
332*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(3, 'zzzz', 'D1'),
333*61c4878aSAndroid Build Coastguard Worker            ]
334*61c4878aSAndroid Build Coastguard Worker        )
335*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.domains.keys(), {'D1', 'D2', 'D3'})
336*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
337*61c4878aSAndroid Build Coastguard Worker            db.domains['D1'],
338*61c4878aSAndroid Build Coastguard Worker            {
339*61c4878aSAndroid Build Coastguard Worker                1: [tokens.TokenizedStringEntry(1, 'one', 'D1')],
340*61c4878aSAndroid Build Coastguard Worker                2: [tokens.TokenizedStringEntry(2, 'two', 'D1')],
341*61c4878aSAndroid Build Coastguard Worker                3: [
342*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(3, 'three', 'D1'),
343*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(3, 'zzz', 'D1'),
344*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(3, 'zzzz', 'D1'),
345*61c4878aSAndroid Build Coastguard Worker                ],
346*61c4878aSAndroid Build Coastguard Worker            },
347*61c4878aSAndroid Build Coastguard Worker        )
348*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
349*61c4878aSAndroid Build Coastguard Worker            db.domains['D2'],
350*61c4878aSAndroid Build Coastguard Worker            {
351*61c4878aSAndroid Build Coastguard Worker                1: [tokens.TokenizedStringEntry(1, 'one', 'D2')],
352*61c4878aSAndroid Build Coastguard Worker            },
353*61c4878aSAndroid Build Coastguard Worker        )
354*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
355*61c4878aSAndroid Build Coastguard Worker            db.domains['D3'],
356*61c4878aSAndroid Build Coastguard Worker            {
357*61c4878aSAndroid Build Coastguard Worker                1: [tokens.TokenizedStringEntry(1, 'one!', 'D3', datetime.min)],
358*61c4878aSAndroid Build Coastguard Worker            },
359*61c4878aSAndroid Build Coastguard Worker        )
360*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.domains['not a domain!'], {})
361*61c4878aSAndroid Build Coastguard Worker        self.assertNotIn('not a domain!', db.domains)
362*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.domains.get('not a domain'))
363*61c4878aSAndroid Build Coastguard Worker
364*61c4878aSAndroid Build Coastguard Worker    def test_collisions(self) -> None:
365*61c4878aSAndroid Build Coastguard Worker        hash_1 = tokens.c_hash('o000', 96)
366*61c4878aSAndroid Build Coastguard Worker        hash_2 = tokens.c_hash('0Q1Q', 96)
367*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(hash_1, hash_2)
368*61c4878aSAndroid Build Coastguard Worker
369*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database.from_strings(['o000', '0Q1Q'])
370*61c4878aSAndroid Build Coastguard Worker
371*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.token_to_entries[hash_1]), 2)
372*61c4878aSAndroid Build Coastguard Worker        self.assertCountEqual(
373*61c4878aSAndroid Build Coastguard Worker            [entry.string for entry in db.token_to_entries[hash_1]],
374*61c4878aSAndroid Build Coastguard Worker            ['o000', '0Q1Q'],
375*61c4878aSAndroid Build Coastguard Worker        )
376*61c4878aSAndroid Build Coastguard Worker
377*61c4878aSAndroid Build Coastguard Worker    def test_purge(self) -> None:
378*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE)
379*61c4878aSAndroid Build Coastguard Worker        original_length = len(db.token_to_entries)
380*61c4878aSAndroid Build Coastguard Worker
381*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[0][0].string, '')
382*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[0x31631781][0].string, '%d')
383*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
384*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[0x2E668CD6][0].string, 'Jello, world!'
385*61c4878aSAndroid Build Coastguard Worker        )
386*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[0xB3653E13][0].string, 'Jello!')
387*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[0xCC6D3131][0].string, 'Jello?')
388*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
389*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[0xE65AEFEF][0].string, "Won't fit : %s%d"
390*61c4878aSAndroid Build Coastguard Worker        )
391*61c4878aSAndroid Build Coastguard Worker
392*61c4878aSAndroid Build Coastguard Worker        db.purge(datetime(2019, 6, 11))
393*61c4878aSAndroid Build Coastguard Worker        self.assertLess(len(db.token_to_entries), original_length)
394*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.token_to_entries), len(db.entries()))
395*61c4878aSAndroid Build Coastguard Worker
396*61c4878aSAndroid Build Coastguard Worker        self.assertFalse(db.token_to_entries[0])
397*61c4878aSAndroid Build Coastguard Worker        self.assertNotIn(0, db.token_to_entries)
398*61c4878aSAndroid Build Coastguard Worker        self.assertSequenceEqual(db.token_to_entries[0], [])
399*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[0x31631781][0].string, '%d')
400*61c4878aSAndroid Build Coastguard Worker        self.assertFalse(db.token_to_entries[0x2E668CD6])
401*61c4878aSAndroid Build Coastguard Worker        self.assertNotIn(0x2E668CD6, db.token_to_entries)
402*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[0xB3653E13][0].string, 'Jello!')
403*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[0xCC6D3131][0].string, 'Jello?')
404*61c4878aSAndroid Build Coastguard Worker        self.assertFalse(db.token_to_entries[0xE65AEFEF])
405*61c4878aSAndroid Build Coastguard Worker        self.assertNotIn(0xE65AEFEF, db.token_to_entries)
406*61c4878aSAndroid Build Coastguard Worker
407*61c4878aSAndroid Build Coastguard Worker    def test_merge(self) -> None:
408*61c4878aSAndroid Build Coastguard Worker        """Tests the tokens.Database merge method."""
409*61c4878aSAndroid Build Coastguard Worker
410*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database()
411*61c4878aSAndroid Build Coastguard Worker
412*61c4878aSAndroid Build Coastguard Worker        # Test basic merging into an empty database.
413*61c4878aSAndroid Build Coastguard Worker        db.merge(
414*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
415*61c4878aSAndroid Build Coastguard Worker                [
416*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
417*61c4878aSAndroid Build Coastguard Worker                        1, 'one', date_removed=datetime.min
418*61c4878aSAndroid Build Coastguard Worker                    ),
419*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
420*61c4878aSAndroid Build Coastguard Worker                        2, 'two', 'domain', date_removed=datetime.min
421*61c4878aSAndroid Build Coastguard Worker                    ),
422*61c4878aSAndroid Build Coastguard Worker                ]
423*61c4878aSAndroid Build Coastguard Worker            )
424*61c4878aSAndroid Build Coastguard Worker        )
425*61c4878aSAndroid Build Coastguard Worker        self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})
426*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[1][0].date_removed, datetime.min)
427*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[2][0].date_removed, datetime.min)
428*61c4878aSAndroid Build Coastguard Worker
429*61c4878aSAndroid Build Coastguard Worker        # Test merging in an entry with a removal date.
430*61c4878aSAndroid Build Coastguard Worker        db.merge(
431*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
432*61c4878aSAndroid Build Coastguard Worker                [
433*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(3, 'three'),
434*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
435*61c4878aSAndroid Build Coastguard Worker                        4, 'four', date_removed=datetime.min
436*61c4878aSAndroid Build Coastguard Worker                    ),
437*61c4878aSAndroid Build Coastguard Worker                ]
438*61c4878aSAndroid Build Coastguard Worker            )
439*61c4878aSAndroid Build Coastguard Worker        )
440*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
441*61c4878aSAndroid Build Coastguard Worker            {str(e) for e in db.entries()}, {'one', 'two', 'three', 'four'}
442*61c4878aSAndroid Build Coastguard Worker        )
443*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.token_to_entries[3][0].date_removed)
444*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[4][0].date_removed, datetime.min)
445*61c4878aSAndroid Build Coastguard Worker
446*61c4878aSAndroid Build Coastguard Worker        # Test merging in one entry.
447*61c4878aSAndroid Build Coastguard Worker        db.merge(
448*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
449*61c4878aSAndroid Build Coastguard Worker                [
450*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(5, 'five'),
451*61c4878aSAndroid Build Coastguard Worker                ]
452*61c4878aSAndroid Build Coastguard Worker            )
453*61c4878aSAndroid Build Coastguard Worker        )
454*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
455*61c4878aSAndroid Build Coastguard Worker            {str(e) for e in db.entries()},
456*61c4878aSAndroid Build Coastguard Worker            {'one', 'two', 'three', 'four', 'five'},
457*61c4878aSAndroid Build Coastguard Worker        )
458*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[4][0].date_removed, datetime.min)
459*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.token_to_entries[5][0].date_removed)
460*61c4878aSAndroid Build Coastguard Worker
461*61c4878aSAndroid Build Coastguard Worker        # Merge in repeated entries different removal dates.
462*61c4878aSAndroid Build Coastguard Worker        db.merge(
463*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
464*61c4878aSAndroid Build Coastguard Worker                [
465*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
466*61c4878aSAndroid Build Coastguard Worker                        4, 'four', date_removed=datetime.max
467*61c4878aSAndroid Build Coastguard Worker                    ),
468*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
469*61c4878aSAndroid Build Coastguard Worker                        5, 'five', date_removed=datetime.max
470*61c4878aSAndroid Build Coastguard Worker                    ),
471*61c4878aSAndroid Build Coastguard Worker                ]
472*61c4878aSAndroid Build Coastguard Worker            )
473*61c4878aSAndroid Build Coastguard Worker        )
474*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.entries()), 5)
475*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
476*61c4878aSAndroid Build Coastguard Worker            {str(e) for e in db.entries()},
477*61c4878aSAndroid Build Coastguard Worker            {'one', 'two', 'three', 'four', 'five'},
478*61c4878aSAndroid Build Coastguard Worker        )
479*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[4][0].date_removed, datetime.max)
480*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.token_to_entries[5][0].date_removed)
481*61c4878aSAndroid Build Coastguard Worker
482*61c4878aSAndroid Build Coastguard Worker        # Merge in the same repeated entries now without removal dates.
483*61c4878aSAndroid Build Coastguard Worker        db.merge(
484*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
485*61c4878aSAndroid Build Coastguard Worker                [
486*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(4, 'four'),
487*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(5, 'five'),
488*61c4878aSAndroid Build Coastguard Worker                ]
489*61c4878aSAndroid Build Coastguard Worker            )
490*61c4878aSAndroid Build Coastguard Worker        )
491*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.entries()), 5)
492*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
493*61c4878aSAndroid Build Coastguard Worker            {str(e) for e in db.entries()},
494*61c4878aSAndroid Build Coastguard Worker            {'one', 'two', 'three', 'four', 'five'},
495*61c4878aSAndroid Build Coastguard Worker        )
496*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.token_to_entries[4][0].date_removed)
497*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.token_to_entries[5][0].date_removed)
498*61c4878aSAndroid Build Coastguard Worker
499*61c4878aSAndroid Build Coastguard Worker        # Merge in an empty databsse.
500*61c4878aSAndroid Build Coastguard Worker        db.merge(tokens.Database([]))
501*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
502*61c4878aSAndroid Build Coastguard Worker            {str(e) for e in db.entries()},
503*61c4878aSAndroid Build Coastguard Worker            {'one', 'two', 'three', 'four', 'five'},
504*61c4878aSAndroid Build Coastguard Worker        )
505*61c4878aSAndroid Build Coastguard Worker
506*61c4878aSAndroid Build Coastguard Worker    def test_merge_multiple_datbases_in_one_call(self) -> None:
507*61c4878aSAndroid Build Coastguard Worker        """Tests the merge and merged methods with multiple databases."""
508*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database.merged(
509*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
510*61c4878aSAndroid Build Coastguard Worker                [
511*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
512*61c4878aSAndroid Build Coastguard Worker                        1, 'one', date_removed=datetime.max
513*61c4878aSAndroid Build Coastguard Worker                    )
514*61c4878aSAndroid Build Coastguard Worker                ]
515*61c4878aSAndroid Build Coastguard Worker            ),
516*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
517*61c4878aSAndroid Build Coastguard Worker                [
518*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
519*61c4878aSAndroid Build Coastguard Worker                        2, 'two', date_removed=datetime.min
520*61c4878aSAndroid Build Coastguard Worker                    )
521*61c4878aSAndroid Build Coastguard Worker                ]
522*61c4878aSAndroid Build Coastguard Worker            ),
523*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
524*61c4878aSAndroid Build Coastguard Worker                [
525*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
526*61c4878aSAndroid Build Coastguard Worker                        1, 'one', date_removed=datetime.min
527*61c4878aSAndroid Build Coastguard Worker                    )
528*61c4878aSAndroid Build Coastguard Worker                ]
529*61c4878aSAndroid Build Coastguard Worker            ),
530*61c4878aSAndroid Build Coastguard Worker        )
531*61c4878aSAndroid Build Coastguard Worker        self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})
532*61c4878aSAndroid Build Coastguard Worker
533*61c4878aSAndroid Build Coastguard Worker        db.merge(
534*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
535*61c4878aSAndroid Build Coastguard Worker                [
536*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
537*61c4878aSAndroid Build Coastguard Worker                        4, 'four', date_removed=datetime.max
538*61c4878aSAndroid Build Coastguard Worker                    )
539*61c4878aSAndroid Build Coastguard Worker                ]
540*61c4878aSAndroid Build Coastguard Worker            ),
541*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
542*61c4878aSAndroid Build Coastguard Worker                [
543*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
544*61c4878aSAndroid Build Coastguard Worker                        2, 'two', date_removed=datetime.max
545*61c4878aSAndroid Build Coastguard Worker                    )
546*61c4878aSAndroid Build Coastguard Worker                ]
547*61c4878aSAndroid Build Coastguard Worker            ),
548*61c4878aSAndroid Build Coastguard Worker            tokens.Database(
549*61c4878aSAndroid Build Coastguard Worker                [
550*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(
551*61c4878aSAndroid Build Coastguard Worker                        3, 'three', date_removed=datetime.min
552*61c4878aSAndroid Build Coastguard Worker                    )
553*61c4878aSAndroid Build Coastguard Worker                ]
554*61c4878aSAndroid Build Coastguard Worker            ),
555*61c4878aSAndroid Build Coastguard Worker        )
556*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
557*61c4878aSAndroid Build Coastguard Worker            {str(e) for e in db.entries()}, {'one', 'two', 'three', 'four'}
558*61c4878aSAndroid Build Coastguard Worker        )
559*61c4878aSAndroid Build Coastguard Worker
560*61c4878aSAndroid Build Coastguard Worker    def test_merge_same_tokens_different_domains(self) -> None:
561*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database.merged(
562*61c4878aSAndroid Build Coastguard Worker            tokens.Database([tokens.TokenizedStringEntry(2, 'two', 'D1')]),
563*61c4878aSAndroid Build Coastguard Worker            tokens.Database([tokens.TokenizedStringEntry(1, 'one', 'D2')]),
564*61c4878aSAndroid Build Coastguard Worker            tokens.Database([tokens.TokenizedStringEntry(1, 'one', 'D2')]),
565*61c4878aSAndroid Build Coastguard Worker            tokens.Database([tokens.TokenizedStringEntry(1, 'one!', 'D3')]),
566*61c4878aSAndroid Build Coastguard Worker            tokens.Database([tokens.TokenizedStringEntry(1, 'one', 'D1')]),
567*61c4878aSAndroid Build Coastguard Worker        )
568*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
569*61c4878aSAndroid Build Coastguard Worker            sorted(db.entries()),
570*61c4878aSAndroid Build Coastguard Worker            sorted(
571*61c4878aSAndroid Build Coastguard Worker                [
572*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(1, 'one', 'D1'),
573*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(2, 'two', 'D1'),
574*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(1, 'one', 'D2'),
575*61c4878aSAndroid Build Coastguard Worker                    tokens.TokenizedStringEntry(1, 'one!', 'D3'),
576*61c4878aSAndroid Build Coastguard Worker                ]
577*61c4878aSAndroid Build Coastguard Worker            ),
578*61c4878aSAndroid Build Coastguard Worker        )
579*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
580*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[1],
581*61c4878aSAndroid Build Coastguard Worker            [
582*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'one', 'D1'),
583*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'one', 'D2'),
584*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'one!', 'D3'),
585*61c4878aSAndroid Build Coastguard Worker            ],
586*61c4878aSAndroid Build Coastguard Worker        )
587*61c4878aSAndroid Build Coastguard Worker
588*61c4878aSAndroid Build Coastguard Worker    def test_entry_counts(self) -> None:
589*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(CSV_DATABASE.splitlines()), 16)
590*61c4878aSAndroid Build Coastguard Worker
591*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE)
592*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.entries()), 16)
593*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.token_to_entries), 16)
594*61c4878aSAndroid Build Coastguard Worker
595*61c4878aSAndroid Build Coastguard Worker        # Add two strings with the same hash.
596*61c4878aSAndroid Build Coastguard Worker        db.add(_entries('o000', '0Q1Q'))
597*61c4878aSAndroid Build Coastguard Worker
598*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.entries()), 18)
599*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.token_to_entries), 17)
600*61c4878aSAndroid Build Coastguard Worker
601*61c4878aSAndroid Build Coastguard Worker    def test_mark_removed(self) -> None:
602*61c4878aSAndroid Build Coastguard Worker        """Tests that date_removed field is set by mark_removed."""
603*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database.from_strings(
604*61c4878aSAndroid Build Coastguard Worker            ['MILK', 'apples', 'oranges', 'CHEESE', 'pears']
605*61c4878aSAndroid Build Coastguard Worker        )
606*61c4878aSAndroid Build Coastguard Worker
607*61c4878aSAndroid Build Coastguard Worker        self.assertTrue(
608*61c4878aSAndroid Build Coastguard Worker            all(entry.date_removed is None for entry in db.entries())
609*61c4878aSAndroid Build Coastguard Worker        )
610*61c4878aSAndroid Build Coastguard Worker        date_1 = datetime(1, 2, 3)
611*61c4878aSAndroid Build Coastguard Worker
612*61c4878aSAndroid Build Coastguard Worker        db.mark_removed(_entries('apples', 'oranges', 'pears'), date_1)
613*61c4878aSAndroid Build Coastguard Worker
614*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
615*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[c_hash('MILK')][0].date_removed, date_1
616*61c4878aSAndroid Build Coastguard Worker        )
617*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
618*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[c_hash('CHEESE')][0].date_removed, date_1
619*61c4878aSAndroid Build Coastguard Worker        )
620*61c4878aSAndroid Build Coastguard Worker
621*61c4878aSAndroid Build Coastguard Worker        now = datetime.now()
622*61c4878aSAndroid Build Coastguard Worker        db.mark_removed(_entries('MILK', 'CHEESE', 'pears'))
623*61c4878aSAndroid Build Coastguard Worker
624*61c4878aSAndroid Build Coastguard Worker        # New strings are not added or re-added in mark_removed().
625*61c4878aSAndroid Build Coastguard Worker        milk_date = db.token_to_entries[c_hash('MILK')][0].date_removed
626*61c4878aSAndroid Build Coastguard Worker        assert milk_date is not None
627*61c4878aSAndroid Build Coastguard Worker        self.assertGreaterEqual(milk_date, date_1)
628*61c4878aSAndroid Build Coastguard Worker
629*61c4878aSAndroid Build Coastguard Worker        cheese_date = db.token_to_entries[c_hash('CHEESE')][0].date_removed
630*61c4878aSAndroid Build Coastguard Worker        assert cheese_date is not None
631*61c4878aSAndroid Build Coastguard Worker        self.assertGreaterEqual(cheese_date, date_1)
632*61c4878aSAndroid Build Coastguard Worker
633*61c4878aSAndroid Build Coastguard Worker        # These strings were removed.
634*61c4878aSAndroid Build Coastguard Worker        apples_date = db.token_to_entries[c_hash('apples')][0].date_removed
635*61c4878aSAndroid Build Coastguard Worker        assert apples_date is not None
636*61c4878aSAndroid Build Coastguard Worker        self.assertGreaterEqual(apples_date, now)
637*61c4878aSAndroid Build Coastguard Worker
638*61c4878aSAndroid Build Coastguard Worker        oranges_date = db.token_to_entries[c_hash('oranges')][0].date_removed
639*61c4878aSAndroid Build Coastguard Worker        assert oranges_date is not None
640*61c4878aSAndroid Build Coastguard Worker        self.assertGreaterEqual(oranges_date, now)
641*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.token_to_entries[c_hash('pears')][0].date_removed)
642*61c4878aSAndroid Build Coastguard Worker
643*61c4878aSAndroid Build Coastguard Worker    def test_add(self) -> None:
644*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database()
645*61c4878aSAndroid Build Coastguard Worker        db.add(_entries('MILK', 'apples'))
646*61c4878aSAndroid Build Coastguard Worker        self.assertEqual({e.string for e in db.entries()}, {'MILK', 'apples'})
647*61c4878aSAndroid Build Coastguard Worker
648*61c4878aSAndroid Build Coastguard Worker        db.add(_entries('oranges', 'CHEESE', 'pears'))
649*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.entries()), 5)
650*61c4878aSAndroid Build Coastguard Worker
651*61c4878aSAndroid Build Coastguard Worker        db.add(_entries('MILK', 'apples', 'only this one is new'))
652*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db.entries()), 6)
653*61c4878aSAndroid Build Coastguard Worker
654*61c4878aSAndroid Build Coastguard Worker        db.add(_entries('MILK'))
655*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
656*61c4878aSAndroid Build Coastguard Worker            {e.string for e in db.entries()},
657*61c4878aSAndroid Build Coastguard Worker            {
658*61c4878aSAndroid Build Coastguard Worker                'MILK',
659*61c4878aSAndroid Build Coastguard Worker                'apples',
660*61c4878aSAndroid Build Coastguard Worker                'oranges',
661*61c4878aSAndroid Build Coastguard Worker                'CHEESE',
662*61c4878aSAndroid Build Coastguard Worker                'pears',
663*61c4878aSAndroid Build Coastguard Worker                'only this one is new',
664*61c4878aSAndroid Build Coastguard Worker            },
665*61c4878aSAndroid Build Coastguard Worker        )
666*61c4878aSAndroid Build Coastguard Worker
667*61c4878aSAndroid Build Coastguard Worker    def test_add_duplicate_entries_keeps_none_as_removal_date(self) -> None:
668*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database()
669*61c4878aSAndroid Build Coastguard Worker        db.add(
670*61c4878aSAndroid Build Coastguard Worker            [
671*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'Spam', '', datetime.now()),
672*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'Spam', ''),
673*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'Spam', '', datetime.min),
674*61c4878aSAndroid Build Coastguard Worker            ]
675*61c4878aSAndroid Build Coastguard Worker        )
676*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db), 1)
677*61c4878aSAndroid Build Coastguard Worker        self.assertIsNone(db.token_to_entries[1][0].date_removed)
678*61c4878aSAndroid Build Coastguard Worker
679*61c4878aSAndroid Build Coastguard Worker    def test_add_duplicate_entries_keeps_newest_removal_date(self) -> None:
680*61c4878aSAndroid Build Coastguard Worker        db = tokens.Database()
681*61c4878aSAndroid Build Coastguard Worker        db.add(
682*61c4878aSAndroid Build Coastguard Worker            [
683*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'Spam', '', datetime.now()),
684*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'Spam', '', datetime.max),
685*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'Spam', '', datetime.now()),
686*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'Spam', '', datetime.min),
687*61c4878aSAndroid Build Coastguard Worker            ]
688*61c4878aSAndroid Build Coastguard Worker        )
689*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(len(db), 1)
690*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.token_to_entries[1][0].date_removed, datetime.max)
691*61c4878aSAndroid Build Coastguard Worker
692*61c4878aSAndroid Build Coastguard Worker    def test_difference(self) -> None:
693*61c4878aSAndroid Build Coastguard Worker        first = tokens.Database(
694*61c4878aSAndroid Build Coastguard Worker            [
695*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'one'),
696*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(2, 'two'),
697*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(3, 'three'),
698*61c4878aSAndroid Build Coastguard Worker            ]
699*61c4878aSAndroid Build Coastguard Worker        )
700*61c4878aSAndroid Build Coastguard Worker        second = tokens.Database(
701*61c4878aSAndroid Build Coastguard Worker            [
702*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'one'),
703*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(3, 'three'),
704*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(4, 'four'),
705*61c4878aSAndroid Build Coastguard Worker            ]
706*61c4878aSAndroid Build Coastguard Worker        )
707*61c4878aSAndroid Build Coastguard Worker        difference = first.difference(second)
708*61c4878aSAndroid Build Coastguard Worker        self.assertEqual({e.string for e in difference.entries()}, {'two'})
709*61c4878aSAndroid Build Coastguard Worker
710*61c4878aSAndroid Build Coastguard Worker    def test_tokens_by_domain(self) -> None:
711*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE_2)
712*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.domains.keys(), {''})
713*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE_3)
714*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.domains.keys(), {'TEST_DOMAIN'})
715*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE_4)
716*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.domains.keys(), {'', 'TEST_DOMAIN'})
717*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE_5)
718*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(db.domains.keys(), {'', '?', 'Domain'})
719*61c4878aSAndroid Build Coastguard Worker
720*61c4878aSAndroid Build Coastguard Worker    def test_binary_format_write(self) -> None:
721*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE)
722*61c4878aSAndroid Build Coastguard Worker
723*61c4878aSAndroid Build Coastguard Worker        with io.BytesIO() as fd:
724*61c4878aSAndroid Build Coastguard Worker            tokens.write_binary(db, fd)
725*61c4878aSAndroid Build Coastguard Worker            binary_db = fd.getvalue()
726*61c4878aSAndroid Build Coastguard Worker
727*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(BINARY_DATABASE, binary_db)
728*61c4878aSAndroid Build Coastguard Worker
729*61c4878aSAndroid Build Coastguard Worker    def test_binary_format_parse(self) -> None:
730*61c4878aSAndroid Build Coastguard Worker        with io.BytesIO(BINARY_DATABASE) as binary_db:
731*61c4878aSAndroid Build Coastguard Worker            db = tokens.Database(tokens.parse_binary(binary_db))
732*61c4878aSAndroid Build Coastguard Worker
733*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(str(db), CSV_DATABASE)
734*61c4878aSAndroid Build Coastguard Worker
735*61c4878aSAndroid Build Coastguard Worker
class TestDatabaseFile(unittest.TestCase):
    """Tests the DatabaseFile class."""

    def setUp(self) -> None:
        """Creates an empty temporary file and records its path."""
        # delete=False keeps the file on disk after it is closed, so each
        # test can write to the path; tearDown() removes it.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name
        self._path = Path(temp_path)

    def tearDown(self) -> None:
        """Deletes the temporary file created by setUp()."""
        self._path.unlink()

    def test_update_csv_file(self) -> None:
        """Loads a CSV database file, adds an entry and writes it back."""
        self._path.write_text(CSV_DATABASE)
        db = tokens.DatabaseFile.load(self._path)
        self.assertEqual(str(db), CSV_DATABASE)

        new_entry = tokens.TokenizedStringEntry(0xFFFFFFFF, 'New entry!', '')
        db.add([new_entry])
        db.write_to_file()

        # The new entry is expected as one extra line after the original
        # contents.
        expected = CSV_DATABASE + 'ffffffff,          ,"","New entry!"\n'
        self.assertEqual(self._path.read_text(), expected)

    def test_csv_file_too_short_raises_exception(self) -> None:
        """Checks that loading a four-character file is rejected."""
        self._path.write_text('1234')

        self.assertRaises(
            tokens.DatabaseFormatError, tokens.DatabaseFile.load, self._path
        )

    def test_csv_invalid_format_raises_exception(self) -> None:
        """Checks that loading text that is not a valid database fails."""
        self._path.write_text('MK34567890')

        self.assertRaises(
            tokens.DatabaseFormatError, tokens.DatabaseFile.load, self._path
        )

    def test_csv_not_utf8(self) -> None:
        """Checks that loading bytes that are not valid UTF-8 is rejected."""
        self._path.write_bytes(b'\x80' * 20)

        self.assertRaises(
            tokens.DatabaseFormatError, tokens.DatabaseFile.load, self._path
        )
778*61c4878aSAndroid Build Coastguard Worker
779*61c4878aSAndroid Build Coastguard Worker
780*61c4878aSAndroid Build Coastguard Workerclass TestFilter(unittest.TestCase):
781*61c4878aSAndroid Build Coastguard Worker    """Tests the filtering functionality."""
782*61c4878aSAndroid Build Coastguard Worker
783*61c4878aSAndroid Build Coastguard Worker    def setUp(self) -> None:
784*61c4878aSAndroid Build Coastguard Worker        self.db = tokens.Database(
785*61c4878aSAndroid Build Coastguard Worker            [
786*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(1, 'Luke'),
787*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(2, 'Leia'),
788*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(2, 'Darth Vader'),
789*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(2, 'Emperor Palpatine'),
790*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(3, 'Han'),
791*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(4, 'Chewbacca'),
792*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(5, 'Darth Maul'),
793*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(6, 'Han Solo'),
794*61c4878aSAndroid Build Coastguard Worker            ]
795*61c4878aSAndroid Build Coastguard Worker        )
796*61c4878aSAndroid Build Coastguard Worker
797*61c4878aSAndroid Build Coastguard Worker    def test_filter_include_single_regex(self) -> None:
798*61c4878aSAndroid Build Coastguard Worker        self.db.filter(include=[' '])  # anything with a space
799*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
800*61c4878aSAndroid Build Coastguard Worker            set(e.string for e in self.db.entries()),
801*61c4878aSAndroid Build Coastguard Worker            {'Darth Vader', 'Emperor Palpatine', 'Darth Maul', 'Han Solo'},
802*61c4878aSAndroid Build Coastguard Worker        )
803*61c4878aSAndroid Build Coastguard Worker
804*61c4878aSAndroid Build Coastguard Worker    def test_filter_include_multiple_regexes(self) -> None:
805*61c4878aSAndroid Build Coastguard Worker        self.db.filter(include=['Darth', 'cc', '^Han$'])
806*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
807*61c4878aSAndroid Build Coastguard Worker            set(e.string for e in self.db.entries()),
808*61c4878aSAndroid Build Coastguard Worker            {'Darth Vader', 'Darth Maul', 'Han', 'Chewbacca'},
809*61c4878aSAndroid Build Coastguard Worker        )
810*61c4878aSAndroid Build Coastguard Worker
811*61c4878aSAndroid Build Coastguard Worker    def test_filter_include_no_matches(self) -> None:
812*61c4878aSAndroid Build Coastguard Worker        self.db.filter(include=['Gandalf'])
813*61c4878aSAndroid Build Coastguard Worker        self.assertFalse(self.db.entries())
814*61c4878aSAndroid Build Coastguard Worker
815*61c4878aSAndroid Build Coastguard Worker    def test_filter_exclude_single_regex(self) -> None:
816*61c4878aSAndroid Build Coastguard Worker        self.db.filter(exclude=['^[^L]'])
817*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
818*61c4878aSAndroid Build Coastguard Worker            set(e.string for e in self.db.entries()), {'Luke', 'Leia'}
819*61c4878aSAndroid Build Coastguard Worker        )
820*61c4878aSAndroid Build Coastguard Worker
821*61c4878aSAndroid Build Coastguard Worker    def test_filter_exclude_multiple_regexes(self) -> None:
822*61c4878aSAndroid Build Coastguard Worker        self.db.filter(exclude=[' ', 'Han', 'Chewbacca'])
823*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
824*61c4878aSAndroid Build Coastguard Worker            set(e.string for e in self.db.entries()), {'Luke', 'Leia'}
825*61c4878aSAndroid Build Coastguard Worker        )
826*61c4878aSAndroid Build Coastguard Worker
827*61c4878aSAndroid Build Coastguard Worker    def test_filter_exclude_no_matches(self) -> None:
828*61c4878aSAndroid Build Coastguard Worker        self.db.filter(exclude=['.*'])
829*61c4878aSAndroid Build Coastguard Worker        self.assertFalse(self.db.entries())
830*61c4878aSAndroid Build Coastguard Worker
831*61c4878aSAndroid Build Coastguard Worker    def test_filter_include_and_exclude(self) -> None:
832*61c4878aSAndroid Build Coastguard Worker        self.db.filter(include=[' '], exclude=['Darth', 'Emperor'])
833*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(set(e.string for e in self.db.entries()), {'Han Solo'})
834*61c4878aSAndroid Build Coastguard Worker
835*61c4878aSAndroid Build Coastguard Worker    def test_filter_neither_include_nor_exclude(self) -> None:
836*61c4878aSAndroid Build Coastguard Worker        self.db.filter()
837*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
838*61c4878aSAndroid Build Coastguard Worker            set(e.string for e in self.db.entries()),
839*61c4878aSAndroid Build Coastguard Worker            {
840*61c4878aSAndroid Build Coastguard Worker                'Luke',
841*61c4878aSAndroid Build Coastguard Worker                'Leia',
842*61c4878aSAndroid Build Coastguard Worker                'Darth Vader',
843*61c4878aSAndroid Build Coastguard Worker                'Emperor Palpatine',
844*61c4878aSAndroid Build Coastguard Worker                'Han',
845*61c4878aSAndroid Build Coastguard Worker                'Chewbacca',
846*61c4878aSAndroid Build Coastguard Worker                'Darth Maul',
847*61c4878aSAndroid Build Coastguard Worker                'Han Solo',
848*61c4878aSAndroid Build Coastguard Worker            },
849*61c4878aSAndroid Build Coastguard Worker        )
850*61c4878aSAndroid Build Coastguard Worker
851*61c4878aSAndroid Build Coastguard Worker    def test_csv_remove_domain_whitespace(self) -> None:
852*61c4878aSAndroid Build Coastguard Worker        db = read_db_from_csv(CSV_DATABASE_6_DOMAIN_WHITESPACE)
853*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
854*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[1],
855*61c4878aSAndroid Build Coastguard Worker            [
856*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(
857*61c4878aSAndroid Build Coastguard Worker                    token=1,
858*61c4878aSAndroid Build Coastguard Worker                    string='hello',
859*61c4878aSAndroid Build Coastguard Worker                    domain='Domain1',
860*61c4878aSAndroid Build Coastguard Worker                    date_removed=datetime(2001, 9, 4),
861*61c4878aSAndroid Build Coastguard Worker                )
862*61c4878aSAndroid Build Coastguard Worker            ],
863*61c4878aSAndroid Build Coastguard Worker        )
864*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
865*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[2],
866*61c4878aSAndroid Build Coastguard Worker            [
867*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(token=2, string='yes', domain=''),
868*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(
869*61c4878aSAndroid Build Coastguard Worker                    token=2, string='No!', domain='Domain20'
870*61c4878aSAndroid Build Coastguard Worker                ),
871*61c4878aSAndroid Build Coastguard Worker            ],
872*61c4878aSAndroid Build Coastguard Worker        )
873*61c4878aSAndroid Build Coastguard Worker        self.assertEqual(
874*61c4878aSAndroid Build Coastguard Worker            db.token_to_entries[4],
875*61c4878aSAndroid Build Coastguard Worker            [
876*61c4878aSAndroid Build Coastguard Worker                tokens.TokenizedStringEntry(
877*61c4878aSAndroid Build Coastguard Worker                    token=4, string='The answer is: %s', domain='?'
878*61c4878aSAndroid Build Coastguard Worker                ),
879*61c4878aSAndroid Build Coastguard Worker            ],
880*61c4878aSAndroid Build Coastguard Worker        )
881*61c4878aSAndroid Build Coastguard Worker
882*61c4878aSAndroid Build Coastguard Worker
class TestDirectoryDatabase(unittest.TestCase):
    """Checks loading and rewriting of directory-backed token databases.

    Every test gets a fresh temporary directory containing a database
    directory. CSV files whose names end in DIR_DB_SUFFIX are written into
    it, and the directory itself is loaded with tokens.DatabaseFile.load.
    """

    def setUp(self) -> None:
        scratch = tempfile.mkdtemp('_pw_tokenizer_test')
        self._dir = Path(scratch)
        self._db_dir = self._dir.joinpath('_dir_database_test')
        self._db_dir.mkdir(exist_ok=True)
        self._db_csv = self._csv_path('first')

    def tearDown(self) -> None:
        # Removes the whole temporary tree, including the database directory.
        shutil.rmtree(self._dir)

    def _csv_path(self, stem: str) -> Path:
        """Returns the path of a database CSV named `stem` in the directory."""
        return self._db_dir / f'{stem}{DIR_DB_SUFFIX}'

    def _write_and_load(self, path: Path, contents: str):
        """Writes `contents` to `path`, then loads that file as a database."""
        path.write_text(contents)
        return tokens.DatabaseFile.load(path)

    def _merged_from_three_files(self, first: str, second: str, third: str):
        """Creates the first/second/third CSVs and merges their databases.

        Each file is written and loaded before the next one is created; the
        three loaded databases are then combined with tokens.Database.merged.
        """
        targets = (
            (self._db_csv, first),
            (self._csv_path('second'), second),
            (self._csv_path('third'), third),
        )
        loaded = [
            self._write_and_load(path, contents) for path, contents in targets
        ]
        return tokens.Database.merged(*loaded)

    def test_loading_empty_directory(self) -> None:
        """A database directory with no files loads with no entries."""
        empty_db = tokens.DatabaseFile.load(self._db_dir)
        self.assertFalse(empty_db.entries())

    def test_loading_a_single_file(self) -> None:
        """A directory holding one CSV renders the same as that CSV alone."""
        csv = self._write_and_load(self._db_csv, CSV_DATABASE)
        directory_db = tokens.DatabaseFile.load(self._db_dir)

        files_present = list(self._db_dir.iterdir())
        self.assertEqual(1, len(files_present))
        self.assertEqual(str(csv), str(directory_db))

    def test_loading_multiples_files(self) -> None:
        """A directory of three CSVs renders the same as their merge."""
        all_databases_merged = self._merged_from_three_files(
            CSV_DATABASE_3, CSV_DATABASE_2, CSV_DATABASE_4
        )
        directory_db = tokens.DatabaseFile.load(self._db_dir)

        files_present = list(self._db_dir.iterdir())
        self.assertEqual(3, len(files_present))
        self.assertEqual(str(all_databases_merged), str(directory_db))

    def test_loading_multiples_files_with_removal_dates(self) -> None:
        """Same comparison as above, using a different trio of CSV fixtures."""
        all_databases_merged = self._merged_from_three_files(
            CSV_DATABASE, CSV_DATABASE_2, CSV_DATABASE_3
        )
        directory_db = tokens.DatabaseFile.load(self._db_dir)

        files_present = list(self._db_dir.iterdir())
        self.assertEqual(3, len(files_present))
        self.assertEqual(str(all_databases_merged), str(directory_db))

    def test_rewrite(self) -> None:
        """Rewriting collapses the CSVs into one and leaves other files alone."""
        junk_file = self._db_dir.joinpath('junk_file')
        junk_file.write_text('should be ignored')

        all_databases_merged = self._merged_from_three_files(
            CSV_DATABASE_3, CSV_DATABASE_2, CSV_DATABASE_4
        )

        directory_db = tokens.DatabaseFile.load(self._db_dir)
        directory_db.write_to_file(rewrite=True)

        # Only one file with the database suffix should remain, and the
        # unrelated file must still hold its original text.
        database_files = list(self._db_dir.glob(f'*{DIR_DB_SUFFIX}'))
        self.assertEqual(1, len(database_files))
        self.assertEqual(
            self._db_dir.joinpath('junk_file').read_text(), 'should be ignored'
        )

        # Reload from disk: the rewritten directory must still render the same
        # as the merge of the three original databases.
        directory_db = tokens.DatabaseFile.load(self._db_dir)
        self.assertEqual(str(all_databases_merged), str(directory_db))
972*61c4878aSAndroid Build Coastguard Worker
if __name__ == '__main__':
    # Run every test case in this module when it is executed as a script.
    unittest.main()
975