1*1c60b9acSAndroid Build Coastguard Worker /*
2*1c60b9acSAndroid Build Coastguard Worker * libwebsockets - small server side websockets and web server implementation
3*1c60b9acSAndroid Build Coastguard Worker *
4*1c60b9acSAndroid Build Coastguard Worker * Copyright (C) 2010 - 2019 Andy Green <[email protected]>
5*1c60b9acSAndroid Build Coastguard Worker *
6*1c60b9acSAndroid Build Coastguard Worker * Permission is hereby granted, free of charge, to any person obtaining a copy
7*1c60b9acSAndroid Build Coastguard Worker * of this software and associated documentation files (the "Software"), to
8*1c60b9acSAndroid Build Coastguard Worker * deal in the Software without restriction, including without limitation the
9*1c60b9acSAndroid Build Coastguard Worker * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*1c60b9acSAndroid Build Coastguard Worker * sell copies of the Software, and to permit persons to whom the Software is
11*1c60b9acSAndroid Build Coastguard Worker * furnished to do so, subject to the following conditions:
12*1c60b9acSAndroid Build Coastguard Worker *
13*1c60b9acSAndroid Build Coastguard Worker * The above copyright notice and this permission notice shall be included in
14*1c60b9acSAndroid Build Coastguard Worker * all copies or substantial portions of the Software.
15*1c60b9acSAndroid Build Coastguard Worker *
16*1c60b9acSAndroid Build Coastguard Worker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*1c60b9acSAndroid Build Coastguard Worker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*1c60b9acSAndroid Build Coastguard Worker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*1c60b9acSAndroid Build Coastguard Worker * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*1c60b9acSAndroid Build Coastguard Worker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21*1c60b9acSAndroid Build Coastguard Worker * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22*1c60b9acSAndroid Build Coastguard Worker * IN THE SOFTWARE.
23*1c60b9acSAndroid Build Coastguard Worker *
24*1c60b9acSAndroid Build Coastguard Worker * The functions allow
25*1c60b9acSAndroid Build Coastguard Worker *
26*1c60b9acSAndroid Build Coastguard Worker * - collecting a concordance of strings from one or more files (eg, a
27*1c60b9acSAndroid Build Coastguard Worker * directory of files) into a single in-memory, lac-backed trie;
28*1c60b9acSAndroid Build Coastguard Worker *
29*1c60b9acSAndroid Build Coastguard Worker * - to optimize and serialize the in-memory trie to an fd;
30*1c60b9acSAndroid Build Coastguard Worker *
31*1c60b9acSAndroid Build Coastguard Worker * - to very quickly report any instances of a string in any of the files
32*1c60b9acSAndroid Build Coastguard Worker * indexed by the trie, by a seeking around a serialized trie fd, without
33*1c60b9acSAndroid Build Coastguard Worker * having to load it all in memory
34*1c60b9acSAndroid Build Coastguard Worker */
35*1c60b9acSAndroid Build Coastguard Worker
36*1c60b9acSAndroid Build Coastguard Worker #include "private-lib-core.h"
37*1c60b9acSAndroid Build Coastguard Worker #include "private-lib-misc-fts.h"
38*1c60b9acSAndroid Build Coastguard Worker
39*1c60b9acSAndroid Build Coastguard Worker #include <stdio.h>
40*1c60b9acSAndroid Build Coastguard Worker #include <string.h>
41*1c60b9acSAndroid Build Coastguard Worker #include <assert.h>
42*1c60b9acSAndroid Build Coastguard Worker #include <fcntl.h>
43*1c60b9acSAndroid Build Coastguard Worker #include <errno.h>
44*1c60b9acSAndroid Build Coastguard Worker #include <sys/types.h>
45*1c60b9acSAndroid Build Coastguard Worker
46*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry;
47*1c60b9acSAndroid Build Coastguard Worker
48*1c60b9acSAndroid Build Coastguard Worker /* notice these are stored in t->lwsac_input_head which has input file scope */
49*1c60b9acSAndroid Build Coastguard Worker
50*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_filepath {
51*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_filepath *next;
52*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_filepath *prev;
53*1c60b9acSAndroid Build Coastguard Worker char filepath[256];
54*1c60b9acSAndroid Build Coastguard Worker jg2_file_offset ofs;
55*1c60b9acSAndroid Build Coastguard Worker jg2_file_offset line_table_ofs;
56*1c60b9acSAndroid Build Coastguard Worker int filepath_len;
57*1c60b9acSAndroid Build Coastguard Worker int file_index;
58*1c60b9acSAndroid Build Coastguard Worker int total_lines;
59*1c60b9acSAndroid Build Coastguard Worker int priority;
60*1c60b9acSAndroid Build Coastguard Worker };
61*1c60b9acSAndroid Build Coastguard Worker
62*1c60b9acSAndroid Build Coastguard Worker /* notice these are stored in t->lwsac_input_head which has input file scope */
63*1c60b9acSAndroid Build Coastguard Worker
64*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_lines {
65*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_lines *lines_next;
66*1c60b9acSAndroid Build Coastguard Worker /*
67*1c60b9acSAndroid Build Coastguard Worker * amount of line numbers needs to meet average count for best
68*1c60b9acSAndroid Build Coastguard Worker * efficiency.
69*1c60b9acSAndroid Build Coastguard Worker *
70*1c60b9acSAndroid Build Coastguard Worker * Line numbers are stored in VLI format since if we don't, around half
71*1c60b9acSAndroid Build Coastguard Worker * the total lac allocation consists of struct lws_fts_lines...
72*1c60b9acSAndroid Build Coastguard Worker * size chosen to maintain 8-byte struct alignment
73*1c60b9acSAndroid Build Coastguard Worker */
74*1c60b9acSAndroid Build Coastguard Worker uint8_t vli[119];
75*1c60b9acSAndroid Build Coastguard Worker char count;
76*1c60b9acSAndroid Build Coastguard Worker };
77*1c60b9acSAndroid Build Coastguard Worker
78*1c60b9acSAndroid Build Coastguard Worker /* this represents the instances of a symbol inside a given filepath */
79*1c60b9acSAndroid Build Coastguard Worker
80*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_instance_file {
81*1c60b9acSAndroid Build Coastguard Worker /* linked-list of tifs generated for current file */
82*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_instance_file *inst_file_next;
83*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *owner;
84*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_lines *lines_list, *lines_tail;
85*1c60b9acSAndroid Build Coastguard Worker uint32_t file_index;
86*1c60b9acSAndroid Build Coastguard Worker uint32_t total;
87*1c60b9acSAndroid Build Coastguard Worker
88*1c60b9acSAndroid Build Coastguard Worker /*
89*1c60b9acSAndroid Build Coastguard Worker * optimization for the common case there's only 1 - ~3 matches, so we
90*1c60b9acSAndroid Build Coastguard Worker * don't have to allocate any lws_fts_lines struct
91*1c60b9acSAndroid Build Coastguard Worker *
92*1c60b9acSAndroid Build Coastguard Worker * Using 8 bytes total for this maintains 8-byte struct alignment...
93*1c60b9acSAndroid Build Coastguard Worker */
94*1c60b9acSAndroid Build Coastguard Worker
95*1c60b9acSAndroid Build Coastguard Worker uint8_t vli[7];
96*1c60b9acSAndroid Build Coastguard Worker char count;
97*1c60b9acSAndroid Build Coastguard Worker };
98*1c60b9acSAndroid Build Coastguard Worker
99*1c60b9acSAndroid Build Coastguard Worker /*
100*1c60b9acSAndroid Build Coastguard Worker * this is the main trie in-memory allocation object
101*1c60b9acSAndroid Build Coastguard Worker */
102*1c60b9acSAndroid Build Coastguard Worker
103*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry {
104*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *parent;
105*1c60b9acSAndroid Build Coastguard Worker
106*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *child_list;
107*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *sibling;
108*1c60b9acSAndroid Build Coastguard Worker
109*1c60b9acSAndroid Build Coastguard Worker /*
110*1c60b9acSAndroid Build Coastguard Worker * care... this points to content in t->lwsac_input_head, it goes
111*1c60b9acSAndroid Build Coastguard Worker * out of scope when the input file being indexed completes
112*1c60b9acSAndroid Build Coastguard Worker */
113*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_instance_file *inst_file_list;
114*1c60b9acSAndroid Build Coastguard Worker
115*1c60b9acSAndroid Build Coastguard Worker jg2_file_offset ofs_last_inst_file;
116*1c60b9acSAndroid Build Coastguard Worker
117*1c60b9acSAndroid Build Coastguard Worker char *suffix; /* suffix string or NULL if one char (in .c) */
118*1c60b9acSAndroid Build Coastguard Worker jg2_file_offset ofs;
119*1c60b9acSAndroid Build Coastguard Worker uint32_t child_count;
120*1c60b9acSAndroid Build Coastguard Worker uint32_t instance_count;
121*1c60b9acSAndroid Build Coastguard Worker uint32_t agg_inst_count;
122*1c60b9acSAndroid Build Coastguard Worker uint32_t agg_child_count;
123*1c60b9acSAndroid Build Coastguard Worker uint32_t suffix_len;
124*1c60b9acSAndroid Build Coastguard Worker unsigned char c;
125*1c60b9acSAndroid Build Coastguard Worker };
126*1c60b9acSAndroid Build Coastguard Worker
127*1c60b9acSAndroid Build Coastguard Worker /* there's only one of these per trie file */
128*1c60b9acSAndroid Build Coastguard Worker
129*1c60b9acSAndroid Build Coastguard Worker struct lws_fts {
130*1c60b9acSAndroid Build Coastguard Worker struct lwsac *lwsac_head;
131*1c60b9acSAndroid Build Coastguard Worker struct lwsac *lwsac_input_head;
132*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *root;
133*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_filepath *filepath_list;
134*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_filepath *fp;
135*1c60b9acSAndroid Build Coastguard Worker
136*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *parser;
137*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *root_lookup[256];
138*1c60b9acSAndroid Build Coastguard Worker
139*1c60b9acSAndroid Build Coastguard Worker /*
140*1c60b9acSAndroid Build Coastguard Worker * head of linked-list of tifs generated for current file
141*1c60b9acSAndroid Build Coastguard Worker * care... this points to content in t->lwsac_input_head
142*1c60b9acSAndroid Build Coastguard Worker */
143*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_instance_file *tif_list;
144*1c60b9acSAndroid Build Coastguard Worker
145*1c60b9acSAndroid Build Coastguard Worker jg2_file_offset c; /* length of output file so far */
146*1c60b9acSAndroid Build Coastguard Worker
147*1c60b9acSAndroid Build Coastguard Worker uint64_t agg_trie_creation_us;
148*1c60b9acSAndroid Build Coastguard Worker uint64_t agg_raw_input;
149*1c60b9acSAndroid Build Coastguard Worker uint64_t worst_lwsac_input_size;
150*1c60b9acSAndroid Build Coastguard Worker int last_file_index;
151*1c60b9acSAndroid Build Coastguard Worker int chars_in_line;
152*1c60b9acSAndroid Build Coastguard Worker jg2_file_offset last_block_len_ofs;
153*1c60b9acSAndroid Build Coastguard Worker int line_number;
154*1c60b9acSAndroid Build Coastguard Worker int lines_in_unsealed_linetable;
155*1c60b9acSAndroid Build Coastguard Worker int next_file_index;
156*1c60b9acSAndroid Build Coastguard Worker int count_entries;
157*1c60b9acSAndroid Build Coastguard Worker
158*1c60b9acSAndroid Build Coastguard Worker int fd;
159*1c60b9acSAndroid Build Coastguard Worker unsigned int agg_pos;
160*1c60b9acSAndroid Build Coastguard Worker unsigned int str_match_pos;
161*1c60b9acSAndroid Build Coastguard Worker
162*1c60b9acSAndroid Build Coastguard Worker unsigned char aggregate;
163*1c60b9acSAndroid Build Coastguard Worker unsigned char agg[128];
164*1c60b9acSAndroid Build Coastguard Worker };
165*1c60b9acSAndroid Build Coastguard Worker
166*1c60b9acSAndroid Build Coastguard Worker /* since the kernel case allocates >300MB, no point keeping this too low */
167*1c60b9acSAndroid Build Coastguard Worker
168*1c60b9acSAndroid Build Coastguard Worker #define TRIE_LWSAC_BLOCK_SIZE (1024 * 1024)
169*1c60b9acSAndroid Build Coastguard Worker
170*1c60b9acSAndroid Build Coastguard Worker #define spill(margin, force) \
171*1c60b9acSAndroid Build Coastguard Worker if (bp && ((uint32_t)bp >= (sizeof(buf) - (size_t)(margin)) || (force))) { \
172*1c60b9acSAndroid Build Coastguard Worker if ((int)write(t->fd, buf, (size_t)bp) != bp) { \
173*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: write %d failed (%d)\n", __func__, \
174*1c60b9acSAndroid Build Coastguard Worker bp, errno); \
175*1c60b9acSAndroid Build Coastguard Worker return 1; \
176*1c60b9acSAndroid Build Coastguard Worker } \
177*1c60b9acSAndroid Build Coastguard Worker t->c += (unsigned int)bp; \
178*1c60b9acSAndroid Build Coastguard Worker bp = 0; \
179*1c60b9acSAndroid Build Coastguard Worker }
180*1c60b9acSAndroid Build Coastguard Worker
181*1c60b9acSAndroid Build Coastguard Worker static int
g32(unsigned char * b,uint32_t d)182*1c60b9acSAndroid Build Coastguard Worker g32(unsigned char *b, uint32_t d)
183*1c60b9acSAndroid Build Coastguard Worker {
184*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)((d >> 24) & 0xff);
185*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)((d >> 16) & 0xff);
186*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)((d >> 8) & 0xff);
187*1c60b9acSAndroid Build Coastguard Worker *b = (uint8_t)(d & 0xff);
188*1c60b9acSAndroid Build Coastguard Worker
189*1c60b9acSAndroid Build Coastguard Worker return 4;
190*1c60b9acSAndroid Build Coastguard Worker }
191*1c60b9acSAndroid Build Coastguard Worker
192*1c60b9acSAndroid Build Coastguard Worker static int
g16(unsigned char * b,int d)193*1c60b9acSAndroid Build Coastguard Worker g16(unsigned char *b, int d)
194*1c60b9acSAndroid Build Coastguard Worker {
195*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)((d >> 8) & 0xff);
196*1c60b9acSAndroid Build Coastguard Worker *b = (uint8_t)(d & 0xff);
197*1c60b9acSAndroid Build Coastguard Worker
198*1c60b9acSAndroid Build Coastguard Worker return 2;
199*1c60b9acSAndroid Build Coastguard Worker }
200*1c60b9acSAndroid Build Coastguard Worker
201*1c60b9acSAndroid Build Coastguard Worker static int
wq32(unsigned char * b,uint32_t d)202*1c60b9acSAndroid Build Coastguard Worker wq32(unsigned char *b, uint32_t d)
203*1c60b9acSAndroid Build Coastguard Worker {
204*1c60b9acSAndroid Build Coastguard Worker unsigned char *ob = b;
205*1c60b9acSAndroid Build Coastguard Worker
206*1c60b9acSAndroid Build Coastguard Worker if (d > (1 << 28) - 1)
207*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)(((d >> 28) | 0x80) & 0xff);
208*1c60b9acSAndroid Build Coastguard Worker
209*1c60b9acSAndroid Build Coastguard Worker if (d > (1 << 21) - 1)
210*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)(((d >> 21) | 0x80) & 0xff);
211*1c60b9acSAndroid Build Coastguard Worker
212*1c60b9acSAndroid Build Coastguard Worker if (d > (1 << 14) - 1)
213*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)(((d >> 14) | 0x80) & 0xff);
214*1c60b9acSAndroid Build Coastguard Worker
215*1c60b9acSAndroid Build Coastguard Worker if (d > (1 << 7) - 1)
216*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)(((d >> 7) | 0x80) & 0xff);
217*1c60b9acSAndroid Build Coastguard Worker
218*1c60b9acSAndroid Build Coastguard Worker *b++ = (uint8_t)(d & 0x7f);
219*1c60b9acSAndroid Build Coastguard Worker
220*1c60b9acSAndroid Build Coastguard Worker return lws_ptr_diff(b, ob);
221*1c60b9acSAndroid Build Coastguard Worker }
222*1c60b9acSAndroid Build Coastguard Worker
223*1c60b9acSAndroid Build Coastguard Worker
224*1c60b9acSAndroid Build Coastguard Worker /* read a VLI, return the number of bytes used */
225*1c60b9acSAndroid Build Coastguard Worker
226*1c60b9acSAndroid Build Coastguard Worker int
rq32(unsigned char * b,uint32_t * d)227*1c60b9acSAndroid Build Coastguard Worker rq32(unsigned char *b, uint32_t *d)
228*1c60b9acSAndroid Build Coastguard Worker {
229*1c60b9acSAndroid Build Coastguard Worker unsigned char *ob = b;
230*1c60b9acSAndroid Build Coastguard Worker uint32_t t = 0;
231*1c60b9acSAndroid Build Coastguard Worker
232*1c60b9acSAndroid Build Coastguard Worker t = *b & 0x7f;
233*1c60b9acSAndroid Build Coastguard Worker if (*(b++) & 0x80) {
234*1c60b9acSAndroid Build Coastguard Worker t = (t << 7) | (*b & 0x7f);
235*1c60b9acSAndroid Build Coastguard Worker if (*(b++) & 0x80) {
236*1c60b9acSAndroid Build Coastguard Worker t = (t << 7) | (*b & 0x7f);
237*1c60b9acSAndroid Build Coastguard Worker if (*(b++) & 0x80) {
238*1c60b9acSAndroid Build Coastguard Worker t = (t << 7) | (*b & 0x7f);
239*1c60b9acSAndroid Build Coastguard Worker if (*(b++) & 0x80) {
240*1c60b9acSAndroid Build Coastguard Worker t = (t << 7) | (*b & 0x7f);
241*1c60b9acSAndroid Build Coastguard Worker b++;
242*1c60b9acSAndroid Build Coastguard Worker }
243*1c60b9acSAndroid Build Coastguard Worker }
244*1c60b9acSAndroid Build Coastguard Worker }
245*1c60b9acSAndroid Build Coastguard Worker }
246*1c60b9acSAndroid Build Coastguard Worker
247*1c60b9acSAndroid Build Coastguard Worker *d = t;
248*1c60b9acSAndroid Build Coastguard Worker
249*1c60b9acSAndroid Build Coastguard Worker return (int)(b - ob);
250*1c60b9acSAndroid Build Coastguard Worker }
251*1c60b9acSAndroid Build Coastguard Worker
252*1c60b9acSAndroid Build Coastguard Worker struct lws_fts *
lws_fts_create(int fd)253*1c60b9acSAndroid Build Coastguard Worker lws_fts_create(int fd)
254*1c60b9acSAndroid Build Coastguard Worker {
255*1c60b9acSAndroid Build Coastguard Worker struct lws_fts *t;
256*1c60b9acSAndroid Build Coastguard Worker struct lwsac *lwsac_head = NULL;
257*1c60b9acSAndroid Build Coastguard Worker unsigned char buf[TRIE_FILE_HDR_SIZE];
258*1c60b9acSAndroid Build Coastguard Worker
259*1c60b9acSAndroid Build Coastguard Worker t = lwsac_use(&lwsac_head, sizeof(*t), TRIE_LWSAC_BLOCK_SIZE);
260*1c60b9acSAndroid Build Coastguard Worker if (!t)
261*1c60b9acSAndroid Build Coastguard Worker return NULL;
262*1c60b9acSAndroid Build Coastguard Worker
263*1c60b9acSAndroid Build Coastguard Worker memset(t, 0, sizeof(*t));
264*1c60b9acSAndroid Build Coastguard Worker
265*1c60b9acSAndroid Build Coastguard Worker t->fd = fd;
266*1c60b9acSAndroid Build Coastguard Worker t->lwsac_head = lwsac_head;
267*1c60b9acSAndroid Build Coastguard Worker t->root = lwsac_use(&lwsac_head, sizeof(*t->root),
268*1c60b9acSAndroid Build Coastguard Worker TRIE_LWSAC_BLOCK_SIZE);
269*1c60b9acSAndroid Build Coastguard Worker if (!t->root)
270*1c60b9acSAndroid Build Coastguard Worker goto unwind;
271*1c60b9acSAndroid Build Coastguard Worker
272*1c60b9acSAndroid Build Coastguard Worker memset(t->root, 0, sizeof(*t->root));
273*1c60b9acSAndroid Build Coastguard Worker t->parser = t->root;
274*1c60b9acSAndroid Build Coastguard Worker t->last_file_index = -1;
275*1c60b9acSAndroid Build Coastguard Worker t->line_number = 1;
276*1c60b9acSAndroid Build Coastguard Worker t->filepath_list = NULL;
277*1c60b9acSAndroid Build Coastguard Worker
278*1c60b9acSAndroid Build Coastguard Worker memset(t->root_lookup, 0, sizeof(*t->root_lookup));
279*1c60b9acSAndroid Build Coastguard Worker
280*1c60b9acSAndroid Build Coastguard Worker /* write the header */
281*1c60b9acSAndroid Build Coastguard Worker
282*1c60b9acSAndroid Build Coastguard Worker buf[0] = 0xca;
283*1c60b9acSAndroid Build Coastguard Worker buf[1] = 0x7a;
284*1c60b9acSAndroid Build Coastguard Worker buf[2] = 0x5f;
285*1c60b9acSAndroid Build Coastguard Worker buf[3] = 0x75;
286*1c60b9acSAndroid Build Coastguard Worker
287*1c60b9acSAndroid Build Coastguard Worker /* (these are filled in with correct data at the end) */
288*1c60b9acSAndroid Build Coastguard Worker
289*1c60b9acSAndroid Build Coastguard Worker /* file offset to root trie entry */
290*1c60b9acSAndroid Build Coastguard Worker g32(&buf[4], 0);
291*1c60b9acSAndroid Build Coastguard Worker /* file length when it was created */
292*1c60b9acSAndroid Build Coastguard Worker g32(&buf[8], 0);
293*1c60b9acSAndroid Build Coastguard Worker /* fileoffset to the filepath table */
294*1c60b9acSAndroid Build Coastguard Worker g32(&buf[0xc], 0);
295*1c60b9acSAndroid Build Coastguard Worker /* count of filepaths */
296*1c60b9acSAndroid Build Coastguard Worker g32(&buf[0x10], 0);
297*1c60b9acSAndroid Build Coastguard Worker
298*1c60b9acSAndroid Build Coastguard Worker if (write(t->fd, buf, TRIE_FILE_HDR_SIZE) != TRIE_FILE_HDR_SIZE) {
299*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: trie header write failed\n", __func__);
300*1c60b9acSAndroid Build Coastguard Worker goto unwind;
301*1c60b9acSAndroid Build Coastguard Worker }
302*1c60b9acSAndroid Build Coastguard Worker
303*1c60b9acSAndroid Build Coastguard Worker t->c = TRIE_FILE_HDR_SIZE;
304*1c60b9acSAndroid Build Coastguard Worker
305*1c60b9acSAndroid Build Coastguard Worker return t;
306*1c60b9acSAndroid Build Coastguard Worker
307*1c60b9acSAndroid Build Coastguard Worker unwind:
308*1c60b9acSAndroid Build Coastguard Worker lwsac_free(&lwsac_head);
309*1c60b9acSAndroid Build Coastguard Worker
310*1c60b9acSAndroid Build Coastguard Worker return NULL;
311*1c60b9acSAndroid Build Coastguard Worker }
312*1c60b9acSAndroid Build Coastguard Worker
313*1c60b9acSAndroid Build Coastguard Worker void
lws_fts_destroy(struct lws_fts ** trie)314*1c60b9acSAndroid Build Coastguard Worker lws_fts_destroy(struct lws_fts **trie)
315*1c60b9acSAndroid Build Coastguard Worker {
316*1c60b9acSAndroid Build Coastguard Worker struct lwsac *lwsac_head = (*trie)->lwsac_head;
317*1c60b9acSAndroid Build Coastguard Worker lwsac_free(&(*trie)->lwsac_input_head);
318*1c60b9acSAndroid Build Coastguard Worker lwsac_free(&lwsac_head);
319*1c60b9acSAndroid Build Coastguard Worker *trie = NULL;
320*1c60b9acSAndroid Build Coastguard Worker }
321*1c60b9acSAndroid Build Coastguard Worker
322*1c60b9acSAndroid Build Coastguard Worker int
lws_fts_file_index(struct lws_fts * t,const char * filepath,int filepath_len,int priority)323*1c60b9acSAndroid Build Coastguard Worker lws_fts_file_index(struct lws_fts *t, const char *filepath, int filepath_len,
324*1c60b9acSAndroid Build Coastguard Worker int priority)
325*1c60b9acSAndroid Build Coastguard Worker {
326*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_filepath *fp = t->filepath_list;
327*1c60b9acSAndroid Build Coastguard Worker #if 0
328*1c60b9acSAndroid Build Coastguard Worker while (fp) {
329*1c60b9acSAndroid Build Coastguard Worker if (fp->filepath_len == filepath_len &&
330*1c60b9acSAndroid Build Coastguard Worker !strcmp(fp->filepath, filepath))
331*1c60b9acSAndroid Build Coastguard Worker return fp->file_index;
332*1c60b9acSAndroid Build Coastguard Worker
333*1c60b9acSAndroid Build Coastguard Worker fp = fp->next;
334*1c60b9acSAndroid Build Coastguard Worker }
335*1c60b9acSAndroid Build Coastguard Worker #endif
336*1c60b9acSAndroid Build Coastguard Worker fp = lwsac_use(&t->lwsac_head, sizeof(*fp), TRIE_LWSAC_BLOCK_SIZE);
337*1c60b9acSAndroid Build Coastguard Worker if (!fp)
338*1c60b9acSAndroid Build Coastguard Worker return -1;
339*1c60b9acSAndroid Build Coastguard Worker
340*1c60b9acSAndroid Build Coastguard Worker fp->next = t->filepath_list;
341*1c60b9acSAndroid Build Coastguard Worker t->filepath_list = fp;
342*1c60b9acSAndroid Build Coastguard Worker strncpy(fp->filepath, filepath, sizeof(fp->filepath) - 1);
343*1c60b9acSAndroid Build Coastguard Worker fp->filepath[sizeof(fp->filepath) - 1] = '\0';
344*1c60b9acSAndroid Build Coastguard Worker fp->filepath_len = filepath_len;
345*1c60b9acSAndroid Build Coastguard Worker fp->file_index = t->next_file_index++;
346*1c60b9acSAndroid Build Coastguard Worker fp->line_table_ofs = t->c;
347*1c60b9acSAndroid Build Coastguard Worker fp->priority = priority;
348*1c60b9acSAndroid Build Coastguard Worker fp->total_lines = 0;
349*1c60b9acSAndroid Build Coastguard Worker t->fp = fp;
350*1c60b9acSAndroid Build Coastguard Worker
351*1c60b9acSAndroid Build Coastguard Worker return fp->file_index;
352*1c60b9acSAndroid Build Coastguard Worker }
353*1c60b9acSAndroid Build Coastguard Worker
354*1c60b9acSAndroid Build Coastguard Worker static struct lws_fts_entry *
lws_fts_entry_child_add(struct lws_fts * t,unsigned char c,struct lws_fts_entry * parent)355*1c60b9acSAndroid Build Coastguard Worker lws_fts_entry_child_add(struct lws_fts *t, unsigned char c,
356*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *parent)
357*1c60b9acSAndroid Build Coastguard Worker {
358*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *e, **pe;
359*1c60b9acSAndroid Build Coastguard Worker
360*1c60b9acSAndroid Build Coastguard Worker e = lwsac_use(&t->lwsac_head, sizeof(*e), TRIE_LWSAC_BLOCK_SIZE);
361*1c60b9acSAndroid Build Coastguard Worker if (!e)
362*1c60b9acSAndroid Build Coastguard Worker return NULL;
363*1c60b9acSAndroid Build Coastguard Worker
364*1c60b9acSAndroid Build Coastguard Worker memset(e, 0, sizeof(*e));
365*1c60b9acSAndroid Build Coastguard Worker
366*1c60b9acSAndroid Build Coastguard Worker e->c = c;
367*1c60b9acSAndroid Build Coastguard Worker parent->child_count++;
368*1c60b9acSAndroid Build Coastguard Worker e->parent = parent;
369*1c60b9acSAndroid Build Coastguard Worker t->count_entries++;
370*1c60b9acSAndroid Build Coastguard Worker
371*1c60b9acSAndroid Build Coastguard Worker /* keep the parent child list in ascending sort order for c */
372*1c60b9acSAndroid Build Coastguard Worker
373*1c60b9acSAndroid Build Coastguard Worker pe = &parent->child_list;
374*1c60b9acSAndroid Build Coastguard Worker while (*pe) {
375*1c60b9acSAndroid Build Coastguard Worker assert((*pe)->parent == parent);
376*1c60b9acSAndroid Build Coastguard Worker if ((*pe)->c > c) {
377*1c60b9acSAndroid Build Coastguard Worker /* add it before */
378*1c60b9acSAndroid Build Coastguard Worker e->sibling = *pe;
379*1c60b9acSAndroid Build Coastguard Worker *pe = e;
380*1c60b9acSAndroid Build Coastguard Worker break;
381*1c60b9acSAndroid Build Coastguard Worker }
382*1c60b9acSAndroid Build Coastguard Worker pe = &(*pe)->sibling;
383*1c60b9acSAndroid Build Coastguard Worker }
384*1c60b9acSAndroid Build Coastguard Worker
385*1c60b9acSAndroid Build Coastguard Worker if (!*pe) {
386*1c60b9acSAndroid Build Coastguard Worker /* add it at the end */
387*1c60b9acSAndroid Build Coastguard Worker e->sibling = NULL;
388*1c60b9acSAndroid Build Coastguard Worker *pe = e;
389*1c60b9acSAndroid Build Coastguard Worker }
390*1c60b9acSAndroid Build Coastguard Worker
391*1c60b9acSAndroid Build Coastguard Worker return e;
392*1c60b9acSAndroid Build Coastguard Worker }
393*1c60b9acSAndroid Build Coastguard Worker
394*1c60b9acSAndroid Build Coastguard Worker static int
finalize_per_input(struct lws_fts * t)395*1c60b9acSAndroid Build Coastguard Worker finalize_per_input(struct lws_fts *t)
396*1c60b9acSAndroid Build Coastguard Worker {
397*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_instance_file *tif;
398*1c60b9acSAndroid Build Coastguard Worker unsigned char buf[8192];
399*1c60b9acSAndroid Build Coastguard Worker uint64_t lwsac_input_size;
400*1c60b9acSAndroid Build Coastguard Worker jg2_file_offset temp;
401*1c60b9acSAndroid Build Coastguard Worker int bp = 0;
402*1c60b9acSAndroid Build Coastguard Worker
403*1c60b9acSAndroid Build Coastguard Worker bp += g16(&buf[bp], 0);
404*1c60b9acSAndroid Build Coastguard Worker bp += g16(&buf[bp], 0);
405*1c60b9acSAndroid Build Coastguard Worker bp += g32(&buf[bp], 0);
406*1c60b9acSAndroid Build Coastguard Worker if ((int)write(t->fd, buf, (size_t)bp) != bp)
407*1c60b9acSAndroid Build Coastguard Worker return 1;
408*1c60b9acSAndroid Build Coastguard Worker t->c += (unsigned int)bp;
409*1c60b9acSAndroid Build Coastguard Worker bp = 0;
410*1c60b9acSAndroid Build Coastguard Worker
411*1c60b9acSAndroid Build Coastguard Worker /*
412*1c60b9acSAndroid Build Coastguard Worker * Write the generated file index + instances (if any)
413*1c60b9acSAndroid Build Coastguard Worker *
414*1c60b9acSAndroid Build Coastguard Worker * Notice the next same-parent file instance fileoffset list is
415*1c60b9acSAndroid Build Coastguard Worker * backwards, so it does not require seeks to fill in. The first
416*1c60b9acSAndroid Build Coastguard Worker * entry has 0 but the second entry points to the first entry (whose
417*1c60b9acSAndroid Build Coastguard Worker * fileoffset is known).
418*1c60b9acSAndroid Build Coastguard Worker *
419*1c60b9acSAndroid Build Coastguard Worker * After all the file instance structs are finalized,
420*1c60b9acSAndroid Build Coastguard Worker * .ofs_last_inst_file contains the fileoffset of that child's tif
421*1c60b9acSAndroid Build Coastguard Worker * list head in the file.
422*1c60b9acSAndroid Build Coastguard Worker *
423*1c60b9acSAndroid Build Coastguard Worker * The file instances are written to disk in the order that the files
424*1c60b9acSAndroid Build Coastguard Worker * were indexed, along with their prev pointers inline.
425*1c60b9acSAndroid Build Coastguard Worker */
426*1c60b9acSAndroid Build Coastguard Worker
427*1c60b9acSAndroid Build Coastguard Worker tif = t->tif_list;
428*1c60b9acSAndroid Build Coastguard Worker while (tif) {
429*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_lines *i;
430*1c60b9acSAndroid Build Coastguard Worker
431*1c60b9acSAndroid Build Coastguard Worker spill((3 * MAX_VLI) + tif->count, 0);
432*1c60b9acSAndroid Build Coastguard Worker
433*1c60b9acSAndroid Build Coastguard Worker temp = tif->owner->ofs_last_inst_file;
434*1c60b9acSAndroid Build Coastguard Worker if (tif->total)
435*1c60b9acSAndroid Build Coastguard Worker tif->owner->ofs_last_inst_file = t->c + (unsigned int)bp;
436*1c60b9acSAndroid Build Coastguard Worker
437*1c60b9acSAndroid Build Coastguard Worker assert(!temp || (temp > TRIE_FILE_HDR_SIZE && temp < t->c));
438*1c60b9acSAndroid Build Coastguard Worker
439*1c60b9acSAndroid Build Coastguard Worker /* fileoffset of prev instance file for this entry, or 0 */
440*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], temp);
441*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], tif->file_index);
442*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], tif->total);
443*1c60b9acSAndroid Build Coastguard Worker
444*1c60b9acSAndroid Build Coastguard Worker /* remove any pointers into this disposable lac footprint */
445*1c60b9acSAndroid Build Coastguard Worker tif->owner->inst_file_list = NULL;
446*1c60b9acSAndroid Build Coastguard Worker
447*1c60b9acSAndroid Build Coastguard Worker memcpy(&buf[bp], &tif->vli, (size_t)tif->count);
448*1c60b9acSAndroid Build Coastguard Worker bp += tif->count;
449*1c60b9acSAndroid Build Coastguard Worker
450*1c60b9acSAndroid Build Coastguard Worker i = tif->lines_list;
451*1c60b9acSAndroid Build Coastguard Worker while (i) {
452*1c60b9acSAndroid Build Coastguard Worker spill(i->count, 0);
453*1c60b9acSAndroid Build Coastguard Worker memcpy(&buf[bp], &i->vli, (size_t)i->count);
454*1c60b9acSAndroid Build Coastguard Worker bp += i->count;
455*1c60b9acSAndroid Build Coastguard Worker
456*1c60b9acSAndroid Build Coastguard Worker i = i->lines_next;
457*1c60b9acSAndroid Build Coastguard Worker }
458*1c60b9acSAndroid Build Coastguard Worker
459*1c60b9acSAndroid Build Coastguard Worker tif = tif->inst_file_next;
460*1c60b9acSAndroid Build Coastguard Worker }
461*1c60b9acSAndroid Build Coastguard Worker
462*1c60b9acSAndroid Build Coastguard Worker spill(0, 1);
463*1c60b9acSAndroid Build Coastguard Worker
464*1c60b9acSAndroid Build Coastguard Worker assert(lseek(t->fd, 0, SEEK_END) == (off_t)t->c);
465*1c60b9acSAndroid Build Coastguard Worker
466*1c60b9acSAndroid Build Coastguard Worker if (t->lwsac_input_head) {
467*1c60b9acSAndroid Build Coastguard Worker lwsac_input_size = lwsac_total_alloc(t->lwsac_input_head);
468*1c60b9acSAndroid Build Coastguard Worker if (lwsac_input_size > t->worst_lwsac_input_size)
469*1c60b9acSAndroid Build Coastguard Worker t->worst_lwsac_input_size = lwsac_input_size;
470*1c60b9acSAndroid Build Coastguard Worker }
471*1c60b9acSAndroid Build Coastguard Worker
472*1c60b9acSAndroid Build Coastguard Worker /*
473*1c60b9acSAndroid Build Coastguard Worker * those per-file allocations are all on a separate lac so we can
474*1c60b9acSAndroid Build Coastguard Worker * free it cleanly afterwards
475*1c60b9acSAndroid Build Coastguard Worker */
476*1c60b9acSAndroid Build Coastguard Worker lwsac_free(&t->lwsac_input_head);
477*1c60b9acSAndroid Build Coastguard Worker
478*1c60b9acSAndroid Build Coastguard Worker /* and lose the pointer into the deallocated lac */
479*1c60b9acSAndroid Build Coastguard Worker t->tif_list = NULL;
480*1c60b9acSAndroid Build Coastguard Worker
481*1c60b9acSAndroid Build Coastguard Worker return 0;
482*1c60b9acSAndroid Build Coastguard Worker }
483*1c60b9acSAndroid Build Coastguard Worker
484*1c60b9acSAndroid Build Coastguard Worker /*
485*1c60b9acSAndroid Build Coastguard Worker * 0 = punctuation, whitespace, brackets etc
486*1c60b9acSAndroid Build Coastguard Worker * 1 = character inside symbol set
487*1c60b9acSAndroid Build Coastguard Worker * 2 = upper-case character inside symbol set
488*1c60b9acSAndroid Build Coastguard Worker */
489*1c60b9acSAndroid Build Coastguard Worker
490*1c60b9acSAndroid Build Coastguard Worker static char classify[] = {
491*1c60b9acSAndroid Build Coastguard Worker 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492*1c60b9acSAndroid Build Coastguard Worker 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493*1c60b9acSAndroid Build Coastguard Worker 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
495*1c60b9acSAndroid Build Coastguard Worker 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
496*1c60b9acSAndroid Build Coastguard Worker 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, //1,
497*1c60b9acSAndroid Build Coastguard Worker 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
498*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
499*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
500*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
501*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
502*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
503*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
504*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
505*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
506*1c60b9acSAndroid Build Coastguard Worker 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
507*1c60b9acSAndroid Build Coastguard Worker };
508*1c60b9acSAndroid Build Coastguard Worker
509*1c60b9acSAndroid Build Coastguard Worker #if 0
510*1c60b9acSAndroid Build Coastguard Worker static const char *
511*1c60b9acSAndroid Build Coastguard Worker name_entry(struct lws_fts_entry *e1, char *s, int len)
512*1c60b9acSAndroid Build Coastguard Worker {
513*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *e2;
514*1c60b9acSAndroid Build Coastguard Worker int n = len;
515*1c60b9acSAndroid Build Coastguard Worker
516*1c60b9acSAndroid Build Coastguard Worker s[--n] = '\0';
517*1c60b9acSAndroid Build Coastguard Worker
518*1c60b9acSAndroid Build Coastguard Worker e2 = e1;
519*1c60b9acSAndroid Build Coastguard Worker while (e2) {
520*1c60b9acSAndroid Build Coastguard Worker if (e2->suffix) {
521*1c60b9acSAndroid Build Coastguard Worker if ((int)e2->suffix_len < n) {
522*1c60b9acSAndroid Build Coastguard Worker n -= e2->suffix_len;
523*1c60b9acSAndroid Build Coastguard Worker memcpy(&s[n], e2->suffix, e2->suffix_len);
524*1c60b9acSAndroid Build Coastguard Worker }
525*1c60b9acSAndroid Build Coastguard Worker } else {
526*1c60b9acSAndroid Build Coastguard Worker n--;
527*1c60b9acSAndroid Build Coastguard Worker s[n] = e2->c;
528*1c60b9acSAndroid Build Coastguard Worker }
529*1c60b9acSAndroid Build Coastguard Worker
530*1c60b9acSAndroid Build Coastguard Worker e2 = e2->parent;
531*1c60b9acSAndroid Build Coastguard Worker }
532*1c60b9acSAndroid Build Coastguard Worker
533*1c60b9acSAndroid Build Coastguard Worker return &s[n + 1];
534*1c60b9acSAndroid Build Coastguard Worker }
535*1c60b9acSAndroid Build Coastguard Worker #endif
536*1c60b9acSAndroid Build Coastguard Worker
537*1c60b9acSAndroid Build Coastguard Worker /*
538*1c60b9acSAndroid Build Coastguard Worker * as we parse the input, we create a line length table for the file index.
539*1c60b9acSAndroid Build Coastguard Worker * Only the file header has been written before we start doing this.
540*1c60b9acSAndroid Build Coastguard Worker */
541*1c60b9acSAndroid Build Coastguard Worker
542*1c60b9acSAndroid Build Coastguard Worker int
lws_fts_fill(struct lws_fts * t,uint32_t file_index,const char * buf,size_t len)543*1c60b9acSAndroid Build Coastguard Worker lws_fts_fill(struct lws_fts *t, uint32_t file_index, const char *buf,
544*1c60b9acSAndroid Build Coastguard Worker size_t len)
545*1c60b9acSAndroid Build Coastguard Worker {
546*1c60b9acSAndroid Build Coastguard Worker unsigned long long tf = (unsigned long long)lws_now_usecs();
547*1c60b9acSAndroid Build Coastguard Worker unsigned char c, linetable[256], vlibuf[8];
548*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *e, *e1, *dcl;
549*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_instance_file *tif;
550*1c60b9acSAndroid Build Coastguard Worker int bp = 0, sline, chars, m;
551*1c60b9acSAndroid Build Coastguard Worker char *osuff, skipline = 0;
552*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_lines *tl;
553*1c60b9acSAndroid Build Coastguard Worker unsigned int olen, n;
554*1c60b9acSAndroid Build Coastguard Worker off_t lbh;
555*1c60b9acSAndroid Build Coastguard Worker
556*1c60b9acSAndroid Build Coastguard Worker if ((int)file_index != t->last_file_index) {
557*1c60b9acSAndroid Build Coastguard Worker if (t->last_file_index >= 0)
558*1c60b9acSAndroid Build Coastguard Worker finalize_per_input(t);
559*1c60b9acSAndroid Build Coastguard Worker t->last_file_index = (int)file_index;
560*1c60b9acSAndroid Build Coastguard Worker t->line_number = 1;
561*1c60b9acSAndroid Build Coastguard Worker t->chars_in_line = 0;
562*1c60b9acSAndroid Build Coastguard Worker t->lines_in_unsealed_linetable = 0;
563*1c60b9acSAndroid Build Coastguard Worker }
564*1c60b9acSAndroid Build Coastguard Worker
565*1c60b9acSAndroid Build Coastguard Worker t->agg_raw_input += len;
566*1c60b9acSAndroid Build Coastguard Worker
567*1c60b9acSAndroid Build Coastguard Worker resume:
568*1c60b9acSAndroid Build Coastguard Worker
569*1c60b9acSAndroid Build Coastguard Worker chars = 0;
570*1c60b9acSAndroid Build Coastguard Worker lbh = (off_t)t->c;
571*1c60b9acSAndroid Build Coastguard Worker sline = t->line_number;
572*1c60b9acSAndroid Build Coastguard Worker bp += g16(&linetable[bp], 0);
573*1c60b9acSAndroid Build Coastguard Worker bp += g16(&linetable[bp], 0);
574*1c60b9acSAndroid Build Coastguard Worker bp += g32(&linetable[bp], 0);
575*1c60b9acSAndroid Build Coastguard Worker
576*1c60b9acSAndroid Build Coastguard Worker while (len) {
577*1c60b9acSAndroid Build Coastguard Worker char go_around = 0;
578*1c60b9acSAndroid Build Coastguard Worker
579*1c60b9acSAndroid Build Coastguard Worker if (t->lines_in_unsealed_linetable >= LWS_FTS_LINES_PER_CHUNK)
580*1c60b9acSAndroid Build Coastguard Worker break;
581*1c60b9acSAndroid Build Coastguard Worker
582*1c60b9acSAndroid Build Coastguard Worker len--;
583*1c60b9acSAndroid Build Coastguard Worker
584*1c60b9acSAndroid Build Coastguard Worker c = (unsigned char)*buf++;
585*1c60b9acSAndroid Build Coastguard Worker t->chars_in_line++;
586*1c60b9acSAndroid Build Coastguard Worker if (c == '\n') {
587*1c60b9acSAndroid Build Coastguard Worker skipline = 0;
588*1c60b9acSAndroid Build Coastguard Worker t->filepath_list->total_lines++;
589*1c60b9acSAndroid Build Coastguard Worker t->lines_in_unsealed_linetable++;
590*1c60b9acSAndroid Build Coastguard Worker t->line_number++;
591*1c60b9acSAndroid Build Coastguard Worker
592*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&linetable[bp], (uint32_t)t->chars_in_line);
593*1c60b9acSAndroid Build Coastguard Worker if ((unsigned int)bp > sizeof(linetable) - 6) {
594*1c60b9acSAndroid Build Coastguard Worker if ((int)write(t->fd, linetable, (unsigned int)bp) != bp) {
595*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: linetable write failed\n",
596*1c60b9acSAndroid Build Coastguard Worker __func__);
597*1c60b9acSAndroid Build Coastguard Worker return 1;
598*1c60b9acSAndroid Build Coastguard Worker }
599*1c60b9acSAndroid Build Coastguard Worker t->c += (unsigned int)bp;
600*1c60b9acSAndroid Build Coastguard Worker bp = 0;
601*1c60b9acSAndroid Build Coastguard Worker // assert(lseek(t->fd, 0, SEEK_END) == t->c);
602*1c60b9acSAndroid Build Coastguard Worker }
603*1c60b9acSAndroid Build Coastguard Worker
604*1c60b9acSAndroid Build Coastguard Worker chars += t->chars_in_line;
605*1c60b9acSAndroid Build Coastguard Worker t->chars_in_line = 0;
606*1c60b9acSAndroid Build Coastguard Worker
607*1c60b9acSAndroid Build Coastguard Worker /*
608*1c60b9acSAndroid Build Coastguard Worker * Detect overlength lines and skip them (eg, BASE64
609*1c60b9acSAndroid Build Coastguard Worker * in css etc)
610*1c60b9acSAndroid Build Coastguard Worker */
611*1c60b9acSAndroid Build Coastguard Worker
612*1c60b9acSAndroid Build Coastguard Worker if (len > 200) {
613*1c60b9acSAndroid Build Coastguard Worker n = 0;
614*1c60b9acSAndroid Build Coastguard Worker m = 0;
615*1c60b9acSAndroid Build Coastguard Worker while (n < 200 && m < 80 && buf[n] != '\n') {
616*1c60b9acSAndroid Build Coastguard Worker if (buf[n] == ' ' || buf[n] == '\t')
617*1c60b9acSAndroid Build Coastguard Worker m = 0;
618*1c60b9acSAndroid Build Coastguard Worker n++;
619*1c60b9acSAndroid Build Coastguard Worker m++;
620*1c60b9acSAndroid Build Coastguard Worker }
621*1c60b9acSAndroid Build Coastguard Worker
622*1c60b9acSAndroid Build Coastguard Worker /* 80 lines no whitespace, or >=200-char line */
623*1c60b9acSAndroid Build Coastguard Worker
624*1c60b9acSAndroid Build Coastguard Worker if (m == 80 || n == 200)
625*1c60b9acSAndroid Build Coastguard Worker skipline = 1;
626*1c60b9acSAndroid Build Coastguard Worker }
627*1c60b9acSAndroid Build Coastguard Worker
628*1c60b9acSAndroid Build Coastguard Worker goto seal;
629*1c60b9acSAndroid Build Coastguard Worker }
630*1c60b9acSAndroid Build Coastguard Worker if (skipline)
631*1c60b9acSAndroid Build Coastguard Worker continue;
632*1c60b9acSAndroid Build Coastguard Worker
633*1c60b9acSAndroid Build Coastguard Worker m = classify[(int)c];
634*1c60b9acSAndroid Build Coastguard Worker if (!m)
635*1c60b9acSAndroid Build Coastguard Worker goto seal;
636*1c60b9acSAndroid Build Coastguard Worker if (m == 2)
637*1c60b9acSAndroid Build Coastguard Worker c = (unsigned char)((char)c + 'a' - 'A');
638*1c60b9acSAndroid Build Coastguard Worker
639*1c60b9acSAndroid Build Coastguard Worker if (t->aggregate) {
640*1c60b9acSAndroid Build Coastguard Worker
641*1c60b9acSAndroid Build Coastguard Worker /*
642*1c60b9acSAndroid Build Coastguard Worker * We created a trie entry for an earlier char in this
643*1c60b9acSAndroid Build Coastguard Worker * symbol already. So we know at the moment, any
644*1c60b9acSAndroid Build Coastguard Worker * further chars in the symbol are the only children.
645*1c60b9acSAndroid Build Coastguard Worker *
646*1c60b9acSAndroid Build Coastguard Worker * Aggregate them and add them as a string suffix to
647*1c60b9acSAndroid Build Coastguard Worker * the trie symbol at the end (when we know how much to
648*1c60b9acSAndroid Build Coastguard Worker * allocate).
649*1c60b9acSAndroid Build Coastguard Worker */
650*1c60b9acSAndroid Build Coastguard Worker
651*1c60b9acSAndroid Build Coastguard Worker if (t->agg_pos < sizeof(t->agg) - 1)
652*1c60b9acSAndroid Build Coastguard Worker /* symbol is not too long to stash */
653*1c60b9acSAndroid Build Coastguard Worker t->agg[t->agg_pos++] = c;
654*1c60b9acSAndroid Build Coastguard Worker
655*1c60b9acSAndroid Build Coastguard Worker continue;
656*1c60b9acSAndroid Build Coastguard Worker }
657*1c60b9acSAndroid Build Coastguard Worker
658*1c60b9acSAndroid Build Coastguard Worker if (t->str_match_pos) {
659*1c60b9acSAndroid Build Coastguard Worker go_around = 1;
660*1c60b9acSAndroid Build Coastguard Worker goto seal;
661*1c60b9acSAndroid Build Coastguard Worker }
662*1c60b9acSAndroid Build Coastguard Worker
663*1c60b9acSAndroid Build Coastguard Worker /* zeroth-iteration child matching */
664*1c60b9acSAndroid Build Coastguard Worker
665*1c60b9acSAndroid Build Coastguard Worker if (t->parser == t->root) {
666*1c60b9acSAndroid Build Coastguard Worker e = t->root_lookup[(int)c];
667*1c60b9acSAndroid Build Coastguard Worker if (e) {
668*1c60b9acSAndroid Build Coastguard Worker t->parser = e;
669*1c60b9acSAndroid Build Coastguard Worker continue;
670*1c60b9acSAndroid Build Coastguard Worker }
671*1c60b9acSAndroid Build Coastguard Worker } else {
672*1c60b9acSAndroid Build Coastguard Worker
673*1c60b9acSAndroid Build Coastguard Worker /* look for the char amongst the children */
674*1c60b9acSAndroid Build Coastguard Worker
675*1c60b9acSAndroid Build Coastguard Worker e = t->parser->child_list;
676*1c60b9acSAndroid Build Coastguard Worker while (e) {
677*1c60b9acSAndroid Build Coastguard Worker
678*1c60b9acSAndroid Build Coastguard Worker /* since they're alpha ordered... */
679*1c60b9acSAndroid Build Coastguard Worker if (e->c > c) {
680*1c60b9acSAndroid Build Coastguard Worker e = NULL;
681*1c60b9acSAndroid Build Coastguard Worker break;
682*1c60b9acSAndroid Build Coastguard Worker }
683*1c60b9acSAndroid Build Coastguard Worker if (e->c == c) {
684*1c60b9acSAndroid Build Coastguard Worker t->parser = e;
685*1c60b9acSAndroid Build Coastguard Worker
686*1c60b9acSAndroid Build Coastguard Worker if (e->suffix)
687*1c60b9acSAndroid Build Coastguard Worker t->str_match_pos = 1;
688*1c60b9acSAndroid Build Coastguard Worker
689*1c60b9acSAndroid Build Coastguard Worker break;
690*1c60b9acSAndroid Build Coastguard Worker }
691*1c60b9acSAndroid Build Coastguard Worker
692*1c60b9acSAndroid Build Coastguard Worker e = e->sibling;
693*1c60b9acSAndroid Build Coastguard Worker }
694*1c60b9acSAndroid Build Coastguard Worker
695*1c60b9acSAndroid Build Coastguard Worker if (e)
696*1c60b9acSAndroid Build Coastguard Worker continue;
697*1c60b9acSAndroid Build Coastguard Worker }
698*1c60b9acSAndroid Build Coastguard Worker
699*1c60b9acSAndroid Build Coastguard Worker /*
700*1c60b9acSAndroid Build Coastguard Worker * we are blazing a new trail, add a new child representing
701*1c60b9acSAndroid Build Coastguard Worker * the whole suffix that couldn't be matched until now.
702*1c60b9acSAndroid Build Coastguard Worker */
703*1c60b9acSAndroid Build Coastguard Worker
704*1c60b9acSAndroid Build Coastguard Worker e = lws_fts_entry_child_add(t, c, t->parser);
705*1c60b9acSAndroid Build Coastguard Worker if (!e) {
706*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: lws_fts_entry_child_add failed\n",
707*1c60b9acSAndroid Build Coastguard Worker __func__);
708*1c60b9acSAndroid Build Coastguard Worker return 1;
709*1c60b9acSAndroid Build Coastguard Worker }
710*1c60b9acSAndroid Build Coastguard Worker
711*1c60b9acSAndroid Build Coastguard Worker /* if it's the root node, keep the root_lookup table in sync */
712*1c60b9acSAndroid Build Coastguard Worker
713*1c60b9acSAndroid Build Coastguard Worker if (t->parser == t->root)
714*1c60b9acSAndroid Build Coastguard Worker t->root_lookup[(int)c] = e;
715*1c60b9acSAndroid Build Coastguard Worker
716*1c60b9acSAndroid Build Coastguard Worker /* follow the new path */
717*1c60b9acSAndroid Build Coastguard Worker t->parser = e;
718*1c60b9acSAndroid Build Coastguard Worker
719*1c60b9acSAndroid Build Coastguard Worker {
720*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry **pe = &e->child_list;
721*1c60b9acSAndroid Build Coastguard Worker while (*pe) {
722*1c60b9acSAndroid Build Coastguard Worker assert((*pe)->parent == e);
723*1c60b9acSAndroid Build Coastguard Worker
724*1c60b9acSAndroid Build Coastguard Worker pe = &(*pe)->sibling;
725*1c60b9acSAndroid Build Coastguard Worker }
726*1c60b9acSAndroid Build Coastguard Worker }
727*1c60b9acSAndroid Build Coastguard Worker
728*1c60b9acSAndroid Build Coastguard Worker /*
729*1c60b9acSAndroid Build Coastguard Worker * If there are any more symbol characters coming, just
730*1c60b9acSAndroid Build Coastguard Worker * create a suffix string on t->parser instead of what must
731*1c60b9acSAndroid Build Coastguard Worker * currently be single-child nodes, since we just created e
732*1c60b9acSAndroid Build Coastguard Worker * as a child with a single character due to no existing match
733*1c60b9acSAndroid Build Coastguard Worker * on that single character... so if no match on 'h' with this
734*1c60b9acSAndroid Build Coastguard Worker * guy's parent, we created e that matches on the single char
735*1c60b9acSAndroid Build Coastguard Worker * 'h'. If the symbol continues ... 'a' 'p' 'p' 'y', then
736*1c60b9acSAndroid Build Coastguard Worker * instead of creating singleton child nodes under e,
737*1c60b9acSAndroid Build Coastguard Worker * modify e to match on the whole string suffix "happy".
738*1c60b9acSAndroid Build Coastguard Worker *
739*1c60b9acSAndroid Build Coastguard Worker * If later "hoppy" appears, we will remove the suffix on e,
740*1c60b9acSAndroid Build Coastguard Worker * so it reverts to a char match for 'h', add singleton children
741*1c60b9acSAndroid Build Coastguard Worker * for 'a' and 'o', and attach a "ppy" suffix child to each of
742*1c60b9acSAndroid Build Coastguard Worker * those.
743*1c60b9acSAndroid Build Coastguard Worker *
744*1c60b9acSAndroid Build Coastguard Worker * We want to do this so we don't have to allocate trie entries
745*1c60b9acSAndroid Build Coastguard Worker * for every char in the string to save memory and consequently
746*1c60b9acSAndroid Build Coastguard Worker * time.
747*1c60b9acSAndroid Build Coastguard Worker *
748*1c60b9acSAndroid Build Coastguard Worker * Don't try this optimization if the parent is the root node...
749*1c60b9acSAndroid Build Coastguard Worker * it's not compatible with it's root_lookup table and it's
750*1c60b9acSAndroid Build Coastguard Worker * highly likely children off the root entry are going to have
751*1c60b9acSAndroid Build Coastguard Worker * to be fragmented.
752*1c60b9acSAndroid Build Coastguard Worker */
753*1c60b9acSAndroid Build Coastguard Worker
754*1c60b9acSAndroid Build Coastguard Worker if (e->parent != t->root) {
755*1c60b9acSAndroid Build Coastguard Worker t->aggregate = 1;
756*1c60b9acSAndroid Build Coastguard Worker t->agg_pos = 0;
757*1c60b9acSAndroid Build Coastguard Worker }
758*1c60b9acSAndroid Build Coastguard Worker
759*1c60b9acSAndroid Build Coastguard Worker continue;
760*1c60b9acSAndroid Build Coastguard Worker
761*1c60b9acSAndroid Build Coastguard Worker seal:
762*1c60b9acSAndroid Build Coastguard Worker if (t->str_match_pos) {
763*1c60b9acSAndroid Build Coastguard Worker
764*1c60b9acSAndroid Build Coastguard Worker /*
765*1c60b9acSAndroid Build Coastguard Worker * We're partway through matching an elaborated string
766*1c60b9acSAndroid Build Coastguard Worker * on a child, not just a character. String matches
767*1c60b9acSAndroid Build Coastguard Worker * only exist when we met a child entry that only had
768*1c60b9acSAndroid Build Coastguard Worker * one path until now... so we had an 'h', and the
769*1c60b9acSAndroid Build Coastguard Worker * only child had a string "hello".
770*1c60b9acSAndroid Build Coastguard Worker *
771*1c60b9acSAndroid Build Coastguard Worker * We are following the right path and will not need
772*1c60b9acSAndroid Build Coastguard Worker * to back up, but we may find as we go we have the
773*1c60b9acSAndroid Build Coastguard Worker * first instance of a second child path, eg, "help".
774*1c60b9acSAndroid Build Coastguard Worker *
775*1c60b9acSAndroid Build Coastguard Worker * When we get to the 'p', we have to split what was
776*1c60b9acSAndroid Build Coastguard Worker * the only string option "hello" into "hel" and then
777*1c60b9acSAndroid Build Coastguard Worker * two child entries, for "lo" and 'p'.
778*1c60b9acSAndroid Build Coastguard Worker */
779*1c60b9acSAndroid Build Coastguard Worker
780*1c60b9acSAndroid Build Coastguard Worker if (c == t->parser->suffix[t->str_match_pos++]) {
781*1c60b9acSAndroid Build Coastguard Worker if (t->str_match_pos < t->parser->suffix_len)
782*1c60b9acSAndroid Build Coastguard Worker continue;
783*1c60b9acSAndroid Build Coastguard Worker
784*1c60b9acSAndroid Build Coastguard Worker /*
785*1c60b9acSAndroid Build Coastguard Worker * We simply matched everything, continue
786*1c60b9acSAndroid Build Coastguard Worker * parsing normally from this trie entry.
787*1c60b9acSAndroid Build Coastguard Worker */
788*1c60b9acSAndroid Build Coastguard Worker
789*1c60b9acSAndroid Build Coastguard Worker t->str_match_pos = 0;
790*1c60b9acSAndroid Build Coastguard Worker continue;
791*1c60b9acSAndroid Build Coastguard Worker }
792*1c60b9acSAndroid Build Coastguard Worker
793*1c60b9acSAndroid Build Coastguard Worker /*
794*1c60b9acSAndroid Build Coastguard Worker * So... we hit a mismatch somewhere... it means we
795*1c60b9acSAndroid Build Coastguard Worker * have to split this string entry.
796*1c60b9acSAndroid Build Coastguard Worker *
797*1c60b9acSAndroid Build Coastguard Worker * We know the first char actually matched in order to
798*1c60b9acSAndroid Build Coastguard Worker * start down this road. So for the current trie entry,
799*1c60b9acSAndroid Build Coastguard Worker * we need to truncate his suffix at the char before
800*1c60b9acSAndroid Build Coastguard Worker * this mismatched one, where we diverged (if the
801*1c60b9acSAndroid Build Coastguard Worker * second char, simply remove the suffix string from the
802*1c60b9acSAndroid Build Coastguard Worker * current trie entry to turn it back to a 1-char match)
803*1c60b9acSAndroid Build Coastguard Worker *
804*1c60b9acSAndroid Build Coastguard Worker * The original entry, which becomes the lhs post-split,
805*1c60b9acSAndroid Build Coastguard Worker * is t->parser.
806*1c60b9acSAndroid Build Coastguard Worker */
807*1c60b9acSAndroid Build Coastguard Worker
808*1c60b9acSAndroid Build Coastguard Worker olen = t->parser->suffix_len;
809*1c60b9acSAndroid Build Coastguard Worker osuff = t->parser->suffix;
810*1c60b9acSAndroid Build Coastguard Worker
811*1c60b9acSAndroid Build Coastguard Worker if (t->str_match_pos == 2)
812*1c60b9acSAndroid Build Coastguard Worker t->parser->suffix = NULL;
813*1c60b9acSAndroid Build Coastguard Worker else
814*1c60b9acSAndroid Build Coastguard Worker t->parser->suffix_len = t->str_match_pos - 1;
815*1c60b9acSAndroid Build Coastguard Worker
816*1c60b9acSAndroid Build Coastguard Worker /*
817*1c60b9acSAndroid Build Coastguard Worker * Then we need to create a new child trie entry that
818*1c60b9acSAndroid Build Coastguard Worker * represents the remainder of the original string
819*1c60b9acSAndroid Build Coastguard Worker * path that we didn't match. For the "hello" /
820*1c60b9acSAndroid Build Coastguard Worker * "help" case, this guy will have "lo".
821*1c60b9acSAndroid Build Coastguard Worker *
822*1c60b9acSAndroid Build Coastguard Worker * Any instances or children (not siblings...) that were
823*1c60b9acSAndroid Build Coastguard Worker * attached to the original trie entry must be detached
824*1c60b9acSAndroid Build Coastguard Worker * first and then migrate to this new guy that completes
825*1c60b9acSAndroid Build Coastguard Worker * the original string.
826*1c60b9acSAndroid Build Coastguard Worker */
827*1c60b9acSAndroid Build Coastguard Worker
828*1c60b9acSAndroid Build Coastguard Worker dcl = t->parser->child_list;
829*1c60b9acSAndroid Build Coastguard Worker m = (int)t->parser->child_count;
830*1c60b9acSAndroid Build Coastguard Worker
831*1c60b9acSAndroid Build Coastguard Worker t->parser->child_list = NULL;
832*1c60b9acSAndroid Build Coastguard Worker t->parser->child_count = 0;
833*1c60b9acSAndroid Build Coastguard Worker
834*1c60b9acSAndroid Build Coastguard Worker e = lws_fts_entry_child_add(t, (unsigned char)
835*1c60b9acSAndroid Build Coastguard Worker osuff[t->str_match_pos - 1], t->parser);
836*1c60b9acSAndroid Build Coastguard Worker if (!e) {
837*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: lws_fts_entry_child_add fail1\n",
838*1c60b9acSAndroid Build Coastguard Worker __func__);
839*1c60b9acSAndroid Build Coastguard Worker return 1;
840*1c60b9acSAndroid Build Coastguard Worker }
841*1c60b9acSAndroid Build Coastguard Worker
842*1c60b9acSAndroid Build Coastguard Worker e->child_list = dcl;
843*1c60b9acSAndroid Build Coastguard Worker e->child_count = (uint32_t)m;
844*1c60b9acSAndroid Build Coastguard Worker /*
845*1c60b9acSAndroid Build Coastguard Worker * any children we took over must point to us as the
846*1c60b9acSAndroid Build Coastguard Worker * parent now they appear on our child list
847*1c60b9acSAndroid Build Coastguard Worker */
848*1c60b9acSAndroid Build Coastguard Worker e1 = e->child_list;
849*1c60b9acSAndroid Build Coastguard Worker while (e1) {
850*1c60b9acSAndroid Build Coastguard Worker e1->parent = e;
851*1c60b9acSAndroid Build Coastguard Worker e1 = e1->sibling;
852*1c60b9acSAndroid Build Coastguard Worker }
853*1c60b9acSAndroid Build Coastguard Worker
854*1c60b9acSAndroid Build Coastguard Worker /*
855*1c60b9acSAndroid Build Coastguard Worker * We detached any children, gave them to the new guy
856*1c60b9acSAndroid Build Coastguard Worker * and replaced them with just our new guy
857*1c60b9acSAndroid Build Coastguard Worker */
858*1c60b9acSAndroid Build Coastguard Worker t->parser->child_count = 1;
859*1c60b9acSAndroid Build Coastguard Worker t->parser->child_list = e;
860*1c60b9acSAndroid Build Coastguard Worker
861*1c60b9acSAndroid Build Coastguard Worker /*
862*1c60b9acSAndroid Build Coastguard Worker * any instances that belonged to the original entry we
863*1c60b9acSAndroid Build Coastguard Worker * are splitting now must be reassigned to the end
864*1c60b9acSAndroid Build Coastguard Worker * part
865*1c60b9acSAndroid Build Coastguard Worker */
866*1c60b9acSAndroid Build Coastguard Worker
867*1c60b9acSAndroid Build Coastguard Worker e->inst_file_list = t->parser->inst_file_list;
868*1c60b9acSAndroid Build Coastguard Worker if (e->inst_file_list)
869*1c60b9acSAndroid Build Coastguard Worker e->inst_file_list->owner = e;
870*1c60b9acSAndroid Build Coastguard Worker t->parser->inst_file_list = NULL;
871*1c60b9acSAndroid Build Coastguard Worker e->instance_count = t->parser->instance_count;
872*1c60b9acSAndroid Build Coastguard Worker t->parser->instance_count = 0;
873*1c60b9acSAndroid Build Coastguard Worker
874*1c60b9acSAndroid Build Coastguard Worker e->ofs_last_inst_file = t->parser->ofs_last_inst_file;
875*1c60b9acSAndroid Build Coastguard Worker t->parser->ofs_last_inst_file = 0;
876*1c60b9acSAndroid Build Coastguard Worker
877*1c60b9acSAndroid Build Coastguard Worker if (t->str_match_pos != olen) {
878*1c60b9acSAndroid Build Coastguard Worker /* we diverged partway */
879*1c60b9acSAndroid Build Coastguard Worker e->suffix = &osuff[t->str_match_pos - 1];
880*1c60b9acSAndroid Build Coastguard Worker e->suffix_len = olen - (t->str_match_pos - 1);
881*1c60b9acSAndroid Build Coastguard Worker }
882*1c60b9acSAndroid Build Coastguard Worker
883*1c60b9acSAndroid Build Coastguard Worker /*
884*1c60b9acSAndroid Build Coastguard Worker * if the current char is a terminal, skip creating a
885*1c60b9acSAndroid Build Coastguard Worker * new way forward.
886*1c60b9acSAndroid Build Coastguard Worker */
887*1c60b9acSAndroid Build Coastguard Worker
888*1c60b9acSAndroid Build Coastguard Worker if (classify[(int)c]) {
889*1c60b9acSAndroid Build Coastguard Worker
890*1c60b9acSAndroid Build Coastguard Worker /*
891*1c60b9acSAndroid Build Coastguard Worker * Lastly we need to create a new child trie
892*1c60b9acSAndroid Build Coastguard Worker * entry that represents the new way forward
893*1c60b9acSAndroid Build Coastguard Worker * from the point that we diverged. For the
894*1c60b9acSAndroid Build Coastguard Worker * "hello" / "help" case, this guy will start
895*1c60b9acSAndroid Build Coastguard Worker * as a child of "hel" with the single
896*1c60b9acSAndroid Build Coastguard Worker * character match 'p'.
897*1c60b9acSAndroid Build Coastguard Worker *
898*1c60b9acSAndroid Build Coastguard Worker * Since he becomes the current parser context,
899*1c60b9acSAndroid Build Coastguard Worker * more symbol characters may be coming to make
900*1c60b9acSAndroid Build Coastguard Worker * him into, eg, "helping", in which case he
901*1c60b9acSAndroid Build Coastguard Worker * will acquire a suffix eventually of "ping"
902*1c60b9acSAndroid Build Coastguard Worker * via the aggregation stuff
903*1c60b9acSAndroid Build Coastguard Worker */
904*1c60b9acSAndroid Build Coastguard Worker
905*1c60b9acSAndroid Build Coastguard Worker e = lws_fts_entry_child_add(t, c, t->parser);
906*1c60b9acSAndroid Build Coastguard Worker if (!e) {
907*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: child_add fail2\n",
908*1c60b9acSAndroid Build Coastguard Worker __func__);
909*1c60b9acSAndroid Build Coastguard Worker return 1;
910*1c60b9acSAndroid Build Coastguard Worker }
911*1c60b9acSAndroid Build Coastguard Worker }
912*1c60b9acSAndroid Build Coastguard Worker
913*1c60b9acSAndroid Build Coastguard Worker /* go on following this path */
914*1c60b9acSAndroid Build Coastguard Worker t->parser = e;
915*1c60b9acSAndroid Build Coastguard Worker
916*1c60b9acSAndroid Build Coastguard Worker t->aggregate = 1;
917*1c60b9acSAndroid Build Coastguard Worker t->agg_pos = 0;
918*1c60b9acSAndroid Build Coastguard Worker
919*1c60b9acSAndroid Build Coastguard Worker t->str_match_pos = 0;
920*1c60b9acSAndroid Build Coastguard Worker
921*1c60b9acSAndroid Build Coastguard Worker if (go_around)
922*1c60b9acSAndroid Build Coastguard Worker continue;
923*1c60b9acSAndroid Build Coastguard Worker
924*1c60b9acSAndroid Build Coastguard Worker /* this is intended to be a seal */
925*1c60b9acSAndroid Build Coastguard Worker }
926*1c60b9acSAndroid Build Coastguard Worker
927*1c60b9acSAndroid Build Coastguard Worker
928*1c60b9acSAndroid Build Coastguard Worker /* end of token */
929*1c60b9acSAndroid Build Coastguard Worker
930*1c60b9acSAndroid Build Coastguard Worker if (t->aggregate && t->agg_pos) {
931*1c60b9acSAndroid Build Coastguard Worker
932*1c60b9acSAndroid Build Coastguard Worker /* if nothing in agg[]: leave as single char match */
933*1c60b9acSAndroid Build Coastguard Worker
934*1c60b9acSAndroid Build Coastguard Worker /* otherwise copy out the symbol aggregation */
935*1c60b9acSAndroid Build Coastguard Worker t->parser->suffix = lwsac_use(&t->lwsac_head,
936*1c60b9acSAndroid Build Coastguard Worker t->agg_pos + 1,
937*1c60b9acSAndroid Build Coastguard Worker TRIE_LWSAC_BLOCK_SIZE);
938*1c60b9acSAndroid Build Coastguard Worker if (!t->parser->suffix) {
939*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: lac for suffix failed\n",
940*1c60b9acSAndroid Build Coastguard Worker __func__);
941*1c60b9acSAndroid Build Coastguard Worker return 1;
942*1c60b9acSAndroid Build Coastguard Worker }
943*1c60b9acSAndroid Build Coastguard Worker
944*1c60b9acSAndroid Build Coastguard Worker /* add the first char at the beginning */
945*1c60b9acSAndroid Build Coastguard Worker *t->parser->suffix = (char)t->parser->c;
946*1c60b9acSAndroid Build Coastguard Worker /* and then add the agg buffer stuff */
947*1c60b9acSAndroid Build Coastguard Worker memcpy(t->parser->suffix + 1, t->agg, t->agg_pos);
948*1c60b9acSAndroid Build Coastguard Worker t->parser->suffix_len = t->agg_pos + 1;
949*1c60b9acSAndroid Build Coastguard Worker }
950*1c60b9acSAndroid Build Coastguard Worker t->aggregate = 0;
951*1c60b9acSAndroid Build Coastguard Worker
952*1c60b9acSAndroid Build Coastguard Worker if (t->parser == t->root) /* multiple terminal chars */
953*1c60b9acSAndroid Build Coastguard Worker continue;
954*1c60b9acSAndroid Build Coastguard Worker
955*1c60b9acSAndroid Build Coastguard Worker if (!t->parser->inst_file_list ||
956*1c60b9acSAndroid Build Coastguard Worker t->parser->inst_file_list->file_index != file_index) {
957*1c60b9acSAndroid Build Coastguard Worker tif = lwsac_use(&t->lwsac_input_head, sizeof(*tif),
958*1c60b9acSAndroid Build Coastguard Worker TRIE_LWSAC_BLOCK_SIZE);
959*1c60b9acSAndroid Build Coastguard Worker if (!tif) {
960*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: lac for tif failed\n",
961*1c60b9acSAndroid Build Coastguard Worker __func__);
962*1c60b9acSAndroid Build Coastguard Worker return 1;
963*1c60b9acSAndroid Build Coastguard Worker }
964*1c60b9acSAndroid Build Coastguard Worker
965*1c60b9acSAndroid Build Coastguard Worker tif->file_index = file_index;
966*1c60b9acSAndroid Build Coastguard Worker tif->owner = t->parser;
967*1c60b9acSAndroid Build Coastguard Worker tif->lines_list = NULL;
968*1c60b9acSAndroid Build Coastguard Worker tif->lines_tail = NULL;
969*1c60b9acSAndroid Build Coastguard Worker tif->total = 0;
970*1c60b9acSAndroid Build Coastguard Worker tif->count = 0;
971*1c60b9acSAndroid Build Coastguard Worker tif->inst_file_next = t->tif_list;
972*1c60b9acSAndroid Build Coastguard Worker t->tif_list = tif;
973*1c60b9acSAndroid Build Coastguard Worker
974*1c60b9acSAndroid Build Coastguard Worker t->parser->inst_file_list = tif;
975*1c60b9acSAndroid Build Coastguard Worker }
976*1c60b9acSAndroid Build Coastguard Worker
977*1c60b9acSAndroid Build Coastguard Worker /*
978*1c60b9acSAndroid Build Coastguard Worker * A naive allocation strategy for this leads to 50% of the
979*1c60b9acSAndroid Build Coastguard Worker * total inmem lac allocation being for line numbers...
980*1c60b9acSAndroid Build Coastguard Worker *
981*1c60b9acSAndroid Build Coastguard Worker * It's mainly solved by only holding the instance and line
982*1c60b9acSAndroid Build Coastguard Worker * number tables for the duration of a file being input, as soon
983*1c60b9acSAndroid Build Coastguard Worker * as one input file is finished it is written to disk.
984*1c60b9acSAndroid Build Coastguard Worker *
985*1c60b9acSAndroid Build Coastguard Worker * For the common case of 1 - ~3 matches the line number are
986*1c60b9acSAndroid Build Coastguard Worker * stored in a small VLI array inside the filepath inst. If the
987*1c60b9acSAndroid Build Coastguard Worker * next one won't fit, it allocates a line number struct with
988*1c60b9acSAndroid Build Coastguard Worker * more vli space and continues chaining those if needed.
989*1c60b9acSAndroid Build Coastguard Worker */
990*1c60b9acSAndroid Build Coastguard Worker
991*1c60b9acSAndroid Build Coastguard Worker n = (unsigned int)wq32(vlibuf, (uint32_t)t->line_number);
992*1c60b9acSAndroid Build Coastguard Worker tif = t->parser->inst_file_list;
993*1c60b9acSAndroid Build Coastguard Worker
994*1c60b9acSAndroid Build Coastguard Worker if (!tif->lines_list) {
995*1c60b9acSAndroid Build Coastguard Worker /* we are still trying to use the file inst vli */
996*1c60b9acSAndroid Build Coastguard Worker if (LWS_ARRAY_SIZE(tif->vli) - (size_t)tif->count >= n) {
997*1c60b9acSAndroid Build Coastguard Worker tif->count = (char)((char)tif->count + (char)wq32(tif->vli + tif->count,
998*1c60b9acSAndroid Build Coastguard Worker (uint32_t)t->line_number));
999*1c60b9acSAndroid Build Coastguard Worker goto after;
1000*1c60b9acSAndroid Build Coastguard Worker }
1001*1c60b9acSAndroid Build Coastguard Worker /* we are going to have to allocate */
1002*1c60b9acSAndroid Build Coastguard Worker }
1003*1c60b9acSAndroid Build Coastguard Worker
1004*1c60b9acSAndroid Build Coastguard Worker /* can we add to an existing line numbers struct? */
1005*1c60b9acSAndroid Build Coastguard Worker if (tif->lines_tail &&
1006*1c60b9acSAndroid Build Coastguard Worker LWS_ARRAY_SIZE(tif->lines_tail->vli) -
1007*1c60b9acSAndroid Build Coastguard Worker (unsigned char)tif->lines_tail->count >= n) {
1008*1c60b9acSAndroid Build Coastguard Worker tif->lines_tail->count = (char)((char)tif->lines_tail->count + (char)wq32(tif->lines_tail->vli +
1009*1c60b9acSAndroid Build Coastguard Worker tif->lines_tail->count,
1010*1c60b9acSAndroid Build Coastguard Worker (uint32_t)t->line_number));
1011*1c60b9acSAndroid Build Coastguard Worker goto after;
1012*1c60b9acSAndroid Build Coastguard Worker }
1013*1c60b9acSAndroid Build Coastguard Worker
1014*1c60b9acSAndroid Build Coastguard Worker /* either no existing line numbers struct at tail, or full */
1015*1c60b9acSAndroid Build Coastguard Worker
1016*1c60b9acSAndroid Build Coastguard Worker /* have to create a(nother) line numbers struct */
1017*1c60b9acSAndroid Build Coastguard Worker tl = lwsac_use(&t->lwsac_input_head, sizeof(*tl),
1018*1c60b9acSAndroid Build Coastguard Worker TRIE_LWSAC_BLOCK_SIZE);
1019*1c60b9acSAndroid Build Coastguard Worker if (!tl) {
1020*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: lac for tl failed\n", __func__);
1021*1c60b9acSAndroid Build Coastguard Worker return 1;
1022*1c60b9acSAndroid Build Coastguard Worker }
1023*1c60b9acSAndroid Build Coastguard Worker tl->lines_next = NULL;
1024*1c60b9acSAndroid Build Coastguard Worker if (tif->lines_tail)
1025*1c60b9acSAndroid Build Coastguard Worker tif->lines_tail->lines_next = tl;
1026*1c60b9acSAndroid Build Coastguard Worker
1027*1c60b9acSAndroid Build Coastguard Worker tif->lines_tail = tl;
1028*1c60b9acSAndroid Build Coastguard Worker if (!tif->lines_list)
1029*1c60b9acSAndroid Build Coastguard Worker tif->lines_list = tl;
1030*1c60b9acSAndroid Build Coastguard Worker
1031*1c60b9acSAndroid Build Coastguard Worker tl->count = (char)wq32(tl->vli, (uint32_t)t->line_number);
1032*1c60b9acSAndroid Build Coastguard Worker after:
1033*1c60b9acSAndroid Build Coastguard Worker tif->total++;
1034*1c60b9acSAndroid Build Coastguard Worker #if 0
1035*1c60b9acSAndroid Build Coastguard Worker {
1036*1c60b9acSAndroid Build Coastguard Worker char s[128];
1037*1c60b9acSAndroid Build Coastguard Worker const char *ne = name_entry(t->parser, s, sizeof(s));
1038*1c60b9acSAndroid Build Coastguard Worker
1039*1c60b9acSAndroid Build Coastguard Worker if (!strcmp(ne, "describ")) {
1040*1c60b9acSAndroid Build Coastguard Worker lwsl_err(" %s %d\n", ne, t->str_match_pos);
1041*1c60b9acSAndroid Build Coastguard Worker write(1, buf - 10, 20);
1042*1c60b9acSAndroid Build Coastguard Worker }
1043*1c60b9acSAndroid Build Coastguard Worker }
1044*1c60b9acSAndroid Build Coastguard Worker #endif
1045*1c60b9acSAndroid Build Coastguard Worker t->parser->instance_count++;
1046*1c60b9acSAndroid Build Coastguard Worker t->parser = t->root;
1047*1c60b9acSAndroid Build Coastguard Worker t->str_match_pos = 0;
1048*1c60b9acSAndroid Build Coastguard Worker }
1049*1c60b9acSAndroid Build Coastguard Worker
1050*1c60b9acSAndroid Build Coastguard Worker /* seal off the line length table block */
1051*1c60b9acSAndroid Build Coastguard Worker
1052*1c60b9acSAndroid Build Coastguard Worker if (bp) {
1053*1c60b9acSAndroid Build Coastguard Worker if ((int)write(t->fd, linetable, (size_t)bp) != bp)
1054*1c60b9acSAndroid Build Coastguard Worker return 1;
1055*1c60b9acSAndroid Build Coastguard Worker t->c += (unsigned int)bp;
1056*1c60b9acSAndroid Build Coastguard Worker bp = 0;
1057*1c60b9acSAndroid Build Coastguard Worker }
1058*1c60b9acSAndroid Build Coastguard Worker
1059*1c60b9acSAndroid Build Coastguard Worker if (lseek(t->fd, lbh, SEEK_SET) < 0) {
1060*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: seek to 0x%llx failed\n", __func__,
1061*1c60b9acSAndroid Build Coastguard Worker (unsigned long long)lbh);
1062*1c60b9acSAndroid Build Coastguard Worker return 1;
1063*1c60b9acSAndroid Build Coastguard Worker }
1064*1c60b9acSAndroid Build Coastguard Worker
1065*1c60b9acSAndroid Build Coastguard Worker g16(linetable, (uint16_t)(t->c - (jg2_file_offset)lbh));
1066*1c60b9acSAndroid Build Coastguard Worker g16(linetable + 2, (uint16_t)(t->line_number - sline));
1067*1c60b9acSAndroid Build Coastguard Worker g32(linetable + 4, (uint32_t)chars);
1068*1c60b9acSAndroid Build Coastguard Worker if ((int)write(t->fd, linetable, 8) != 8) {
1069*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: write linetable header failed\n", __func__);
1070*1c60b9acSAndroid Build Coastguard Worker return 1;
1071*1c60b9acSAndroid Build Coastguard Worker }
1072*1c60b9acSAndroid Build Coastguard Worker
1073*1c60b9acSAndroid Build Coastguard Worker assert(lseek(t->fd, 0, SEEK_END) == (off_t)t->c);
1074*1c60b9acSAndroid Build Coastguard Worker
1075*1c60b9acSAndroid Build Coastguard Worker if (lseek(t->fd, (off_t)t->c, SEEK_SET) < 0) {
1076*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: end seek failed\n", __func__);
1077*1c60b9acSAndroid Build Coastguard Worker return 1;
1078*1c60b9acSAndroid Build Coastguard Worker }
1079*1c60b9acSAndroid Build Coastguard Worker
1080*1c60b9acSAndroid Build Coastguard Worker bp = 0;
1081*1c60b9acSAndroid Build Coastguard Worker
1082*1c60b9acSAndroid Build Coastguard Worker if (len) {
1083*1c60b9acSAndroid Build Coastguard Worker t->lines_in_unsealed_linetable = 0;
1084*1c60b9acSAndroid Build Coastguard Worker goto resume;
1085*1c60b9acSAndroid Build Coastguard Worker }
1086*1c60b9acSAndroid Build Coastguard Worker
1087*1c60b9acSAndroid Build Coastguard Worker /* dump the collected per-input instance and line data, and free it */
1088*1c60b9acSAndroid Build Coastguard Worker
1089*1c60b9acSAndroid Build Coastguard Worker t->agg_trie_creation_us += (uint64_t)((uint64_t)lws_now_usecs() - tf);
1090*1c60b9acSAndroid Build Coastguard Worker
1091*1c60b9acSAndroid Build Coastguard Worker return 0;
1092*1c60b9acSAndroid Build Coastguard Worker }
1093*1c60b9acSAndroid Build Coastguard Worker
1094*1c60b9acSAndroid Build Coastguard Worker /* refer to ./README.md */
1095*1c60b9acSAndroid Build Coastguard Worker
1096*1c60b9acSAndroid Build Coastguard Worker int
lws_fts_serialize(struct lws_fts * t)1097*1c60b9acSAndroid Build Coastguard Worker lws_fts_serialize(struct lws_fts *t)
1098*1c60b9acSAndroid Build Coastguard Worker {
1099*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_filepath *fp = t->filepath_list, *ofp;
1100*1c60b9acSAndroid Build Coastguard Worker unsigned long long tf = (unsigned long long)lws_now_usecs();
1101*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *e, *e1, *s[256];
1102*1c60b9acSAndroid Build Coastguard Worker unsigned char buf[8192], stasis;
1103*1c60b9acSAndroid Build Coastguard Worker int n, bp, sp = 0, do_parent;
1104*1c60b9acSAndroid Build Coastguard Worker
1105*1c60b9acSAndroid Build Coastguard Worker (void)tf;
1106*1c60b9acSAndroid Build Coastguard Worker finalize_per_input(t);
1107*1c60b9acSAndroid Build Coastguard Worker
1108*1c60b9acSAndroid Build Coastguard Worker /*
1109*1c60b9acSAndroid Build Coastguard Worker * Compute aggregated instance counts (parents should know the total
1110*1c60b9acSAndroid Build Coastguard Worker * number of instances below each child path)
1111*1c60b9acSAndroid Build Coastguard Worker *
1112*1c60b9acSAndroid Build Coastguard Worker *
1113*1c60b9acSAndroid Build Coastguard Worker * If we have
1114*1c60b9acSAndroid Build Coastguard Worker *
1115*1c60b9acSAndroid Build Coastguard Worker * (root) -> (c1) -> (c2)
1116*1c60b9acSAndroid Build Coastguard Worker * -> (c3)
1117*1c60b9acSAndroid Build Coastguard Worker *
1118*1c60b9acSAndroid Build Coastguard Worker * we need to visit the nodes in the order
1119*1c60b9acSAndroid Build Coastguard Worker *
1120*1c60b9acSAndroid Build Coastguard Worker * c2, c1, c3, root
1121*1c60b9acSAndroid Build Coastguard Worker */
1122*1c60b9acSAndroid Build Coastguard Worker
1123*1c60b9acSAndroid Build Coastguard Worker sp = 0;
1124*1c60b9acSAndroid Build Coastguard Worker s[0] = t->root;
1125*1c60b9acSAndroid Build Coastguard Worker do_parent = 0;
1126*1c60b9acSAndroid Build Coastguard Worker while (sp >= 0) {
1127*1c60b9acSAndroid Build Coastguard Worker int n;
1128*1c60b9acSAndroid Build Coastguard Worker
1129*1c60b9acSAndroid Build Coastguard Worker /* aggregate in every antecedent */
1130*1c60b9acSAndroid Build Coastguard Worker
1131*1c60b9acSAndroid Build Coastguard Worker for (n = 0; n <= sp; n++) {
1132*1c60b9acSAndroid Build Coastguard Worker s[n]->agg_inst_count += s[sp]->instance_count;
1133*1c60b9acSAndroid Build Coastguard Worker s[n]->agg_child_count += s[sp]->child_count;
1134*1c60b9acSAndroid Build Coastguard Worker }
1135*1c60b9acSAndroid Build Coastguard Worker
1136*1c60b9acSAndroid Build Coastguard Worker /* handle any children before the parent */
1137*1c60b9acSAndroid Build Coastguard Worker
1138*1c60b9acSAndroid Build Coastguard Worker if (s[sp]->child_list) {
1139*1c60b9acSAndroid Build Coastguard Worker if (sp + 1 == LWS_ARRAY_SIZE(s)) {
1140*1c60b9acSAndroid Build Coastguard Worker lwsl_err("Stack too deep\n");
1141*1c60b9acSAndroid Build Coastguard Worker
1142*1c60b9acSAndroid Build Coastguard Worker goto bail;
1143*1c60b9acSAndroid Build Coastguard Worker }
1144*1c60b9acSAndroid Build Coastguard Worker
1145*1c60b9acSAndroid Build Coastguard Worker s[sp + 1] = s[sp]->child_list;
1146*1c60b9acSAndroid Build Coastguard Worker sp++;
1147*1c60b9acSAndroid Build Coastguard Worker continue;
1148*1c60b9acSAndroid Build Coastguard Worker }
1149*1c60b9acSAndroid Build Coastguard Worker
1150*1c60b9acSAndroid Build Coastguard Worker do {
1151*1c60b9acSAndroid Build Coastguard Worker if (s[sp]->sibling) {
1152*1c60b9acSAndroid Build Coastguard Worker s[sp] = s[sp]->sibling;
1153*1c60b9acSAndroid Build Coastguard Worker break;
1154*1c60b9acSAndroid Build Coastguard Worker } else
1155*1c60b9acSAndroid Build Coastguard Worker sp--;
1156*1c60b9acSAndroid Build Coastguard Worker } while (sp >= 0);
1157*1c60b9acSAndroid Build Coastguard Worker }
1158*1c60b9acSAndroid Build Coastguard Worker
1159*1c60b9acSAndroid Build Coastguard Worker /* dump the filepaths and set prev */
1160*1c60b9acSAndroid Build Coastguard Worker
1161*1c60b9acSAndroid Build Coastguard Worker fp = t->filepath_list;
1162*1c60b9acSAndroid Build Coastguard Worker ofp = NULL;
1163*1c60b9acSAndroid Build Coastguard Worker bp = 0;
1164*1c60b9acSAndroid Build Coastguard Worker while (fp) {
1165*1c60b9acSAndroid Build Coastguard Worker
1166*1c60b9acSAndroid Build Coastguard Worker fp->ofs = t->c + (unsigned int)bp;
1167*1c60b9acSAndroid Build Coastguard Worker n = (int)strlen(fp->filepath);
1168*1c60b9acSAndroid Build Coastguard Worker spill(15 + n, 0);
1169*1c60b9acSAndroid Build Coastguard Worker
1170*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], fp->line_table_ofs);
1171*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], (uint32_t)fp->total_lines);
1172*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], (uint32_t)n);
1173*1c60b9acSAndroid Build Coastguard Worker memcpy(&buf[bp], fp->filepath, (unsigned int)n);
1174*1c60b9acSAndroid Build Coastguard Worker bp += n;
1175*1c60b9acSAndroid Build Coastguard Worker
1176*1c60b9acSAndroid Build Coastguard Worker fp->prev = ofp;
1177*1c60b9acSAndroid Build Coastguard Worker ofp = fp;
1178*1c60b9acSAndroid Build Coastguard Worker fp = fp->next;
1179*1c60b9acSAndroid Build Coastguard Worker }
1180*1c60b9acSAndroid Build Coastguard Worker
1181*1c60b9acSAndroid Build Coastguard Worker spill(0, 1);
1182*1c60b9acSAndroid Build Coastguard Worker
1183*1c60b9acSAndroid Build Coastguard Worker /* record the fileoffset of the filepath map and filepath count */
1184*1c60b9acSAndroid Build Coastguard Worker
1185*1c60b9acSAndroid Build Coastguard Worker if (lseek(t->fd, 0xc, SEEK_SET) < 0)
1186*1c60b9acSAndroid Build Coastguard Worker goto bail_seek;
1187*1c60b9acSAndroid Build Coastguard Worker
1188*1c60b9acSAndroid Build Coastguard Worker g32(buf, t->c + (unsigned int)bp);
1189*1c60b9acSAndroid Build Coastguard Worker g32(buf + 4, (uint32_t)t->next_file_index);
1190*1c60b9acSAndroid Build Coastguard Worker if ((int)write(t->fd, buf, 8) != 8)
1191*1c60b9acSAndroid Build Coastguard Worker goto bail;
1192*1c60b9acSAndroid Build Coastguard Worker
1193*1c60b9acSAndroid Build Coastguard Worker if (lseek(t->fd, (off_t)(t->c + (unsigned int)bp), SEEK_SET) < 0)
1194*1c60b9acSAndroid Build Coastguard Worker goto bail_seek;
1195*1c60b9acSAndroid Build Coastguard Worker
1196*1c60b9acSAndroid Build Coastguard Worker /* dump the filepath map, starting from index 0, which is at the tail */
1197*1c60b9acSAndroid Build Coastguard Worker
1198*1c60b9acSAndroid Build Coastguard Worker fp = ofp;
1199*1c60b9acSAndroid Build Coastguard Worker bp = 0;
1200*1c60b9acSAndroid Build Coastguard Worker while (fp) {
1201*1c60b9acSAndroid Build Coastguard Worker spill(5, 0);
1202*1c60b9acSAndroid Build Coastguard Worker g32(buf + bp, fp->ofs);
1203*1c60b9acSAndroid Build Coastguard Worker bp += 4;
1204*1c60b9acSAndroid Build Coastguard Worker fp = fp->prev;
1205*1c60b9acSAndroid Build Coastguard Worker }
1206*1c60b9acSAndroid Build Coastguard Worker spill(0, 1);
1207*1c60b9acSAndroid Build Coastguard Worker
1208*1c60b9acSAndroid Build Coastguard Worker /*
1209*1c60b9acSAndroid Build Coastguard Worker * The trie entries in reverse order... because of the reversal, we have
1210*1c60b9acSAndroid Build Coastguard Worker * always written children first, and marked them with their file offset
1211*1c60b9acSAndroid Build Coastguard Worker * before we come to refer to them.
1212*1c60b9acSAndroid Build Coastguard Worker */
1213*1c60b9acSAndroid Build Coastguard Worker
1214*1c60b9acSAndroid Build Coastguard Worker bp = 0;
1215*1c60b9acSAndroid Build Coastguard Worker sp = 0;
1216*1c60b9acSAndroid Build Coastguard Worker s[0] = t->root;
1217*1c60b9acSAndroid Build Coastguard Worker do_parent = 0;
1218*1c60b9acSAndroid Build Coastguard Worker while (s[sp]) {
1219*1c60b9acSAndroid Build Coastguard Worker
1220*1c60b9acSAndroid Build Coastguard Worker /* handle any children before the parent */
1221*1c60b9acSAndroid Build Coastguard Worker
1222*1c60b9acSAndroid Build Coastguard Worker if (!do_parent && s[sp]->child_list) {
1223*1c60b9acSAndroid Build Coastguard Worker
1224*1c60b9acSAndroid Build Coastguard Worker if (sp + 1 == LWS_ARRAY_SIZE(s)) {
1225*1c60b9acSAndroid Build Coastguard Worker lwsl_err("Stack too deep\n");
1226*1c60b9acSAndroid Build Coastguard Worker
1227*1c60b9acSAndroid Build Coastguard Worker goto bail;
1228*1c60b9acSAndroid Build Coastguard Worker }
1229*1c60b9acSAndroid Build Coastguard Worker
1230*1c60b9acSAndroid Build Coastguard Worker s[sp + 1] = s[sp]->child_list;
1231*1c60b9acSAndroid Build Coastguard Worker sp++;
1232*1c60b9acSAndroid Build Coastguard Worker continue;
1233*1c60b9acSAndroid Build Coastguard Worker }
1234*1c60b9acSAndroid Build Coastguard Worker
1235*1c60b9acSAndroid Build Coastguard Worker /* leaf nodes with no children */
1236*1c60b9acSAndroid Build Coastguard Worker
1237*1c60b9acSAndroid Build Coastguard Worker e = s[sp];
1238*1c60b9acSAndroid Build Coastguard Worker e->ofs = t->c + (unsigned int)bp;
1239*1c60b9acSAndroid Build Coastguard Worker
1240*1c60b9acSAndroid Build Coastguard Worker /* write the trie entry header */
1241*1c60b9acSAndroid Build Coastguard Worker
1242*1c60b9acSAndroid Build Coastguard Worker spill((3 * MAX_VLI), 0);
1243*1c60b9acSAndroid Build Coastguard Worker
1244*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e->ofs_last_inst_file);
1245*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e->child_count);
1246*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e->instance_count);
1247*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e->agg_inst_count);
1248*1c60b9acSAndroid Build Coastguard Worker
1249*1c60b9acSAndroid Build Coastguard Worker /* sort the children in order of highest aggregate hits first */
1250*1c60b9acSAndroid Build Coastguard Worker
1251*1c60b9acSAndroid Build Coastguard Worker do {
1252*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry **pe, *te1, *te2;
1253*1c60b9acSAndroid Build Coastguard Worker
1254*1c60b9acSAndroid Build Coastguard Worker stasis = 1;
1255*1c60b9acSAndroid Build Coastguard Worker
1256*1c60b9acSAndroid Build Coastguard Worker /* bubble sort keeps going until nothing changed */
1257*1c60b9acSAndroid Build Coastguard Worker
1258*1c60b9acSAndroid Build Coastguard Worker pe = &e->child_list;
1259*1c60b9acSAndroid Build Coastguard Worker while (*pe) {
1260*1c60b9acSAndroid Build Coastguard Worker
1261*1c60b9acSAndroid Build Coastguard Worker te1 = *pe;
1262*1c60b9acSAndroid Build Coastguard Worker te2 = te1->sibling;
1263*1c60b9acSAndroid Build Coastguard Worker
1264*1c60b9acSAndroid Build Coastguard Worker if (te2 && te1->agg_inst_count <
1265*1c60b9acSAndroid Build Coastguard Worker te2->agg_inst_count) {
1266*1c60b9acSAndroid Build Coastguard Worker stasis = 0;
1267*1c60b9acSAndroid Build Coastguard Worker
1268*1c60b9acSAndroid Build Coastguard Worker *pe = te2;
1269*1c60b9acSAndroid Build Coastguard Worker te1->sibling = te2->sibling;
1270*1c60b9acSAndroid Build Coastguard Worker te2->sibling = te1;
1271*1c60b9acSAndroid Build Coastguard Worker }
1272*1c60b9acSAndroid Build Coastguard Worker
1273*1c60b9acSAndroid Build Coastguard Worker pe = &(*pe)->sibling;
1274*1c60b9acSAndroid Build Coastguard Worker }
1275*1c60b9acSAndroid Build Coastguard Worker
1276*1c60b9acSAndroid Build Coastguard Worker } while (!stasis);
1277*1c60b9acSAndroid Build Coastguard Worker
1278*1c60b9acSAndroid Build Coastguard Worker /* write the children */
1279*1c60b9acSAndroid Build Coastguard Worker
1280*1c60b9acSAndroid Build Coastguard Worker e1 = e->child_list;
1281*1c60b9acSAndroid Build Coastguard Worker while (e1) {
1282*1c60b9acSAndroid Build Coastguard Worker spill((5 * MAX_VLI) + e1->suffix_len + 1, 0);
1283*1c60b9acSAndroid Build Coastguard Worker
1284*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e1->ofs);
1285*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e1->instance_count);
1286*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e1->agg_inst_count);
1287*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e1->agg_child_count);
1288*1c60b9acSAndroid Build Coastguard Worker
1289*1c60b9acSAndroid Build Coastguard Worker if (e1->suffix) { /* string */
1290*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], e1->suffix_len);
1291*1c60b9acSAndroid Build Coastguard Worker memmove(&buf[bp], e1->suffix, e1->suffix_len);
1292*1c60b9acSAndroid Build Coastguard Worker bp += (int)e1->suffix_len;
1293*1c60b9acSAndroid Build Coastguard Worker } else { /* char */
1294*1c60b9acSAndroid Build Coastguard Worker bp += wq32(&buf[bp], 1);
1295*1c60b9acSAndroid Build Coastguard Worker buf[bp++] = e1->c;
1296*1c60b9acSAndroid Build Coastguard Worker }
1297*1c60b9acSAndroid Build Coastguard Worker #if 0
1298*1c60b9acSAndroid Build Coastguard Worker if (e1->suffix && e1->suffix_len == 3 &&
1299*1c60b9acSAndroid Build Coastguard Worker !memcmp(e1->suffix, "cri", 3)) {
1300*1c60b9acSAndroid Build Coastguard Worker struct lws_fts_entry *e2;
1301*1c60b9acSAndroid Build Coastguard Worker
1302*1c60b9acSAndroid Build Coastguard Worker e2 = e1;
1303*1c60b9acSAndroid Build Coastguard Worker while (e2){
1304*1c60b9acSAndroid Build Coastguard Worker if (e2->suffix)
1305*1c60b9acSAndroid Build Coastguard Worker lwsl_notice("%s\n", e2->suffix);
1306*1c60b9acSAndroid Build Coastguard Worker else
1307*1c60b9acSAndroid Build Coastguard Worker lwsl_notice("%c\n", e2->c);
1308*1c60b9acSAndroid Build Coastguard Worker
1309*1c60b9acSAndroid Build Coastguard Worker e2 = e2->parent;
1310*1c60b9acSAndroid Build Coastguard Worker }
1311*1c60b9acSAndroid Build Coastguard Worker
1312*1c60b9acSAndroid Build Coastguard Worker lwsl_err("*** %c CRI inst %d ch %d\n", e1->parent->c,
1313*1c60b9acSAndroid Build Coastguard Worker e1->instance_count, e1->child_count);
1314*1c60b9acSAndroid Build Coastguard Worker }
1315*1c60b9acSAndroid Build Coastguard Worker #endif
1316*1c60b9acSAndroid Build Coastguard Worker e1 = e1->sibling;
1317*1c60b9acSAndroid Build Coastguard Worker }
1318*1c60b9acSAndroid Build Coastguard Worker
1319*1c60b9acSAndroid Build Coastguard Worker /* if there are siblings, do those next */
1320*1c60b9acSAndroid Build Coastguard Worker
1321*1c60b9acSAndroid Build Coastguard Worker if (do_parent) {
1322*1c60b9acSAndroid Build Coastguard Worker do_parent = 0;
1323*1c60b9acSAndroid Build Coastguard Worker sp--;
1324*1c60b9acSAndroid Build Coastguard Worker }
1325*1c60b9acSAndroid Build Coastguard Worker
1326*1c60b9acSAndroid Build Coastguard Worker if (s[sp]->sibling)
1327*1c60b9acSAndroid Build Coastguard Worker s[sp] = s[sp]->sibling;
1328*1c60b9acSAndroid Build Coastguard Worker else {
1329*1c60b9acSAndroid Build Coastguard Worker /* if there are no siblings, do the parent */
1330*1c60b9acSAndroid Build Coastguard Worker do_parent = 1;
1331*1c60b9acSAndroid Build Coastguard Worker s[sp] = s[sp]->parent;
1332*1c60b9acSAndroid Build Coastguard Worker }
1333*1c60b9acSAndroid Build Coastguard Worker }
1334*1c60b9acSAndroid Build Coastguard Worker
1335*1c60b9acSAndroid Build Coastguard Worker spill(0, 1);
1336*1c60b9acSAndroid Build Coastguard Worker
1337*1c60b9acSAndroid Build Coastguard Worker assert(lseek(t->fd, 0, SEEK_END) == (off_t)t->c);
1338*1c60b9acSAndroid Build Coastguard Worker
1339*1c60b9acSAndroid Build Coastguard Worker /* drop the correct root trie offset + file length into the header */
1340*1c60b9acSAndroid Build Coastguard Worker
1341*1c60b9acSAndroid Build Coastguard Worker if (lseek(t->fd, 4, SEEK_SET) < 0) {
1342*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: unable to seek\n", __func__);
1343*1c60b9acSAndroid Build Coastguard Worker
1344*1c60b9acSAndroid Build Coastguard Worker goto bail;
1345*1c60b9acSAndroid Build Coastguard Worker }
1346*1c60b9acSAndroid Build Coastguard Worker
1347*1c60b9acSAndroid Build Coastguard Worker g32(buf, t->root->ofs);
1348*1c60b9acSAndroid Build Coastguard Worker g32(buf + 4, t->c);
1349*1c60b9acSAndroid Build Coastguard Worker if (write(t->fd, buf, 0x8) != 0x8)
1350*1c60b9acSAndroid Build Coastguard Worker goto bail;
1351*1c60b9acSAndroid Build Coastguard Worker
1352*1c60b9acSAndroid Build Coastguard Worker lwsl_notice("%s: index %d files (%uMiB) cpu time %dms, "
1353*1c60b9acSAndroid Build Coastguard Worker "alloc: %dKiB + %dKiB, "
1354*1c60b9acSAndroid Build Coastguard Worker "serialize: %dms, file: %dKiB\n", __func__,
1355*1c60b9acSAndroid Build Coastguard Worker t->next_file_index,
1356*1c60b9acSAndroid Build Coastguard Worker (int)(t->agg_raw_input / (1024 * 1024)),
1357*1c60b9acSAndroid Build Coastguard Worker (int)(t->agg_trie_creation_us / 1000),
1358*1c60b9acSAndroid Build Coastguard Worker (int)(lwsac_total_alloc(t->lwsac_head) / 1024),
1359*1c60b9acSAndroid Build Coastguard Worker (int)(t->worst_lwsac_input_size / 1024),
1360*1c60b9acSAndroid Build Coastguard Worker (int)(((uint64_t)lws_now_usecs() - tf) / 1000),
1361*1c60b9acSAndroid Build Coastguard Worker (int)(t->c / 1024));
1362*1c60b9acSAndroid Build Coastguard Worker
1363*1c60b9acSAndroid Build Coastguard Worker return 0;
1364*1c60b9acSAndroid Build Coastguard Worker
1365*1c60b9acSAndroid Build Coastguard Worker bail_seek:
1366*1c60b9acSAndroid Build Coastguard Worker lwsl_err("%s: problem seekings\n", __func__);
1367*1c60b9acSAndroid Build Coastguard Worker
1368*1c60b9acSAndroid Build Coastguard Worker bail:
1369*1c60b9acSAndroid Build Coastguard Worker return 1;
1370*1c60b9acSAndroid Build Coastguard Worker }
1371*1c60b9acSAndroid Build Coastguard Worker
1372*1c60b9acSAndroid Build Coastguard Worker
1373