xref: /aosp_15_r20/external/skia/tools/unicode_comparison/go/generate_table/main.go (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1// Copyright 2023 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// This executable generates a comparison table in HTML format
6// for all ICU implementations presented in a given root folder
7
8package main
9
10import (
11	"errors"
12	"flag"
13	"fmt"
14	"go.skia.org/skia/tools/unicode_comparison/go/helpers"
15	"html/template"
16	"os"
17	"path/filepath"
18	"sort"
19	"strconv"
20	"strings"
21)
22
// Range is a half-open [Start, End) span. FormattedChunks uses Start and End
// as byte offsets into Row.Text. Type labels the span; the visible code sets
// it to "missing", "extra" or "".
type Range struct {
	Start, End int
	Type       string
}
28
// Ratio is the main numeric type: an integer numerator (Num) over an integer
// denominator (Total). compareLines fills it with the number of differences
// over the number of expected entries.
type Ratio struct {
	Num, Total int
}
34
// FloatRatio is a floating-point numerator (Top) over a denominator (Bottom).
// compareData stores the actual time in Top and the expected time in Bottom.
type FloatRatio struct {
	Top, Bottom float64
}
39
40func (r Ratio) Divide() string {
41	if r.Num == 0 {
42		return "        "
43	} else {
44		return fmt.Sprintf("%.6f", float64(r.Num)/float64(r.Total))
45	}
46}
47func (fr FloatRatio) Percents() string {
48	if fr.Top == fr.Bottom {
49		return "        "
50	} else {
51		return fmt.Sprintf("%.6f", (float64(fr.Top)/float64(fr.Bottom)-1)*100)
52	}
53}
54
55func (r *Ratio) Add(other Ratio) {
56	if other.Num > 0 {
57		r.Num += other.Num
58		r.Total += other.Total
59	}
60}
61
62type CalculatedDelta struct {
63	Memory      float64
64	Performance FloatRatio
65	Disk        float64
66	RowCount    int
67	DiffCount   int
68	Graphemes   Ratio
69	SoftBreaks  Ratio
70	HardBreaks  Ratio
71	Whitespaces Ratio
72	Words       Ratio
73	Controls    Ratio
74	Data        RangedData
75}
76
77func NewCalculatedDelta() CalculatedDelta {
78	return CalculatedDelta{
79		Performance: FloatRatio{0, 1},
80		Graphemes:   Ratio{0, 1},
81		SoftBreaks:  Ratio{0, 1},
82		HardBreaks:  Ratio{0, 1},
83		Whitespaces: Ratio{0, 1},
84		Words:       Ratio{0, 1},
85		Controls:    Ratio{0, 1},
86	}
87}
88
89func (cd *CalculatedDelta) Add(other CalculatedDelta) {
90	cd.Performance.Top += other.Performance.Top
91	cd.Performance.Bottom += other.Performance.Bottom
92	cd.Memory += other.Memory
93	cd.Disk += other.Disk
94	cd.RowCount += 1
95	cd.DiffCount += other.DiffCount
96	cd.Graphemes.Add(other.Graphemes)
97	cd.SoftBreaks.Add(other.SoftBreaks)
98	cd.HardBreaks.Add(other.HardBreaks)
99	cd.Whitespaces.Add(other.Whitespaces)
100	cd.Words.Add(other.Words)
101	cd.Controls.Add(other.Controls)
102}
103
104type RangeDataSet struct {
105	Graphemes   []Range
106	SoftBreaks  []Range
107	HardBreaks  []Range
108	Whitespaces []Range
109	Words       []Range
110	Controls    []Range
111}
112type RangedData struct {
113	Missing RangeDataSet
114	Extra   RangeDataSet
115}
116
// ParsedData is the content of one result file as read by parseFile: two
// measurements followed by one integer list per category.
type ParsedData struct {
	Count        int
	Time, Memory float64

	Graphemes, SoftBreaks, HardBreaks, Whitespaces, Words, Controls []int
}
128
129func NewParsedData() ParsedData {
130	return ParsedData{}
131}
132
133// Row type
134type Row struct {
135	Id       string
136	Num      string
137	ParentId string
138	Names    []string
139	Text     string
140	IsFile   bool
141	Delta    CalculatedDelta
142	Children []Row
143}
144
145func NewImpl(impl string) *Row {
146	return &Row{Names: []string{impl}, IsFile: false, Delta: NewCalculatedDelta(), Children: nil}
147}
148
149func NewLocale(impl string, locale string) *Row {
150	return &Row{Names: []string{impl, locale}, IsFile: false, Delta: NewCalculatedDelta(), Children: nil}
151}
152
153func NewSize(names ...string) *Row {
154	return &Row{Names: names, IsFile: false, Delta: NewCalculatedDelta(), Children: nil}
155}
156
157func NewRow(text string, delta CalculatedDelta, names ...string) *Row {
158	return &Row{Names: names, IsFile: true, Text: text, Delta: delta, Children: nil}
159}
160
161type Chunk struct {
162	Text    string
163	Classes string
164	Indexes Range
165}
166
167func (row Row) FormattedChunks(ranges []Range, name string, chunkType string, includeRange bool) []Chunk {
168
169	var results []Chunk
170	gap := 0
171	for i, r := range ranges {
172		if i == 0 {
173			continue
174		}
175
176		if r.Start > gap {
177			text := row.Text[gap:r.Start]
178			results = append(results, Chunk{text, "", Range{gap, r.Start, ""}})
179		}
180		if includeRange {
181			text := row.Text[r.Start:r.End]
182			if name == "whitespace" {
183				corrected := ""
184				for _, t := range text {
185					if t == ' ' {
186						corrected += "nbsp;"
187					} else {
188						corrected += string(t)
189					}
190				}
191				text = corrected
192			}
193			results = append(results, Chunk{text, name, Range{r.Start, r.End, chunkType}})
194			gap = r.End
195		} else {
196			results = append(results, Chunk{"\u200B", name, Range{r.Start, r.Start, chunkType}})
197			gap = r.Start
198		}
199	}
200	if gap < len(row.Text) {
201		text := row.Text[gap:]
202		results = append(results, Chunk{text, "", Range{gap, len(row.Text), ""}})
203	}
204	return results
205}
206
207func (r Row) FormattedMissingGraphemes() []Chunk {
208	return r.FormattedChunks(r.Delta.Data.Missing.Graphemes, "grapheme", "missing", true)
209}
210
211func (r Row) FormattedExtraGraphemes() []Chunk {
212	return r.FormattedChunks(r.Delta.Data.Extra.Graphemes, "grapheme", "extra", true)
213}
214
215func (r Row) FormattedMissingSoftBreaks() []Chunk {
216	return r.FormattedChunks(r.Delta.Data.Missing.SoftBreaks, "softBreak", "missing", false)
217}
218
219func (r Row) FormattedExtraSoftBreaks() []Chunk {
220	return r.FormattedChunks(r.Delta.Data.Extra.SoftBreaks, "softBreak", "extra", false)
221}
222
223func (r Row) FormattedMissingHardBreaks() []Chunk {
224	return r.FormattedChunks(r.Delta.Data.Missing.HardBreaks, "hardBreak", "missing", false)
225}
226
227func (r Row) FormattedExtraHardBreaks() []Chunk {
228	return r.FormattedChunks(r.Delta.Data.Extra.HardBreaks, "hardBreak", "extra", false)
229}
230
231func (r Row) FormattedMissingWords() []Chunk {
232	return r.FormattedChunks(r.Delta.Data.Missing.Words, "word", "missing", false)
233}
234
235func (r Row) FormattedExtraWords() []Chunk {
236	return r.FormattedChunks(r.Delta.Data.Extra.Words, "word", "extra", false)
237}
238
239func (r Row) FormattedMissingWhitespaces() []Chunk {
240	return r.FormattedChunks(r.Delta.Data.Missing.Whitespaces, "whitespace", "missing", true)
241}
242
243func (r Row) FormattedExtraWhitespaces() []Chunk {
244	return r.FormattedChunks(r.Delta.Data.Extra.Whitespaces, "whitespace", "extra", true)
245}
246
247func (r Row) FormattedMissingControls() []Chunk {
248	return r.FormattedChunks(r.Delta.Data.Missing.Controls, "control", "missing", false)
249}
250
251func (r Row) FormattedExtraControls() []Chunk {
252	return r.FormattedChunks(r.Delta.Data.Extra.Controls, "control", "extra", false)
253}
254
255func (r *Row) Add(child Row) {
256	r.Delta.Add(child.Delta)
257	r.Children = append(r.Children, child)
258}
259
260func (r Row) Name() string {
261	if r.IsFile {
262		return r.Names[len(r.Names)-1]
263	} else {
264		names := ""
265		for i, name := range r.Names {
266			if i > 0 {
267				names += "."
268			}
269			names += name
270		}
271		return names
272	}
273}
274
275func (r Row) ParentName() string {
276	names := ""
277	for i, name := range r.Names {
278		if i == len(r.Names)-1 {
279			break
280		}
281		if i > 0 {
282			names += "."
283		}
284		names += name
285	}
286	return names
287}
288
289func (r Row) Implementation() string {
290	return r.Names[0]
291}
292
293func (r Row) Level() string {
294	return fmt.Sprintf("l%d", len(r.Names))
295}
296
297func (r Row) HasText() bool {
298	return len(r.Text) != 0
299}
300
301func (r Row) HasChildren() bool {
302	return len(r.Children) > 0
303}
304
305func (r Row) IsImplementation() bool {
306	return len(r.Names) == 1
307}
308
309func (r Row) HasNoDifferences() bool {
310	return len(r.Delta.Data.Missing.Graphemes) == 0 &&
311		len(r.Delta.Data.Missing.SoftBreaks) == 0 &&
312		len(r.Delta.Data.Missing.HardBreaks) == 0 &&
313		len(r.Delta.Data.Missing.Words) == 0 &&
314		len(r.Delta.Data.Missing.Whitespaces) == 0 &&
315		len(r.Delta.Data.Missing.Controls) == 0 &&
316		len(r.Delta.Data.Extra.Graphemes) == 0 &&
317		len(r.Delta.Data.Extra.SoftBreaks) == 0 &&
318		len(r.Delta.Data.Extra.HardBreaks) == 0 &&
319		len(r.Delta.Data.Extra.Words) == 0 &&
320		len(r.Delta.Data.Extra.Whitespaces) == 0 &&
321		len(r.Delta.Data.Extra.Controls) == 0
322}
323
324func (r Row) Differences() int {
325	return len(r.Delta.Data.Missing.Graphemes) +
326		len(r.Delta.Data.Missing.SoftBreaks) +
327		len(r.Delta.Data.Missing.HardBreaks) +
328		len(r.Delta.Data.Missing.Words) +
329		len(r.Delta.Data.Missing.Whitespaces) +
330		len(r.Delta.Data.Missing.Controls) +
331		len(r.Delta.Data.Extra.Graphemes) +
332		len(r.Delta.Data.Extra.SoftBreaks) +
333		len(r.Delta.Data.Extra.HardBreaks) +
334		len(r.Delta.Data.Extra.Words) +
335		len(r.Delta.Data.Extra.Whitespaces) +
336		len(r.Delta.Data.Extra.Controls)
337}
338
339func (r Row) HasMissingGraphemes() bool {
340	return len(r.Delta.Data.Missing.Graphemes) > 0
341}
342
343func (r Row) HasExtraGraphemes() bool {
344	return len(r.Delta.Data.Extra.Graphemes) > 0
345}
346
347func (r Row) HasNoGraphemes() bool {
348	return !r.HasMissingGraphemes() && !r.HasExtraGraphemes()
349}
350
351func (r Row) HasMissingSoftBreaks() bool {
352	return len(r.Delta.Data.Missing.SoftBreaks) > 0
353}
354
355func (r Row) HasExtraSoftBreaks() bool {
356	return len(r.Delta.Data.Extra.SoftBreaks) > 0
357}
358
359func (r Row) HasNoSoftBreaks() bool {
360	return !r.HasMissingSoftBreaks() && !r.HasExtraSoftBreaks()
361}
362
363func (r Row) HasMissingHardBreaks() bool {
364	return len(r.Delta.Data.Missing.HardBreaks) > 0
365}
366
367func (r Row) HasExtraHardBreaks() bool {
368	return len(r.Delta.Data.Extra.HardBreaks) > 0
369}
370
371func (r Row) HasNoHardBreaks() bool {
372	return !r.HasMissingHardBreaks() && !r.HasExtraHardBreaks()
373}
374
375func (r Row) HasMissingWhitespaces() bool {
376	return len(r.Delta.Data.Missing.Whitespaces) > 0
377}
378
379func (r Row) HasExtraWhitespaces() bool {
380	return len(r.Delta.Data.Extra.Whitespaces) > 0
381}
382
383func (r Row) HasNoWhitespaces() bool {
384	return !r.HasMissingWhitespaces() && !r.HasExtraWhitespaces()
385}
386
387func (r Row) HasMissingWords() bool {
388	return len(r.Delta.Data.Missing.Words) > 0
389}
390
391func (r Row) HasExtraWords() bool {
392	return len(r.Delta.Data.Extra.Words) > 0
393}
394
395func (r Row) HasNoWords() bool {
396	return !r.HasMissingWords() && !r.HasExtraWords()
397}
398
399func (r Row) HasMissingControls() bool {
400	return len(r.Delta.Data.Missing.Controls) > 0
401}
402
403func (r Row) HasExtraControls() bool {
404	return len(r.Delta.Data.Extra.Controls) > 0
405}
406func (r Row) HasNoControls() bool {
407	return !r.HasMissingControls() && !r.HasExtraControls()
408}
409
410func (r Row) MissingGraphemeNum() int {
411	return len(r.Delta.Data.Missing.Graphemes) - 1
412}
413
414func (r Row) ExtraGraphemeNum() int {
415	return len(r.Delta.Data.Extra.Graphemes) - 1
416}
417
418func (r Row) MissingSoftBreakNum() int {
419	return len(r.Delta.Data.Missing.SoftBreaks) - 1
420}
421
422func (r Row) ExtraSoftBreakNum() int {
423	return len(r.Delta.Data.Extra.SoftBreaks) - 1
424}
425
426func (r Row) MissingHardBreakNum() int {
427	return len(r.Delta.Data.Missing.HardBreaks) - 1
428}
429
430func (r Row) ExtraHardBreakNum() int {
431	return len(r.Delta.Data.Extra.HardBreaks) - 1
432}
433
434func (r Row) MissingWhitespaceNum() int {
435	return len(r.Delta.Data.Missing.Whitespaces) - 1
436}
437
438func (r Row) ExtraWhitespaceNum() int {
439	return len(r.Delta.Data.Extra.Whitespaces) - 1
440}
441
442func (r Row) MissingWordNum() int {
443	return len(r.Delta.Data.Missing.Words) - 1
444}
445
446func (r Row) ExtraWordNum() int {
447	return len(r.Delta.Data.Extra.Words) - 1
448}
449
450func (r Row) MissingControlNum() int {
451	return len(r.Delta.Data.Missing.Controls) - 1
452}
453
454func (r Row) ExtraControlNum() int {
455	return len(r.Delta.Data.Extra.Controls) - 1
456}
457
458type WebPage struct {
459	Title   string
460	Heading string
461	Rows    []Row
462}
463
464func assignIDs(children []Row, parentId, parentNum string) {
465	for i := range children {
466		children[i].Num = fmt.Sprintf("%s_%d", parentNum, i+1)
467		children[i].Id = fmt.Sprintf("%s_%d", parentId, i+1)
468		children[i].ParentId = parentId
469		assignIDs(children[i].Children, children[i].Id, children[i].Num)
470	}
471}
472
473func addImpl(web *WebPage, impl Row) {
474	impl.Num = fmt.Sprintf("%d", len(web.Rows)+1)
475	impl.Id = fmt.Sprintf("id_%d", len(web.Rows)+1)
476	impl.ParentId = ""
477	assignIDs(impl.Children, impl.Id, impl.Num)
478	web.Rows = append(web.Rows, impl)
479}
480
481func parseFile(path string, textLen int) (ParsedData, error) {
482	var result ParsedData
483	// Time: float64
484	// Memory: float64
485	// Graphemes: n1 n2 ...
486	// SoftBreaks: n1 n2 ...
487	// HardBreaks: n1 n2 ...
488	// Whitespaces: n1 n2 ...
489	// Words: n1 n2 ...
490	// Controls: n1 n2 ...
491	content, err := os.ReadFile(path)
492	if err != nil {
493		return result, err
494	}
495
496	lines := strings.Split(string(content), "\n")
497	if len(lines) < 8 {
498		return result, errors.New("Wrong data format (number of lines)")
499	}
500
501	result.Time, err = strconv.ParseFloat(lines[0], 64)
502	if err != nil {
503		return result, errors.New("Wrong data format (time)")
504	}
505	result.Memory, err = strconv.ParseFloat(lines[1], 64)
506	if err != nil {
507		return result, errors.New("Wrong data format (memory)")
508	}
509
510	result.Graphemes = helpers.SplitAsInts(lines[2]+" "+strconv.Itoa(textLen), " ")
511	result.SoftBreaks = helpers.SplitAsInts(lines[3]+" "+strconv.Itoa(textLen), " ")
512	result.HardBreaks = helpers.SplitAsInts(lines[4]+" "+strconv.Itoa(textLen), " ")
513	result.Whitespaces = helpers.SplitAsInts(lines[5]+" "+strconv.Itoa(textLen), " ")
514	result.Words = helpers.SplitAsInts(lines[6]+" "+strconv.Itoa(textLen), " ")
515	result.Controls = helpers.SplitAsInts(lines[7]+" "+strconv.Itoa(textLen), " ")
516
517	return result, nil
518}
519
520func compareLines(expected []int, actual []int, includeRange bool, missing bool) (Ratio, []Range) {
521
522	var diff []Range
523	diff = append(diff, Range{len(actual), len(expected), ""})
524	aLen := len(actual) - 1
525	eLen := len(expected) - 1
526
527	e := 1
528	a := 1
529	for e < eLen || a < aLen {
530		a1 := actual[a]
531		if includeRange && a < aLen {
532			a1 = actual[a+1]
533		}
534		e1 := expected[e]
535		if includeRange && e < eLen {
536			e1 = expected[e+1]
537		}
538
539		if e >= eLen {
540			if !missing {
541				diff = append(diff, Range{helpers.Abs(actual[a]), helpers.Abs(a1), "extra"})
542			}
543			a += 1
544		} else if a >= aLen {
545			if missing {
546				diff = append(diff, Range{helpers.Abs(expected[e]), helpers.Abs(e1), "missing"})
547			}
548			e += 1
549		} else if actual[a] < expected[e] {
550			if !missing {
551				diff = append(diff, Range{helpers.Abs(actual[a]), helpers.Abs(a1), "extra"})
552			}
553			a += 1
554		} else if actual[a] > expected[e] {
555			if missing {
556				diff = append(diff, Range{helpers.Abs(expected[e]), helpers.Abs(e1), "missing"})
557			}
558			e += 1
559		} else {
560			a += 1
561			e += 1
562		}
563	}
564
565	// TODO: keep the difference, too
566	if len(diff) > 1 {
567		return Ratio{len(diff) - 1, len(expected)}, diff
568	} else {
569		return Ratio{0, 1}, nil
570	}
571}
572
573func compareData(expected ParsedData, actual ParsedData) CalculatedDelta {
574
575	var delta CalculatedDelta
576
577	delta.Performance.Top = actual.Time
578	delta.Performance.Bottom = expected.Time
579
580	var deltaGraphemes, deltaSoftBreaks, deltaHardBreaks, deltaWhitespaces, deltaWords, deltaControls Ratio
581	deltaGraphemes, delta.Data.Missing.Graphemes = compareLines(expected.Graphemes, actual.Graphemes, true, true)
582
583	deltaGraphemes, delta.Data.Missing.Graphemes = compareLines(expected.Graphemes, actual.Graphemes, true, true)
584	deltaSoftBreaks, delta.Data.Missing.SoftBreaks = compareLines(expected.SoftBreaks, actual.SoftBreaks, false, true)
585	deltaHardBreaks, delta.Data.Missing.HardBreaks = compareLines(expected.HardBreaks, actual.HardBreaks, false, true)
586	deltaWhitespaces, delta.Data.Missing.Whitespaces = compareLines(expected.Whitespaces, actual.Whitespaces, true, true)
587	deltaWords, delta.Data.Missing.Words = compareLines(expected.Words, actual.Words, true, true)
588	deltaControls, delta.Data.Missing.Controls = compareLines(expected.Controls, actual.Controls, false, true)
589
590	delta.Graphemes.Add(deltaGraphemes)
591	delta.SoftBreaks.Add(deltaSoftBreaks)
592	delta.HardBreaks.Add(deltaHardBreaks)
593	delta.Whitespaces.Add(deltaWhitespaces)
594	delta.Words.Add(deltaWords)
595	delta.Controls.Add(deltaControls)
596
597	deltaGraphemes, delta.Data.Extra.Graphemes = compareLines(expected.Graphemes, actual.Graphemes, true, false)
598	deltaSoftBreaks, delta.Data.Extra.SoftBreaks = compareLines(expected.SoftBreaks, actual.SoftBreaks, false, false)
599	deltaHardBreaks, delta.Data.Extra.HardBreaks = compareLines(expected.HardBreaks, actual.HardBreaks, false, false)
600	deltaWhitespaces, delta.Data.Extra.Whitespaces = compareLines(expected.Whitespaces, actual.Whitespaces, true, false)
601	deltaWords, delta.Data.Extra.Words = compareLines(expected.Words, actual.Words, true, false)
602	deltaControls, delta.Data.Extra.Controls = compareLines(expected.Controls, actual.Controls, false, false)
603
604	delta.Graphemes.Add(deltaGraphemes)
605	delta.SoftBreaks.Add(deltaSoftBreaks)
606	delta.HardBreaks.Add(deltaHardBreaks)
607	delta.Whitespaces.Add(deltaWhitespaces)
608	delta.Words.Add(deltaWords)
609	delta.Controls.Add(deltaControls)
610
611	return delta
612}
613
614func printDifference(text string, diff []int) {
615	count := diff[0]
616
617	if len(diff) <= 1 {
618		// No diff
619	} else if (len(diff)-1)*10 < count {
620		// Too small diff
621		fmt.Printf("%d < %d:\n%s\n", (len(diff)-1)*10, count, text)
622		return
623	} else if count == 0 {
624		// Too small string
625		fmt.Printf("%d == 0:\n%s\n", count, text)
626		return
627	}
628	first := helpers.Abs(diff[1])
629	last := first + 10
630	if last >= len(text) {
631		last = len(text) - 1
632	}
633	fmt.Printf("Difference @%d:\n%s\n", first, text[:last])
634}
635
636func finishRows(rows []Row, start int) []Row {
637	if len(rows) == 0 {
638		return []Row{}
639	}
640	i := len(rows) - 1
641	for i > start {
642		(rows)[i-1].Add((rows)[i])
643		i -= 1
644	}
645
646	if start > 0 {
647		return rows[:start]
648	} else {
649		return rows[:start+1]
650	}
651}
652
653func findParentRow(rows []Row, name string) int {
654	for i := range rows {
655		row := rows[len(rows)-1-i]
656		if row.Names[len(row.Names)-1] == name {
657			return len(rows) - 1 - i
658		}
659	}
660	return -1
661}
662
663func compareFiles(inputPath string, sampleLimit int) (WebPage, error) {
664
665	var rows []Row
666
667	// Define the data to be used in the template
668	web := WebPage{
669		Title: "Comparison Table (accuracy, performance and disk memory)",
670	}
671
672	err := filepath.Walk(inputPath,
673		func(inputFile string, info os.FileInfo, err error) error {
674			if err != nil {
675				fmt.Println(err)
676				return err
677			}
678			tokens := strings.Split(inputFile, string(os.PathSeparator))
679			outputIndex := -1
680			for i, t := range tokens {
681				if t == "output" {
682					outputIndex = i
683					break
684				}
685			}
686			if outputIndex < 0 {
687				return fmt.Errorf("Currently only supported directory structure: [...]/output/{implementation}/{locale}:\n%s\n", inputFile)
688			}
689
690			if info.IsDir() {
691				if len(tokens) == outputIndex+1 {
692					// ~/datasets/output
693				} else if len(tokens) == outputIndex+2 {
694					// ~/datasets/output/icu
695					rows = finishRows(rows, 0)
696					if len(rows) > 0 {
697						addImpl(&web, rows[0])
698					}
699					rows = []Row{*NewImpl(tokens[outputIndex+1])}
700				} else if len(tokens) == outputIndex+3 {
701					// ~/datasets/output/icu/en
702					rows = finishRows(rows, 0)
703					rows = append(rows, *NewLocale(rows[0].Names[0], tokens[outputIndex+2]))
704				} else {
705					fmt.Printf("skipping %s\n", inputFile)
706					return nil
707				}
708			} else if len(rows) <= 1 {
709				return errors.New(fmt.Sprintf("Wrong directory structure: %s\n", inputFile))
710			} else {
711				// Find the parent row
712				parent := &rows[len(rows)-1]
713				impl := parent.Names[0]
714				// Read and parse the data
715				textFile := strings.Replace(inputFile, filepath.Join("output", impl), "input", 1)
716				textContent, err := os.ReadFile(textFile)
717				helpers.Check(err)
718				if len(textContent) == 0 {
719					fmt.Printf("Empty text file %s\n", inputFile)
720					return nil
721				}
722
723				var actualData ParsedData
724				actualData, err = parseFile(inputFile, len(textContent))
725				if err != nil {
726					return errors.New(fmt.Sprintf("Cannot parse output file %s: %s\n", inputFile, err.Error()))
727				}
728
729				var validationData ParsedData
730				validationFile := strings.Replace(inputFile, filepath.Join("output", impl), "validation", 1)
731				validationData, err = parseFile(validationFile, len(textContent))
732				if err != nil {
733					return errors.New(fmt.Sprintf("Cannot parse validation file%s: %s\n", validationFile, err.Error()))
734				}
735
736				// Compare the data
737				var delta CalculatedDelta
738				_, shortFileName := filepath.Split(inputFile)
739				delta = compareData(validationData, actualData)
740				row := NewRow(string(textContent), delta, append(parent.Names, shortFileName)...)
741				if !row.HasNoDifferences() {
742					parent.Add(*row)
743					sort.Slice(parent.Children, func(i, j int) bool {
744						return parent.Children[i].Differences() > parent.Children[j].Differences()
745					})
746					if len(parent.Children) > sampleLimit {
747						parent.Children = parent.Children[0 : sampleLimit-1]
748					}
749				}
750			}
751			return nil
752		})
753	rows = finishRows(rows, 0)
754	if len(rows) > 0 {
755		addImpl(&web, rows[0])
756	}
757	return web, err
758}
759
760func main() {
761	var (
762		root        = flag.String("root", "~/datasets", "Folder (inputs for the table expected to be under <Folder>/output/>")
763		sampleLimit = flag.Int("sampleLimit", 10, "Number of files to show with differences")
764	)
765	flag.Parse()
766	if *root == "" {
767		fmt.Println("Must set --root")
768		flag.PrintDefaults()
769	}
770	*root = helpers.ExpandPath(*root)
771
772	// Parse the template
773	t, err := template.ParseFiles("../html/index.html", "../html/scripts.html", "../html/styles.html", "../html/tbody.html")
774	helpers.Check(err)
775
776	// Create index.html
777	indexPath := filepath.Join(*root, "index.html")
778	indexFile, err := os.Create(indexPath)
779	helpers.Check(err)
780
781	outputPath := filepath.Join(*root, "output")
782	web, err := compareFiles(outputPath, *sampleLimit)
783	helpers.Check(err)
784
785	// Execute the template and write the result to index.html
786	err = t.Execute(indexFile, web)
787	helpers.Check(err)
788	indexFile.Close()
789}
790