xref: /aosp_15_r20/external/bazelbuild-rules_android/src/tools/ak/bucketize/bucketize.go (revision 9e965d6fece27a77de5377433c2f7e6999b8cc0b)
// Copyright 2018 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package bucketize provides functionality to bucketize Android resources.
package bucketize

import (
	"bytes"
	"context"
	"encoding/xml"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path"
	"strings"
	"sync"

	"src/common/golang/flags"
	"src/common/golang/shard"
	"src/common/golang/walk"
	"src/common/golang/xml2"
	"src/tools/ak/akhelper"
	"src/tools/ak/res/res"
	"src/tools/ak/types"
)

const (
	numParsers = 25
)

// Archiver processes the provided resource files and directories and stores the data.
type Archiver struct {
	ResFiles    []*res.PathInfo
	Partitioner Partitioner
}

// ResourcesAttribute correlates an attribute of a resources xml tag with the file where it originates.
type ResourcesAttribute struct {
	Attribute xml.Attr
	ResFile   *res.PathInfo
}

var (
	// Cmd defines the command to run bucketize.
	Cmd = types.Command{
		Init: Init,
		Run:  Run,
		Desc: desc,
		Flags: []string{
			"res_paths",
			"typed_outputs",
		},
	}

	resPaths     flags.StringList
	typedOutputs flags.StringList

	initOnce sync.Once
)

// Init initializes bucketize.
func Init() {
	initOnce.Do(func() {
		flag.Var(&resPaths, "res_paths", "List of res paths (a file or directory).")
		flag.Var(&typedOutputs, "typed_outputs", akhelper.FormatDesc([]string{
			"A list of output file paths, each path prefixed with the res type it supports.",
			"<res_type>:<file_path> i.e. string:/foo/bar/res-string-0.zip,string:/foo/bar/res-string-1.zip,...",
			"The number of files per res type determines the number of shards."}))
	})
}
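// For illustration only (hypothetical output paths), a bucketize run that
// shards string resources across two archives and layout resources into one
// could be invoked with:
//
//	-res_paths=res/values/strings.xml,res/layout
//	-typed_outputs=string:/out/res-string-0.zip,string:/out/res-string-1.zip,layout:/out/res-layout-0.zip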

func desc() string {
	return "Bucketize Android resources."
}

// makeArchiver creates an Archiver.
func makeArchiver(resFiles []string, p Partitioner) (*Archiver, error) {
	pis, err := res.MakePathInfos(resFiles)
	if err != nil {
		return nil, fmt.Errorf("converting res paths failed: %v", err)
	}
	return &Archiver{ResFiles: pis, Partitioner: p}, nil
}

// Archive processes the res directories and files of the archiver.
func (a *Archiver) Archive(ctx context.Context) error {
	ctx, cancel := context.WithCancel(prefixErr(ctx, "archive: "))
	defer cancel()
	vPIC, nvPIC := separatePathInfosByValues(ctx, a.ResFiles)
	vrCs := make([]<-chan *res.ValuesResource, 0, numParsers)
	raCs := make([]<-chan *ResourcesAttribute, 0, numParsers)
	errCs := make([]<-chan error, 0, numParsers)
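	// Fan out: each of the numParsers goroutines started below parses values
	// files received from vPIC; the per-parser output channels are then merged
	// back into single streams before being drained by archive.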
	for i := 0; i < numParsers; i++ {
		vrC, raC, vErrC := handleValuesPathInfos(ctx, vPIC)
		vrCs = append(vrCs, vrC)
		raCs = append(raCs, raC)
		errCs = append(errCs, vErrC)
	}
	mVRC := mergeValuesResourceStreams(ctx, vrCs)
	mRAC := mergeResourcesAttributeStreams(ctx, raCs)
	mErrC := mergeErrStreams(ctx, errCs)
	return a.archive(ctx, nvPIC, mVRC, mRAC, mErrC)
}

// archive takes PathInfo, ValuesResource and error channels and processes the values given.
func (a *Archiver) archive(ctx context.Context, piC <-chan *res.PathInfo, vrC <-chan *res.ValuesResource, raC <-chan *ResourcesAttribute, errC <-chan error) error {
	var errs []error
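	// Drain all channels until each is closed. A closed channel is set to nil
	// so its case can no longer be selected; the first error received breaks
	// out of the loop immediately.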
Loop:
	for piC != nil || vrC != nil || errC != nil || raC != nil {
		select {
		case e, ok := <-errC:
			if !ok {
				errC = nil
				continue
			}
			errs = append(errs, e)
			break Loop
		case ra, ok := <-raC:
			if !ok {
				raC = nil
				continue
			}
			a.Partitioner.CollectResourcesAttribute(ra)
		case pi, ok := <-piC:
			if !ok {
				piC = nil
				continue
			}
			a.Partitioner.CollectPathResource(*pi)
		case vr, ok := <-vrC:
			if !ok {
				vrC = nil
				continue
			}
			if err := a.Partitioner.CollectValues(vr); err != nil {
				return fmt.Errorf("got error collecting values: %v", err)
			}
		}
	}

	if len(errs) != 0 {
		return errorf(ctx, "errors encountered: %v", errs)
	}
	if err := a.Partitioner.Close(); err != nil {
		return fmt.Errorf("got error closing partitioner: %v", err)
	}
	return nil
}

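// handleValuesPathInfos starts a goroutine that parses every values file
// received on piC; parsed resources, resources-tag attributes, and errors are
// delivered on the returned channels, all of which are closed once piC is
// drained or parsing stops.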
func handleValuesPathInfos(ctx context.Context, piC <-chan *res.PathInfo) (<-chan *res.ValuesResource, <-chan *ResourcesAttribute, <-chan error) {
	vrC := make(chan *res.ValuesResource)
	raC := make(chan *ResourcesAttribute)
	errC := make(chan error)
	go func() {
		defer close(vrC)
		defer close(raC)
		defer close(errC)
		for pi := range piC {
			if !syncParse(prefixErr(ctx, fmt.Sprintf("%s values-parse: ", pi.Path)), pi, vrC, raC, errC) {
				return
			}
		}
	}()
	return vrC, raC, errC
}

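// syncParse opens the values file at pi.Path and parses it, emitting parsed
// resources on vrC, resources-tag attributes on raC, and any error on errC.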
func syncParse(ctx context.Context, pi *res.PathInfo, vrC chan<- *res.ValuesResource, raC chan<- *ResourcesAttribute, errC chan<- error) bool {
	f, err := os.Open(pi.Path)
	if err != nil {
		return sendErr(ctx, errC, errorf(ctx, "open failed: %v", err))
	}
	defer f.Close()
	return syncParseReader(ctx, pi, xml.NewDecoder(f), vrC, raC, errC)
}

func syncParseReader(ctx context.Context, pi *res.PathInfo, dec *xml.Decoder, vrC chan<- *res.ValuesResource, raC chan<- *ResourcesAttribute, errC chan<- error) bool {
	// A shadow Encoder is used to track xml state, such as namespaces. The state will be inherited by child encoders.
	parentEnc := xml2.NewEncoder(ioutil.Discard)
	for {
		t, err := dec.Token()
		if err == io.EOF {
			return true
		}
		if err != nil {
			return sendErr(ctx, errC, errorf(ctx, "token failed: %v", err))
		}
		if err := parentEnc.EncodeToken(t); err != nil {
			return sendErr(ctx, errC, errorf(ctx, "encoding token %s failed: %v", t, err))
		}
		if se, ok := t.(xml.StartElement); ok && se.Name == res.ResourcesTagName {
			for _, xmlAttr := range se.Attr {
				raC <- &ResourcesAttribute{ResFile: pi, Attribute: xmlAttr}
			}
			// AAPT2 does not support multiple resources sections in a single file and silently ignores
			// subsequent resources sections. The parser will only parse the first resources tag and exit.
			return parseRes(ctx, parentEnc, pi, dec, vrC, errC)
		}
	}
}

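// skipTag reports whether se is one of the resources children that should be skipped.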
func skipTag(se xml.StartElement) bool {
	_, ok := res.ResourcesChildToSkip[se.Name]
	return ok
}

func parseRes(ctx context.Context, parentEnc *xml2.Encoder, pi *res.PathInfo, dec *xml.Decoder, vrC chan<- *res.ValuesResource, errC chan<- error) bool {
	for {
		t, err := dec.Token()
		if err != nil {
			return sendErr(ctx, errC, errorf(ctx, "extract token failed: %v", err))
		}
		// Encode all tokens to the shadow Encoder at the top-level loop to keep track of any required xml state.
		if err := parentEnc.EncodeToken(t); err != nil {
			return sendErr(ctx, errC, errorf(ctx, "encoding token %s failed: %v", t, err))
		}
		switch t.(type) {
		case xml.StartElement:
			se := t.(xml.StartElement)
			if skipTag(se) {
				dec.Skip()
				break
			}

			fqn, err := extractFQN(se)
			if err != nil {
				return sendErr(ctx, errC, errorf(ctx, "extract name and type failed: %v", err))
			}

			b, err := extractElement(parentEnc, dec, se)
			if err != nil {
				return sendErr(ctx, errC, errorf(ctx, "extracting element failed: %v", err))
			}

			if !sendVR(ctx, vrC, &res.ValuesResource{pi, fqn, b.Bytes()}) {
				return false
			}

			if fqn.Type == res.Styleable {
				// With a declare-styleable tag, parse its children and treat them as direct children of resources.
				dsDec := xml.NewDecoder(b)
				dsDec.Token() // we've already processed the first token (the declare-styleable start element)
				if !parseRes(ctx, parentEnc, pi, dsDec, vrC, errC) {
					return false
				}
			}
		case xml.EndElement:
			return true
		}
	}
}

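// extractFQN derives the fully qualified resource name of a start element. An
// <item> tag carries its resource type in an explicit type attribute; any
// other tag maps to a resource type by its tag name.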
func extractFQN(se xml.StartElement) (res.FullyQualifiedName, error) {
	if matches(se.Name, res.ItemTagName) {
		nameAttr, resType, err := extractNameAndType(se)
		if err != nil {
			return res.FullyQualifiedName{}, err
		}
		return res.ParseName(nameAttr, resType)
	}

	nameAttr, err := extractName(se)
	if err != nil {
		return res.FullyQualifiedName{}, err
	}
	if resType, ok := res.ResourcesTagToType[se.Name.Local]; ok {
		return res.ParseName(nameAttr, resType)
	}
	return res.FullyQualifiedName{}, fmt.Errorf("%s: is an unhandled tag", se.Name.Local)
}

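// extractName returns the value of the name attribute of se.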
func extractName(se xml.StartElement) (nameAttr string, err error) {
	for _, a := range se.Attr {
		if matches(res.NameAttrName, a.Name) {
			nameAttr = a.Value
			break
		}
	}
	if nameAttr == "" {
		err = fmt.Errorf("%s: tag is missing the %q attribute or it is empty", se.Name.Local, res.NameAttrName.Local)
	}
	return
}

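// extractNameAndType returns the values of the name and type attributes of se,
// with the type parsed into a res.Type.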
func extractNameAndType(se xml.StartElement) (nameAttr string, resType res.Type, err error) {
	var typeAttr string
	for _, a := range se.Attr {
		if matches(res.NameAttrName, a.Name) {
			nameAttr = a.Value
		}
		if matches(res.TypeAttrName, a.Name) {
			typeAttr = a.Value
		}
	}
	if nameAttr == "" {
		err = fmt.Errorf("%s: tag is missing the %q attribute or it is empty", se.Name.Local, res.NameAttrName.Local)
		return
	}
	if typeAttr == "" {
		err = fmt.Errorf("%s: tag is missing the %q attribute or it is empty", se.Name.Local, res.TypeAttrName.Local)
		return
	}
	resType, err = res.ParseType(typeAttr)
	return
}

func matches(n1, n2 xml.Name) bool {
	// Ignores xml.Name Space attributes unless both names specify Space.
	if n1.Space == "" || n2.Space == "" {
		return n1.Local == n2.Local
	}
	return n1.Local == n2.Local && n1.Space == n2.Space
}

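// extractElement serializes the element started by se, along with everything
// nested inside it, into a buffer. A child encoder is used so that xml state
// (such as namespaces) tracked by parentEnc is carried over.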
func extractElement(parentEnc *xml2.Encoder, dec *xml.Decoder, se xml.Token) (*bytes.Buffer, error) {
	// copy tag contents to a buffer
	b := &bytes.Buffer{}
	enc := xml2.ChildEncoder(b, parentEnc)
	if err := enc.EncodeToken(se); err != nil {
		return nil, fmt.Errorf("encoding start element failed: %v", err)
	}
	if err := copyTag(enc, dec); err != nil {
		return nil, fmt.Errorf("copyTag failed: %v", err)
	}
	enc.Flush()
	return b, nil
}

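// copyTag copies tokens from dec to enc, recursing into nested elements, until
// it reaches the end element that closes the tag whose start element has
// already been encoded.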
func copyTag(enc *xml2.Encoder, dec *xml.Decoder) error {
	for {
		t, err := dec.Token()
		if err != nil {
			return fmt.Errorf("extract token failed: %v", err)
		}
		if err := enc.EncodeToken(t); err != nil {
			return fmt.Errorf("encoding token %v failed: %v", t, err)
		}
		switch t.(type) {
		case xml.StartElement:
			if err := copyTag(enc, dec); err != nil {
				return err
			}
		case xml.EndElement:
			return nil
		}
	}
}

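// sendVR sends vr on vrC and reports whether the send completed before the
// context was cancelled.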
func sendVR(ctx context.Context, vrC chan<- *res.ValuesResource, vr *res.ValuesResource) bool {
	select {
	case vrC <- vr:
	case <-ctx.Done():
		return false
	}
	return true
}

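// hasChildType reports whether the element currently being decoded has a
// direct child whose tag maps to the wanted res.Type in lookup.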
func hasChildType(dec *xml.Decoder, lookup map[xml.Name]res.Type, want res.Type) (bool, error) {
	for {
		t, err := dec.Token()
		if err != nil {
			return false, fmt.Errorf("extract token failed: %v", err)
		}
		switch t.(type) {
		case xml.StartElement:
			if rt, ok := lookup[t.(xml.StartElement).Name]; ok {
				if rt == want {
					return true, nil
				}
			}
			// When the tag is not in the lookup or its type is not the wanted one, skip it.
			dec.Skip()
		case xml.EndElement:
			return false, nil
		}
	}
}

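// createPartitions opens one output file per typed_outputs entry and groups
// the resulting writers by resource type; the number of writers per type is
// the number of shards for that type. For example (hypothetical paths), the
// entries
//
//	string:/out/res-string-0.zip
//	string:/out/res-string-1.zip
//
// produce two writers under the "string" type, i.e. string resources are
// split across two shards.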
func createPartitions(typedOutputs []string) (map[res.Type][]io.Writer, error) {
	partitions := make(map[res.Type][]io.Writer)
	for _, tAndOP := range typedOutputs {
		tOP := strings.SplitN(tAndOP, ":", 2)
		// The entry has no ":" separator, i.e. no res type prefix was given.
		if len(tOP) == 1 {
			return nil, fmt.Errorf("got malformed typed output path %q, wanted the following format \"<type>:<file path>\"", tAndOP)
		}
		t, err := res.ParseType(tOP[0])
		if err != nil {
			return nil, fmt.Errorf("got error while trying to parse %s to a res type: %v", tOP[0], err)
		}
		op := tOP[1]
		if err := os.MkdirAll(path.Dir(op), 0744); err != nil {
			return nil, fmt.Errorf("%s: mkdir failed: %v", op, err)
		}
		f, err := os.OpenFile(op, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0644)
		if err != nil {
			return nil, fmt.Errorf("open/create failed: %v", err)
		}
		partitions[t] = append(partitions[t], f)
	}
	return partitions, nil
}

// Run is the entry point for bucketize.
func Run() {
	if resPaths == nil || typedOutputs == nil {
		log.Fatal("Flags -res_paths and -typed_outputs must be specified.")
	}

	resFiles, err := walk.Files(resPaths)
	if err != nil {
		log.Fatalf("Got error getting the resource paths: %v", err)
	}
	resFileIdxs := make(map[string]int)
	for i, resFile := range resFiles {
		resFileIdxs[resFile] = i
	}

	p, err := createPartitions(typedOutputs)
	if err != nil {
		log.Fatalf("Got error creating partitions: %v", err)
	}

	ps, err := makePartitionSession(p, shard.FNV, resFileIdxs)
	if err != nil {
		log.Fatalf("Got error making partition session: %v", err)
	}

	m, err := makeArchiver(resFiles, ps)
	if err != nil {
		log.Fatalf("Got error making archiver: %v", err)
	}

	if err := m.Archive(context.Background()); err != nil {
		log.Fatalf("Got error archiving: %v", err)
	}
}
452