xref: /aosp_15_r20/external/pigweed/pw_tokenizer/ts/detokenizer.ts (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1// Copyright 2022 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15/**  Decodes and detokenizes strings from binary or Base64 input. */
16import { Buffer } from 'buffer';
17import { Frame } from 'pigweedjs/pw_hdlc';
18import { TokenDatabase } from './token_database';
19import { PrintfDecoder } from './printf_decoder';
20
/** Default number of nested detokenization passes for Base64 messages. */
const MAX_RECURSIONS = 9;

// Character class covering both the standard (+ and /) and the URL-safe
// (- and _) Base64 alphabets. The hyphen must be escaped: an unescaped `/-_`
// is a *range* from '/' to '_', which wrongly accepts characters such as
// ':', '=', '[' and '^' while rejecting a literal '-'.
const BASE64CHARS = '[A-Za-z0-9+/\\-_]';

// Matches every prefixed Base64 token candidate in a string. Note that a bare
// prefix character with no payload also matches; consumers must be prepared
// for candidates that decode to fewer bytes than a token.
const PATTERN = new RegExp(
  // Base64 tokenized strings start with the prefix character ($)
  '\\$' +
    // Tokenized strings contain 0 or more blocks of four Base64 chars.
    `(?:${BASE64CHARS}{4})*` +
    // The last block of 4 chars may have one or two padding chars (=).
    `(?:${BASE64CHARS}{3}=|${BASE64CHARS}{2}==)?`,
  'g',
);
32
/** A tokenized message split into its token and its encoded arguments. */
interface TokenAndArgs {
  /** Token value, read as a little-endian uint32 from the first four bytes. */
  token: number;
  /** The bytes that follow the four token bytes (the encoded arguments). */
  args: Uint8Array;
}
37
/**
 * Turns tokenized messages (raw binary or `$`-prefixed Base64 text) back into
 * strings by looking tokens up in a token database.
 */
export class Detokenizer {
  /** Size in bytes of the little-endian token at the start of a message. */
  private static readonly TOKEN_SIZE_BYTES = 4;

  private database: TokenDatabase;

  /**
   * @param csvDatabase Token database contents, passed to `TokenDatabase`.
   */
  constructor(csvDatabase: string) {
    this.database = new TokenDatabase(csvDatabase);
  }

  /**
   * Detokenize frame data into actual string messages using the provided
   * token database.
   *
   * If the frame doesn't match any token from database, the frame will be
   * returned as string as-is.
   */
  detokenize(tokenizedFrame: Frame): string {
    return this.detokenizeUint8Array(tokenizedFrame.data);
  }

  /**
   * Detokenize uint8 into actual string messages using the provided
   * token database.
   *
   * If the data doesn't match any token from database (including data that
   * is too short to contain a token), the data will be returned as string
   * as-is.
   */
  detokenizeUint8Array(data: Uint8Array): string {
    const decoded = this.decodeUint8Array(data);
    if (decoded !== undefined) {
      // Parse arguments if this is printf-style text.
      const format = this.database.get(decoded.token);
      if (format) {
        return new PrintfDecoder().decode(String(format), decoded.args);
      }
    }

    return new TextDecoder().decode(data);
  }

  /**
   * Detokenize Base64-encoded frame data into actual string messages using the
   * provided token database.
   *
   * If the frame doesn't match any token from database, the frame will be
   * returned as string as-is.
   *
   * @param tokenizedFrame Frame whose data is text that may contain
   *     `$`-prefixed Base64 tokens.
   * @param maxRecursion How many additional passes are made over decoded
   *     text to expand nested tokens.
   */
  detokenizeBase64(
    tokenizedFrame: Frame,
    maxRecursion: number = MAX_RECURSIONS,
  ): string {
    const base64String = new TextDecoder().decode(tokenizedFrame.data);
    return this.detokenizeBase64String(base64String, maxRecursion);
  }

  /**
   * Replaces every recognized Base64 token in `base64String` with its decoded
   * message. Candidates that cannot be decoded or are not in the database are
   * left untouched.
   */
  private detokenizeBase64String(
    base64String: string,
    recursions: number,
  ): string {
    return base64String.replace(PATTERN, (base64Substring) => {
      const decoded = this.decodeBase64TokenFrame(base64Substring);
      if (decoded === undefined) {
        // Too short to hold a token (e.g. a lone '$' in ordinary text).
        return base64Substring;
      }

      const format = this.database.get(decoded.token);
      if (!format) {
        return base64Substring;
      }

      // Parse arguments if this is printf-style text.
      const decodedOriginal = new PrintfDecoder().decode(
        String(format),
        decoded.args,
      );
      // Detokenize nested Base64 tokens and their arguments.
      return recursions > 0
        ? this.detokenizeBase64String(decodedOriginal, recursions - 1)
        : decodedOriginal;
    });
  }

  /**
   * Splits a binary message into its token and argument bytes.
   *
   * Only the bytes visible through `data` are used, so views into a larger
   * buffer are handled correctly. The returned `args` is a copy.
   *
   * @returns The token and arguments, or `undefined` when `data` is shorter
   *     than a token.
   */
  private decodeUint8Array(data: Uint8Array): TokenAndArgs | undefined {
    const tokenSize = Detokenizer.TOKEN_SIZE_BYTES;
    if (data.byteLength < tokenSize) {
      return undefined;
    }

    const token = new DataView(
      data.buffer,
      data.byteOffset,
      tokenSize,
    ).getUint32(0, true);
    // subarray() honours the view's own bounds; wrapping it in a new
    // Uint8Array copies the bytes so callers never alias the input buffer.
    const args = new Uint8Array(data.subarray(tokenSize));

    return { token, args };
  }

  /**
   * Decodes one `$`-prefixed Base64 candidate into its token and arguments.
   *
   * @returns The token and arguments, or `undefined` when the payload decodes
   *     to fewer bytes than a token.
   */
  private decodeBase64TokenFrame(base64Data: string): TokenAndArgs | undefined {
    // Remove the prefix '$' and convert from Base64. A Buffer is a
    // Uint8Array, so it can be parsed like any other binary message.
    const bytes = Buffer.from(base64Data.slice(1), 'base64');
    return this.decodeUint8Array(bytes);
  }
}
139