// Copyright 2022 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

/** Decodes and detokenizes strings from binary or Base64 input. */
import { Buffer } from 'buffer';
import { Frame } from 'pigweedjs/pw_hdlc';
import { TokenDatabase } from './token_database';
import { PrintfDecoder } from './printf_decoder';

/** Default number of nested detokenization passes over expanded text. */
const MAX_RECURSIONS = 9;

/** Number of leading bytes holding the little-endian token. */
const TOKEN_SIZE_BYTES = 4;

// Standard and URL-safe Base64 alphabets. The hyphen is placed last so that it
// is a literal character; written as `/-_` it would form a range that accepts
// characters such as `:`, `<` and `[` while rejecting `-` itself.
const BASE64CHARS = '[A-Za-z0-9+/_-]';

const PATTERN = new RegExp(
  // Base64 tokenized strings start with the prefix character ($)
  '\\$' +
    // Tokenized strings contain 0 or more blocks of four Base64 chars.
    `(?:${BASE64CHARS}{4})*` +
    // The last block of 4 chars may have one or two padding chars (=).
    `(?:${BASE64CHARS}{3}=|${BASE64CHARS}{2}==)?`,
  'g',
);

/** A token split from the encoded argument bytes that follow it. */
interface TokenAndArgs {
  token: number;
  args: Uint8Array;
}

export class Detokenizer {
  private database: TokenDatabase;

  /**
   * @param csvDatabase Token database contents, passed unchanged to
   *     `TokenDatabase`.
   */
  constructor(csvDatabase: string) {
    this.database = new TokenDatabase(csvDatabase);
  }

  /**
   * Detokenize frame data into actual string messages using the provided
   * token database.
   *
   * If the frame doesn't match any token from database, the frame will be
   * returned as string as-is.
   */
  detokenize(tokenizedFrame: Frame): string {
    return this.detokenizeUint8Array(tokenizedFrame.data);
  }

  /**
   * Detokenize uint8 into actual string messages using the provided
   * token database.
   *
   * If the data is too short to hold a token, or doesn't match any token from
   * database, the data will be returned as string as-is.
   */
  detokenizeUint8Array(data: Uint8Array): string {
    const decoded = this.decodeUint8Array(data);
    if (decoded !== undefined) {
      // Parse arguments if this is printf-style text.
      const format = this.database.get(decoded.token);
      if (format) {
        return new PrintfDecoder().decode(String(format), decoded.args);
      }
    }

    return new TextDecoder().decode(data);
  }

  /**
   * Detokenize Base64-encoded frame data into actual string messages using the
   * provided token database.
   *
   * Substrings that don't match any token from database are left in the
   * returned string as-is.
   *
   * @param tokenizedFrame Frame whose data is text containing `$`-prefixed
   *     Base64 tokens.
   * @param maxRecursion How many additional passes are run over expanded text
   *     to resolve nested tokens.
   */
  detokenizeBase64(
    tokenizedFrame: Frame,
    maxRecursion: number = MAX_RECURSIONS,
  ): string {
    const base64String = new TextDecoder().decode(tokenizedFrame.data);
    return this.detokenizeBase64String(base64String, maxRecursion);
  }

  /**
   * Replaces every recognised `$`-prefixed Base64 token in `base64String`
   * with its decoded message; unrecognised candidates are kept verbatim.
   */
  private detokenizeBase64String(
    base64String: string,
    recursions: number,
  ): string {
    return base64String.replace(PATTERN, (base64Substring) => {
      const decoded = this.decodeBase64TokenFrame(base64Substring);
      // A lone '$' or a truncated payload cannot contain a token.
      if (decoded === undefined) {
        return base64Substring;
      }

      const format = this.database.get(decoded.token);
      if (!format) {
        return base64Substring;
      }

      // Parse arguments if this is printf-style text.
      const decodedOriginal = new PrintfDecoder().decode(
        String(format),
        decoded.args,
      );
      // Detokenize nested Base64 tokens and their arguments.
      return recursions > 0
        ? this.detokenizeBase64String(decodedOriginal, recursions - 1)
        : decodedOriginal;
    });
  }

  /**
   * Splits binary data into its leading little-endian 32-bit token and the
   * argument bytes after it.
   *
   * @returns `undefined` when `data` holds fewer than four bytes.
   */
  private decodeUint8Array(data: Uint8Array): TokenAndArgs | undefined {
    if (data.byteLength < TOKEN_SIZE_BYTES) {
      return undefined;
    }

    const token = new DataView(
      data.buffer,
      data.byteOffset,
      TOKEN_SIZE_BYTES,
    ).getUint32(0, true);
    // Copy only the bytes that belong to this view; `data` may be a window
    // into a larger backing buffer.
    const args = new Uint8Array(data.subarray(TOKEN_SIZE_BYTES));

    return { token, args };
  }

  /**
   * Decodes one `$`-prefixed Base64 substring into its token and arguments.
   *
   * @returns `undefined` when the decoded payload is shorter than a token.
   */
  private decodeBase64TokenFrame(base64Data: string): TokenAndArgs | undefined {
    // Remove the prefix '$' and convert from Base64.
    const payload = Buffer.from(base64Data.slice(1), 'base64');
    return this.decodeUint8Array(payload);
  }
}