# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Provides functionality for encoding tokenized messages."""

import argparse
import base64
import struct
import sys
from typing import Sequence

from pw_tokenizer import tokens

_INT32_MIN = -(2**31)
_INT32_MAX = 2**31 - 1
_UINT32_MAX = 2**32 - 1
NESTED_TOKEN_PREFIX = '$'
NESTED_TOKEN_BASE_PREFIX = '#'
NESTED_DOMAIN_START_PREFIX = '{'
NESTED_DOMAIN_END_PREFIX = '}'


def _zig_zag_encode(value: int) -> int:
    """Encodes signed integers to give a compact varint encoding.

    Non-negative values map to even numbers (2 * value) and negative values
    map to odd numbers (-2 * value - 1), so the result is never negative.
    """
    return value << 1 if value >= 0 else (value << 1) ^ (~0)


def _little_endian_base128_encode(integer: int) -> bytearray:
    """Encodes a non-negative integer as a little-endian base-128 varint.

    Args:
      integer: The non-negative value to encode.

    Returns:
      The encoded bytes, least-significant 7-bit group first. Every byte
      except the last has its top bit set.

    Raises:
      ValueError: If integer is negative. Right-shifting a negative Python
        int never reaches zero, so the loop below would not terminate.
    """
    if integer < 0:
        raise ValueError(f'Cannot varint-encode negative value {integer}')

    data = bytearray()

    while True:
        # Grab 7 bits; the eighth bit is set to 1 to indicate more data coming.
        data.append((integer & 0x7F) | 0x80)
        integer >>= 7

        if not integer:
            break

    data[-1] &= 0x7F  # clear the top bit of the last byte
    return data


def _encode_int32(arg: int) -> bytearray:
    """Encodes a 32-bit integer as a zig-zag varint.

    Args:
      arg: An integer in the signed or unsigned 32-bit range. Range checking
        is the caller's responsibility.

    Returns:
      The zig-zag, base-128 encoded bytes.
    """
    # Convert large unsigned numbers into their corresponding signed values.
    if arg > _INT32_MAX:
        arg -= 2**32

    return _little_endian_base128_encode(_zig_zag_encode(arg))


def _encode_string(arg: bytes) -> bytes:
    """Encodes a byte string as a single size byte followed by the data.

    Strings shorter than 128 bytes are stored whole, prefixed with their
    length. Longer strings are cut to their first 127 bytes and the size byte
    is written as 0xFF (0x80 | 127).
    """
    size_byte = len(arg) if len(arg) < 128 else 0xFF
    return struct.pack('B', size_byte) + arg[:127]


def encode_args(*args: int | float | bytes | str) -> bytes:
    """Encodes a list of arguments to their on-wire representation.

    Integers are encoded as zig-zag varints, floats as 4-byte little-endian
    values, and str/bytes as size-prefixed strings (str is encoded with
    str.encode() first).

    Args:
      *args: The arguments to encode, in order.

    Returns:
      The concatenated encoding of all arguments.

    Raises:
      ValueError: If an integer does not fit in a signed or unsigned 32-bit
        integer, or if an argument has an unsupported type.
    """

    data = bytearray(b'')
    for arg in args:
        if isinstance(arg, int):
            # Check the range explicitly. int.bit_length() ignores the sign,
            # so a bit_length() check would let negative values below
            # -2**31 through even though they do not fit in an int32.
            if not _INT32_MIN <= arg <= _UINT32_MAX:
                raise ValueError(
                    f'Cannot encode {arg}: only 32-bit integers may be encoded'
                )
            data += _encode_int32(arg)
        elif isinstance(arg, float):
            data += struct.pack('<f', arg)
        elif isinstance(arg, str):
            data += _encode_string(arg.encode())
        elif isinstance(arg, bytes):
            data += _encode_string(arg)
        else:
            raise ValueError(
                f'{arg} has type {type(arg)}, which is not supported'
            )
    return bytes(data)


def encode_token_and_args(
    token: int, *args: int | float | bytes | str
) -> bytes:
    """Encodes a tokenized message given its token and arguments.

    This function assumes that the token represents a format string with
    conversion specifiers that correspond with the provided argument types.
    Currently, only 32-bit integers are supported.

    Args:
      token: The message token; must fit in an unsigned 32-bit integer.
      *args: The arguments for the tokenized format string.

    Returns:
      The token as 4 little-endian bytes followed by the encoded arguments.

    Raises:
      ValueError: If the token is not an unsigned 32-bit integer or an
        argument cannot be encoded.
    """

    if token < 0 or token > _UINT32_MAX:
        raise ValueError(
            f'The token ({token}) must be an unsigned 32-bit integer'
        )

    return struct.pack('<I', token) + encode_args(*args)


def prefixed_base64(data: bytes, prefix: str | bytes = '$') -> str:
    """Encodes a tokenized message as prefixed Base64.

    Args:
      data: The binary tokenized message.
      prefix: Text to place before the Base64 data; bytes are decoded to str.

    Returns:
      The prefix followed by the standard Base64 encoding of data.
    """
    prefix = prefix if isinstance(prefix, str) else prefix.decode()
    return prefix + base64.b64encode(data).decode()


def _parse_user_input(string: str):
    """Evaluates a string as Python code or returns it as a literal string.

    Only int and float results are used. Anything else, including an
    expression that fails to evaluate or that evaluates to another type
    (such as a quoted string), yields the original text unchanged.
    """
    # NOTE: eval() with empty builtins is NOT a sandbox. This is acceptable
    # only because the input comes from the user's own command line; never
    # call this on untrusted data.
    try:
        value = eval(string, dict(__builtins__={}))  # pylint: disable=eval-used
    except Exception:  # pylint: disable=broad-except
        # Any evaluation failure (NameError, SyntaxError, ZeroDivisionError,
        # TypeError, ...) means the text is not a usable Python expression, so
        # treat it as a string literal instead of crashing the tool.
        return string

    return value if isinstance(value, (int, float)) else string


def _main(format_string_list: Sequence[str], raw_args: Sequence[str]) -> int:
    """Encodes a format string and its arguments and prints the results.

    Args:
      format_string_list: A sequence holding exactly one format string.
      raw_args: Command line arguments for the format string.

    Returns:
      0, the process exit code.
    """
    (format_string,) = format_string_list
    token = tokens.pw_tokenizer_65599_hash(format_string)
    args = tuple(_parse_user_input(a) for a in raw_args)

    data = encode_token_and_args(token, *args)
    # Read the token back out of the encoded message for display.
    token = int.from_bytes(data[:4], 'little')
    binary = ' '.join(f'{b:02x}' for b in data)

    # Labels are right-aligned so that the colons line up.
    print(f'      Raw input: {format_string!r} % {args!r}')
    print(f'Formatted input: {format_string % args}')
    print(f'          Token: 0x{token:08x}')
    print(f'        Encoded: {data!r} ({binary}) [{len(data)} bytes]')
    print(f'Prefixed Base64: {prefixed_base64(data)}')

    return 0


def _parse_args() -> dict:
    """Parses command line arguments into keyword arguments for _main."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        'format_string_list',
        metavar='FORMAT_STRING',
        nargs=1,
        help='Format string with optional %%-style arguments.',
    )
    parser.add_argument(
        'raw_args',
        metavar='ARG',
        nargs='*',
        help=(
            'Arguments for the format string, if any. Arguments are parsed '
            'as Python expressions, with no builtins (e.g. 9 is the number '
            '9 and \'"9"\' is the string "9"). Arguments that are not valid '
            'Python are treated as string literals.'
        ),
    )
    return vars(parser.parse_args())


if __name__ == '__main__':
    sys.exit(_main(**_parse_args()))