# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Contains common helpers for working with Android manifests."""

import hashlib
import os
import re
import shlex
import sys
import xml.dom.minidom as minidom
from xml.etree import ElementTree

from util import build_utils
import action_helpers  # build_utils adds //build to sys.path.

ANDROID_NAMESPACE = 'http://schemas.android.com/apk/res/android'
TOOLS_NAMESPACE = 'http://schemas.android.com/tools'
DIST_NAMESPACE = 'http://schemas.android.com/apk/distribution'
EMPTY_ANDROID_MANIFEST_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'AndroidManifest.xml'))
# When normalizing for expectation matching, wrap these tags when they are long
# or else they become very hard to read.
_WRAP_CANDIDATES = (
    '<manifest',
    '<application',
    '<activity',
    '<provider',
    '<receiver',
    '<service',
)
# Only lines strictly longer than this are candidates for wrapping.
_WRAP_LINE_LENGTH = 100

_xml_namespace_initialized = False


def _RegisterElementTreeNamespaces():
  """Registers the android/tools/dist prefixes with ElementTree (once).

  The module-level flag makes repeated calls a no-op.
  """
  global _xml_namespace_initialized
  if _xml_namespace_initialized:
    return
  _xml_namespace_initialized = True
  ElementTree.register_namespace('android', ANDROID_NAMESPACE)
  ElementTree.register_namespace('tools', TOOLS_NAMESPACE)
  ElementTree.register_namespace('dist', DIST_NAMESPACE)


def _QualifiedAndroidName(key):
  """Returns |key| in ElementTree's '{namespace-uri}name' form for android:."""
  return '{%s}%s' % (ANDROID_NAMESPACE, key)


def NamespacedGet(node, key):
  """Returns the value of the android:|key| attribute of |node|, or None."""
  return node.get(_QualifiedAndroidName(key))


def NamespacedSet(node, key, value):
  """Sets the android:|key| attribute of |node| to |value|."""
  node.set(_QualifiedAndroidName(key), value)


def ParseManifest(path):
  """Parses an AndroidManifest.xml using ElementTree.

  Registers the 'android', 'tools' and 'dist' namespace prefixes with
  ElementTree and creates an <application> node under <manifest> if the
  manifest does not have one.

  Args:
    path: Path of the manifest file to parse.

  Returns tuple of:
    doc: Root xml document.
    manifest_node: the <manifest> node.
    app_node: the <application> node.
  """
  _RegisterElementTreeNamespaces()
  doc = ElementTree.parse(path)
  # ElementTree.find does not work if the required tag is the root.
  if doc.getroot().tag == 'manifest':
    manifest_node = doc.getroot()
  else:
    manifest_node = doc.find('manifest')
  assert manifest_node is not None, 'Manifest is none for path ' + path

  # Search under the manifest node (not the document root) so that the lookup
  # and the creation below always refer to the same parent.
  app_node = manifest_node.find('application')
  if app_node is None:
    app_node = ElementTree.SubElement(manifest_node, 'application')

  return doc, manifest_node, app_node


def SaveManifest(doc, path):
  """Serializes |doc|'s root node as UTF-8 XML and writes it to |path|.

  The write goes through action_helpers.atomic_output.
  """
  with action_helpers.atomic_output(path) as f:
    f.write(ElementTree.tostring(doc.getroot(), encoding='UTF-8'))


def GetPackage(manifest_node):
  """Returns the 'package' attribute of |manifest_node|, or None if unset."""
  return manifest_node.get('package')


def _GetOrCreateUsesSdkNode(manifest_node):
  """Returns the <uses-sdk> child of |manifest_node|, creating it if absent."""
  uses_sdk_node = manifest_node.find('./uses-sdk')
  if uses_sdk_node is None:
    uses_sdk_node = ElementTree.SubElement(manifest_node, 'uses-sdk')
  return uses_sdk_node


def SetUsesSdk(manifest_node,
               target_sdk_version,
               min_sdk_version,
               max_sdk_version=None):
  """Sets the SDK version attributes on <uses-sdk>, creating it if needed.

  Args:
    manifest_node: The <manifest> node.
    target_sdk_version: Value for android:targetSdkVersion.
    min_sdk_version: Value for android:minSdkVersion.
    max_sdk_version: Value for android:maxSdkVersion. Only written when truthy.
  """
  uses_sdk_node = _GetOrCreateUsesSdkNode(manifest_node)
  NamespacedSet(uses_sdk_node, 'targetSdkVersion', target_sdk_version)
  NamespacedSet(uses_sdk_node, 'minSdkVersion', min_sdk_version)
  if max_sdk_version:
    NamespacedSet(uses_sdk_node, 'maxSdkVersion', max_sdk_version)


def SetTargetApiIfUnset(manifest_node, target_sdk_version):
  """Sets android:targetSdkVersion only if the manifest does not have one.

  A <uses-sdk> node is created when missing.

  Returns:
    True if the attribute was unset (and has now been set), False otherwise.
  """
  uses_sdk_node = _GetOrCreateUsesSdkNode(manifest_node)
  curr_target_sdk_version = NamespacedGet(uses_sdk_node, 'targetSdkVersion')
  if curr_target_sdk_version is None:
    NamespacedSet(uses_sdk_node, 'targetSdkVersion', target_sdk_version)
  return curr_target_sdk_version is None


def OverrideMinSdkVersionIfPresent(manifest_node, min_sdk_version):
  """Sets android:minSdkVersion, but only when <uses-sdk> already exists."""
  uses_sdk_node = manifest_node.find('./uses-sdk')
  if uses_sdk_node is not None:
    NamespacedSet(uses_sdk_node, 'minSdkVersion', min_sdk_version)


def _SortAndStripElementTree(root):
  """Sorts attributes and children of every node under |root|, in place.

  Whitespace-only text of child nodes is cleared as well.
  """

  # Sort alphabetically with two exceptions:
  # 1) Put <application> node last (since it's giant).
  # 2) Put android:name before other attributes.
  def element_sort_key(node):
    if node.tag == 'application':
      # Other keys are serialized XML and so start with '<', which sorts
      # before 'z'.
      return 'z'
    ret = ElementTree.tostring(node)
    # ElementTree.tostring inserts namespace attributes for any that are needed
    # for the node or any of its descendants. Remove them so as to prevent a
    # change to a child that adds/removes a namespace usage from changing sort
    # order.
    return re.sub(r' xmlns:.*?".*?"', '', ret.decode('utf8'))

  name_attr = _QualifiedAndroidName('name')

  def attribute_sort_key(tup):
    return ('', '') if tup[0] == name_attr else tup

  def helper(node):
    for child in node:
      if child.text and child.text.isspace():
        child.text = None
      helper(child)

    # Sort attributes (requires Python 3.8+).
    node.attrib = dict(sorted(node.attrib.items(), key=attribute_sort_key))

    # Sort nodes
    node[:] = sorted(node, key=element_sort_key)

  helper(root)


def _SplitElement(line):
  """Parses a one-line xml node into ('<tag', ['a="b"', ...], '/>').

  Args:
    line: A single line of XML that ends with '>' or '/>'.

  Returns:
    Tuple of (start tag, list of attribute strings, end tag), where the end tag
    is either '>' or '/>'.
  """

  # Shlex splits nicely, but removes quotes. Need to put them back.
  def restore_quotes(value):
    return value.replace('=', '="', 1) + '"'

  # Simplify restore_quotes by separating />.
  assert line.endswith('>'), line
  end_tag = '>'
  if line.endswith('/>'):
    end_tag = '/>'
  line = line[:-len(end_tag)]

  # Use shlex to avoid having to re-encode quotes, etc.
  parts = shlex.split(line)
  start_tag = parts[0]
  attrs = parts[1:]

  return start_tag, [restore_quotes(x) for x in attrs], end_tag


def _CreateNodeHash(lines):
  """Computes a hash (md5) for the first XML node found in |lines|.

  Args:
    lines: List of strings containing pretty-printed XML.

  Returns:
    The first 8 hex characters of the md5 digest (a str). The digest covers
    either the node's lines (up to, but excluding, the next line indented at or
    below the node's level) or, when found before the opening tag is closed,
    just the line holding android:name.

  Raises:
    AssertionError: if no line terminating the node is found.
  """
  target_indent = lines[0].find('<')
  tag_closed = False
  for i, l in enumerate(lines[1:]):
    cur_indent = l.find('<')
    if cur_indent != -1 and cur_indent <= target_indent:
      # |i| indexes lines[1:], so this slice stops just before |l|.
      tag_lines = lines[:i + 1]
      break
    if not tag_closed and 'android:name="' in l:
      # To reduce noise of node tags changing, use android:name as the
      # basis of the hash since it is usually unique.
      tag_lines = [l]
      break
    tag_closed = tag_closed or '>' in l
  else:
    # Raised explicitly (rather than `assert False`) so that running with -O
    # cannot fall through to an unbound |tag_lines|.
    raise AssertionError('Did not find end of node:\n' + '\n'.join(lines))

  # Insecure and truncated hash as it only needs to be unique vs. its neighbors.
  return hashlib.md5(('\n'.join(tag_lines)).encode('utf8')).hexdigest()[:8]


def _IsSelfClosing(lines):
  """Given pretty-printed xml, returns whether first node is self-closing.

  Raises:
    RuntimeError: if no line contains '>'.
  """
  for l in lines:
    idx = l.find('>')
    if idx != -1:
      # Slice instead of indexing so that '>' at column 0 does not wrap around
      # to the end of the line.
      return l[:idx].endswith('/')
  raise RuntimeError('Did not find end of tag:\n%s' % '\n'.join(lines))


def _AddDiffTags(lines):
  """Appends a DIFF-ANCHOR hash comment to node start/end lines, in place.

  Args:
    lines: List of pretty-printed XML lines. Modified in place.
  """
  # When multiple identical tags appear sequentially, XML diffs can look like:
  # +  </tag>
  # +  <tag>
  # rather than:
  # +  <tag>
  # +  </tag>
  # To reduce confusion, add hashes to tags.
  # This also ensures changed tags show up with outer <tag> elements rather than
  # showing only changed attributes.
  hash_stack = []
  for i, l in enumerate(lines):
    stripped = l.lstrip()
    # Ignore non-indented tags and lines that are not the start/end of a node.
    # startswith() also copes with empty and whitespace-only lines.
    if not l.startswith(' ') or not stripped.startswith('<'):
      continue
    # Ignore self-closing nodes that fit on one line.
    if l.endswith('/>'):
      continue
    # Ignore <application> since diff tag changes with basically any change.
    if stripped.lstrip('</').startswith('application'):
      continue

    # Check for the closing tag (</foo>).
    if not stripped.startswith('</'):
      cur_hash = _CreateNodeHash(lines[i:])
      if not _IsSelfClosing(lines[i:]):
        hash_stack.append(cur_hash)
    else:
      cur_hash = hash_stack.pop()
    lines[i] += ' # DIFF-ANCHOR: {}'.format(cur_hash)
  assert not hash_stack, 'hash_stack was not empty:\n' + '\n'.join(hash_stack)


def _WrapElementLine(line):
  """Splits a long one-line element into one line per attribute."""
  indent = ' ' * line.find('<')
  start_tag, attrs, end_tag = _SplitElement(line)
  wrapped = ['{}{}'.format(indent, start_tag)]
  wrapped.extend('{} {}'.format(indent, attribute) for attribute in attrs)
  wrapped[-1] += '>'
  # Heuristic: Do not allow multi-line tags to be self-closing since these
  # can generally be allowed to have nested elements. When diffing, it adds
  # noise if the base file is self-closing and the non-base file is not
  # self-closing.
  if end_tag == '/>':
    wrapped.append('{}{}>'.format(indent, start_tag.replace('<', '</')))
  return wrapped


def NormalizeManifest(manifest_contents, version_code_offset,
                      library_version_offset):
  """Returns a normalized, diff-friendly rendering of a manifest.

  Args:
    manifest_contents: The manifest XML, as accepted by
      ElementTree.fromstring.
    version_code_offset: Subtracted from android:versionCode (both are passed
      through int()). When falsy, versionCode is left untouched.
    library_version_offset: Subtracted from android:version of
      <uses-static-library> / <static-library> nodes. When falsy, those
      versions are left untouched.

  Returns:
    The normalized manifest as a newline-terminated string.
  """
  _RegisterElementTreeNamespaces()
  # This also strips comments and sorts node attributes alphabetically.
  root = ElementTree.fromstring(manifest_contents)
  package = GetPackage(root)

  app_node = root.find('application')
  if app_node is not None:
    # android:debuggable is added when !is_official_build. Strip it out to avoid
    # expectation diffs caused by not adding is_official_build. Play store
    # blocks uploading apps with it set, so there's no risk of it slipping in.
    debuggable_name = _QualifiedAndroidName('debuggable')
    if debuggable_name in app_node.attrib:
      del app_node.attrib[debuggable_name]

    version_code = NamespacedGet(root, 'versionCode')
    if version_code and version_code_offset:
      version_code = int(version_code) - int(version_code_offset)
      NamespacedSet(root, 'versionCode', f'OFFSET={version_code}')
    version_name = NamespacedGet(root, 'versionName')
    if version_name:
      version_name = re.sub(r'\d+', '#', version_name)
      NamespacedSet(root, 'versionName', version_name)

    # Trichrome's static library version number is updated daily. To avoid
    # frequent manifest check failures, we remove the exact version number
    # during normalization.
    for node in app_node:
      if node.tag in ['uses-static-library', 'static-library']:
        version = NamespacedGet(node, 'version')
        if version and library_version_offset:
          version = int(version) - int(library_version_offset)
          NamespacedSet(node, 'version', f'OFFSET={version}')

  # We also remove the exact package name (except the one at the root level)
  # to avoid noise during manifest comparison.
  def blur_package_name(node):
    # Snapshot the items since attributes are rewritten while looping.
    for key, value in list(node.items()):
      node.set(key, value.replace(package, '$PACKAGE'))

    for child in node:
      blur_package_name(child)

  # We only blur the package names of non-root nodes because they generate a lot
  # of diffs when doing manifest checks for upstream targets. We still want to
  # have 1 piece of package name not blurred just in case the package name is
  # mistakenly changed.
  # Skip entirely when there is no package: str.replace() rejects None, and an
  # empty string would match between every character.
  if package:
    for child in root:
      blur_package_name(child)

  _SortAndStripElementTree(root)

  # Fix up whitespace/indentation.
  dom = minidom.parseString(ElementTree.tostring(root))
  out_lines = []
  for l in dom.toprettyxml(indent=' ').splitlines():
    if not l or l.isspace():
      continue
    if len(l) > _WRAP_LINE_LENGTH and any(x in l for x in _WRAP_CANDIDATES):
      out_lines.extend(_WrapElementLine(l))
    else:
      out_lines.append(l)

  # Make output more diff-friendly.
  _AddDiffTags(out_lines)

  return '\n'.join(out_lines) + '\n'