xref: /aosp_15_r20/tools/dexter/slicer/dex_ir.cc (revision f0dffb02cdb5c647d21204e89a92a1ffae2dad87)
1*f0dffb02SXin Li /*
2*f0dffb02SXin Li  * Copyright (C) 2017 The Android Open Source Project
3*f0dffb02SXin Li  *
4*f0dffb02SXin Li  * Licensed under the Apache License, Version 2.0 (the "License");
5*f0dffb02SXin Li  * you may not use this file except in compliance with the License.
6*f0dffb02SXin Li  * You may obtain a copy of the License at
7*f0dffb02SXin Li  *
8*f0dffb02SXin Li  *      http://www.apache.org/licenses/LICENSE-2.0
9*f0dffb02SXin Li  *
10*f0dffb02SXin Li  * Unless required by applicable law or agreed to in writing, software
11*f0dffb02SXin Li  * distributed under the License is distributed on an "AS IS" BASIS,
12*f0dffb02SXin Li  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*f0dffb02SXin Li  * See the License for the specific language governing permissions and
14*f0dffb02SXin Li  * limitations under the License.
15*f0dffb02SXin Li  */
16*f0dffb02SXin Li 
17*f0dffb02SXin Li #include "slicer/dex_ir.h"
18*f0dffb02SXin Li 
19*f0dffb02SXin Li #include "slicer/chronometer.h"
20*f0dffb02SXin Li #include "slicer/dex_utf8.h"
21*f0dffb02SXin Li #include "slicer/dex_format.h"
22*f0dffb02SXin Li 
23*f0dffb02SXin Li #include <cstdint>
24*f0dffb02SXin Li #include <algorithm>
25*f0dffb02SXin Li #include <memory>
26*f0dffb02SXin Li #include <sstream>
27*f0dffb02SXin Li #include <vector>
28*f0dffb02SXin Li 
29*f0dffb02SXin Li namespace ir {
30*f0dffb02SXin Li 
31*f0dffb02SXin Li // DBJ2a string hash
HashString(const char * cstr)32*f0dffb02SXin Li static uint32_t HashString(const char* cstr) {
33*f0dffb02SXin Li   uint32_t hash = 5381;  // DBJ2 magic prime value
34*f0dffb02SXin Li   while (*cstr) {
35*f0dffb02SXin Li     hash = ((hash << 5) + hash) ^ *cstr++;
36*f0dffb02SXin Li   }
37*f0dffb02SXin Li   return hash;
38*f0dffb02SXin Li }
39*f0dffb02SXin Li 
Hash(const char * string_key) const40*f0dffb02SXin Li uint32_t StringsHasher::Hash(const char* string_key) const {
41*f0dffb02SXin Li   return HashString(string_key);
42*f0dffb02SXin Li }
43*f0dffb02SXin Li 
Compare(const char * string_key,const String * string) const44*f0dffb02SXin Li bool StringsHasher::Compare(const char* string_key, const String* string) const {
45*f0dffb02SXin Li   return dex::Utf8Cmp(string_key, string->c_str()) == 0;
46*f0dffb02SXin Li }
47*f0dffb02SXin Li 
Hash(const std::string & proto_key) const48*f0dffb02SXin Li uint32_t ProtosHasher::Hash(const std::string& proto_key) const {
49*f0dffb02SXin Li   return HashString(proto_key.c_str());
50*f0dffb02SXin Li }
51*f0dffb02SXin Li 
Compare(const std::string & proto_key,const Proto * proto) const52*f0dffb02SXin Li bool ProtosHasher::Compare(const std::string& proto_key, const Proto* proto) const {
53*f0dffb02SXin Li   return proto_key == proto->Signature();
54*f0dffb02SXin Li }
55*f0dffb02SXin Li 
GetKey(const EncodedMethod * method) const56*f0dffb02SXin Li MethodKey MethodsHasher::GetKey(const EncodedMethod* method) const {
57*f0dffb02SXin Li   MethodKey method_key;
58*f0dffb02SXin Li   method_key.class_descriptor = method->decl->parent->descriptor;
59*f0dffb02SXin Li   method_key.method_name = method->decl->name;
60*f0dffb02SXin Li   method_key.prototype = method->decl->prototype;
61*f0dffb02SXin Li   return method_key;
62*f0dffb02SXin Li }
63*f0dffb02SXin Li 
Hash(const MethodKey & method_key) const64*f0dffb02SXin Li uint32_t MethodsHasher::Hash(const MethodKey& method_key) const {
65*f0dffb02SXin Li   return static_cast<uint32_t>(std::hash<void*>{}(method_key.class_descriptor) ^
66*f0dffb02SXin Li                                std::hash<void*>{}(method_key.method_name) ^
67*f0dffb02SXin Li                                std::hash<void*>{}(method_key.prototype));
68*f0dffb02SXin Li }
69*f0dffb02SXin Li 
Compare(const MethodKey & method_key,const EncodedMethod * method) const70*f0dffb02SXin Li bool MethodsHasher::Compare(const MethodKey& method_key, const EncodedMethod* method) const {
71*f0dffb02SXin Li   return method_key.class_descriptor == method->decl->parent->descriptor &&
72*f0dffb02SXin Li          method_key.method_name == method->decl->name &&
73*f0dffb02SXin Li          method_key.prototype == method->decl->prototype;
74*f0dffb02SXin Li }
75*f0dffb02SXin Li 
76*f0dffb02SXin Li // Human-readable type declaration
Decl() const77*f0dffb02SXin Li std::string Type::Decl() const {
78*f0dffb02SXin Li   return dex::DescriptorToDecl(descriptor->c_str());
79*f0dffb02SXin Li }
80*f0dffb02SXin Li 
GetCategory() const81*f0dffb02SXin Li Type::Category Type::GetCategory() const {
82*f0dffb02SXin Li   switch (*descriptor->c_str()) {
83*f0dffb02SXin Li     case 'L':
84*f0dffb02SXin Li     case '[':
85*f0dffb02SXin Li       return Category::Reference;
86*f0dffb02SXin Li     case 'V':
87*f0dffb02SXin Li       return Category::Void;
88*f0dffb02SXin Li     case 'D':
89*f0dffb02SXin Li     case 'J':
90*f0dffb02SXin Li       return Category::WideScalar;
91*f0dffb02SXin Li     default:
92*f0dffb02SXin Li       return Category::Scalar;
93*f0dffb02SXin Li   }
94*f0dffb02SXin Li }
95*f0dffb02SXin Li 
96*f0dffb02SXin Li // Create the corresponding JNI signature:
97*f0dffb02SXin Li //  https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/types.html#type_signatures
Signature() const98*f0dffb02SXin Li std::string Proto::Signature() const {
99*f0dffb02SXin Li   std::stringstream ss;
100*f0dffb02SXin Li   ss << "(";
101*f0dffb02SXin Li   if (param_types != nullptr) {
102*f0dffb02SXin Li     for (const auto& type : param_types->types) {
103*f0dffb02SXin Li       ss << type->descriptor->c_str();
104*f0dffb02SXin Li     }
105*f0dffb02SXin Li   }
106*f0dffb02SXin Li   ss << ")";
107*f0dffb02SXin Li   ss << return_type->descriptor->c_str();
108*f0dffb02SXin Li   return ss.str();
109*f0dffb02SXin Li }
110*f0dffb02SXin Li 
IsField()111*f0dffb02SXin Li bool MethodHandle::IsField(){
112*f0dffb02SXin Li   return (
113*f0dffb02SXin Li     method_handle_type == dex::METHOD_HANDLE_TYPE_STATIC_PUT ||
114*f0dffb02SXin Li     method_handle_type == dex::METHOD_HANDLE_TYPE_STATIC_GET ||
115*f0dffb02SXin Li     method_handle_type == dex::METHOD_HANDLE_TYPE_INSTANCE_PUT ||
116*f0dffb02SXin Li     method_handle_type == dex::METHOD_HANDLE_TYPE_INSTANCE_GET
117*f0dffb02SXin Li   );
118*f0dffb02SXin Li }
119*f0dffb02SXin Li 
120*f0dffb02SXin Li // Helper for IR normalization
121*f0dffb02SXin Li // (it sorts items and update the numeric idexes to match)
122*f0dffb02SXin Li template <class T, class C>
IndexItems(std::vector<T> & items,C comp)123*f0dffb02SXin Li static void IndexItems(std::vector<T>& items, C comp) {
124*f0dffb02SXin Li   std::sort(items.begin(), items.end(), comp);
125*f0dffb02SXin Li   for (size_t i = 0; i < items.size(); ++i) {
126*f0dffb02SXin Li     items[i]->index = i;
127*f0dffb02SXin Li   }
128*f0dffb02SXin Li }
129*f0dffb02SXin Li 
130*f0dffb02SXin Li // Helper for IR normalization (DFS for topological sort)
131*f0dffb02SXin Li //
132*f0dffb02SXin Li // NOTE: this recursive version is clean and simple and we know
133*f0dffb02SXin Li //  that the max depth is bounded (exactly 1 for JVMTI and a small
134*f0dffb02SXin Li //  max for general case - the largest .dex file in AOSP has 5000 classes
135*f0dffb02SXin Li //  total)
136*f0dffb02SXin Li //
TopSortClassIndex(Class * irClass,dex::u4 * nextIndex)137*f0dffb02SXin Li void DexFile::TopSortClassIndex(Class* irClass, dex::u4* nextIndex) {
138*f0dffb02SXin Li   if (irClass->index == dex::u4(-1)) {
139*f0dffb02SXin Li     if (irClass->super_class && irClass->super_class->class_def) {
140*f0dffb02SXin Li       TopSortClassIndex(irClass->super_class->class_def, nextIndex);
141*f0dffb02SXin Li     }
142*f0dffb02SXin Li 
143*f0dffb02SXin Li     if (irClass->interfaces) {
144*f0dffb02SXin Li       for (Type* interfaceType : irClass->interfaces->types) {
145*f0dffb02SXin Li         if (interfaceType->class_def) {
146*f0dffb02SXin Li           TopSortClassIndex(interfaceType->class_def, nextIndex);
147*f0dffb02SXin Li         }
148*f0dffb02SXin Li       }
149*f0dffb02SXin Li     }
150*f0dffb02SXin Li 
151*f0dffb02SXin Li     SLICER_CHECK_LT(*nextIndex, classes.size());
152*f0dffb02SXin Li     irClass->index = (*nextIndex)++;
153*f0dffb02SXin Li   }
154*f0dffb02SXin Li }
155*f0dffb02SXin Li 
156*f0dffb02SXin Li // Helper for IR normalization
157*f0dffb02SXin Li // (topological sort the classes)
SortClassIndexes()158*f0dffb02SXin Li void DexFile::SortClassIndexes() {
159*f0dffb02SXin Li   for (auto& irClass : classes) {
160*f0dffb02SXin Li     irClass->index = dex::u4(-1);
161*f0dffb02SXin Li   }
162*f0dffb02SXin Li 
163*f0dffb02SXin Li   dex::u4 nextIndex = 0;
164*f0dffb02SXin Li   for (auto& irClass : classes) {
165*f0dffb02SXin Li     TopSortClassIndex(irClass.get(), &nextIndex);
166*f0dffb02SXin Li   }
167*f0dffb02SXin Li }
168*f0dffb02SXin Li 
169*f0dffb02SXin Li // Helper for NormalizeClass()
SortEncodedFields(std::vector<EncodedField * > * fields)170*f0dffb02SXin Li static void SortEncodedFields(std::vector<EncodedField*>* fields) {
171*f0dffb02SXin Li   std::sort(fields->begin(), fields->end(),
172*f0dffb02SXin Li             [](const EncodedField* a, const EncodedField* b) {
173*f0dffb02SXin Li               SLICER_CHECK(a->decl->index != b->decl->index || a == b);
174*f0dffb02SXin Li               return a->decl->index < b->decl->index;
175*f0dffb02SXin Li             });
176*f0dffb02SXin Li }
177*f0dffb02SXin Li 
178*f0dffb02SXin Li // Helper for NormalizeClass()
SortEncodedMethods(std::vector<EncodedMethod * > * methods)179*f0dffb02SXin Li static void SortEncodedMethods(std::vector<EncodedMethod*>* methods) {
180*f0dffb02SXin Li   std::sort(methods->begin(), methods->end(),
181*f0dffb02SXin Li             [](const EncodedMethod* a, const EncodedMethod* b) {
182*f0dffb02SXin Li               SLICER_CHECK(a->decl->index != b->decl->index || a == b);
183*f0dffb02SXin Li               return a->decl->index < b->decl->index;
184*f0dffb02SXin Li             });
185*f0dffb02SXin Li }
186*f0dffb02SXin Li 
187*f0dffb02SXin Li // Helper for IR normalization
188*f0dffb02SXin Li // (sort the field & method arrays)
NormalizeClass(Class * irClass)189*f0dffb02SXin Li static void NormalizeClass(Class* irClass) {
190*f0dffb02SXin Li   SortEncodedFields(&irClass->static_fields);
191*f0dffb02SXin Li   SortEncodedFields(&irClass->instance_fields);
192*f0dffb02SXin Li   SortEncodedMethods(&irClass->direct_methods);
193*f0dffb02SXin Li   SortEncodedMethods(&irClass->virtual_methods);
194*f0dffb02SXin Li }
195*f0dffb02SXin Li 
196*f0dffb02SXin Li // Prepare the IR for generating a .dex image
197*f0dffb02SXin Li // (the .dex format requires a specific sort order for some of the arrays, etc...)
198*f0dffb02SXin Li //
199*f0dffb02SXin Li // TODO: not a great solution - move this logic to the writer!
200*f0dffb02SXin Li //
201*f0dffb02SXin Li // TODO: the comparison predicate can be better expressed by using std::tie()
202*f0dffb02SXin Li //  Ex. FieldDecl has a method comp() returning tie(parent->index, name->index, type->index)
203*f0dffb02SXin Li //
Normalize()204*f0dffb02SXin Li void DexFile::Normalize() {
205*f0dffb02SXin Li   // sort build the .dex indexes
206*f0dffb02SXin Li   IndexItems(strings, [](const own<String>& a, const own<String>& b) {
207*f0dffb02SXin Li     // this list must be sorted by std::string contents, using UTF-16 code point values
208*f0dffb02SXin Li     // (not in a locale-sensitive manner)
209*f0dffb02SXin Li     return dex::Utf8Cmp(a->c_str(), b->c_str()) < 0;
210*f0dffb02SXin Li   });
211*f0dffb02SXin Li 
212*f0dffb02SXin Li   IndexItems(types, [](const own<Type>& a, const own<Type>& b) {
213*f0dffb02SXin Li     // this list must be sorted by string_id index
214*f0dffb02SXin Li     return a->descriptor->index < b->descriptor->index;
215*f0dffb02SXin Li   });
216*f0dffb02SXin Li 
217*f0dffb02SXin Li   IndexItems(protos, [](const own<Proto>& a, const own<Proto>& b) {
218*f0dffb02SXin Li     // this list must be sorted in return-type (by type_id index) major order,
219*f0dffb02SXin Li     // and then by argument list (lexicographic ordering, individual arguments
220*f0dffb02SXin Li     // ordered by type_id index)
221*f0dffb02SXin Li     if (a->return_type->index != b->return_type->index) {
222*f0dffb02SXin Li       return a->return_type->index < b->return_type->index;
223*f0dffb02SXin Li     } else {
224*f0dffb02SXin Li       std::vector<Type*> empty;
225*f0dffb02SXin Li       const auto& aParamTypes = a->param_types ? a->param_types->types : empty;
226*f0dffb02SXin Li       const auto& bParamTypes = b->param_types ? b->param_types->types : empty;
227*f0dffb02SXin Li       return std::lexicographical_compare(
228*f0dffb02SXin Li           aParamTypes.begin(), aParamTypes.end(), bParamTypes.begin(),
229*f0dffb02SXin Li           bParamTypes.end(),
230*f0dffb02SXin Li           [](const Type* t1, const Type* t2) { return t1->index < t2->index; });
231*f0dffb02SXin Li     }
232*f0dffb02SXin Li   });
233*f0dffb02SXin Li 
234*f0dffb02SXin Li   IndexItems(fields, [](const own<FieldDecl>& a, const own<FieldDecl>& b) {
235*f0dffb02SXin Li     // this list must be sorted, where the defining type (by type_id index) is
236*f0dffb02SXin Li     // the major order, field name (by string_id index) is the intermediate
237*f0dffb02SXin Li     // order, and type (by type_id index) is the minor order
238*f0dffb02SXin Li     return (a->parent->index != b->parent->index)
239*f0dffb02SXin Li                ? a->parent->index < b->parent->index
240*f0dffb02SXin Li                : (a->name->index != b->name->index)
241*f0dffb02SXin Li                      ? a->name->index < b->name->index
242*f0dffb02SXin Li                      : a->type->index < b->type->index;
243*f0dffb02SXin Li   });
244*f0dffb02SXin Li 
245*f0dffb02SXin Li   IndexItems(methods, [](const own<MethodDecl>& a, const own<MethodDecl>& b) {
246*f0dffb02SXin Li     // this list must be sorted, where the defining type (by type_id index) is
247*f0dffb02SXin Li     // the major order, method name (by string_id index) is the intermediate
248*f0dffb02SXin Li     // order, and method prototype (by proto_id index) is the minor order
249*f0dffb02SXin Li     return (a->parent->index != b->parent->index)
250*f0dffb02SXin Li                ? a->parent->index < b->parent->index
251*f0dffb02SXin Li                : (a->name->index != b->name->index)
252*f0dffb02SXin Li                      ? a->name->index < b->name->index
253*f0dffb02SXin Li                      : a->prototype->index < b->prototype->index;
254*f0dffb02SXin Li   });
255*f0dffb02SXin Li 
256*f0dffb02SXin Li   // reverse topological sort
257*f0dffb02SXin Li   //
258*f0dffb02SXin Li   // the classes must be ordered such that a given class's superclass and
259*f0dffb02SXin Li   // implemented interfaces appear in the list earlier than the referring
260*f0dffb02SXin Li   // class
261*f0dffb02SXin Li   //
262*f0dffb02SXin Li   // CONSIDER: for the BCI-only scenario we can avoid this
263*f0dffb02SXin Li   //
264*f0dffb02SXin Li   SortClassIndexes();
265*f0dffb02SXin Li 
266*f0dffb02SXin Li   IndexItems(classes, [&](const own<Class>& a, const own<Class>& b) {
267*f0dffb02SXin Li     SLICER_CHECK_LT(a->index, classes.size());
268*f0dffb02SXin Li     SLICER_CHECK_LT(b->index, classes.size());
269*f0dffb02SXin Li     SLICER_CHECK(a->index != b->index || a == b);
270*f0dffb02SXin Li     return a->index < b->index;
271*f0dffb02SXin Li   });
272*f0dffb02SXin Li 
273*f0dffb02SXin Li   // normalize class data
274*f0dffb02SXin Li   for (const auto& irClass : classes) {
275*f0dffb02SXin Li     NormalizeClass(irClass.get());
276*f0dffb02SXin Li   }
277*f0dffb02SXin Li 
278*f0dffb02SXin Li   // normalize annotations
279*f0dffb02SXin Li   for (const auto& irAnnotation : annotations) {
280*f0dffb02SXin Li     // elements must be sorted in increasing order by string_id index
281*f0dffb02SXin Li     auto& elements = irAnnotation->elements;
282*f0dffb02SXin Li     std::sort(elements.begin(), elements.end(),
283*f0dffb02SXin Li               [](const AnnotationElement* a, const AnnotationElement* b) {
284*f0dffb02SXin Li                 return a->name->index < b->name->index;
285*f0dffb02SXin Li               });
286*f0dffb02SXin Li   }
287*f0dffb02SXin Li 
288*f0dffb02SXin Li   // normalize "annotation_set_item"
289*f0dffb02SXin Li   for (const auto& irAnnotationSet : annotation_sets) {
290*f0dffb02SXin Li     // The elements must be sorted in increasing order, by type_idx
291*f0dffb02SXin Li     auto& annotations = irAnnotationSet->annotations;
292*f0dffb02SXin Li     std::sort(annotations.begin(), annotations.end(),
293*f0dffb02SXin Li               [](const Annotation* a, const Annotation* b) {
294*f0dffb02SXin Li                 return a->type->index < b->type->index;
295*f0dffb02SXin Li               });
296*f0dffb02SXin Li   }
297*f0dffb02SXin Li 
298*f0dffb02SXin Li   // normalize "annotations_directory_item"
299*f0dffb02SXin Li   for (const auto& irAnnotationDirectory : annotations_directories) {
300*f0dffb02SXin Li     // field_annotations: The elements of the list must be
301*f0dffb02SXin Li     // sorted in increasing order, by field_idx
302*f0dffb02SXin Li     auto& field_annotations = irAnnotationDirectory->field_annotations;
303*f0dffb02SXin Li     std::sort(field_annotations.begin(), field_annotations.end(),
304*f0dffb02SXin Li               [](const FieldAnnotation* a, const FieldAnnotation* b) {
305*f0dffb02SXin Li                 return a->field_decl->index < b->field_decl->index;
306*f0dffb02SXin Li               });
307*f0dffb02SXin Li 
308*f0dffb02SXin Li     // method_annotations: The elements of the list must be
309*f0dffb02SXin Li     // sorted in increasing order, by method_idx
310*f0dffb02SXin Li     auto& method_annotations = irAnnotationDirectory->method_annotations;
311*f0dffb02SXin Li     std::sort(method_annotations.begin(), method_annotations.end(),
312*f0dffb02SXin Li               [](const MethodAnnotation* a, const MethodAnnotation* b) {
313*f0dffb02SXin Li                 return a->method_decl->index < b->method_decl->index;
314*f0dffb02SXin Li               });
315*f0dffb02SXin Li 
316*f0dffb02SXin Li     // parameter_annotations: The elements of the list must be
317*f0dffb02SXin Li     // sorted in increasing order, by method_idx
318*f0dffb02SXin Li     auto& param_annotations = irAnnotationDirectory->param_annotations;
319*f0dffb02SXin Li     std::sort(param_annotations.begin(), param_annotations.end(),
320*f0dffb02SXin Li               [](const ParamAnnotation* a, const ParamAnnotation* b) {
321*f0dffb02SXin Li                 return a->method_decl->index < b->method_decl->index;
322*f0dffb02SXin Li               });
323*f0dffb02SXin Li   }
324*f0dffb02SXin Li }
325*f0dffb02SXin Li 
326*f0dffb02SXin Li } // namespace ir
327*f0dffb02SXin Li 
328