xref: /aosp_15_r20/external/AFLplusplus/instrumentation/afl-gcc-cmptrs-pass.so.cc (revision 08b48e0b10e97b33e7b60c5b6e2243bd915777f2)
1 /* GCC plugin for cmplog routines instrumentation of code for AFL++.
2 
3    Copyright 2014-2019 Free Software Foundation, Inc
4    Copyright 2015, 2016 Google Inc. All rights reserved.
5    Copyright 2019-2020 AFLplusplus Project. All rights reserved.
6    Copyright 2019-2024 AdaCore
7 
8    Written by Alexandre Oliva <[email protected]>, based on the AFL++
9    LLVM CmpLog Routines pass by Andrea Fioraldi
10    <[email protected]>, and on the AFL GCC CmpLog pass.
11 
12    This program is free software: you can redistribute it and/or modify
13    it under the terms of the GNU General Public License as published by
14    the Free Software Foundation, either version 3 of the License, or
15    (at your option) any later version.
16 
17    This program is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20    GNU General Public License for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24 
25  */
26 
27 #include "afl-gcc-common.h"
28 
/* This plugin, being under the same license as GCC, satisfies the
   "GPL-compatible Software" definition in the GCC RUNTIME LIBRARY
   EXCEPTION, so it can be part of an "Eligible" "Compilation
   Process".

   NOTE(review): the definition sits outside the anonymous namespace
   that follows, presumably so that the symbol stays externally
   visible to whatever loads the plugin -- confirm against the GCC
   plugin documentation.  */
int plugin_is_GPL_compatible = 1;
34 
35 namespace {
36 
37 static const struct pass_data afl_cmptrs_pass_data = {
38 
39     .type = GIMPLE_PASS,
40     .name = "aflcmptrs",
41     .optinfo_flags = OPTGROUP_NONE,
42     .tv_id = TV_NONE,
43     .properties_required = 0,
44     .properties_provided = 0,
45     .properties_destroyed = 0,
46     .todo_flags_start = 0,
47     .todo_flags_finish = (TODO_update_ssa | TODO_cleanup_cfg | TODO_verify_il |
48                           TODO_rebuild_cgraph_edges),
49 
50 };
51 
52 struct afl_cmptrs_pass : afl_base_pass {
53 
afl_cmptrs_pass__anon21d00caf0111::afl_cmptrs_pass54   afl_cmptrs_pass(bool quiet)
55       : afl_base_pass(quiet, /*debug=*/false, afl_cmptrs_pass_data),
56         tp8u(),
57         cmptrs_hooks() {
58 
59   }
60 
61   /* A pointer type to a unsigned 8-bit integral type.  */
62   tree tp8u;
63 
64   /* Declarations for the various cmptrs hook functions, allocated on
65      demand..  [0] is for compares between any pointers, [1] is for
66      compares between G++ std::string, [2] is for compares between G++
67      std::string and GCC C strings, [3] and [4] are analogous to [1]
68      and [2] but for LLVM C++ strings.  */
69   tree cmptrs_hooks[5];
70 
cmptrs_hook__anon21d00caf0111::afl_cmptrs_pass71   tree cmptrs_hook(unsigned i) {
72 
73     if (!tp8u) {
74 
75       tree t8u;
76       if (BITS_PER_UNIT == 8)
77         t8u = unsigned_char_type_node;
78       else
79         t8u = build_nonstandard_integer_type(8, 1);
80       tp8u = build_pointer_type(t8u);
81 
82     }
83 
84     if (i <= ARRAY_SIZE(cmptrs_hooks) && cmptrs_hooks[i])
85       return cmptrs_hooks[i];
86 
87     const char *n = NULL;
88 
89     switch (i) {
90 
91       case 0:
92         n = "__cmplog_rtn_hook";
93         break;
94 
95       case 1:
96         n = "__cmplog_rtn_gcc_stdstring_stdstring";
97         break;
98 
99       case 2:
100         n = "__cmplog_rtn_gcc_stdstring_cstring";
101         break;
102 
103       case 3:
104         n = "__cmplog_rtn_llvm_stdstring_stdstring";
105         break;
106 
107       case 4:
108         n = "__cmplog_rtn_llvm_stdstring_cstring";
109         break;
110 
111       default:
112         gcc_unreachable();
113 
114     }
115 
116     tree fnt = build_function_type_list(void_type_node, tp8u, tp8u, NULL_TREE);
117     tree t = cmptrs_hooks[i] = build_fn_decl(n, fnt);
118 
119     /* Mark the newly-created decl as non-throwing, so that we can
120        insert call within basic blocks.  */
121     TREE_NOTHROW(t) = 1;
122 
123     return t;
124 
125   }
126 
127   /* Return true if T is the char* type.  */
is_c_string__anon21d00caf0111::afl_cmptrs_pass128   bool is_c_string(tree t) {
129 
130     return (POINTER_TYPE_P(t) &&
131             TYPE_MAIN_VARIANT(TREE_TYPE(t)) == char_type_node);
132 
133   }
134 
135   /* Return true if T is an indirect std::string type.  The LLVM pass
136      tests portions of the mangled name of the callee.  We could do
137      that in GCC too, but computing the mangled name may cause
138      template instantiations and get symbols defined that could
139      otherwise be considered unused.  We check for compatible layout,
140      and class, namespace, and field names.  These have been unchanged
141      since at least GCC 7, probably longer, up to GCC 11.  Odds are
142      that, if it were to change in significant ways, mangling would
143      also change to flag the incompatibility, and we'd have to use a
144      different hook anyway.  */
is_gxx_std_string__anon21d00caf0111::afl_cmptrs_pass145   bool is_gxx_std_string(tree t) {
146 
147     /* We need a pointer or reference type.  */
148     if (!POINTER_TYPE_P(t)) return false;
149 
150     /* Get to the pointed-to type.  */
151     t = TREE_TYPE(t);
152     if (!t) return false;
153 
154     /* Select the main variant, so that can compare types with pointers.  */
155     t = TYPE_MAIN_VARIANT(t);
156 
157     /* We expect it to be a record type.  */
158     if (TREE_CODE(t) != RECORD_TYPE) return false;
159 
160     /* The type has an identifier.  */
161     if (!TYPE_IDENTIFIER(t)) return false;
162 
163     /* The type of the template is basic_string.  */
164     if (strcmp(IDENTIFIER_POINTER(TYPE_IDENTIFIER(t)), "basic_string") != 0)
165       return false;
166 
167     /* It's declared in an internal namespace named __cxx11.  */
168     tree c = DECL_CONTEXT(TYPE_NAME(t));
169     if (!c || TREE_CODE(c) != NAMESPACE_DECL ||
170         strcmp(IDENTIFIER_POINTER(DECL_NAME(c)), "__cxx11") != 0)
171       return false;
172 
173     /* The __cxx11 namespace is a member of namespace std.  */
174     c = DECL_CONTEXT(c);
175     if (!c || TREE_CODE(c) != NAMESPACE_DECL ||
176         strcmp(IDENTIFIER_POINTER(DECL_NAME(c)), "std") != 0)
177       return false;
178 
179     /* And the std namespace is in the global namespace.  */
180     c = DECL_CONTEXT(c);
181     if (c && TREE_CODE(c) != TRANSLATION_UNIT_DECL) return false;
182 
183     /* Check that the first nonstatic data member of the record type
184        is named _M_dataplus.  */
185     for (c = TYPE_FIELDS(t); c; c = DECL_CHAIN(c))
186       if (TREE_CODE(c) == FIELD_DECL) break;
187     if (!c || !integer_zerop(DECL_FIELD_BIT_OFFSET(c)) ||
188         strcmp(IDENTIFIER_POINTER(DECL_NAME(c)), "_M_dataplus") != 0)
189       return false;
190 
191     /* Check that the second nonstatic data member of the record type
192        is named _M_string_length.  */
193     tree f2;
194     for (f2 = DECL_CHAIN(c); f2; f2 = DECL_CHAIN(f2))
195       if (TREE_CODE(f2) == FIELD_DECL) break;
196     if (!f2                       /* No need to check this field's offset.  */
197         || strcmp(IDENTIFIER_POINTER(DECL_NAME(f2)), "_M_string_length") != 0)
198       return false;
199 
200     /* The type of the second data member is size_t.  */
201     if (!TREE_TYPE(f2) || TYPE_MAIN_VARIANT(TREE_TYPE(f2)) != size_type_node)
202       return false;
203 
204     /* Now go back to the first data member.  Its type should be a
205        record type named _Alloc_hider.  */
206     c = TREE_TYPE(c);
207     if (!c || TREE_CODE(c) != RECORD_TYPE || !TYPE_IDENTIFIER(t) ||
208         strcmp(IDENTIFIER_POINTER(TYPE_IDENTIFIER(c)), "_Alloc_hider") != 0)
209       return false;
210 
211     /* And its first data member is named _M_p.  */
212     for (c = TYPE_FIELDS(c); c; c = DECL_CHAIN(c))
213       if (TREE_CODE(c) == FIELD_DECL) break;
214     if (!c || !integer_zerop(DECL_FIELD_BIT_OFFSET(c)) ||
215         strcmp(IDENTIFIER_POINTER(DECL_NAME(c)), "_M_p") != 0)
216       return false;
217 
218     /* For the basic_string<char> type we're interested in, the type
219        of the data member is the C string type.  */
220     if (!is_c_string(TREE_TYPE(c))) return false;
221 
222     /* This might not be the real thing, but the bits that matter for
223        the hook are there.  */
224 
225     return true;
226 
227   }
228 
229   /* ??? This is not implemented.  What would the point be of
230      recognizing LLVM's string type in GCC?  */
is_llvm_std_string__anon21d00caf0111::afl_cmptrs_pass231   bool is_llvm_std_string(tree t) {
232 
233     return false;
234 
235   }
236 
execute__anon21d00caf0111::afl_cmptrs_pass237   virtual unsigned int execute(function *fn) {
238 
239     if (!isInInstrumentList(fn)) return 0;
240 
241     basic_block bb;
242     FOR_EACH_BB_FN(bb, fn) {
243 
244       for (gimple_stmt_iterator gsi = gsi_after_labels(bb); !gsi_end_p(gsi);
245            gsi_next(&gsi)) {
246 
247         gimple stmt = gsi_stmt(gsi);
248 
249         /* We're only interested in GIMPLE_CALLs.  */
250         if (gimple_code(stmt) != GIMPLE_CALL) continue;
251 
252         if (gimple_call_num_args(stmt) < 2) continue;
253 
254         gcall *c = as_a<gcall *>(stmt);
255 
256         tree callee_type = gimple_call_fntype(c);
257 
258         if (!callee_type || !TYPE_ARG_TYPES(callee_type) ||
259             !TREE_CHAIN(TYPE_ARG_TYPES(callee_type)))
260           continue;
261 
262         tree arg_type[2] = {
263 
264             TYPE_MAIN_VARIANT(TREE_VALUE(TYPE_ARG_TYPES(callee_type))),
265             TYPE_MAIN_VARIANT(
266                 TREE_VALUE(TREE_CHAIN(TYPE_ARG_TYPES(callee_type))))};
267 
268         tree fn = NULL;
269         /* Callee arglist starts with two GCC std::string arguments.  */
270         if (arg_type[0] == arg_type[1] && is_gxx_std_string(arg_type[0]))
271           fn = cmptrs_hook(1);
272         /* Callee arglist starts with GCC std::string and C string.  */
273         else if (is_gxx_std_string(arg_type[0]) && is_c_string(arg_type[1]))
274           fn = cmptrs_hook(2);
275         /* Callee arglist starts with two LLVM std::string arguments.  */
276         else if (arg_type[0] == arg_type[1] && is_llvm_std_string(arg_type[0]))
277           fn = cmptrs_hook(3);
278         /* Callee arglist starts with LLVM std::string and C string.  */
279         else if (is_llvm_std_string(arg_type[0]) && is_c_string(arg_type[1]))
280           fn = cmptrs_hook(4);
281         /* Callee arglist starts with two pointers to the same type,
282            and callee returns a value.  */
283         else if (arg_type[0] == arg_type[1] && POINTER_TYPE_P(arg_type[0]) &&
284                  (TYPE_MAIN_VARIANT(gimple_call_return_type(c)) !=
285                   void_type_node))
286           fn = cmptrs_hook(0);
287         else
288           continue;
289 
290         tree arg[2] = {gimple_call_arg(c, 0), gimple_call_arg(c, 1)};
291 
292         for (unsigned i = 0; i < ARRAY_SIZE(arg); i++) {
293 
294           tree c = fold_convert_loc(UNKNOWN_LOCATION, tp8u, arg[i]);
295           if (!is_gimple_val(c)) {
296 
297             tree   s = make_ssa_name(tp8u);
298             gimple g = gimple_build_assign(s, c);
299             c = s;
300             gsi_insert_before(&gsi, g, GSI_SAME_STMT);
301 
302           }
303 
304           arg[i] = c;
305 
306         }
307 
308         gimple call = gimple_build_call(fn, 2, arg[0], arg[1]);
309         gsi_insert_before(&gsi, call, GSI_SAME_STMT);
310 
311       }
312 
313     }
314 
315     return 0;
316 
317   }
318 
319 };
320 
321 static struct plugin_info afl_cmptrs_plugin = {
322 
323     .version = "20220420",
324     .help = G_("AFL gcc cmptrs plugin\n\
325 \n\
326 Set AFL_QUIET in the environment to silence it.\n\
327 "),
328 
329 };
330 
331 }  // namespace
332 
333 /* This is the function GCC calls when loading a plugin.  Initialize
334    and register further callbacks.  */
plugin_init(struct plugin_name_args * info,struct plugin_gcc_version * version)335 int plugin_init(struct plugin_name_args   *info,
336                 struct plugin_gcc_version *version) {
337 
338   if (!plugin_default_version_check(version, &gcc_version))
339     FATAL(G_("GCC and plugin have incompatible versions, expected GCC %s, "
340              "is %s"),
341           gcc_version.basever, version->basever);
342 
343   /* Show a banner.  */
344   bool quiet = false;
345   if (isatty(2) && !getenv("AFL_QUIET"))
346     SAYF(cCYA "afl-gcc-cmptrs-pass " cBRI VERSION cRST
347               " by <[email protected]>\n");
348   else
349     quiet = true;
350 
351   const char *name = info->base_name;
352   register_callback(name, PLUGIN_INFO, NULL, &afl_cmptrs_plugin);
353 
354   afl_cmptrs_pass          *aflp = new afl_cmptrs_pass(quiet);
355   struct register_pass_info pass_info = {
356 
357       .pass = aflp,
358       .reference_pass_name = "ssa",
359       .ref_pass_instance_number = 1,
360       .pos_op = PASS_POS_INSERT_AFTER,
361 
362   };
363 
364   register_callback(name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info);
365 
366   return 0;
367 
368 }
369 
370