1 /* GCC plugin for cmplog routines instrumentation of code for AFL++.
2
3 Copyright 2014-2019 Free Software Foundation, Inc
4 Copyright 2015, 2016 Google Inc. All rights reserved.
5 Copyright 2019-2020 AFLplusplus Project. All rights reserved.
6 Copyright 2019-2024 AdaCore
7
   Written by Alexandre Oliva <oliva@adacore.com>, based on the AFL++
   LLVM CmpLog Routines pass by Andrea Fioraldi
   <andreafioraldi@gmail.com>, and on the AFL GCC CmpLog pass.
11
12 This program is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
24
25 */
26
27 #include "afl-gcc-common.h"
28
/* License marker for the plugin.  This plugin is distributed under
   the same license as GCC, and so meets the "GPL-compatible Software"
   definition of the GCC RUNTIME LIBRARY EXCEPTION; using it therefore
   keeps a build an "Eligible" "Compilation Process".  */
int plugin_is_GPL_compatible = 1;
34
35 namespace {
36
37 static const struct pass_data afl_cmptrs_pass_data = {
38
39 .type = GIMPLE_PASS,
40 .name = "aflcmptrs",
41 .optinfo_flags = OPTGROUP_NONE,
42 .tv_id = TV_NONE,
43 .properties_required = 0,
44 .properties_provided = 0,
45 .properties_destroyed = 0,
46 .todo_flags_start = 0,
47 .todo_flags_finish = (TODO_update_ssa | TODO_cleanup_cfg | TODO_verify_il |
48 TODO_rebuild_cgraph_edges),
49
50 };
51
52 struct afl_cmptrs_pass : afl_base_pass {
53
afl_cmptrs_pass__anon21d00caf0111::afl_cmptrs_pass54 afl_cmptrs_pass(bool quiet)
55 : afl_base_pass(quiet, /*debug=*/false, afl_cmptrs_pass_data),
56 tp8u(),
57 cmptrs_hooks() {
58
59 }
60
61 /* A pointer type to a unsigned 8-bit integral type. */
62 tree tp8u;
63
64 /* Declarations for the various cmptrs hook functions, allocated on
65 demand.. [0] is for compares between any pointers, [1] is for
66 compares between G++ std::string, [2] is for compares between G++
67 std::string and GCC C strings, [3] and [4] are analogous to [1]
68 and [2] but for LLVM C++ strings. */
69 tree cmptrs_hooks[5];
70
cmptrs_hook__anon21d00caf0111::afl_cmptrs_pass71 tree cmptrs_hook(unsigned i) {
72
73 if (!tp8u) {
74
75 tree t8u;
76 if (BITS_PER_UNIT == 8)
77 t8u = unsigned_char_type_node;
78 else
79 t8u = build_nonstandard_integer_type(8, 1);
80 tp8u = build_pointer_type(t8u);
81
82 }
83
84 if (i <= ARRAY_SIZE(cmptrs_hooks) && cmptrs_hooks[i])
85 return cmptrs_hooks[i];
86
87 const char *n = NULL;
88
89 switch (i) {
90
91 case 0:
92 n = "__cmplog_rtn_hook";
93 break;
94
95 case 1:
96 n = "__cmplog_rtn_gcc_stdstring_stdstring";
97 break;
98
99 case 2:
100 n = "__cmplog_rtn_gcc_stdstring_cstring";
101 break;
102
103 case 3:
104 n = "__cmplog_rtn_llvm_stdstring_stdstring";
105 break;
106
107 case 4:
108 n = "__cmplog_rtn_llvm_stdstring_cstring";
109 break;
110
111 default:
112 gcc_unreachable();
113
114 }
115
116 tree fnt = build_function_type_list(void_type_node, tp8u, tp8u, NULL_TREE);
117 tree t = cmptrs_hooks[i] = build_fn_decl(n, fnt);
118
119 /* Mark the newly-created decl as non-throwing, so that we can
120 insert call within basic blocks. */
121 TREE_NOTHROW(t) = 1;
122
123 return t;
124
125 }
126
127 /* Return true if T is the char* type. */
is_c_string__anon21d00caf0111::afl_cmptrs_pass128 bool is_c_string(tree t) {
129
130 return (POINTER_TYPE_P(t) &&
131 TYPE_MAIN_VARIANT(TREE_TYPE(t)) == char_type_node);
132
133 }
134
135 /* Return true if T is an indirect std::string type. The LLVM pass
136 tests portions of the mangled name of the callee. We could do
137 that in GCC too, but computing the mangled name may cause
138 template instantiations and get symbols defined that could
139 otherwise be considered unused. We check for compatible layout,
140 and class, namespace, and field names. These have been unchanged
141 since at least GCC 7, probably longer, up to GCC 11. Odds are
142 that, if it were to change in significant ways, mangling would
143 also change to flag the incompatibility, and we'd have to use a
144 different hook anyway. */
is_gxx_std_string__anon21d00caf0111::afl_cmptrs_pass145 bool is_gxx_std_string(tree t) {
146
147 /* We need a pointer or reference type. */
148 if (!POINTER_TYPE_P(t)) return false;
149
150 /* Get to the pointed-to type. */
151 t = TREE_TYPE(t);
152 if (!t) return false;
153
154 /* Select the main variant, so that can compare types with pointers. */
155 t = TYPE_MAIN_VARIANT(t);
156
157 /* We expect it to be a record type. */
158 if (TREE_CODE(t) != RECORD_TYPE) return false;
159
160 /* The type has an identifier. */
161 if (!TYPE_IDENTIFIER(t)) return false;
162
163 /* The type of the template is basic_string. */
164 if (strcmp(IDENTIFIER_POINTER(TYPE_IDENTIFIER(t)), "basic_string") != 0)
165 return false;
166
167 /* It's declared in an internal namespace named __cxx11. */
168 tree c = DECL_CONTEXT(TYPE_NAME(t));
169 if (!c || TREE_CODE(c) != NAMESPACE_DECL ||
170 strcmp(IDENTIFIER_POINTER(DECL_NAME(c)), "__cxx11") != 0)
171 return false;
172
173 /* The __cxx11 namespace is a member of namespace std. */
174 c = DECL_CONTEXT(c);
175 if (!c || TREE_CODE(c) != NAMESPACE_DECL ||
176 strcmp(IDENTIFIER_POINTER(DECL_NAME(c)), "std") != 0)
177 return false;
178
179 /* And the std namespace is in the global namespace. */
180 c = DECL_CONTEXT(c);
181 if (c && TREE_CODE(c) != TRANSLATION_UNIT_DECL) return false;
182
183 /* Check that the first nonstatic data member of the record type
184 is named _M_dataplus. */
185 for (c = TYPE_FIELDS(t); c; c = DECL_CHAIN(c))
186 if (TREE_CODE(c) == FIELD_DECL) break;
187 if (!c || !integer_zerop(DECL_FIELD_BIT_OFFSET(c)) ||
188 strcmp(IDENTIFIER_POINTER(DECL_NAME(c)), "_M_dataplus") != 0)
189 return false;
190
191 /* Check that the second nonstatic data member of the record type
192 is named _M_string_length. */
193 tree f2;
194 for (f2 = DECL_CHAIN(c); f2; f2 = DECL_CHAIN(f2))
195 if (TREE_CODE(f2) == FIELD_DECL) break;
196 if (!f2 /* No need to check this field's offset. */
197 || strcmp(IDENTIFIER_POINTER(DECL_NAME(f2)), "_M_string_length") != 0)
198 return false;
199
200 /* The type of the second data member is size_t. */
201 if (!TREE_TYPE(f2) || TYPE_MAIN_VARIANT(TREE_TYPE(f2)) != size_type_node)
202 return false;
203
204 /* Now go back to the first data member. Its type should be a
205 record type named _Alloc_hider. */
206 c = TREE_TYPE(c);
207 if (!c || TREE_CODE(c) != RECORD_TYPE || !TYPE_IDENTIFIER(t) ||
208 strcmp(IDENTIFIER_POINTER(TYPE_IDENTIFIER(c)), "_Alloc_hider") != 0)
209 return false;
210
211 /* And its first data member is named _M_p. */
212 for (c = TYPE_FIELDS(c); c; c = DECL_CHAIN(c))
213 if (TREE_CODE(c) == FIELD_DECL) break;
214 if (!c || !integer_zerop(DECL_FIELD_BIT_OFFSET(c)) ||
215 strcmp(IDENTIFIER_POINTER(DECL_NAME(c)), "_M_p") != 0)
216 return false;
217
218 /* For the basic_string<char> type we're interested in, the type
219 of the data member is the C string type. */
220 if (!is_c_string(TREE_TYPE(c))) return false;
221
222 /* This might not be the real thing, but the bits that matter for
223 the hook are there. */
224
225 return true;
226
227 }
228
229 /* ??? This is not implemented. What would the point be of
230 recognizing LLVM's string type in GCC? */
is_llvm_std_string__anon21d00caf0111::afl_cmptrs_pass231 bool is_llvm_std_string(tree t) {
232
233 return false;
234
235 }
236
execute__anon21d00caf0111::afl_cmptrs_pass237 virtual unsigned int execute(function *fn) {
238
239 if (!isInInstrumentList(fn)) return 0;
240
241 basic_block bb;
242 FOR_EACH_BB_FN(bb, fn) {
243
244 for (gimple_stmt_iterator gsi = gsi_after_labels(bb); !gsi_end_p(gsi);
245 gsi_next(&gsi)) {
246
247 gimple stmt = gsi_stmt(gsi);
248
249 /* We're only interested in GIMPLE_CALLs. */
250 if (gimple_code(stmt) != GIMPLE_CALL) continue;
251
252 if (gimple_call_num_args(stmt) < 2) continue;
253
254 gcall *c = as_a<gcall *>(stmt);
255
256 tree callee_type = gimple_call_fntype(c);
257
258 if (!callee_type || !TYPE_ARG_TYPES(callee_type) ||
259 !TREE_CHAIN(TYPE_ARG_TYPES(callee_type)))
260 continue;
261
262 tree arg_type[2] = {
263
264 TYPE_MAIN_VARIANT(TREE_VALUE(TYPE_ARG_TYPES(callee_type))),
265 TYPE_MAIN_VARIANT(
266 TREE_VALUE(TREE_CHAIN(TYPE_ARG_TYPES(callee_type))))};
267
268 tree fn = NULL;
269 /* Callee arglist starts with two GCC std::string arguments. */
270 if (arg_type[0] == arg_type[1] && is_gxx_std_string(arg_type[0]))
271 fn = cmptrs_hook(1);
272 /* Callee arglist starts with GCC std::string and C string. */
273 else if (is_gxx_std_string(arg_type[0]) && is_c_string(arg_type[1]))
274 fn = cmptrs_hook(2);
275 /* Callee arglist starts with two LLVM std::string arguments. */
276 else if (arg_type[0] == arg_type[1] && is_llvm_std_string(arg_type[0]))
277 fn = cmptrs_hook(3);
278 /* Callee arglist starts with LLVM std::string and C string. */
279 else if (is_llvm_std_string(arg_type[0]) && is_c_string(arg_type[1]))
280 fn = cmptrs_hook(4);
281 /* Callee arglist starts with two pointers to the same type,
282 and callee returns a value. */
283 else if (arg_type[0] == arg_type[1] && POINTER_TYPE_P(arg_type[0]) &&
284 (TYPE_MAIN_VARIANT(gimple_call_return_type(c)) !=
285 void_type_node))
286 fn = cmptrs_hook(0);
287 else
288 continue;
289
290 tree arg[2] = {gimple_call_arg(c, 0), gimple_call_arg(c, 1)};
291
292 for (unsigned i = 0; i < ARRAY_SIZE(arg); i++) {
293
294 tree c = fold_convert_loc(UNKNOWN_LOCATION, tp8u, arg[i]);
295 if (!is_gimple_val(c)) {
296
297 tree s = make_ssa_name(tp8u);
298 gimple g = gimple_build_assign(s, c);
299 c = s;
300 gsi_insert_before(&gsi, g, GSI_SAME_STMT);
301
302 }
303
304 arg[i] = c;
305
306 }
307
308 gimple call = gimple_build_call(fn, 2, arg[0], arg[1]);
309 gsi_insert_before(&gsi, call, GSI_SAME_STMT);
310
311 }
312
313 }
314
315 return 0;
316
317 }
318
319 };
320
321 static struct plugin_info afl_cmptrs_plugin = {
322
323 .version = "20220420",
324 .help = G_("AFL gcc cmptrs plugin\n\
325 \n\
326 Set AFL_QUIET in the environment to silence it.\n\
327 "),
328
329 };
330
331 } // namespace
332
333 /* This is the function GCC calls when loading a plugin. Initialize
334 and register further callbacks. */
plugin_init(struct plugin_name_args * info,struct plugin_gcc_version * version)335 int plugin_init(struct plugin_name_args *info,
336 struct plugin_gcc_version *version) {
337
338 if (!plugin_default_version_check(version, &gcc_version))
339 FATAL(G_("GCC and plugin have incompatible versions, expected GCC %s, "
340 "is %s"),
341 gcc_version.basever, version->basever);
342
343 /* Show a banner. */
344 bool quiet = false;
345 if (isatty(2) && !getenv("AFL_QUIET"))
346 SAYF(cCYA "afl-gcc-cmptrs-pass " cBRI VERSION cRST
347 " by <[email protected]>\n");
348 else
349 quiet = true;
350
351 const char *name = info->base_name;
352 register_callback(name, PLUGIN_INFO, NULL, &afl_cmptrs_plugin);
353
354 afl_cmptrs_pass *aflp = new afl_cmptrs_pass(quiet);
355 struct register_pass_info pass_info = {
356
357 .pass = aflp,
358 .reference_pass_name = "ssa",
359 .ref_pass_instance_number = 1,
360 .pos_op = PASS_POS_INSERT_AFTER,
361
362 };
363
364 register_callback(name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info);
365
366 return 0;
367
368 }
369
370