xref: /aosp_15_r20/external/mesa3d/src/compiler/glsl/opt_function_inlining.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_function_inlining.cpp
26  *
27  * Replaces calls to functions with the body of the function.
28  */
29 
30 #include "ir.h"
31 #include "ir_visitor.h"
32 #include "ir_rvalue_visitor.h"
33 #include "ir_expression_flattening.h"
34 #include "compiler/glsl_types.h"
35 #include "util/hash_table.h"
36 
37 static void
38 do_variable_replacement(exec_list *instructions,
39                         ir_variable *orig,
40                         ir_rvalue *repl);
41 
42 namespace {
43 
44 class ir_save_lvalue_visitor : public ir_hierarchical_visitor {
45 public:
46    virtual ir_visitor_status visit_enter(ir_dereference_array *);
47 };
48 
49 } /* unnamed namespace */
50 
51 static void
replace_return_with_assignment(ir_instruction * ir,void * data)52 replace_return_with_assignment(ir_instruction *ir, void *data)
53 {
54    void *ctx = ralloc_parent(ir);
55    ir_dereference *orig_deref = (ir_dereference *) data;
56    ir_return *ret = ir->as_return();
57 
58    if (ret) {
59       if (ret->value) {
60 	 ir_rvalue *lhs = orig_deref->clone(ctx, NULL);
61          ret->replace_with(new(ctx) ir_assignment(lhs, ret->value));
62       } else {
63 	 /* un-valued return has to be the last return, or we shouldn't
64 	  * have reached here. (see can_inline()).
65 	  */
66 	 assert(ret->next->is_tail_sentinel());
67 	 ret->remove();
68       }
69    }
70 }
71 
72 /* Save the given lvalue before the given instruction.
73  *
74  * This is done by adding temporary variables into which the current value
75  * of any array indices are saved, and then modifying the dereference chain
76  * in-place to point to those temporary variables.
77  *
78  * The hierarchical visitor is only used to traverse the left-hand-side chain
79  * of derefs.
80  */
81 ir_visitor_status
visit_enter(ir_dereference_array * deref)82 ir_save_lvalue_visitor::visit_enter(ir_dereference_array *deref)
83 {
84    if (deref->array_index->ir_type != ir_type_constant) {
85       void *ctx = ralloc_parent(deref);
86       ir_variable *index;
87       ir_assignment *assignment;
88 
89       index = new(ctx) ir_variable(deref->array_index->type, "saved_idx", ir_var_temporary);
90       base_ir->insert_before(index);
91 
92       assignment = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(index),
93                                           deref->array_index);
94       base_ir->insert_before(assignment);
95 
96       deref->array_index = new(ctx) ir_dereference_variable(index);
97    }
98 
99    deref->array->accept(this);
100    return visit_stop;
101 }
102 
103 static bool
should_replace_variable(ir_variable * sig_param,ir_rvalue * param,bool is_builtin)104 should_replace_variable(ir_variable *sig_param, ir_rvalue *param,
105                         bool is_builtin) {
106 
107    if (sig_param->data.mode != ir_var_function_in &&
108        sig_param->data.mode != ir_var_const_in)
109       return false;
110 
111    /* Some places in glsl_to_nir() expect images to always be copied to a temp
112     * first.
113     */
114    if (glsl_type_is_image(glsl_without_array(sig_param->type)) && !param->is_dereference())
115       return false;
116 
117    /* SSBO and shared vars might be passed to a built-in such as an atomic
118     * memory function, where copying these to a temp before passing to the
119     * atomic function is not valid so we must replace these instead. Also,
120     * shader inputs for interpolateAt funtions also need to be replaced.
121     *
122     * Our builtins should always use temps and not the inputs themselves to
123     * store temporay values so just checking is_builtin rather than string
124     * comparing the function name for e.g atomic* should always be safe.
125     */
126    if (is_builtin)
127       return true;
128 
129    /* For opaque types, we want the inlined variable references
130     * referencing the passed in variable, since that will have
131     * the location information, which an assignment of an opaque
132     * variable wouldn't.
133     */
134    return glsl_contains_opaque(sig_param->type);
135 }
136 
137 void
generate_inline(ir_instruction * next_ir)138 ir_call::generate_inline(ir_instruction *next_ir)
139 {
140    void *ctx = ralloc_parent(this);
141    ir_variable **parameters;
142    unsigned num_parameters;
143    int i;
144    struct hash_table *ht;
145 
146    ht = _mesa_pointer_hash_table_create(NULL);
147 
148    num_parameters = this->callee->parameters.length();
149    parameters = new ir_variable *[num_parameters];
150 
151    /* Generate the declarations for the parameters to our inlined code,
152     * and set up the mapping of real function body variables to ours.
153     */
154    i = 0;
155    foreach_two_lists(formal_node, &this->callee->parameters,
156                      actual_node, &this->actual_parameters) {
157       ir_variable *sig_param = (ir_variable *) formal_node;
158       ir_rvalue *param = (ir_rvalue *) actual_node;
159 
160       /* Generate a new variable for the parameter. */
161       if (should_replace_variable(sig_param, param,
162                                   this->callee->is_builtin())) {
163          /* Actual replacement happens below */
164 	 parameters[i] = NULL;
165       } else {
166 	 parameters[i] = sig_param->clone(ctx, ht);
167 	 parameters[i]->data.mode = ir_var_temporary;
168 
169 	 /* Remove the read-only decoration because we're going to write
170 	  * directly to this variable.  If the cloned variable is left
171 	  * read-only and the inlined function is inside a loop, the loop
172 	  * analysis code will get confused.
173 	  */
174 	 parameters[i]->data.read_only = false;
175 	 next_ir->insert_before(parameters[i]);
176       }
177 
178       /* Section 6.1.1 (Function Calling Conventions) of the OpenGL Shading
179        * Language 4.5 spec says:
180        *
181        *    "All arguments are evaluated at call time, exactly once, in order,
182        *     from left to right. [...] Evaluation of an out parameter results
183        *     in an l-value that is used to copy out a value when the function
184        *     returns."
185        *
186        * I.e., we have to take temporary copies of any relevant array indices
187        * before the function body is executed.
188        *
189        * This ensures that
190        * (a) if an array index expressions refers to a variable that is
191        *     modified by the execution of the function body, we use the
192        *     original value as intended, and
193        * (b) if an array index expression has side effects, those side effects
194        *     are only executed once and at the right time.
195        */
196       if (parameters[i]) {
197          if (sig_param->data.mode == ir_var_function_in ||
198              sig_param->data.mode == ir_var_const_in) {
199             ir_assignment *assign;
200 
201             assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
202                                             param);
203             next_ir->insert_before(assign);
204          } else {
205             assert(sig_param->data.mode == ir_var_function_out ||
206                    sig_param->data.mode == ir_var_function_inout);
207             assert(param->is_lvalue());
208 
209             ir_save_lvalue_visitor v;
210             v.base_ir = next_ir;
211 
212             param->accept(&v);
213 
214             if (sig_param->data.mode == ir_var_function_inout) {
215                ir_assignment *assign;
216 
217                assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
218                                                param->clone(ctx, NULL)->as_rvalue());
219                next_ir->insert_before(assign);
220             }
221          }
222       }
223 
224       ++i;
225    }
226 
227    exec_list new_instructions;
228 
229    /* Generate the inlined body of the function to a new list */
230    foreach_in_list(ir_instruction, ir, &callee->body) {
231       ir_instruction *new_ir = ir->clone(ctx, ht);
232 
233       new_instructions.push_tail(new_ir);
234       visit_tree(new_ir, replace_return_with_assignment, this->return_deref);
235    }
236 
237    /* If any opaque types were passed in, replace any deref of the
238     * opaque variable with a deref of the argument.
239     */
240    foreach_two_lists(formal_node, &this->callee->parameters,
241                      actual_node, &this->actual_parameters) {
242       ir_rvalue *const param = (ir_rvalue *) actual_node;
243       ir_variable *sig_param = (ir_variable *) formal_node;
244 
245       if (should_replace_variable(sig_param, param,
246                                   this->callee->is_builtin())) {
247          do_variable_replacement(&new_instructions, sig_param, param);
248       }
249    }
250 
251    /* Now push those new instructions in. */
252    next_ir->insert_before(&new_instructions);
253 
254    /* Copy back the value of any 'out' parameters from the function body
255     * variables to our own.
256     */
257    i = 0;
258    foreach_two_lists(formal_node, &this->callee->parameters,
259                      actual_node, &this->actual_parameters) {
260       ir_rvalue *const param = (ir_rvalue *) actual_node;
261       const ir_variable *const sig_param = (ir_variable *) formal_node;
262 
263       /* Move our param variable into the actual param if it's an 'out' type. */
264       if (parameters[i] && (sig_param->data.mode == ir_var_function_out ||
265 			    sig_param->data.mode == ir_var_function_inout)) {
266 	 ir_assignment *assign;
267 
268          assign = new(ctx) ir_assignment(param,
269                                          new(ctx) ir_dereference_variable(parameters[i]));
270 	 next_ir->insert_before(assign);
271       }
272 
273       ++i;
274    }
275 
276    delete [] parameters;
277 
278    _mesa_hash_table_destroy(ht, NULL);
279 }
280 
281 /**
282  * Replaces references to the "orig" variable with a clone of "repl."
283  *
284  * From the spec, opaque types can appear in the tree as function
285  * (non-out) parameters and as the result of array indexing and
286  * structure field selection.  In our builtin implementation, they
287  * also appear in the sampler field of an ir_tex instruction.
288  */
289 
290 class ir_variable_replacement_visitor : public ir_rvalue_visitor {
291 public:
ir_variable_replacement_visitor(ir_variable * orig,ir_rvalue * repl)292    ir_variable_replacement_visitor(ir_variable *orig, ir_rvalue *repl)
293    {
294       this->orig = orig;
295       this->repl = repl;
296    }
297 
~ir_variable_replacement_visitor()298    virtual ~ir_variable_replacement_visitor()
299    {
300    }
301 
302    virtual ir_visitor_status visit_leave(ir_call *);
303    virtual ir_visitor_status visit_leave(ir_texture *);
304    virtual ir_visitor_status visit_leave(ir_assignment *);
305 
306    void handle_rvalue(ir_rvalue **rvalue);
307    void replace_deref(ir_dereference **deref);
308    void replace_rvalue(ir_rvalue **rvalue);
309 
310    ir_variable *orig;
311    ir_rvalue *repl;
312 };
313 
314 void
replace_deref(ir_dereference ** deref)315 ir_variable_replacement_visitor::replace_deref(ir_dereference **deref)
316 {
317    ir_dereference_variable *deref_var = (*deref)->as_dereference_variable();
318    if (deref_var && deref_var->var == this->orig)
319       *deref = this->repl->as_dereference()->clone(ralloc_parent(*deref), NULL);
320 }
321 
322 void
handle_rvalue(ir_rvalue ** rvalue)323 ir_variable_replacement_visitor::handle_rvalue(ir_rvalue **rvalue)
324 {
325    replace_rvalue(rvalue);
326 }
327 
328 void
replace_rvalue(ir_rvalue ** rvalue)329 ir_variable_replacement_visitor::replace_rvalue(ir_rvalue **rvalue)
330 {
331    if (!*rvalue)
332       return;
333 
334    ir_dereference *deref = (*rvalue)->as_dereference();
335 
336    if (!deref)
337       return;
338 
339    ir_dereference_variable *deref_var = (deref)->as_dereference_variable();
340    if (deref_var && deref_var->var == this->orig)
341       *rvalue = this->repl->clone(ralloc_parent(deref), NULL);
342 }
343 
344 ir_visitor_status
visit_leave(ir_texture * ir)345 ir_variable_replacement_visitor::visit_leave(ir_texture *ir)
346 {
347    replace_deref(&ir->sampler);
348 
349    return rvalue_visit(ir);
350 }
351 
352 ir_visitor_status
visit_leave(ir_assignment * ir)353 ir_variable_replacement_visitor::visit_leave(ir_assignment *ir)
354 {
355    replace_deref(&ir->lhs);
356    replace_rvalue(&ir->rhs);
357 
358    return visit_continue;
359 }
360 
361 ir_visitor_status
visit_leave(ir_call * ir)362 ir_variable_replacement_visitor::visit_leave(ir_call *ir)
363 {
364    foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) {
365       ir_rvalue *new_param = param;
366       replace_rvalue(&new_param);
367 
368       if (new_param != param) {
369          param->replace_with(new_param);
370       }
371    }
372    return visit_continue;
373 }
374 
375 static void
do_variable_replacement(exec_list * instructions,ir_variable * orig,ir_rvalue * repl)376 do_variable_replacement(exec_list *instructions,
377                         ir_variable *orig,
378                         ir_rvalue *repl)
379 {
380    ir_variable_replacement_visitor v(orig, repl);
381 
382    visit_list_elements(&v, instructions);
383 }
384