1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <gtest/gtest.h>
25 #include "elk_fs.h"
26 #include "elk_fs_builder.h"
27 #include "elk_cfg.h"
28
29 using namespace elk;
30
31 class saturate_propagation_test : public ::testing::Test {
32 protected:
33 saturate_propagation_test();
34 ~saturate_propagation_test() override;
35
36 struct elk_compiler *compiler;
37 struct elk_compile_params params;
38 struct intel_device_info *devinfo;
39 void *ctx;
40 struct elk_wm_prog_data *prog_data;
41 struct gl_shader_program *shader_prog;
42 elk_fs_visitor *v;
43 fs_builder bld;
44 };
45
46 class saturate_propagation_fs_visitor : public elk_fs_visitor
47 {
48 public:
saturate_propagation_fs_visitor(struct elk_compiler * compiler,struct elk_compile_params * params,struct elk_wm_prog_data * prog_data,nir_shader * shader)49 saturate_propagation_fs_visitor(struct elk_compiler *compiler,
50 struct elk_compile_params *params,
51 struct elk_wm_prog_data *prog_data,
52 nir_shader *shader)
53 : elk_fs_visitor(compiler, params, NULL,
54 &prog_data->base, shader, 16, false, false) {}
55 };
56
57
saturate_propagation_test()58 saturate_propagation_test::saturate_propagation_test()
59 : bld(NULL, 0)
60 {
61 ctx = ralloc_context(NULL);
62 compiler = rzalloc(ctx, struct elk_compiler);
63 devinfo = rzalloc(ctx, struct intel_device_info);
64 compiler->devinfo = devinfo;
65
66 params = {};
67 params.mem_ctx = ctx;
68
69 prog_data = ralloc(ctx, struct elk_wm_prog_data);
70 nir_shader *shader =
71 nir_shader_create(ctx, MESA_SHADER_FRAGMENT, NULL, NULL);
72
73 v = new saturate_propagation_fs_visitor(compiler, ¶ms, prog_data, shader);
74
75 bld = fs_builder(v).at_end();
76
77 devinfo->ver = 6;
78 devinfo->verx10 = devinfo->ver * 10;
79 }
80
~saturate_propagation_test()81 saturate_propagation_test::~saturate_propagation_test()
82 {
83 delete v;
84 v = NULL;
85
86 ralloc_free(ctx);
87 ctx = NULL;
88 }
89
90
91 static elk_fs_inst *
instruction(elk_bblock_t * block,int num)92 instruction(elk_bblock_t *block, int num)
93 {
94 elk_fs_inst *inst = (elk_fs_inst *)block->start();
95 for (int i = 0; i < num; i++) {
96 inst = (elk_fs_inst *)inst->next;
97 }
98 return inst;
99 }
100
101 static bool
saturate_propagation(elk_fs_visitor * v)102 saturate_propagation(elk_fs_visitor *v)
103 {
104 const bool print = false;
105
106 if (print) {
107 fprintf(stderr, "= Before =\n");
108 v->cfg->dump();
109 }
110
111 bool ret = v->opt_saturate_propagation();
112
113 if (print) {
114 fprintf(stderr, "\n= After =\n");
115 v->cfg->dump();
116 }
117
118 return ret;
119 }
120
/* One producer with a single saturating copy: the saturate modifier is
 * expected to move from the MOV onto the ADD.
 */
TEST_F(saturate_propagation_test, basic)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.ADD(dst0, src0, src1);
   elk_fs_inst *sat_mov = bld.MOV(dst1, dst0);
   set_saturate(true, sat_mov);

   /* Input:
    *
    *    0: add(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  dst0
    *
    * Expected output:
    *
    *    0: add.sat(16)  dst0  src0  src1
    *    1: mov(16)      dst1  dst0
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_TRUE(saturate_propagation(v));

   /* The pass must not add or remove instructions. */
   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_TRUE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_FALSE(inst1->saturate);
}
154
/* The ADD result is also read by a non-saturating instruction, so the test
 * expects the pass to leave the program untouched.
 */
TEST_F(saturate_propagation_test, other_non_saturated_use)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg dst2 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.ADD(dst0, src0, src1);
   elk_fs_inst *sat_mov = bld.MOV(dst1, dst0);
   set_saturate(true, sat_mov);
   bld.ADD(dst2, dst0, src0);

   /* Input:
    *
    *    0: add(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  dst0
    *    2: add(16)      dst2  dst0  src0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);
   elk_fs_inst *inst2 = instruction(blk, 2);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->saturate);
   EXPECT_EQ(ELK_OPCODE_ADD, inst2->opcode);
}
191
/* The producing ADD is predicated, so the test expects the saturate
 * modifier to stay on the MOV.
 */
TEST_F(saturate_propagation_test, predicated_instruction)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   elk_fs_inst *pred_add = bld.ADD(dst0, src0, src1);
   pred_add->predicate = ELK_PREDICATE_NORMAL;
   set_saturate(true, bld.MOV(dst1, dst0));

   /* Input:
    *
    *    0: (+f0) add(16)  dst0  src0  src1
    *    1: mov.sat(16)    dst1  dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->saturate);
}
225
/* The saturating MOV reads the negated result of an RNDU; the test expects
 * the pass to make no change in that case.
 */
TEST_F(saturate_propagation_test, neg_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());

   bld.RNDU(dst0, src0);

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst1, neg_dst0));

   /* Input:
    *
    *    0: rndu(16)     dst0  src0
    *    1: mov.sat(16)  dst1  -dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_RNDU, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->saturate);
}
258
/* The saturating MOV reads the negated result of an ADD: the test expects
 * the negation to be pushed into both ADD sources and the saturate modifier
 * to move onto the ADD.
 */
TEST_F(saturate_propagation_test, add_neg_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.ADD(dst0, src0, src1);

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst1, neg_dst0));

   /* Input:
    *
    *    0: add(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  -dst0
    *
    * Expected output:
    *
    *    0: add.sat(16)  dst0  -src0  -src1
    *    1: mov(16)      dst1  dst0
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_TRUE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_TRUE(inst0->saturate);
   EXPECT_TRUE(inst0->src[0].negate);
   EXPECT_TRUE(inst0->src[1].negate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_FALSE(inst1->saturate);
}
295
/* Like add_neg_mov_sat, but the second ADD source is a float immediate: the
 * test expects the immediate's value itself to be negated.
 */
TEST_F(saturate_propagation_test, add_imm_float_neg_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = elk_imm_f(1.0f);

   bld.ADD(dst0, src0, src1);

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst1, neg_dst0));

   /* Input:
    *
    *    0: add(16)      dst0  src0  1.0f
    *    1: mov.sat(16)  dst1  -dst0
    *
    * Expected output:
    *
    *    0: add.sat(16)  dst0  -src0  -1.0f
    *    1: mov(16)      dst1  dst0
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_TRUE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_TRUE(inst0->saturate);
   EXPECT_TRUE(inst0->src[0].negate);
   EXPECT_EQ(inst0->src[1].f, -1.0f);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_FALSE(inst1->saturate);
}
332
/* The saturating MOV reads the negated result of a MUL: the test expects
 * the negation to be folded into the first MUL source and the saturate
 * modifier to move onto the MUL.
 */
TEST_F(saturate_propagation_test, mul_neg_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.MUL(dst0, src0, src1);

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst1, neg_dst0));

   /* Input:
    *
    *    0: mul(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  -dst0
    *
    * Expected output (the assertion below checks src[0], i.e. the first
    * source, for the negate modifier):
    *
    *    0: mul.sat(16)  dst0  -src0  src1
    *    1: mov(16)      dst1  dst0
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_TRUE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_MUL, inst0->opcode);
   EXPECT_TRUE(inst0->saturate);
   EXPECT_TRUE(inst0->src[0].negate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_FALSE(inst1->saturate);
   EXPECT_FALSE(inst1->src[0].negate);
}
369
/* The saturating MOV reads the negated result of a MAD: the test expects
 * the first two MAD sources to be negated, the third left alone, and the
 * saturate modifier to move onto the MAD.
 */
TEST_F(saturate_propagation_test, mad_neg_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());
   elk_fs_reg src2 = v->vgrf(glsl_float_type());

   bld.MAD(dst0, src0, src1, src2);

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst1, neg_dst0));

   /* Input:
    *
    *    0: mad(16)      dst0  src0  src1  src2
    *    1: mov.sat(16)  dst1  -dst0
    *
    * Expected output:
    *
    *    0: mad.sat(16)  dst0  -src0  -src1  src2
    *    1: mov(16)      dst1  dst0
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_TRUE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_MAD, inst0->opcode);
   EXPECT_TRUE(inst0->saturate);
   EXPECT_TRUE(inst0->src[0].negate);
   EXPECT_TRUE(inst0->src[1].negate);
   EXPECT_FALSE(inst0->src[2].negate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_FALSE(inst1->saturate);
   EXPECT_FALSE(inst1->src[0].negate);
}
409
/* Like mad_neg_mov_sat, but the first two MAD sources are float immediates:
 * the test expects their values to be negated in place.
 */
TEST_F(saturate_propagation_test, mad_imm_float_neg_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = elk_imm_f(1.0f);
   elk_fs_reg src1 = elk_imm_f(-2.0f);
   elk_fs_reg src2 = v->vgrf(glsl_float_type());

   /* The MAD builder avoids emitting immediates as direct sources, and
    * direct immediates are exactly what this test wants to exercise.  So
    * build the MAD with register sources and patch the immediates in
    * afterwards.
    */
   elk_fs_inst *mad = bld.MAD(dst0, src2, src2, src2);
   mad->src[0] = src0;
   mad->src[1] = src1;

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst1, neg_dst0));

   /* Input:
    *
    *    0: mad(16)      dst0  1.0f  -2.0f  src2
    *    1: mov.sat(16)  dst1  -dst0
    *
    * Expected output:
    *
    *    0: mad.sat(16)  dst0  -1.0f  2.0f  src2
    *    1: mov(16)      dst1  dst0
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_TRUE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_MAD, inst0->opcode);
   EXPECT_TRUE(inst0->saturate);
   EXPECT_EQ(inst0->src[0].f, -1.0f);
   EXPECT_EQ(inst0->src[1].f, 2.0f);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_FALSE(inst1->saturate);
   EXPECT_FALSE(inst1->src[0].negate);
}
453
/* The MUL result feeds two saturating MOVs, one reading it as-is and one
 * reading it negated; the test expects the pass to change nothing.
 */
TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg dst2 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.MUL(dst0, src0, src1);
   set_saturate(true, bld.MOV(dst1, dst0));

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst2, neg_dst0));

   /* Input:
    *
    *    0: mul(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  dst0
    *    2: mov.sat(16)  dst2  -dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);
   elk_fs_inst *inst2 = instruction(blk, 2);

   EXPECT_EQ(ELK_OPCODE_MUL, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   /* mul_neg_mov_sat shows that a successful propagation negates src[0] of
    * the MUL, so "no change" has to be verified on that source as well.
    */
   EXPECT_FALSE(inst0->src[0].negate);
   EXPECT_FALSE(inst0->src[1].negate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst2->opcode);
   EXPECT_TRUE(inst2->src[0].negate);
   EXPECT_TRUE(inst2->saturate);
}
494
/* The MUL result feeds two saturating MOVs that both read it negated; the
 * test expects the pass to change nothing.
 */
TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg dst2 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.MUL(dst0, src0, src1);

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst1, neg_dst0));
   set_saturate(true, bld.MOV(dst2, neg_dst0));

   /* Input:
    *
    *    0: mul(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  -dst0
    *    2: mov.sat(16)  dst2  -dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);
   elk_fs_inst *inst2 = instruction(blk, 2);

   EXPECT_EQ(ELK_OPCODE_MUL, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   /* mul_neg_mov_sat shows that a successful propagation negates src[0] of
    * the MUL, so "no change" has to be verified on that source as well.
    */
   EXPECT_FALSE(inst0->src[0].negate);
   EXPECT_FALSE(inst0->src[1].negate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->src[0].negate);
   EXPECT_TRUE(inst1->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst2->opcode);
   EXPECT_TRUE(inst2->src[0].negate);
   EXPECT_TRUE(inst2->saturate);
}
536
/* The saturating MOV reads the ADD result through an abs source modifier;
 * the test expects the pass to change nothing.
 */
TEST_F(saturate_propagation_test, abs_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.ADD(dst0, src0, src1);

   /* Same register as dst0, read through an abs source modifier. */
   elk_fs_reg abs_dst0 = dst0;
   abs_dst0.abs = true;
   set_saturate(true, bld.MOV(dst1, abs_dst0));

   /* Input:
    *
    *    0: add(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  (abs)dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->saturate);
}
570
/* The producing ADD already saturates, so the test expects the redundant
 * saturate modifier on the first MOV to be dropped even though the ADD
 * result has another, non-saturating reader.
 */
TEST_F(saturate_propagation_test, producer_saturates)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg dst2 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   set_saturate(true, bld.ADD(dst0, src0, src1));
   set_saturate(true, bld.MOV(dst1, dst0));
   bld.MOV(dst2, dst0);

   /* Input:
    *
    *    0: add.sat(16)  dst0  src0  src1
    *    1: mov.sat(16)  dst1  dst0
    *    2: mov(16)      dst2  dst0
    *
    * Expected output:
    *
    *    0: add.sat(16)  dst0  src0  src1
    *    1: mov(16)      dst1  dst0
    *    2: mov(16)      dst2  dst0
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_TRUE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);
   elk_fs_inst *inst2 = instruction(blk, 2);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_TRUE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_FALSE(inst1->saturate);
   /* The expected listing also covers the plain copy; verify it is still an
    * unsaturated MOV.
    */
   EXPECT_EQ(ELK_OPCODE_MOV, inst2->opcode);
   EXPECT_FALSE(inst2->saturate);
}
608
/* Every reader of the ADD result is a saturating MOV: the test expects the
 * saturate modifier to move onto the ADD and to be cleared on both MOVs.
 */
TEST_F(saturate_propagation_test, intervening_saturating_copy)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg dst2 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.ADD(dst0, src0, src1);
   elk_fs_inst *first_copy = bld.MOV(dst1, dst0);
   set_saturate(true, first_copy);
   elk_fs_inst *second_copy = bld.MOV(dst2, dst0);
   set_saturate(true, second_copy);

   /* Input:
    *
    *    0: add(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  dst0
    *    2: mov.sat(16)  dst2  dst0
    *
    * Expected output:
    *
    *    0: add.sat(16)  dst0  src0  src1
    *    1: mov(16)      dst1  dst0
    *    2: mov(16)      dst2  dst0
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_TRUE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);
   elk_fs_inst *inst2 = instruction(blk, 2);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_TRUE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_FALSE(inst1->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst2->opcode);
   EXPECT_FALSE(inst2->saturate);
}
648
/* A TEX that writes the whole of dst0 sits between the ADD that produced
 * dst0+2 and the saturating MOV that reads dst0+2; the test expects the
 * pass to change nothing.
 */
TEST_F(saturate_propagation_test, intervening_dest_write)
{
   elk_fs_reg dst0 = v->vgrf(glsl_vec4_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());
   elk_fs_reg src2 = v->vgrf(glsl_vec2_type());

   bld.ADD(offset(dst0, bld, 2), src0, src1);
   bld.emit(ELK_SHADER_OPCODE_TEX, dst0, src2)
      ->size_written = 8 * REG_SIZE;
   set_saturate(true, bld.MOV(dst1, offset(dst0, bld, 2)));

   /* Input:
    *
    *    0: add(16)        dst0+2  src0  src1
    *    1: tex(16) rlen 4 dst0+0  src2
    *    2: mov.sat(16)    dst1    dst0+2
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);
   elk_fs_inst *inst2 = instruction(blk, 2);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   EXPECT_EQ(ELK_SHADER_OPCODE_TEX, inst1->opcode);
   /* Check the TEX itself; this previously re-checked instruction 0. */
   EXPECT_FALSE(inst1->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst2->opcode);
   EXPECT_TRUE(inst2->saturate);
}
687
/* The MUL result feeds two saturating MOVs, the first reading it negated
 * and the second reading it as-is; the test expects the pass to change
 * nothing.
 */
TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg dst2 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.MUL(dst0, src0, src1);

   /* Same register as dst0, read through a negate source modifier. */
   elk_fs_reg neg_dst0 = dst0;
   neg_dst0.negate = true;
   set_saturate(true, bld.MOV(dst1, neg_dst0));
   set_saturate(true, bld.MOV(dst2, dst0));

   /* Input:
    *
    *    0: mul(16)      dst0  src0  src1
    *    1: mov.sat(16)  dst1  -dst0
    *    2: mov.sat(16)  dst2  dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);
   elk_fs_inst *inst2 = instruction(blk, 2);

   EXPECT_EQ(ELK_OPCODE_MUL, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   /* mul_neg_mov_sat shows that a successful propagation negates src[0] of
    * the MUL, so "no change" has to be verified on that source as well.
    */
   EXPECT_FALSE(inst0->src[0].negate);
   EXPECT_FALSE(inst0->src[1].negate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->saturate);
   EXPECT_TRUE(inst1->src[0].negate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst2->opcode);
   EXPECT_TRUE(inst2->saturate);
}
729
/* The saturating MOV is emitted 8 wide while the producing ADD is 16 wide;
 * the test expects the pass to change nothing.
 */
TEST_F(saturate_propagation_test, smaller_exec_size_consumer)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.ADD(dst0, src0, src1);
   elk_fs_inst *narrow_mov = bld.group(8, 0).MOV(dst1, dst0);
   set_saturate(true, narrow_mov);

   /* Input:
    *
    *    0: add(16)     dst0  src0  src1
    *    1: mov.sat(8)  dst1  dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->saturate);
}
762
/* The producing ADD is emitted 8 wide while the saturating MOV is 16 wide;
 * the test expects the pass to change nothing.
 */
TEST_F(saturate_propagation_test, larger_exec_size_consumer)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.group(8, 0).ADD(dst0, src0, src1);
   elk_fs_inst *wide_mov = bld.MOV(dst1, dst0);
   set_saturate(true, wide_mov);

   /* Input:
    *
    *    0: add(8)       dst0  src0  src1
    *    1: mov.sat(16)  dst1  dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_FALSE(inst0->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst1->opcode);
   EXPECT_TRUE(inst1->saturate);
}
795
/* A scalar ADD reads one component of dst0 (without saturation) between
 * the ADD that produced dst0 and the saturating MOV; the test expects the
 * pass to change nothing.
 */
TEST_F(saturate_propagation_test, offset_source_barrier)
{
   elk_fs_reg dst0 = v->vgrf(glsl_float_type());
   elk_fs_reg dst1 = v->vgrf(glsl_float_type());
   elk_fs_reg dst2 = v->vgrf(glsl_float_type());
   elk_fs_reg src0 = v->vgrf(glsl_float_type());
   elk_fs_reg src1 = v->vgrf(glsl_float_type());

   bld.group(16, 0).ADD(dst0, src0, src1);
   bld.group(1, 0).ADD(dst1, component(dst0, 8), elk_imm_f(1.0f));
   set_saturate(true, bld.group(16, 0).MOV(dst2, dst0));

   /* Input (three instructions, matching the end_ip of 2 asserted below):
    *
    *    0: add(16)      dst0  src0    src1
    *    1: add(1)       dst1  dst0+8  1.0f
    *    2: mov.sat(16)  dst2  dst0
    *
    * Expected output: identical to the input.
    */

   v->calculate_cfg();
   elk_bblock_t *blk = v->cfg->blocks[0];

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_FALSE(saturate_propagation(v));

   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   elk_fs_inst *inst0 = instruction(blk, 0);
   elk_fs_inst *inst1 = instruction(blk, 1);
   elk_fs_inst *inst2 = instruction(blk, 2);

   EXPECT_EQ(ELK_OPCODE_ADD, inst0->opcode);
   EXPECT_EQ(ELK_OPCODE_ADD, inst1->opcode);
   EXPECT_FALSE(inst0->saturate);
   EXPECT_FALSE(inst1->saturate);
   EXPECT_EQ(ELK_OPCODE_MOV, inst2->opcode);
   EXPECT_TRUE(inst2->saturate);
}
833