1 /**************************************************************************
2 *
3 * Copyright 2008 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * AA point stage: AA points are converted to quads and rendered with a
30 * special fragment shader. Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors: Brian Paul
38 */
39
40
41 #include "pipe/p_context.h"
42 #include "pipe/p_defines.h"
43 #include "pipe/p_shader_tokens.h"
44
45 #include "tgsi/tgsi_transform.h"
46 #include "tgsi/tgsi_dump.h"
47
48 #include "util/u_math.h"
49 #include "util/u_memory.h"
50
51 #include "draw_context.h"
52 #include "draw_vs.h"
53 #include "draw_pipe.h"
54
55 #include "nir.h"
56 #include "nir/nir_draw_helpers.h"
57
/** Approx number of new tokens for the instructions added by
 *  aa_transform_prolog() and aa_transform_epilog() */
59 #define NUM_NEW_TOKENS 200
60
61
/*
 * Enabling NORMALIZE might give _slightly_ better results.
 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
 */
68 #define NORMALIZE 0
69
70
71 /**
72 * Subclass of pipe_shader_state to carry extra fragment shader info.
73 */
74 struct aapoint_fragment_shader
75 {
76 struct pipe_shader_state state;
77 void *driver_fs; /**< the regular shader */
78 void *aapoint_fs; /**< the aa point-augmented shader */
79 int generic_attrib; /**< The generic input attrib/texcoord we'll use */
80 };
81
82
83 /**
84 * Subclass of draw_stage
85 */
86 struct aapoint_stage
87 {
88 struct draw_stage stage;
89
90 /** half of pipe_rasterizer_state::point_size */
91 float radius;
92
93 /** vertex attrib slot containing point size */
94 int psize_slot;
95
96 /** this is the vertex attrib slot for the new texcoords */
97 unsigned tex_slot;
98
99 /** vertex attrib slot containing position */
100 unsigned pos_slot;
101
102 /** Type of Boolean variables on this hardware. */
103 nir_alu_type bool_type;
104
105 /** Currently bound fragment shader */
106 struct aapoint_fragment_shader *fs;
107
108 /*
109 * Driver interface/override functions
110 */
111 void * (*driver_create_fs_state)(struct pipe_context *,
112 const struct pipe_shader_state *);
113 void (*driver_bind_fs_state)(struct pipe_context *, void *);
114 void (*driver_delete_fs_state)(struct pipe_context *, void *);
115 };
116
117
118
119 /**
120 * Subclass of tgsi_transform_context, used for transforming the
121 * user's fragment shader to add the special AA instructions.
122 */
123 struct aa_transform_context {
124 struct tgsi_transform_context base;
125 uint32_t tempsUsed; /**< bitmask */
126 int colorOutput; /**< which output is the primary color */
127 int maxInput, maxGeneric; /**< max input index found */
128 int tmp0, colorTemp; /**< temp registers */
129 };
130
131
132 /**
133 * TGSI declaration transform callback.
134 * Look for two free temp regs and available input reg for new texcoords.
135 */
136 static void
aa_transform_decl(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)137 aa_transform_decl(struct tgsi_transform_context *ctx,
138 struct tgsi_full_declaration *decl)
139 {
140 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
141
142 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
143 decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
144 decl->Semantic.Index == 0) {
145 aactx->colorOutput = decl->Range.First;
146 }
147 else if (decl->Declaration.File == TGSI_FILE_INPUT) {
148 if ((int) decl->Range.Last > aactx->maxInput)
149 aactx->maxInput = decl->Range.Last;
150 if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
151 (int) decl->Semantic.Index > aactx->maxGeneric) {
152 aactx->maxGeneric = decl->Semantic.Index;
153 }
154 }
155 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
156 unsigned i;
157 for (i = decl->Range.First;
158 i <= decl->Range.Last; i++) {
159 aactx->tempsUsed |= 1u << i;
160 }
161 }
162
163 ctx->emit_declaration(ctx, decl);
164 }
165
166
167 /**
168 * TGSI transform callback.
169 * Insert new declarations and instructions before first instruction.
170 */
171 static void
aa_transform_prolog(struct tgsi_transform_context * ctx)172 aa_transform_prolog(struct tgsi_transform_context *ctx)
173 {
174 /* emit our new declarations before the first instruction */
175 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
176 struct tgsi_full_instruction newInst;
177 const int texInput = aactx->maxInput + 1;
178 int tmp0;
179 unsigned i;
180
181 /* find two free temp regs */
182 for (i = 0; i < 32; i++) {
183 if ((aactx->tempsUsed & (1u << i)) == 0) {
184 /* found a free temp */
185 if (aactx->tmp0 < 0)
186 aactx->tmp0 = i;
187 else if (aactx->colorTemp < 0)
188 aactx->colorTemp = i;
189 else
190 break;
191 }
192 }
193
194 assert(aactx->colorTemp != aactx->tmp0);
195
196 tmp0 = aactx->tmp0;
197
198 /* declare new generic input/texcoord */
199 tgsi_transform_input_decl(ctx, texInput,
200 TGSI_SEMANTIC_GENERIC, aactx->maxGeneric + 1,
201 TGSI_INTERPOLATE_LINEAR);
202
203 /* declare new temp regs */
204 tgsi_transform_temp_decl(ctx, tmp0);
205 tgsi_transform_temp_decl(ctx, aactx->colorTemp);
206
207 /*
208 * Emit code to compute fragment coverage, kill if outside point radius
209 *
210 * Temp reg0 usage:
211 * t0.x = distance of fragment from center point
212 * t0.y = boolean, is t0.x > 1.0, also misc temp usage
213 * t0.z = temporary for computing 1/(1-k) value
214 * t0.w = final coverage value
215 */
216
217 /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
218 tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
219 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
220 TGSI_FILE_INPUT, texInput,
221 TGSI_FILE_INPUT, texInput, false);
222
223 /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
224 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
225 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
226 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
227 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, false);
228
229 #if NORMALIZE /* OPTIONAL normalization of length */
230 /* RSQ t0.x, t0.x; */
231 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RSQ,
232 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
233 TGSI_FILE_TEMPORARY, tmp0);
234
235 /* RCP t0.x, t0.x; */
236 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RCP,
237 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
238 TGSI_FILE_TEMPORARY, tmp0);
239 #endif
240
241 /* SGT t0.y, t0.xxxx, tex.wwww; # bool b = d > 1 (NOTE tex.w == 1) */
242 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SGT,
243 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
244 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
245 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, false);
246
247 /* KILL_IF -tmp0.yyyy; # if -tmp0.y < 0, KILL */
248 tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
249 TGSI_SWIZZLE_Y, true);
250
251 /* compute coverage factor = (1-d)/(1-k) */
252
253 /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
254 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
255 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Z,
256 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
257 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, true);
258
259 /* RCP t0.z, t0.z; # t0.z = 1 / m */
260 newInst = tgsi_default_full_instruction();
261 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
262 newInst.Instruction.NumDstRegs = 1;
263 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
264 newInst.Dst[0].Register.Index = tmp0;
265 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
266 newInst.Instruction.NumSrcRegs = 1;
267 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
268 newInst.Src[0].Register.Index = tmp0;
269 newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
270 ctx->emit_instruction(ctx, &newInst);
271
272 /* SUB t0.y, 1, t0.x; # d = 1 - d */
273 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
274 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
275 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
276 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true);
277
278 /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
279 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
280 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
281 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
282 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z, false);
283
284 /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
285 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SLE,
286 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
287 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
288 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, false);
289
290 /* CMP t0.w, -t0.y, tex.w, t0.w;
291 * # if -t0.y < 0 then
292 * t0.w = 1
293 * else
294 * t0.w = t0.w
295 */
296 tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_CMP,
297 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
298 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, 1,
299 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
300 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
301 }
302
303
304 /**
305 * TGSI transform callback.
306 * Insert new instructions before the END instruction.
307 */
308 static void
aa_transform_epilog(struct tgsi_transform_context * ctx)309 aa_transform_epilog(struct tgsi_transform_context *ctx)
310 {
311 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
312
313 /* add alpha modulation code at tail of program */
314
315 /* MOV result.color.xyz, colorTemp; */
316 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
317 TGSI_FILE_OUTPUT, aactx->colorOutput,
318 TGSI_WRITEMASK_XYZ,
319 TGSI_FILE_TEMPORARY, aactx->colorTemp);
320
321 /* MUL result.color.w, colorTemp, tmp0.w; */
322 tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
323 TGSI_FILE_OUTPUT, aactx->colorOutput,
324 TGSI_WRITEMASK_W,
325 TGSI_FILE_TEMPORARY, aactx->colorTemp,
326 TGSI_FILE_TEMPORARY, aactx->tmp0, false);
327 }
328
329
330 /**
331 * TGSI transform callback.
332 * Called per instruction.
333 * Replace writes to result.color w/ a temp reg.
334 */
335 static void
aa_transform_inst(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)336 aa_transform_inst(struct tgsi_transform_context *ctx,
337 struct tgsi_full_instruction *inst)
338 {
339 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
340 unsigned i;
341
342 /* Not an END instruction.
343 * Look for writes to result.color and replace with colorTemp reg.
344 */
345 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
346 struct tgsi_full_dst_register *dst = &inst->Dst[i];
347 if (dst->Register.File == TGSI_FILE_OUTPUT &&
348 dst->Register.Index == aactx->colorOutput) {
349 dst->Register.File = TGSI_FILE_TEMPORARY;
350 dst->Register.Index = aactx->colorTemp;
351 }
352 }
353
354 ctx->emit_instruction(ctx, inst);
355 }
356
357
358 /**
359 * Generate the frag shader we'll use for drawing AA points.
360 * This will be the user's shader plus some texture/modulate instructions.
361 */
362 static bool
generate_aapoint_fs(struct aapoint_stage * aapoint)363 generate_aapoint_fs(struct aapoint_stage *aapoint)
364 {
365 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
366 struct pipe_shader_state aapoint_fs;
367 struct aa_transform_context transform;
368 const unsigned newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
369 struct pipe_context *pipe = aapoint->stage.draw->pipe;
370
371 aapoint_fs = *orig_fs; /* copy to init */
372
373 assert(aapoint_fs.type == PIPE_SHADER_IR_TGSI);
374
375 memset(&transform, 0, sizeof(transform));
376 transform.colorOutput = -1;
377 transform.maxInput = -1;
378 transform.maxGeneric = -1;
379 transform.colorTemp = -1;
380 transform.tmp0 = -1;
381 transform.base.prolog = aa_transform_prolog;
382 transform.base.epilog = aa_transform_epilog;
383 transform.base.transform_instruction = aa_transform_inst;
384 transform.base.transform_declaration = aa_transform_decl;
385
386 aapoint_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base);
387 if (!aapoint_fs.tokens)
388 return false;
389
390 #if 0 /* debug */
391 debug_printf("draw_aapoint, orig shader:\n");
392 tgsi_dump(orig_fs->tokens, 0);
393 debug_printf("draw_aapoint, new shader:\n");
394 tgsi_dump(aapoint_fs.tokens, 0);
395 #endif
396
397 aapoint->fs->aapoint_fs
398 = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
399 if (aapoint->fs->aapoint_fs == NULL)
400 goto fail;
401
402 aapoint->fs->generic_attrib = transform.maxGeneric + 1;
403 FREE((void *)aapoint_fs.tokens);
404 return true;
405
406 fail:
407 FREE((void *)aapoint_fs.tokens);
408 return false;
409 }
410
411
412 static bool
generate_aapoint_fs_nir(struct aapoint_stage * aapoint)413 generate_aapoint_fs_nir(struct aapoint_stage *aapoint)
414 {
415 struct pipe_context *pipe = aapoint->stage.draw->pipe;
416 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
417 struct pipe_shader_state aapoint_fs;
418
419 aapoint_fs = *orig_fs; /* copy to init */
420 aapoint_fs.ir.nir = nir_shader_clone(NULL, orig_fs->ir.nir);
421 if (!aapoint_fs.ir.nir)
422 return false;
423
424 nir_lower_aapoint_fs(aapoint_fs.ir.nir, &aapoint->fs->generic_attrib, aapoint->bool_type);
425 aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
426 if (aapoint->fs->aapoint_fs == NULL)
427 goto fail;
428
429 return true;
430
431 fail:
432 return false;
433 }
434
435
436 /**
437 * When we're about to draw our first AA point in a batch, this function is
438 * called to tell the driver to bind our modified fragment shader.
439 */
440 static bool
bind_aapoint_fragment_shader(struct aapoint_stage * aapoint)441 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
442 {
443 struct draw_context *draw = aapoint->stage.draw;
444 struct pipe_context *pipe = draw->pipe;
445
446 if (!aapoint->fs->aapoint_fs) {
447 if (aapoint->fs->state.type == PIPE_SHADER_IR_NIR) {
448 if (!generate_aapoint_fs_nir(aapoint))
449 return false;
450 } else if (!generate_aapoint_fs(aapoint))
451 return false;
452 }
453
454 draw->suspend_flushing = true;
455 aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
456 draw->suspend_flushing = false;
457
458 return true;
459 }
460
461
/** Downcast a generic draw_stage pointer to the AA point stage. */
static inline struct aapoint_stage *
aapoint_stage(struct draw_stage *stage)
{
   struct aapoint_stage *self = (struct aapoint_stage *) stage;

   return self;
}
467
468
469 /**
470 * Draw an AA point by drawing a quad.
471 */
472 static void
aapoint_point(struct draw_stage * stage,struct prim_header * header)473 aapoint_point(struct draw_stage *stage, struct prim_header *header)
474 {
475 const struct aapoint_stage *aapoint = aapoint_stage(stage);
476 struct prim_header tri;
477 struct vertex_header *v[4];
478 const unsigned tex_slot = aapoint->tex_slot;
479 const unsigned pos_slot = aapoint->pos_slot;
480 float radius, *pos, *tex;
481 float k;
482
483 if (aapoint->psize_slot >= 0) {
484 radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
485 }
486 else {
487 radius = aapoint->radius;
488 }
489
490 /*
491 * Note: the texcoords (generic attrib, really) we use are special:
492 * The S and T components simply vary from -1 to +1.
493 * The R component is k, below.
494 * The Q component is 1.0 and will used as a handy constant in the
495 * fragment shader.
496 */
497
498 /*
499 * k is the threshold distance from the point's center at which
500 * we begin alpha attenuation (the coverage value).
501 * Operating within a unit circle, we'll compute the fragment's
502 * distance 'd' from the center point using the texcoords.
503 * IF d > 1.0 THEN
504 * KILL fragment
505 * ELSE IF d > k THEN
506 * compute coverage in [0,1] proportional to d in [k, 1].
507 * ELSE
508 * coverage = 1.0; // full coverage
509 * ENDIF
510 *
511 * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
512 * avoid using IF/ELSE/ENDIF TGSI opcodes.
513 */
514
515 #if !NORMALIZE
516 k = 1.0f / radius;
517 k = 1.0f - 2.0f * k + k * k;
518 #else
519 k = 1.0f - 1.0f / radius;
520 #endif
521
522 /* allocate/dup new verts */
523 for (unsigned i = 0; i < 4; i++) {
524 v[i] = dup_vert(stage, header->v[0], i);
525 }
526
527 /* new verts */
528 pos = v[0]->data[pos_slot];
529 pos[0] -= radius;
530 pos[1] -= radius;
531
532 pos = v[1]->data[pos_slot];
533 pos[0] += radius;
534 pos[1] -= radius;
535
536 pos = v[2]->data[pos_slot];
537 pos[0] += radius;
538 pos[1] += radius;
539
540 pos = v[3]->data[pos_slot];
541 pos[0] -= radius;
542 pos[1] += radius;
543
544 /* new texcoords */
545 tex = v[0]->data[tex_slot];
546 ASSIGN_4V(tex, -1, -1, k, 1);
547
548 tex = v[1]->data[tex_slot];
549 ASSIGN_4V(tex, 1, -1, k, 1);
550
551 tex = v[2]->data[tex_slot];
552 ASSIGN_4V(tex, 1, 1, k, 1);
553
554 tex = v[3]->data[tex_slot];
555 ASSIGN_4V(tex, -1, 1, k, 1);
556
557 /* emit 2 tris for the quad strip */
558 tri.v[0] = v[0];
559 tri.v[1] = v[1];
560 tri.v[2] = v[2];
561 stage->next->tri(stage->next, &tri);
562
563 tri.v[0] = v[0];
564 tri.v[1] = v[2];
565 tri.v[2] = v[3];
566 stage->next->tri(stage->next, &tri);
567 }
568
569
570 static void
aapoint_first_point(struct draw_stage * stage,struct prim_header * header)571 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
572 {
573 auto struct aapoint_stage *aapoint = aapoint_stage(stage);
574 struct draw_context *draw = stage->draw;
575 struct pipe_context *pipe = draw->pipe;
576 const struct pipe_rasterizer_state *rast = draw->rasterizer;
577 void *r;
578
579 assert(draw->rasterizer->point_smooth && !draw->rasterizer->multisample);
580
581 if (draw->rasterizer->point_size <= 2.0)
582 aapoint->radius = 1.0;
583 else
584 aapoint->radius = 0.5f * draw->rasterizer->point_size;
585
586 /*
587 * Bind (generate) our fragprog.
588 */
589 bind_aapoint_fragment_shader(aapoint);
590
591 draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint);
592
593 draw->suspend_flushing = true;
594
595 /* Disable triangle culling, stippling, unfilled mode etc. */
596 r = draw_get_rasterizer_no_cull(draw, rast);
597 pipe->bind_rasterizer_state(pipe, r);
598
599 draw->suspend_flushing = false;
600
601 /* now really draw first point */
602 stage->point = aapoint_point;
603 stage->point(stage, header);
604 }
605
606
607 static void
aapoint_flush(struct draw_stage * stage,unsigned flags)608 aapoint_flush(struct draw_stage *stage, unsigned flags)
609 {
610 struct draw_context *draw = stage->draw;
611 struct aapoint_stage *aapoint = aapoint_stage(stage);
612 struct pipe_context *pipe = draw->pipe;
613
614 stage->point = aapoint_first_point;
615 stage->next->flush(stage->next, flags);
616
617 /* restore original frag shader */
618 draw->suspend_flushing = true;
619 aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
620
621 /* restore original rasterizer state */
622 if (draw->rast_handle) {
623 pipe->bind_rasterizer_state(pipe, draw->rast_handle);
624 }
625
626 draw->suspend_flushing = false;
627
628 draw_remove_extra_vertex_attribs(draw);
629 }
630
631
632 static void
aapoint_reset_stipple_counter(struct draw_stage * stage)633 aapoint_reset_stipple_counter(struct draw_stage *stage)
634 {
635 stage->next->reset_stipple_counter(stage->next);
636 }
637
638
639 static void
aapoint_destroy(struct draw_stage * stage)640 aapoint_destroy(struct draw_stage *stage)
641 {
642 struct aapoint_stage* aapoint = aapoint_stage(stage);
643 struct pipe_context *pipe = stage->draw->pipe;
644
645 draw_free_temp_verts(stage);
646
647 /* restore the old entry points */
648 pipe->create_fs_state = aapoint->driver_create_fs_state;
649 pipe->bind_fs_state = aapoint->driver_bind_fs_state;
650 pipe->delete_fs_state = aapoint->driver_delete_fs_state;
651
652 FREE(stage);
653 }
654
655
656 void
draw_aapoint_prepare_outputs(struct draw_context * draw,struct draw_stage * stage)657 draw_aapoint_prepare_outputs(struct draw_context *draw,
658 struct draw_stage *stage)
659 {
660 struct aapoint_stage *aapoint = aapoint_stage(stage);
661 const struct pipe_rasterizer_state *rast = draw->rasterizer;
662
663 /* update vertex attrib info */
664 aapoint->pos_slot = draw_current_shader_position_output(draw);
665
666 if (!rast->point_smooth || rast->multisample)
667 return;
668
669 if (aapoint->fs && aapoint->fs->aapoint_fs) {
670 /* allocate the extra post-transformed vertex attribute */
671 aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
672 TGSI_SEMANTIC_GENERIC,
673 aapoint->fs->generic_attrib);
674 assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
675 } else {
676 aapoint->tex_slot = -1;
677 }
678
679 /* find psize slot in post-transform vertex */
680 aapoint->psize_slot = -1;
681 if (draw->rasterizer->point_size_per_vertex) {
682 const struct tgsi_shader_info *info = draw_get_shader_info(draw);
683 /* find PSIZ vertex output */
684 for (unsigned i = 0; i < info->num_outputs; i++) {
685 if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
686 aapoint->psize_slot = i;
687 break;
688 }
689 }
690 }
691 }
692
693
694 static struct aapoint_stage *
draw_aapoint_stage(struct draw_context * draw,nir_alu_type bool_type)695 draw_aapoint_stage(struct draw_context *draw, nir_alu_type bool_type)
696 {
697 struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
698 if (!aapoint)
699 goto fail;
700
701 aapoint->stage.draw = draw;
702 aapoint->stage.name = "aapoint";
703 aapoint->stage.next = NULL;
704 aapoint->stage.point = aapoint_first_point;
705 aapoint->stage.line = draw_pipe_passthrough_line;
706 aapoint->stage.tri = draw_pipe_passthrough_tri;
707 aapoint->stage.flush = aapoint_flush;
708 aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
709 aapoint->stage.destroy = aapoint_destroy;
710 aapoint->bool_type = bool_type;
711
712 if (!draw_alloc_temp_verts(&aapoint->stage, 4))
713 goto fail;
714
715 return aapoint;
716
717 fail:
718 if (aapoint)
719 aapoint->stage.destroy(&aapoint->stage);
720
721 return NULL;
722
723 }
724
725
726 static struct aapoint_stage *
aapoint_stage_from_pipe(struct pipe_context * pipe)727 aapoint_stage_from_pipe(struct pipe_context *pipe)
728 {
729 struct draw_context *draw = (struct draw_context *) pipe->draw;
730 return aapoint_stage(draw->pipeline.aapoint);
731 }
732
733
734 /**
735 * This function overrides the driver's create_fs_state() function and
736 * will typically be called by the gallium frontend.
737 */
738 static void *
aapoint_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * fs)739 aapoint_create_fs_state(struct pipe_context *pipe,
740 const struct pipe_shader_state *fs)
741 {
742 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
743 struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
744 if (!aafs)
745 return NULL;
746
747 aafs->state.type = fs->type;
748 if (fs->type == PIPE_SHADER_IR_TGSI)
749 aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
750 else
751 aafs->state.ir.nir = nir_shader_clone(NULL, fs->ir.nir);
752 /* pass-through */
753 aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
754
755 return aafs;
756 }
757
758
759 static void
aapoint_bind_fs_state(struct pipe_context * pipe,void * fs)760 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
761 {
762 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
763 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
764 /* save current */
765 aapoint->fs = aafs;
766 /* pass-through */
767 aapoint->driver_bind_fs_state(pipe,
768 (aafs ? aafs->driver_fs : NULL));
769 }
770
771
772 static void
aapoint_delete_fs_state(struct pipe_context * pipe,void * fs)773 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
774 {
775 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
776 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
777
778 /* pass-through */
779 aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
780
781 if (aafs->aapoint_fs)
782 aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
783
784 if (aafs->state.type == PIPE_SHADER_IR_TGSI)
785 FREE((void*)aafs->state.tokens);
786 else
787 ralloc_free(aafs->state.ir.nir);
788
789 FREE(aafs);
790 }
791
792
793 /**
794 * Called by drivers that want to install this AA point prim stage
795 * into the draw module's pipeline. This will not be used if the
796 * hardware has native support for AA points.
797 */
798 bool
draw_install_aapoint_stage(struct draw_context * draw,struct pipe_context * pipe,nir_alu_type bool_type)799 draw_install_aapoint_stage(struct draw_context *draw,
800 struct pipe_context *pipe,
801 nir_alu_type bool_type)
802 {
803 struct aapoint_stage *aapoint;
804
805 pipe->draw = (void *) draw;
806
807 /*
808 * Create / install AA point drawing / prim stage
809 */
810 aapoint = draw_aapoint_stage(draw, bool_type);
811 if (!aapoint)
812 return false;
813
814 /* save original driver functions */
815 aapoint->driver_create_fs_state = pipe->create_fs_state;
816 aapoint->driver_bind_fs_state = pipe->bind_fs_state;
817 aapoint->driver_delete_fs_state = pipe->delete_fs_state;
818
819 /* override the driver's functions */
820 pipe->create_fs_state = aapoint_create_fs_state;
821 pipe->bind_fs_state = aapoint_bind_fs_state;
822 pipe->delete_fs_state = aapoint_delete_fs_state;
823
824 draw->pipeline.aapoint = &aapoint->stage;
825
826 return true;
827 }
828