xref: /aosp_15_r20/external/mesa3d/src/gallium/auxiliary/draw/draw_pipe_aapoint.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2008 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * AA point stage:  AA points are converted to quads and rendered with a
30  * special fragment shader.  Another approach would be to use a texture
31  * map image of a point, but experiments indicate the quality isn't nearly
32  * as good as this approach.
33  *
34  * Note: this looks a lot like draw_aaline.c but there's actually little
35  * if any code that can be shared.
36  *
37  * Authors:  Brian Paul
38  */
39 
40 
41 #include "pipe/p_context.h"
42 #include "pipe/p_defines.h"
43 #include "pipe/p_shader_tokens.h"
44 
45 #include "tgsi/tgsi_transform.h"
46 #include "tgsi/tgsi_dump.h"
47 
48 #include "util/u_math.h"
49 #include "util/u_memory.h"
50 
51 #include "draw_context.h"
52 #include "draw_vs.h"
53 #include "draw_pipe.h"
54 
55 #include "nir.h"
56 #include "nir/nir_draw_helpers.h"
57 
/**
 * Approximate number of tokens added to a shader by the AA point transform.
 * generate_aapoint_fs() adds this to the original token count to size the
 * transformed shader.
 */
#define NUM_NEW_TOKENS 200


/*
 * NORMALIZE selects how the fragment's distance from the point center is
 * computed: d = sqrt(x*x + y*y) when enabled, d = x*x + y*y when disabled.
 * Enabling it might give _slightly_ better results, but since we're working
 * with a unit circle the latter seems close enough and saves some costly
 * instructions.
 */
#define NORMALIZE 0
69 
70 
71 /**
72  * Subclass of pipe_shader_state to carry extra fragment shader info.
73  */
74 struct aapoint_fragment_shader
75 {
76    struct pipe_shader_state state;
77    void *driver_fs;   /**< the regular shader */
78    void *aapoint_fs;  /**< the aa point-augmented shader */
79    int generic_attrib; /**< The generic input attrib/texcoord we'll use */
80 };
81 
82 
83 /**
84  * Subclass of draw_stage
85  */
86 struct aapoint_stage
87 {
88    struct draw_stage stage;
89 
90    /** half of pipe_rasterizer_state::point_size */
91    float radius;
92 
93    /** vertex attrib slot containing point size */
94    int psize_slot;
95 
96    /** this is the vertex attrib slot for the new texcoords */
97    unsigned tex_slot;
98 
99    /** vertex attrib slot containing position */
100    unsigned pos_slot;
101 
102    /** Type of Boolean variables on this hardware. */
103    nir_alu_type bool_type;
104 
105    /** Currently bound fragment shader */
106    struct aapoint_fragment_shader *fs;
107 
108    /*
109     * Driver interface/override functions
110     */
111    void * (*driver_create_fs_state)(struct pipe_context *,
112                                     const struct pipe_shader_state *);
113    void (*driver_bind_fs_state)(struct pipe_context *, void *);
114    void (*driver_delete_fs_state)(struct pipe_context *, void *);
115 };
116 
117 
118 
119 /**
120  * Subclass of tgsi_transform_context, used for transforming the
121  * user's fragment shader to add the special AA instructions.
122  */
123 struct aa_transform_context {
124    struct tgsi_transform_context base;
125    uint32_t tempsUsed;  /**< bitmask */
126    int colorOutput; /**< which output is the primary color */
127    int maxInput, maxGeneric;  /**< max input index found */
128    int tmp0, colorTemp;  /**< temp registers */
129 };
130 
131 
132 /**
133  * TGSI declaration transform callback.
134  * Look for two free temp regs and available input reg for new texcoords.
135  */
136 static void
aa_transform_decl(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)137 aa_transform_decl(struct tgsi_transform_context *ctx,
138                   struct tgsi_full_declaration *decl)
139 {
140    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
141 
142    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
143        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
144        decl->Semantic.Index == 0) {
145       aactx->colorOutput = decl->Range.First;
146    }
147    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
148       if ((int) decl->Range.Last > aactx->maxInput)
149          aactx->maxInput = decl->Range.Last;
150       if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
151            (int) decl->Semantic.Index > aactx->maxGeneric) {
152          aactx->maxGeneric = decl->Semantic.Index;
153       }
154    }
155    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
156       unsigned i;
157       for (i = decl->Range.First;
158            i <= decl->Range.Last; i++) {
159          aactx->tempsUsed |= 1u << i;
160       }
161    }
162 
163    ctx->emit_declaration(ctx, decl);
164 }
165 
166 
167 /**
168  * TGSI transform callback.
169  * Insert new declarations and instructions before first instruction.
170  */
171 static void
aa_transform_prolog(struct tgsi_transform_context * ctx)172 aa_transform_prolog(struct tgsi_transform_context *ctx)
173 {
174    /* emit our new declarations before the first instruction */
175    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
176    struct tgsi_full_instruction newInst;
177    const int texInput = aactx->maxInput + 1;
178    int tmp0;
179    unsigned i;
180 
181    /* find two free temp regs */
182    for (i = 0; i < 32; i++) {
183       if ((aactx->tempsUsed & (1u << i)) == 0) {
184          /* found a free temp */
185          if (aactx->tmp0 < 0)
186             aactx->tmp0 = i;
187          else if (aactx->colorTemp < 0)
188             aactx->colorTemp = i;
189          else
190             break;
191       }
192    }
193 
194    assert(aactx->colorTemp != aactx->tmp0);
195 
196    tmp0 = aactx->tmp0;
197 
198    /* declare new generic input/texcoord */
199    tgsi_transform_input_decl(ctx, texInput,
200                              TGSI_SEMANTIC_GENERIC, aactx->maxGeneric + 1,
201                              TGSI_INTERPOLATE_LINEAR);
202 
203    /* declare new temp regs */
204    tgsi_transform_temp_decl(ctx, tmp0);
205    tgsi_transform_temp_decl(ctx, aactx->colorTemp);
206 
207    /*
208     * Emit code to compute fragment coverage, kill if outside point radius
209     *
210     * Temp reg0 usage:
211     *  t0.x = distance of fragment from center point
212     *  t0.y = boolean, is t0.x > 1.0, also misc temp usage
213     *  t0.z = temporary for computing 1/(1-k) value
214     *  t0.w = final coverage value
215     */
216 
217    /* MUL t0.xy, tex, tex;  # compute x^2, y^2 */
218    tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
219                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
220                            TGSI_FILE_INPUT, texInput,
221                            TGSI_FILE_INPUT, texInput, false);
222 
223    /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
224    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
225                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
226                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
227                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, false);
228 
229 #if NORMALIZE  /* OPTIONAL normalization of length */
230    /* RSQ t0.x, t0.x; */
231    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RSQ,
232                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
233                            TGSI_FILE_TEMPORARY, tmp0);
234 
235    /* RCP t0.x, t0.x; */
236    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RCP,
237                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
238                            TGSI_FILE_TEMPORARY, tmp0);
239 #endif
240 
241    /* SGT t0.y, t0.xxxx, tex.wwww;  # bool b = d > 1 (NOTE tex.w == 1) */
242    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SGT,
243                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
244                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
245                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, false);
246 
247    /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
248    tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
249                             TGSI_SWIZZLE_Y, true);
250 
251    /* compute coverage factor = (1-d)/(1-k) */
252 
253    /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
254    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
255                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Z,
256                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
257                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, true);
258 
259    /* RCP t0.z, t0.z;  # t0.z = 1 / m */
260    newInst = tgsi_default_full_instruction();
261    newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
262    newInst.Instruction.NumDstRegs = 1;
263    newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
264    newInst.Dst[0].Register.Index = tmp0;
265    newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
266    newInst.Instruction.NumSrcRegs = 1;
267    newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
268    newInst.Src[0].Register.Index = tmp0;
269    newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
270    ctx->emit_instruction(ctx, &newInst);
271 
272    /* SUB t0.y, 1, t0.x;  # d = 1 - d */
273    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
274                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
275                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
276                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true);
277 
278    /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
279    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
280                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
281                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
282                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z, false);
283 
284    /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
285    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SLE,
286                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
287                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
288                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, false);
289 
290    /* CMP t0.w, -t0.y, tex.w, t0.w;
291     *  # if -t0.y < 0 then
292     *       t0.w = 1
293     *    else
294     *       t0.w = t0.w
295     */
296    tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_CMP,
297                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
298                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, 1,
299                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
300                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
301 }
302 
303 
304 /**
305  * TGSI transform callback.
306  * Insert new instructions before the END instruction.
307  */
308 static void
aa_transform_epilog(struct tgsi_transform_context * ctx)309 aa_transform_epilog(struct tgsi_transform_context *ctx)
310 {
311    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
312 
313    /* add alpha modulation code at tail of program */
314 
315    /* MOV result.color.xyz, colorTemp; */
316    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
317                            TGSI_FILE_OUTPUT, aactx->colorOutput,
318                            TGSI_WRITEMASK_XYZ,
319                            TGSI_FILE_TEMPORARY, aactx->colorTemp);
320 
321    /* MUL result.color.w, colorTemp, tmp0.w; */
322    tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
323                            TGSI_FILE_OUTPUT, aactx->colorOutput,
324                            TGSI_WRITEMASK_W,
325                            TGSI_FILE_TEMPORARY, aactx->colorTemp,
326                            TGSI_FILE_TEMPORARY, aactx->tmp0, false);
327 }
328 
329 
330 /**
331  * TGSI transform callback.
332  * Called per instruction.
333  * Replace writes to result.color w/ a temp reg.
334  */
335 static void
aa_transform_inst(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)336 aa_transform_inst(struct tgsi_transform_context *ctx,
337                   struct tgsi_full_instruction *inst)
338 {
339    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
340    unsigned i;
341 
342    /* Not an END instruction.
343     * Look for writes to result.color and replace with colorTemp reg.
344     */
345    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
346       struct tgsi_full_dst_register *dst = &inst->Dst[i];
347       if (dst->Register.File == TGSI_FILE_OUTPUT &&
348           dst->Register.Index == aactx->colorOutput) {
349          dst->Register.File = TGSI_FILE_TEMPORARY;
350          dst->Register.Index = aactx->colorTemp;
351       }
352    }
353 
354    ctx->emit_instruction(ctx, inst);
355 }
356 
357 
358 /**
359  * Generate the frag shader we'll use for drawing AA points.
360  * This will be the user's shader plus some texture/modulate instructions.
361  */
362 static bool
generate_aapoint_fs(struct aapoint_stage * aapoint)363 generate_aapoint_fs(struct aapoint_stage *aapoint)
364 {
365    const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
366    struct pipe_shader_state aapoint_fs;
367    struct aa_transform_context transform;
368    const unsigned newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
369    struct pipe_context *pipe = aapoint->stage.draw->pipe;
370 
371    aapoint_fs = *orig_fs; /* copy to init */
372 
373    assert(aapoint_fs.type == PIPE_SHADER_IR_TGSI);
374 
375    memset(&transform, 0, sizeof(transform));
376    transform.colorOutput = -1;
377    transform.maxInput = -1;
378    transform.maxGeneric = -1;
379    transform.colorTemp = -1;
380    transform.tmp0 = -1;
381    transform.base.prolog = aa_transform_prolog;
382    transform.base.epilog = aa_transform_epilog;
383    transform.base.transform_instruction = aa_transform_inst;
384    transform.base.transform_declaration = aa_transform_decl;
385 
386    aapoint_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base);
387    if (!aapoint_fs.tokens)
388       return false;
389 
390 #if 0 /* debug */
391    debug_printf("draw_aapoint, orig shader:\n");
392    tgsi_dump(orig_fs->tokens, 0);
393    debug_printf("draw_aapoint, new shader:\n");
394    tgsi_dump(aapoint_fs.tokens, 0);
395 #endif
396 
397    aapoint->fs->aapoint_fs
398       = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
399    if (aapoint->fs->aapoint_fs == NULL)
400       goto fail;
401 
402    aapoint->fs->generic_attrib = transform.maxGeneric + 1;
403    FREE((void *)aapoint_fs.tokens);
404    return true;
405 
406 fail:
407    FREE((void *)aapoint_fs.tokens);
408    return false;
409 }
410 
411 
412 static bool
generate_aapoint_fs_nir(struct aapoint_stage * aapoint)413 generate_aapoint_fs_nir(struct aapoint_stage *aapoint)
414 {
415    struct pipe_context *pipe = aapoint->stage.draw->pipe;
416    const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
417    struct pipe_shader_state aapoint_fs;
418 
419    aapoint_fs = *orig_fs; /* copy to init */
420    aapoint_fs.ir.nir = nir_shader_clone(NULL, orig_fs->ir.nir);
421    if (!aapoint_fs.ir.nir)
422       return false;
423 
424    nir_lower_aapoint_fs(aapoint_fs.ir.nir, &aapoint->fs->generic_attrib, aapoint->bool_type);
425    aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
426    if (aapoint->fs->aapoint_fs == NULL)
427       goto fail;
428 
429    return true;
430 
431 fail:
432    return false;
433 }
434 
435 
436 /**
437  * When we're about to draw our first AA point in a batch, this function is
438  * called to tell the driver to bind our modified fragment shader.
439  */
440 static bool
bind_aapoint_fragment_shader(struct aapoint_stage * aapoint)441 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
442 {
443    struct draw_context *draw = aapoint->stage.draw;
444    struct pipe_context *pipe = draw->pipe;
445 
446    if (!aapoint->fs->aapoint_fs) {
447       if (aapoint->fs->state.type == PIPE_SHADER_IR_NIR) {
448          if (!generate_aapoint_fs_nir(aapoint))
449             return false;
450       } else if (!generate_aapoint_fs(aapoint))
451          return false;
452    }
453 
454    draw->suspend_flushing = true;
455    aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
456    draw->suspend_flushing = false;
457 
458    return true;
459 }
460 
461 
/**
 * Downcast a draw_stage to the aapoint_stage that embeds it as its first
 * member.
 */
static inline struct aapoint_stage *
aapoint_stage(struct draw_stage *stage)
{
   struct aapoint_stage *const self = (struct aapoint_stage *) stage;
   return self;
}
467 
468 
469 /**
470  * Draw an AA point by drawing a quad.
471  */
472 static void
aapoint_point(struct draw_stage * stage,struct prim_header * header)473 aapoint_point(struct draw_stage *stage, struct prim_header *header)
474 {
475    const struct aapoint_stage *aapoint = aapoint_stage(stage);
476    struct prim_header tri;
477    struct vertex_header *v[4];
478    const unsigned tex_slot = aapoint->tex_slot;
479    const unsigned pos_slot = aapoint->pos_slot;
480    float radius, *pos, *tex;
481    float k;
482 
483    if (aapoint->psize_slot >= 0) {
484       radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
485    }
486    else {
487       radius = aapoint->radius;
488    }
489 
490    /*
491     * Note: the texcoords (generic attrib, really) we use are special:
492     * The S and T components simply vary from -1 to +1.
493     * The R component is k, below.
494     * The Q component is 1.0 and will used as a handy constant in the
495     * fragment shader.
496     */
497 
498    /*
499     * k is the threshold distance from the point's center at which
500     * we begin alpha attenuation (the coverage value).
501     * Operating within a unit circle, we'll compute the fragment's
502     * distance 'd' from the center point using the texcoords.
503     * IF d > 1.0 THEN
504     *    KILL fragment
505     * ELSE IF d > k THEN
506     *    compute coverage in [0,1] proportional to d in [k, 1].
507     * ELSE
508     *    coverage = 1.0;  // full coverage
509     * ENDIF
510     *
511     * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
512     * avoid using IF/ELSE/ENDIF TGSI opcodes.
513     */
514 
515 #if !NORMALIZE
516    k = 1.0f / radius;
517    k = 1.0f - 2.0f * k + k * k;
518 #else
519    k = 1.0f - 1.0f / radius;
520 #endif
521 
522    /* allocate/dup new verts */
523    for (unsigned i = 0; i < 4; i++) {
524       v[i] = dup_vert(stage, header->v[0], i);
525    }
526 
527    /* new verts */
528    pos = v[0]->data[pos_slot];
529    pos[0] -= radius;
530    pos[1] -= radius;
531 
532    pos = v[1]->data[pos_slot];
533    pos[0] += radius;
534    pos[1] -= radius;
535 
536    pos = v[2]->data[pos_slot];
537    pos[0] += radius;
538    pos[1] += radius;
539 
540    pos = v[3]->data[pos_slot];
541    pos[0] -= radius;
542    pos[1] += radius;
543 
544    /* new texcoords */
545    tex = v[0]->data[tex_slot];
546    ASSIGN_4V(tex, -1, -1, k, 1);
547 
548    tex = v[1]->data[tex_slot];
549    ASSIGN_4V(tex,  1, -1, k, 1);
550 
551    tex = v[2]->data[tex_slot];
552    ASSIGN_4V(tex,  1,  1, k, 1);
553 
554    tex = v[3]->data[tex_slot];
555    ASSIGN_4V(tex, -1,  1, k, 1);
556 
557    /* emit 2 tris for the quad strip */
558    tri.v[0] = v[0];
559    tri.v[1] = v[1];
560    tri.v[2] = v[2];
561    stage->next->tri(stage->next, &tri);
562 
563    tri.v[0] = v[0];
564    tri.v[1] = v[2];
565    tri.v[2] = v[3];
566    stage->next->tri(stage->next, &tri);
567 }
568 
569 
570 static void
aapoint_first_point(struct draw_stage * stage,struct prim_header * header)571 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
572 {
573    auto struct aapoint_stage *aapoint = aapoint_stage(stage);
574    struct draw_context *draw = stage->draw;
575    struct pipe_context *pipe = draw->pipe;
576    const struct pipe_rasterizer_state *rast = draw->rasterizer;
577    void *r;
578 
579    assert(draw->rasterizer->point_smooth && !draw->rasterizer->multisample);
580 
581    if (draw->rasterizer->point_size <= 2.0)
582       aapoint->radius = 1.0;
583    else
584       aapoint->radius = 0.5f * draw->rasterizer->point_size;
585 
586    /*
587     * Bind (generate) our fragprog.
588     */
589    bind_aapoint_fragment_shader(aapoint);
590 
591    draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint);
592 
593    draw->suspend_flushing = true;
594 
595    /* Disable triangle culling, stippling, unfilled mode etc. */
596    r = draw_get_rasterizer_no_cull(draw, rast);
597    pipe->bind_rasterizer_state(pipe, r);
598 
599    draw->suspend_flushing = false;
600 
601    /* now really draw first point */
602    stage->point = aapoint_point;
603    stage->point(stage, header);
604 }
605 
606 
607 static void
aapoint_flush(struct draw_stage * stage,unsigned flags)608 aapoint_flush(struct draw_stage *stage, unsigned flags)
609 {
610    struct draw_context *draw = stage->draw;
611    struct aapoint_stage *aapoint = aapoint_stage(stage);
612    struct pipe_context *pipe = draw->pipe;
613 
614    stage->point = aapoint_first_point;
615    stage->next->flush(stage->next, flags);
616 
617    /* restore original frag shader */
618    draw->suspend_flushing = true;
619    aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
620 
621    /* restore original rasterizer state */
622    if (draw->rast_handle) {
623       pipe->bind_rasterizer_state(pipe, draw->rast_handle);
624    }
625 
626    draw->suspend_flushing = false;
627 
628    draw_remove_extra_vertex_attribs(draw);
629 }
630 
631 
632 static void
aapoint_reset_stipple_counter(struct draw_stage * stage)633 aapoint_reset_stipple_counter(struct draw_stage *stage)
634 {
635    stage->next->reset_stipple_counter(stage->next);
636 }
637 
638 
639 static void
aapoint_destroy(struct draw_stage * stage)640 aapoint_destroy(struct draw_stage *stage)
641 {
642    struct aapoint_stage* aapoint = aapoint_stage(stage);
643    struct pipe_context *pipe = stage->draw->pipe;
644 
645    draw_free_temp_verts(stage);
646 
647    /* restore the old entry points */
648    pipe->create_fs_state = aapoint->driver_create_fs_state;
649    pipe->bind_fs_state = aapoint->driver_bind_fs_state;
650    pipe->delete_fs_state = aapoint->driver_delete_fs_state;
651 
652    FREE(stage);
653 }
654 
655 
656 void
draw_aapoint_prepare_outputs(struct draw_context * draw,struct draw_stage * stage)657 draw_aapoint_prepare_outputs(struct draw_context *draw,
658                              struct draw_stage *stage)
659 {
660    struct aapoint_stage *aapoint = aapoint_stage(stage);
661    const struct pipe_rasterizer_state *rast = draw->rasterizer;
662 
663    /* update vertex attrib info */
664    aapoint->pos_slot = draw_current_shader_position_output(draw);
665 
666    if (!rast->point_smooth || rast->multisample)
667       return;
668 
669    if (aapoint->fs && aapoint->fs->aapoint_fs) {
670       /* allocate the extra post-transformed vertex attribute */
671       aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
672                                                          TGSI_SEMANTIC_GENERIC,
673                                                          aapoint->fs->generic_attrib);
674       assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
675    } else {
676       aapoint->tex_slot = -1;
677    }
678 
679    /* find psize slot in post-transform vertex */
680    aapoint->psize_slot = -1;
681    if (draw->rasterizer->point_size_per_vertex) {
682       const struct tgsi_shader_info *info = draw_get_shader_info(draw);
683       /* find PSIZ vertex output */
684       for (unsigned i = 0; i < info->num_outputs; i++) {
685          if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
686             aapoint->psize_slot = i;
687             break;
688          }
689       }
690    }
691 }
692 
693 
694 static struct aapoint_stage *
draw_aapoint_stage(struct draw_context * draw,nir_alu_type bool_type)695 draw_aapoint_stage(struct draw_context *draw, nir_alu_type bool_type)
696 {
697    struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
698    if (!aapoint)
699       goto fail;
700 
701    aapoint->stage.draw = draw;
702    aapoint->stage.name = "aapoint";
703    aapoint->stage.next = NULL;
704    aapoint->stage.point = aapoint_first_point;
705    aapoint->stage.line = draw_pipe_passthrough_line;
706    aapoint->stage.tri = draw_pipe_passthrough_tri;
707    aapoint->stage.flush = aapoint_flush;
708    aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
709    aapoint->stage.destroy = aapoint_destroy;
710    aapoint->bool_type = bool_type;
711 
712    if (!draw_alloc_temp_verts(&aapoint->stage, 4))
713       goto fail;
714 
715    return aapoint;
716 
717  fail:
718    if (aapoint)
719       aapoint->stage.destroy(&aapoint->stage);
720 
721    return NULL;
722 
723 }
724 
725 
726 static struct aapoint_stage *
aapoint_stage_from_pipe(struct pipe_context * pipe)727 aapoint_stage_from_pipe(struct pipe_context *pipe)
728 {
729    struct draw_context *draw = (struct draw_context *) pipe->draw;
730    return aapoint_stage(draw->pipeline.aapoint);
731 }
732 
733 
734 /**
735  * This function overrides the driver's create_fs_state() function and
736  * will typically be called by the gallium frontend.
737  */
738 static void *
aapoint_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * fs)739 aapoint_create_fs_state(struct pipe_context *pipe,
740                         const struct pipe_shader_state *fs)
741 {
742    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
743    struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
744    if (!aafs)
745       return NULL;
746 
747    aafs->state.type = fs->type;
748    if (fs->type == PIPE_SHADER_IR_TGSI)
749       aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
750    else
751       aafs->state.ir.nir = nir_shader_clone(NULL, fs->ir.nir);
752    /* pass-through */
753    aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
754 
755    return aafs;
756 }
757 
758 
759 static void
aapoint_bind_fs_state(struct pipe_context * pipe,void * fs)760 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
761 {
762    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
763    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
764    /* save current */
765    aapoint->fs = aafs;
766    /* pass-through */
767    aapoint->driver_bind_fs_state(pipe,
768                                  (aafs ? aafs->driver_fs : NULL));
769 }
770 
771 
772 static void
aapoint_delete_fs_state(struct pipe_context * pipe,void * fs)773 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
774 {
775    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
776    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
777 
778    /* pass-through */
779    aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
780 
781    if (aafs->aapoint_fs)
782       aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
783 
784    if (aafs->state.type == PIPE_SHADER_IR_TGSI)
785       FREE((void*)aafs->state.tokens);
786    else
787       ralloc_free(aafs->state.ir.nir);
788 
789    FREE(aafs);
790 }
791 
792 
793 /**
794  * Called by drivers that want to install this AA point prim stage
795  * into the draw module's pipeline.  This will not be used if the
796  * hardware has native support for AA points.
797  */
798 bool
draw_install_aapoint_stage(struct draw_context * draw,struct pipe_context * pipe,nir_alu_type bool_type)799 draw_install_aapoint_stage(struct draw_context *draw,
800                            struct pipe_context *pipe,
801                            nir_alu_type bool_type)
802 {
803    struct aapoint_stage *aapoint;
804 
805    pipe->draw = (void *) draw;
806 
807    /*
808     * Create / install AA point drawing / prim stage
809     */
810    aapoint = draw_aapoint_stage(draw, bool_type);
811    if (!aapoint)
812       return false;
813 
814    /* save original driver functions */
815    aapoint->driver_create_fs_state = pipe->create_fs_state;
816    aapoint->driver_bind_fs_state = pipe->bind_fs_state;
817    aapoint->driver_delete_fs_state = pipe->delete_fs_state;
818 
819    /* override the driver's functions */
820    pipe->create_fs_state = aapoint_create_fs_state;
821    pipe->bind_fs_state = aapoint_bind_fs_state;
822    pipe->delete_fs_state = aapoint_delete_fs_state;
823 
824    draw->pipeline.aapoint = &aapoint->stage;
825 
826    return true;
827 }
828