xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_clip_util.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
32 #include "elk_clip.h"
33 
34 
get_tmp(struct elk_clip_compile * c)35 struct elk_reg get_tmp( struct elk_clip_compile *c )
36 {
37    struct elk_reg tmp = elk_vec4_grf(c->last_tmp, 0);
38 
39    if (++c->last_tmp > c->prog_data.total_grf)
40       c->prog_data.total_grf = c->last_tmp;
41 
42    return tmp;
43 }
44 
release_tmp(struct elk_clip_compile * c,struct elk_reg tmp)45 static void release_tmp( struct elk_clip_compile *c, struct elk_reg tmp )
46 {
47    if (tmp.nr == c->last_tmp-1)
48       c->last_tmp--;
49 }
50 
51 
/* Pack four values into a single UD immediate, one per byte: x in bits
 * 0-7, y in bits 8-15, z in bits 16-23 and w in bits 24-31.  The inputs
 * are not masked, so each is expected to fit in a byte.
 */
static struct elk_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
{
   const GLuint packed = x | (y << 8) | (z << 16) | (w << 24);

   return elk_imm_ud(packed);
}
56 
57 
elk_clip_init_planes(struct elk_clip_compile * c)58 void elk_clip_init_planes( struct elk_clip_compile *c )
59 {
60    struct elk_codegen *p = &c->func;
61 
62    if (!c->key.nr_userclip) {
63       elk_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0,    0, 0xff, 1));
64       elk_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0,    0,    1, 1));
65       elk_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff,    0, 1));
66       elk_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0,    1,    0, 1));
67       elk_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff,  0,    0, 1));
68       elk_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1,    0,    0, 1));
69    }
70 }
71 
72 
73 
74 #define W 3
75 
76 /* Project 'pos' to screen space (or back again), overwrite with results:
77  */
elk_clip_project_position(struct elk_clip_compile * c,struct elk_reg pos)78 void elk_clip_project_position(struct elk_clip_compile *c, struct elk_reg pos )
79 {
80    struct elk_codegen *p = &c->func;
81 
82    /* calc rhw
83     */
84    elk_math_invert(p, get_element(pos, W), get_element(pos, W));
85 
86    /* value.xyz *= value.rhw
87     */
88    elk_set_default_access_mode(p, ELK_ALIGN_16);
89    elk_MUL(p, elk_writemask(pos, WRITEMASK_XYZ), pos,
90            elk_swizzle(pos, ELK_SWIZZLE_WWWW));
91    elk_set_default_access_mode(p, ELK_ALIGN_1);
92 }
93 
94 
elk_clip_project_vertex(struct elk_clip_compile * c,struct elk_indirect vert_addr)95 static void elk_clip_project_vertex( struct elk_clip_compile *c,
96 				     struct elk_indirect vert_addr )
97 {
98    struct elk_codegen *p = &c->func;
99    struct elk_reg tmp = get_tmp(c);
100    GLuint hpos_offset = elk_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
101    GLuint ndc_offset = elk_varying_to_offset(&c->vue_map,
102                                              ELK_VARYING_SLOT_NDC);
103 
104    /* Fixup position.  Extract from the original vertex and re-project
105     * to screen space:
106     */
107    elk_MOV(p, tmp, deref_4f(vert_addr, hpos_offset));
108    elk_clip_project_position(c, tmp);
109    elk_MOV(p, deref_4f(vert_addr, ndc_offset), tmp);
110 
111    release_tmp(c, tmp);
112 }
113 
114 
115 
116 
117 /* Interpolate between two vertices and put the result into a0.0.
118  * Increment a0.0 accordingly.
119  *
120  * Beware that dest_ptr can be equal to v0_ptr!
121  */
elk_clip_interp_vertex(struct elk_clip_compile * c,struct elk_indirect dest_ptr,struct elk_indirect v0_ptr,struct elk_indirect v1_ptr,struct elk_reg t0,bool force_edgeflag)122 void elk_clip_interp_vertex( struct elk_clip_compile *c,
123 			     struct elk_indirect dest_ptr,
124 			     struct elk_indirect v0_ptr, /* from */
125 			     struct elk_indirect v1_ptr, /* to */
126 			     struct elk_reg t0,
127 			     bool force_edgeflag)
128 {
129    struct elk_codegen *p = &c->func;
130    struct elk_reg t_nopersp, v0_ndc_copy;
131    GLuint slot;
132 
133    /* Just copy the vertex header:
134     */
135    /*
136     * After CLIP stage, only first 256 bits of the VUE are read
137     * back on Ironlake, so needn't change it
138     */
139    elk_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
140 
141 
142    /* First handle the 3D and NDC interpolation, in case we
143     * need noperspective interpolation. Doing it early has no
144     * performance impact in any case.
145     */
146 
147    /* Take a copy of the v0 NDC coordinates, in case dest == v0. */
148    if (c->key.contains_noperspective_varying) {
149       GLuint offset = elk_varying_to_offset(&c->vue_map,
150                                                  ELK_VARYING_SLOT_NDC);
151       v0_ndc_copy = get_tmp(c);
152       elk_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset));
153    }
154 
155    /* Compute the new 3D position
156     *
157     * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0
158     */
159    {
160       GLuint delta = elk_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
161       struct elk_reg tmp = get_tmp(c);
162       elk_MUL(p, vec4(elk_null_reg()), deref_4f(v1_ptr, delta), t0);
163       elk_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0);
164       elk_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp);
165       release_tmp(c, tmp);
166    }
167 
168    /* Recreate the projected (NDC) coordinate in the new vertex header */
169    elk_clip_project_vertex(c, dest_ptr);
170 
171    /* If we have noperspective attributes,
172     * we need to compute the screen-space t
173     */
174    if (c->key.contains_noperspective_varying) {
175       GLuint delta = elk_varying_to_offset(&c->vue_map,
176                                                 ELK_VARYING_SLOT_NDC);
177       struct elk_reg tmp = get_tmp(c);
178       t_nopersp = get_tmp(c);
179 
180       /* t_nopersp = vec4(v1.xy, dest.xy) */
181       elk_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));
182       elk_MOV(p, tmp, deref_4f(dest_ptr, delta));
183       elk_set_default_access_mode(p, ELK_ALIGN_16);
184       elk_MOV(p,
185               elk_writemask(t_nopersp, WRITEMASK_ZW),
186               elk_swizzle(tmp, ELK_SWIZZLE_XYXY));
187 
188       /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */
189       elk_ADD(p, t_nopersp, t_nopersp,
190               negate(elk_swizzle(v0_ndc_copy, ELK_SWIZZLE_XYXY)));
191 
192       /* Add the absolute values of the X and Y deltas so that if
193        * the points aren't in the same place on the screen we get
194        * nonzero values to divide.
195        *
196        * After that, we have vert1 - vert0 in t_nopersp.x and
197        * vertnew - vert0 in t_nopersp.y
198        *
199        * t_nopersp = vec2(|v1.x  -v0.x| + |v1.y  -v0.y|,
200        *                  |dest.x-v0.x| + |dest.y-v0.y|)
201        */
202       elk_ADD(p,
203               elk_writemask(t_nopersp, WRITEMASK_XY),
204               elk_abs(elk_swizzle(t_nopersp, ELK_SWIZZLE_XZXZ)),
205               elk_abs(elk_swizzle(t_nopersp, ELK_SWIZZLE_YWYW)));
206       elk_set_default_access_mode(p, ELK_ALIGN_1);
207 
208       /* If the points are in the same place, just substitute a
209        * value to avoid divide-by-zero
210        */
211       elk_CMP(p, vec1(elk_null_reg()), ELK_CONDITIONAL_EQ,
212               vec1(t_nopersp),
213               elk_imm_f(0));
214       elk_IF(p, ELK_EXECUTE_1);
215       elk_MOV(p, t_nopersp, elk_imm_vf4(elk_float_to_vf(1.0),
216                                         elk_float_to_vf(0.0),
217                                         elk_float_to_vf(0.0),
218                                         elk_float_to_vf(0.0)));
219       elk_ENDIF(p);
220 
221       /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
222       elk_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0));
223       elk_MUL(p, vec1(t_nopersp), vec1(t_nopersp),
224             vec1(suboffset(t_nopersp, 1)));
225       elk_set_default_access_mode(p, ELK_ALIGN_16);
226       elk_MOV(p, t_nopersp, elk_swizzle(t_nopersp, ELK_SWIZZLE_XXXX));
227       elk_set_default_access_mode(p, ELK_ALIGN_1);
228 
229       release_tmp(c, tmp);
230       release_tmp(c, v0_ndc_copy);
231    }
232 
233    /* Now we can iterate over each attribute
234     * (could be done in pairs?)
235     */
236    for (slot = 0; slot < c->vue_map.num_slots; slot++) {
237       int varying = c->vue_map.slot_to_varying[slot];
238       GLuint delta = elk_vue_slot_to_offset(slot);
239 
240       /* HPOS, NDC already handled above */
241       if (varying == VARYING_SLOT_POS || varying == ELK_VARYING_SLOT_NDC)
242          continue;
243 
244 
245       if (varying == VARYING_SLOT_EDGE) {
246 	 if (force_edgeflag)
247 	    elk_MOV(p, deref_4f(dest_ptr, delta), elk_imm_f(1));
248 	 else
249 	    elk_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
250       } else if (varying == VARYING_SLOT_PSIZ) {
251          /* PSIZ doesn't need interpolation because it isn't used by the
252           * fragment shader.
253           */
254       } else if (varying < VARYING_SLOT_MAX) {
255 	 /* This is a true vertex result (and not a special value for the VUE
256 	  * header), so interpolate:
257 	  *
258 	  *        New = attr0 + t*attr1 - t*attr0
259           *
260           * Unless the attribute is flat shaded -- in which case just copy
261           * from one of the sources (doesn't matter which; already copied from pv)
262 	  */
263          GLuint interp = c->key.interp_mode[slot];
264 
265          if (interp != INTERP_MODE_FLAT) {
266             struct elk_reg tmp = get_tmp(c);
267             struct elk_reg t =
268                interp == INTERP_MODE_NOPERSPECTIVE ? t_nopersp : t0;
269 
270             elk_MUL(p,
271                   vec4(elk_null_reg()),
272                   deref_4f(v1_ptr, delta),
273                   t);
274 
275             elk_MAC(p,
276                   tmp,
277                   negate(deref_4f(v0_ptr, delta)),
278                   t);
279 
280             elk_ADD(p,
281                   deref_4f(dest_ptr, delta),
282                   deref_4f(v0_ptr, delta),
283                   tmp);
284 
285             release_tmp(c, tmp);
286          }
287          else {
288             elk_MOV(p,
289                   deref_4f(dest_ptr, delta),
290                   deref_4f(v0_ptr, delta));
291          }
292       }
293    }
294 
295    if (c->vue_map.num_slots % 2) {
296       GLuint delta = elk_vue_slot_to_offset(c->vue_map.num_slots);
297 
298       elk_MOV(p, deref_4f(dest_ptr, delta), elk_imm_f(0));
299    }
300 
301    if (c->key.contains_noperspective_varying)
302       release_tmp(c, t_nopersp);
303 }
304 
/* Emit the code that writes one vertex to the URB.
 *
 * c      - clip compile state; code is emitted into c->func
 * vert   - vertex to copy into the message registers
 * flags  - URB write flags; ELK_URB_WRITE_ALLOCATE and ELK_URB_WRITE_EOT
 *          must not both be set
 * header - value stored in dword 2 of R0 before the write (PrimType and
 *          PrimStart, per the comment in the body)
 */
void elk_clip_emit_vue(struct elk_clip_compile *c,
                       struct elk_indirect vert,
                       enum elk_urb_write_flags flags,
                       GLuint header)
{
   struct elk_codegen *p = &c->func;
   const bool allocate = (flags & ELK_URB_WRITE_ALLOCATE) != 0;
   struct elk_reg urb_dest;

   elk_clip_ff_sync(c);

   /* Any URB entry that is allocated must subsequently be used or discarded,
    * so it doesn't make sense to mark EOT and ALLOCATE at the same time.
    */
   assert(!(allocate && (flags & ELK_URB_WRITE_EOT)));

   /* Copy the vertex from vertn into m1..mN+1:
    */
   elk_copy_from_indirect(p, elk_message_reg(1), vert, c->nr_regs);

   /* Overwrite PrimType and PrimStart in the message header, for
    * each vertex in turn:
    */
   elk_MOV(p, get_element_ud(c->reg.R0, 2), elk_imm_ud(header));

   /* A reply is only requested when allocating; it is written to R0.
    * Otherwise the destination is the null register.
    */
   if (allocate)
      urb_dest = c->reg.R0;
   else
      urb_dest = retype(elk_null_reg(), ELK_REGISTER_TYPE_UD);

   /* Send each vertex as a separate write to the urb.  This
    * is different to the concept in elk_sf_emit.c, where
    * subsequent writes are used to build up a single urb
    * entry.  Each of these writes instantiates a separate
    * urb entry - (I think... what about 'allocate'?)
    */
   elk_urb_WRITE(p,
                 urb_dest,
                 0,
                 c->reg.R0,
                 flags,
                 c->nr_regs + 1,   /* msg length */
                 allocate ? 1 : 0, /* response_length */
                 0,                /* urb offset */
                 ELK_URB_SWIZZLE_NONE);
}
346 
347 
348 
elk_clip_kill_thread(struct elk_clip_compile * c)349 void elk_clip_kill_thread(struct elk_clip_compile *c)
350 {
351    struct elk_codegen *p = &c->func;
352 
353    elk_clip_ff_sync(c);
354    /* Send an empty message to kill the thread and release any
355     * allocated urb entry:
356     */
357    elk_urb_WRITE(p,
358 		 retype(elk_null_reg(), ELK_REGISTER_TYPE_UD),
359 		 0,
360 		 c->reg.R0,
361                  ELK_URB_WRITE_UNUSED | ELK_URB_WRITE_EOT_COMPLETE,
362 		 1, 		/* msg len */
363 		 0, 		/* response len */
364 		 0,
365 		 ELK_URB_SWIZZLE_NONE);
366 }
367 
368 
369 
370 
elk_clip_plane0_address(struct elk_clip_compile * c)371 struct elk_reg elk_clip_plane0_address( struct elk_clip_compile *c )
372 {
373    return elk_address(c->reg.fixed_planes);
374 }
375 
376 
elk_clip_plane_stride(struct elk_clip_compile * c)377 struct elk_reg elk_clip_plane_stride( struct elk_clip_compile *c )
378 {
379    if (c->key.nr_userclip) {
380       return elk_imm_uw(16);
381    }
382    else {
383       return elk_imm_uw(4);
384    }
385 }
386 
387 
388 /* Distribute flatshaded attributes from provoking vertex prior to
389  * clipping.
390  */
elk_clip_copy_flatshaded_attributes(struct elk_clip_compile * c,GLuint to,GLuint from)391 void elk_clip_copy_flatshaded_attributes( struct elk_clip_compile *c,
392 			   GLuint to, GLuint from )
393 {
394    struct elk_codegen *p = &c->func;
395 
396    for (int i = 0; i < c->vue_map.num_slots; i++) {
397       if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
398          elk_MOV(p,
399                  byte_offset(c->reg.vertex[to], elk_vue_slot_to_offset(i)),
400                  byte_offset(c->reg.vertex[from], elk_vue_slot_to_offset(i)));
401       }
402    }
403 }
404 
405 
406 
elk_clip_init_clipmask(struct elk_clip_compile * c)407 void elk_clip_init_clipmask( struct elk_clip_compile *c )
408 {
409    struct elk_codegen *p = &c->func;
410    struct elk_reg incoming = get_element_ud(c->reg.R0, 2);
411 
412    /* Shift so that lowest outcode bit is rightmost:
413     */
414    elk_SHR(p, c->reg.planemask, incoming, elk_imm_ud(26));
415 
416    if (c->key.nr_userclip) {
417       struct elk_reg tmp = retype(vec1(get_tmp(c)), ELK_REGISTER_TYPE_UD);
418 
419       /* Rearrange userclip outcodes so that they come directly after
420        * the fixed plane bits.
421        */
422       if (p->devinfo->ver == 5 || p->devinfo->verx10 == 45)
423          elk_AND(p, tmp, incoming, elk_imm_ud(0xff<<14));
424       else
425          elk_AND(p, tmp, incoming, elk_imm_ud(0x3f<<14));
426 
427       elk_SHR(p, tmp, tmp, elk_imm_ud(8));
428       elk_OR(p, c->reg.planemask, c->reg.planemask, tmp);
429 
430       release_tmp(c, tmp);
431    }
432 }
433 
elk_clip_ff_sync(struct elk_clip_compile * c)434 void elk_clip_ff_sync(struct elk_clip_compile *c)
435 {
436     struct elk_codegen *p = &c->func;
437 
438     if (p->devinfo->ver == 5) {
439         elk_AND(p, elk_null_reg(), c->reg.ff_sync, elk_imm_ud(0x1));
440         elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
441         elk_IF(p, ELK_EXECUTE_1);
442         {
443             elk_OR(p, c->reg.ff_sync, c->reg.ff_sync, elk_imm_ud(0x1));
444             elk_ff_sync(p,
445 			c->reg.R0,
446 			0,
447 			c->reg.R0,
448 			1, /* allocate */
449 			1, /* response length */
450 			0 /* eot */);
451         }
452         elk_ENDIF(p);
453         elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
454     }
455 }
456 
elk_clip_init_ff_sync(struct elk_clip_compile * c)457 void elk_clip_init_ff_sync(struct elk_clip_compile *c)
458 {
459     struct elk_codegen *p = &c->func;
460 
461     if (p->devinfo->ver == 5) {
462         elk_MOV(p, c->reg.ff_sync, elk_imm_ud(0));
463     }
464 }
465