/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <[email protected]>
  */
31
32 #include "elk_clip.h"
33
34
get_tmp(struct elk_clip_compile * c)35 struct elk_reg get_tmp( struct elk_clip_compile *c )
36 {
37 struct elk_reg tmp = elk_vec4_grf(c->last_tmp, 0);
38
39 if (++c->last_tmp > c->prog_data.total_grf)
40 c->prog_data.total_grf = c->last_tmp;
41
42 return tmp;
43 }
44
release_tmp(struct elk_clip_compile * c,struct elk_reg tmp)45 static void release_tmp( struct elk_clip_compile *c, struct elk_reg tmp )
46 {
47 if (tmp.nr == c->last_tmp-1)
48 c->last_tmp--;
49 }
50
51
/* Pack four byte-sized plane coefficients into one UD immediate:
 * x occupies the low byte, then y, z and w in successively higher
 * bytes.
 */
static struct elk_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
{
   GLuint packed = x;
   packed |= y << 8;
   packed |= z << 16;
   packed |= w << 24;
   return elk_imm_ud(packed);
}
56
57
elk_clip_init_planes(struct elk_clip_compile * c)58 void elk_clip_init_planes( struct elk_clip_compile *c )
59 {
60 struct elk_codegen *p = &c->func;
61
62 if (!c->key.nr_userclip) {
63 elk_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1));
64 elk_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1));
65 elk_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1));
66 elk_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1));
67 elk_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1));
68 elk_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1));
69 }
70 }
71
72
73
#define W 3

/* Project 'pos' to screen space (or back again), overwrite with results:
 */
void elk_clip_project_position(struct elk_clip_compile *c, struct elk_reg pos )
{
   struct elk_codegen *p = &c->func;

   /* calc rhw: replace pos.w with its reciprocal in place
    */
   elk_math_invert(p, get_element(pos, W), get_element(pos, W));

   /* value.xyz *= value.rhw
    */
   /* Swizzles/writemasks require ALIGN16 access mode; restore ALIGN1
    * afterwards since that is the file's default emission mode.
    */
   elk_set_default_access_mode(p, ELK_ALIGN_16);
   elk_MUL(p, elk_writemask(pos, WRITEMASK_XYZ), pos,
           elk_swizzle(pos, ELK_SWIZZLE_WWWW));
   elk_set_default_access_mode(p, ELK_ALIGN_1);
}
93
94
/* Emit code that reads a vertex's clip-space position (HPOS) through
 * the indirect pointer 'vert_addr', projects it to NDC, and writes the
 * result back into the vertex's NDC slot.
 */
static void elk_clip_project_vertex( struct elk_clip_compile *c,
                                     struct elk_indirect vert_addr )
{
   struct elk_codegen *p = &c->func;
   struct elk_reg tmp = get_tmp(c);
   /* Byte offsets of the HPOS and NDC slots within the VUE layout. */
   GLuint hpos_offset = elk_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
   GLuint ndc_offset = elk_varying_to_offset(&c->vue_map,
                                             ELK_VARYING_SLOT_NDC);

   /* Fixup position. Extract from the original vertex and re-project
    * to screen space:
    */
   elk_MOV(p, tmp, deref_4f(vert_addr, hpos_offset));
   elk_clip_project_position(c, tmp);
   elk_MOV(p, deref_4f(vert_addr, ndc_offset), tmp);

   release_tmp(c, tmp);
}
113
114
115
116
/* Interpolate between two vertices and put the result into a0.0.
 * Increment a0.0 accordingly.
 *
 * Beware that dest_ptr can be equal to v0_ptr!
 */
void elk_clip_interp_vertex( struct elk_clip_compile *c,
                             struct elk_indirect dest_ptr,
                             struct elk_indirect v0_ptr, /* from */
                             struct elk_indirect v1_ptr, /* to */
                             struct elk_reg t0,
                             bool force_edgeflag)
{
   struct elk_codegen *p = &c->func;
   struct elk_reg t_nopersp, v0_ndc_copy;
   GLuint slot;

   /* Just copy the vertex header:
    */
   /*
    * After CLIP stage, only first 256 bits of the VUE are read
    * back on Ironlake, so needn't change it
    */
   elk_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);


   /* First handle the 3D and NDC interpolation, in case we
    * need noperspective interpolation. Doing it early has no
    * performance impact in any case.
    */

   /* Take a copy of the v0 NDC coordinates, in case dest == v0. */
   if (c->key.contains_noperspective_varying) {
      GLuint offset = elk_varying_to_offset(&c->vue_map,
                                            ELK_VARYING_SLOT_NDC);
      v0_ndc_copy = get_tmp(c);
      elk_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset));
   }

   /* Compute the new 3D position
    *
    * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0
    */
   {
      GLuint delta = elk_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
      struct elk_reg tmp = get_tmp(c);
      /* MUL into the null reg primes the accumulator with v1*t0; the
       * MAC then produces tmp = v1*t0 - v0*t0, and the ADD yields
       * v0 + t0*(v1 - v0).
       */
      elk_MUL(p, vec4(elk_null_reg()), deref_4f(v1_ptr, delta), t0);
      elk_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0);
      elk_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp);
      release_tmp(c, tmp);
   }

   /* Recreate the projected (NDC) coordinate in the new vertex header */
   elk_clip_project_vertex(c, dest_ptr);

   /* If we have noperspective attributes,
    * we need to compute the screen-space t
    */
   if (c->key.contains_noperspective_varying) {
      GLuint delta = elk_varying_to_offset(&c->vue_map,
                                           ELK_VARYING_SLOT_NDC);
      struct elk_reg tmp = get_tmp(c);
      t_nopersp = get_tmp(c);

      /* t_nopersp = vec4(v1.xy, dest.xy) */
      elk_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));
      elk_MOV(p, tmp, deref_4f(dest_ptr, delta));
      elk_set_default_access_mode(p, ELK_ALIGN_16);
      elk_MOV(p,
              elk_writemask(t_nopersp, WRITEMASK_ZW),
              elk_swizzle(tmp, ELK_SWIZZLE_XYXY));

      /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */
      elk_ADD(p, t_nopersp, t_nopersp,
              negate(elk_swizzle(v0_ndc_copy, ELK_SWIZZLE_XYXY)));

      /* Add the absolute values of the X and Y deltas so that if
       * the points aren't in the same place on the screen we get
       * nonzero values to divide.
       *
       * After that, we have vert1 - vert0 in t_nopersp.x and
       * vertnew - vert0 in t_nopersp.y
       *
       * t_nopersp = vec2(|v1.x  -v0.x| + |v1.y  -v0.y|,
       *                  |dest.x-v0.x| + |dest.y-v0.y|)
       */
      elk_ADD(p,
              elk_writemask(t_nopersp, WRITEMASK_XY),
              elk_abs(elk_swizzle(t_nopersp, ELK_SWIZZLE_XZXZ)),
              elk_abs(elk_swizzle(t_nopersp, ELK_SWIZZLE_YWYW)));
      elk_set_default_access_mode(p, ELK_ALIGN_1);

      /* If the points are in the same place, just substitute a
       * value to avoid divide-by-zero
       */
      elk_CMP(p, vec1(elk_null_reg()), ELK_CONDITIONAL_EQ,
              vec1(t_nopersp),
              elk_imm_f(0));
      elk_IF(p, ELK_EXECUTE_1);
      elk_MOV(p, t_nopersp, elk_imm_vf4(elk_float_to_vf(1.0),
                                        elk_float_to_vf(0.0),
                                        elk_float_to_vf(0.0),
                                        elk_float_to_vf(0.0)));
      elk_ENDIF(p);

      /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
      elk_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0));
      elk_MUL(p, vec1(t_nopersp), vec1(t_nopersp),
              vec1(suboffset(t_nopersp, 1)));
      elk_set_default_access_mode(p, ELK_ALIGN_16);
      elk_MOV(p, t_nopersp, elk_swizzle(t_nopersp, ELK_SWIZZLE_XXXX));
      elk_set_default_access_mode(p, ELK_ALIGN_1);

      release_tmp(c, tmp);
      release_tmp(c, v0_ndc_copy);
   }

   /* Now we can iterate over each attribute
    * (could be done in pairs?)
    */
   for (slot = 0; slot < c->vue_map.num_slots; slot++) {
      int varying = c->vue_map.slot_to_varying[slot];
      GLuint delta = elk_vue_slot_to_offset(slot);

      /* HPOS, NDC already handled above */
      if (varying == VARYING_SLOT_POS || varying == ELK_VARYING_SLOT_NDC)
         continue;


      if (varying == VARYING_SLOT_EDGE) {
         /* Edge flags are not interpolated: either forced on (new
          * vertices introduced by clipping) or copied from v0.
          */
         if (force_edgeflag)
            elk_MOV(p, deref_4f(dest_ptr, delta), elk_imm_f(1));
         else
            elk_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
      } else if (varying == VARYING_SLOT_PSIZ) {
         /* PSIZ doesn't need interpolation because it isn't used by the
          * fragment shader.
          */
      } else if (varying < VARYING_SLOT_MAX) {
         /* This is a true vertex result (and not a special value for the VUE
          * header), so interpolate:
          *
          *        New = attr0 + t*attr1 - t*attr0
          *
          * Unless the attribute is flat shaded -- in which case just copy
          * from one of the sources (doesn't matter which; already copied from pv)
          */
         GLuint interp = c->key.interp_mode[slot];

         if (interp != INTERP_MODE_FLAT) {
            struct elk_reg tmp = get_tmp(c);
            /* Noperspective attributes use the screen-space parameter
             * computed above; everything else uses the clip-space t0.
             */
            struct elk_reg t =
               interp == INTERP_MODE_NOPERSPECTIVE ? t_nopersp : t0;

            /* Same MUL/MAC/ADD accumulator idiom as the HPOS lerp above. */
            elk_MUL(p,
                    vec4(elk_null_reg()),
                    deref_4f(v1_ptr, delta),
                    t);

            elk_MAC(p,
                    tmp,
                    negate(deref_4f(v0_ptr, delta)),
                    t);

            elk_ADD(p,
                    deref_4f(dest_ptr, delta),
                    deref_4f(v0_ptr, delta),
                    tmp);

            release_tmp(c, tmp);
         }
         else {
            elk_MOV(p,
                    deref_4f(dest_ptr, delta),
                    deref_4f(v0_ptr, delta));
         }
      }
   }

   /* Odd slot count: zero the slot after the last one.  NOTE(review):
    * presumably pads the VUE to an even number of vec4 slots for the
    * URB write -- confirm against the URB write size requirements.
    */
   if (c->vue_map.num_slots % 2) {
      GLuint delta = elk_vue_slot_to_offset(c->vue_map.num_slots);

      elk_MOV(p, deref_4f(dest_ptr, delta), elk_imm_f(0));
   }

   if (c->key.contains_noperspective_varying)
      release_tmp(c, t_nopersp);
}
304
/* Emit code writing the vertex pointed at by 'vert' out to the URB as a
 * single message, with 'header' overwriting the PrimType/PrimStart
 * dword of the message header.
 */
void elk_clip_emit_vue(struct elk_clip_compile *c,
                       struct elk_indirect vert,
                       enum elk_urb_write_flags flags,
                       GLuint header)
{
   struct elk_codegen *p = &c->func;
   bool allocate = flags & ELK_URB_WRITE_ALLOCATE;

   /* Ver-5 hardware needs a one-time ff_sync before URB writes; no-op
    * elsewhere (see elk_clip_ff_sync).
    */
   elk_clip_ff_sync(c);

   /* Any URB entry that is allocated must subsequently be used or discarded,
    * so it doesn't make sense to mark EOT and ALLOCATE at the same time.
    */
   assert(!(allocate && (flags & ELK_URB_WRITE_EOT)));

   /* Copy the vertex from vertn into m1..mN+1:
    */
   elk_copy_from_indirect(p, elk_message_reg(1), vert, c->nr_regs);

   /* Overwrite PrimType and PrimStart in the message header, for
    * each vertex in turn:
    */
   elk_MOV(p, get_element_ud(c->reg.R0, 2), elk_imm_ud(header));


   /* Send each vertex as a separate write to the urb.  This
    * is different to the concept in elk_sf_emit.c, where
    * subsequent writes are used to build up a single urb
    * entry.  Each of these writes instantiates a separate
    * urb entry - (I think... what about 'allocate'?)
    */
   elk_urb_WRITE(p,
                 allocate ? c->reg.R0 : retype(elk_null_reg(), ELK_REGISTER_TYPE_UD),
                 0,
                 c->reg.R0,
                 flags,
                 c->nr_regs + 1, /* msg length */
                 allocate ? 1 : 0, /* response_length */
                 0, /* urb offset */
                 ELK_URB_SWIZZLE_NONE);
}
346
347
348
/* Emit code terminating the clip thread with an empty EOT URB write,
 * which also releases any URB entry the thread still holds.
 */
void elk_clip_kill_thread(struct elk_clip_compile *c)
{
   struct elk_codegen *p = &c->func;

   /* Ver-5 hardware must have sent its ff_sync message before EOT. */
   elk_clip_ff_sync(c);
   /* Send an empty message to kill the thread and release any
    * allocated urb entry:
    */
   elk_urb_WRITE(p,
                 retype(elk_null_reg(), ELK_REGISTER_TYPE_UD),
                 0,
                 c->reg.R0,
                 ELK_URB_WRITE_UNUSED | ELK_URB_WRITE_EOT_COMPLETE,
                 1, /* msg len */
                 0, /* response len */
                 0,
                 ELK_URB_SWIZZLE_NONE);
}
367
368
369
370
elk_clip_plane0_address(struct elk_clip_compile * c)371 struct elk_reg elk_clip_plane0_address( struct elk_clip_compile *c )
372 {
373 return elk_address(c->reg.fixed_planes);
374 }
375
376
elk_clip_plane_stride(struct elk_clip_compile * c)377 struct elk_reg elk_clip_plane_stride( struct elk_clip_compile *c )
378 {
379 if (c->key.nr_userclip) {
380 return elk_imm_uw(16);
381 }
382 else {
383 return elk_imm_uw(4);
384 }
385 }
386
387
388 /* Distribute flatshaded attributes from provoking vertex prior to
389 * clipping.
390 */
elk_clip_copy_flatshaded_attributes(struct elk_clip_compile * c,GLuint to,GLuint from)391 void elk_clip_copy_flatshaded_attributes( struct elk_clip_compile *c,
392 GLuint to, GLuint from )
393 {
394 struct elk_codegen *p = &c->func;
395
396 for (int i = 0; i < c->vue_map.num_slots; i++) {
397 if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
398 elk_MOV(p,
399 byte_offset(c->reg.vertex[to], elk_vue_slot_to_offset(i)),
400 byte_offset(c->reg.vertex[from], elk_vue_slot_to_offset(i)));
401 }
402 }
403 }
404
405
406
/* Emit code initializing c->reg.planemask from the clip outcodes that
 * arrive in dword 2 of the thread payload (R0).
 */
void elk_clip_init_clipmask( struct elk_clip_compile *c )
{
   struct elk_codegen *p = &c->func;
   struct elk_reg incoming = get_element_ud(c->reg.R0, 2);

   /* Shift so that lowest outcode bit is rightmost:
    */
   elk_SHR(p, c->reg.planemask, incoming, elk_imm_ud(26));

   if (c->key.nr_userclip) {
      struct elk_reg tmp = retype(vec1(get_tmp(c)), ELK_REGISTER_TYPE_UD);

      /* Rearrange userclip outcodes so that they come directly after
       * the fixed plane bits.
       */
      /* Ver 5 and verx10 == 45 hardware carries 8 userclip outcode bits
       * at bit 14; other generations only 6.
       */
      if (p->devinfo->ver == 5 || p->devinfo->verx10 == 45)
         elk_AND(p, tmp, incoming, elk_imm_ud(0xff<<14));
      else
         elk_AND(p, tmp, incoming, elk_imm_ud(0x3f<<14));

      /* Shift the isolated userclip field down and merge it into the
       * planemask above the fixed-plane bits.
       */
      elk_SHR(p, tmp, tmp, elk_imm_ud(8));
      elk_OR(p, c->reg.planemask, c->reg.planemask, tmp);

      release_tmp(c, tmp);
   }
}
433
/* On ver-5 hardware, emit code that sends the ff_sync message exactly
 * once per thread: bit 0 of c->reg.ff_sync tracks at runtime whether
 * the message has already been sent.  No-op on other generations.
 */
void elk_clip_ff_sync(struct elk_clip_compile *c)
{
   struct elk_codegen *p = &c->func;

   if (p->devinfo->ver == 5) {
      /* Test the "already synced" flag (bit 0) and set the Z condition
       * modifier so the IF below only runs when it is still clear.
       */
      elk_AND(p, elk_null_reg(), c->reg.ff_sync, elk_imm_ud(0x1));
      elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
      elk_IF(p, ELK_EXECUTE_1);
      {
         /* Mark as synced, then send the ff_sync message with
          * allocate=1, writing the response into R0.
          */
         elk_OR(p, c->reg.ff_sync, c->reg.ff_sync, elk_imm_ud(0x1));
         elk_ff_sync(p,
                     c->reg.R0,
                     0,
                     c->reg.R0,
                     1, /* allocate */
                     1, /* response length */
                     0 /* eot */);
      }
      elk_ENDIF(p);
      /* Clear any predication left over from the IF/ENDIF block. */
      elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
   }
}
456
elk_clip_init_ff_sync(struct elk_clip_compile * c)457 void elk_clip_init_ff_sync(struct elk_clip_compile *c)
458 {
459 struct elk_codegen *p = &c->func;
460
461 if (p->devinfo->ver == 5) {
462 elk_MOV(p, c->reg.ff_sync, elk_imm_ud(0));
463 }
464 }
465