xref: /aosp_15_r20/external/mesa3d/src/nouveau/nil/tic.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2024 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 #![allow(unused_macros)]
5 
6 extern crate bitview;
7 extern crate nvidia_headers;
8 
9 use bitview::*;
10 use nil_rs_bindings::*;
11 use nvidia_headers::classes::cl9097::tex as cl9097;
12 use nvidia_headers::classes::cl9097::FERMI_A;
13 use nvidia_headers::classes::clb097::tex as clb097;
14 use nvidia_headers::classes::clb097::MAXWELL_A;
15 use nvidia_headers::classes::clc097::tex as clc097;
16 use nvidia_headers::classes::clc097::PASCAL_A;
17 use nvidia_headers::classes::clc397::VOLTA_A;
18 use paste::paste;
19 use std::ops::Range;
20 
21 use crate::extent::{units, Extent4D};
22 use crate::format::Format;
23 use crate::image::Image;
24 use crate::image::ImageDim;
25 use crate::image::SampleLayout;
26 use crate::image::View;
27 use crate::image::ViewType;
28 
/// Sets an enumerated header field by name.
///
/// `set_enum!(th, cls, FIELD, VALUE)` expands to
/// `th.set_field(cls::FIELD, cls::FIELD_VALUE)`; `paste!` glues the field
/// name and the enumerant suffix into a single identifier.
macro_rules! set_enum {
    ($view:expr, $class:ident, $name:ident, $variant:ident) => {
        paste! {
            $view.set_field($class::$name, $class::[<$name _ $variant>])
        }
    };
}
36 
37 trait SetUFixed {
set_ufixed(&mut self, range: Range<usize>, val: f32)38     fn set_ufixed(&mut self, range: Range<usize>, val: f32);
39 }
40 
41 const FIXED_FRAC_BITS: u32 = 8;
42 
43 impl<T: SetFieldU64> SetUFixed for T {
set_ufixed(&mut self, range: Range<usize>, val: f32)44     fn set_ufixed(&mut self, range: Range<usize>, val: f32) {
45         assert!(range.len() >= FIXED_FRAC_BITS as usize);
46         let scaled = val * ((1 << FIXED_FRAC_BITS) as f32);
47         let scaled_max = ((1 << range.len()) - 1) as f32;
48         let u_val = scaled.clamp(0.0, scaled_max).round() as u32;
49         self.set_field(range, u_val);
50     }
51 }
52 
nv9097_th_v2_source( fmt: &nil_tic_format, swizzle: pipe_swizzle, is_int: bool, ) -> u3253 fn nv9097_th_v2_source(
54     fmt: &nil_tic_format,
55     swizzle: pipe_swizzle,
56     is_int: bool,
57 ) -> u32 {
58     match swizzle {
59         PIPE_SWIZZLE_X => fmt.src_x(),
60         PIPE_SWIZZLE_Y => fmt.src_y(),
61         PIPE_SWIZZLE_Z => fmt.src_z(),
62         PIPE_SWIZZLE_W => fmt.src_w(),
63         PIPE_SWIZZLE_0 => cl9097::TEXHEADV2_X_SOURCE_IN_ZERO,
64         PIPE_SWIZZLE_1 => {
65             if is_int {
66                 cl9097::TEXHEADV2_X_SOURCE_IN_ONE_INT
67             } else {
68                 cl9097::TEXHEADV2_X_SOURCE_IN_ONE_FLOAT
69             }
70         }
71         other => panic!("Invalid component swizzle {}", other),
72     }
73 }
74 
nvb097_th_bl_source( fmt: &nil_tic_format, swizzle: pipe_swizzle, is_int: bool, ) -> u3275 fn nvb097_th_bl_source(
76     fmt: &nil_tic_format,
77     swizzle: pipe_swizzle,
78     is_int: bool,
79 ) -> u32 {
80     match swizzle {
81         PIPE_SWIZZLE_X => fmt.src_x(),
82         PIPE_SWIZZLE_Y => fmt.src_y(),
83         PIPE_SWIZZLE_Z => fmt.src_z(),
84         PIPE_SWIZZLE_W => fmt.src_w(),
85         PIPE_SWIZZLE_0 => clb097::TEXHEADV2_X_SOURCE_IN_ZERO,
86         PIPE_SWIZZLE_1 => {
87             if is_int {
88                 clb097::TEXHEADV2_X_SOURCE_IN_ONE_INT
89             } else {
90                 clb097::TEXHEADV2_X_SOURCE_IN_ONE_FLOAT
91             }
92         }
93         other => panic!("Invalid component swizzle {}", other),
94     }
95 }
96 
97 type THBitView<'a> = BitMutView<'a, [u32; 8]>;
98 
nv9097_set_th_v2_0<'a>( th: &mut THBitView<'a>, format: &Format, swizzle: [nil_rs_bindings::pipe_swizzle; 4], )99 fn nv9097_set_th_v2_0<'a>(
100     th: &mut THBitView<'a>,
101     format: &Format,
102     swizzle: [nil_rs_bindings::pipe_swizzle; 4],
103 ) {
104     let fmt = &format.info().tic;
105     let is_int = format.is_integer();
106     let source = [
107         nv9097_th_v2_source(fmt, swizzle[0], is_int),
108         nv9097_th_v2_source(fmt, swizzle[1], is_int),
109         nv9097_th_v2_source(fmt, swizzle[2], is_int),
110         nv9097_th_v2_source(fmt, swizzle[3], is_int),
111     ];
112 
113     th.set_field(cl9097::TEXHEADV2_COMPONENT_SIZES, fmt.comp_sizes());
114     th.set_field(cl9097::TEXHEADV2_R_DATA_TYPE, fmt.type_r());
115     th.set_field(cl9097::TEXHEADV2_G_DATA_TYPE, fmt.type_g());
116     th.set_field(cl9097::TEXHEADV2_B_DATA_TYPE, fmt.type_b());
117     th.set_field(cl9097::TEXHEADV2_A_DATA_TYPE, fmt.type_a());
118     th.set_field(cl9097::TEXHEADV2_X_SOURCE, source[0]);
119     th.set_field(cl9097::TEXHEADV2_Y_SOURCE, source[1]);
120     th.set_field(cl9097::TEXHEADV2_Z_SOURCE, source[2]);
121     th.set_field(cl9097::TEXHEADV2_W_SOURCE, source[3]);
122 }
123 
nvb097_set_th_bl_0<'a>( th: &mut THBitView<'a>, format: &Format, swizzle: [nil_rs_bindings::pipe_swizzle; 4], )124 fn nvb097_set_th_bl_0<'a>(
125     th: &mut THBitView<'a>,
126     format: &Format,
127     swizzle: [nil_rs_bindings::pipe_swizzle; 4],
128 ) {
129     let fmt = &format.info().tic;
130     let is_int = format.is_integer();
131     let source = [
132         nvb097_th_bl_source(fmt, swizzle[0], is_int),
133         nvb097_th_bl_source(fmt, swizzle[1], is_int),
134         nvb097_th_bl_source(fmt, swizzle[2], is_int),
135         nvb097_th_bl_source(fmt, swizzle[3], is_int),
136     ];
137 
138     th.set_field(clb097::TEXHEAD_BL_COMPONENTS, fmt.comp_sizes());
139     th.set_field(clb097::TEXHEAD_BL_R_DATA_TYPE, fmt.type_r());
140     th.set_field(clb097::TEXHEAD_BL_G_DATA_TYPE, fmt.type_g());
141     th.set_field(clb097::TEXHEAD_BL_B_DATA_TYPE, fmt.type_b());
142     th.set_field(clb097::TEXHEAD_BL_A_DATA_TYPE, fmt.type_a());
143     th.set_field(clb097::TEXHEAD_BL_X_SOURCE, source[0]);
144     th.set_field(clb097::TEXHEAD_BL_Y_SOURCE, source[1]);
145     th.set_field(clb097::TEXHEAD_BL_Z_SOURCE, source[2]);
146     th.set_field(clb097::TEXHEAD_BL_W_SOURCE, source[3]);
147 }
148 
pipe_to_nv_texture_type(ty: ViewType) -> u32149 fn pipe_to_nv_texture_type(ty: ViewType) -> u32 {
150     match ty {
151         ViewType::_1D => clb097::TEXHEAD_BL_TEXTURE_TYPE_ONE_D,
152         ViewType::_2D => clb097::TEXHEAD_BL_TEXTURE_TYPE_TWO_D,
153         ViewType::_3D | ViewType::_3DSliced => {
154             clb097::TEXHEAD_BL_TEXTURE_TYPE_THREE_D
155         }
156         ViewType::Cube => clb097::TEXHEAD_BL_TEXTURE_TYPE_CUBEMAP,
157         ViewType::_1DArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_ONE_D_ARRAY,
158         ViewType::_2DArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_TWO_D_ARRAY,
159         ViewType::CubeArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_CUBEMAP_ARRAY,
160     }
161 }
162 
nil_rs_to_nv9097_multi_sample_count(sample_layout: SampleLayout) -> u32163 fn nil_rs_to_nv9097_multi_sample_count(sample_layout: SampleLayout) -> u32 {
164     match sample_layout {
165         SampleLayout::_1x1 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_1X1,
166         SampleLayout::_2x1 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_2X1,
167         SampleLayout::_2x2 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_2X2,
168         SampleLayout::_4x2 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_4X2,
169         SampleLayout::_4x4 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_4X4,
170         SampleLayout::Invalid => panic!("Invalid sample layout"),
171     }
172 }
173 
nil_rs_to_nvb097_multi_sample_count(sample_layout: SampleLayout) -> u32174 fn nil_rs_to_nvb097_multi_sample_count(sample_layout: SampleLayout) -> u32 {
175     match sample_layout {
176         SampleLayout::_1x1 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_1X1,
177         SampleLayout::_2x1 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_2X1,
178         SampleLayout::_2x2 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_2X2,
179         SampleLayout::_4x2 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_4X2,
180         SampleLayout::_4x4 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_4X4,
181         SampleLayout::Invalid => panic!("Invalid sample layout"),
182     }
183 }
184 
nil_rs_max_mip_level(image: &Image, view: &View) -> u32185 fn nil_rs_max_mip_level(image: &Image, view: &View) -> u32 {
186     if view.view_type != ViewType::_3D
187         && view.array_len == 1
188         && view.base_level == 0
189         && view.num_levels == 1
190     {
191         // The Unnormalized coordinates bit in the sampler gets ignored if the
192         // referenced image has more than one miplevel.  Fortunately, Vulkan has
193         // restrictions requiring the view to be a single-layer single-LOD view
194         // in order to use nonnormalizedCoordinates = VK_TRUE in the sampler.
195         // From the Vulkan 1.3.255 spec:
196         //
197         //  "When unnormalizedCoordinates is VK_TRUE, images the sampler is
198         //  used with in the shader have the following requirements:
199         //
200         //      - The viewType must be either VK_IMAGE_VIEW_TYPE_1D or
201         //        VK_IMAGE_VIEW_TYPE_2D.
202         //      - The image view must have a single layer and a single mip
203         //        level."
204         //
205         // Under these conditions, the view is simply LOD 0 of a single array
206         // slice so we don't need to care about aray stride between slices so
207         // it's safe to set the number of miplevels to 0 regardless of how many
208         // the image actually has.
209         0
210     } else {
211         image.num_levels - 1
212     }
213 }
214 
normalize_extent(image: &Image, view: &View) -> Extent4D<units::Pixels>215 fn normalize_extent(image: &Image, view: &View) -> Extent4D<units::Pixels> {
216     let mut extent = image.extent_px;
217     match view.view_type {
218         ViewType::_1D
219         | ViewType::_2D
220         | ViewType::_1DArray
221         | ViewType::_2DArray => {
222             assert!(image.extent_px.depth == 1);
223             extent.depth = view.array_len;
224         }
225         ViewType::_3D => {
226             assert!(image.dim == ImageDim::_3D);
227             extent.depth = image.extent_px.depth;
228         }
229         ViewType::Cube | ViewType::CubeArray => {
230             assert!(image.dim == ImageDim::_2D);
231             assert!(view.array_len % 6 == 0);
232             extent.depth = view.array_len / 6;
233         }
234         ViewType::_3DSliced => {
235             assert!(image.dim == ImageDim::_3D);
236             extent.depth = view.array_len;
237         }
238     }
239     extent.array_len = 0;
240 
241     extent
242 }
243 
nv9097_fill_tic( image: &Image, view: &View, base_address: u64, desc_out: &mut [u32; 8], )244 fn nv9097_fill_tic(
245     image: &Image,
246     view: &View,
247     base_address: u64,
248     desc_out: &mut [u32; 8],
249 ) {
250     assert!(image.format.el_size_B() == view.format.el_size_B());
251     assert!(view.base_level + view.num_levels <= image.num_levels);
252     assert!(
253         view.base_array_layer + view.array_len <= image.extent_px.array_len
254     );
255 
256     *desc_out = [0u32; 8];
257     let mut th = BitMutView::new(desc_out);
258 
259     th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
260     nv9097_set_th_v2_0(&mut th, &view.format, view.swizzle);
261 
262     // There's no base layer field in the texture header
263     let layer_address = base_address
264         + u64::from(view.base_array_layer)
265         + u64::from(image.array_stride_B);
266 
267     th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, layer_address as u32);
268     th.set_field(cl9097::TEXHEADV2_OFFSET_UPPER, (layer_address >> 32) as u32);
269 
270     let tiling = &image.levels[0].tiling;
271 
272     if tiling.is_tiled {
273         set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, BLOCKLINEAR);
274 
275         assert!(tiling.gob_height_is_8);
276         assert!(tiling.x_log2 == 0);
277         set_enum!(th, cl9097, TEXHEADV2_GOBS_PER_BLOCK_WIDTH, ONE_GOB);
278         th.set_field(cl9097::TEXHEADV2_GOBS_PER_BLOCK_HEIGHT, tiling.y_log2);
279         th.set_field(cl9097::TEXHEADV2_GOBS_PER_BLOCK_DEPTH, tiling.z_log2);
280 
281         let nv_tex_type = pipe_to_nv_texture_type(view.view_type);
282         th.set_field(cl9097::TEXHEADV2_TEXTURE_TYPE, nv_tex_type);
283     } else {
284         set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, PITCH);
285 
286         let pitch = image.levels[0].row_stride_B;
287         th.set_field(cl9097::TEXHEADV2_PITCH, pitch);
288 
289         assert!(
290             view.view_type == ViewType::_2D
291                 || view.view_type == ViewType::_2DArray
292         );
293         assert!(image.sample_layout == SampleLayout::_1x1);
294         assert!(view.num_levels == 1);
295         set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, TWO_D_NO_MIPMAP);
296     }
297 
298     set_enum!(th, cl9097, TEXHEADV2_LOD_ANISO_QUALITY, LOD_QUALITY_HIGH);
299     set_enum!(th, cl9097, TEXHEADV2_LOD_ISO_QUALITY, LOD_QUALITY_HIGH);
300     set_enum!(
301         th,
302         cl9097,
303         TEXHEADV2_ANISO_COARSE_SPREAD_MODIFIER,
304         SPREAD_MODIFIER_NONE
305     );
306 
307     let extent = normalize_extent(image, view);
308     th.set_field(cl9097::TEXHEADV2_WIDTH, extent.width);
309     th.set_field(cl9097::TEXHEADV2_HEIGHT, extent.height);
310     th.set_field(cl9097::TEXHEADV2_DEPTH, extent.depth);
311 
312     let max_mip_level = nil_rs_max_mip_level(image, view);
313     th.set_field(cl9097::TEXHEADV2_MAX_MIP_LEVEL, max_mip_level);
314 
315     th.set_field(cl9097::TEXHEADV2_S_R_G_B_CONVERSION, view.format.is_srgb());
316 
317     set_enum!(th, cl9097, TEXHEADV2_BORDER_SOURCE, BORDER_COLOR);
318 
319     // In the sampler, the two options for FLOAT_COORD_NORMALIZATION are:
320     //
321     // - FORCE_UNNORMALIZED_COORDS
322     // - USE_HEADER_SETTING
323     //
324     // So we set it to normalized in the header and let the sampler select that
325     // or force non-normalized.
326     th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
327 
328     set_enum!(
329         th,
330         cl9097,
331         TEXHEADV2_ANISO_FINE_SPREAD_FUNC,
332         SPREAD_FUNC_TWO
333     );
334     set_enum!(
335         th,
336         cl9097,
337         TEXHEADV2_ANISO_COARSE_SPREAD_FUNC,
338         SPREAD_FUNC_ONE
339     );
340 
341     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, view.base_level);
342     th.set_field(
343         cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL,
344         view.num_levels + view.base_level - 1,
345     );
346 
347     let msc = nil_rs_to_nv9097_multi_sample_count(image.sample_layout);
348     th.set_field(cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT, msc);
349 
350     let min_lod_clamp = view.min_lod_clamp - (view.base_level as f32);
351     th.set_ufixed(cl9097::TEXHEADV2_MIN_LOD_CLAMP, min_lod_clamp);
352 }
353 
nvb097_fill_tic( dev: &nil_rs_bindings::nv_device_info, image: &Image, view: &View, base_address: u64, desc_out: &mut [u32; 8], )354 fn nvb097_fill_tic(
355     dev: &nil_rs_bindings::nv_device_info,
356     image: &Image,
357     view: &View,
358     base_address: u64,
359     desc_out: &mut [u32; 8],
360 ) {
361     assert!(image.format.el_size_B() == view.format.el_size_B());
362     assert!(view.base_level + view.num_levels <= image.num_levels);
363 
364     *desc_out = [0u32; 8];
365     let mut th = BitMutView::new(desc_out);
366 
367     nvb097_set_th_bl_0(&mut th, &view.format, view.swizzle);
368 
369     let tiling = &image.levels[0].tiling;
370 
371     // There's no base layer field in the texture header
372     let mut layer_address = base_address;
373     if view.view_type == ViewType::_3DSliced {
374         assert!(view.num_levels == 1);
375         assert!(
376             view.base_array_layer + view.array_len <= image.extent_px.depth
377         );
378 
379         layer_address +=
380             image.level_z_offset_B(view.base_level, view.base_array_layer);
381     } else {
382         assert!(
383             view.base_array_layer + view.array_len <= image.extent_px.array_len
384         );
385         layer_address +=
386             u64::from(view.base_array_layer) * u64::from(image.array_stride_B);
387     }
388 
389     if tiling.is_tiled {
390         set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_BLOCKLINEAR);
391 
392         let addr = BitView::new(&layer_address);
393         assert!(addr.get_bit_range_u64(0..9) == 0);
394         th.set_field(
395             clb097::TEXHEAD_BL_ADDRESS_BITS31TO9,
396             addr.get_bit_range_u64(9..32),
397         );
398         th.set_field(
399             clb097::TEXHEAD_BL_ADDRESS_BITS47TO32,
400             addr.get_bit_range_u64(32..48),
401         );
402         assert!(addr.get_bit_range_u64(48..64) == 0);
403 
404         assert!(tiling.gob_height_is_8);
405 
406         set_enum!(th, clb097, TEXHEAD_BL_GOBS_PER_BLOCK_WIDTH, ONE_GOB);
407         th.set_field(clb097::TEXHEAD_BL_GOBS_PER_BLOCK_HEIGHT, tiling.y_log2);
408         th.set_field(clb097::TEXHEAD_BL_GOBS_PER_BLOCK_DEPTH, tiling.z_log2);
409         th.set_field(clb097::TEXHEAD_BL_TILE_WIDTH_IN_GOBS, tiling.x_log2);
410 
411         let nv_text_type = pipe_to_nv_texture_type(view.view_type);
412         th.set_field(clb097::TEXHEAD_BL_TEXTURE_TYPE, nv_text_type);
413     } else {
414         set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_PITCH);
415 
416         let addr = BitView::new(&layer_address);
417         assert!(addr.get_bit_range_u64(0..5) == 0);
418         th.set_field(
419             clb097::TEXHEAD_PITCH_ADDRESS_BITS31TO5,
420             addr.get_bit_range_u64(5..32),
421         );
422         th.set_field(
423             clb097::TEXHEAD_PITCH_ADDRESS_BITS47TO32,
424             addr.get_bit_range_u64(32..48),
425         );
426         assert!(addr.get_bit_range_u64(48..64) == 0);
427 
428         let pitch = image.levels[0].row_stride_B;
429         let pitch = BitView::new(&pitch);
430         assert!(pitch.get_bit_range_u64(0..5) == 0);
431         assert!(pitch.get_bit_range_u64(21..32) == 0);
432         th.set_field(
433             clb097::TEXHEAD_PITCH_PITCH_BITS20TO5,
434             pitch.get_bit_range_u64(5..21),
435         );
436 
437         assert!(
438             view.view_type == ViewType::_2D
439                 || view.view_type == ViewType::_2DArray
440         );
441         assert!(image.sample_layout == SampleLayout::_1x1);
442         assert!(view.num_levels == 1);
443         set_enum!(th, clb097, TEXHEAD_PITCH_TEXTURE_TYPE, TWO_D_NO_MIPMAP);
444     }
445 
446     th.set_field(clb097::TEXHEAD_BL_LOD_ANISO_QUALITY2, true);
447     set_enum!(th, clb097, TEXHEAD_BL_LOD_ANISO_QUALITY, LOD_QUALITY_HIGH);
448     set_enum!(th, clb097, TEXHEAD_BL_LOD_ISO_QUALITY, LOD_QUALITY_HIGH);
449     set_enum!(
450         th,
451         clb097,
452         TEXHEAD_BL_ANISO_COARSE_SPREAD_MODIFIER,
453         SPREAD_MODIFIER_NONE
454     );
455 
456     let extent = normalize_extent(image, view);
457     th.set_field(clb097::TEXHEAD_BL_WIDTH_MINUS_ONE, extent.width - 1);
458 
459     if dev.cls_eng3d >= PASCAL_A {
460         let height_1 = extent.height - 1;
461         let depth_1 = extent.depth - 1;
462         th.set_field(clc097::TEXHEAD_BL_HEIGHT_MINUS_ONE, height_1 & 0xffff);
463         th.set_field(clc097::TEXHEAD_BL_HEIGHT_MINUS_ONE_BIT16, height_1 >> 16);
464         th.set_field(clc097::TEXHEAD_BL_DEPTH_MINUS_ONE, depth_1 & 0x3fff);
465         th.set_field(clc097::TEXHEAD_BL_DEPTH_MINUS_ONE_BIT14, depth_1 >> 14);
466     } else {
467         th.set_field(clb097::TEXHEAD_BL_HEIGHT_MINUS_ONE, extent.height - 1);
468         th.set_field(clb097::TEXHEAD_BL_DEPTH_MINUS_ONE, extent.depth - 1);
469     }
470 
471     let max_mip_level = nil_rs_max_mip_level(image, view);
472     th.set_field(clb097::TEXHEAD_BL_MAX_MIP_LEVEL, max_mip_level);
473 
474     th.set_field(clb097::TEXHEAD_BL_S_R_G_B_CONVERSION, view.format.is_srgb());
475 
476     set_enum!(th, clb097, TEXHEAD_BL_SECTOR_PROMOTION, PROMOTE_TO_2_V);
477     set_enum!(th, clb097, TEXHEAD_BL_BORDER_SIZE, BORDER_SAMPLER_COLOR);
478 
479     // In the sampler, the two options for FLOAT_COORD_NORMALIZATION are:
480     //
481     // - FORCE_UNNORMALIZED_COORDS
482     // - USE_HEADER_SETTING
483     //
484     // So we set it to normalized in the header and let the sampler select that
485     // or force non-normalized.
486 
487     th.set_field(clb097::TEXHEAD_BL_NORMALIZED_COORDS, true);
488     set_enum!(
489         th,
490         clb097,
491         TEXHEAD_BL_ANISO_FINE_SPREAD_FUNC,
492         SPREAD_FUNC_TWO
493     );
494     set_enum!(
495         th,
496         clb097,
497         TEXHEAD_BL_ANISO_COARSE_SPREAD_FUNC,
498         SPREAD_FUNC_ONE
499     );
500 
501     th.set_field(clb097::TEXHEAD_BL_RES_VIEW_MIN_MIP_LEVEL, view.base_level);
502 
503     let max_mip_level = view.num_levels + view.base_level - 1;
504     th.set_field(clb097::TEXHEAD_BL_RES_VIEW_MAX_MIP_LEVEL, max_mip_level);
505 
506     let msc = nil_rs_to_nvb097_multi_sample_count(image.sample_layout);
507     th.set_field(clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT, msc);
508 
509     let min_lod_clamp = view.min_lod_clamp - (view.base_level as f32);
510     th.set_ufixed(clb097::TEXHEAD_BL_MIN_LOD_CLAMP, min_lod_clamp);
511 }
512 
513 pub const IDENTITY_SWIZZLE: [nil_rs_bindings::pipe_swizzle; 4] = [
514     nil_rs_bindings::PIPE_SWIZZLE_X,
515     nil_rs_bindings::PIPE_SWIZZLE_Y,
516     nil_rs_bindings::PIPE_SWIZZLE_Z,
517     nil_rs_bindings::PIPE_SWIZZLE_W,
518 ];
519 
nv9097_nil_fill_buffer_tic( base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )520 fn nv9097_nil_fill_buffer_tic(
521     base_address: u64,
522     format: Format,
523     num_elements: u32,
524     desc_out: &mut [u32; 8],
525 ) {
526     *desc_out = [0u32; 8];
527     let mut th = BitMutView::new(desc_out);
528     th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
529 
530     assert!(format.supports_buffer());
531     nv9097_set_th_v2_0(&mut th, &format, IDENTITY_SWIZZLE);
532 
533     th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, base_address as u32);
534     th.set_field(cl9097::TEXHEADV2_OFFSET_UPPER, (base_address >> 32) as u32);
535 
536     set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, PITCH);
537 
538     th.set_field(cl9097::TEXHEADV2_WIDTH, num_elements);
539 
540     set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, ONE_D_BUFFER);
541 }
542 
nvb097_nil_fill_buffer_tic( base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )543 fn nvb097_nil_fill_buffer_tic(
544     base_address: u64,
545     format: Format,
546     num_elements: u32,
547     desc_out: &mut [u32; 8],
548 ) {
549     *desc_out = [0u32; 8];
550     let mut th = BitMutView::new(desc_out);
551 
552     assert!(format.supports_buffer());
553     nvb097_set_th_bl_0(&mut th, &format, IDENTITY_SWIZZLE);
554 
555     th.set_field(clb097::TEXHEAD_1D_ADDRESS_BITS31TO0, base_address as u32);
556     th.set_field(clb097::TEXHEAD_1D_ADDRESS_BITS47TO32, base_address >> 32);
557 
558     set_enum!(th, clb097, TEXHEAD_1D_HEADER_VERSION, SELECT_ONE_D_BUFFER);
559 
560     th.set_field(
561         clb097::TEXHEAD_1D_WIDTH_MINUS_ONE_BITS15TO0,
562         (num_elements - 1) & 0xffff,
563     );
564     th.set_field(
565         clb097::TEXHEAD_1D_WIDTH_MINUS_ONE_BITS31TO16,
566         (num_elements - 1) >> 16,
567     );
568 
569     set_enum!(th, clb097, TEXHEAD_1D_TEXTURE_TYPE, ONE_D_BUFFER);
570 
571     // TODO: Do we need this?
572     set_enum!(th, clb097, TEXHEAD_1D_SECTOR_PROMOTION, PROMOTE_TO_2_V);
573 }
574 
575 impl Image {
576     #[no_mangle]
nil_image_fill_tic( &self, dev: &nil_rs_bindings::nv_device_info, view: &View, base_address: u64, desc_out: &mut [u32; 8], )577     pub extern "C" fn nil_image_fill_tic(
578         &self,
579         dev: &nil_rs_bindings::nv_device_info,
580         view: &View,
581         base_address: u64,
582         desc_out: &mut [u32; 8],
583     ) {
584         self.fill_tic(dev, view, base_address, desc_out);
585     }
586 
fill_tic( &self, dev: &nil_rs_bindings::nv_device_info, view: &View, base_address: u64, desc_out: &mut [u32; 8], )587     pub fn fill_tic(
588         &self,
589         dev: &nil_rs_bindings::nv_device_info,
590         view: &View,
591         base_address: u64,
592         desc_out: &mut [u32; 8],
593     ) {
594         if dev.cls_eng3d >= MAXWELL_A {
595             nvb097_fill_tic(dev, self, view, base_address, desc_out);
596         } else if dev.cls_eng3d >= FERMI_A {
597             nv9097_fill_tic(self, view, base_address, desc_out);
598         } else {
599             panic!("Tesla and older not supported");
600         }
601     }
602 }
603 
604 #[no_mangle]
nil_buffer_fill_tic( dev: &nil_rs_bindings::nv_device_info, base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )605 pub extern "C" fn nil_buffer_fill_tic(
606     dev: &nil_rs_bindings::nv_device_info,
607     base_address: u64,
608     format: Format,
609     num_elements: u32,
610     desc_out: &mut [u32; 8],
611 ) {
612     fill_buffer_tic(dev, base_address, format, num_elements, desc_out);
613 }
614 
fill_buffer_tic( dev: &nil_rs_bindings::nv_device_info, base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )615 pub fn fill_buffer_tic(
616     dev: &nil_rs_bindings::nv_device_info,
617     base_address: u64,
618     format: Format,
619     num_elements: u32,
620     desc_out: &mut [u32; 8],
621 ) {
622     if dev.cls_eng3d >= MAXWELL_A {
623         nvb097_nil_fill_buffer_tic(base_address, format, num_elements, desc_out)
624     } else if dev.cls_eng3d >= FERMI_A {
625         nv9097_nil_fill_buffer_tic(base_address, format, num_elements, desc_out)
626     } else {
627         panic!("Tesla and older not supported");
628     }
629 }
630 
631 pub const ZERO_SWIZZLE: [nil_rs_bindings::pipe_swizzle; 4] = [
632     nil_rs_bindings::PIPE_SWIZZLE_0,
633     nil_rs_bindings::PIPE_SWIZZLE_0,
634     nil_rs_bindings::PIPE_SWIZZLE_0,
635     nil_rs_bindings::PIPE_SWIZZLE_0,
636 ];
637 
nv9097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8])638 fn nv9097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8]) {
639     *desc_out = [0u32; 8];
640     let mut th = BitMutView::new(desc_out);
641 
642     th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
643     let format = Format::try_from(PIPE_FORMAT_R8_UNORM).unwrap();
644     nvb097_set_th_bl_0(&mut th, &format, ZERO_SWIZZLE);
645 
646     th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, zero_page_address as u32);
647     th.set_field(
648         cl9097::TEXHEADV2_OFFSET_UPPER,
649         (zero_page_address >> 32) as u32,
650     );
651 
652     set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, BLOCKLINEAR);
653     set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, TWO_D_ARRAY);
654     th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
655 
656     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, 1_u8);
657     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL, 0_u8);
658 }
659 
nvb097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8])660 fn nvb097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8]) {
661     *desc_out = [0u32; 8];
662     let mut th = BitMutView::new(desc_out);
663 
664     let format = Format::try_from(PIPE_FORMAT_R8_UNORM).unwrap();
665     nvb097_set_th_bl_0(&mut th, &format, ZERO_SWIZZLE);
666 
667     set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_BLOCKLINEAR);
668 
669     let addr = BitView::new(&zero_page_address);
670     assert!(addr.get_bit_range_u64(0..9) == 0);
671     th.set_field(
672         clb097::TEXHEAD_BL_ADDRESS_BITS31TO9,
673         addr.get_bit_range_u64(9..32),
674     );
675     th.set_field(
676         clb097::TEXHEAD_BL_ADDRESS_BITS47TO32,
677         addr.get_bit_range_u64(32..48),
678     );
679     assert!(addr.get_bit_range_u64(48..64) == 0);
680 
681     set_enum!(th, clb097, TEXHEAD_BL_TEXTURE_TYPE, TWO_D_ARRAY);
682     set_enum!(th, clb097, TEXHEAD_BL_BORDER_SIZE, BORDER_SAMPLER_COLOR);
683     th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
684 
685     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, 1_u8);
686     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL, 0_u8);
687 
688     // This is copied from the D3D12 driver. I have no idea what these bits do
689     // or if they even do anything.
690     th.set_field(clb097::TEXHEAD_BL_RESERVED4A, 0x4_u8);
691     th.set_field(clb097::TEXHEAD_BL_RESERVED7Y, 0x80_u8);
692 }
693 
fill_null_tic( dev: &nil_rs_bindings::nv_device_info, zero_page_address: u64, desc_out: &mut [u32; 8], )694 pub fn fill_null_tic(
695     dev: &nil_rs_bindings::nv_device_info,
696     zero_page_address: u64,
697     desc_out: &mut [u32; 8],
698 ) {
699     if dev.cls_eng3d >= VOLTA_A {
700         // On Volta+, we can just fill with zeros
701         *desc_out = [0; 8]
702     } else if dev.cls_eng3d >= MAXWELL_A {
703         nvb097_fill_null_tic(zero_page_address, desc_out)
704     } else if dev.cls_eng3d >= FERMI_A {
705         nv9097_fill_null_tic(zero_page_address, desc_out)
706     } else {
707         panic!("Tesla and older not supported");
708     }
709 }
710 
711 #[no_mangle]
nil_fill_null_tic( dev: &nil_rs_bindings::nv_device_info, zero_page_address: u64, desc_out: &mut [u32; 8], )712 pub extern "C" fn nil_fill_null_tic(
713     dev: &nil_rs_bindings::nv_device_info,
714     zero_page_address: u64,
715     desc_out: &mut [u32; 8],
716 ) {
717     fill_null_tic(dev, zero_page_address, desc_out);
718 }
719