/*
 * Copyright (c) 2011-2013 Luc Verhaegen <[email protected]>
 * Copyright (c) 2018 Alyssa Rosenzweig <[email protected]>
 * Copyright (c) 2018 Vasily Khoruzhick <[email protected]>
 * Copyright (c) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "pan_tiling.h"
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "util/bitscan.h"
#include "util/macros.h"

/*
 * This file implements software encode/decode of u-interleaved textures.
 * See docs/drivers/panfrost.rst for details on the format.
 *
 * The tricky bit is ordering along the space-filling curve:
 *
 *    | y3 | (x3 ^ y3) | y2 | (y2 ^ x2) | y1 | (y1 ^ x1) | y0 | (y0 ^ x0) |
 *
 * While interleaving bits is trivial in hardware, it is nontrivial in software.
 * The trick is to divide the pattern up:
 *
 *    | y3 | y3 | y2 | y2 | y1 | y1 | y0 | y0 |
 *  ^ |  0 | x3 |  0 | x2 |  0 | x1 |  0 | x0 |
 *
 * That is, duplicate the bits of the Y and space out the bits of the X. The top
 * line is a function only of Y, so it can be calculated once per row and stored
 * in a register. The bottom line is simply X with the bits spaced out. Spacing
 * out the X is easy enough with a LUT, or by subtracting+ANDing the mask
 * pattern (abusing carry bits).
 *
 */

/*
 * Given the lower 4 bits of the Y coordinate, duplicate every bit: input bit
 * i lands in output bits 2i and 2i+1, so 0b1010 becomes 0b11001100. The idea
 * is that the "solely Y" places of the tiled index get a Y bit, and the
 * places that are XORed with X *also* get that same Y bit.
 *
 * Values are written in hex (binary literals are a compiler extension before
 * C23); the comments give the binary expansion. Linkage is intentionally left
 * external, as in the original definition.
 */
/* clang-format off */
const uint32_t bit_duplication[16] = {
   0x00, /* 0b0000 -> 0b00000000 */
   0x03, /* 0b0001 -> 0b00000011 */
   0x0C, /* 0b0010 -> 0b00001100 */
   0x0F, /* 0b0011 -> 0b00001111 */
   0x30, /* 0b0100 -> 0b00110000 */
   0x33, /* 0b0101 -> 0b00110011 */
   0x3C, /* 0b0110 -> 0b00111100 */
   0x3F, /* 0b0111 -> 0b00111111 */
   0xC0, /* 0b1000 -> 0b11000000 */
   0xC3, /* 0b1001 -> 0b11000011 */
   0xCC, /* 0b1010 -> 0b11001100 */
   0xCF, /* 0b1011 -> 0b11001111 */
   0xF0, /* 0b1100 -> 0b11110000 */
   0xF3, /* 0b1101 -> 0b11110011 */
   0xFC, /* 0b1110 -> 0b11111100 */
   0xFF, /* 0b1111 -> 0b11111111 */
};
/* clang-format on */

/*
 * Space out the bits of a 4-bit nibble: input bit i lands in output bit 2i
 * and the odd output bits stay zero, so 0b1011 becomes 0b1000101. Indexed
 * with the low bits of the X coordinate, this yields the X contribution that
 * is XORed into the tiled index.
 *
 * Values are written in hex (binary literals are a compiler extension before
 * C23); the comments give the binary expansion. Linkage is intentionally left
 * external, as in the original definition.
 */
/* clang-format off */
const unsigned space_4[16] = {
   0x00, /* 0b0000 -> 0b0000000 */
   0x01, /* 0b0001 -> 0b0000001 */
   0x04, /* 0b0010 -> 0b0000100 */
   0x05, /* 0b0011 -> 0b0000101 */
   0x10, /* 0b0100 -> 0b0010000 */
   0x11, /* 0b0101 -> 0b0010001 */
   0x14, /* 0b0110 -> 0b0010100 */
   0x15, /* 0b0111 -> 0b0010101 */
   0x40, /* 0b1000 -> 0b1000000 */
   0x41, /* 0b1001 -> 0b1000001 */
   0x44, /* 0b1010 -> 0b1000100 */
   0x45, /* 0b1011 -> 0b1000101 */
   0x50, /* 0b1100 -> 0b1010000 */
   0x51, /* 0b1101 -> 0b1010001 */
   0x54, /* 0b1110 -> 0b1010100 */
   0x55, /* 0b1111 -> 0b1010101 */
};
/* clang-format on */

/* The scheme uses 16x16 tiles, i.e. 256 elements per tile. */

#define TILE_WIDTH      16
#define TILE_HEIGHT     16
#define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT)

/* We need a 128-bit type for idiomatically tiling bpp128 formats. The type
 * only has to support copies and sizeof, so emulating it with a packed
 * structure works well enough, but if there is a native 128-bit type we may
 * as well prefer that. */

#ifdef __SIZEOF_INT128__
typedef __uint128_t pan_uint128_t;
#else
typedef struct {
   uint64_t lo;
   uint64_t hi;
} __attribute__((packed)) pan_uint128_t;
#endif

/* The tiling routines step through memory in sizeof(pixel) units. */
_Static_assert(sizeof(pan_uint128_t) == 16, "pan_uint128_t must be 16 bytes");

/* Copy-only carrier types for the non-power-of-two pixel sizes (24, 48 and
 * 96 bits per pixel). They are packed so that sizeof matches the pixel size
 * exactly, with no tail padding; only assignment and sizeof are used on them.
 */

typedef struct {
   uint16_t lo;
   uint8_t hi;
} __attribute__((packed)) pan_uint24_t;

typedef struct {
   uint32_t lo;
   uint16_t hi;
} __attribute__((packed)) pan_uint48_t;

typedef struct {
   uint64_t lo;
   uint32_t hi;
} __attribute__((packed)) pan_uint96_t;

_Static_assert(sizeof(pan_uint24_t) == 3, "pan_uint24_t must be 3 bytes");
_Static_assert(sizeof(pan_uint48_t) == 6, "pan_uint48_t must be 6 bytes");
_Static_assert(sizeof(pan_uint96_t) == 12, "pan_uint96_t must be 12 bytes");

141*61046927SAndroid Build Coastguard Worker /* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation:
142*61046927SAndroid Build Coastguard Worker *
143*61046927SAndroid Build Coastguard Worker * dest_start precomputes the offset to the beginning of the first horizontal
144*61046927SAndroid Build Coastguard Worker * tile we're writing to, knowing that x is 16-aligned. Tiles themselves are
145*61046927SAndroid Build Coastguard Worker * stored linearly, so we get the X tile number by shifting and then multiply
146*61046927SAndroid Build Coastguard Worker * by the bytes per tile .
147*61046927SAndroid Build Coastguard Worker *
148*61046927SAndroid Build Coastguard Worker * We iterate across the pixels we're trying to store in source-order. For each
149*61046927SAndroid Build Coastguard Worker * row in the destination image, we figure out which row of 16x16 block we're
150*61046927SAndroid Build Coastguard Worker * in, by slicing off the lower 4-bits (block_y).
151*61046927SAndroid Build Coastguard Worker *
152*61046927SAndroid Build Coastguard Worker * dest then precomputes the location of the top-left corner of the block the
153*61046927SAndroid Build Coastguard Worker * row starts in. In pixel coordinates (where the origin is the top-left),
154*61046927SAndroid Build Coastguard Worker * (block_y, 0) is the top-left corner of the leftmost tile in this row. While
155*61046927SAndroid Build Coastguard Worker * pixels are reordered within a block, the blocks themselves are stored
156*61046927SAndroid Build Coastguard Worker * linearly, so multiplying block_y by the pixel stride of the destination
157*61046927SAndroid Build Coastguard Worker * image equals the byte offset of that top-left corner of the block this row
158*61046927SAndroid Build Coastguard Worker * is in.
159*61046927SAndroid Build Coastguard Worker *
160*61046927SAndroid Build Coastguard Worker * On the other hand, the source is linear so we compute the locations of the
161*61046927SAndroid Build Coastguard Worker * start and end of the row in the source by a simple linear addressing.
162*61046927SAndroid Build Coastguard Worker *
163*61046927SAndroid Build Coastguard Worker * For indexing within the tile, we need to XOR with the [y3 y3 y2 y2 y1 y1 y0
164*61046927SAndroid Build Coastguard Worker * y0] value. Since this is constant across a row, we look it up per-row and
165*61046927SAndroid Build Coastguard Worker * store in expanded_y.
166*61046927SAndroid Build Coastguard Worker *
167*61046927SAndroid Build Coastguard Worker * Finally, we iterate each row in source order. In the outer loop, we iterate
168*61046927SAndroid Build Coastguard Worker * each 16 pixel tile. Within each tile, we iterate the 16 pixels (this should
169*61046927SAndroid Build Coastguard Worker * be unrolled), calculating the index within the tile and writing.
170*61046927SAndroid Build Coastguard Worker */
171*61046927SAndroid Build Coastguard Worker
172*61046927SAndroid Build Coastguard Worker #define TILED_ACCESS_TYPE(pixel_t, shift) \
173*61046927SAndroid Build Coastguard Worker static ALWAYS_INLINE void panfrost_access_tiled_image_##pixel_t( \
174*61046927SAndroid Build Coastguard Worker void *dst, void *src, uint16_t sx, uint16_t sy, uint16_t w, uint16_t h, \
175*61046927SAndroid Build Coastguard Worker uint32_t dst_stride, uint32_t src_stride, bool is_store) \
176*61046927SAndroid Build Coastguard Worker { \
177*61046927SAndroid Build Coastguard Worker uint8_t *dest_start = \
178*61046927SAndroid Build Coastguard Worker dst + ((sx >> 4) * PIXELS_PER_TILE * sizeof(pixel_t)); \
179*61046927SAndroid Build Coastguard Worker for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
180*61046927SAndroid Build Coastguard Worker uint8_t *dest = (uint8_t *)(dest_start + ((y >> 4) * dst_stride)); \
181*61046927SAndroid Build Coastguard Worker pixel_t *source = src + (src_y * src_stride); \
182*61046927SAndroid Build Coastguard Worker pixel_t *source_end = source + w; \
183*61046927SAndroid Build Coastguard Worker unsigned expanded_y = bit_duplication[y & 0xF] << shift; \
184*61046927SAndroid Build Coastguard Worker for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) { \
185*61046927SAndroid Build Coastguard Worker for (uint8_t i = 0; i < 16; ++i) { \
186*61046927SAndroid Build Coastguard Worker unsigned index = expanded_y ^ (space_4[i] << shift); \
187*61046927SAndroid Build Coastguard Worker if (is_store) \
188*61046927SAndroid Build Coastguard Worker *((pixel_t *)(dest + index)) = *(source++); \
189*61046927SAndroid Build Coastguard Worker else \
190*61046927SAndroid Build Coastguard Worker *(source++) = *((pixel_t *)(dest + index)); \
191*61046927SAndroid Build Coastguard Worker } \
192*61046927SAndroid Build Coastguard Worker } \
193*61046927SAndroid Build Coastguard Worker } \
194*61046927SAndroid Build Coastguard Worker }
195*61046927SAndroid Build Coastguard Worker
196*61046927SAndroid Build Coastguard Worker TILED_ACCESS_TYPE(uint8_t, 0);
197*61046927SAndroid Build Coastguard Worker TILED_ACCESS_TYPE(uint16_t, 1);
198*61046927SAndroid Build Coastguard Worker TILED_ACCESS_TYPE(uint32_t, 2);
199*61046927SAndroid Build Coastguard Worker TILED_ACCESS_TYPE(uint64_t, 3);
200*61046927SAndroid Build Coastguard Worker TILED_ACCESS_TYPE(pan_uint128_t, 4);
201*61046927SAndroid Build Coastguard Worker
/* Generic element-by-element access to a tiled image, usable for regions
 * that are not tile aligned.
 *
 * Both macros expand to a compound statement and read the following names
 * from the enclosing scope: dst (tiled buffer), src (linear buffer), sx, sy,
 * w, h (region, in elements), dst_stride (bytes between rows of tiles) and
 * src_stride (bytes between linear rows). TILED_UNALIGNED_TYPES additionally
 * reads bpp (bits per element).
 *
 * tile_shift is log2 of the tile edge in elements: a tile holds
 * 1 << (2 * tile_shift) elements and the low tile_shift bits of x/y select
 * the element inside the tile. is_store must be a constant: true copies
 * linear -> tiled, false copies tiled -> linear.
 */
#define TILED_UNALIGNED_TYPE(pixel_t, is_store, tile_shift)                    \
   {                                                                           \
      const unsigned mask = (1 << (tile_shift)) - 1;                           \
      for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) {                   \
         size_t block_start_s = (size_t)(y >> (tile_shift)) * dst_stride;      \
         size_t source_start = (size_t)src_y * src_stride;                     \
         unsigned expanded_y = bit_duplication[y & mask];                      \
                                                                               \
         for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) {                \
            unsigned block_x_s =                                               \
               (x >> (tile_shift)) * (1 << ((tile_shift) * 2));                \
            unsigned index = expanded_y ^ space_4[x & mask];                   \
            uint8_t *source =                                                  \
               (uint8_t *)src + source_start + sizeof(pixel_t) * src_x;        \
            uint8_t *dest = (uint8_t *)dst + block_start_s +                   \
                            sizeof(pixel_t) * (block_x_s + index);             \
                                                                               \
            pixel_t *outp = (pixel_t *)(is_store ? dest : source);             \
            pixel_t *inp = (pixel_t *)(is_store ? source : dest);              \
            *outp = *inp;                                                      \
         }                                                                     \
      }                                                                        \
   }

/* Dispatch TILED_UNALIGNED_TYPE on the element size in bits. An element size
 * without a matching carrier type would copy nothing, so it is flagged with
 * an assertion instead of being ignored silently. */
#define TILED_UNALIGNED_TYPES(store, shift)                                    \
   {                                                                           \
      if (bpp == 8)                                                            \
         TILED_UNALIGNED_TYPE(uint8_t, store, shift)                           \
      else if (bpp == 16)                                                      \
         TILED_UNALIGNED_TYPE(uint16_t, store, shift)                          \
      else if (bpp == 24)                                                      \
         TILED_UNALIGNED_TYPE(pan_uint24_t, store, shift)                      \
      else if (bpp == 32)                                                      \
         TILED_UNALIGNED_TYPE(uint32_t, store, shift)                          \
      else if (bpp == 48)                                                      \
         TILED_UNALIGNED_TYPE(pan_uint48_t, store, shift)                      \
      else if (bpp == 64)                                                      \
         TILED_UNALIGNED_TYPE(uint64_t, store, shift)                          \
      else if (bpp == 96)                                                      \
         TILED_UNALIGNED_TYPE(pan_uint96_t, store, shift)                      \
      else if (bpp == 128)                                                     \
         TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift)                     \
      else {                                                                   \
         assert(!"unsupported block size for tiled access");                   \
      }                                                                        \
   }

244*61046927SAndroid Build Coastguard Worker /*
245*61046927SAndroid Build Coastguard Worker * Perform a generic access to a tiled image with a given format. This works
246*61046927SAndroid Build Coastguard Worker * even for block-compressed images on entire blocks at a time. sx/sy/w/h are
247*61046927SAndroid Build Coastguard Worker * specified in pixels, not blocks, but our internal routines work in blocks,
248*61046927SAndroid Build Coastguard Worker * so we divide here. Alignment is assumed.
249*61046927SAndroid Build Coastguard Worker */
250*61046927SAndroid Build Coastguard Worker static void
panfrost_access_tiled_image_generic(void * dst,void * src,unsigned sx,unsigned sy,unsigned w,unsigned h,uint32_t dst_stride,uint32_t src_stride,const struct util_format_description * desc,bool _is_store)251*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_generic(void *dst, void *src, unsigned sx,
252*61046927SAndroid Build Coastguard Worker unsigned sy, unsigned w, unsigned h,
253*61046927SAndroid Build Coastguard Worker uint32_t dst_stride, uint32_t src_stride,
254*61046927SAndroid Build Coastguard Worker const struct util_format_description *desc,
255*61046927SAndroid Build Coastguard Worker bool _is_store)
256*61046927SAndroid Build Coastguard Worker {
257*61046927SAndroid Build Coastguard Worker unsigned bpp = desc->block.bits;
258*61046927SAndroid Build Coastguard Worker
259*61046927SAndroid Build Coastguard Worker /* Convert units */
260*61046927SAndroid Build Coastguard Worker sx /= desc->block.width;
261*61046927SAndroid Build Coastguard Worker sy /= desc->block.height;
262*61046927SAndroid Build Coastguard Worker w = DIV_ROUND_UP(w, desc->block.width);
263*61046927SAndroid Build Coastguard Worker h = DIV_ROUND_UP(h, desc->block.height);
264*61046927SAndroid Build Coastguard Worker
265*61046927SAndroid Build Coastguard Worker if (desc->block.width > 1) {
266*61046927SAndroid Build Coastguard Worker if (_is_store)
267*61046927SAndroid Build Coastguard Worker TILED_UNALIGNED_TYPES(true, 2)
268*61046927SAndroid Build Coastguard Worker else
269*61046927SAndroid Build Coastguard Worker TILED_UNALIGNED_TYPES(false, 2)
270*61046927SAndroid Build Coastguard Worker } else {
271*61046927SAndroid Build Coastguard Worker if (_is_store)
272*61046927SAndroid Build Coastguard Worker TILED_UNALIGNED_TYPES(true, 4)
273*61046927SAndroid Build Coastguard Worker else
274*61046927SAndroid Build Coastguard Worker TILED_UNALIGNED_TYPES(false, 4)
275*61046927SAndroid Build Coastguard Worker }
276*61046927SAndroid Build Coastguard Worker }
277*61046927SAndroid Build Coastguard Worker
278*61046927SAndroid Build Coastguard Worker #define OFFSET(src, _x, _y) \
279*61046927SAndroid Build Coastguard Worker (void *)((uint8_t *)src + ((_y)-orig_y) * src_stride + \
280*61046927SAndroid Build Coastguard Worker (((_x)-orig_x) * (bpp / 8)))
281*61046927SAndroid Build Coastguard Worker
282*61046927SAndroid Build Coastguard Worker static ALWAYS_INLINE void
panfrost_access_tiled_image(void * dst,void * src,unsigned x,unsigned y,unsigned w,unsigned h,uint32_t dst_stride,uint32_t src_stride,enum pipe_format format,bool is_store)283*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image(void *dst, void *src, unsigned x, unsigned y,
284*61046927SAndroid Build Coastguard Worker unsigned w, unsigned h, uint32_t dst_stride,
285*61046927SAndroid Build Coastguard Worker uint32_t src_stride, enum pipe_format format,
286*61046927SAndroid Build Coastguard Worker bool is_store)
287*61046927SAndroid Build Coastguard Worker {
288*61046927SAndroid Build Coastguard Worker const struct util_format_description *desc = util_format_description(format);
289*61046927SAndroid Build Coastguard Worker unsigned bpp = desc->block.bits;
290*61046927SAndroid Build Coastguard Worker
291*61046927SAndroid Build Coastguard Worker /* Our optimized routines cannot handle unaligned blocks (without depending
292*61046927SAndroid Build Coastguard Worker * on platform-specific behaviour), and there is no good reason to do so. If
293*61046927SAndroid Build Coastguard Worker * these assertions fail, there is either a driver bug or a non-portable unit
294*61046927SAndroid Build Coastguard Worker * test.
295*61046927SAndroid Build Coastguard Worker */
296*61046927SAndroid Build Coastguard Worker assert((dst_stride % (bpp / 8)) == 0 && "unaligned destination stride");
297*61046927SAndroid Build Coastguard Worker assert((src_stride % (bpp / 8)) == 0 && "unaligned source stride");
298*61046927SAndroid Build Coastguard Worker
299*61046927SAndroid Build Coastguard Worker if (desc->block.width > 1 ||
300*61046927SAndroid Build Coastguard Worker !util_is_power_of_two_nonzero(desc->block.bits)) {
301*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_generic(
302*61046927SAndroid Build Coastguard Worker dst, (void *)src, x, y, w, h, dst_stride, src_stride, desc, is_store);
303*61046927SAndroid Build Coastguard Worker
304*61046927SAndroid Build Coastguard Worker return;
305*61046927SAndroid Build Coastguard Worker }
306*61046927SAndroid Build Coastguard Worker
307*61046927SAndroid Build Coastguard Worker unsigned first_full_tile_x = DIV_ROUND_UP(x, TILE_WIDTH) * TILE_WIDTH;
308*61046927SAndroid Build Coastguard Worker unsigned first_full_tile_y = DIV_ROUND_UP(y, TILE_HEIGHT) * TILE_HEIGHT;
309*61046927SAndroid Build Coastguard Worker unsigned last_full_tile_x = ((x + w) / TILE_WIDTH) * TILE_WIDTH;
310*61046927SAndroid Build Coastguard Worker unsigned last_full_tile_y = ((y + h) / TILE_HEIGHT) * TILE_HEIGHT;
311*61046927SAndroid Build Coastguard Worker
312*61046927SAndroid Build Coastguard Worker /* First, tile the top portion */
313*61046927SAndroid Build Coastguard Worker
314*61046927SAndroid Build Coastguard Worker unsigned orig_x = x, orig_y = y;
315*61046927SAndroid Build Coastguard Worker
316*61046927SAndroid Build Coastguard Worker if (first_full_tile_y != y) {
317*61046927SAndroid Build Coastguard Worker unsigned dist = MIN2(first_full_tile_y - y, h);
318*61046927SAndroid Build Coastguard Worker
319*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), x, y, w, dist,
320*61046927SAndroid Build Coastguard Worker dst_stride, src_stride, desc,
321*61046927SAndroid Build Coastguard Worker is_store);
322*61046927SAndroid Build Coastguard Worker
323*61046927SAndroid Build Coastguard Worker if (dist == h)
324*61046927SAndroid Build Coastguard Worker return;
325*61046927SAndroid Build Coastguard Worker
326*61046927SAndroid Build Coastguard Worker y += dist;
327*61046927SAndroid Build Coastguard Worker h -= dist;
328*61046927SAndroid Build Coastguard Worker }
329*61046927SAndroid Build Coastguard Worker
330*61046927SAndroid Build Coastguard Worker /* Next, the bottom portion */
331*61046927SAndroid Build Coastguard Worker if (last_full_tile_y != (y + h)) {
332*61046927SAndroid Build Coastguard Worker unsigned dist = (y + h) - last_full_tile_y;
333*61046927SAndroid Build Coastguard Worker
334*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_generic(
335*61046927SAndroid Build Coastguard Worker dst, OFFSET(src, x, last_full_tile_y), x, last_full_tile_y, w, dist,
336*61046927SAndroid Build Coastguard Worker dst_stride, src_stride, desc, is_store);
337*61046927SAndroid Build Coastguard Worker
338*61046927SAndroid Build Coastguard Worker h -= dist;
339*61046927SAndroid Build Coastguard Worker }
340*61046927SAndroid Build Coastguard Worker
341*61046927SAndroid Build Coastguard Worker /* The left portion */
342*61046927SAndroid Build Coastguard Worker if (first_full_tile_x != x) {
343*61046927SAndroid Build Coastguard Worker unsigned dist = MIN2(first_full_tile_x - x, w);
344*61046927SAndroid Build Coastguard Worker
345*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), x, y, dist, h,
346*61046927SAndroid Build Coastguard Worker dst_stride, src_stride, desc,
347*61046927SAndroid Build Coastguard Worker is_store);
348*61046927SAndroid Build Coastguard Worker
349*61046927SAndroid Build Coastguard Worker if (dist == w)
350*61046927SAndroid Build Coastguard Worker return;
351*61046927SAndroid Build Coastguard Worker
352*61046927SAndroid Build Coastguard Worker x += dist;
353*61046927SAndroid Build Coastguard Worker w -= dist;
354*61046927SAndroid Build Coastguard Worker }
355*61046927SAndroid Build Coastguard Worker
356*61046927SAndroid Build Coastguard Worker /* Finally, the right portion */
357*61046927SAndroid Build Coastguard Worker if (last_full_tile_x != (x + w)) {
358*61046927SAndroid Build Coastguard Worker unsigned dist = (x + w) - last_full_tile_x;
359*61046927SAndroid Build Coastguard Worker
360*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_generic(
361*61046927SAndroid Build Coastguard Worker dst, OFFSET(src, last_full_tile_x, y), last_full_tile_x, y, dist, h,
362*61046927SAndroid Build Coastguard Worker dst_stride, src_stride, desc, is_store);
363*61046927SAndroid Build Coastguard Worker
364*61046927SAndroid Build Coastguard Worker w -= dist;
365*61046927SAndroid Build Coastguard Worker }
366*61046927SAndroid Build Coastguard Worker
367*61046927SAndroid Build Coastguard Worker if (bpp == 8)
368*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_uint8_t(dst, OFFSET(src, x, y), x, y, w, h,
369*61046927SAndroid Build Coastguard Worker dst_stride, src_stride, is_store);
370*61046927SAndroid Build Coastguard Worker else if (bpp == 16)
371*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_uint16_t(dst, OFFSET(src, x, y), x, y, w, h,
372*61046927SAndroid Build Coastguard Worker dst_stride, src_stride, is_store);
373*61046927SAndroid Build Coastguard Worker else if (bpp == 32)
374*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_uint32_t(dst, OFFSET(src, x, y), x, y, w, h,
375*61046927SAndroid Build Coastguard Worker dst_stride, src_stride, is_store);
376*61046927SAndroid Build Coastguard Worker else if (bpp == 64)
377*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_uint64_t(dst, OFFSET(src, x, y), x, y, w, h,
378*61046927SAndroid Build Coastguard Worker dst_stride, src_stride, is_store);
379*61046927SAndroid Build Coastguard Worker else if (bpp == 128)
380*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image_pan_uint128_t(
381*61046927SAndroid Build Coastguard Worker dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
382*61046927SAndroid Build Coastguard Worker }
383*61046927SAndroid Build Coastguard Worker
384*61046927SAndroid Build Coastguard Worker /**
385*61046927SAndroid Build Coastguard Worker * Access a tiled image (load or store). Note: the region of interest (x, y, w,
386*61046927SAndroid Build Coastguard Worker * h) is specified in pixels, not blocks. It is expected that these quantities
387*61046927SAndroid Build Coastguard Worker * are aligned to the block size.
388*61046927SAndroid Build Coastguard Worker */
389*61046927SAndroid Build Coastguard Worker void
panfrost_store_tiled_image(void * dst,const void * src,unsigned x,unsigned y,unsigned w,unsigned h,uint32_t dst_stride,uint32_t src_stride,enum pipe_format format)390*61046927SAndroid Build Coastguard Worker panfrost_store_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
391*61046927SAndroid Build Coastguard Worker unsigned w, unsigned h, uint32_t dst_stride,
392*61046927SAndroid Build Coastguard Worker uint32_t src_stride, enum pipe_format format)
393*61046927SAndroid Build Coastguard Worker {
394*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image(dst, (void *)src, x, y, w, h, dst_stride,
395*61046927SAndroid Build Coastguard Worker src_stride, format, true);
396*61046927SAndroid Build Coastguard Worker }
397*61046927SAndroid Build Coastguard Worker
398*61046927SAndroid Build Coastguard Worker void
panfrost_load_tiled_image(void * dst,const void * src,unsigned x,unsigned y,unsigned w,unsigned h,uint32_t dst_stride,uint32_t src_stride,enum pipe_format format)399*61046927SAndroid Build Coastguard Worker panfrost_load_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
400*61046927SAndroid Build Coastguard Worker unsigned w, unsigned h, uint32_t dst_stride,
401*61046927SAndroid Build Coastguard Worker uint32_t src_stride, enum pipe_format format)
402*61046927SAndroid Build Coastguard Worker {
403*61046927SAndroid Build Coastguard Worker panfrost_access_tiled_image((void *)src, dst, x, y, w, h, src_stride,
404*61046927SAndroid Build Coastguard Worker dst_stride, format, false);
405*61046927SAndroid Build Coastguard Worker }
406