xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/nouveau/nv30/nv30_transfer.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2012 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs
23  *
24  */
25 
/* Common parameter list for every transfer helper in this file: the context,
 * the requested filter, and the source and destination rectangles.
 */
#define XFER_ARGS                                                              \
   struct nv30_context *nv30,                                                  \
   enum nv30_transfer_filter filter,                                           \
   struct nv30_rect *src,                                                      \
   struct nv30_rect *dst
29 
30 #include "util/u_math.h"
31 
32 #include "nv_object.xml.h"
33 #include "nv_m2mf.xml.h"
34 #include "nv30/nv01_2d.xml.h"
35 #include "nv30/nv30-40_3d.xml.h"
36 
37 #include "nv30/nv30_context.h"
38 #include "nv30/nv30_transfer.h"
39 #include "nv30/nv30_winsys.h"
40 
41 /* Various helper functions to transfer different types of data in a number
42  * of different ways.
43  */
44 
45 static inline bool
nv30_transfer_scaled(struct nv30_rect * src,struct nv30_rect * dst)46 nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst)
47 {
48    if (src->x1 - src->x0 != dst->x1 - dst->x0)
49       return true;
50    if (src->y1 - src->y0 != dst->y1 - dst->y0)
51       return true;
52    return false;
53 }
54 
55 static inline bool
nv30_transfer_blit(XFER_ARGS)56 nv30_transfer_blit(XFER_ARGS)
57 {
58    if (nv30->screen->eng3d->oclass < NV40_3D_CLASS)
59       return false;
60    if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1)
61       return false;
62    if (dst->w < 2 || dst->h < 2)
63       return false;
64    if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch))
65       return false;
66    if (src->cpp > 4)
67       return false;
68    return true;
69 }
70 
71 static inline struct nouveau_heap *
nv30_transfer_rect_vertprog(struct nv30_context * nv30)72 nv30_transfer_rect_vertprog(struct nv30_context *nv30)
73 {
74    struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
75    struct nouveau_heap *vp;
76 
77    vp = nv30->blit_vp;
78    if (!vp) {
79       if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) {
80          while (heap->next && heap->size < 2) {
81             struct nouveau_heap **evict = heap->next->priv;
82             nouveau_heap_free(evict);
83          }
84 
85          if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp))
86             return NULL;
87       }
88 
89       vp = nv30->blit_vp;
90       if (vp) {
91          struct nouveau_pushbuf *push = nv30->base.pushbuf;
92 
93          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
94          PUSH_DATA (push, vp->start);
95          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
96          PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */
97          PUSH_DATA (push, 0x0040000d);
98          PUSH_DATA (push, 0x8106c083);
99          PUSH_DATA (push, 0x6041ff80);
100          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
101          PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */
102          PUSH_DATA (push, 0x0040080d);
103          PUSH_DATA (push, 0x8106c083);
104          PUSH_DATA (push, 0x6041ff9d);
105       }
106    }
107 
108    return vp;
109 }
110 
111 
112 static inline struct nv04_resource *
nv30_transfer_rect_fragprog(struct nv30_context * nv30)113 nv30_transfer_rect_fragprog(struct nv30_context *nv30)
114 {
115    struct nv04_resource *fp = nv04_resource(nv30->blit_fp);
116    struct pipe_context *pipe = &nv30->base.pipe;
117 
118    if (!fp) {
119       nv30->blit_fp =
120          pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STAGING, 12 * 4);
121       if (nv30->blit_fp) {
122          struct pipe_transfer *transfer;
123          u32 *map = pipe_buffer_map(pipe, nv30->blit_fp,
124                                     PIPE_MAP_WRITE, &transfer);
125          if (map) {
126             map[0] = 0x17009e00; /* texr r0, i[tex0], texture[0]; end; */
127             map[1] = 0x1c9dc801;
128             map[2] = 0x0001c800;
129             map[3] = 0x3fe1c800;
130             map[4] = 0x01401e81; /* end; */
131             map[5] = 0x1c9dc800;
132             map[6] = 0x0001c800;
133             map[7] = 0x0001c800;
134             pipe_buffer_unmap(pipe, transfer);
135          }
136 
137          fp = nv04_resource(nv30->blit_fp);
138          nouveau_buffer_migrate(&nv30->base, fp, NOUVEAU_BO_VRAM);
139       }
140    }
141 
142    return fp;
143 }
144 
145 static void
nv30_transfer_rect_blit(XFER_ARGS)146 nv30_transfer_rect_blit(XFER_ARGS)
147 {
148    struct nv04_resource *fp = nv30_transfer_rect_fragprog(nv30);
149    struct nouveau_heap *vp = nv30_transfer_rect_vertprog(nv30);
150    struct nouveau_pushbuf *push = nv30->base.pushbuf;
151    struct nouveau_pushbuf_refn refs[] = {
152       { fp->bo, fp->domain | NOUVEAU_BO_RD },
153       { src->bo, src->domain | NOUVEAU_BO_RD },
154       { dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR },
155    };
156    u32 texfmt, texswz;
157    u32 format, stride;
158 
159    if (!PUSH_SPACE_EX(push, 512, 8, 0) ||
160        PUSH_REFN(push, refs, ARRAY_SIZE(refs)))
161       return;
162 
163    /* various switches depending on cpp of the transfer */
164    switch (dst->cpp) {
165    case 4:
166       format = NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 |
167                NV30_3D_RT_FORMAT_ZETA_Z24S8;
168       texfmt = NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8;
169       texswz = 0x0000aae4;
170       break;
171    case 2:
172       format = NV30_3D_RT_FORMAT_COLOR_R5G6B5 |
173                NV30_3D_RT_FORMAT_ZETA_Z16;
174       texfmt = NV40_3D_TEX_FORMAT_FORMAT_R5G6B5;
175       texswz = 0x0000a9e4;
176       break;
177    case 1:
178       format = NV30_3D_RT_FORMAT_COLOR_B8 |
179                NV30_3D_RT_FORMAT_ZETA_Z16;
180       texfmt = NV40_3D_TEX_FORMAT_FORMAT_L8;
181       texswz = 0x0000aaff;
182       break;
183    default:
184       assert(0);
185       return;
186    }
187 
188    /* render target */
189    if (!dst->pitch) {
190       format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED;
191       format |= util_logbase2(dst->w) << 16;
192       format |= util_logbase2(dst->h) << 24;
193       stride  = 64;
194    } else {
195       format |= NV30_3D_RT_FORMAT_TYPE_LINEAR;
196       stride  = dst->pitch;
197    }
198 
199    BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2);
200    PUSH_DATA (push, dst->w << 16);
201    PUSH_DATA (push, dst->h << 16);
202    BEGIN_NV04(push, NV30_3D(RT_HORIZ), 5);
203    PUSH_DATA (push, dst->w << 16);
204    PUSH_DATA (push, dst->h << 16);
205    PUSH_DATA (push, format);
206    PUSH_DATA (push, stride);
207    PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
208    BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
209    PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0);
210 
211    nv30->dirty |= NV30_NEW_FRAMEBUFFER;
212 
213    /* viewport state */
214    BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8);
215    PUSH_DATAf(push, 0.0);
216    PUSH_DATAf(push, 0.0);
217    PUSH_DATAf(push, 0.0);
218    PUSH_DATAf(push, 0.0);
219    PUSH_DATAf(push, 1.0);
220    PUSH_DATAf(push, 1.0);
221    PUSH_DATAf(push, 1.0);
222    PUSH_DATAf(push, 1.0);
223    BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2);
224    PUSH_DATAf(push, 0.0);
225    PUSH_DATAf(push, 1.0);
226 
227    nv30->dirty |= NV30_NEW_VIEWPORT;
228 
229    /* blend state */
230    BEGIN_NV04(push, NV30_3D(COLOR_LOGIC_OP_ENABLE), 1);
231    PUSH_DATA (push, 0);
232    BEGIN_NV04(push, NV30_3D(DITHER_ENABLE), 1);
233    PUSH_DATA (push, 0);
234    BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1);
235    PUSH_DATA (push, 0);
236    BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1);
237    PUSH_DATA (push, 0x01010101);
238 
239    nv30->dirty |= NV30_NEW_BLEND;
240 
241    /* depth-stencil-alpha state */
242    BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2);
243    PUSH_DATA (push, 0);
244    PUSH_DATA (push, 0);
245    BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(0)), 1);
246    PUSH_DATA (push, 0);
247    BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(1)), 1);
248    PUSH_DATA (push, 0);
249    BEGIN_NV04(push, NV30_3D(ALPHA_FUNC_ENABLE), 1);
250    PUSH_DATA (push, 0);
251 
252    nv30->dirty |= NV30_NEW_ZSA;
253 
254    /* rasterizer state */
255    BEGIN_NV04(push, NV30_3D(SHADE_MODEL), 1);
256    PUSH_DATA (push, NV30_3D_SHADE_MODEL_FLAT);
257    BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1);
258    PUSH_DATA (push, 0);
259    BEGIN_NV04(push, NV30_3D(POLYGON_MODE_FRONT), 2);
260    PUSH_DATA (push, NV30_3D_POLYGON_MODE_FRONT_FILL);
261    PUSH_DATA (push, NV30_3D_POLYGON_MODE_BACK_FILL);
262    BEGIN_NV04(push, NV30_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
263    PUSH_DATA (push, 0);
264    BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_ENABLE), 1);
265    PUSH_DATA (push, 0);
266 
267    nv30->state.scissor_off = 0;
268    nv30->dirty |= NV30_NEW_RASTERIZER;
269 
270    /* vertex program */
271    BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
272    PUSH_DATA (push, vp->start);
273    BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
274    PUSH_DATA (push, 0x00000101); /* attrib: 0, 8 */
275    PUSH_DATA (push, 0x00004000); /* result: hpos, tex0 */
276    BEGIN_NV04(push, NV30_3D(ENGINE), 1);
277    PUSH_DATA (push, 0x00000103);
278    BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1);
279    PUSH_DATA (push, 0x00000000);
280 
281    nv30->dirty |= NV30_NEW_VERTPROG;
282    nv30->dirty |= NV30_NEW_CLIP;
283 
284    /* fragment program */
285    BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1);
286    PUSH_RELOC(push, fp->bo, fp->offset, fp->domain |
287                     NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
288                     NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
289                     NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
290    BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1);
291    PUSH_DATA (push, 0x02000000);
292 
293    nv30->state.fragprog = NULL;
294    nv30->dirty |= NV30_NEW_FRAGPROG;
295 
296    /* texture */
297    texfmt |= 1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT;
298    texfmt |= NV30_3D_TEX_FORMAT_NO_BORDER;
299    texfmt |= NV40_3D_TEX_FORMAT_RECT;
300    texfmt |= 0x00008000;
301    if (src->d < 2)
302       texfmt |= NV30_3D_TEX_FORMAT_DIMS_2D;
303    else
304       texfmt |= NV30_3D_TEX_FORMAT_DIMS_3D;
305    if (src->pitch)
306       texfmt |= NV40_3D_TEX_FORMAT_LINEAR;
307 
308    BEGIN_NV04(push, NV30_3D(TEX_OFFSET(0)), 8);
309    PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
310    PUSH_RELOC(push, src->bo, texfmt, NOUVEAU_BO_OR,
311                     NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
312    PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE |
313                     NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE |
314                     NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE);
315    PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE);
316    PUSH_DATA (push, texswz);
317    switch (filter) {
318    case BILINEAR:
319       PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_LINEAR |
320                        NV30_3D_TEX_FILTER_MAG_LINEAR | 0x00002000);
321       break;
322    default:
323       PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST |
324                        NV30_3D_TEX_FILTER_MAG_NEAREST | 0x00002000);
325       break;
326    }
327    PUSH_DATA (push, (src->w << 16) | src->h);
328    PUSH_DATA (push, 0x00000000);
329    BEGIN_NV04(push, NV40_3D(TEX_SIZE1(0)), 1);
330    PUSH_DATA (push, 0x00100000 | src->pitch);
331    BEGIN_NV04(push, SUBC_3D(0x0b40), 1);
332    PUSH_DATA (push, src->d < 2 ? 0x00000001 : 0x00000000);
333    BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1);
334    PUSH_DATA (push, 1);
335 
336    nv30->fragprog.dirty_samplers |= 1;
337    nv30->dirty |= NV30_NEW_FRAGTEX;
338 
339    /* blit! */
340    BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2);
341    PUSH_DATA (push, (dst->x1 - dst->x0) << 16 | dst->x0);
342    PUSH_DATA (push, (dst->y1 - dst->y0) << 16 | dst->y0);
343    BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
344    PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_QUADS);
345    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
346    PUSH_DATAf(push, src->x0);
347    PUSH_DATAf(push, src->y0);
348    PUSH_DATAf(push, src->z);
349    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
350    PUSH_DATA (push, (dst->y0 << 16) | dst->x0);
351    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
352    PUSH_DATAf(push, src->x1);
353    PUSH_DATAf(push, src->y0);
354    PUSH_DATAf(push, src->z);
355    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
356    PUSH_DATA (push, (dst->y0 << 16) | dst->x1);
357    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
358    PUSH_DATAf(push, src->x1);
359    PUSH_DATAf(push, src->y1);
360    PUSH_DATAf(push, src->z);
361    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
362    PUSH_DATA (push, (dst->y1 << 16) | dst->x1);
363    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
364    PUSH_DATAf(push, src->x0);
365    PUSH_DATAf(push, src->y1);
366    PUSH_DATAf(push, src->z);
367    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
368    PUSH_DATA (push, (dst->y1 << 16) | dst->x0);
369    BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
370    PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
371 }
372 
373 static bool
nv30_transfer_sifm(XFER_ARGS)374 nv30_transfer_sifm(XFER_ARGS)
375 {
376    if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
377       return false;
378 
379    if (src->d > 1 || dst->d > 1)
380       return false;
381 
382    if (dst->offset & 63)
383       return false;
384 
385    if (!dst->pitch) {
386       if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
387          return false;
388    } else {
389       if (dst->domain != NOUVEAU_BO_VRAM)
390          return false;
391       if (dst->pitch & 63)
392          return false;
393    }
394 
395    return true;
396 }
397 
398 static void
nv30_transfer_rect_sifm(XFER_ARGS)399 nv30_transfer_rect_sifm(XFER_ARGS)
400 
401 {
402    struct nouveau_pushbuf *push = nv30->base.pushbuf;
403    struct nouveau_pushbuf_refn refs[] = {
404       { src->bo, src->domain | NOUVEAU_BO_RD },
405       { dst->bo, dst->domain | NOUVEAU_BO_WR },
406    };
407    struct nv04_fifo *fifo = push->channel->data;
408    unsigned si_fmt, si_arg;
409    unsigned ss_fmt;
410 
411    switch (dst->cpp) {
412    case 4: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_A8R8G8B8; break;
413    case 2: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_R5G6B5; break;
414    default:
415       ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_Y8;
416       break;
417    }
418 
419    switch (src->cpp) {
420    case 4: si_fmt = NV03_SIFM_COLOR_FORMAT_A8R8G8B8; break;
421    case 2: si_fmt = NV03_SIFM_COLOR_FORMAT_R5G6B5; break;
422    default:
423       si_fmt = NV03_SIFM_COLOR_FORMAT_AY8;
424       break;
425    }
426 
427    if (filter == NEAREST) {
428       si_arg  = NV03_SIFM_FORMAT_ORIGIN_CENTER;
429       si_arg |= NV03_SIFM_FORMAT_FILTER_POINT_SAMPLE;
430    } else {
431       si_arg  = NV03_SIFM_FORMAT_ORIGIN_CORNER;
432       si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
433    }
434 
435    if (!PUSH_SPACE_EX(push, 64, 6, 0) ||
436        PUSH_REFN(push, refs, 2))
437       return;
438 
439    if (dst->pitch) {
440       BEGIN_NV04(push, NV04_SF2D(DMA_IMAGE_SOURCE), 2);
441       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
442       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
443       BEGIN_NV04(push, NV04_SF2D(FORMAT), 4);
444       PUSH_DATA (push, ss_fmt);
445       PUSH_DATA (push, dst->pitch << 16 | dst->pitch);
446       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
447       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
448       BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
449       PUSH_DATA (push, nv30->screen->surf2d->handle);
450    } else {
451       BEGIN_NV04(push, NV04_SSWZ(DMA_IMAGE), 1);
452       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
453       BEGIN_NV04(push, NV04_SSWZ(FORMAT), 2);
454       PUSH_DATA (push, ss_fmt | (util_logbase2(dst->w) << 16) |
455                                 (util_logbase2(dst->h) << 24));
456       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
457       BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
458       PUSH_DATA (push, nv30->screen->swzsurf->handle);
459    }
460 
461    BEGIN_NV04(push, NV03_SIFM(DMA_IMAGE), 1);
462    PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
463    BEGIN_NV04(push, NV03_SIFM(COLOR_FORMAT), 8);
464    PUSH_DATA (push, si_fmt);
465    PUSH_DATA (push, NV03_SIFM_OPERATION_SRCCOPY);
466    PUSH_DATA (push, (           dst->y0  << 16) |            dst->x0);
467    PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
468    PUSH_DATA (push, (           dst->y0  << 16) |            dst->x0);
469    PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
470    PUSH_DATA (push, ((src->x1 - src->x0) << 20) / (dst->x1 - dst->x0));
471    PUSH_DATA (push, ((src->y1 - src->y0) << 20) / (dst->y1 - dst->y0));
472    BEGIN_NV04(push, NV03_SIFM(SIZE), 4);
473    PUSH_DATA (push, align(src->h, 2) << 16 | align(src->w, 2));
474    PUSH_DATA (push, src->pitch | si_arg);
475    PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
476    PUSH_DATA (push, (src->y0 << 20) | src->x0 << 4);
477 }
478 
479 /* The NOP+OFFSET_OUT stuff after each M2MF transfer *is* actually required
480  * to prevent some odd things from happening, easily reproducible by
481  * attempting to do conditional rendering that has a M2MF transfer done
482  * some time before it.  0x1e98 will fail with a DMA_W_PROTECTION (assuming
483  * that name is still accurate on nv4x) error.
484  */
485 
486 static bool
nv30_transfer_m2mf(XFER_ARGS)487 nv30_transfer_m2mf(XFER_ARGS)
488 {
489    if (!src->pitch || !dst->pitch)
490       return false;
491    if (nv30_transfer_scaled(src, dst))
492       return false;
493    return true;
494 }
495 
496 static void
nv30_transfer_rect_m2mf(XFER_ARGS)497 nv30_transfer_rect_m2mf(XFER_ARGS)
498 {
499    struct nouveau_pushbuf *push = nv30->base.pushbuf;
500    struct nouveau_pushbuf_refn refs[] = {
501       { src->bo, src->domain | NOUVEAU_BO_RD },
502       { dst->bo, dst->domain | NOUVEAU_BO_WR },
503    };
504    struct nv04_fifo *fifo = push->channel->data;
505    unsigned src_offset = src->offset;
506    unsigned dst_offset = dst->offset;
507    unsigned w = dst->x1 - dst->x0;
508    unsigned h = dst->y1 - dst->y0;
509 
510    src_offset += (src->y0 * src->pitch) + (src->x0 * src->cpp);
511    dst_offset += (dst->y0 * dst->pitch) + (dst->x0 * dst->cpp);
512 
513    BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
514    PUSH_DATA (push, (src->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
515    PUSH_DATA (push, (dst->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
516 
517    while (h) {
518       unsigned lines = (h > 2047) ? 2047 : h;
519 
520       if (!PUSH_SPACE_EX(push, 32, 2, 0) ||
521           PUSH_REFN(push, refs, 2))
522          return;
523 
524       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
525       PUSH_RELOC(push, src->bo, src_offset, NOUVEAU_BO_LOW, 0, 0);
526       PUSH_RELOC(push, dst->bo, dst_offset, NOUVEAU_BO_LOW, 0, 0);
527       PUSH_DATA (push, src->pitch);
528       PUSH_DATA (push, dst->pitch);
529       PUSH_DATA (push, w * src->cpp);
530       PUSH_DATA (push, lines);
531       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
532                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
533       PUSH_DATA (push, 0x00000000);
534       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
535       PUSH_DATA (push, 0x00000000);
536       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
537       PUSH_DATA (push, 0x00000000);
538 
539       h -= lines;
540       src_offset += src->pitch * lines;
541       dst_offset += dst->pitch * lines;
542    }
543 }
544 
545 static bool
nv30_transfer_cpu(XFER_ARGS)546 nv30_transfer_cpu(XFER_ARGS)
547 {
548    if (nv30_transfer_scaled(src, dst))
549       return false;
550    return true;
551 }
552 
553 static char *
linear_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)554 linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
555 {
556    return base + (y * rect->pitch) + (x * rect->cpp);
557 }
558 
/* Spreads the low 16 bits of v so that bit i lands on bit 2*i (a zero bit is
 * inserted between neighbours), then shifts the result left by s.
 */
static inline unsigned
swizzle2d(unsigned v, unsigned s)
{
   static const struct {
      unsigned shift;
      unsigned mask;
   } stages[] = {
      { 8, 0x00ff00ff },
      { 4, 0x0f0f0f0f },
      { 2, 0x33333333 },
      { 1, 0x55555555 },
   };
   unsigned i;

   for (i = 0; i < sizeof(stages) / sizeof(stages[0]); i++)
      v = (v | (v << stages[i].shift)) & stages[i].mask;

   return v << s;
}
568 
569 static char *
swizzle2d_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)570 swizzle2d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
571 {
572    unsigned k = util_logbase2(MIN2(rect->w, rect->h));
573    unsigned km = (1 << k) - 1;
574    unsigned nx = rect->w >> k;
575    unsigned tx = x >> k;
576    unsigned ty = y >> k;
577    unsigned m;
578 
579    m  = swizzle2d(x & km, 0);
580    m |= swizzle2d(y & km, 1);
581    m += ((ty * nx) + tx) << k << k;
582 
583    return base + (m * rect->cpp);
584 }
585 
586 static char *
swizzle3d_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)587 swizzle3d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
588 {
589    unsigned w = rect->w >> 1;
590    unsigned h = rect->h >> 1;
591    unsigned d = rect->d >> 1;
592    unsigned i = 0, o;
593    unsigned v = 0;
594 
595    do {
596       o = i;
597       if (w) {
598          v |= (x & 1) << i++;
599          x >>= 1;
600          w >>= 1;
601       }
602       if (h) {
603          v |= (y & 1) << i++;
604          y >>= 1;
605          h >>= 1;
606       }
607       if (d) {
608          v |= (z & 1) << i++;
609          z >>= 1;
610          d >>= 1;
611       }
612    } while(o != i);
613 
614    return base + (v * rect->cpp);
615 }
616 
617 typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int);
618 
619 static inline get_ptr_t
get_ptr(struct nv30_rect * rect)620 get_ptr(struct nv30_rect *rect)
621 {
622    if (rect->pitch)
623       return linear_ptr;
624 
625    if (rect->d <= 1)
626       return swizzle2d_ptr;
627 
628    return swizzle3d_ptr;
629 }
630 
631 static void
nv30_transfer_rect_cpu(XFER_ARGS)632 nv30_transfer_rect_cpu(XFER_ARGS)
633 {
634    get_ptr_t sp = get_ptr(src);
635    get_ptr_t dp = get_ptr(dst);
636    char *srcmap, *dstmap;
637    int x, y;
638 
639    BO_MAP(nv30->base.screen, src->bo, NOUVEAU_BO_RD, nv30->base.client);
640    BO_MAP(nv30->base.screen, dst->bo, NOUVEAU_BO_WR, nv30->base.client);
641    srcmap = src->bo->map + src->offset;
642    dstmap = dst->bo->map + dst->offset;
643 
644    for (y = 0; y < (dst->y1 - dst->y0); y++) {
645       for (x = 0; x < (dst->x1 - dst->x0); x++) {
646          memcpy(dp(dst, dstmap, dst->x0 + x, dst->y0 + y, dst->z),
647                 sp(src, srcmap, src->x0 + x, src->y0 + y, src->z), dst->cpp);
648       }
649    }
650 }
651 
652 void
nv30_transfer_rect(struct nv30_context * nv30,enum nv30_transfer_filter filter,struct nv30_rect * src,struct nv30_rect * dst)653 nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter,
654                    struct nv30_rect *src, struct nv30_rect *dst)
655 {
656    static const struct {
657       char *name;
658       bool (*possible)(XFER_ARGS);
659       void (*execute)(XFER_ARGS);
660    } *method, methods[] = {
661       { "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf },
662       { "sifm", nv30_transfer_sifm, nv30_transfer_rect_sifm },
663       { "blit", nv30_transfer_blit, nv30_transfer_rect_blit },
664       { "rect", nv30_transfer_cpu, nv30_transfer_rect_cpu },
665       {}
666    };
667 
668    for (method = methods; method->possible; method++) {
669       if (method->possible(nv30, filter, src, dst)) {
670          method->execute(nv30, filter, src, dst);
671          return;
672       }
673    }
674 
675    assert(0);
676 }
677 
/* Placeholder: uploading inline data is not implemented for nv30.  The
 * arguments are ignored and a diagnostic is written to stderr.
 */
void
nv30_transfer_push_data(struct nouveau_context *nv,
                        struct nouveau_bo *bo, unsigned offset, unsigned domain,
                        unsigned size, void *data)
{
   /* use ifc, or scratch + copy_data? */
   fputs("nv30: push_data not implemented\n", stderr);
}
686 
687 void
nv30_transfer_copy_data(struct nouveau_context * nv,struct nouveau_bo * dst,unsigned d_off,unsigned d_dom,struct nouveau_bo * src,unsigned s_off,unsigned s_dom,unsigned size)688 nv30_transfer_copy_data(struct nouveau_context *nv,
689                         struct nouveau_bo *dst, unsigned d_off, unsigned d_dom,
690                         struct nouveau_bo *src, unsigned s_off, unsigned s_dom,
691                         unsigned size)
692 {
693    struct nv04_fifo *fifo = nv->screen->channel->data;
694    struct nouveau_pushbuf_refn refs[] = {
695       { src, s_dom | NOUVEAU_BO_RD },
696       { dst, d_dom | NOUVEAU_BO_WR },
697    };
698    struct nouveau_pushbuf *push = nv->pushbuf;
699    unsigned pages, lines;
700 
701    pages = size >> 12;
702    size -= (pages << 12);
703 
704    BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
705    PUSH_DATA (push, (s_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
706    PUSH_DATA (push, (d_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
707 
708    while (pages) {
709       lines  = (pages > 2047) ? 2047 : pages;
710       pages -= lines;
711 
712       if (!PUSH_SPACE_EX(push, 32, 2, 0) ||
713           PUSH_REFN(push, refs, 2))
714          return;
715 
716       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
717       PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
718       PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
719       PUSH_DATA (push, 4096);
720       PUSH_DATA (push, 4096);
721       PUSH_DATA (push, 4096);
722       PUSH_DATA (push, lines);
723       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
724                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
725       PUSH_DATA (push, 0x00000000);
726       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
727       PUSH_DATA (push, 0x00000000);
728       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
729       PUSH_DATA (push, 0x00000000);
730 
731       s_off += (lines << 12);
732       d_off += (lines << 12);
733    }
734 
735    if (size) {
736       if (!PUSH_SPACE_EX(push, 32, 2, 0) ||
737           PUSH_REFN(push, refs, 2))
738          return;
739 
740       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
741       PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
742       PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
743       PUSH_DATA (push, size);
744       PUSH_DATA (push, size);
745       PUSH_DATA (push, size);
746       PUSH_DATA (push, 1);
747       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
748                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
749       PUSH_DATA (push, 0x00000000);
750       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
751       PUSH_DATA (push, 0x00000000);
752       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
753       PUSH_DATA (push, 0x00000000);
754    }
755 }
756