/* xref: /aosp_15_r20/external/libdav1d/src/loongarch/loongson_util.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd) */
/******************************************************************************
 * Copyright © 2024, VideoLAN and dav1d authors
 * Copyright © 2024, Loongson Technology Corporation Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/
27
#ifndef DAV1D_SRC_LOONGSON_UTIL_S
#define DAV1D_SRC_LOONGSON_UTIL_S

// Default value of the align parameter of the functionl macro, which hands
// it to the .align directive. A file may define DEFAULT_ALIGN before
// including this header to choose a different default.
// NOTE(review): presumably a power-of-two exponent (5 -> 32 bytes) on this
// target - confirm against the assembler documentation.
#ifndef DEFAULT_ALIGN
#define DEFAULT_ALIGN 5
#endif
34
// functionl: open a local function (the trailing l stands for local).
//
//   name   symbol to define; it is marked .hidden, typed as @function and
//          placed in .text
//   align  argument handed to .align in front of the label
//          (defaults to DEFAULT_ALIGN)
//
// Every expansion of functionl also defines a one-shot companion macro,
// endfuncl, which must close the function body. endfuncl emits
// jirl $r0, $r1, 0x0 (jump to the address held in $r1 and discard the link
// value, i.e. return to the caller), records the symbol size from the
// distance between the label and the current location, and finally purges
// itself so that the next functionl can define it again.
.macro functionl name, align=DEFAULT_ALIGN
.macro endfuncl
    jirl    $r0, $r1, 0x0
    .size \name, . - \name
    .purgem endfuncl
.endm
.text
.align \align
.hidden \name
.type \name, @function
\name:
.endm
48
// TRANSPOSE_4x16B: byte transpose of four 16-byte LSX vectors.
//
//   in0..in3   on entry the four source rows; on exit the result
//   in4..in7   scratch registers, overwritten
//
// The first stage interleaves the even and the odd bytes of the row pairs
// (in0, in1) and (in2, in3) into the scratch registers; the second stage
// interleaves the even and the odd halfwords of those intermediates back
// into in0..in3.
// NOTE(review): going by the LSX definition of vpackev/vpackod (the third
// operand supplies the lower element of each pair), \inK should end up with
// source columns K, K+4, K+8 and K+12, four bytes each, in that order -
// confirm against the instruction manual.
// The instruction order is kept as is: the scratch registers must be
// complete before the second stage reads them.
.macro TRANSPOSE_4x16B in0, in1, in2, in3, in4, in5, in6, in7
    vpackev.b        \in4,  \in1,  \in0
    vpackod.b        \in5,  \in1,  \in0
    vpackev.b        \in6,  \in3,  \in2
    vpackod.b        \in7,  \in3,  \in2

    vpackev.h        \in0,  \in6,  \in4
    vpackod.h        \in2,  \in6,  \in4
    vpackev.h        \in1,  \in7,  \in5
    vpackod.h        \in3,  \in7,  \in5
.endm
60
// TRANSPOSE_8x16B: byte transpose of eight 16-byte LSX vectors.
//
//   in0..in7   on entry the eight source rows; on exit the result
//   in8, in9   scratch registers, overwritten
//
// Three pack stages are applied in sequence: bytes of neighbouring row
// pairs, then halfwords, then words. Only two scratch registers are
// available, so most intermediates are written over registers whose old
// contents have already been consumed. Several instructions overwrite one
// of their own sources, and in each such even/odd pair the instruction
// that keeps the shared source intact is issued first. Do not reorder.
// NOTE(review): going by the LSX definition of vpackev/vpackod (the third
// operand supplies the lower element of each pair), \inK should end up with
// source column K in its low 8 bytes and column K+8 in its high 8 bytes -
// confirm against the instruction manual.
.macro TRANSPOSE_8x16B in0, in1, in2, in3, in4, in5, in6, in7, in8, in9
    // Stage 1: interleave even / odd bytes of the row pairs.
    vpackev.b        \in8,  \in1,  \in0
    vpackod.b        \in9,  \in1,  \in0
    vpackev.b        \in1,  \in3,  \in2
    vpackod.b        \in3,  \in3,  \in2
    vpackev.b        \in0,  \in5,  \in4
    vpackod.b        \in5,  \in5,  \in4
    vpackev.b        \in2,  \in7,  \in6
    vpackod.b        \in7,  \in7,  \in6

    // Stage 2: interleave even / odd halfwords of the stage 1 results.
    vpackev.h        \in4,  \in2,  \in0
    vpackod.h        \in2,  \in2,  \in0
    vpackev.h        \in6,  \in7,  \in5
    vpackod.h        \in7,  \in7,  \in5
    vpackev.h        \in5,  \in3,  \in9
    vpackod.h        \in9,  \in3,  \in9
    vpackev.h        \in3,  \in1,  \in8
    vpackod.h        \in8,  \in1,  \in8

    // Stage 3: interleave even / odd words of the stage 2 results.
    // The odd pack into in6 comes before the even pack that overwrites in2.
    vpackev.w        \in0,  \in4,  \in3
    vpackod.w        \in4,  \in4,  \in3
    vpackev.w        \in1,  \in6,  \in5
    vpackod.w        \in5,  \in6,  \in5
    vpackod.w        \in6,  \in2,  \in8
    vpackev.w        \in2,  \in2,  \in8
    vpackev.w        \in3,  \in7,  \in9
    vpackod.w        \in7,  \in7,  \in9
.endm
89
// vld_x8: load eight LSX vectors from equally spaced offsets.
//
//   src        base address register
//   start      immediate offset of the first vector
//   stride     immediate distance between consecutive vectors
//   in0..in7   destination registers; inK is loaded from
//              src + start + K * stride
//
// stride is parenthesised before it is scaled so that a compound argument
// (for example 16+16) is multiplied as a whole. Every resulting offset has
// to fit the immediate field of vld.
.macro vld_x8 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7
    vld           \in0,     \src,     \start
    vld           \in1,     \src,     \start+((\stride)*1)
    vld           \in2,     \src,     \start+((\stride)*2)
    vld           \in3,     \src,     \start+((\stride)*3)
    vld           \in4,     \src,     \start+((\stride)*4)
    vld           \in5,     \src,     \start+((\stride)*5)
    vld           \in6,     \src,     \start+((\stride)*6)
    vld           \in7,     \src,     \start+((\stride)*7)
.endm
100
// vst_x8: store eight LSX vectors to equally spaced offsets.
//
//   src        base address register of the destination memory
//   start      immediate offset of the first vector
//   stride     immediate distance between consecutive vectors
//   in0..in7   registers to store; inK is written to
//              src + start + K * stride
//
// stride is parenthesised before it is scaled so that a compound argument
// (for example 16+16) is multiplied as a whole. Every resulting offset has
// to fit the immediate field of vst.
.macro vst_x8 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7
    vst           \in0,     \src,     \start
    vst           \in1,     \src,     \start+((\stride)*1)
    vst           \in2,     \src,     \start+((\stride)*2)
    vst           \in3,     \src,     \start+((\stride)*3)
    vst           \in4,     \src,     \start+((\stride)*4)
    vst           \in5,     \src,     \start+((\stride)*5)
    vst           \in6,     \src,     \start+((\stride)*6)
    vst           \in7,     \src,     \start+((\stride)*7)
.endm
111
// vld_x16: load sixteen LSX vectors from equally spaced offsets.
//
//   src         base address register
//   start       immediate offset of the first vector
//   stride      immediate distance between consecutive vectors
//   in0..in15   destination registers; inK is loaded from
//               src + start + K * stride
//
// The first eight loads are delegated to vld_x8; the remaining eight are
// emitted here. stride is parenthesised before it is scaled so that a
// compound argument is multiplied as a whole. Every resulting offset has to
// fit the immediate field of vld.
.macro vld_x16 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7, \
               in8, in9, in10, in11, in12, in13, in14, in15
    vld_x8 \src, \start, \stride, \in0, \in1, \in2, \in3, \in4, \in5, \in6, \in7

    vld           \in8,     \src,     \start+((\stride)*8)
    vld           \in9,     \src,     \start+((\stride)*9)
    vld           \in10,    \src,     \start+((\stride)*10)
    vld           \in11,    \src,     \start+((\stride)*11)
    vld           \in12,    \src,     \start+((\stride)*12)
    vld           \in13,    \src,     \start+((\stride)*13)
    vld           \in14,    \src,     \start+((\stride)*14)
    vld           \in15,    \src,     \start+((\stride)*15)
.endm
126
// vst_x16: store sixteen LSX vectors to equally spaced offsets.
//
//   src         base address register of the destination memory
//   start       immediate offset of the first vector
//   stride      immediate distance between consecutive vectors
//   in0..in15   registers to store; inK is written to
//               src + start + K * stride
//
// The first eight stores are delegated to vst_x8; the remaining eight are
// emitted here. stride is parenthesised before it is scaled so that a
// compound argument is multiplied as a whole. Every resulting offset has to
// fit the immediate field of vst.
.macro vst_x16 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7, \
               in8, in9, in10, in11, in12, in13, in14, in15
    vst_x8 \src, \start, \stride, \in0, \in1, \in2, \in3, \in4, \in5, \in6, \in7

    vst           \in8,     \src,     \start+((\stride)*8)
    vst           \in9,     \src,     \start+((\stride)*9)
    vst           \in10,    \src,     \start+((\stride)*10)
    vst           \in11,    \src,     \start+((\stride)*11)
    vst           \in12,    \src,     \start+((\stride)*12)
    vst           \in13,    \src,     \start+((\stride)*13)
    vst           \in14,    \src,     \start+((\stride)*14)
    vst           \in15,    \src,     \start+((\stride)*15)
.endm
141
// xvld_x8: load eight LASX vectors from equally spaced offsets.
//
//   src        base address register
//   start      immediate offset of the first vector
//   stride     immediate distance between consecutive vectors
//   in0..in7   destination registers; inK is loaded from
//              src + start + K * stride
//
// All offsets are spelled as start + stride * K, matching the vld helpers,
// and stride is parenthesised before it is scaled so that a compound
// argument (for example 32+32) is multiplied as a whole. Every resulting
// offset has to fit the immediate field of xvld.
.macro xvld_x8 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7
    xvld           \in0,     \src,     \start
    xvld           \in1,     \src,     \start+((\stride)*1)
    xvld           \in2,     \src,     \start+((\stride)*2)
    xvld           \in3,     \src,     \start+((\stride)*3)
    xvld           \in4,     \src,     \start+((\stride)*4)
    xvld           \in5,     \src,     \start+((\stride)*5)
    xvld           \in6,     \src,     \start+((\stride)*6)
    xvld           \in7,     \src,     \start+((\stride)*7)
.endm
152
// xvst_x8: store eight LASX vectors to equally spaced offsets.
//
//   src        base address register of the destination memory
//   start      immediate offset of the first vector
//   stride     immediate distance between consecutive vectors
//   in0..in7   registers to store; inK is written to
//              src + start + K * stride
//
// All offsets are spelled as start + stride * K, matching the vst helpers,
// and stride is parenthesised before it is scaled so that a compound
// argument (for example 32+32) is multiplied as a whole. Every resulting
// offset has to fit the immediate field of xvst.
.macro xvst_x8 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7
    xvst           \in0,     \src,     \start
    xvst           \in1,     \src,     \start+((\stride)*1)
    xvst           \in2,     \src,     \start+((\stride)*2)
    xvst           \in3,     \src,     \start+((\stride)*3)
    xvst           \in4,     \src,     \start+((\stride)*4)
    xvst           \in5,     \src,     \start+((\stride)*5)
    xvst           \in6,     \src,     \start+((\stride)*6)
    xvst           \in7,     \src,     \start+((\stride)*7)
.endm
163
// xvld_x16: load sixteen LASX vectors from equally spaced offsets.
//
//   src         base address register
//   start       immediate offset of the first vector
//   stride      immediate distance between consecutive vectors
//   in0..in15   destination registers; inK is loaded from
//               src + start + K * stride
//
// The first eight loads are delegated to xvld_x8; the remaining eight are
// emitted here. All offsets are spelled as start + stride * K, and stride
// is parenthesised before it is scaled so that a compound argument is
// multiplied as a whole. Every resulting offset has to fit the immediate
// field of xvld.
.macro xvld_x16 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7, \
                in8, in9, in10, in11, in12, in13, in14, in15
    xvld_x8 \src, \start, \stride, \in0, \in1, \in2, \in3, \in4, \in5, \in6, \in7

    xvld           \in8,     \src,     \start+((\stride)*8)
    xvld           \in9,     \src,     \start+((\stride)*9)
    xvld           \in10,    \src,     \start+((\stride)*10)
    xvld           \in11,    \src,     \start+((\stride)*11)
    xvld           \in12,    \src,     \start+((\stride)*12)
    xvld           \in13,    \src,     \start+((\stride)*13)
    xvld           \in14,    \src,     \start+((\stride)*14)
    xvld           \in15,    \src,     \start+((\stride)*15)
.endm
177
// xvst_x16: store sixteen LASX vectors to equally spaced offsets.
//
//   src         base address register of the destination memory
//   start       immediate offset of the first vector
//   stride      immediate distance between consecutive vectors
//   in0..in15   registers to store; inK is written to
//               src + start + K * stride
//
// The first eight stores are delegated to xvst_x8; the remaining eight are
// emitted here. All offsets are spelled as start + stride * K, and stride
// is parenthesised before it is scaled so that a compound argument is
// multiplied as a whole. Every resulting offset has to fit the immediate
// field of xvst.
.macro xvst_x16 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7, \
                in8, in9, in10, in11, in12, in13, in14, in15
    xvst_x8 \src, \start, \stride, \in0, \in1, \in2, \in3, \in4, \in5, \in6, \in7

    xvst           \in8,     \src,     \start+((\stride)*8)
    xvst           \in9,     \src,     \start+((\stride)*9)
    xvst           \in10,    \src,     \start+((\stride)*10)
    xvst           \in11,    \src,     \start+((\stride)*11)
    xvst           \in12,    \src,     \start+((\stride)*12)
    xvst           \in13,    \src,     \start+((\stride)*13)
    xvst           \in14,    \src,     \start+((\stride)*14)
    xvst           \in15,    \src,     \start+((\stride)*15)
.endm
191
#endif /* DAV1D_SRC_LOONGSON_UTIL_S */
193