xref: /aosp_15_r20/external/libhevc/common/arm/ihevc_intra_pred_chroma_ver.s (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1@/*****************************************************************************
2@*
3@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4@*
5@* Licensed under the Apache License, Version 2.0 (the "License");
6@* you may not use this file except in compliance with the License.
7@* You may obtain a copy of the License at:
8@*
9@* http://www.apache.org/licenses/LICENSE-2.0
10@*
11@* Unless required by applicable law or agreed to in writing, software
12@* distributed under the License is distributed on an "AS IS" BASIS,
13@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@* See the License for the specific language governing permissions and
15@* limitations under the License.
16@*
17@*****************************************************************************/
@/**
@*******************************************************************************
@* @file
@*  ihevc_intra_pred_chroma_ver_neon.s
@*
@* @brief
@*  contains the function definition for chroma intra prediction in the
@*  vertical mode. the function is coded in arm neon assembly and can be
@*  compiled using rvct
@*
@* @author
@*  yogeswaran rs
@*
@* @par list of functions:
@*  - ihevc_intra_pred_chroma_ver_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@/**
@*******************************************************************************
@*
@* @brief
@*    chroma intra prediction for the vertical mode
@*
@* @par description:
@*    copies the row of reference bytes that starts at pu1_ref[4 * nt + 2]
@*    into every row of the destination block. for nt = 4, 8 and 16 this
@*    writes nt rows of 2 * nt bytes each, dst_strd bytes apart
@*
@* @param[in] pu1_ref
@*  uword8 pointer to the source
@*
@* @param[out] pu1_dst
@*  uword8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] nt
@*  size of transform block
@*
@* @param[in] mode
@*  type of filtering (not read by this function)
@*
@* @returns
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
73
74@void ihevc_intra_pred_chroma_ver(uword8 *pu1_ref,
75@        word32 src_strd,
76@        uword8 *pu1_dst,
77@        word32 dst_strd,
78@        word32 nt,
79@        word32 mode)
80@**************variables vs registers*****************************************
81@r0 => *pu1_ref
82@r1 => src_strd
83@r2 => *pu1_dst
84@r3 => dst_strd
85
86@stack contents from #40
87@   nt
88@   mode
89
@ nt is the fifth argument and therefore lives on the stack. The prologue
@ below pushes 10 registers (40 bytes), which leaves nt at [sp, #40].
@ Keep this value in step with the register list of the stmfd.
.equ    nt_offset,      40

.text
.align 4

.globl ihevc_intra_pred_chroma_ver_a9q

.type ihevc_intra_pred_chroma_ver_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevc_intra_pred_chroma_ver_a9q(uword8 *pu1_ref, word32 src_strd,
@                                      uword8 *pu1_dst, word32 dst_strd,
@                                      word32 nt,       word32 mode)
@
@ Replicates one row of reference bytes, starting at pu1_ref + 4*nt + 2, into
@ the rows of the destination block. Rows are dst_strd bytes apart.
@
@ In:    r0 = pu1_ref, r2 = pu1_dst, r3 = dst_strd, [sp] = nt
@        r1 (src_strd) and mode are never read.
@ Out:   nothing is returned; only the destination block is written.
@ Uses:  r4, r5, r6, r8, r10, r11 (restored on exit), d0, d20-d23, flags.
@
@ Paths: nt <  8 -> blk_4   : 4 rows of  8 bytes
@        nt == 8 -> blk_8   : 8 rows of 16 bytes
@        nt >  8 -> copy_16 : 4 rows of 32 bytes per iteration until nt rows
@                             are done; the 32-byte row width matches 2*nt
@                             only for nt == 16.
@-------------------------------------------------------------------------------
ihevc_intra_pred_chroma_ver_a9q:

    stmfd       sp!, {r4-r12, r14}          @ 10 registers = 40 bytes, see nt_offset

    ldr         r4, [sp, #nt_offset]        @ r4  = nt (also the row counter in copy_16)

    @ Source row: the same address is used by every path.
    add         r6, r0, r4, lsl #2          @ r6  = pu1_ref + 4*nt
    add         r6, r6, #2                  @ r6  = &pu1_ref[4*nt + 2]

    @ Four destination row pointers, so four rows are written per step.
    add         r5, r2, r3                  @ r5  = pu1_dst + 1*dst_strd
    add         r8, r5, r3                  @ r8  = pu1_dst + 2*dst_strd
    add         r10, r8, r3                 @ r10 = pu1_dst + 3*dst_strd
    lsl         r11, r3, #2                 @ r11 = 4*dst_strd (step to the next 4 rows)

    cmp         r4, #8                      @ none of the adds/lsl above touch the flags
    beq         blk_8
    blt         blk_4

copy_16:
    vld2.8      {d20,d21}, [r6]!            @ row bytes  0..15, r6 += 16
    vld2.8      {d22,d23}, [r6]             @ row bytes 16..31

    @ Each row is written as two 16-byte halves. The first store advances the
    @ row pointer by 16, so the second one adds 4*dst_strd - 16 to land on the
    @ start of the row four lines further down.
    sub         r11, r11, #16

kernel_copy_16:
    vst2.8      {d20,d21}, [r2]!
    vst2.8      {d20,d21}, [r5]!
    vst2.8      {d20,d21}, [r8]!
    vst2.8      {d20,d21}, [r10]!

    vst2.8      {d22,d23}, [r2], r11
    vst2.8      {d22,d23}, [r5], r11
    vst2.8      {d22,d23}, [r8], r11
    vst2.8      {d22,d23}, [r10], r11

    subs        r4, r4, #4                  @ four more rows done
    bgt         kernel_copy_16              @ stop at <= 0 so the loop always terminates

    b           end_func

blk_8:
    vld2.8      {d20,d21}, [r6]             @ the whole 16-byte row; nothing more is needed

    vst2.8      {d20,d21}, [r2], r11        @ rows 0..3, pointers move on to rows 4..7
    vst2.8      {d20,d21}, [r5], r11
    vst2.8      {d20,d21}, [r8], r11
    vst2.8      {d20,d21}, [r10], r11

    vst2.8      {d20,d21}, [r2]             @ rows 4..7
    vst2.8      {d20,d21}, [r5]
    vst2.8      {d20,d21}, [r8]
    vst2.8      {d20,d21}, [r10]

    b           end_func                    @ must not fall through into blk_4

blk_4:
    vld1.8      {d0}, [r6]                  @ the whole 8-byte row

    vst1.8      {d0}, [r2]                  @ rows 0..3
    vst1.8      {d0}, [r5]
    vst1.8      {d0}, [r8]
    vst1.8      {d0}, [r10]

end_func:
    ldmfd       sp!, {r4-r12, r15}          @ restore registers; loading r15 (pc) returns
229
230
231
232