; xref: /aosp_15_r20/external/libjpeg-turbo/simd/i386/jcsample-sse2.asm (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
;
; jcsample.asm - downsampling (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208

; The macros and constants used throughout this file (EXTN, GLOBAL_FUNCTION,
; alignx, SEG_TEXT, SIZEOF_XMMWORD, JSAMPROW, ...) are not defined here;
; they are presumably all supplied by jsimdext.inc.
%include "jsimdext.inc"

; --------------------------------------------------------------------------
    SECTION     SEG_TEXT
    BITS        32
;
; Downsample pixel values of a single component.
; This version handles the common case of 2:1 horizontal and 1:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v1_downsample_sse2(JDIMENSION image_width, int max_v_samp_factor,
;                            JDIMENSION v_samp_factor,
;                            JDIMENSION width_in_blocks, JSAMPARRAY input_data,
;                            JSAMPARRAY output_data);
;
; Arguments:  all six are read from the stack through ebp (offsets below).
; Returns:    nothing (void).
; Preserved:  ebp, esi, edi (pushed in the prologue, popped at .return).
; Clobbered:  eax, ecx, edx, xmm0-xmm3, xmm6, xmm7, flags.  The direction
;             flag is cleared (cld) when the edge-expansion loop runs.
;
; The routine works in two passes:
;   1. expand_right_edge: in each of max_v_samp_factor input rows, the bytes
;      from column image_width up to column 2*output_cols are overwritten
;      with the sample at column image_width-1, where
;      output_cols = width_in_blocks << 3.  The input rows are modified.
;   2. h2v1_downsample: for each of v_samp_factor rows, every horizontally
;      adjacent pair of input samples is summed, an alternating 0/1 bias is
;      added, and the sum is shifted right by one to give one output sample.
;
; NOTE(review): rows are accessed with movdqa, so every input and output row
; pointer is assumed to be suitably aligned for XMMWORD access.  The tail
; path (.columnloop_r8) still stores one whole XMMWORD, so output rows are
; presumably padded past the last column -- confirm against the caller.
;

; Stack layout after "push ebp / mov ebp, esp": [ebp] = saved ebp,
; [ebp+4] = return address, first argument at [ebp+8].
%define img_width(b)    (b) + 8         ; JDIMENSION image_width
%define max_v_samp(b)   (b) + 12        ; int max_v_samp_factor
%define v_samp(b)       (b) + 16        ; JDIMENSION v_samp_factor
%define width_blks(b)   (b) + 20        ; JDIMENSION width_in_blocks
%define input_data(b)   (b) + 24        ; JSAMPARRAY input_data
%define output_data(b)  (b) + 28        ; JSAMPARRAY output_data

    align       32
    GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)

EXTN(jsimd_h2v1_downsample_sse2):
    push        ebp
    mov         ebp, esp
;   push        ebx                     ; unused
;   push        ecx                     ; need not be preserved
;   push        edx                     ; need not be preserved
    push        esi
    push        edi

    mov         ecx, JDIMENSION [width_blks(ebp)]
    shl         ecx, 3                  ; imul ecx,DCTSIZE (ecx = output_cols)
    jz          near .return            ; zero output columns: nothing to do

    mov         edx, JDIMENSION [img_width(ebp)]

    ; -- expand_right_edge

    push        ecx                     ; keep output_cols for the second pass
    shl         ecx, 1                  ; output_cols * 2
    sub         ecx, edx                ; ecx = number of bytes to pad per row
    jle         short .expand_end       ; row already wide enough

    mov         eax, INT [max_v_samp(ebp)]  ; eax = rows left to pad
    test        eax, eax
    jle         short .expand_end

    cld                                 ; rep stosb must walk upwards
    mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
    alignx      16, 7
.expandloop:
    push        eax                     ; al is about to hold the fill value
    push        ecx                     ; rep stosb counts ecx down to zero

    mov         edi, JSAMPROW [esi]
    add         edi, edx                ; edi = first byte past the image data
    mov         al, JSAMPLE [edi-1]     ; al = last real sample of the row

    rep stosb                           ; replicate it across the padding

    pop         ecx
    pop         eax

    add         esi, byte SIZEOF_JSAMPROW   ; next row pointer
    dec         eax
    jg          short .expandloop

.expand_end:
    pop         ecx                     ; output_cols

    ; -- h2v1_downsample

    mov         eax, JDIMENSION [v_samp(ebp)]  ; rowctr
    test        eax, eax
    jle         near .return

    mov         edx, 0x00010000         ; bias pattern
    movd        xmm7, edx
    pcmpeqw     xmm6, xmm6              ; all bits set
    pshufd      xmm7, xmm7, 0x00        ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
    psrlw       xmm6, BYTE_BIT          ; xmm6={0xFF 0x00 0xFF 0x00 ..}

    mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
    mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
    alignx      16, 7
.rowloop:
    ; Save the per-row state; ecx, edi and esi are reused inside the
    ; column loop as outcol, outptr and inptr.
    push        ecx
    push        edi
    push        esi

    mov         esi, JSAMPROW [esi]     ; inptr
    mov         edi, JSAMPROW [edi]     ; outptr

    cmp         ecx, byte SIZEOF_XMMWORD
    jae         short .columnloop
    alignx      16, 7

.columnloop_r8:
    ; Tail: fewer than SIZEOF_XMMWORD output columns remain.  Only one
    ; input XMMWORD is loaded; the second half is replaced by zeros, and
    ; ecx is set so that the subtraction below terminates the loop.
    movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
    pxor        xmm1, xmm1
    mov         ecx, SIZEOF_XMMWORD
    jmp         short .downsample
    alignx      16, 7

.columnloop:
    ; Main path: two input XMMWORDs produce one output XMMWORD.
    movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
    movdqa      xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]

.downsample:
    movdqa      xmm2, xmm0
    movdqa      xmm3, xmm1

    ; Split each 16-bit lane into its low byte (mask) and high byte (shift)
    ; so that adjacent samples can be added as words.
    pand        xmm0, xmm6
    psrlw       xmm2, BYTE_BIT
    pand        xmm1, xmm6
    psrlw       xmm3, BYTE_BIT

    paddw       xmm0, xmm2              ; sum of each horizontal pair
    paddw       xmm1, xmm3
    paddw       xmm0, xmm7              ; + alternating 0/1 bias
    paddw       xmm1, xmm7
    psrlw       xmm0, 1                 ; / 2
    psrlw       xmm1, 1

    packuswb    xmm0, xmm1              ; words -> bytes, both halves

    movdqa      XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0

    sub         ecx, byte SIZEOF_XMMWORD    ; outcol
    add         esi, byte 2*SIZEOF_XMMWORD  ; inptr
    add         edi, byte 1*SIZEOF_XMMWORD  ; outptr
    cmp         ecx, byte SIZEOF_XMMWORD
    jae         short .columnloop
    test        ecx, ecx
    jnz         short .columnloop_r8    ; partial XMMWORD left over

    pop         esi
    pop         edi
    pop         ecx

    add         esi, byte SIZEOF_JSAMPROW  ; input_data
    add         edi, byte SIZEOF_JSAMPROW  ; output_data
    dec         eax                        ; rowctr
    jg          near .rowloop

.return:
    pop         edi
    pop         esi
;   pop         edx                     ; need not be preserved
;   pop         ecx                     ; need not be preserved
;   pop         ebx                     ; unused
    pop         ebp
    ret

; --------------------------------------------------------------------------
;
; Downsample pixel values of a single component.
; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v2_downsample_sse2(JDIMENSION image_width, int max_v_samp_factor,
;                            JDIMENSION v_samp_factor,
;                            JDIMENSION width_in_blocks, JSAMPARRAY input_data,
;                            JSAMPARRAY output_data);
;
; Arguments:  all six are read from the stack through ebp (offsets below).
; Returns:    nothing (void).
; Preserved:  ebp, esi, edi (pushed in the prologue, popped at .return).
; Clobbered:  eax, ecx, edx, xmm0-xmm7, flags.  The direction flag is
;             cleared (cld) when the edge-expansion loop runs.
;
; The routine works in two passes:
;   1. expand_right_edge: in each of max_v_samp_factor input rows, the bytes
;      from column image_width up to column 2*output_cols are overwritten
;      with the sample at column image_width-1, where
;      output_cols = width_in_blocks << 3.  The input rows are modified.
;   2. h2v2_downsample: v_samp_factor output rows are produced, each from
;      two consecutive input rows.  Every 2x2 group of input samples is
;      summed, an alternating 1/2 bias is added, and the sum is shifted
;      right by two to give one output sample.
;
; NOTE(review): rows are accessed with movdqa, so every input and output row
; pointer is assumed to be suitably aligned for XMMWORD access.  The tail
; path (.columnloop_r8) still stores one whole XMMWORD, so output rows are
; presumably padded past the last column -- confirm against the caller.
;

; Stack layout after "push ebp / mov ebp, esp": [ebp] = saved ebp,
; [ebp+4] = return address, first argument at [ebp+8].
%define img_width(b)    (b) + 8         ; JDIMENSION image_width
%define max_v_samp(b)   (b) + 12        ; int max_v_samp_factor
%define v_samp(b)       (b) + 16        ; JDIMENSION v_samp_factor
%define width_blks(b)   (b) + 20        ; JDIMENSION width_in_blocks
%define input_data(b)   (b) + 24        ; JSAMPARRAY input_data
%define output_data(b)  (b) + 28        ; JSAMPARRAY output_data

    align       32
    GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)

EXTN(jsimd_h2v2_downsample_sse2):
    push        ebp
    mov         ebp, esp
;   push        ebx                     ; unused
;   push        ecx                     ; need not be preserved
;   push        edx                     ; need not be preserved
    push        esi
    push        edi

    mov         ecx, JDIMENSION [width_blks(ebp)]
    shl         ecx, 3                  ; imul ecx,DCTSIZE (ecx = output_cols)
    jz          near .return            ; zero output columns: nothing to do

    mov         edx, JDIMENSION [img_width(ebp)]

    ; -- expand_right_edge

    push        ecx                     ; keep output_cols for the second pass
    shl         ecx, 1                  ; output_cols * 2
    sub         ecx, edx                ; ecx = number of bytes to pad per row
    jle         short .expand_end       ; row already wide enough

    mov         eax, INT [max_v_samp(ebp)]  ; eax = rows left to pad
    test        eax, eax
    jle         short .expand_end

    cld                                 ; rep stosb must walk upwards
    mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
    alignx      16, 7
.expandloop:
    push        eax                     ; al is about to hold the fill value
    push        ecx                     ; rep stosb counts ecx down to zero

    mov         edi, JSAMPROW [esi]
    add         edi, edx                ; edi = first byte past the image data
    mov         al, JSAMPLE [edi-1]     ; al = last real sample of the row

    rep stosb                           ; replicate it across the padding

    pop         ecx
    pop         eax

    add         esi, byte SIZEOF_JSAMPROW   ; next row pointer
    dec         eax
    jg          short .expandloop

.expand_end:
    pop         ecx                     ; output_cols

    ; -- h2v2_downsample

    mov         eax, JDIMENSION [v_samp(ebp)]  ; rowctr
    test        eax, eax
    jle         near .return

    mov         edx, 0x00020001         ; bias pattern
    movd        xmm7, edx
    pcmpeqw     xmm6, xmm6              ; all bits set
    pshufd      xmm7, xmm7, 0x00        ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
    psrlw       xmm6, BYTE_BIT          ; xmm6={0xFF 0x00 0xFF 0x00 ..}

    mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
    mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
    alignx      16, 7
.rowloop:
    ; Save the per-row state; ecx, edi and esi are reused inside the
    ; column loop as outcol, outptr and inptr1 (edx becomes inptr0).
    push        ecx
    push        edi
    push        esi

    mov         edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; inptr0
    mov         esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; inptr1
    mov         edi, JSAMPROW [edi]                    ; outptr

    cmp         ecx, byte SIZEOF_XMMWORD
    jae         short .columnloop
    alignx      16, 7

.columnloop_r8:
    ; Tail: fewer than SIZEOF_XMMWORD output columns remain.  Only one
    ; XMMWORD per input row is loaded; the second halves are replaced by
    ; zeros, and ecx is set so that the subtraction below ends the loop.
    movdqa      xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
    movdqa      xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
    pxor        xmm2, xmm2
    pxor        xmm3, xmm3
    mov         ecx, SIZEOF_XMMWORD
    jmp         short .downsample
    alignx      16, 7

.columnloop:
    ; Main path: two XMMWORDs from each of the two input rows produce one
    ; output XMMWORD.  xmm0/xmm2 come from inptr0, xmm1/xmm3 from inptr1.
    movdqa      xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
    movdqa      xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
    movdqa      xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD]
    movdqa      xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD]

.downsample:
    ; First half: split each 16-bit lane into low byte (mask) and high byte
    ; (shift) and add them, giving per-row horizontal pair sums as words.
    movdqa      xmm4, xmm0
    movdqa      xmm5, xmm1
    pand        xmm0, xmm6
    psrlw       xmm4, BYTE_BIT
    pand        xmm1, xmm6
    psrlw       xmm5, BYTE_BIT
    paddw       xmm0, xmm4
    paddw       xmm1, xmm5

    ; Second half: same treatment (xmm4/xmm5 are reused as scratch).
    movdqa      xmm4, xmm2
    movdqa      xmm5, xmm3
    pand        xmm2, xmm6
    psrlw       xmm4, BYTE_BIT
    pand        xmm3, xmm6
    psrlw       xmm5, BYTE_BIT
    paddw       xmm2, xmm4
    paddw       xmm3, xmm5

    paddw       xmm0, xmm1              ; add the two rows: 2x2 sums
    paddw       xmm2, xmm3
    paddw       xmm0, xmm7              ; + alternating 1/2 bias
    paddw       xmm2, xmm7
    psrlw       xmm0, 2                 ; / 4
    psrlw       xmm2, 2

    packuswb    xmm0, xmm2              ; words -> bytes, both halves

    movdqa      XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0

    sub         ecx, byte SIZEOF_XMMWORD    ; outcol
    add         edx, byte 2*SIZEOF_XMMWORD  ; inptr0
    add         esi, byte 2*SIZEOF_XMMWORD  ; inptr1
    add         edi, byte 1*SIZEOF_XMMWORD  ; outptr
    cmp         ecx, byte SIZEOF_XMMWORD
    jae         near .columnloop
    test        ecx, ecx
    jnz         near .columnloop_r8     ; partial XMMWORD left over

    pop         esi
    pop         edi
    pop         ecx

    add         esi, byte 2*SIZEOF_JSAMPROW  ; input_data
    add         edi, byte 1*SIZEOF_JSAMPROW  ; output_data
    dec         eax                          ; rowctr
    jg          near .rowloop

.return:
    pop         edi
    pop         esi
;   pop         edx                     ; need not be preserved
;   pop         ecx                     ; need not be preserved
;   pop         ebx                     ; unused
    pop         ebp
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32