// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS common macros
//
// Except for the OUTPUT_EARLY_CLOBBER_REGS_*() helpers, every macro below
// expands to a run of adjacent string literals meant to be pasted into the
// template of a GCC extended-asm statement. Macro arguments are *names* of asm
// operands: they are stringified and referenced as %[name], so the enclosing
// asm statement must declare an operand with that symbolic name.

#ifndef WEBP_DSP_MIPS_MACRO_H_
#define WEBP_DSP_MIPS_MACRO_H_

// LOCAL_GCC_VERSION is not defined in this file; it is expected to come from
// whatever header is included before this one. The defined() check keeps the
// test quiet under -Wundef and does not change the outcome (an undefined
// identifier would evaluate to 0 and fail the comparison anyway).
#if defined(__GNUC__) && defined(__ANDROID__) && \
    defined(LOCAL_GCC_VERSION) && LOCAL_GCC_VERSION == 0x409
#define WORK_AROUND_GCC
#endif

// STR stringifies its argument as written; XSTR macro-expands it first.
#define STR(s) #s
#define XSTR(s) STR(s)

// O0[31..16 | 15..0] = I0[31..16 | 15..0] + I1[31..16 | 15..0]
// O1[31..16 | 15..0] = I0[31..16 | 15..0] - I1[31..16 | 15..0]
// O - output
// I - input (macro doesn't change it)
#define ADD_SUB_HALVES(O0, O1, I0, I1)                                         \
  "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t"                            \
  "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t"

// Loads two halfwords (lh) relative to the asm operand named 'in'.
// O - output
// I - input (macro doesn't change it)
// I[0/1] - offset in bytes
#define LOAD_IN_X2(O0, O1, I0, I1)                                             \
  "lh %[" #O0 "], " #I0 "(%[in]) \n\t"                                         \
  "lh %[" #O1 "], " #I1 "(%[in]) \n\t"

// Loads four words with 'ulw' from base operand I0:
//   O0 = *(I0 + I1 + I9 * I5)
//   O1 = *(I0 + I2 + I9 * I6)
//   O2 = *(I0 + I3 + I9 * I7)
//   O3 = *(I0 + I4 + I9 * I8)
// I0 - location
// I1..I9 - offsets in bytes
// I9 goes through XSTR, so it may be a macro that expands to a constant; the
// other offsets are stringified exactly as written.
#define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3,                                    \
                            I0, I1, I2, I3, I4, I5, I6, I7, I8, I9)            \
  "ulw %[" #O0 "], " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "]) \n\t"              \
  "ulw %[" #O1 "], " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "]) \n\t"              \
  "ulw %[" #O2 "], " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "]) \n\t"              \
  "ulw %[" #O3 "], " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "]) \n\t"

// O = ((I * kC1) >> 16) + I
// O - output
// I - input (macro doesn't change it, so it must be a different operand
//     from O: O is overwritten before I is read again by the final addu)
// Requires an asm operand named 'kC1'.
#define MUL_SHIFT_C1(O, I)                                                     \
  "mul %[" #O "], %[" #I "], %[kC1] \n\t"                                      \
  "sra %[" #O "], %[" #O "], 16 \n\t"                                          \
  "addu %[" #O "], %[" #O "], %[" #I "] \n\t"

// O = (I * kC2) >> 16
// O - output
// I - input (macro doesn't change it)
// Requires an asm operand named 'kC2'.
#define MUL_SHIFT_C2(O, I)                                                     \
  "mul %[" #O "], %[" #I "], %[kC2] \n\t"                                      \
  "sra %[" #O "], %[" #O "], 16 \n\t"

// Same as #define MUL_SHIFT_C1 but I and O are the same. It stores the
// intermediary result in TMP (which is overwritten).
#define MUL_SHIFT_C1_IO(IO, TMP)                                               \
  "mul %[" #TMP "], %[" #IO "], %[kC1] \n\t"                                   \
  "sra %[" #TMP "], %[" #TMP "], 16 \n\t"                                      \
  "addu %[" #IO "], %[" #TMP "], %[" #IO "] \n\t"

// For k = 0..3: O(2k) = MUL_SHIFT_C2(I(k)), O(2k+1) = MUL_SHIFT_C1(I(k)).
// Then: IO0 += I4, IO1 += I5, IO2 -= I6, IO3 -= I7.
// O - output
// IO - input/output
// I - input (macro doesn't change it)
#define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7,                          \
                      IO0, IO1, IO2, IO3,                                      \
                      I0, I1, I2, I3, I4, I5, I6, I7)                          \
  MUL_SHIFT_C2(O0, I0)                                                         \
  MUL_SHIFT_C1(O1, I0)                                                         \
  MUL_SHIFT_C2(O2, I1)                                                         \
  MUL_SHIFT_C1(O3, I1)                                                         \
  MUL_SHIFT_C2(O4, I2)                                                         \
  MUL_SHIFT_C1(O5, I2)                                                         \
  MUL_SHIFT_C2(O6, I3)                                                         \
  MUL_SHIFT_C1(O7, I3)                                                         \
  "addu %[" #IO0 "], %[" #IO0 "], %[" #I4 "] \n\t"                             \
  "addu %[" #IO1 "], %[" #IO1 "], %[" #I5 "] \n\t"                             \
  "subu %[" #IO2 "], %[" #IO2 "], %[" #I6 "] \n\t"                             \
  "subu %[" #IO3 "], %[" #IO3 "], %[" #I7 "] \n\t"

// O[31..16] = I[15..0]; O[15..0] is left untouched.
// O - output (its lower halfword is preserved, so it is also read)
// I - input (macro doesn't change it)
#define INSERT_HALF_X2(O0, O1, I0, I1)                                         \
  "ins %[" #O0 "], %[" #I0 "], 16, 16 \n\t"                                    \
  "ins %[" #O1 "], %[" #I1 "], 16, 16 \n\t"

// O = I >> 16 (arithmetic shift)
// O - output
// I - input (macro doesn't change it)
#define SRA_16(O0, O1, O2, O3, I0, I1, I2, I3)                                 \
  "sra %[" #O0 "], %[" #I0 "], 16 \n\t"                                        \
  "sra %[" #O1 "], %[" #I1 "], 16 \n\t"                                        \
  "sra %[" #O2 "], %[" #I2 "], 16 \n\t"                                        \
  "sra %[" #O3 "], %[" #I3 "], 16 \n\t"

// For k = 0..3, on each halfword independently:
//   O(2k)  [31..16 | 15..0] = (I(k) + I(k+4)) >> 3
//   O(2k+1)[31..16 | 15..0] = (I(k) - I(k+4)) >> 3
// All sums/differences are computed first, then all shifts are applied.
// O - output
// I - input (macro doesn't change it)
#define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7,                         \
                       I0, I1, I2, I3, I4, I5, I6, I7)                         \
  "addq.ph %[" #O0 "], %[" #I0 "], %[" #I4 "] \n\t"                            \
  "subq.ph %[" #O1 "], %[" #I0 "], %[" #I4 "] \n\t"                            \
  "addq.ph %[" #O2 "], %[" #I1 "], %[" #I5 "] \n\t"                            \
  "subq.ph %[" #O3 "], %[" #I1 "], %[" #I5 "] \n\t"                            \
  "addq.ph %[" #O4 "], %[" #I2 "], %[" #I6 "] \n\t"                            \
  "subq.ph %[" #O5 "], %[" #I2 "], %[" #I6 "] \n\t"                            \
  "addq.ph %[" #O6 "], %[" #I3 "], %[" #I7 "] \n\t"                            \
  "subq.ph %[" #O7 "], %[" #I3 "], %[" #I7 "] \n\t"                            \
  "shra.ph %[" #O0 "], %[" #O0 "], 3 \n\t"                                     \
  "shra.ph %[" #O1 "], %[" #O1 "], 3 \n\t"                                     \
  "shra.ph %[" #O2 "], %[" #O2 "], 3 \n\t"                                     \
  "shra.ph %[" #O3 "], %[" #O3 "], 3 \n\t"                                     \
  "shra.ph %[" #O4 "], %[" #O4 "], 3 \n\t"                                     \
  "shra.ph %[" #O5 "], %[" #O5 "], 3 \n\t"                                     \
  "shra.ph %[" #O6 "], %[" #O6 "], 3 \n\t"                                     \
  "shra.ph %[" #O7 "], %[" #O7 "], 3 \n\t"

// For each (O, IO, I) triple:
//   precrq.ph.w O, I, IO
//     O  = I[31..16] | IO[31..16]
//   ins IO, I, 16, 16
//     IO = I[15..0]  | IO[15..0]
// i.e. O collects the two upper halfwords and IO ends up with the two lower
// halfwords. O is written before IO is modified, so O must not alias IO or I.
// O - output
// IO - input/output
// I - input (macro doesn't change it)
#define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3,                                  \
                              IO0, IO1, IO2, IO3,                              \
                              I0, I1, I2, I3)                                  \
  "precrq.ph.w %[" #O0 "], %[" #I0 "], %[" #IO0 "] \n\t"                       \
  "precrq.ph.w %[" #O1 "], %[" #I1 "], %[" #IO1 "] \n\t"                       \
  "ins %[" #IO0 "], %[" #I0 "], 16, 16 \n\t"                                   \
  "ins %[" #IO1 "], %[" #I1 "], 16, 16 \n\t"                                   \
  "precrq.ph.w %[" #O2 "], %[" #I2 "], %[" #IO2 "] \n\t"                       \
  "precrq.ph.w %[" #O3 "], %[" #I3 "], %[" #IO3 "] \n\t"                       \
  "ins %[" #IO2 "], %[" #I2 "], 16, 16 \n\t"                                   \
  "ins %[" #IO3 "], %[" #I3 "], 16, 16 \n\t"

// Zero-extends the bytes of each input word into halfwords:
//   preceu.ph.qbr O(2k), I(k)
//     O(2k)   = 0 | I(k)[15..8]  | 0 | I(k)[7..0]
//   preceu.ph.qbl O(2k+1), I(k)
//     O(2k+1) = 0 | I(k)[31..24] | 0 | I(k)[23..16]
// O - output
// I - input (macro doesn't change it)
#define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7,                \
                                I0, I1, I2, I3)                                \
  "preceu.ph.qbr %[" #O0 "], %[" #I0 "] \n\t"                                  \
  "preceu.ph.qbl %[" #O1 "], %[" #I0 "] \n\t"                                  \
  "preceu.ph.qbr %[" #O2 "], %[" #I1 "] \n\t"                                  \
  "preceu.ph.qbl %[" #O3 "], %[" #I1 "] \n\t"                                  \
  "preceu.ph.qbr %[" #O4 "], %[" #I2 "] \n\t"                                  \
  "preceu.ph.qbl %[" #O5 "], %[" #I2 "] \n\t"                                  \
  "preceu.ph.qbr %[" #O6 "], %[" #I3 "] \n\t"                                  \
  "preceu.ph.qbl %[" #O7 "], %[" #I3 "] \n\t"

// Step 1, for k = 0..7, per halfword:
//   IO(k)[31..16 | 15..0] = IO(k)[31..16 | 15..0] + I(k)[31..16 | 15..0]
// Step 2, for k = 0..7, per halfword, saturating left shift:
//   IO(k)[31..16 | 15..0] = IO(k)[31..16 <<(s) 7 | 15..0 <<(s) 7]
// Step 3, for each pair (IO0,IO1), (IO2,IO3), (IO4,IO5), (IO6,IO7):
//   precrqu_s.qb.ph IO(2k), IO(2k+1), IO(2k)
//     each halfword is reduced to one unsigned byte with saturation; the
//     result holds, from most to least significant byte:
//     IO(2k+1) upper, IO(2k+1) lower, IO(2k) upper, IO(2k) lower
// Step 4, store the four packed words with 'usw':
//   IO0 -> I8 + I13 * I9,   IO2 -> I8 + I13 * I10,
//   IO4 -> I8 + I13 * I11,  IO6 -> I8 + I13 * I12
// IO - input/output
// I - input (macro doesn't change it)
// I8 is the destination base operand; I9..I12 are stringified as written and
// I13 goes through XSTR, so it may be a macro that expands to a constant.
#define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7,               \
                         I0, I1, I2, I3, I4, I5, I6, I7,                       \
                         I8, I9, I10, I11, I12, I13)                           \
  "addq.ph %[" #IO0 "], %[" #IO0 "], %[" #I0 "] \n\t"                          \
  "addq.ph %[" #IO1 "], %[" #IO1 "], %[" #I1 "] \n\t"                          \
  "addq.ph %[" #IO2 "], %[" #IO2 "], %[" #I2 "] \n\t"                          \
  "addq.ph %[" #IO3 "], %[" #IO3 "], %[" #I3 "] \n\t"                          \
  "addq.ph %[" #IO4 "], %[" #IO4 "], %[" #I4 "] \n\t"                          \
  "addq.ph %[" #IO5 "], %[" #IO5 "], %[" #I5 "] \n\t"                          \
  "addq.ph %[" #IO6 "], %[" #IO6 "], %[" #I6 "] \n\t"                          \
  "addq.ph %[" #IO7 "], %[" #IO7 "], %[" #I7 "] \n\t"                          \
  "shll_s.ph %[" #IO0 "], %[" #IO0 "], 7 \n\t"                                 \
  "shll_s.ph %[" #IO1 "], %[" #IO1 "], 7 \n\t"                                 \
  "shll_s.ph %[" #IO2 "], %[" #IO2 "], 7 \n\t"                                 \
  "shll_s.ph %[" #IO3 "], %[" #IO3 "], 7 \n\t"                                 \
  "shll_s.ph %[" #IO4 "], %[" #IO4 "], 7 \n\t"                                 \
  "shll_s.ph %[" #IO5 "], %[" #IO5 "], 7 \n\t"                                 \
  "shll_s.ph %[" #IO6 "], %[" #IO6 "], 7 \n\t"                                 \
  "shll_s.ph %[" #IO7 "], %[" #IO7 "], 7 \n\t"                                 \
  "precrqu_s.qb.ph %[" #IO0 "], %[" #IO1 "], %[" #IO0 "] \n\t"                 \
  "precrqu_s.qb.ph %[" #IO2 "], %[" #IO3 "], %[" #IO2 "] \n\t"                 \
  "precrqu_s.qb.ph %[" #IO4 "], %[" #IO5 "], %[" #IO4 "] \n\t"                 \
  "precrqu_s.qb.ph %[" #IO6 "], %[" #IO7 "], %[" #IO6 "] \n\t"                 \
  "usw %[" #IO0 "], " XSTR(I13) "*" #I9 "(%[" #I8 "]) \n\t"                    \
  "usw %[" #IO2 "], " XSTR(I13) "*" #I10 "(%[" #I8 "]) \n\t"                   \
  "usw %[" #IO4 "], " XSTR(I13) "*" #I11 "(%[" #I8 "]) \n\t"                   \
  "usw %[" #IO6 "], " XSTR(I13) "*" #I12 "(%[" #I8 "]) \n\t"

// Output-operand section of an extended-asm statement, including the leading
// ':' separator. Declares temp1..temp10 as early-clobber ("=&r") register
// outputs; variables with those exact names must exist at the point of use.
#define OUTPUT_EARLY_CLOBBER_REGS_10()                                         \
  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),             \
    [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),             \
    [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), [temp9]"=&r"(temp9),             \
    [temp10]"=&r"(temp10)

// Same as OUTPUT_EARLY_CLOBBER_REGS_10(), extended with temp11..temp18.
#define OUTPUT_EARLY_CLOBBER_REGS_18()                                         \
  OUTPUT_EARLY_CLOBBER_REGS_10(),                                              \
  [temp11]"=&r"(temp11), [temp12]"=&r"(temp12), [temp13]"=&r"(temp13),         \
  [temp14]"=&r"(temp14), [temp15]"=&r"(temp15), [temp16]"=&r"(temp16),         \
  [temp17]"=&r"(temp17), [temp18]"=&r"(temp18)

#endif  // WEBP_DSP_MIPS_MACRO_H_