// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Djordje Pesut ([email protected])
11*b2055c35SXin Li
12*b2055c35SXin Li #include "src/dsp/dsp.h"
13*b2055c35SXin Li
14*b2055c35SXin Li #if defined(WEBP_USE_MIPS32)
15*b2055c35SXin Li
16*b2055c35SXin Li #include "src/enc/cost_enc.h"
17*b2055c35SXin Li
GetResidualCost_MIPS32(int ctx0,const VP8Residual * const res)18*b2055c35SXin Li static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
19*b2055c35SXin Li int temp0, temp1;
20*b2055c35SXin Li int v_reg, ctx_reg;
21*b2055c35SXin Li int n = res->first;
22*b2055c35SXin Li // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
23*b2055c35SXin Li int p0 = res->prob[n][ctx0][0];
24*b2055c35SXin Li CostArrayPtr const costs = res->costs;
25*b2055c35SXin Li const uint16_t* t = costs[n][ctx0];
26*b2055c35SXin Li // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
27*b2055c35SXin Li // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
28*b2055c35SXin Li // be missing during the loop.
29*b2055c35SXin Li int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
30*b2055c35SXin Li const int16_t* res_coeffs = res->coeffs;
31*b2055c35SXin Li const int res_last = res->last;
32*b2055c35SXin Li const int const_max_level = MAX_VARIABLE_LEVEL;
33*b2055c35SXin Li const int const_2 = 2;
34*b2055c35SXin Li const uint16_t** p_costs = &costs[n][0];
35*b2055c35SXin Li const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs);
36*b2055c35SXin Li
37*b2055c35SXin Li if (res->last < 0) {
38*b2055c35SXin Li return VP8BitCost(0, p0);
39*b2055c35SXin Li }
40*b2055c35SXin Li
41*b2055c35SXin Li __asm__ volatile (
42*b2055c35SXin Li ".set push \n\t"
43*b2055c35SXin Li ".set noreorder \n\t"
44*b2055c35SXin Li "subu %[temp1], %[res_last], %[n] \n\t"
45*b2055c35SXin Li "sll %[temp0], %[n], 1 \n\t"
46*b2055c35SXin Li "blez %[temp1], 2f \n\t"
47*b2055c35SXin Li " addu %[res_coeffs], %[res_coeffs], %[temp0] \n\t"
48*b2055c35SXin Li "1: \n\t"
49*b2055c35SXin Li "lh %[v_reg], 0(%[res_coeffs]) \n\t"
50*b2055c35SXin Li "addiu %[n], %[n], 1 \n\t"
51*b2055c35SXin Li "negu %[temp0], %[v_reg] \n\t"
52*b2055c35SXin Li "slti %[temp1], %[v_reg], 0 \n\t"
53*b2055c35SXin Li "movn %[v_reg], %[temp0], %[temp1] \n\t"
54*b2055c35SXin Li "sltiu %[temp0], %[v_reg], 2 \n\t"
55*b2055c35SXin Li "move %[ctx_reg], %[v_reg] \n\t"
56*b2055c35SXin Li "movz %[ctx_reg], %[const_2], %[temp0] \n\t"
57*b2055c35SXin Li "sll %[temp1], %[v_reg], 1 \n\t"
58*b2055c35SXin Li "addu %[temp1], %[temp1], %[VP8LevelFixedCosts] \n\t"
59*b2055c35SXin Li "lhu %[temp1], 0(%[temp1]) \n\t"
60*b2055c35SXin Li "slt %[temp0], %[v_reg], %[const_max_level] \n\t"
61*b2055c35SXin Li "movz %[v_reg], %[const_max_level], %[temp0] \n\t"
62*b2055c35SXin Li "addu %[cost], %[cost], %[temp1] \n\t"
63*b2055c35SXin Li "sll %[v_reg], %[v_reg], 1 \n\t"
64*b2055c35SXin Li "sll %[ctx_reg], %[ctx_reg], 2 \n\t"
65*b2055c35SXin Li "addu %[v_reg], %[v_reg], %[t] \n\t"
66*b2055c35SXin Li "lhu %[temp0], 0(%[v_reg]) \n\t"
67*b2055c35SXin Li "addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t"
68*b2055c35SXin Li "addu %[t], %[p_costs], %[ctx_reg] \n\t"
69*b2055c35SXin Li "addu %[cost], %[cost], %[temp0] \n\t"
70*b2055c35SXin Li "addiu %[res_coeffs], %[res_coeffs], 2 \n\t"
71*b2055c35SXin Li "bne %[n], %[res_last], 1b \n\t"
72*b2055c35SXin Li " lw %[t], 0(%[t]) \n\t"
73*b2055c35SXin Li "2: \n\t"
74*b2055c35SXin Li ".set pop \n\t"
75*b2055c35SXin Li : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
76*b2055c35SXin Li [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
77*b2055c35SXin Li [temp1]"=&r"(temp1), [res_coeffs]"+&r"(res_coeffs)
78*b2055c35SXin Li : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
79*b2055c35SXin Li [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
80*b2055c35SXin Li [inc_p_costs]"r"(inc_p_costs)
81*b2055c35SXin Li : "memory"
82*b2055c35SXin Li );
83*b2055c35SXin Li
84*b2055c35SXin Li // Last coefficient is always non-zero
85*b2055c35SXin Li {
86*b2055c35SXin Li const int v = abs(res->coeffs[n]);
87*b2055c35SXin Li assert(v != 0);
88*b2055c35SXin Li cost += VP8LevelCost(t, v);
89*b2055c35SXin Li if (n < 15) {
90*b2055c35SXin Li const int b = VP8EncBands[n + 1];
91*b2055c35SXin Li const int ctx = (v == 1) ? 1 : 2;
92*b2055c35SXin Li const int last_p0 = res->prob[b][ctx][0];
93*b2055c35SXin Li cost += VP8BitCost(0, last_p0);
94*b2055c35SXin Li }
95*b2055c35SXin Li }
96*b2055c35SXin Li return cost;
97*b2055c35SXin Li }
98*b2055c35SXin Li
// Records 'coeffs' in 'res' and sets res->last to the index of the last
// non-zero value among coeffs[0..15], or to -1 when all 16 are zero.
// The search runs backwards from coeffs[15], testing two 16-bit coefficients
// at a time through one unaligned 32-bit load.
static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
                                     VP8Residual* const res) {
  const int16_t* p_coeffs = coeffs;
  int temp0, temp1, temp2, n, n1;
  assert(res->first == 0 || coeffs[0] == 0);

  // '.set noreorder' is in effect: the instruction written right after each
  // branch/jump (shown with a leading space) sits in the delay slot and is
  // executed whether or not the branch is taken.
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // p_coeffs = &coeffs[14] (28 bytes in); n = 15 is the index of the upper
    // coefficient of the current pair; temp2 = -1 is the "all zero" result.
    "addiu %[p_coeffs], %[p_coeffs], 28 \n\t"
    "li %[n], 15 \n\t"
    "li %[temp2], -1 \n\t"
  "0: \n\t"
    // temp0 = the pair (coeffs[n - 1], coeffs[n]) as one 32-bit word.
    "ulw %[temp0], 0(%[p_coeffs]) \n\t"
    // Both zero: go on to the previous pair. Delay slot: temp1 is non-zero
    // iff coeffs[n] (the half at the higher address) is non-zero.
    "beqz %[temp0], 1f \n\t"
#if defined(WORDS_BIGENDIAN)
    " sll %[temp1], %[temp0], 16 \n\t"
#else
    " srl %[temp1], %[temp0], 16 \n\t"
#endif
    // temp0 = (temp1 != 0) ? n : n - 1
    "addiu %[n1], %[n], -1 \n\t"
    "movz %[temp0], %[n1], %[temp1] \n\t"
    "movn %[temp0], %[n], %[temp1] \n\t"
    // Done. Delay slot: temp2 = temp0.
    "j 2f \n\t"
    " addiu %[temp2], %[temp0], 0 \n\t"
  "1: \n\t"
    // Step back one pair; keep looping while n > 0 (n takes 15, 13, ..., 1).
    "addiu %[n], %[n], -2 \n\t"
    "bgtz %[n], 0b \n\t"
    " addiu %[p_coeffs], %[p_coeffs], -4 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [p_coeffs]"+&r"(p_coeffs), [temp0]"=&r"(temp0),
      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [n]"=&r"(n), [n1]"=&r"(n1)
    :
    : "memory"
  );
  res->last = temp2;
  res->coeffs = coeffs;
}
139*b2055c35SXin Li
//------------------------------------------------------------------------------
// Entry point
142*b2055c35SXin Li
143*b2055c35SXin Li extern void VP8EncDspCostInitMIPS32(void);
144*b2055c35SXin Li
VP8EncDspCostInitMIPS32(void)145*b2055c35SXin Li WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
146*b2055c35SXin Li VP8GetResidualCost = GetResidualCost_MIPS32;
147*b2055c35SXin Li VP8SetResidualCoeffs = SetResidualCoeffs_MIPS32;
148*b2055c35SXin Li }
149*b2055c35SXin Li
150*b2055c35SXin Li #else // !WEBP_USE_MIPS32
151*b2055c35SXin Li
152*b2055c35SXin Li WEBP_DSP_INIT_STUB(VP8EncDspCostInitMIPS32)
153*b2055c35SXin Li
154*b2055c35SXin Li #endif // WEBP_USE_MIPS32
155