/******************************************************************************
 * Copyright © 2024, VideoLAN and dav1d authors
 * Copyright © 2024, Loongson Technology Corporation Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

#ifndef DAV1D_SRC_LOONGSON_UTIL_S
#define DAV1D_SRC_LOONGSON_UTIL_S

#ifndef DEFAULT_ALIGN
#define DEFAULT_ALIGN 5
#endif

// The trailing 'l' means "local": functionl defines a local (hidden) function.
.macro functionl name, align=DEFAULT_ALIGN
.macro endfuncl
    jirl $r0, $r1, 0x0
    .size \name, . - \name
    .purgem endfuncl
.endm
.text ;
.align \align ;
.hidden \name ;
.type \name, @function ;
\name: ;
.endm
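
// Illustrative usage only (not part of the upstream file): a hypothetical
// local helper built with the pair above. endfuncl emits the return
// (jirl $r0, $r1, 0x0), records the symbol size, and purges itself so the
// next functionl can redefine it.
//
//     functionl helper_lsx
//         ...                  // function body
//     endfuncl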

// Transpose a 4x16 byte matrix held in \in0-\in3, using \in4-\in7 as
// scratch. On return, \inN holds input columns N, N+4, N+8 and N+12,
// one 4-byte column (rows 0-3) per 32-bit word.
.macro TRANSPOSE_4x16B in0, in1, in2, in3, in4, in5, in6, in7
    vpackev.b \in4, \in1, \in0
    vpackod.b \in5, \in1, \in0
    vpackev.b \in6, \in3, \in2
    vpackod.b \in7, \in3, \in2

    vpackev.h \in0, \in6, \in4
    vpackod.h \in2, \in6, \in4
    vpackev.h \in1, \in7, \in5
    vpackod.h \in3, \in7, \in5
.endm

// Transpose an 8x16 byte matrix held in \in0-\in7, using \in8-\in9 as
// scratch. On return, \inN holds input columns N and N+8, one 8-byte
// column (rows 0-7) per 64-bit half of the register.
.macro TRANSPOSE_8x16B in0, in1, in2, in3, in4, in5, in6, in7, in8, in9
    vpackev.b \in8, \in1, \in0
    vpackod.b \in9, \in1, \in0
    vpackev.b \in1, \in3, \in2
    vpackod.b \in3, \in3, \in2
    vpackev.b \in0, \in5, \in4
    vpackod.b \in5, \in5, \in4
    vpackev.b \in2, \in7, \in6
    vpackod.b \in7, \in7, \in6

    vpackev.h \in4, \in2, \in0
    vpackod.h \in2, \in2, \in0
    vpackev.h \in6, \in7, \in5
    vpackod.h \in7, \in7, \in5
    vpackev.h \in5, \in3, \in9
    vpackod.h \in9, \in3, \in9
    vpackev.h \in3, \in1, \in8
    vpackod.h \in8, \in1, \in8

    vpackev.w \in0, \in4, \in3
    vpackod.w \in4, \in4, \in3
    vpackev.w \in1, \in6, \in5
    vpackod.w \in5, \in6, \in5
    vpackod.w \in6, \in2, \in8
    vpackev.w \in2, \in2, \in8
    vpackev.w \in3, \in7, \in9
    vpackod.w \in7, \in7, \in9
.endm

// Load eight 128-bit LSX registers from \src at byte offsets
// \start + n*\stride, n = 0..7 (all offsets are assemble-time constants).
.macro vld_x8 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7
    vld \in0, \src, \start
    vld \in1, \src, \start+(\stride*1)
    vld \in2, \src, \start+(\stride*2)
    vld \in3, \src, \start+(\stride*3)
    vld \in4, \src, \start+(\stride*4)
    vld \in5, \src, \start+(\stride*5)
    vld \in6, \src, \start+(\stride*6)
    vld \in7, \src, \start+(\stride*7)
.endm

// Store eight 128-bit LSX registers to \src at byte offsets
// \start + n*\stride, n = 0..7.
.macro vst_x8 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7
    vst \in0, \src, \start
    vst \in1, \src, \start+(\stride*1)
    vst \in2, \src, \start+(\stride*2)
    vst \in3, \src, \start+(\stride*3)
    vst \in4, \src, \start+(\stride*4)
    vst \in5, \src, \start+(\stride*5)
    vst \in6, \src, \start+(\stride*6)
    vst \in7, \src, \start+(\stride*7)
.endm

// Load sixteen 128-bit LSX registers: the first eight via vld_x8, the rest
// at byte offsets \start + n*\stride, n = 8..15.
.macro vld_x16 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7, \
               in8, in9, in10, in11, in12, in13, in14, in15

    vld_x8 \src, \start, \stride, \in0, \in1, \in2, \in3, \in4, \in5, \in6, \in7

    vld \in8, \src, \start+(\stride*8)
    vld \in9, \src, \start+(\stride*9)
    vld \in10, \src, \start+(\stride*10)
    vld \in11, \src, \start+(\stride*11)
    vld \in12, \src, \start+(\stride*12)
    vld \in13, \src, \start+(\stride*13)
    vld \in14, \src, \start+(\stride*14)
    vld \in15, \src, \start+(\stride*15)
.endm

// Store sixteen 128-bit LSX registers: the first eight via vst_x8, the rest
// at byte offsets \start + n*\stride, n = 8..15.
.macro vst_x16 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7, \
               in8, in9, in10, in11, in12, in13, in14, in15

    vst_x8 \src, \start, \stride, \in0, \in1, \in2, \in3, \in4, \in5, \in6, \in7

    vst \in8, \src, \start+(\stride*8)
    vst \in9, \src, \start+(\stride*9)
    vst \in10, \src, \start+(\stride*10)
    vst \in11, \src, \start+(\stride*11)
    vst \in12, \src, \start+(\stride*12)
    vst \in13, \src, \start+(\stride*13)
    vst \in14, \src, \start+(\stride*14)
    vst \in15, \src, \start+(\stride*15)
.endm
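
// Illustrative sketch only (not part of the upstream file; register aliases
// such as a0/vr0 are assumed from dav1d's loongson_asm.S): load eight
// 16-byte rows starting at a0, byte-transpose them with vr8/vr9 as scratch,
// then store the eight registers back in place:
//
//     vld_x8          a0, 0, 16, vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7
//     TRANSPOSE_8x16B vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7, vr8, vr9
//     vst_x8          a0, 0, 16, vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7
//
// The xv* macros below are the 256-bit LASX counterparts of the 128-bit LSX
// vld_x*/vst_x* macros above.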

.macro xvld_x8 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7
    xvld \in0, \src, \start
    xvld \in1, \src, \start+(\stride*1)
    xvld \in2, \src, \start+(\stride*2)
    xvld \in3, \src, \start+(\stride*3)
    xvld \in4, \src, \start+(\stride*4)
    xvld \in5, \src, \start+(\stride*5)
    xvld \in6, \src, \start+(\stride*6)
    xvld \in7, \src, \start+(\stride*7)
.endm

.macro xvst_x8 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7
    xvst \in0, \src, \start
    xvst \in1, \src, \start+(\stride*1)
    xvst \in2, \src, \start+(\stride*2)
    xvst \in3, \src, \start+(\stride*3)
    xvst \in4, \src, \start+(\stride*4)
    xvst \in5, \src, \start+(\stride*5)
    xvst \in6, \src, \start+(\stride*6)
    xvst \in7, \src, \start+(\stride*7)
.endm

.macro xvld_x16 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7, \
                in8, in9, in10, in11, in12, in13, in14, in15

    xvld_x8 \src, \start, \stride, \in0, \in1, \in2, \in3, \in4, \in5, \in6, \in7

    xvld \in8, \src, \start+(\stride*8)
    xvld \in9, \src, \start+(\stride*9)
    xvld \in10, \src, \start+(\stride*10)
    xvld \in11, \src, \start+(\stride*11)
    xvld \in12, \src, \start+(\stride*12)
    xvld \in13, \src, \start+(\stride*13)
    xvld \in14, \src, \start+(\stride*14)
    xvld \in15, \src, \start+(\stride*15)
.endm

.macro xvst_x16 src, start, stride, in0, in1, in2, in3, in4, in5, in6, in7, \
                in8, in9, in10, in11, in12, in13, in14, in15

    xvst_x8 \src, \start, \stride, \in0, \in1, \in2, \in3, \in4, \in5, \in6, \in7

    xvst \in8, \src, \start+(\stride*8)
    xvst \in9, \src, \start+(\stride*9)
    xvst \in10, \src, \start+(\stride*10)
    xvst \in11, \src, \start+(\stride*11)
    xvst \in12, \src, \start+(\stride*12)
    xvst \in13, \src, \start+(\stride*13)
    xvst \in14, \src, \start+(\stride*14)
    xvst \in15, \src, \start+(\stride*15)
.endm

#endif /* DAV1D_SRC_LOONGSON_UTIL_S */