/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Tests for halving-add idiomatic vectorization.
 *
 * Alternative version expressed with logical shift right
 * in the higher precision (has no impact on idiom).
 */
public class HaddAltByte {

  // Number of distinct byte values; the inputs hold every (i, j) byte pair.
  private static final int N = 256;
  // All N * N cross-values plus 15 extra elements for the cleanup loop.
  private static final int M = N * N + 15;

  static byte[] sB1 = new byte[M];
  static byte[] sB2 = new byte[M];
  static byte[] sBo = new byte[M];

  // Signed halving add: bo[i] = (byte) ((b1[i] + b2[i]) >>> 1) over the
  // shortest of the three array lengths.
  //
  /// CHECK-START: void HaddAltByte.halving_add_signed(byte[], byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>> Add [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  // HalvingAdd idiom is not supported for SVE.
  /// CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-FI:
  private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) ((b1[i] + b2[i]) >>> 1);
    }
  }

  // Unsigned halving add: both operands are masked with 0xff before the add,
  // so bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff)) >>> 1).
  //
  /// CHECK-START: void HaddAltByte.halving_add_unsigned(byte[], byte[], byte[]) instruction_simplifier (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>> Add [<<And1>>,<<And2>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START: void HaddAltByte.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>> Add [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  // HalvingAdd idiom is not supported for SVE.
  /// CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-FI:
  private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff)) >>> 1);
    }
  }

  // Signed rounding halving add: adds 1 before the shift, so
  // bo[i] = (byte) ((b1[i] + b2[i] + 1) >>> 1).
  //
  /// CHECK-START: void HaddAltByte.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  // HalvingAdd idiom is not supported for SVE.
  /// CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-FI:
  private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) ((b1[i] + b2[i] + 1) >>> 1);
    }
  }

  // Unsigned rounding halving add: operands are masked with 0xff and 1 is
  // added before the shift.
  //
  /// CHECK-START: void HaddAltByte.rounding_halving_add_unsigned(byte[], byte[], byte[]) instruction_simplifier (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And1>>,<<And2>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START: void HaddAltByte.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  // HalvingAdd idiom is not supported for SVE.
  /// CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-FI:
  private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff) + 1) >>> 1);
    }
  }

  // Signed halving add against the constant 0x7f:
  // bo[i] = (byte) ((b1[i] + 0x7f) >>> 1).
  //
  /// CHECK-START: void HaddAltByte.halving_add_signed_constant(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>> Add [<<Get>>,<<I127>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127 loop:none
  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>] loop:none
  /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
  /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  // HalvingAdd idiom is not supported for SVE.
  /// CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127 loop:none
  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>] loop:none
  /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-FI:
  private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
    int min_length = Math.min(bo.length, b1.length);
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) ((b1[i] + 0x7f) >>> 1);
    }
  }

  // Unsigned halving add against the constant 0xff:
  // bo[i] = (byte) (((b1[i] & 0xff) + 0xff) >>> 1).
  //
  /// CHECK-START: void HaddAltByte.halving_add_unsigned_constant(byte[], byte[]) instruction_simplifier (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<And:i\d+>> And [<<Get>>,<<I255>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>> Add [<<And>>,<<I255>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START: void HaddAltByte.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255 loop:none
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>> Add [<<Get>>,<<I255>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<UShr>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255 loop:none
  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>] loop:none
  /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
  /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  // HalvingAdd idiom is not supported for SVE.
  /// CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255 loop:none
  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>] loop:none
  /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
  //
  /// CHECK-FI:
  private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
    int min_length = Math.min(bo.length, b1.length);
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) (((b1[i] & 0xff) + 0xff) >>> 1);
    }
  }

  // Fills the static inputs, runs every idiom and compares each output element
  // against a scalar reference; throws Error on the first mismatch and prints
  // a pass message otherwise.
  public static void main() {
    // Initialize cross-values to test all cases, and also
    // set up some extra values to exercise the cleanup loop.
    int k = 0;
    for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
        sB1[k] = (byte) i;
        sB2[k] = (byte) j;
        k++;
      }
    }
    for (int i = 0; i < 15; i++) {
      sB1[k] = (byte) i;
      sB2[k] = 100;
      k++;
    }
    expectEquals(k, M);

    // Test halving add idioms. Note that the expected result is computed
    // with the arithmetic >> to demonstrate the computed narrower result
    // does not depend on the wider >> or >>>.
    halving_add_signed(sB1, sB2, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) ((sB1[i] + sB2[i]) >> 1);
      expectEquals(e, sBo[i]);
    }
    halving_add_unsigned(sB1, sB2, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) (((sB1[i] & 0xff) + (sB2[i] & 0xff)) >> 1);
      expectEquals(e, sBo[i]);
    }
    rounding_halving_add_signed(sB1, sB2, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) ((sB1[i] + sB2[i] + 1) >> 1);
      expectEquals(e, sBo[i]);
    }
    rounding_halving_add_unsigned(sB1, sB2, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) (((sB1[i] & 0xff) + (sB2[i] & 0xff) + 1) >> 1);
      expectEquals(e, sBo[i]);
    }
    halving_add_signed_constant(sB1, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) ((sB1[i] + 0x7f) >> 1);
      expectEquals(e, sBo[i]);
    }
    halving_add_unsigned_constant(sB1, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) (((sB1[i] & 0xff) + 0xff) >> 1);
      expectEquals(e, sBo[i]);
    }

    System.out.println("HaddAltByte passed");
  }

  // Throws Error reporting both values when expected and result differ.
  private static void expectEquals(int expected, int result) {
    if (expected != result) {
      throw new Error("Expected: " + expected + ", found: " + result);
    }
  }
}