#!/bin/sh
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Every command below runs tools/xngen on a template from src/f32-igemm/ and
# writes the result to src/f32-igemm/gen/. All paths are relative to the
# current working directory, and every invocation is started as a background
# job ("&").
# NOTE(review): the "-D NAME=VALUE" pairs are presumably template parameters
# consumed by tools/xngen -- confirm against that tool.
# NOTE(review): no "wait" is visible in this part of the file; confirm the
# script waits for the background jobs before it exits.

#################################### Scalar ###################################
# Generates the scalar.c.in kernels for the 1x4, 2x4, 4x2 and 4x4 tiles.
gen_scalar_kernels() {
  for tile in 1x4 2x4 4x2 4x4; do
    # "${tile%x*}" is the MR part of the tile, "${tile#*x}" the NR part.
    ### Generic C micro-kernels
    tools/xngen src/f32-igemm/scalar.c.in -D "MR=${tile%x*}" -D "NR=${tile#*x}" -D WASM=0 -D ACTIVATION=LINEAR -o "src/f32-igemm/gen/${tile}-scalar.c" &
    tools/xngen src/f32-igemm/scalar.c.in -D "MR=${tile%x*}" -D "NR=${tile#*x}" -D WASM=0 -D ACTIVATION=RELU -o "src/f32-igemm/gen/${tile}-relu-scalar.c" &
    tools/xngen src/f32-igemm/scalar.c.in -D "MR=${tile%x*}" -D "NR=${tile#*x}" -D WASM=0 -D ACTIVATION=MINMAX -o "src/f32-igemm/gen/${tile}-minmax-scalar.c" &
    ### WAsm-specific micro-kernels
    tools/xngen src/f32-igemm/scalar.c.in -D "MR=${tile%x*}" -D "NR=${tile#*x}" -D WASM=1 -D ACTIVATION=RELU -o "src/f32-igemm/gen/${tile}-relu-wasm.c" &
    tools/xngen src/f32-igemm/scalar.c.in -D "MR=${tile%x*}" -D "NR=${tile#*x}" -D WASM=1 -D ACTIVATION=MINMAX -o "src/f32-igemm/gen/${tile}-minmax-wasm.c" &
  done
}
gen_scalar_kernels

################################## WAsm SIMD ##################################
# Generates the eight activation/architecture variants of one WAsm SIMD tile.
#   $1 - template file name under src/f32-igemm/
#   $2 - MR value
#   $3 - NR value
#   $4 - tile stem used in the output file names (e.g. 1x8 or 1x8s4)
#   $5 - suffix placed right before ".c" in the output name (may be empty)
wasmsimd_variants() {
  tmpl="src/f32-igemm/$1"
  stem="src/f32-igemm/gen/$4"
  tools/xngen "$tmpl" -D "MR=$2" -D "NR=$3" -D FMA=0 -D ACTIVATION=MINMAX -D ARCH=ARM -o "${stem}-minmax-wasmsimd-arm${5}.c" &
  tools/xngen "$tmpl" -D "MR=$2" -D "NR=$3" -D FMA=0 -D ACTIVATION=MINMAX -D ARCH=X86 -o "${stem}-minmax-wasmsimd-x86${5}.c" &
  tools/xngen "$tmpl" -D "MR=$2" -D "NR=$3" -D FMA=0 -D ACTIVATION=MINMAX -D ARCH=RELAXED -o "${stem}-minmax-wasmrelaxedsimd${5}.c" &
  tools/xngen "$tmpl" -D "MR=$2" -D "NR=$3" -D FMA=1 -D ACTIVATION=MINMAX -D ARCH=RELAXED -o "${stem}-minmax-wasmrelaxedsimd-fma${5}.c" &
  # The FMA=0 RELU and LINEAR variants are generated without an ARCH setting.
  tools/xngen "$tmpl" -D "MR=$2" -D "NR=$3" -D FMA=0 -D ACTIVATION=RELU -o "${stem}-relu-wasmsimd${5}.c" &
  tools/xngen "$tmpl" -D "MR=$2" -D "NR=$3" -D FMA=1 -D ACTIVATION=RELU -D ARCH=RELAXED -o "${stem}-relu-wasmrelaxedsimd-fma${5}.c" &
  tools/xngen "$tmpl" -D "MR=$2" -D "NR=$3" -D FMA=0 -D ACTIVATION=LINEAR -o "${stem}-wasmsimd${5}.c" &
  tools/xngen "$tmpl" -D "MR=$2" -D "NR=$3" -D FMA=1 -D ACTIVATION=LINEAR -D ARCH=RELAXED -o "${stem}-wasmrelaxedsimd-fma${5}.c" &
}

# Generates every WAsm SIMD kernel: MR in {1,3,4,5,6} with NR=8 for the three
# MRx8 templates, plus the single 4x2c4 tile.
gen_wasmsimd_kernels() {
  for mr in 1 3 4 5 6; do
    ### LOAD1+BROADCAST micro-kernels
    wasmsimd_variants wasmsimd-loadsplat.c.in "$mr" 8 "${mr}x8" -loadsplat
    ### LOAD4+DUPLICATE micro-kernels
    wasmsimd_variants wasmsimd-splat.c.in "$mr" 8 "${mr}x8" -splat
    ### LOAD4+PERMUTE micro-kernels
    wasmsimd_variants wasmsimd-s4.c.in "$mr" 8 "${mr}x8s4" ""
  done
  ### MRx2 micro-kernels
  wasmsimd_variants MRx2c4-wasmsimd.c.in 4 2 4x2c4 ""
}
gen_wasmsimd_kernels

# Generates the PREFETCH=0 build and the PREFETCH=1 ("prfm") build of one
# assembly template.
#   $1 - tile (e.g. 4x8)
#   $2 - ISA tag (aarch64-neonfma or aarch32-neon)
#   $3 - core tag (e.g. cortex-a75)
asm_prefetch_pair() {
  tools/xngen "src/f32-igemm/$1-$2-$3.S.in" -D PREFETCH=0 -o "src/f32-igemm/gen/$1-minmax-$2-$3.S" &
  tools/xngen "src/f32-igemm/$1-$2-$3.S.in" -D PREFETCH=1 -o "src/f32-igemm/gen/$1-minmax-$2-prfm-$3.S" &
}

############################### AArch64 assembly ##############################
# Generates the AArch64 assembly kernels.
gen_aarch64_asm_kernels() {
  ### LD64 and LD128 micro-kernels (no -D parameters)
  for ld in ld64 ld128; do
    for tile in 4x8 6x8; do
      tools/xngen "src/f32-igemm/${tile}-aarch64-neonfma-${ld}.S.in" -o "src/f32-igemm/gen/${tile}-minmax-aarch64-neonfma-${ld}.S" &
    done
  done

  ### MRx2 micro-kernels
  tools/xngen src/f32-igemm/4x2-aarch64-neonfma-ld64.S.in -o src/f32-igemm/gen/4x2-minmax-aarch64-neonfma-ld64.S &
  asm_prefetch_pair 4x2 aarch64-neonfma cortex-a75

  ### Cortex A53 micro-kernels
  for tile in 1x8 4x8 6x8; do
    asm_prefetch_pair "$tile" aarch64-neonfma cortex-a53
  done

  ### Cortex A75 micro-kernels
  for tile in 1x8 4x8 5x8 6x8; do
    asm_prefetch_pair "$tile" aarch64-neonfma cortex-a75
  done
}
gen_aarch64_asm_kernels

############################### AArch32 assembly ##############################
# Generates the AArch32 assembly kernels.
gen_aarch32_asm_kernels() {
  asm_prefetch_pair 4x8 aarch32-neon cortex-a53
  asm_prefetch_pair 4x8 aarch32-neon cortex-a75
  # These two templates already carry "minmax" in their input names and are
  # each generated with a single PREFETCH setting.
  tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a7.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a7.S &
  tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-ld64.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-ld64.S &
}
gen_aarch32_asm_kernels

################################### ARM NEON ##################################
# Generates the FMA=0 ("neon") and FMA=1 ("neonfma") builds of one NEON
# template. Each tools/xngen invocation is started as a background job.
#   $1 - template file name under src/f32-igemm/
#   $2 - tile written as MRxNR (e.g. 4x8)
#   $3 - DUP value passed to the template
#   $4 - tag that follows the ISA name in the output file (e.g. lane-ld64)
neon_fma_pair() {
  # "${2%x*}" is the MR part of the tile, "${2#*x}" the NR part.
  tools/xngen "src/f32-igemm/$1" -D "MR=${2%x*}" -D "NR=${2#*x}" -D FMA=0 -D "DUP=$3" -o "src/f32-igemm/gen/$2-minmax-neon-$4.c" &
  tools/xngen "src/f32-igemm/$1" -D "MR=${2%x*}" -D "NR=${2#*x}" -D FMA=1 -D "DUP=$3" -o "src/f32-igemm/gen/$2-minmax-neonfma-$4.c" &
}

# Generates the ARM NEON intrinsics kernels.
gen_neon_kernels() {
  ### LD64 micro-kernels
  for tile in 1x8 4x4 4x8 6x8; do
    neon_fma_pair neon-ld64.c.in "$tile" 0 lane-ld64
  done
  ### LD128 micro-kernels
  for tile in 4x8 6x8; do
    neon_fma_pair neon-ld128.c.in "$tile" 0 lane-ld128
  done
  ### MRx2 micro-kernels
  for tile in 4x2 6x2; do
    neon_fma_pair MRx2-neon-ld64.c.in "$tile" 0 lane-ld64
  done
  ### DUP LD64 micro-kernels
  for tile in 1x8 4x8 6x8; do
    neon_fma_pair neon-ld64.c.in "$tile" 1 dup-ld64
  done
  ### DUP LD128 micro-kernels
  for tile in 4x8 6x8; do
    neon_fma_pair neon-ld128.c.in "$tile" 1 dup-ld128
  done
  ### LOAD4+PERMUTE micro-kernels (this template takes no DUP parameter)
  for mr in 1 4 6 8; do
    tools/xngen src/f32-igemm/neon-shuffle.c.in -D "MR=$mr" -D NR=8 -D FMA=0 -o "src/f32-igemm/gen/${mr}x8s4-minmax-neon.c" &
    tools/xngen src/f32-igemm/neon-shuffle.c.in -D "MR=$mr" -D NR=8 -D FMA=1 -o "src/f32-igemm/gen/${mr}x8s4-minmax-neonfma.c" &
  done
}
gen_neon_kernels

################################### x86 SSE ###################################
# Generates the x86 SSE kernels: MR in {1,3,4,5} with NR=8 for each MRx8
# template, plus the single 4x2c4 tile.
gen_sse_kernels() {
  for mr in 1 3 4 5; do
    ### LOAD1+BROADCAST micro-kernels
    tools/xngen src/f32-igemm/sse-load1.c.in -D "MR=$mr" -D NR=8 -o "src/f32-igemm/gen/${mr}x8-minmax-sse-load1.c" &
    ### LOAD4+DUPLICATE micro-kernels
    tools/xngen src/f32-igemm/sse-dup.c.in -D "MR=$mr" -D NR=8 -D SSE=1 -o "src/f32-igemm/gen/${mr}x8-minmax-sse-dup.c" &
    tools/xngen src/f32-igemm/sse-dup.c.in -D "MR=$mr" -D NR=8 -D SSE=2 -o "src/f32-igemm/gen/${mr}x8-minmax-sse2-dup.c" &
    ### LOAD4+PERMUTE micro-kernels
    tools/xngen src/f32-igemm/sse-shuffle.c.in -D "MR=$mr" -D NR=8 -o "src/f32-igemm/gen/${mr}x8s4-minmax-sse.c" &
  done
  ### MRx2 micro-kernels
  tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-minmax-sse.c &
}
gen_sse_kernels

################################### x86 AVX ###################################
# Generates the x86 AVX (FMA=0) and FMA3 (FMA=3) kernels.
gen_avx_kernels() {
  ### AVX+BROADCAST micro-kernels
  for tile in 1x8 4x8 5x8 6x8 7x8 1x16 3x16 4x16 5x16; do
    tools/xngen src/f32-igemm/avx-broadcast.c.in -D "MR=${tile%x*}" -D "NR=${tile#*x}" -D FMA=0 -o "src/f32-igemm/gen/${tile}-minmax-avx-broadcast.c" &
  done
  ### FMA3+BROADCAST micro-kernels (adds the 8x8 tile, absent from the AVX list)
  for tile in 1x8 4x8 5x8 6x8 7x8 8x8 1x16 3x16 4x16 5x16; do
    tools/xngen src/f32-igemm/avx-broadcast.c.in -D "MR=${tile%x*}" -D "NR=${tile#*x}" -D FMA=3 -o "src/f32-igemm/gen/${tile}-minmax-fma3-broadcast.c" &
  done

  for mr in 1 3 4 5; do
    tools/xngen src/f32-igemm/avx-shuffle4.c.in -D "MR=$mr" -D NR=16 -D FMA=3 -o "src/f32-igemm/gen/${mr}x16s4-minmax-fma3-broadcast.c" &
  done
}
gen_avx_kernels

################################# x86 AVX-512 #################################
### AVX512F+BROADCAST micro-kernels
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -o src/f32-igemm/gen/1x16-minmax-avx512f-broadcast.c &
# NOTE(review): the remaining arguments of the next invocation are expected on
# the following line of the file; the trailing backslash joins them -- confirm.
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=4 -D \
NR=16 -o src/f32-igemm/gen/4x16-minmax-avx512f-broadcast.c & 334tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -o src/f32-igemm/gen/5x16-minmax-avx512f-broadcast.c & 335tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -o src/f32-igemm/gen/6x16-minmax-avx512f-broadcast.c & 336tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -o src/f32-igemm/gen/7x16-minmax-avx512f-broadcast.c & 337tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -o src/f32-igemm/gen/8x16-minmax-avx512f-broadcast.c & 338 339################################## Unit tests ################################# 340tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc --output test/f32-igemm-2.cc & 341tools/generate-gemm-test.py --spec test/f32-igemm-relu.yaml --output test/f32-igemm-relu.cc --output test/f32-igemm-relu-2.cc & 342tools/generate-gemm-test.py --spec test/f32-igemm-minmax.yaml --output test/f32-igemm-minmax.cc --output test/f32-igemm-minmax-2.cc & 343 344wait 345