--[[ Execute the example from the Dot Product 4 Accumulate instruction as seen in the PRM. mov (1) r1.0:d 0x0102037F:d // (char4)(0x1,0x2,0x3,0x7F) mov (1) r2.0:d 50:d dp4a (1) r3.0:d r2:d r1:d r1:d // r3.0 = 50 + (0x1*0x1 + 0x2*0x2 + 0x3*0x3 + 0x7F*0x7F) // = 50 + (1 + 4 + 9 + 16129) // = 16193 --]] check_ver(12) function DP4A(a, b, c) local r = c for i = 1, 4 do r = r + a[i] * b[i] end return r end local r = execute { src = [[ @id g9 @mov g1 0x0102037F @mov g2 50 dp4a(8) g3<1>UD g2<8,8,1>UD g1<8,8,1>UD g1<8,8,1>UD { align1 @1 1Q }; @write g9 g3 @eot ]], } print("expected", DP4A({1,2,3,0x7F}, {1,2,3,0x7F}, 50)) print("calculated", r[0])