xref: /aosp_15_r20/external/mesa3d/src/intel/executor/examples/dp4a.lua (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1--[[
2
3Execute the example from the Dot Product 4 Accumulate
4instruction as seen in the PRM.
5
6    mov (1) r1.0:d 0x0102037F:d
7    // (char4)(0x1,0x2,0x3,0x7F)
8    mov (1) r2.0:d 50:d
9    dp4a (1) r3.0:d r2:d r1:d r1:d
10    // r3.0 = 50 + (0x1*0x1 + 0x2*0x2 + 0x3*0x3 + 0x7F*0x7F)
11    // = 50 + (1 + 4 + 9 + 16129)
12    // = 16193
13
14--]]
15
-- NOTE(review): check_ver is supplied by the executor environment, not by
-- this file. Presumably it stops the script unless the hardware version
-- matches 12, the version this dp4a example targets -- confirm against the
-- executor documentation.
check_ver(12)
17
--- Host-side reference for the "dot product 4 accumulate" example.
-- Adds the four element-wise products of `a` and `b` to the accumulator `c`,
-- one lane after another in index order. Only indices 1..4 are read; any
-- further elements are ignored.
-- @param a sequence whose first four entries are the first operand lanes
-- @param b sequence whose first four entries are the second operand lanes
-- @param c initial accumulator value
-- @return c + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] + a[4]*b[4]
function DP4A(a, b, c)
  -- Written out lane by lane; `+` is left-associative, so the running sum
  -- is built in the same order as an index loop from 1 to 4 would build it.
  return c
    + a[1] * b[1]
    + a[2] * b[2]
    + a[3] * b[3]
    + a[4] * b[4]
end
25
-- Kernel text passed to the executor. Per the example in the header comment:
-- g1 receives the packed bytes 0x01, 0x02, 0x03, 0x7F, g2 receives the
-- accumulator 50, and dp4a writes g2 + dot(g1, g1) into g3.
-- NOTE(review): the '@'-prefixed lines are executor macros; presumably @id
-- fills g9 with a per-lane offset, @write stores g3 at that offset and @eot
-- ends the thread -- confirm against the executor documentation.
local dp4a_kernel = [[
    @id   g9

    @mov  g1  0x0102037F
    @mov  g2  50

    dp4a(8)  g3<1>UD  g2<8,8,1>UD  g1<8,8,1>UD  g1<8,8,1>UD  { align1 @1 1Q };

    @write g9 g3
    @eot
  ]]

-- Run the kernel; `r` is whatever result object execute() hands back.
local r = execute({ src = dp4a_kernel })

-- Host-side reference value for the same inputs (the four bytes of
-- 0x0102037F, used as both operands, accumulated onto 50).
local lanes = {1, 2, 3, 0x7F}
local expected = DP4A(lanes, lanes, 50)

print("expected", expected)
-- NOTE(review): r[0] is presumably the first value the kernel wrote out.
print("calculated", r[0])
42