// xref: /aosp_15_r20/external/mesa3d/src/microsoft/clc/compute_test.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdexcept>
27 
28 #include <unknwn.h>
29 #include <directx/d3d12.h>
30 #include <dxgi1_4.h>
31 #include <gtest/gtest.h>
32 #include <wrl.h>
33 #include <dxguids/dxguids.h>
34 
35 #include "util/u_debug.h"
36 #include "clc_compiler.h"
37 #include "compute_test.h"
38 #include "dxil_validator.h"
39 
40 #include <spirv-tools/libspirv.hpp>
41 
42 #if (defined(_WIN32) && defined(_MSC_VER))
43 inline D3D12_CPU_DESCRIPTOR_HANDLE
GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)44 GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
45 {
46    return heap->GetCPUDescriptorHandleForHeapStart();
47 }
48 inline D3D12_GPU_DESCRIPTOR_HANDLE
GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)49 GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
50 {
51    return heap->GetGPUDescriptorHandleForHeapStart();
52 }
53 inline D3D12_HEAP_PROPERTIES
GetCustomHeapProperties(ID3D12Device * dev,D3D12_HEAP_TYPE type)54 GetCustomHeapProperties(ID3D12Device *dev, D3D12_HEAP_TYPE type)
55 {
56    return dev->GetCustomHeapProperties(0, type);
57 }
58 #else
59 inline D3D12_CPU_DESCRIPTOR_HANDLE
GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)60 GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
61 {
62    D3D12_CPU_DESCRIPTOR_HANDLE ret;
63    heap->GetCPUDescriptorHandleForHeapStart(&ret);
64    return ret;
65 }
66 inline D3D12_GPU_DESCRIPTOR_HANDLE
GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)67 GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
68 {
69    D3D12_GPU_DESCRIPTOR_HANDLE ret;
70    heap->GetGPUDescriptorHandleForHeapStart(&ret);
71    return ret;
72 }
73 inline D3D12_HEAP_PROPERTIES
GetCustomHeapProperties(ID3D12Device * dev,D3D12_HEAP_TYPE type)74 GetCustomHeapProperties(ID3D12Device *dev, D3D12_HEAP_TYPE type)
75 {
76    D3D12_HEAP_PROPERTIES ret;
77    dev->GetCustomHeapProperties(&ret, 0, type);
78    return ret;
79 }
80 #endif
81 
82 using std::runtime_error;
83 using Microsoft::WRL::ComPtr;
84 
85 enum compute_test_debug_flags {
86    COMPUTE_DEBUG_EXPERIMENTAL_SHADERS = 1 << 0,
87    COMPUTE_DEBUG_USE_HW_D3D           = 1 << 1,
88    COMPUTE_DEBUG_OPTIMIZE_LIBCLC      = 1 << 2,
89    COMPUTE_DEBUG_SERIALIZE_LIBCLC     = 1 << 3,
90 };
91 
92 static const struct debug_named_value compute_debug_options[] = {
93    { "experimental_shaders",  COMPUTE_DEBUG_EXPERIMENTAL_SHADERS, "Enable experimental shaders" },
94    { "use_hw_d3d",            COMPUTE_DEBUG_USE_HW_D3D,           "Use a hardware D3D device"   },
95    { "optimize_libclc",       COMPUTE_DEBUG_OPTIMIZE_LIBCLC,      "Optimize the clc_libclc before using it" },
96    { "serialize_libclc",      COMPUTE_DEBUG_SERIALIZE_LIBCLC,     "Serialize and deserialize the clc_libclc" },
97    DEBUG_NAMED_VALUE_END
98 };
99 
100 DEBUG_GET_ONCE_FLAGS_OPTION(debug_compute, "COMPUTE_TEST_DEBUG", compute_debug_options, 0)
101 
/* Logger hook for warnings: writes msg to stderr.  priv is not used. */
static void warning_callback(void *priv, const char *msg)
{
   (void)priv;
   FILE *const sink = stderr;
   fprintf(sink, "WARNING: %s\n", msg);
}
106 
/* Logger hook for errors: writes msg to stderr.  priv is not used. */
static void error_callback(void *priv, const char *msg)
{
   (void)priv;
   FILE *const sink = stderr;
   fprintf(sink, "ERROR: %s\n", msg);
}
111 
112 static const struct clc_logger logger = {
113    NULL,
114    error_callback,
115    warning_callback,
116 };
117 
118 void
enable_d3d12_debug_layer()119 ComputeTest::enable_d3d12_debug_layer()
120 {
121    HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
122    if (!hD3D12Mod) {
123       fprintf(stderr, "D3D12: failed to load D3D12.DLL\n");
124       return;
125    }
126 
127    typedef HRESULT(WINAPI * PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid,
128                                                            void **ppFactory);
129    PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(hD3D12Mod, "D3D12GetDebugInterface");
130    if (!D3D12GetDebugInterface) {
131       fprintf(stderr, "D3D12: failed to load D3D12GetDebugInterface from D3D12.DLL\n");
132       return;
133    }
134 
135    ID3D12Debug *debug;
136    if (FAILED(D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void **)& debug))) {
137       fprintf(stderr, "D3D12: D3D12GetDebugInterface failed\n");
138       return;
139    }
140 
141    debug->EnableDebugLayer();
142 }
143 
144 IDXGIFactory4 *
get_dxgi_factory()145 ComputeTest::get_dxgi_factory()
146 {
147    static const GUID IID_IDXGIFactory4 = {
148       0x1bc6ea02, 0xef36, 0x464f,
149       { 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
150    };
151 
152    typedef HRESULT(WINAPI * PFN_CREATE_DXGI_FACTORY)(REFIID riid,
153                                                      void **ppFactory);
154    PFN_CREATE_DXGI_FACTORY CreateDXGIFactory;
155 
156    HMODULE hDXGIMod = LoadLibrary("DXGI.DLL");
157    if (!hDXGIMod)
158       throw runtime_error("Failed to load DXGI.DLL");
159 
160    CreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY)GetProcAddress(hDXGIMod, "CreateDXGIFactory");
161    if (!CreateDXGIFactory)
162       throw runtime_error("Failed to load CreateDXGIFactory from DXGI.DLL");
163 
164    IDXGIFactory4 *factory = NULL;
165    HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory4, (void **)&factory);
166    if (FAILED(hr))
167       throw runtime_error("CreateDXGIFactory failed");
168 
169    return factory;
170 }
171 
172 IDXGIAdapter1 *
choose_adapter(IDXGIFactory4 * factory)173 ComputeTest::choose_adapter(IDXGIFactory4 *factory)
174 {
175    IDXGIAdapter1 *ret;
176 
177    if (debug_get_option_debug_compute() & COMPUTE_DEBUG_USE_HW_D3D) {
178       for (unsigned i = 0; SUCCEEDED(factory->EnumAdapters1(i, &ret)); i++) {
179          DXGI_ADAPTER_DESC1 desc;
180          ret->GetDesc1(&desc);
181          if (!(desc.Flags & D3D_DRIVER_TYPE_SOFTWARE))
182             return ret;
183       }
184       throw runtime_error("Failed to enum hardware adapter");
185    } else {
186       if (FAILED(factory->EnumWarpAdapter(__uuidof(IDXGIAdapter1),
187          (void **)& ret)))
188          throw runtime_error("Failed to enum warp adapter");
189       return ret;
190    }
191 }
192 
193 ID3D12Device *
create_device(IDXGIAdapter1 * adapter)194 ComputeTest::create_device(IDXGIAdapter1 *adapter)
195 {
196    typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown *, D3D_FEATURE_LEVEL, REFIID, void **);
197    PFN_D3D12CREATEDEVICE D3D12CreateDevice;
198 
199    HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
200    if (!hD3D12Mod)
201       throw runtime_error("failed to load D3D12.DLL");
202 
203    if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS) {
204       typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID *, void *, UINT *);
205       PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures;
206       D3D12EnableExperimentalFeatures = (PFN_D3D12ENABLEEXPERIMENTALFEATURES)
207          GetProcAddress(hD3D12Mod, "D3D12EnableExperimentalFeatures");
208       if (FAILED(D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL)))
209          throw runtime_error("failed to enable experimental shader models");
210    }
211 
212    D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(hD3D12Mod, "D3D12CreateDevice");
213    if (!D3D12CreateDevice)
214       throw runtime_error("failed to load D3D12CreateDevice from D3D12.DLL");
215 
216    ID3D12Device *dev;
217    if (FAILED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_12_0,
218        __uuidof(ID3D12Device), (void **)& dev)))
219       throw runtime_error("D3D12CreateDevice failed");
220 
221    return dev;
222 }
223 
224 ComPtr<ID3D12RootSignature>
create_root_signature(const ComputeTest::Resources & resources)225 ComputeTest::create_root_signature(const ComputeTest::Resources &resources)
226 {
227    D3D12_ROOT_PARAMETER1 root_param;
228    root_param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
229    root_param.DescriptorTable.NumDescriptorRanges = resources.ranges.size();
230    root_param.DescriptorTable.pDescriptorRanges = resources.ranges.data();
231    root_param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
232 
233    D3D12_ROOT_SIGNATURE_DESC1 root_sig_desc;
234    root_sig_desc.NumParameters = 1;
235    root_sig_desc.pParameters = &root_param;
236    root_sig_desc.NumStaticSamplers = 0;
237    root_sig_desc.pStaticSamplers = NULL;
238    root_sig_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
239 
240    D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_desc;
241    versioned_desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
242    versioned_desc.Desc_1_1 = root_sig_desc;
243 
244    ID3DBlob *sig, *error;
245    if (FAILED(D3D12SerializeVersionedRootSignature(&versioned_desc,
246        &sig, &error)))
247       throw runtime_error("D3D12SerializeVersionedRootSignature failed");
248 
249    ComPtr<ID3D12RootSignature> ret;
250    if (FAILED(dev->CreateRootSignature(0,
251        sig->GetBufferPointer(),
252        sig->GetBufferSize(),
253        __uuidof(ID3D12RootSignature),
254        (void **)& ret)))
255       throw runtime_error("CreateRootSignature failed");
256 
257    return ret;
258 }
259 
260 ComPtr<ID3D12PipelineState>
create_pipeline_state(ComPtr<ID3D12RootSignature> & root_sig,const struct clc_dxil_object & dxil)261 ComputeTest::create_pipeline_state(ComPtr<ID3D12RootSignature> &root_sig,
262                                    const struct clc_dxil_object &dxil)
263 {
264    D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc = { root_sig.Get() };
265    pipeline_desc.CS.pShaderBytecode = dxil.binary.data;
266    pipeline_desc.CS.BytecodeLength = dxil.binary.size;
267 
268    ComPtr<ID3D12PipelineState> pipeline_state;
269    if (FAILED(dev->CreateComputePipelineState(&pipeline_desc,
270                                               __uuidof(ID3D12PipelineState),
271                                               (void **)& pipeline_state)))
272       throw runtime_error("Failed to create pipeline state");
273    return pipeline_state;
274 }
275 
276 ComPtr<ID3D12Resource>
create_buffer(int size,D3D12_HEAP_TYPE heap_type)277 ComputeTest::create_buffer(int size, D3D12_HEAP_TYPE heap_type)
278 {
279    D3D12_RESOURCE_DESC desc;
280    desc.Format = DXGI_FORMAT_UNKNOWN;
281    desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
282    desc.Width = size;
283    desc.Height = 1;
284    desc.DepthOrArraySize = 1;
285    desc.MipLevels = 1;
286    desc.SampleDesc.Count = 1;
287    desc.SampleDesc.Quality = 0;
288    desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
289    desc.Flags = heap_type == D3D12_HEAP_TYPE_DEFAULT ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE;
290    desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
291 
292    D3D12_HEAP_PROPERTIES heap_pris = GetCustomHeapProperties(dev, heap_type);
293 
294    ComPtr<ID3D12Resource> res;
295    if (FAILED(dev->CreateCommittedResource(&heap_pris,
296        D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON,
297        NULL, __uuidof(ID3D12Resource), (void **)&res)))
298       throw runtime_error("CreateCommittedResource failed");
299 
300    return res;
301 }
302 
303 ComPtr<ID3D12Resource>
create_upload_buffer_with_data(const void * data,size_t size)304 ComputeTest::create_upload_buffer_with_data(const void *data, size_t size)
305 {
306    auto upload_res = create_buffer(size, D3D12_HEAP_TYPE_UPLOAD);
307 
308    void *ptr = NULL;
309    D3D12_RANGE res_range = { 0, (SIZE_T)size };
310    if (FAILED(upload_res->Map(0, &res_range, (void **)&ptr)))
311       throw runtime_error("Failed to map upload-buffer");
312    assert(ptr);
313    memcpy(ptr, data, size);
314    upload_res->Unmap(0, &res_range);
315    return upload_res;
316 }
317 
318 ComPtr<ID3D12Resource>
create_sized_buffer_with_data(size_t buffer_size,const void * data,size_t data_size)319 ComputeTest::create_sized_buffer_with_data(size_t buffer_size,
320                                            const void *data,
321                                            size_t data_size)
322 {
323    auto upload_res = create_upload_buffer_with_data(data, data_size);
324 
325    auto res = create_buffer(buffer_size, D3D12_HEAP_TYPE_DEFAULT);
326    resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
327    cmdlist->CopyBufferRegion(res.Get(), 0, upload_res.Get(), 0, data_size);
328    resource_barrier(res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON);
329    execute_cmdlist();
330 
331    return res;
332 }
333 
334 void
get_buffer_data(ComPtr<ID3D12Resource> res,void * buf,size_t size)335 ComputeTest::get_buffer_data(ComPtr<ID3D12Resource> res,
336                              void *buf, size_t size)
337 {
338    auto readback_res = create_buffer(align(size, 4), D3D12_HEAP_TYPE_READBACK);
339    resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
340    cmdlist->CopyResource(readback_res.Get(), res.Get());
341    resource_barrier(res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COMMON);
342    execute_cmdlist();
343 
344    void *ptr = NULL;
345    D3D12_RANGE res_range = { 0, size };
346    if (FAILED(readback_res->Map(0, &res_range, &ptr)))
347       throw runtime_error("Failed to map readback-buffer");
348 
349    memcpy(buf, ptr, size);
350 
351    D3D12_RANGE empty_range = { 0, 0 };
352    readback_res->Unmap(0, &empty_range);
353 }
354 
355 void
resource_barrier(ComPtr<ID3D12Resource> & res,D3D12_RESOURCE_STATES state_before,D3D12_RESOURCE_STATES state_after)356 ComputeTest::resource_barrier(ComPtr<ID3D12Resource> &res,
357                               D3D12_RESOURCE_STATES state_before,
358                               D3D12_RESOURCE_STATES state_after)
359 {
360    D3D12_RESOURCE_BARRIER barrier;
361    barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
362    barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
363    barrier.Transition.pResource = res.Get();
364    barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
365    barrier.Transition.StateBefore = state_before;
366    barrier.Transition.StateAfter = state_after;
367    cmdlist->ResourceBarrier(1, &barrier);
368 }
369 
370 void
execute_cmdlist()371 ComputeTest::execute_cmdlist()
372 {
373    if (FAILED(cmdlist->Close()))
374       throw runtime_error("Closing ID3D12GraphicsCommandList failed");
375 
376    ID3D12CommandList *cmdlists[] = { cmdlist };
377    cmdqueue->ExecuteCommandLists(1, cmdlists);
378    cmdqueue_fence->SetEventOnCompletion(fence_value, event);
379    cmdqueue->Signal(cmdqueue_fence, fence_value);
380    fence_value++;
381    WaitForSingleObject(event, INFINITE);
382 
383    if (FAILED(cmdalloc->Reset()))
384       throw runtime_error("resetting ID3D12CommandAllocator failed");
385 
386    if (FAILED(cmdlist->Reset(cmdalloc, NULL)))
387       throw runtime_error("resetting ID3D12GraphicsCommandList failed");
388 }
389 
390 void
create_uav_buffer(ComPtr<ID3D12Resource> res,size_t width,size_t byte_stride,D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)391 ComputeTest::create_uav_buffer(ComPtr<ID3D12Resource> res,
392                                size_t width, size_t byte_stride,
393                                D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
394 {
395    D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
396    uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
397    uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
398    uav_desc.Buffer.FirstElement = 0;
399    uav_desc.Buffer.NumElements = DIV_ROUND_UP(width * byte_stride, 4);
400    uav_desc.Buffer.StructureByteStride = 0;
401    uav_desc.Buffer.CounterOffsetInBytes = 0;
402    uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
403 
404    dev->CreateUnorderedAccessView(res.Get(), NULL, &uav_desc, cpu_handle);
405 }
406 
407 void
create_cbv(ComPtr<ID3D12Resource> res,size_t size,D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)408 ComputeTest::create_cbv(ComPtr<ID3D12Resource> res, size_t size,
409                         D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
410 {
411    D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc;
412    cbv_desc.BufferLocation = res ? res->GetGPUVirtualAddress() : 0;
413    cbv_desc.SizeInBytes = size;
414 
415    dev->CreateConstantBufferView(&cbv_desc, cpu_handle);
416 }
417 
418 ComPtr<ID3D12Resource>
add_uav_resource(ComputeTest::Resources & resources,unsigned spaceid,unsigned resid,const void * data,size_t num_elems,size_t elem_size)419 ComputeTest::add_uav_resource(ComputeTest::Resources &resources,
420                               unsigned spaceid, unsigned resid,
421                               const void *data, size_t num_elems,
422                               size_t elem_size)
423 {
424    size_t size = align(elem_size * num_elems, 4);
425    D3D12_CPU_DESCRIPTOR_HANDLE handle;
426    ComPtr<ID3D12Resource> res;
427    handle = GetCPUDescriptorHandleForHeapStart(uav_heap);
428    handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
429 
430    if (size) {
431       if (data)
432          res = create_buffer_with_data(data, size);
433       else
434          res = create_buffer(size, D3D12_HEAP_TYPE_DEFAULT);
435 
436       resource_barrier(res, D3D12_RESOURCE_STATE_COMMON,
437                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
438    }
439    create_uav_buffer(res, num_elems, elem_size, handle);
440    resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_UAV, spaceid, resid);
441    return res;
442 }
443 
444 ComPtr<ID3D12Resource>
add_cbv_resource(ComputeTest::Resources & resources,unsigned spaceid,unsigned resid,const void * data,size_t size)445 ComputeTest::add_cbv_resource(ComputeTest::Resources &resources,
446                               unsigned spaceid, unsigned resid,
447                               const void *data, size_t size)
448 {
449    unsigned aligned_size = align(size, 256);
450    D3D12_CPU_DESCRIPTOR_HANDLE handle;
451    ComPtr<ID3D12Resource> res;
452    handle = GetCPUDescriptorHandleForHeapStart(uav_heap);
453    handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
454 
455    if (size) {
456      assert(data);
457      res = create_sized_buffer_with_data(aligned_size, data, size);
458    }
459    create_cbv(res, aligned_size, handle);
460    resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, spaceid, resid);
461    return res;
462 }
463 
464 void
run_shader_with_raw_args(Shader shader,const CompileArgs & compile_args,const std::vector<RawShaderArg * > & args)465 ComputeTest::run_shader_with_raw_args(Shader shader,
466                                       const CompileArgs &compile_args,
467                                       const std::vector<RawShaderArg *> &args)
468 {
469    if (args.size() < 1)
470       throw runtime_error("no inputs");
471 
472    static HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
473    if (!hD3D12Mod)
474       throw runtime_error("Failed to load D3D12.DLL");
475 
476    D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(hD3D12Mod, "D3D12SerializeVersionedRootSignature");
477 
478    if (args.size() != shader.dxil->kernel->num_args)
479       throw runtime_error("incorrect number of inputs");
480 
481    struct clc_runtime_kernel_conf conf = { 0 };
482 
483    // Older WARP and some hardware doesn't support int64, so for these tests, unconditionally lower away int64
484    // A more complex runtime can be smarter about detecting when this needs to be done
485    conf.lower_bit_size = 64;
486    conf.max_shader_model = SHADER_MODEL_6_2;
487    conf.validator_version = DXIL_VALIDATOR_1_4;
488 
489    if (!shader.dxil->metadata.local_size[0])
490       conf.local_size[0] = compile_args.x;
491    else
492       conf.local_size[0] = shader.dxil->metadata.local_size[0];
493 
494    if (!shader.dxil->metadata.local_size[1])
495       conf.local_size[1] = compile_args.y;
496    else
497       conf.local_size[1] = shader.dxil->metadata.local_size[1];
498 
499    if (!shader.dxil->metadata.local_size[2])
500       conf.local_size[2] = compile_args.z;
501    else
502       conf.local_size[2] = shader.dxil->metadata.local_size[2];
503 
504    if (compile_args.x % conf.local_size[0] ||
505        compile_args.y % conf.local_size[1] ||
506        compile_args.z % conf.local_size[2])
507       throw runtime_error("invalid global size must be a multiple of local size");
508 
509    std::vector<struct clc_runtime_arg_info> argsinfo(args.size());
510 
511    conf.args = argsinfo.data();
512    conf.support_global_work_id_offsets =
513       compile_args.work_props.global_offset_x != 0 ||
514       compile_args.work_props.global_offset_y != 0 ||
515       compile_args.work_props.global_offset_z != 0;
516    conf.support_workgroup_id_offsets =
517       compile_args.work_props.group_id_offset_x != 0 ||
518       compile_args.work_props.group_id_offset_y != 0 ||
519       compile_args.work_props.group_id_offset_z != 0;
520 
521    for (unsigned i = 0; i < shader.dxil->kernel->num_args; ++i) {
522       RawShaderArg *arg = args[i];
523       size_t size = arg->get_elem_size() * arg->get_num_elems();
524 
525       switch (shader.dxil->kernel->args[i].address_qualifier) {
526       case CLC_KERNEL_ARG_ADDRESS_LOCAL:
527          argsinfo[i].localptr.size = size;
528          break;
529       default:
530          break;
531       }
532    }
533 
534    configure(shader, &conf);
535    validate(shader);
536 
537    std::shared_ptr<struct clc_dxil_object> &dxil = shader.dxil;
538 
539    std::vector<uint8_t> argsbuf(dxil->metadata.kernel_inputs_buf_size);
540    std::vector<ComPtr<ID3D12Resource>> argres(shader.dxil->kernel->num_args);
541    clc_work_properties_data work_props = compile_args.work_props;
542    if (!conf.support_workgroup_id_offsets) {
543       work_props.group_count_total_x = compile_args.x / conf.local_size[0];
544       work_props.group_count_total_y = compile_args.y / conf.local_size[1];
545       work_props.group_count_total_z = compile_args.z / conf.local_size[2];
546    }
547    if (work_props.work_dim == 0)
548       work_props.work_dim = 3;
549    Resources resources;
550 
551    for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
552       RawShaderArg *arg = args[i];
553       size_t size = arg->get_elem_size() * arg->get_num_elems();
554       void *slot = argsbuf.data() + dxil->metadata.args[i].offset;
555 
556       switch (dxil->kernel->args[i].address_qualifier) {
557       case CLC_KERNEL_ARG_ADDRESS_CONSTANT:
558       case CLC_KERNEL_ARG_ADDRESS_GLOBAL: {
559          assert(dxil->metadata.args[i].size == sizeof(uint64_t));
560          uint64_t *ptr_slot = (uint64_t *)slot;
561          if (arg->get_data())
562             *ptr_slot = (uint64_t)dxil->metadata.args[i].globconstptr.buf_id << 32;
563          else
564             *ptr_slot = ~0ull;
565          break;
566       }
567       case CLC_KERNEL_ARG_ADDRESS_LOCAL: {
568          assert(dxil->metadata.args[i].size == sizeof(uint64_t));
569          uint64_t *ptr_slot = (uint64_t *)slot;
570          *ptr_slot = dxil->metadata.args[i].localptr.sharedmem_offset;
571          break;
572       }
573       case CLC_KERNEL_ARG_ADDRESS_PRIVATE: {
574          assert(size == dxil->metadata.args[i].size);
575          memcpy(slot, arg->get_data(), size);
576          break;
577       }
578       default:
579          assert(0);
580       }
581    }
582 
583    for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
584       RawShaderArg *arg = args[i];
585 
586       if (dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
587           dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) {
588          argres[i] = add_uav_resource(resources, 0,
589                                       dxil->metadata.args[i].globconstptr.buf_id,
590                                       arg->get_data(), arg->get_num_elems(),
591                                       arg->get_elem_size());
592       }
593    }
594 
595    if (dxil->metadata.printf.uav_id > 0)
596       add_uav_resource(resources, 0, dxil->metadata.printf.uav_id, NULL, 1024 * 1024 / 4, 4);
597 
598    for (unsigned i = 0; i < dxil->metadata.num_consts; ++i)
599       add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id,
600                        dxil->metadata.consts[i].data,
601                        dxil->metadata.consts[i].size / 4, 4);
602 
603    if (argsbuf.size())
604       add_cbv_resource(resources, 0, dxil->metadata.kernel_inputs_cbv_id,
605                        argsbuf.data(), argsbuf.size());
606 
607    add_cbv_resource(resources, 0, dxil->metadata.work_properties_cbv_id,
608                     &work_props, sizeof(work_props));
609 
610    auto root_sig = create_root_signature(resources);
611    auto pipeline_state = create_pipeline_state(root_sig, *dxil);
612 
613    cmdlist->SetDescriptorHeaps(1, &uav_heap);
614    cmdlist->SetComputeRootSignature(root_sig.Get());
615    cmdlist->SetComputeRootDescriptorTable(0, GetGPUDescriptorHandleForHeapStart(uav_heap));
616    cmdlist->SetPipelineState(pipeline_state.Get());
617 
618    cmdlist->Dispatch(compile_args.x / conf.local_size[0],
619                      compile_args.y / conf.local_size[1],
620                      compile_args.z / conf.local_size[2]);
621 
622    for (auto &range : resources.ranges) {
623       if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) {
624          for (unsigned i = range.OffsetInDescriptorsFromTableStart;
625               i < range.NumDescriptors; i++) {
626             if (!resources.descs[i].Get())
627                continue;
628 
629             resource_barrier(resources.descs[i],
630                              D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
631                              D3D12_RESOURCE_STATE_COMMON);
632          }
633       }
634    }
635 
636    execute_cmdlist();
637 
638    for (unsigned i = 0; i < args.size(); i++) {
639       if (!(args[i]->get_direction() & SHADER_ARG_OUTPUT))
640          continue;
641 
642       assert(dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL);
643       get_buffer_data(argres[i], args[i]->get_data(),
644                       args[i]->get_elem_size() * args[i]->get_num_elems());
645    }
646 
647    ComPtr<ID3D12InfoQueue> info_queue;
648    dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
649    if (info_queue)
650    {
651       EXPECT_EQ(0, info_queue->GetNumStoredMessages());
652       for (unsigned i = 0; i < info_queue->GetNumStoredMessages(); ++i) {
653          SIZE_T message_size = 0;
654          info_queue->GetMessageA(i, nullptr, &message_size);
655          D3D12_MESSAGE* message = (D3D12_MESSAGE*)malloc(message_size);
656          info_queue->GetMessageA(i, message, &message_size);
657          FAIL() << message->pDescription;
658          free(message);
659       }
660    }
661 }
662 
663 void
SetUp()664 ComputeTest::SetUp()
665 {
666    static struct clc_libclc *compiler_ctx_g = nullptr;
667 
668    if (!compiler_ctx_g) {
669       clc_libclc_dxil_options options = { };
670       options.optimize = (debug_get_option_debug_compute() & COMPUTE_DEBUG_OPTIMIZE_LIBCLC) != 0;
671 
672       compiler_ctx_g = clc_libclc_new_dxil(&logger, &options);
673       if (!compiler_ctx_g)
674          throw runtime_error("failed to create CLC compiler context");
675 
676       if (debug_get_option_debug_compute() & COMPUTE_DEBUG_SERIALIZE_LIBCLC) {
677          void *serialized = nullptr;
678          size_t serialized_size = 0;
679          clc_libclc_serialize(compiler_ctx_g, &serialized, &serialized_size);
680          if (!serialized)
681             throw runtime_error("failed to serialize CLC compiler context");
682 
683          clc_free_libclc(compiler_ctx_g);
684          compiler_ctx_g = nullptr;
685 
686          compiler_ctx_g = clc_libclc_deserialize(serialized, serialized_size);
687          if (!compiler_ctx_g)
688             throw runtime_error("failed to deserialize CLC compiler context");
689 
690          clc_libclc_free_serialized(serialized);
691       }
692    }
693    compiler_ctx = compiler_ctx_g;
694 
695    enable_d3d12_debug_layer();
696 
697    factory = get_dxgi_factory();
698    if (!factory)
699       throw runtime_error("failed to create DXGI factory");
700 
701    adapter = choose_adapter(factory);
702    if (!adapter)
703       throw runtime_error("failed to choose adapter");
704 
705    dev = create_device(adapter);
706    if (!dev)
707       throw runtime_error("failed to create device");
708 
709    if (FAILED(dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
710                                __uuidof(cmdqueue_fence),
711                                (void **)&cmdqueue_fence)))
712       throw runtime_error("failed to create fence\n");
713 
714    D3D12_COMMAND_QUEUE_DESC queue_desc;
715    queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
716    queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
717    queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
718    queue_desc.NodeMask = 0;
719    if (FAILED(dev->CreateCommandQueue(&queue_desc,
720                                       __uuidof(cmdqueue),
721                                       (void **)&cmdqueue)))
722       throw runtime_error("failed to create command queue");
723 
724    if (FAILED(dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE,
725              __uuidof(cmdalloc), (void **)&cmdalloc)))
726       throw runtime_error("failed to create command allocator");
727 
728    if (FAILED(dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
729              cmdalloc, NULL, __uuidof(cmdlist), (void **)&cmdlist)))
730       throw runtime_error("failed to create command list");
731 
732    D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
733    heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
734    heap_desc.NumDescriptors = 1000;
735    heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
736    heap_desc.NodeMask = 0;
737    if (FAILED(dev->CreateDescriptorHeap(&heap_desc,
738        __uuidof(uav_heap), (void **)&uav_heap)))
739       throw runtime_error("failed to create descriptor heap");
740 
741    uav_heap_incr = dev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
742 
743    event = CreateEvent(NULL, false, false, NULL);
744    if (!event)
745       throw runtime_error("Failed to create event");
746    fence_value = 1;
747 }
748 
749 void
TearDown()750 ComputeTest::TearDown()
751 {
752    CloseHandle(event);
753 
754    uav_heap->Release();
755    cmdlist->Release();
756    cmdalloc->Release();
757    cmdqueue->Release();
758    cmdqueue_fence->Release();
759    dev->Release();
760    adapter->Release();
761    factory->Release();
762 }
763 
/* Out-of-class definition of the static ComputeTest member
 * D3D12SerializeVersionedRootSignature. No initializer is given here.
 * NOTE(review): where it gets assigned is not visible in this part of the
 * file - confirm before relying on it being non-null.
 */
764 PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE ComputeTest::D3D12SerializeVersionedRootSignature;
765 
766 bool
validate_module(const struct clc_dxil_object & dxil)767 validate_module(const struct clc_dxil_object &dxil)
768 {
769    struct dxil_validator *val = dxil_create_validator(NULL);
770    char *err;
771    bool res = dxil_validate_module(val, dxil.binary.data,
772                                    dxil.binary.size, &err);
773    if (!res && err)
774       fprintf(stderr, "D3D12: validation failed: %s", err);
775 
776    dxil_destroy_validator(val);
777    return res;
778 }
779 
780 static void
dump_blob(const char * path,const struct clc_dxil_object & dxil)781 dump_blob(const char *path, const struct clc_dxil_object &dxil)
782 {
783    FILE *fp = fopen(path, "wb");
784    if (fp) {
785       fwrite(dxil.binary.data, 1, dxil.binary.size, fp);
786       fclose(fp);
787       printf("D3D12: wrote '%s'...\n", path);
788    }
789 }
790 
791 ComputeTest::Shader
compile(const std::vector<const char * > & sources,const std::vector<const char * > & compile_args,bool create_library)792 ComputeTest::compile(const std::vector<const char *> &sources,
793                      const std::vector<const char *> &compile_args,
794                      bool create_library)
795 {
796    struct clc_compile_args args = {
797    };
798    args.args = compile_args.data();
799    args.num_args = (unsigned)compile_args.size();
800    args.features.images = true;
801    args.features.images_read_write = true;
802    args.features.int64 = true;
803    ComputeTest::Shader shader;
804 
805    std::vector<Shader> shaders;
806 
807    args.source.name = "obj.cl";
808 
809    for (unsigned i = 0; i < sources.size(); i++) {
810       args.source.value = sources[i];
811 
812       clc_binary spirv{};
813       if (!clc_compile_c_to_spirv(&args, &logger, &spirv))
814          throw runtime_error("failed to compile object!");
815 
816       Shader shader;
817       shader.obj = std::shared_ptr<clc_binary>(new clc_binary(spirv), [](clc_binary *spirv)
818          {
819             clc_free_spirv(spirv);
820             delete spirv;
821          });
822       shaders.push_back(shader);
823    }
824 
825    if (shaders.size() == 1 && create_library)
826       return shaders[0];
827 
828    return link(shaders, create_library);
829 }
830 
831 ComputeTest::Shader
link(const std::vector<Shader> & sources,bool create_library)832 ComputeTest::link(const std::vector<Shader> &sources,
833                   bool create_library)
834 {
835    std::vector<const clc_binary*> objs;
836    for (auto& source : sources)
837       objs.push_back(&*source.obj);
838 
839    struct clc_linker_args link_args = {};
840    link_args.in_objs = objs.data();
841    link_args.num_in_objs = (unsigned)objs.size();
842    link_args.create_library = create_library;
843    clc_binary spirv{};
844    if (!clc_link_spirv(&link_args, &logger, &spirv))
845       throw runtime_error("failed to link objects!");
846 
847    ComputeTest::Shader shader;
848    shader.obj = std::shared_ptr<clc_binary>(new clc_binary(spirv), [](clc_binary *spirv)
849       {
850          clc_free_spirv(spirv);
851          delete spirv;
852       });
853    if (!link_args.create_library)
854       configure(shader, NULL);
855 
856    return shader;
857 }
858 
859 ComputeTest::Shader
assemble(const char * source)860 ComputeTest::assemble(const char *source)
861 {
862    spvtools::SpirvTools tools(SPV_ENV_UNIVERSAL_1_0);
863    std::vector<uint32_t> binary;
864    if (!tools.Assemble(source, strlen(source), &binary))
865       throw runtime_error("failed to assemble");
866 
867    ComputeTest::Shader shader;
868    shader.obj = std::shared_ptr<clc_binary>(new clc_binary{}, [](clc_binary *spirv)
869       {
870          free(spirv->data);
871          delete spirv;
872       });
873    shader.obj->size = binary.size() * 4;
874    shader.obj->data = malloc(shader.obj->size);
875    memcpy(shader.obj->data, binary.data(), shader.obj->size);
876 
877    configure(shader, NULL);
878 
879    return shader;
880 }
881 
882 void
configure(Shader & shader,const struct clc_runtime_kernel_conf * conf)883 ComputeTest::configure(Shader &shader,
884                        const struct clc_runtime_kernel_conf *conf)
885 {
886    if (!shader.metadata) {
887       shader.metadata = std::shared_ptr<clc_parsed_spirv>(new clc_parsed_spirv{}, [](clc_parsed_spirv *metadata)
888          {
889             clc_free_parsed_spirv(metadata);
890             delete metadata;
891          });
892       if (!clc_parse_spirv(shader.obj.get(), NULL, shader.metadata.get()))
893          throw runtime_error("failed to parse spirv!");
894    }
895 
896    std::unique_ptr<clc_dxil_object> dxil(new clc_dxil_object{});
897    if (!clc_spirv_to_dxil(compiler_ctx, shader.obj.get(), shader.metadata.get(), "main_test", conf, nullptr, &logger, dxil.get()))
898       throw runtime_error("failed to compile kernel!");
899    shader.dxil = std::shared_ptr<clc_dxil_object>(dxil.release(), [](clc_dxil_object *dxil)
900       {
901          clc_free_dxil_object(dxil);
902          delete dxil;
903       });
904 }
905 
906 void
validate(ComputeTest::Shader & shader)907 ComputeTest::validate(ComputeTest::Shader &shader)
908 {
909    dump_blob("unsigned.cso", *shader.dxil);
910    if (!validate_module(*shader.dxil))
911       throw runtime_error("failed to validate module!");
912 
913    dump_blob("signed.cso", *shader.dxil);
914 }
915