1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdexcept>
27
28 #include <unknwn.h>
29 #include <directx/d3d12.h>
30 #include <dxgi1_4.h>
31 #include <gtest/gtest.h>
32 #include <wrl.h>
33 #include <dxguids/dxguids.h>
34
35 #include "util/u_debug.h"
36 #include "clc_compiler.h"
37 #include "compute_test.h"
38 #include "dxil_validator.h"
39
40 #include <spirv-tools/libspirv.hpp>
41
42 #if (defined(_WIN32) && defined(_MSC_VER))
43 inline D3D12_CPU_DESCRIPTOR_HANDLE
GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)44 GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
45 {
46 return heap->GetCPUDescriptorHandleForHeapStart();
47 }
48 inline D3D12_GPU_DESCRIPTOR_HANDLE
GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)49 GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
50 {
51 return heap->GetGPUDescriptorHandleForHeapStart();
52 }
53 inline D3D12_HEAP_PROPERTIES
GetCustomHeapProperties(ID3D12Device * dev,D3D12_HEAP_TYPE type)54 GetCustomHeapProperties(ID3D12Device *dev, D3D12_HEAP_TYPE type)
55 {
56 return dev->GetCustomHeapProperties(0, type);
57 }
58 #else
59 inline D3D12_CPU_DESCRIPTOR_HANDLE
GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)60 GetCPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
61 {
62 D3D12_CPU_DESCRIPTOR_HANDLE ret;
63 heap->GetCPUDescriptorHandleForHeapStart(&ret);
64 return ret;
65 }
66 inline D3D12_GPU_DESCRIPTOR_HANDLE
GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap * heap)67 GetGPUDescriptorHandleForHeapStart(ID3D12DescriptorHeap *heap)
68 {
69 D3D12_GPU_DESCRIPTOR_HANDLE ret;
70 heap->GetGPUDescriptorHandleForHeapStart(&ret);
71 return ret;
72 }
73 inline D3D12_HEAP_PROPERTIES
GetCustomHeapProperties(ID3D12Device * dev,D3D12_HEAP_TYPE type)74 GetCustomHeapProperties(ID3D12Device *dev, D3D12_HEAP_TYPE type)
75 {
76 D3D12_HEAP_PROPERTIES ret;
77 dev->GetCustomHeapProperties(&ret, 0, type);
78 return ret;
79 }
80 #endif
81
82 using std::runtime_error;
83 using Microsoft::WRL::ComPtr;
84
/* Bit flags selected through the COMPUTE_TEST_DEBUG option (see
 * compute_debug_options below for the matching option names). */
enum compute_test_debug_flags {
   COMPUTE_DEBUG_EXPERIMENTAL_SHADERS = 1 << 0,  /* enable experimental shader models */
   COMPUTE_DEBUG_USE_HW_D3D = 1 << 1,            /* pick a hardware adapter instead of WARP */
   COMPUTE_DEBUG_OPTIMIZE_LIBCLC = 1 << 2,       /* optimize libclc when it is created */
   COMPUTE_DEBUG_SERIALIZE_LIBCLC = 1 << 3,      /* round-trip libclc through serialization */
};
91
/* Option-name / flag / description table used to parse COMPUTE_TEST_DEBUG. */
static const struct debug_named_value compute_debug_options[] = {
   { "experimental_shaders",  COMPUTE_DEBUG_EXPERIMENTAL_SHADERS, "Enable experimental shaders" },
   { "use_hw_d3d",            COMPUTE_DEBUG_USE_HW_D3D,           "Use a hardware D3D device" },
   { "optimize_libclc",       COMPUTE_DEBUG_OPTIMIZE_LIBCLC,      "Optimize the clc_libclc before using it" },
   { "serialize_libclc",      COMPUTE_DEBUG_SERIALIZE_LIBCLC,     "Serialize and deserialize the clc_libclc" },
   DEBUG_NAMED_VALUE_END
};
99
/* Provides debug_get_option_debug_compute(), which this file uses to query the
 * flags set through COMPUTE_TEST_DEBUG (default: 0, no flags). */
DEBUG_GET_ONCE_FLAGS_OPTION(debug_compute, "COMPUTE_TEST_DEBUG", compute_debug_options, 0)
101
/*
 * Logger hook for compiler warnings: writes the message to stderr with a
 * "WARNING: " prefix and a trailing newline. The private pointer is unused.
 */
static void warning_callback(void *priv, const char *msg)
{
   (void)priv;
   fprintf(stderr, "WARNING: %s\n", msg);
}
106
/*
 * Logger hook for compiler errors: writes the message to stderr with an
 * "ERROR: " prefix and a trailing newline. The private pointer is unused.
 */
static void error_callback(void *priv, const char *msg)
{
   (void)priv;
   fprintf(stderr, "ERROR: %s\n", msg);
}
111
/* Logger handed to the clc_* entry points in this file: no private data,
 * errors and warnings are routed to the stderr callbacks above. */
static const struct clc_logger logger = {
   NULL,
   error_callback,
   warning_callback,
};
117
118 void
enable_d3d12_debug_layer()119 ComputeTest::enable_d3d12_debug_layer()
120 {
121 HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
122 if (!hD3D12Mod) {
123 fprintf(stderr, "D3D12: failed to load D3D12.DLL\n");
124 return;
125 }
126
127 typedef HRESULT(WINAPI * PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid,
128 void **ppFactory);
129 PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(hD3D12Mod, "D3D12GetDebugInterface");
130 if (!D3D12GetDebugInterface) {
131 fprintf(stderr, "D3D12: failed to load D3D12GetDebugInterface from D3D12.DLL\n");
132 return;
133 }
134
135 ID3D12Debug *debug;
136 if (FAILED(D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void **)& debug))) {
137 fprintf(stderr, "D3D12: D3D12GetDebugInterface failed\n");
138 return;
139 }
140
141 debug->EnableDebugLayer();
142 }
143
144 IDXGIFactory4 *
get_dxgi_factory()145 ComputeTest::get_dxgi_factory()
146 {
147 static const GUID IID_IDXGIFactory4 = {
148 0x1bc6ea02, 0xef36, 0x464f,
149 { 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
150 };
151
152 typedef HRESULT(WINAPI * PFN_CREATE_DXGI_FACTORY)(REFIID riid,
153 void **ppFactory);
154 PFN_CREATE_DXGI_FACTORY CreateDXGIFactory;
155
156 HMODULE hDXGIMod = LoadLibrary("DXGI.DLL");
157 if (!hDXGIMod)
158 throw runtime_error("Failed to load DXGI.DLL");
159
160 CreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY)GetProcAddress(hDXGIMod, "CreateDXGIFactory");
161 if (!CreateDXGIFactory)
162 throw runtime_error("Failed to load CreateDXGIFactory from DXGI.DLL");
163
164 IDXGIFactory4 *factory = NULL;
165 HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory4, (void **)&factory);
166 if (FAILED(hr))
167 throw runtime_error("CreateDXGIFactory failed");
168
169 return factory;
170 }
171
172 IDXGIAdapter1 *
choose_adapter(IDXGIFactory4 * factory)173 ComputeTest::choose_adapter(IDXGIFactory4 *factory)
174 {
175 IDXGIAdapter1 *ret;
176
177 if (debug_get_option_debug_compute() & COMPUTE_DEBUG_USE_HW_D3D) {
178 for (unsigned i = 0; SUCCEEDED(factory->EnumAdapters1(i, &ret)); i++) {
179 DXGI_ADAPTER_DESC1 desc;
180 ret->GetDesc1(&desc);
181 if (!(desc.Flags & D3D_DRIVER_TYPE_SOFTWARE))
182 return ret;
183 }
184 throw runtime_error("Failed to enum hardware adapter");
185 } else {
186 if (FAILED(factory->EnumWarpAdapter(__uuidof(IDXGIAdapter1),
187 (void **)& ret)))
188 throw runtime_error("Failed to enum warp adapter");
189 return ret;
190 }
191 }
192
193 ID3D12Device *
create_device(IDXGIAdapter1 * adapter)194 ComputeTest::create_device(IDXGIAdapter1 *adapter)
195 {
196 typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown *, D3D_FEATURE_LEVEL, REFIID, void **);
197 PFN_D3D12CREATEDEVICE D3D12CreateDevice;
198
199 HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
200 if (!hD3D12Mod)
201 throw runtime_error("failed to load D3D12.DLL");
202
203 if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS) {
204 typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID *, void *, UINT *);
205 PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures;
206 D3D12EnableExperimentalFeatures = (PFN_D3D12ENABLEEXPERIMENTALFEATURES)
207 GetProcAddress(hD3D12Mod, "D3D12EnableExperimentalFeatures");
208 if (FAILED(D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL)))
209 throw runtime_error("failed to enable experimental shader models");
210 }
211
212 D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(hD3D12Mod, "D3D12CreateDevice");
213 if (!D3D12CreateDevice)
214 throw runtime_error("failed to load D3D12CreateDevice from D3D12.DLL");
215
216 ID3D12Device *dev;
217 if (FAILED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_12_0,
218 __uuidof(ID3D12Device), (void **)& dev)))
219 throw runtime_error("D3D12CreateDevice failed");
220
221 return dev;
222 }
223
224 ComPtr<ID3D12RootSignature>
create_root_signature(const ComputeTest::Resources & resources)225 ComputeTest::create_root_signature(const ComputeTest::Resources &resources)
226 {
227 D3D12_ROOT_PARAMETER1 root_param;
228 root_param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
229 root_param.DescriptorTable.NumDescriptorRanges = resources.ranges.size();
230 root_param.DescriptorTable.pDescriptorRanges = resources.ranges.data();
231 root_param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
232
233 D3D12_ROOT_SIGNATURE_DESC1 root_sig_desc;
234 root_sig_desc.NumParameters = 1;
235 root_sig_desc.pParameters = &root_param;
236 root_sig_desc.NumStaticSamplers = 0;
237 root_sig_desc.pStaticSamplers = NULL;
238 root_sig_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
239
240 D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_desc;
241 versioned_desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
242 versioned_desc.Desc_1_1 = root_sig_desc;
243
244 ID3DBlob *sig, *error;
245 if (FAILED(D3D12SerializeVersionedRootSignature(&versioned_desc,
246 &sig, &error)))
247 throw runtime_error("D3D12SerializeVersionedRootSignature failed");
248
249 ComPtr<ID3D12RootSignature> ret;
250 if (FAILED(dev->CreateRootSignature(0,
251 sig->GetBufferPointer(),
252 sig->GetBufferSize(),
253 __uuidof(ID3D12RootSignature),
254 (void **)& ret)))
255 throw runtime_error("CreateRootSignature failed");
256
257 return ret;
258 }
259
260 ComPtr<ID3D12PipelineState>
create_pipeline_state(ComPtr<ID3D12RootSignature> & root_sig,const struct clc_dxil_object & dxil)261 ComputeTest::create_pipeline_state(ComPtr<ID3D12RootSignature> &root_sig,
262 const struct clc_dxil_object &dxil)
263 {
264 D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc = { root_sig.Get() };
265 pipeline_desc.CS.pShaderBytecode = dxil.binary.data;
266 pipeline_desc.CS.BytecodeLength = dxil.binary.size;
267
268 ComPtr<ID3D12PipelineState> pipeline_state;
269 if (FAILED(dev->CreateComputePipelineState(&pipeline_desc,
270 __uuidof(ID3D12PipelineState),
271 (void **)& pipeline_state)))
272 throw runtime_error("Failed to create pipeline state");
273 return pipeline_state;
274 }
275
276 ComPtr<ID3D12Resource>
create_buffer(int size,D3D12_HEAP_TYPE heap_type)277 ComputeTest::create_buffer(int size, D3D12_HEAP_TYPE heap_type)
278 {
279 D3D12_RESOURCE_DESC desc;
280 desc.Format = DXGI_FORMAT_UNKNOWN;
281 desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
282 desc.Width = size;
283 desc.Height = 1;
284 desc.DepthOrArraySize = 1;
285 desc.MipLevels = 1;
286 desc.SampleDesc.Count = 1;
287 desc.SampleDesc.Quality = 0;
288 desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
289 desc.Flags = heap_type == D3D12_HEAP_TYPE_DEFAULT ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE;
290 desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
291
292 D3D12_HEAP_PROPERTIES heap_pris = GetCustomHeapProperties(dev, heap_type);
293
294 ComPtr<ID3D12Resource> res;
295 if (FAILED(dev->CreateCommittedResource(&heap_pris,
296 D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON,
297 NULL, __uuidof(ID3D12Resource), (void **)&res)))
298 throw runtime_error("CreateCommittedResource failed");
299
300 return res;
301 }
302
303 ComPtr<ID3D12Resource>
create_upload_buffer_with_data(const void * data,size_t size)304 ComputeTest::create_upload_buffer_with_data(const void *data, size_t size)
305 {
306 auto upload_res = create_buffer(size, D3D12_HEAP_TYPE_UPLOAD);
307
308 void *ptr = NULL;
309 D3D12_RANGE res_range = { 0, (SIZE_T)size };
310 if (FAILED(upload_res->Map(0, &res_range, (void **)&ptr)))
311 throw runtime_error("Failed to map upload-buffer");
312 assert(ptr);
313 memcpy(ptr, data, size);
314 upload_res->Unmap(0, &res_range);
315 return upload_res;
316 }
317
318 ComPtr<ID3D12Resource>
create_sized_buffer_with_data(size_t buffer_size,const void * data,size_t data_size)319 ComputeTest::create_sized_buffer_with_data(size_t buffer_size,
320 const void *data,
321 size_t data_size)
322 {
323 auto upload_res = create_upload_buffer_with_data(data, data_size);
324
325 auto res = create_buffer(buffer_size, D3D12_HEAP_TYPE_DEFAULT);
326 resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
327 cmdlist->CopyBufferRegion(res.Get(), 0, upload_res.Get(), 0, data_size);
328 resource_barrier(res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON);
329 execute_cmdlist();
330
331 return res;
332 }
333
334 void
get_buffer_data(ComPtr<ID3D12Resource> res,void * buf,size_t size)335 ComputeTest::get_buffer_data(ComPtr<ID3D12Resource> res,
336 void *buf, size_t size)
337 {
338 auto readback_res = create_buffer(align(size, 4), D3D12_HEAP_TYPE_READBACK);
339 resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
340 cmdlist->CopyResource(readback_res.Get(), res.Get());
341 resource_barrier(res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COMMON);
342 execute_cmdlist();
343
344 void *ptr = NULL;
345 D3D12_RANGE res_range = { 0, size };
346 if (FAILED(readback_res->Map(0, &res_range, &ptr)))
347 throw runtime_error("Failed to map readback-buffer");
348
349 memcpy(buf, ptr, size);
350
351 D3D12_RANGE empty_range = { 0, 0 };
352 readback_res->Unmap(0, &empty_range);
353 }
354
355 void
resource_barrier(ComPtr<ID3D12Resource> & res,D3D12_RESOURCE_STATES state_before,D3D12_RESOURCE_STATES state_after)356 ComputeTest::resource_barrier(ComPtr<ID3D12Resource> &res,
357 D3D12_RESOURCE_STATES state_before,
358 D3D12_RESOURCE_STATES state_after)
359 {
360 D3D12_RESOURCE_BARRIER barrier;
361 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
362 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
363 barrier.Transition.pResource = res.Get();
364 barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
365 barrier.Transition.StateBefore = state_before;
366 barrier.Transition.StateAfter = state_after;
367 cmdlist->ResourceBarrier(1, &barrier);
368 }
369
370 void
execute_cmdlist()371 ComputeTest::execute_cmdlist()
372 {
373 if (FAILED(cmdlist->Close()))
374 throw runtime_error("Closing ID3D12GraphicsCommandList failed");
375
376 ID3D12CommandList *cmdlists[] = { cmdlist };
377 cmdqueue->ExecuteCommandLists(1, cmdlists);
378 cmdqueue_fence->SetEventOnCompletion(fence_value, event);
379 cmdqueue->Signal(cmdqueue_fence, fence_value);
380 fence_value++;
381 WaitForSingleObject(event, INFINITE);
382
383 if (FAILED(cmdalloc->Reset()))
384 throw runtime_error("resetting ID3D12CommandAllocator failed");
385
386 if (FAILED(cmdlist->Reset(cmdalloc, NULL)))
387 throw runtime_error("resetting ID3D12GraphicsCommandList failed");
388 }
389
390 void
create_uav_buffer(ComPtr<ID3D12Resource> res,size_t width,size_t byte_stride,D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)391 ComputeTest::create_uav_buffer(ComPtr<ID3D12Resource> res,
392 size_t width, size_t byte_stride,
393 D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
394 {
395 D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
396 uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
397 uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
398 uav_desc.Buffer.FirstElement = 0;
399 uav_desc.Buffer.NumElements = DIV_ROUND_UP(width * byte_stride, 4);
400 uav_desc.Buffer.StructureByteStride = 0;
401 uav_desc.Buffer.CounterOffsetInBytes = 0;
402 uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
403
404 dev->CreateUnorderedAccessView(res.Get(), NULL, &uav_desc, cpu_handle);
405 }
406
407 void
create_cbv(ComPtr<ID3D12Resource> res,size_t size,D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)408 ComputeTest::create_cbv(ComPtr<ID3D12Resource> res, size_t size,
409 D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
410 {
411 D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc;
412 cbv_desc.BufferLocation = res ? res->GetGPUVirtualAddress() : 0;
413 cbv_desc.SizeInBytes = size;
414
415 dev->CreateConstantBufferView(&cbv_desc, cpu_handle);
416 }
417
418 ComPtr<ID3D12Resource>
add_uav_resource(ComputeTest::Resources & resources,unsigned spaceid,unsigned resid,const void * data,size_t num_elems,size_t elem_size)419 ComputeTest::add_uav_resource(ComputeTest::Resources &resources,
420 unsigned spaceid, unsigned resid,
421 const void *data, size_t num_elems,
422 size_t elem_size)
423 {
424 size_t size = align(elem_size * num_elems, 4);
425 D3D12_CPU_DESCRIPTOR_HANDLE handle;
426 ComPtr<ID3D12Resource> res;
427 handle = GetCPUDescriptorHandleForHeapStart(uav_heap);
428 handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
429
430 if (size) {
431 if (data)
432 res = create_buffer_with_data(data, size);
433 else
434 res = create_buffer(size, D3D12_HEAP_TYPE_DEFAULT);
435
436 resource_barrier(res, D3D12_RESOURCE_STATE_COMMON,
437 D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
438 }
439 create_uav_buffer(res, num_elems, elem_size, handle);
440 resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_UAV, spaceid, resid);
441 return res;
442 }
443
444 ComPtr<ID3D12Resource>
add_cbv_resource(ComputeTest::Resources & resources,unsigned spaceid,unsigned resid,const void * data,size_t size)445 ComputeTest::add_cbv_resource(ComputeTest::Resources &resources,
446 unsigned spaceid, unsigned resid,
447 const void *data, size_t size)
448 {
449 unsigned aligned_size = align(size, 256);
450 D3D12_CPU_DESCRIPTOR_HANDLE handle;
451 ComPtr<ID3D12Resource> res;
452 handle = GetCPUDescriptorHandleForHeapStart(uav_heap);
453 handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
454
455 if (size) {
456 assert(data);
457 res = create_sized_buffer_with_data(aligned_size, data, size);
458 }
459 create_cbv(res, aligned_size, handle);
460 resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, spaceid, resid);
461 return res;
462 }
463
464 void
run_shader_with_raw_args(Shader shader,const CompileArgs & compile_args,const std::vector<RawShaderArg * > & args)465 ComputeTest::run_shader_with_raw_args(Shader shader,
466 const CompileArgs &compile_args,
467 const std::vector<RawShaderArg *> &args)
468 {
469 if (args.size() < 1)
470 throw runtime_error("no inputs");
471
472 static HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
473 if (!hD3D12Mod)
474 throw runtime_error("Failed to load D3D12.DLL");
475
476 D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(hD3D12Mod, "D3D12SerializeVersionedRootSignature");
477
478 if (args.size() != shader.dxil->kernel->num_args)
479 throw runtime_error("incorrect number of inputs");
480
481 struct clc_runtime_kernel_conf conf = { 0 };
482
483 // Older WARP and some hardware doesn't support int64, so for these tests, unconditionally lower away int64
484 // A more complex runtime can be smarter about detecting when this needs to be done
485 conf.lower_bit_size = 64;
486 conf.max_shader_model = SHADER_MODEL_6_2;
487 conf.validator_version = DXIL_VALIDATOR_1_4;
488
489 if (!shader.dxil->metadata.local_size[0])
490 conf.local_size[0] = compile_args.x;
491 else
492 conf.local_size[0] = shader.dxil->metadata.local_size[0];
493
494 if (!shader.dxil->metadata.local_size[1])
495 conf.local_size[1] = compile_args.y;
496 else
497 conf.local_size[1] = shader.dxil->metadata.local_size[1];
498
499 if (!shader.dxil->metadata.local_size[2])
500 conf.local_size[2] = compile_args.z;
501 else
502 conf.local_size[2] = shader.dxil->metadata.local_size[2];
503
504 if (compile_args.x % conf.local_size[0] ||
505 compile_args.y % conf.local_size[1] ||
506 compile_args.z % conf.local_size[2])
507 throw runtime_error("invalid global size must be a multiple of local size");
508
509 std::vector<struct clc_runtime_arg_info> argsinfo(args.size());
510
511 conf.args = argsinfo.data();
512 conf.support_global_work_id_offsets =
513 compile_args.work_props.global_offset_x != 0 ||
514 compile_args.work_props.global_offset_y != 0 ||
515 compile_args.work_props.global_offset_z != 0;
516 conf.support_workgroup_id_offsets =
517 compile_args.work_props.group_id_offset_x != 0 ||
518 compile_args.work_props.group_id_offset_y != 0 ||
519 compile_args.work_props.group_id_offset_z != 0;
520
521 for (unsigned i = 0; i < shader.dxil->kernel->num_args; ++i) {
522 RawShaderArg *arg = args[i];
523 size_t size = arg->get_elem_size() * arg->get_num_elems();
524
525 switch (shader.dxil->kernel->args[i].address_qualifier) {
526 case CLC_KERNEL_ARG_ADDRESS_LOCAL:
527 argsinfo[i].localptr.size = size;
528 break;
529 default:
530 break;
531 }
532 }
533
534 configure(shader, &conf);
535 validate(shader);
536
537 std::shared_ptr<struct clc_dxil_object> &dxil = shader.dxil;
538
539 std::vector<uint8_t> argsbuf(dxil->metadata.kernel_inputs_buf_size);
540 std::vector<ComPtr<ID3D12Resource>> argres(shader.dxil->kernel->num_args);
541 clc_work_properties_data work_props = compile_args.work_props;
542 if (!conf.support_workgroup_id_offsets) {
543 work_props.group_count_total_x = compile_args.x / conf.local_size[0];
544 work_props.group_count_total_y = compile_args.y / conf.local_size[1];
545 work_props.group_count_total_z = compile_args.z / conf.local_size[2];
546 }
547 if (work_props.work_dim == 0)
548 work_props.work_dim = 3;
549 Resources resources;
550
551 for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
552 RawShaderArg *arg = args[i];
553 size_t size = arg->get_elem_size() * arg->get_num_elems();
554 void *slot = argsbuf.data() + dxil->metadata.args[i].offset;
555
556 switch (dxil->kernel->args[i].address_qualifier) {
557 case CLC_KERNEL_ARG_ADDRESS_CONSTANT:
558 case CLC_KERNEL_ARG_ADDRESS_GLOBAL: {
559 assert(dxil->metadata.args[i].size == sizeof(uint64_t));
560 uint64_t *ptr_slot = (uint64_t *)slot;
561 if (arg->get_data())
562 *ptr_slot = (uint64_t)dxil->metadata.args[i].globconstptr.buf_id << 32;
563 else
564 *ptr_slot = ~0ull;
565 break;
566 }
567 case CLC_KERNEL_ARG_ADDRESS_LOCAL: {
568 assert(dxil->metadata.args[i].size == sizeof(uint64_t));
569 uint64_t *ptr_slot = (uint64_t *)slot;
570 *ptr_slot = dxil->metadata.args[i].localptr.sharedmem_offset;
571 break;
572 }
573 case CLC_KERNEL_ARG_ADDRESS_PRIVATE: {
574 assert(size == dxil->metadata.args[i].size);
575 memcpy(slot, arg->get_data(), size);
576 break;
577 }
578 default:
579 assert(0);
580 }
581 }
582
583 for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
584 RawShaderArg *arg = args[i];
585
586 if (dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
587 dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) {
588 argres[i] = add_uav_resource(resources, 0,
589 dxil->metadata.args[i].globconstptr.buf_id,
590 arg->get_data(), arg->get_num_elems(),
591 arg->get_elem_size());
592 }
593 }
594
595 if (dxil->metadata.printf.uav_id > 0)
596 add_uav_resource(resources, 0, dxil->metadata.printf.uav_id, NULL, 1024 * 1024 / 4, 4);
597
598 for (unsigned i = 0; i < dxil->metadata.num_consts; ++i)
599 add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id,
600 dxil->metadata.consts[i].data,
601 dxil->metadata.consts[i].size / 4, 4);
602
603 if (argsbuf.size())
604 add_cbv_resource(resources, 0, dxil->metadata.kernel_inputs_cbv_id,
605 argsbuf.data(), argsbuf.size());
606
607 add_cbv_resource(resources, 0, dxil->metadata.work_properties_cbv_id,
608 &work_props, sizeof(work_props));
609
610 auto root_sig = create_root_signature(resources);
611 auto pipeline_state = create_pipeline_state(root_sig, *dxil);
612
613 cmdlist->SetDescriptorHeaps(1, &uav_heap);
614 cmdlist->SetComputeRootSignature(root_sig.Get());
615 cmdlist->SetComputeRootDescriptorTable(0, GetGPUDescriptorHandleForHeapStart(uav_heap));
616 cmdlist->SetPipelineState(pipeline_state.Get());
617
618 cmdlist->Dispatch(compile_args.x / conf.local_size[0],
619 compile_args.y / conf.local_size[1],
620 compile_args.z / conf.local_size[2]);
621
622 for (auto &range : resources.ranges) {
623 if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) {
624 for (unsigned i = range.OffsetInDescriptorsFromTableStart;
625 i < range.NumDescriptors; i++) {
626 if (!resources.descs[i].Get())
627 continue;
628
629 resource_barrier(resources.descs[i],
630 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
631 D3D12_RESOURCE_STATE_COMMON);
632 }
633 }
634 }
635
636 execute_cmdlist();
637
638 for (unsigned i = 0; i < args.size(); i++) {
639 if (!(args[i]->get_direction() & SHADER_ARG_OUTPUT))
640 continue;
641
642 assert(dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL);
643 get_buffer_data(argres[i], args[i]->get_data(),
644 args[i]->get_elem_size() * args[i]->get_num_elems());
645 }
646
647 ComPtr<ID3D12InfoQueue> info_queue;
648 dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
649 if (info_queue)
650 {
651 EXPECT_EQ(0, info_queue->GetNumStoredMessages());
652 for (unsigned i = 0; i < info_queue->GetNumStoredMessages(); ++i) {
653 SIZE_T message_size = 0;
654 info_queue->GetMessageA(i, nullptr, &message_size);
655 D3D12_MESSAGE* message = (D3D12_MESSAGE*)malloc(message_size);
656 info_queue->GetMessageA(i, message, &message_size);
657 FAIL() << message->pDescription;
658 free(message);
659 }
660 }
661 }
662
663 void
SetUp()664 ComputeTest::SetUp()
665 {
666 static struct clc_libclc *compiler_ctx_g = nullptr;
667
668 if (!compiler_ctx_g) {
669 clc_libclc_dxil_options options = { };
670 options.optimize = (debug_get_option_debug_compute() & COMPUTE_DEBUG_OPTIMIZE_LIBCLC) != 0;
671
672 compiler_ctx_g = clc_libclc_new_dxil(&logger, &options);
673 if (!compiler_ctx_g)
674 throw runtime_error("failed to create CLC compiler context");
675
676 if (debug_get_option_debug_compute() & COMPUTE_DEBUG_SERIALIZE_LIBCLC) {
677 void *serialized = nullptr;
678 size_t serialized_size = 0;
679 clc_libclc_serialize(compiler_ctx_g, &serialized, &serialized_size);
680 if (!serialized)
681 throw runtime_error("failed to serialize CLC compiler context");
682
683 clc_free_libclc(compiler_ctx_g);
684 compiler_ctx_g = nullptr;
685
686 compiler_ctx_g = clc_libclc_deserialize(serialized, serialized_size);
687 if (!compiler_ctx_g)
688 throw runtime_error("failed to deserialize CLC compiler context");
689
690 clc_libclc_free_serialized(serialized);
691 }
692 }
693 compiler_ctx = compiler_ctx_g;
694
695 enable_d3d12_debug_layer();
696
697 factory = get_dxgi_factory();
698 if (!factory)
699 throw runtime_error("failed to create DXGI factory");
700
701 adapter = choose_adapter(factory);
702 if (!adapter)
703 throw runtime_error("failed to choose adapter");
704
705 dev = create_device(adapter);
706 if (!dev)
707 throw runtime_error("failed to create device");
708
709 if (FAILED(dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
710 __uuidof(cmdqueue_fence),
711 (void **)&cmdqueue_fence)))
712 throw runtime_error("failed to create fence\n");
713
714 D3D12_COMMAND_QUEUE_DESC queue_desc;
715 queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
716 queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
717 queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
718 queue_desc.NodeMask = 0;
719 if (FAILED(dev->CreateCommandQueue(&queue_desc,
720 __uuidof(cmdqueue),
721 (void **)&cmdqueue)))
722 throw runtime_error("failed to create command queue");
723
724 if (FAILED(dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE,
725 __uuidof(cmdalloc), (void **)&cmdalloc)))
726 throw runtime_error("failed to create command allocator");
727
728 if (FAILED(dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
729 cmdalloc, NULL, __uuidof(cmdlist), (void **)&cmdlist)))
730 throw runtime_error("failed to create command list");
731
732 D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
733 heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
734 heap_desc.NumDescriptors = 1000;
735 heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
736 heap_desc.NodeMask = 0;
737 if (FAILED(dev->CreateDescriptorHeap(&heap_desc,
738 __uuidof(uav_heap), (void **)&uav_heap)))
739 throw runtime_error("failed to create descriptor heap");
740
741 uav_heap_incr = dev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
742
743 event = CreateEvent(NULL, false, false, NULL);
744 if (!event)
745 throw runtime_error("Failed to create event");
746 fence_value = 1;
747 }
748
749 void
TearDown()750 ComputeTest::TearDown()
751 {
752 CloseHandle(event);
753
754 uav_heap->Release();
755 cmdlist->Release();
756 cmdalloc->Release();
757 cmdqueue->Release();
758 cmdqueue_fence->Release();
759 dev->Release();
760 adapter->Release();
761 factory->Release();
762 }
763
/* Storage for the static entry-point pointer; it is assigned in
 * run_shader_with_raw_args() and called from create_root_signature(). */
PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE ComputeTest::D3D12SerializeVersionedRootSignature;
765
766 bool
validate_module(const struct clc_dxil_object & dxil)767 validate_module(const struct clc_dxil_object &dxil)
768 {
769 struct dxil_validator *val = dxil_create_validator(NULL);
770 char *err;
771 bool res = dxil_validate_module(val, dxil.binary.data,
772 dxil.binary.size, &err);
773 if (!res && err)
774 fprintf(stderr, "D3D12: validation failed: %s", err);
775
776 dxil_destroy_validator(val);
777 return res;
778 }
779
780 static void
dump_blob(const char * path,const struct clc_dxil_object & dxil)781 dump_blob(const char *path, const struct clc_dxil_object &dxil)
782 {
783 FILE *fp = fopen(path, "wb");
784 if (fp) {
785 fwrite(dxil.binary.data, 1, dxil.binary.size, fp);
786 fclose(fp);
787 printf("D3D12: wrote '%s'...\n", path);
788 }
789 }
790
791 ComputeTest::Shader
compile(const std::vector<const char * > & sources,const std::vector<const char * > & compile_args,bool create_library)792 ComputeTest::compile(const std::vector<const char *> &sources,
793 const std::vector<const char *> &compile_args,
794 bool create_library)
795 {
796 struct clc_compile_args args = {
797 };
798 args.args = compile_args.data();
799 args.num_args = (unsigned)compile_args.size();
800 args.features.images = true;
801 args.features.images_read_write = true;
802 args.features.int64 = true;
803 ComputeTest::Shader shader;
804
805 std::vector<Shader> shaders;
806
807 args.source.name = "obj.cl";
808
809 for (unsigned i = 0; i < sources.size(); i++) {
810 args.source.value = sources[i];
811
812 clc_binary spirv{};
813 if (!clc_compile_c_to_spirv(&args, &logger, &spirv))
814 throw runtime_error("failed to compile object!");
815
816 Shader shader;
817 shader.obj = std::shared_ptr<clc_binary>(new clc_binary(spirv), [](clc_binary *spirv)
818 {
819 clc_free_spirv(spirv);
820 delete spirv;
821 });
822 shaders.push_back(shader);
823 }
824
825 if (shaders.size() == 1 && create_library)
826 return shaders[0];
827
828 return link(shaders, create_library);
829 }
830
831 ComputeTest::Shader
link(const std::vector<Shader> & sources,bool create_library)832 ComputeTest::link(const std::vector<Shader> &sources,
833 bool create_library)
834 {
835 std::vector<const clc_binary*> objs;
836 for (auto& source : sources)
837 objs.push_back(&*source.obj);
838
839 struct clc_linker_args link_args = {};
840 link_args.in_objs = objs.data();
841 link_args.num_in_objs = (unsigned)objs.size();
842 link_args.create_library = create_library;
843 clc_binary spirv{};
844 if (!clc_link_spirv(&link_args, &logger, &spirv))
845 throw runtime_error("failed to link objects!");
846
847 ComputeTest::Shader shader;
848 shader.obj = std::shared_ptr<clc_binary>(new clc_binary(spirv), [](clc_binary *spirv)
849 {
850 clc_free_spirv(spirv);
851 delete spirv;
852 });
853 if (!link_args.create_library)
854 configure(shader, NULL);
855
856 return shader;
857 }
858
859 ComputeTest::Shader
assemble(const char * source)860 ComputeTest::assemble(const char *source)
861 {
862 spvtools::SpirvTools tools(SPV_ENV_UNIVERSAL_1_0);
863 std::vector<uint32_t> binary;
864 if (!tools.Assemble(source, strlen(source), &binary))
865 throw runtime_error("failed to assemble");
866
867 ComputeTest::Shader shader;
868 shader.obj = std::shared_ptr<clc_binary>(new clc_binary{}, [](clc_binary *spirv)
869 {
870 free(spirv->data);
871 delete spirv;
872 });
873 shader.obj->size = binary.size() * 4;
874 shader.obj->data = malloc(shader.obj->size);
875 memcpy(shader.obj->data, binary.data(), shader.obj->size);
876
877 configure(shader, NULL);
878
879 return shader;
880 }
881
882 void
configure(Shader & shader,const struct clc_runtime_kernel_conf * conf)883 ComputeTest::configure(Shader &shader,
884 const struct clc_runtime_kernel_conf *conf)
885 {
886 if (!shader.metadata) {
887 shader.metadata = std::shared_ptr<clc_parsed_spirv>(new clc_parsed_spirv{}, [](clc_parsed_spirv *metadata)
888 {
889 clc_free_parsed_spirv(metadata);
890 delete metadata;
891 });
892 if (!clc_parse_spirv(shader.obj.get(), NULL, shader.metadata.get()))
893 throw runtime_error("failed to parse spirv!");
894 }
895
896 std::unique_ptr<clc_dxil_object> dxil(new clc_dxil_object{});
897 if (!clc_spirv_to_dxil(compiler_ctx, shader.obj.get(), shader.metadata.get(), "main_test", conf, nullptr, &logger, dxil.get()))
898 throw runtime_error("failed to compile kernel!");
899 shader.dxil = std::shared_ptr<clc_dxil_object>(dxil.release(), [](clc_dxil_object *dxil)
900 {
901 clc_free_dxil_object(dxil);
902 delete dxil;
903 });
904 }
905
906 void
validate(ComputeTest::Shader & shader)907 ComputeTest::validate(ComputeTest::Shader &shader)
908 {
909 dump_blob("unsigned.cso", *shader.dxil);
910 if (!validate_module(*shader.dxil))
911 throw runtime_error("failed to validate module!");
912
913 dump_blob("signed.cso", *shader.dxil);
914 }
915