1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016
5 // Mehdi Goli Codeplay Software Ltd.
6 // Ralph Potter Codeplay Software Ltd.
7 // Luke Iwanski Codeplay Software Ltd.
8 // Contact: <[email protected]>
9 // Benoit Steiner <[email protected]>
10 //
11 // This Source Code Form is subject to the terms of the Mozilla
12 // Public License v. 2.0. If a copy of the MPL was not distributed
13 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
14
15
16 #define EIGEN_TEST_NO_LONGDOUBLE
17 #define EIGEN_TEST_NO_COMPLEX
18
19 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
20 #define EIGEN_USE_SYCL
21
22 #include "main.h"
23
24 #include <Eigen/CXX11/Tensor>
25
26 using Eigen::Tensor;
27
template <typename DataType, int DataLayout, typename IndexType>
static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device)
{
  // Exercises the compile-time (static) chip operation,
  // gpu_tensor.chip<Dim>(offset), on a rank-5 tensor for every chip
  // dimension 0..4. Each chipped slice is evaluated on the SYCL device,
  // copied back to the host, and compared element-wise against direct
  // indexing into the host tensor.
  //
  // The `0l`/`1l`/... long literals match EIGEN_DEFAULT_DENSE_INDEX_TYPE
  // (int64_t) defined at the top of this file.
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
  IndexType sizeDim5 = 11;

  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
  // Chipping dimension 0 removes sizeDim1 from the shape.
  array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};

  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);

  tensor.setRandom();

  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
  const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
  DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
  DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));

  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);

  // chip<0>(1): slice at index 1 along dimension 0, evaluated on device.
  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
  gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l);
  sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);

  VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
  VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
  VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
  VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);

  for (IndexType i = 0; i < sizeDim2; ++i) {
    for (IndexType j = 0; j < sizeDim3; ++j) {
      for (IndexType k = 0; k < sizeDim4; ++k) {
        for (IndexType l = 0; l < sizeDim5; ++l) {
          VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
        }
      }
    }
  }

  // chip<1>(1): slice at index 1 along dimension 1.
  array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
  Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
  const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
  DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);

  gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l);
  sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);

  VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
  VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
  VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);

  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim3; ++j) {
      for (IndexType k = 0; k < sizeDim4; ++k) {
        for (IndexType l = 0; l < sizeDim5; ++l) {
          VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
        }
      }
    }
  }

  // chip<2>(2): slice at index 2 along dimension 2.
  array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
  Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
  const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
  DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);

  gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l);
  sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);

  VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
  VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);

  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim2; ++j) {
      for (IndexType k = 0; k < sizeDim4; ++k) {
        for (IndexType l = 0; l < sizeDim5; ++l) {
          VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
        }
      }
    }
  }

  // chip<3>(5): slice at index 5 along dimension 3.
  array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
  Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
  const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
  DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);

  gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l);
  sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);

  VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
  VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);

  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim2; ++j) {
      for (IndexType k = 0; k < sizeDim3; ++k) {
        for (IndexType l = 0; l < sizeDim5; ++l) {
          VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
        }
      }
    }
  }


  // chip<4>(7): slice at index 7 along the last dimension.
  array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
  const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
  DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);

  gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l);
  sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);

  VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
  VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);

  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim2; ++j) {
      for (IndexType k = 0; k < sizeDim3; ++k) {
        for (IndexType l = 0; l < sizeDim4; ++l) {
          VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
        }
      }
    }
  }

  // Release all device buffers allocated above.
  sycl_device.deallocate(gpu_data_tensor);
  sycl_device.deallocate(gpu_data_chip1);
  sycl_device.deallocate(gpu_data_chip2);
  sycl_device.deallocate(gpu_data_chip3);
  sycl_device.deallocate(gpu_data_chip4);
  sycl_device.deallocate(gpu_data_chip5);
}
176
template <typename DataType, int DataLayout, typename IndexType>
static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device)
{
  // Exercises the runtime (dynamic) chip operation,
  // gpu_tensor.chip(offset, dim), on a rank-5 tensor for every dimension
  // 0..4. Mirrors test_static_chip_sycl above, but the chip dimension is a
  // runtime argument rather than a template parameter. Each slice is
  // evaluated on the SYCL device, copied back, and compared element-wise
  // against direct host indexing.
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
  IndexType sizeDim5 = 11;

  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
  // Chipping dimension 0 removes sizeDim1 from the shape.
  array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};

  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);

  tensor.setRandom();

  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
  const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
  DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
  DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));

  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);

  // chip(1, 0): slice at index 1 along dimension 0.
  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
  gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l);
  sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);

  VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
  VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
  VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
  VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);

  for (IndexType i = 0; i < sizeDim2; ++i) {
    for (IndexType j = 0; j < sizeDim3; ++j) {
      for (IndexType k = 0; k < sizeDim4; ++k) {
        for (IndexType l = 0; l < sizeDim5; ++l) {
          VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
        }
      }
    }
  }

  // chip(1, 1): slice at index 1 along dimension 1.
  array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
  Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
  const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
  DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);

  gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l);
  sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);

  VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
  VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
  VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);

  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim3; ++j) {
      for (IndexType k = 0; k < sizeDim4; ++k) {
        for (IndexType l = 0; l < sizeDim5; ++l) {
          VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
        }
      }
    }
  }

  // chip(2, 2): slice at index 2 along dimension 2.
  array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
  Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
  const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
  DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);

  gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l);
  sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);

  VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
  VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);

  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim2; ++j) {
      for (IndexType k = 0; k < sizeDim4; ++k) {
        for (IndexType l = 0; l < sizeDim5; ++l) {
          VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
        }
      }
    }
  }

  // chip(5, 3): slice at index 5 along dimension 3.
  array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
  Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
  const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
  DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);

  gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l);
  sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);

  VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
  VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);

  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim2; ++j) {
      for (IndexType k = 0; k < sizeDim3; ++k) {
        for (IndexType l = 0; l < sizeDim5; ++l) {
          VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
        }
      }
    }
  }


  // chip(7, 4): slice at index 7 along the last dimension.
  array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
  const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
  DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);

  gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l);
  sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);

  VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
  VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);

  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim2; ++j) {
      for (IndexType k = 0; k < sizeDim3; ++k) {
        for (IndexType l = 0; l < sizeDim4; ++l) {
          VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
        }
      }
    }
  }
  // Release all device buffers allocated above.
  sycl_device.deallocate(gpu_data_tensor);
  sycl_device.deallocate(gpu_data_chip1);
  sycl_device.deallocate(gpu_data_chip2);
  sycl_device.deallocate(gpu_data_chip3);
  sycl_device.deallocate(gpu_data_chip4);
  sycl_device.deallocate(gpu_data_chip5);
}
324
325 template <typename DataType, int DataLayout, typename IndexType>
test_chip_in_expr(const Eigen::SyclDevice & sycl_device)326 static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) {
327
328 IndexType sizeDim1 = 2;
329 IndexType sizeDim2 = 3;
330 IndexType sizeDim3 = 5;
331 IndexType sizeDim4 = 7;
332 IndexType sizeDim5 = 11;
333
334 array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
335 array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
336
337 Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
338
339 Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
340 Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange);
341 tensor.setRandom();
342 tensor1.setRandom();
343
344 const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
345 const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
346 DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
347 DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
348 DataType* gpu_data_tensor1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
349
350 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
351 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
352 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange);
353
354
355 sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
356 sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize);
357 gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1;
358 sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
359
360 for (int i = 0; i < sizeDim2; ++i) {
361 for (int j = 0; j < sizeDim3; ++j) {
362 for (int k = 0; k < sizeDim4; ++k) {
363 for (int l = 0; l < sizeDim5; ++l) {
364 float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l);
365 VERIFY_IS_EQUAL(chip1(i,j,k,l), expected);
366 }
367 }
368 }
369 }
370
371 array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}};
372 Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange);
373 Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange);
374 tensor2.setRandom();
375 const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType);
376 DataType* gpu_data_tensor2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
377 DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
378 TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange);
379 TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
380
381 sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize);
382 gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2;
383 sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
384
385 for (int i = 0; i < sizeDim2; ++i) {
386 for (int j = 0; j < sizeDim4; ++j) {
387 for (int k = 0; k < sizeDim5; ++k) {
388 float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k);
389 VERIFY_IS_EQUAL(chip2(i,j,k), expected);
390 }
391 }
392 }
393 sycl_device.deallocate(gpu_data_tensor);
394 sycl_device.deallocate(gpu_data_tensor1);
395 sycl_device.deallocate(gpu_data_chip1);
396 sycl_device.deallocate(gpu_data_tensor2);
397 sycl_device.deallocate(gpu_data_chip2);
398 }
399
400 template <typename DataType, int DataLayout, typename IndexType>
test_chip_as_lvalue_sycl(const Eigen::SyclDevice & sycl_device)401 static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
402 {
403
404 IndexType sizeDim1 = 2;
405 IndexType sizeDim2 = 3;
406 IndexType sizeDim3 = 5;
407 IndexType sizeDim4 = 7;
408 IndexType sizeDim5 = 11;
409
410 array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
411 array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
412
413 Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
414 Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange);
415 Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange);
416 input1.setRandom();
417 input2.setRandom();
418
419
420 const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
421 const size_t input2TensorBuffSize =input2.size()*sizeof(DataType);
422 std::cout << tensorBuffSize << " , "<< input2TensorBuffSize << std::endl;
423 DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
424 DataType* gpu_data_input1 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
425 DataType* gpu_data_input2 = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize));
426
427 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
428 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange);
429 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange);
430
431 sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize);
432 gpu_tensor.device(sycl_device)=gpu_input1;
433 sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize);
434 gpu_tensor.template chip<0l>(1l).device(sycl_device)=gpu_input2;
435 sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
436
437 for (int i = 0; i < sizeDim1; ++i) {
438 for (int j = 0; j < sizeDim2; ++j) {
439 for (int k = 0; k < sizeDim3; ++k) {
440 for (int l = 0; l < sizeDim4; ++l) {
441 for (int m = 0; m < sizeDim5; ++m) {
442 if (i != 1) {
443 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
444 } else {
445 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
446 }
447 }
448 }
449 }
450 }
451 }
452
453 gpu_tensor.device(sycl_device)=gpu_input1;
454 array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
455 Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange);
456 input3.setRandom();
457
458 const size_t input3TensorBuffSize =input3.size()*sizeof(DataType);
459 DataType* gpu_data_input3 = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize));
460 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange);
461
462 sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize);
463 gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3;
464 sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
465
466 for (int i = 0; i < sizeDim1; ++i) {
467 for (int j = 0; j < sizeDim2; ++j) {
468 for (int k = 0; k <sizeDim3; ++k) {
469 for (int l = 0; l < sizeDim4; ++l) {
470 for (int m = 0; m < sizeDim5; ++m) {
471 if (j != 1) {
472 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
473 } else {
474 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
475 }
476 }
477 }
478 }
479 }
480 }
481
482 gpu_tensor.device(sycl_device)=gpu_input1;
483 array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
484 Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange);
485 input4.setRandom();
486
487 const size_t input4TensorBuffSize =input4.size()*sizeof(DataType);
488 DataType* gpu_data_input4 = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize));
489 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange);
490
491 sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize);
492 gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4;
493 sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
494
495 for (int i = 0; i < sizeDim1; ++i) {
496 for (int j = 0; j < sizeDim2; ++j) {
497 for (int k = 0; k <sizeDim3; ++k) {
498 for (int l = 0; l < sizeDim4; ++l) {
499 for (int m = 0; m < sizeDim5; ++m) {
500 if (k != 3) {
501 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
502 } else {
503 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
504 }
505 }
506 }
507 }
508 }
509 }
510
511 gpu_tensor.device(sycl_device)=gpu_input1;
512 array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
513 Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange);
514 input5.setRandom();
515
516 const size_t input5TensorBuffSize =input5.size()*sizeof(DataType);
517 DataType* gpu_data_input5 = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize));
518 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange);
519
520 sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize);
521 gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5;
522 sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
523
524 for (int i = 0; i < sizeDim1; ++i) {
525 for (int j = 0; j < sizeDim2; ++j) {
526 for (int k = 0; k <sizeDim3; ++k) {
527 for (int l = 0; l < sizeDim4; ++l) {
528 for (int m = 0; m < sizeDim5; ++m) {
529 if (l != 4) {
530 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
531 } else {
532 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
533 }
534 }
535 }
536 }
537 }
538 }
539 gpu_tensor.device(sycl_device)=gpu_input1;
540 array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
541 Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange);
542 input6.setRandom();
543
544 const size_t input6TensorBuffSize =input6.size()*sizeof(DataType);
545 DataType* gpu_data_input6 = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize));
546 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange);
547
548 sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize);
549 gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6;
550 sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
551
552 for (int i = 0; i < sizeDim1; ++i) {
553 for (int j = 0; j < sizeDim2; ++j) {
554 for (int k = 0; k <sizeDim3; ++k) {
555 for (int l = 0; l < sizeDim4; ++l) {
556 for (int m = 0; m < sizeDim5; ++m) {
557 if (m != 5) {
558 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
559 } else {
560 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
561 }
562 }
563 }
564 }
565 }
566 }
567
568
569 gpu_tensor.device(sycl_device)=gpu_input1;
570 Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange);
571 input7.setRandom();
572
573 DataType* gpu_data_input7 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
574 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange);
575
576 sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize);
577 gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l);
578 sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
579
580 for (int i = 0; i < sizeDim1; ++i) {
581 for (int j = 0; j < sizeDim2; ++j) {
582 for (int k = 0; k <sizeDim3; ++k) {
583 for (int l = 0; l < sizeDim4; ++l) {
584 for (int m = 0; m < sizeDim5; ++m) {
585 if (i != 0) {
586 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
587 } else {
588 VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
589 }
590 }
591 }
592 }
593 }
594 }
595 sycl_device.deallocate(gpu_data_tensor);
596 sycl_device.deallocate(gpu_data_input1);
597 sycl_device.deallocate(gpu_data_input2);
598 sycl_device.deallocate(gpu_data_input3);
599 sycl_device.deallocate(gpu_data_input4);
600 sycl_device.deallocate(gpu_data_input5);
601 sycl_device.deallocate(gpu_data_input6);
602 sycl_device.deallocate(gpu_data_input7);
603
604 }
605
sycl_chipping_test_per_device(dev_Selector s)606 template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){
607 QueueInterface queueInterface(s);
608 auto sycl_device = Eigen::SyclDevice(&queueInterface);
609 /* test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
610 test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
611 test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
612 test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
613 test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device);
614 test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device);*/
615 test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device);
616 // test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device);
617 }
EIGEN_DECLARE_TEST(cxx11_tensor_chipping_sycl)
{
  // Launch the per-device chipping test suite on each SYCL device that
  // Eigen reports as supported on this machine.
  const auto devices = Eigen::get_sycl_supported_devices();
  for (const auto& dev : devices) {
    CALL_SUBTEST(sycl_chipping_test_per_device<float>(dev));
  }
}
624