// xref: /aosp_15_r20/external/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp (revision bf2c37156dfe67e5dfebd6d394bad8b2ab5804d4)
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <[email protected]>
// Benoit Steiner <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"

#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;
28 template <typename DataType, int DataLayout, typename IndexType>
test_static_chip_sycl(const Eigen::SyclDevice & sycl_device)29 static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device)
30 {
31   IndexType sizeDim1 = 2;
32   IndexType sizeDim2 = 3;
33   IndexType sizeDim3 = 5;
34   IndexType sizeDim4 = 7;
35   IndexType sizeDim5 = 11;
36 
37   array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
38   array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
39 
40   Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
41   Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
42 
43   tensor.setRandom();
44 
45   const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
46   const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
47   DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
48   DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
49 
50   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
51   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
52 
53   sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
54   gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l);
55   sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
56 
57   VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
58   VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
59   VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
60   VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
61 
62   for (IndexType i = 0; i < sizeDim2; ++i) {
63     for (IndexType j = 0; j < sizeDim3; ++j) {
64       for (IndexType k = 0; k < sizeDim4; ++k) {
65         for (IndexType l = 0; l < sizeDim5; ++l) {
66           VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
67         }
68       }
69     }
70   }
71 
72   array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
73   Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
74   const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
75   DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
76   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
77 
78   gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l);
79   sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
80 
81   VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
82   VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
83   VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
84   VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
85 
86   for (IndexType i = 0; i < sizeDim1; ++i) {
87     for (IndexType j = 0; j < sizeDim3; ++j) {
88       for (IndexType k = 0; k < sizeDim4; ++k) {
89         for (IndexType l = 0; l < sizeDim5; ++l) {
90           VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
91         }
92       }
93     }
94   }
95 
96   array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
97   Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
98   const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
99   DataType* gpu_data_chip3  = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
100   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
101 
102   gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l);
103   sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
104 
105   VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
106   VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
107   VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
108   VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
109 
110   for (IndexType i = 0; i < sizeDim1; ++i) {
111     for (IndexType j = 0; j < sizeDim2; ++j) {
112       for (IndexType k = 0; k < sizeDim4; ++k) {
113         for (IndexType l = 0; l < sizeDim5; ++l) {
114           VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
115         }
116       }
117     }
118   }
119 
120   array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
121   Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
122   const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
123   DataType* gpu_data_chip4  = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
124   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
125 
126   gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l);
127   sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
128 
129   VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
130   VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
131   VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
132   VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
133 
134   for (IndexType i = 0; i < sizeDim1; ++i) {
135     for (IndexType j = 0; j < sizeDim2; ++j) {
136       for (IndexType k = 0; k < sizeDim3; ++k) {
137         for (IndexType l = 0; l < sizeDim5; ++l) {
138           VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
139         }
140       }
141     }
142   }
143 
144 
145   array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
146   Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
147   const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
148   DataType* gpu_data_chip5  = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
149   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
150 
151   gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l);
152   sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
153 
154   VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
155   VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
156   VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
157   VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
158 
159   for (IndexType i = 0; i < sizeDim1; ++i) {
160     for (IndexType j = 0; j < sizeDim2; ++j) {
161       for (IndexType k = 0; k < sizeDim3; ++k) {
162         for (IndexType l = 0; l < sizeDim4; ++l) {
163           VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
164         }
165       }
166     }
167   }
168 
169   sycl_device.deallocate(gpu_data_tensor);
170   sycl_device.deallocate(gpu_data_chip1);
171   sycl_device.deallocate(gpu_data_chip2);
172   sycl_device.deallocate(gpu_data_chip3);
173   sycl_device.deallocate(gpu_data_chip4);
174   sycl_device.deallocate(gpu_data_chip5);
175 }
176 
177 template <typename DataType, int DataLayout, typename IndexType>
test_dynamic_chip_sycl(const Eigen::SyclDevice & sycl_device)178 static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device)
179 {
180   IndexType sizeDim1 = 2;
181   IndexType sizeDim2 = 3;
182   IndexType sizeDim3 = 5;
183   IndexType sizeDim4 = 7;
184   IndexType sizeDim5 = 11;
185 
186   array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
187   array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
188 
189   Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
190   Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
191 
192   tensor.setRandom();
193 
194   const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
195   const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
196   DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
197   DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
198 
199   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
200   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
201 
202   sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
203   gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l);
204   sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
205 
206   VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
207   VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
208   VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
209   VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
210 
211   for (IndexType i = 0; i < sizeDim2; ++i) {
212     for (IndexType j = 0; j < sizeDim3; ++j) {
213       for (IndexType k = 0; k < sizeDim4; ++k) {
214         for (IndexType l = 0; l < sizeDim5; ++l) {
215           VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
216         }
217       }
218     }
219   }
220 
221   array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
222   Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
223   const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
224   DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
225   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
226 
227   gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l);
228   sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
229 
230   VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
231   VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
232   VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
233   VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
234 
235   for (IndexType i = 0; i < sizeDim1; ++i) {
236     for (IndexType j = 0; j < sizeDim3; ++j) {
237       for (IndexType k = 0; k < sizeDim4; ++k) {
238         for (IndexType l = 0; l < sizeDim5; ++l) {
239           VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
240         }
241       }
242     }
243   }
244 
245   array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
246   Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
247   const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
248   DataType* gpu_data_chip3  = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
249   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
250 
251   gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l);
252   sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
253 
254   VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
255   VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
256   VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
257   VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
258 
259   for (IndexType i = 0; i < sizeDim1; ++i) {
260     for (IndexType j = 0; j < sizeDim2; ++j) {
261       for (IndexType k = 0; k < sizeDim4; ++k) {
262         for (IndexType l = 0; l < sizeDim5; ++l) {
263           VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
264         }
265       }
266     }
267   }
268 
269   array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
270   Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
271   const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
272   DataType* gpu_data_chip4  = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
273   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
274 
275   gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l);
276   sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
277 
278   VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
279   VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
280   VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
281   VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
282 
283   for (IndexType i = 0; i < sizeDim1; ++i) {
284     for (IndexType j = 0; j < sizeDim2; ++j) {
285       for (IndexType k = 0; k < sizeDim3; ++k) {
286         for (IndexType l = 0; l < sizeDim5; ++l) {
287           VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
288         }
289       }
290     }
291   }
292 
293 
294   array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
295   Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
296   const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
297   DataType* gpu_data_chip5  = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
298   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
299 
300   gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l);
301   sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
302 
303   VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
304   VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
305   VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
306   VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
307 
308   for (IndexType i = 0; i < sizeDim1; ++i) {
309     for (IndexType j = 0; j < sizeDim2; ++j) {
310       for (IndexType k = 0; k < sizeDim3; ++k) {
311         for (IndexType l = 0; l < sizeDim4; ++l) {
312           VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
313         }
314       }
315     }
316   }
317   sycl_device.deallocate(gpu_data_tensor);
318   sycl_device.deallocate(gpu_data_chip1);
319   sycl_device.deallocate(gpu_data_chip2);
320   sycl_device.deallocate(gpu_data_chip3);
321   sycl_device.deallocate(gpu_data_chip4);
322   sycl_device.deallocate(gpu_data_chip5);
323 }
324 
325 template <typename DataType, int DataLayout, typename IndexType>
test_chip_in_expr(const Eigen::SyclDevice & sycl_device)326 static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) {
327 
328   IndexType sizeDim1 = 2;
329   IndexType sizeDim2 = 3;
330   IndexType sizeDim3 = 5;
331   IndexType sizeDim4 = 7;
332   IndexType sizeDim5 = 11;
333 
334   array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
335   array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
336 
337   Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
338 
339   Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
340   Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange);
341   tensor.setRandom();
342   tensor1.setRandom();
343 
344   const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
345   const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
346   DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
347   DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
348   DataType* gpu_data_tensor1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
349 
350   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
351   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
352   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange);
353 
354 
355   sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
356   sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize);
357   gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1;
358   sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
359 
360   for (int i = 0; i < sizeDim2; ++i) {
361     for (int j = 0; j < sizeDim3; ++j) {
362       for (int k = 0; k < sizeDim4; ++k) {
363         for (int l = 0; l < sizeDim5; ++l) {
364           float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l);
365           VERIFY_IS_EQUAL(chip1(i,j,k,l), expected);
366         }
367       }
368     }
369   }
370 
371   array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}};
372   Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange);
373   Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange);
374   tensor2.setRandom();
375   const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType);
376   DataType* gpu_data_tensor2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
377   DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
378   TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange);
379   TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
380 
381   sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize);
382   gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2;
383   sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
384 
385   for (int i = 0; i < sizeDim2; ++i) {
386     for (int j = 0; j < sizeDim4; ++j) {
387       for (int k = 0; k < sizeDim5; ++k) {
388         float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k);
389         VERIFY_IS_EQUAL(chip2(i,j,k), expected);
390       }
391     }
392   }
393   sycl_device.deallocate(gpu_data_tensor);
394   sycl_device.deallocate(gpu_data_tensor1);
395   sycl_device.deallocate(gpu_data_chip1);
396   sycl_device.deallocate(gpu_data_tensor2);
397   sycl_device.deallocate(gpu_data_chip2);
398 }
399 
400 template <typename DataType, int DataLayout, typename IndexType>
test_chip_as_lvalue_sycl(const Eigen::SyclDevice & sycl_device)401 static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
402 {
403 
404   IndexType sizeDim1 = 2;
405   IndexType sizeDim2 = 3;
406   IndexType sizeDim3 = 5;
407   IndexType sizeDim4 = 7;
408   IndexType sizeDim5 = 11;
409 
410   array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
411   array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
412 
413   Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
414   Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange);
415   Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange);
416   input1.setRandom();
417   input2.setRandom();
418 
419 
420   const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
421   const size_t input2TensorBuffSize =input2.size()*sizeof(DataType);
422   std::cout << tensorBuffSize << " , "<<  input2TensorBuffSize << std::endl;
423   DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
424   DataType* gpu_data_input1  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
425   DataType* gpu_data_input2  = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize));
426 
427   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
428   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange);
429   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange);
430 
431   sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize);
432   gpu_tensor.device(sycl_device)=gpu_input1;
433   sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize);
434   gpu_tensor.template chip<0l>(1l).device(sycl_device)=gpu_input2;
435   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
436 
437   for (int i = 0; i < sizeDim1; ++i) {
438     for (int j = 0; j < sizeDim2; ++j) {
439       for (int k = 0; k < sizeDim3; ++k) {
440         for (int l = 0; l < sizeDim4; ++l) {
441           for (int m = 0; m < sizeDim5; ++m) {
442             if (i != 1) {
443               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
444             } else {
445               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
446             }
447           }
448         }
449       }
450     }
451   }
452 
453   gpu_tensor.device(sycl_device)=gpu_input1;
454   array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
455   Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange);
456   input3.setRandom();
457 
458   const size_t input3TensorBuffSize =input3.size()*sizeof(DataType);
459   DataType* gpu_data_input3  = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize));
460   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange);
461 
462   sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize);
463   gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3;
464   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
465 
466   for (int i = 0; i < sizeDim1; ++i) {
467     for (int j = 0; j < sizeDim2; ++j) {
468       for (int k = 0; k <sizeDim3; ++k) {
469         for (int l = 0; l < sizeDim4; ++l) {
470           for (int m = 0; m < sizeDim5; ++m) {
471             if (j != 1) {
472               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
473             } else {
474               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
475             }
476           }
477         }
478       }
479     }
480   }
481 
482   gpu_tensor.device(sycl_device)=gpu_input1;
483   array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
484   Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange);
485   input4.setRandom();
486 
487   const size_t input4TensorBuffSize =input4.size()*sizeof(DataType);
488   DataType* gpu_data_input4  = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize));
489   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange);
490 
491   sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize);
492   gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4;
493   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
494 
495   for (int i = 0; i < sizeDim1; ++i) {
496     for (int j = 0; j < sizeDim2; ++j) {
497       for (int k = 0; k <sizeDim3; ++k) {
498         for (int l = 0; l < sizeDim4; ++l) {
499           for (int m = 0; m < sizeDim5; ++m) {
500             if (k != 3) {
501               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
502             } else {
503               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
504             }
505           }
506         }
507       }
508     }
509   }
510 
511   gpu_tensor.device(sycl_device)=gpu_input1;
512   array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
513   Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange);
514   input5.setRandom();
515 
516   const size_t input5TensorBuffSize =input5.size()*sizeof(DataType);
517   DataType* gpu_data_input5  = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize));
518   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange);
519 
520   sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize);
521   gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5;
522   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
523 
524   for (int i = 0; i < sizeDim1; ++i) {
525     for (int j = 0; j < sizeDim2; ++j) {
526       for (int k = 0; k <sizeDim3; ++k) {
527         for (int l = 0; l < sizeDim4; ++l) {
528           for (int m = 0; m < sizeDim5; ++m) {
529             if (l != 4) {
530               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
531             } else {
532               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
533             }
534           }
535         }
536       }
537     }
538   }
539   gpu_tensor.device(sycl_device)=gpu_input1;
540   array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
541   Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange);
542   input6.setRandom();
543 
544   const size_t input6TensorBuffSize =input6.size()*sizeof(DataType);
545   DataType* gpu_data_input6  = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize));
546   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange);
547 
548   sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize);
549   gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6;
550   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
551 
552   for (int i = 0; i < sizeDim1; ++i) {
553     for (int j = 0; j < sizeDim2; ++j) {
554       for (int k = 0; k <sizeDim3; ++k) {
555         for (int l = 0; l < sizeDim4; ++l) {
556           for (int m = 0; m < sizeDim5; ++m) {
557             if (m != 5) {
558               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
559             } else {
560               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
561             }
562           }
563         }
564       }
565     }
566   }
567 
568 
569   gpu_tensor.device(sycl_device)=gpu_input1;
570   Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange);
571   input7.setRandom();
572 
573   DataType* gpu_data_input7  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
574   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange);
575 
576   sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize);
577   gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l);
578   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
579 
580   for (int i = 0; i < sizeDim1; ++i) {
581     for (int j = 0; j < sizeDim2; ++j) {
582       for (int k = 0; k <sizeDim3; ++k) {
583         for (int l = 0; l < sizeDim4; ++l) {
584           for (int m = 0; m < sizeDim5; ++m) {
585             if (i != 0) {
586               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
587             } else {
588               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
589             }
590           }
591         }
592       }
593     }
594   }
595   sycl_device.deallocate(gpu_data_tensor);
596   sycl_device.deallocate(gpu_data_input1);
597   sycl_device.deallocate(gpu_data_input2);
598   sycl_device.deallocate(gpu_data_input3);
599   sycl_device.deallocate(gpu_data_input4);
600   sycl_device.deallocate(gpu_data_input5);
601   sycl_device.deallocate(gpu_data_input6);
602   sycl_device.deallocate(gpu_data_input7);
603 
604 }
605 
sycl_chipping_test_per_device(dev_Selector s)606 template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){
607   QueueInterface queueInterface(s);
608   auto sycl_device = Eigen::SyclDevice(&queueInterface);
609  /* test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
610   test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
611   test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
612   test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
613   test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device);
614   test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device);*/
615   test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device);
616  // test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device);
617 }
// Test entry point: run the chipping suite on every SYCL device Eigen finds.
EIGEN_DECLARE_TEST(cxx11_tensor_chipping_sycl)
{
  for (const auto& dev : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_chipping_test_per_device<float>(dev));
  }
}
624