/aosp_15_r20/external/pytorch/aten/src/ATen/native/sparse/cuda/ |
H A D | SparseCUDABlas.cpp | 103 T *b, int64_t ldb, T *beta, T *c, int64_t ldc, in _csrmm2() 205 T *b, int64_t ldb, T beta, T *c, int64_t ldc) in csrmm2() 214 float *b, int64_t ldb, float beta, float *c, int64_t ldc) in csrmm2() 223 double *b, int64_t ldb, double beta, double *c, int64_t ldc) in csrmm2() 232 …c10::complex<float> *b, int64_t ldb, c10::complex<float> beta, c10::complex<float> *c, int64_t ldc) in csrmm2() 249 …c10::complex<double> *b, int64_t ldb, c10::complex<double> beta, c10::complex<double> *c, int64_t … in csrmm2() 264 void adjustLd(char transb, int64_t m, int64_t n, int64_t k, int64_t *ldb, int64_t *ldc) in adjustLd() 283 …onst float *csrvala, int *csrrowptra, int *csrcolinda, const float *b, int64_t ldb, const float *b… in Scsrmm2() 305 …st double *csrvala, int *csrrowptra, int *csrcolinda, const double *b, int64_t ldb, const double *… in Dcsrmm2() 330 …*csrvala, int *csrrowptra, int *csrcolinda, const complex_target_t *b, int64_t ldb, const complex_… in Ccsrmm2() [all …]
|
/aosp_15_r20/external/tensorflow/tensorflow/stream_executor/rocm/ |
H A D | rocm_blas.cc | 393 int lda, const DeviceMemoryBase &b, int ldb, in DoBlasGemm() 557 int lda, const DeviceMemory<Eigen::half> &b, int ldb, float beta, in DoBlasGemmWithProfiling() 568 const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c, in DoBlasGemmWithProfiling() 578 const DeviceMemory<double> &b, int ldb, double beta, in DoBlasGemmWithProfiling() 590 const DeviceMemory<std::complex<float>> &b, int ldb, in DoBlasGemmWithProfiling() 602 const DeviceMemory<std::complex<double>> &b, int ldb, in DoBlasGemmWithProfiling() 624 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta, in DoBlasGemmWithProfilingImpl() 633 blas::DataType type_b, int ldb, const void *beta, DeviceMemoryBase *c, in DoBlasGemmWithAlgorithm() 644 blas::DataType type_b, int ldb, int64_t stride_b, const void *beta, in DoBlasGemmStridedBatchedWithAlgorithm() 769 const absl::Span<DeviceMemory<T> *const> &b_ptrs_to_wrappers, int ldb, in DoBlasGemmBatchedInternal() [all …]
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cpu/ |
H A D | BlasKernel.cpp | 108 int64_t ldb, in gemm_notrans_() 144 int64_t ldb, in gemm_notrans_() 170 const scalar_t *b, int64_t ldb, in gemm_transa_() 202 int64_t ldb, in gemm_transb_impl() 235 int64_t ldb, in gemm_transb_() 257 int64_t ldb, in gemm_transb_() 309 const scalar_t *b, int64_t ldb, in gemm_transab_() 339 int64_t ldb, in gemm_notrans_() 387 const at::Half *b, int64_t ldb, in gemm_transa_() 419 const at::BFloat16 *b, int64_t ldb, in gemm_transa_() [all …]
|
H A D | int8mm_kernel.cpp | 38 int ldb, in tinygemm_kernel() 233 int ldb, in tinygemm_kernel_() 275 int ldb, in tinygemm_kernel() 288 int ldb, in tinygemm_kernel() 301 int ldb, in tinygemm_kernel() 316 int ldb, in tinygemm_kernel()
|
H A D | int4mm_kernel.cpp | 59 int ldb, in tinygemm_kernel() 209 int ldb, in tinygemm_kernel() 386 int ldb, in tinygemm_kernel_() 434 int ldb, in tinygemm_kernel() 448 int ldb, in tinygemm_kernel() 462 int ldb, in tinygemm_kernel() 520 int ldb, in tinygemm_kernel()
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/ |
H A D | CPUBlas.cpp | 61 int64_t *lda, int64_t *ldb, int64_t *ldc) { in normalize_last_dims() 89 int64_t lda, int64_t ldb, int64_t ldc) { in use_blas_gemm() 132 const double *b, int64_t ldb, in gemm() 175 const float *b, int64_t ldb, in gemm() 223 const c10::complex<double> *b, int64_t ldb, in gemm() 266 const c10::complex<float> *b, int64_t ldb, in gemm() 309 const at::BFloat16 *b, int64_t ldb, in gemm() 349 const at::Half *b, int64_t ldb, in gemm() 368 const at::BFloat16 *b, int64_t ldb, in gemm() 419 const at::Half *b, int64_t ldb, in gemm() [all …]
|
H A D | BatchLinearAlgebra.cpp | 807 …, int n, int nrhs, c10::complex<double> *a, int lda, c10::complex<double> *b, int ldb, int *info) { in lapackCholeskySolve() 811 …lo, int n, int nrhs, c10::complex<float> *a, int lda, c10::complex<float> *b, int ldb, int *info) { in lapackCholeskySolve() 815 …leskySolve<double>(char uplo, int n, int nrhs, double *a, int lda, double *b, int ldb, int *info) { in lapackCholeskySolve() 819 …CholeskySolve<float>(char uplo, int n, int nrhs, float *a, int lda, float *b, int ldb, int *info) { in lapackCholeskySolve() 1128 int ldb, in lapackLdlSolveSymmetric() 1142 int ldb, in lapackLdlSolveSymmetric() 1156 int ldb, in lapackLdlSolveSymmetric() 1179 int ldb, in lapackLdlSolveSymmetric() 1202 int ldb, in lapackLdlSolveHermitian() 1216 int ldb, in lapackLdlSolveHermitian() [all …]
|
H A D | Unfold3d.cpp | 22 void MatCopy(int64_t M, int64_t N, int64_t lda, int64_t ldb, const T* A, T* B) { in MatCopy() 34 int64_t ldb, in MatCopy() 82 int64_t ldb, in MatCopy() 93 int64_t ldb, in MatCopy() 105 int64_t ldb, in MatCopy() 118 int64_t ldb, in MatCopy()
|
H A D | ConvolutionMM2d.cpp | 252 const int64_t ldb = k; in slow_conv2d_update_output_frame() local 270 const int64_t ldb = k; in slow_conv2d_update_output_frame() local 307 const int64_t ldb = k; in slow_conv2d_backward_update_grad_input_frame() local 325 const int64_t ldb = n; in slow_conv2d_backward_update_grad_input_frame() local 449 const int64_t ldb = n; in slow_conv2d_backward_weight_frame() local 467 const int64_t ldb = k; in slow_conv2d_backward_weight_frame() local
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/ |
H A D | cuda_blas.cc | 612 int lda, const DeviceMemoryBase &b, int ldb, in DoBlasInternalImpl() 792 int lda, const DeviceMemory<Eigen::half> &b, int ldb, float beta, in DoBlasInternalImpl() 803 const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c, in DoBlasInternalImpl() 813 const DeviceMemory<double> &b, int ldb, double beta, in DoBlasInternalImpl() 825 const DeviceMemory<std::complex<float>> &b, int ldb, in DoBlasInternalImpl() 837 const DeviceMemory<std::complex<double>> &b, int ldb, in DoBlasInternalImpl() 881 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta, in DoBlasInternalImpl() 1019 blas::DataType type_b, int ldb, const void *beta, DeviceMemoryBase *c, in DoBlasInternalImpl() 1048 blas::DataType type_b, int ldb, int64_t stride_b, const void *beta, in DoBlasInternalImpl() 1191 const DeviceMemorySlice<T> &b_ptrs_to_wrappers, int ldb, Scalar beta, in DoBlasInternalImpl() [all …]
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/mkl/ |
H A D | LinearAlgebra.cpp | 13 const float** A, const MKL_INT lda, const float** B, const MKL_INT ldb, const float beta, in mkl_gemm_batched() 21 const double** A, const MKL_INT lda, const double** B, const MKL_INT ldb, const double beta, in mkl_gemm_batched() 29 …const c10::complex<float>** A, const MKL_INT lda, const c10::complex<float>** B, const MKL_INT ldb, in mkl_gemm_batched() 37 …nst c10::complex<double>** A, const MKL_INT lda, const c10::complex<double>** B, const MKL_INT ldb, in mkl_gemm_batched() 45 const c10::BFloat16* A, MKL_INT lda, const c10::BFloat16* B, MKL_INT ldb, in mkl_gemm_bf16bf16f32() 53 const c10::Half* A, int lda, const c10::Half* B, int ldb, in mkl_gemm_f16f16f32()
|
/aosp_15_r20/external/executorch/kernels/optimized/blas/ |
H A D | BlasKernel.h | 76 int64_t ldb, in gemm_notrans_() 113 int64_t ldb, in gemm_notrans_() 139 const scalar_t *b, int64_t ldb, in gemm_transa_() 221 int64_t ldb, in gemm_transb_() 258 int64_t ldb, in gemm_transb_() 284 const scalar_t *b, int64_t ldb, in gemm_transab_()
|
H A D | CPUBlas.cpp | 51 int64_t *lda, int64_t *ldb, int64_t *ldc) { in normalize_last_dims() 80 const double *b, int64_t ldb, in gemm() 120 const float *b, int64_t ldb, in gemm() 160 const Half *b, int64_t ldb, in gemm() 183 const BFloat16 *b, int64_t ldb, in gemm()
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/mkldnn/ |
H A D | Matmul.cpp | 40 const c10::BFloat16 *b, int64_t ldb, in mkldnn_bf16_gemm() 51 const c10::Half *b, int64_t ldb, in mkldnn_fp16_gemm() 61 const float *b, int64_t ldb, in mkldnn_bf32_gemm() 123 const scalar_t *b_data, int64_t ldb, in mkldnn_gemm() 199 const c10::BFloat16 *b, int64_t ldb, in mkldnn_bf16_gemm() 210 const c10::Half *b, int64_t ldb, in mkldnn_fp16_gemm() 221 const float *b, int64_t ldb, in mkldnn_bf32_gemm() 469 const int ldb = transb == 'T' ? mat2.stride(1) : mat2.stride(0); in _mkldnn_gemm_i8i8i32_with_blas() local
|
/aosp_15_r20/external/cblas/testing/ |
H A D | c_zblas3.c | 16 CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, in F77_zgemm() 92 CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, in F77_zhemm() 156 CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, in F77_zsymm() 322 CBLAS_TEST_ZOMPLEX *b, int *ldb, double *beta, in F77_zher2k() 386 CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, in F77_zsyr2k() 450 int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { in F77_ztrmm() 509 int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { in F77_ztrsm()
|
H A D | c_cblas3.c | 16 CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, in F77_cgemm() 92 CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, in F77_chemm() 156 CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, in F77_csymm() 322 CBLAS_TEST_COMPLEX *b, int *ldb, float *beta, in F77_cher2k() 386 CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, in F77_csyr2k() 450 int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { in F77_ctrmm() 509 int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { in F77_ctrsm()
|
H A D | c_sblas3.c | 13 int *k, float *alpha, float *a, int *lda, float *b, int *ldb, in F77_sgemm() 74 float *alpha, float *a, int *lda, float *b, int *ldb, in F77_ssymm() 176 float *alpha, float *a, int *lda, float *b, int *ldb, in F77_ssyr2k() 232 int *ldb) { in F77_strmm() 283 int *ldb) { in F77_strsm()
|
H A D | c_dblas3.c | 15 int *k, double *alpha, double *a, int *lda, double *b, int *ldb, in F77_dgemm() 77 double *alpha, double *a, int *lda, double *b, int *ldb, in F77_dsymm() 179 double *alpha, double *a, int *lda, double *b, int *ldb, in F77_dsyr2k() 235 int *ldb) { in F77_dtrmm() 286 int *ldb) { in F77_dtrsm()
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/ |
H A D | stream.h | 757 const DeviceMemory<InputType> &b, int ldb, in ThenBlasGemm() 771 const DeviceMemory<InputType> &b, int ldb, in ThenBlasGemm() 781 const DeviceMemory<InputType> &b, int ldb, in ThenBlasGemm() 819 const DeviceMemory<InputType> &b, int ldb, in ThenBlasGemm() 868 const DeviceMemory<InputType> &b, int ldb, DeviceMemory<OutputType> *c, in ThenBlasGemmWithAlgorithm() 883 const DeviceMemory<InputType> &b, int ldb, ConstantType beta, in ThenBlasGemmWithAlgorithm() 921 int64_t stride_a, const DeviceMemory<InputType> &b, int ldb, in ThenBlasGemmStridedBatchedWithAlgorithm() 1036 int64_t stride_a, const DeviceMemory<InputType> &b, int ldb, in ThenBlasGemmStridedBatched()
|
H A D | stream.cc | 1454 const DeviceMemory<Eigen::half> &b, int ldb, float beta, in ThenBlasGemmWithProfiling() 1474 const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c, in ThenBlasGemmWithProfiling() 1493 const DeviceMemory<double> &b, int ldb, double beta, in ThenBlasGemmWithProfiling() 1514 const DeviceMemory<std::complex<float>> &b, int ldb, in ThenBlasGemmWithProfiling() 1536 const DeviceMemory<std::complex<double>> &b, int ldb, in ThenBlasGemmWithProfiling() 1558 DeviceMemory<float> *b, int ldb) { in ThenBlasTrsm() 1574 DeviceMemory<double> *b, int ldb) { in ThenBlasTrsm() 1591 int ldb) { in ThenBlasTrsm() 1609 int ldb) { in ThenBlasTrsm() 1626 DeviceMemory<float *> *bs, int ldb, in ThenBlasTrsmBatched() [all …]
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/nested/cuda/ |
H A D | NestedTensorMatmul.cu | 52 const std::vector<int64_t>& ldb, in gemm_grouped_cuda_internal() 184 const std::vector<int64_t>& ldb, in group_gemm_dispatch() 198 const std::vector<int64_t>& ldb, in group_gemm_dispatch() 224 const std::vector<int64_t>& ldb, in group_gemm_dispatch() 349 std::vector<int64_t> ldb(ntensors); in bmm_nested_cuda() local
|
/aosp_15_r20/external/ComputeLibrary/src/core/NEON/kernels/arm_gemm/ |
H A D | quantize_wrapper.hpp | 80 void col_sums_pretransposed(const To *B, const int ldb, const int B_multi_stride) { in col_sums_pretransposed() 122 const To *B, const int ldb, const int B_multi_stride, in set_arrays() 182 …void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) overri… in requantize_bias() 187 …void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) over… in pretranspose_B_array()
|
/aosp_15_r20/external/gemmlowp/eight_bit_int_gemm/ |
H A D | eight_bit_int_gemm.cc | 71 const std::uint8_t* b, std::int32_t b_offset, int ldb, in EightBitIntGemmImpl() 111 std::int32_t b_offset, int ldb, std::int32_t* c, in EightBitIntGemmInt32Impl() 218 int m, int n, int k, int lda, int ldb, int ldc, in CanHandleMetaFastpath() 304 std::int32_t b_offset, int ldb, std::uint8_t* c, in EightBitIntGemm() 342 std::int32_t ldb, float* c, float c_offset, in EightBitIntGemm()
|
/aosp_15_r20/external/eigen/bench/ |
H A D | bench_gemm.cpp | 90 int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows(); in blas_gemm() local 102 int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows(); in blas_gemm() local 114 int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows(); in blas_gemm() local 126 int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows(); in blas_gemm() local
|
/aosp_15_r20/frameworks/rs/cpu_ref/ |
H A D | rsCpuIntrinsicBLAS.cpp | 69 int* ldb, in initABC() 154 int lda = 0, ldb = 0, ldc = 0; in walk_tiled_gemm() local 257 int lda = 0, ldb = 0, ldc = 0; in invokeForEach() local 835 const uint8_t* b, uint8_t b_offset, size_t ldb, in kernelBNNM()
|