/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/linalg/ |
H A D | CUDASolver.cpp | 87 …cusolverDnHandle_t handle, int n, int nrhs, double* dA, int lda, int* ipiv, double* ret, int ldb, … in getrs() 94 …cusolverDnHandle_t handle, int n, int nrhs, float* dA, int lda, int* ipiv, float* ret, int ldb, in… in getrs() 105 int lda, in getrs() 130 int lda, in getrs() 282 …cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, float *A, int lda, floa… in gesvdj_buffersize() 290 …cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, double *A, int lda, dou… in gesvdj_buffersize() 298 … handle, cusolverEigMode_t jobz, int econ, int m, int n, c10::complex<float> *A, int lda, float *S, in gesvdj_buffersize() 313 …andle, cusolverEigMode_t jobz, int econ, int m, int n, c10::complex<double> *A, int lda, double *S, in gesvdj_buffersize() 329 …cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, float* A, int lda, floa… in gesvdj() 337 …cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, double* A, int lda, dou… in gesvdj() [all …]
|
H A D | BatchLinearAlgebraLib.cpp | 264 int lda = std::max<int>(1, m); in apply_svd_cusolver_gesvd() local 381 int lda = A.stride(-1); in apply_svd_cusolver_gesvdj() local 448 int lda = A.stride(-1); in apply_svd_cusolver_gesvdjBatched() local 723 const int64_t lda = std::max<int64_t>(1, n); in apply_cholesky_cusolver_potrf_looped() local 792 const int lda = std::max<int>(1, n); in apply_cholesky_cusolver_potrfBatched() local 828 const int64_t lda = std::max<int64_t>(1, n); in apply_cholesky_cusolver_potrs() local 884 const int64_t lda = std::max<int64_t>(1, n); in apply_cholesky_cusolver_potrsBatched() local 971 int64_t lda = std::max<int64_t>(1, m); in apply_geqrf() local 1092 auto lda = std::max<int>(1, left ? m : n); in apply_ormqr() local 1159 auto lda = std::max<int>(1, m); in apply_orgqr() local [all …]
|
H A D | BatchLinearAlgebraLibBlas.cpp | 84 auto lda = std::max<int>(1, m); in apply_geqrf_batched() local 114 auto lda = cuda_int_cast(std::max<int>(1, n), "lda"); in apply_lu_factor_batched_cublas() local 140 auto lda = cuda_int_cast(std::max<int>(1, m), "lda"); in apply_lu_solve_batched_cublas() local 175 auto lda = std::max<int>(1, cuda_int_cast(A.size(-2), "lda")); in apply_triangular_solve() local 205 auto lda = std::max<int>(1, cuda_int_cast(A.size(-2), "lda")); in apply_triangular_solve_batched() local 254 auto lda = std::max<int>(1, m); in apply_gels_batched() local
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/ |
H A D | BatchLinearAlgebra.cpp | 791 template<> void lapackLu<c10::complex<double>>(int m, int n, c10::complex<double> *a, int lda, int … in lapackLu() 795 template<> void lapackLu<c10::complex<float>>(int m, int n, c10::complex<float> *a, int lda, int *i… in lapackLu() 799 template<> void lapackLu<double>(int m, int n, double *a, int lda, int *ipiv, int *info) { in lapackLu() 803 template<> void lapackLu<float>(int m, int n, float *a, int lda, int *ipiv, int *info) { in lapackLu() 807 …0::complex<double>>(char uplo, int n, int nrhs, c10::complex<double> *a, int lda, c10::complex<dou… in lapackCholeskySolve() 811 …c10::complex<float>>(char uplo, int n, int nrhs, c10::complex<float> *a, int lda, c10::complex<flo… in lapackCholeskySolve() 815 template<> void lapackCholeskySolve<double>(char uplo, int n, int nrhs, double *a, int lda, double … in lapackCholeskySolve() 819 template<> void lapackCholeskySolve<float>(char uplo, int n, int nrhs, float *a, int lda, float *b,… in lapackCholeskySolve() 823 …packCholesky<c10::complex<double>>(char uplo, int n, c10::complex<double> *a, int lda, int *info) { in lapackCholesky() 827 …lapackCholesky<c10::complex<float>>(char uplo, int n, c10::complex<float> *a, int lda, int *info) { in lapackCholesky() [all …]
|
H A D | BlasKernel.cpp | 142 C10_UNUSED int64_t lda, in gemv_use_fast_path() 155 … C10_UNUSED const scalar_t *alpha, C10_UNUSED const scalar_t *a, C10_UNUSED const int *lda, in gemv_fast_path() 190 …>(C10_UNUSED char trans, int64_t m, int64_t n, C10_UNUSED float alpha, int64_t lda, int64_t incx, … in gemv_use_fast_path() 197 …(C10_UNUSED char trans, int64_t m, int64_t n, C10_UNUSED double alpha, int64_t lda, int64_t incx, … in gemv_use_fast_path() 202 …s, const int *m, const int *n, const double *alpha, const double *a, const int *lda, const double … in gemv_fast_path() 207 …ans, const int *m, const int *n, const float *alpha, const float *a, const int *lda, const float *… in gemv_fast_path() 232 C10_UNUSED int64_t lda, in gemv_use_fast_path() 246 C10_UNUSED int64_t lda, in gemv_use_fast_path() 347 …6_arith_by_dot_products(const int m, const int n, const float16_t* a, const int lda, const float16… in fp16_gemv_trans_fp16_arith_by_dot_products() 524 …2_arith_by_dot_products(const int m, const int n, const float16_t* a, const int lda, const float16… in fp16_gemv_trans_fp32_arith_by_dot_products() [all …]
|
H A D | CPUBlas.cpp | 61 int64_t *lda, int64_t *ldb, int64_t *ldc) { in normalize_last_dims() 89 int64_t lda, int64_t ldb, int64_t ldc) { in use_blas_gemm() 131 const double *a, int64_t lda, in gemm() 174 const float *a, int64_t lda, in gemm() 222 const c10::complex<double> *a, int64_t lda, in gemm() 265 const c10::complex<float> *a, int64_t lda, in gemm() 308 const at::BFloat16 *a, int64_t lda, in gemm() 348 const at::Half *a, int64_t lda, in gemm() 367 const at::BFloat16 *a, int64_t lda, in gemm() 418 const at::Half *a, int64_t lda, in gemm() [all …]
|
/aosp_15_r20/external/tensorflow/tensorflow/stream_executor/rocm/ |
H A D | rocm_blas.cc | 321 int lda, const DeviceMemory<float> &x, int incx, in DoBlasGemv() 332 const DeviceMemory<double> &a, int lda, in DoBlasGemv() 344 const DeviceMemory<std::complex<float>> &a, int lda, in DoBlasGemv() 357 const DeviceMemory<std::complex<double>> &a, int lda, in DoBlasGemv() 370 int lda, const DeviceMemory<float> &x, int incx, in DoBlasSbmv() 380 const DeviceMemory<double> &a, int lda, in DoBlasSbmv() 393 int lda, const DeviceMemoryBase &b, int ldb, in DoBlasGemm() 514 const DeviceMemory<float> &a, int lda, const DeviceMemory<float> &x, in DoBlasGemvWithProfiling() 524 const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &x, in DoBlasGemvWithProfiling() 535 int lda, const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasGemvWithProfiling() [all …]
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/ |
H A D | cuda_blas.cc | 538 int lda, const DeviceMemory<float> &x, int incx, in DoBlasInternalImpl() 548 const DeviceMemory<double> &a, int lda, in DoBlasInternalImpl() 559 const DeviceMemory<std::complex<float>> &a, int lda, in DoBlasInternalImpl() 574 const DeviceMemory<std::complex<double>> &a, int lda, in DoBlasInternalImpl() 589 int lda, const DeviceMemory<float> &x, int incx, in DoBlasInternalImpl() 599 const DeviceMemory<double> &a, int lda, in DoBlasInternalImpl() 612 int lda, const DeviceMemoryBase &b, int ldb, in DoBlasInternalImpl() 749 const DeviceMemory<float> &a, int lda, const DeviceMemory<float> &x, in DoBlasInternalImpl() 759 const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &x, in DoBlasInternalImpl() 770 int lda, const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasInternalImpl() [all …]
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cpu/ |
H A D | BlasKernel.cpp | 54 void scale_(int64_t m, int64_t n, opmath_t alpha, scalar_t *a, int64_t lda) { in scale_() 106 int64_t lda, in gemm_notrans_() 142 int64_t lda, in gemm_notrans_() 169 const scalar_t *a, int64_t lda, in gemm_transa_() 200 int64_t lda, in gemm_transb_impl() 233 int64_t lda, in gemm_transb_() 255 int64_t lda, in gemm_transb_() 308 const scalar_t *a, int64_t lda, in gemm_transab_() 337 int64_t lda, in gemm_notrans_() 386 const at::Half *a, int64_t lda, in gemm_transa_() [all …]
|
/aosp_15_r20/external/tensorflow/tensorflow/core/util/ |
H A D | cuda_solvers.cc | 349 const Scalar* A, int lda, in GeamImpl() 381 cublasFillMode_t uplo, int n, Scalar* A, int lda, in PotrfImpl() 414 const Scalar* const host_a_dev_ptrs[], int lda, in PotrfBatchedImpl() 450 int n, Scalar* A, int lda, int* dev_pivots, in GetrfImpl() 483 const Scalar* A, int lda, const int* pivots, in GetrsImpl() 509 int n, Scalar* A, int lda, Scalar* tau, in GeqrfImpl() 543 int lda, const Scalar* dev_tau, Scalar* dev_c, in UnmqrImpl() 586 int n, int k, Scalar* dev_a, int lda, in UngqrImpl() 625 int n, Scalar* dev_A, int lda, in HeevdImpl() 679 int n, Scalar* A, int lda, Scalar* S, Scalar* U, in GesvdImpl() [all …]
|
/aosp_15_r20/external/cblas/testing/ |
H A D | c_cblas2.c | 13 CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx, in F77_cgemv() 42 CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_cgbmv() 90 CBLAS_TEST_COMPLEX *a, int *lda){ in F77_cgeru() 119 CBLAS_TEST_COMPLEX *a, int *lda) { in F77_cgerc() 146 CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, in F77_chemv() 176 CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_chbmv() 296 int *n, int *k, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, in F77_ctbmv() 359 int *n, int *k, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, in F77_ctbsv() 533 int *n, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, in F77_ctrmv() 562 int *n, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, in F77_ctrsv() [all …]
|
H A D | c_zblas2.c | 13 CBLAS_TEST_ZOMPLEX *a, int *lda, const void *x, int *incx, in F77_zgemv() 42 CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zgbmv() 90 CBLAS_TEST_ZOMPLEX *a, int *lda){ in F77_zgeru() 119 CBLAS_TEST_ZOMPLEX *a, int *lda) { in F77_zgerc() 146 CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, in F77_zhemv() 176 CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zhbmv() 296 int *n, int *k, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, in F77_ztbmv() 359 int *n, int *k, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, in F77_ztbsv() 533 int *n, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, in F77_ztrmv() 562 int *n, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, in F77_ztrsv() [all …]
|
H A D | c_dblas2.c | 12 double *a, int *lda, double *x, int *incx, double *beta, in F77_dgemv() 39 double *y, int *incy, double *a, int *lda ) { in F77_dger() 64 int *n, double *a, int *lda, double *x, int *incx) { in F77_dtrmv() 92 int *n, double *a, int *lda, double *x, int *incx ) { in F77_dtrsv() 116 int *lda, double *x, int *incx, double *beta, double *y, in F77_dsymv() 140 int *incx, double *a, int *lda) { in F77_dsyr() 164 int *incx, double *y, int *incy, double *a, int *lda) { in F77_dsyr2() 188 double *alpha, double *a, int *lda, double *x, int *incx, in F77_dgbmv() 226 int *n, int *k, double *a, int *lda, double *x, int *incx) { in F77_dtbmv() 272 int *n, int *k, double *a, int *lda, double *x, int *incx) { in F77_dtbsv() [all …]
|
H A D | c_sblas2.c | 12 float *a, int *lda, float *x, int *incx, float *beta, in F77_sgemv() 39 float *y, int *incy, float *a, int *lda ) { in F77_sger() 64 int *n, float *a, int *lda, float *x, int *incx) { in F77_strmv() 92 int *n, float *a, int *lda, float *x, int *incx ) { in F77_strsv() 116 int *lda, float *x, int *incx, float *beta, float *y, in F77_ssymv() 140 int *incx, float *a, int *lda) { in F77_ssyr() 164 int *incx, float *y, int *incy, float *a, int *lda) { in F77_ssyr2() 188 float *alpha, float *a, int *lda, float *x, int *incx, in F77_sgbmv() 226 int *n, int *k, float *a, int *lda, float *x, int *incx) { in F77_stbmv() 272 int *n, int *k, float *a, int *lda, float *x, int *incx) { in F77_stbsv() [all …]
|
H A D | c_zblas3.c | 15 int *k, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zgemm() 91 CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zhemm() 155 CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zsymm() 210 double *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zherk() 266 CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zsyrk() 321 CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zher2k() 385 CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, in F77_zsyr2k() 450 int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { in F77_ztrmm() 509 int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { in F77_ztrsm()
|
H A D | c_cblas3.c | 15 int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_cgemm() 91 CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_chemm() 155 CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_csymm() 210 float *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_cherk() 266 CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_csyrk() 321 CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_cher2k() 385 CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, in F77_csyr2k() 450 int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { in F77_ctrmm() 509 int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { in F77_ctrsm()
|
H A D | c_sblas3.c | 13 int *k, float *alpha, float *a, int *lda, float *b, int *ldb, in F77_sgemm() 74 float *alpha, float *a, int *lda, float *b, int *ldb, in F77_ssymm() 128 float *alpha, float *a, int *lda, in F77_ssyrk() 176 float *alpha, float *a, int *lda, float *b, int *ldb, in F77_ssyr2k() 231 int *m, int *n, float *alpha, float *a, int *lda, float *b, in F77_strmm() 282 int *m, int *n, float *alpha, float *a, int *lda, float *b, in F77_strsm()
|
H A D | c_dblas3.c | 15 int *k, double *alpha, double *a, int *lda, double *b, int *ldb, in F77_dgemm() 77 double *alpha, double *a, int *lda, double *b, int *ldb, in F77_dsymm() 131 double *alpha, double *a, int *lda, in F77_dsyrk() 179 double *alpha, double *a, int *lda, double *b, int *ldb, in F77_dsyr2k() 234 int *m, int *n, double *alpha, double *a, int *lda, double *b, in F77_dtrmm() 285 int *m, int *n, double *alpha, double *a, int *lda, double *b, in F77_dtrsm()
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
H A D | cusolver_context.cc | 341 int n, int lda, in PotrfBufferSize() 383 se::DeviceMemory<float> a, int lda, in Potrf() 396 se::DeviceMemory<double> a, int lda, in Potrf() 409 se::DeviceMemory<std::complex<float>> a, int lda, in Potrf() 423 int lda, se::DeviceMemory<int> lapack_info, in Potrf() 435 se::DeviceMemory<float*> as, int lda, in PotrfBatched() 447 se::DeviceMemory<double*> as, int lda, in PotrfBatched() 460 int lda, in PotrfBatched() 473 se::DeviceMemory<std::complex<double>*> as, int lda, in PotrfBatched()
|
/aosp_15_r20/external/executorch/kernels/optimized/blas/ |
H A D | BlasKernel.h | 23 void scale_(int64_t m, int64_t n, opmath_t alpha, scalar_t* a, int64_t lda) { in scale_() 74 int64_t lda, in gemm_notrans_() 111 int64_t lda, in gemm_notrans_() 138 const scalar_t *a, int64_t lda, in gemm_transa_() 219 int64_t lda, in gemm_transb_() 256 int64_t lda, in gemm_transb_() 283 const scalar_t *a, int64_t lda, in gemm_transab_()
|
H A D | CPUBlas.cpp | 51 int64_t *lda, int64_t *ldb, int64_t *ldc) { in normalize_last_dims() 79 const double *a, int64_t lda, in gemm() 119 const float *a, int64_t lda, in gemm() 159 const Half *a, int64_t lda, in gemm() 182 const BFloat16 *a, int64_t lda, in gemm()
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/ |
H A D | stream.cc | 1257 float alpha, const DeviceMemory<float> &a, int lda, in ThenBlasGemv() 1274 int lda, const DeviceMemory<double> &x, int incx, in ThenBlasGemv() 1291 int lda, in ThenBlasGemv() 1311 int lda, in ThenBlasGemv() 1329 float alpha, const DeviceMemory<float> &a, int lda, in ThenBlasSbmv() 1345 int lda, const DeviceMemory<double> &x, int incx, in ThenBlasSbmv() 1378 const DeviceMemory<float> &a, int lda, const DeviceMemory<float> &x, in ThenBlasGemvWithProfiling() 1395 const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &x, in ThenBlasGemvWithProfiling() 1413 const DeviceMemory<std::complex<float>> &a, int lda, in ThenBlasGemvWithProfiling() 1433 const DeviceMemory<std::complex<double>> &a, int lda, in ThenBlasGemvWithProfiling() [all …]
|
H A D | stream.h | 756 const DeviceMemory<InputType> &a, int lda, in ThenBlasGemm() 770 const DeviceMemory<InputType> &a, int lda, in ThenBlasGemm() 780 const DeviceMemory<InputType> &a, int lda, in ThenBlasGemm() 818 const DeviceMemory<InputType> &a, int lda, in ThenBlasGemm() 867 uint64_t k, const DeviceMemory<InputType> &a, int lda, in ThenBlasGemmWithAlgorithm() 882 uint64_t k, ConstantType alpha, const DeviceMemory<InputType> &a, int lda, in ThenBlasGemmWithAlgorithm() 920 uint64_t k, ConstantType alpha, const DeviceMemory<InputType> &a, int lda, in ThenBlasGemmStridedBatchedWithAlgorithm() 1035 uint64_t k, ConstantType alpha, const DeviceMemory<InputType> &a, int lda, in ThenBlasGemmStridedBatched()
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/mkl/ |
H A D | LinearAlgebra.cpp | 13 const float** A, const MKL_INT lda, const float** B, const MKL_INT ldb, const float beta, in mkl_gemm_batched() 21 const double** A, const MKL_INT lda, const double** B, const MKL_INT ldb, const double beta, in mkl_gemm_batched() 29 …const c10::complex<float>** A, const MKL_INT lda, const c10::complex<float>** B, const MKL_INT ldb, in mkl_gemm_batched() 37 …const c10::complex<double>** A, const MKL_INT lda, const c10::complex<double>** B, const MKL_INT l… in mkl_gemm_batched() 45 const c10::BFloat16* A, MKL_INT lda, const c10::BFloat16* B, MKL_INT ldb, in mkl_gemm_bf16bf16f32() 53 const c10::Half* A, int lda, const c10::Half* B, int ldb, in mkl_gemm_f16f16f32()
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/mkldnn/ |
H A D | Matmul.cpp | 39 const c10::BFloat16 *a, int64_t lda, in mkldnn_bf16_gemm() 50 const c10::Half *a, int64_t lda, in mkldnn_fp16_gemm() 60 const float *a, int64_t lda, in mkldnn_bf32_gemm() 122 const scalar_t *a_data, int64_t lda, in mkldnn_gemm() 198 const c10::BFloat16 *a, int64_t lda, in mkldnn_bf16_gemm() 209 const c10::Half *a, int64_t lda, in mkldnn_fp16_gemm() 220 const float *a, int64_t lda, in mkldnn_bf32_gemm() 468 const int lda = transa == 'T' ? self.stride(1) : self.stride(0); in _mkldnn_gemm_i8i8i32_with_blas() local
|