Searched defs:num_warps (Results 1 – 7 of 7) sorted by relevance
/aosp_15_r20/external/pytorch/torch/_inductor/runtime/ |
H A D | triton_heuristics.py | 505 num_warps, argument 545 num_warps, argument 579 num_warps, argument 1120 def _num_warps(num_warps, max_num_warps=8, min_num_warps=2, register_intensive=False): argument 1132 def _check_max_grid_x(size_hints, x, num_warps): argument 1250 size_hints, x, r, num_stages=1, num_warps=None, register_intensive=False argument 1618 def template(num_stages, num_warps, triton_meta, filename=None, inductor_meta=None): argument 1669 def foreach(triton_meta, num_warps, filename=None, inductor_meta=None): argument
|
/aosp_15_r20/external/pytorch/torch/_inductor/kernel/ |
H A D | mm_common.py | 21 def triton_config(num_stages, num_warps, **kwargs): argument
|
/aosp_15_r20/external/pytorch/torch/_inductor/ |
H A D | select_algorithm.py | 126 num_warps, argument 608 num_warps, argument
|
/aosp_15_r20/external/pytorch/torch/sparse/ |
H A D | _triton_ops.py | 510 num_warps=None, argument 748 num_warps=None, argument
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/ |
H A D | EmbeddingBag.cu | 484 const int num_warps = blockDim.x * gridDim.x / C10_WARP_SIZE; in _embedding_bag_per_sample_weights_backward_kernel() local
|
H A D | TensorTopK.cu | 453 constexpr int num_warps = RADIX_DIGITS / C10_WARP_SIZE; in C10_LAUNCH_BOUNDS_1() local
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
H A D | ir_emitter_unnested.cc | 3633 int num_warps = tiling_scheme.GetNumThreadsPerBlock() / WarpSize(); in GenerateReductionCodegenState() local
|