/aosp_15_r20/external/pytorch/torch/csrc/distributed/c10d/quantization/
  quantization_gpu.cu
    79:  constexpr size_t threads_per_block = 256;   in _float_to_bfloat16_cuda() [local]
    126: constexpr size_t threads_per_block = 256;   in _bfloat16_to_float_cuda() [local]
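The two quantization kernels above fix threads_per_block at 256 and size the grid from the element count. A minimal CUDA sketch of that launch pattern, assuming an illustrative elementwise float-to-bfloat16 kernel (the names below are illustrative, not the PyTorch implementation):

    #include <cuda_runtime.h>
    #include <cuda_bf16.h>

    // Illustrative elementwise conversion kernel: one thread per element.
    __global__ void float_to_bfloat16(const float* in, __nv_bfloat16* out, size_t n) {
      size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
      if (i < n) {
        out[i] = __float2bfloat16(in[i]);
      }
    }

    void launch_float_to_bfloat16(const float* in, __nv_bfloat16* out, size_t n,
                                  cudaStream_t stream) {
      constexpr size_t threads_per_block = 256;  // fixed block size, as in the hits above
      // Ceil division so every element is covered by exactly one thread.
      const unsigned int blocks =
          static_cast<unsigned int>((n + threads_per_block - 1) / threads_per_block);
      float_to_bfloat16<<<blocks, threads_per_block, 0, stream>>>(in, out, n);
    }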
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/
  PersistentSoftmax.cuh
    320: constexpr int threads_per_block = 128;   in dispatch_softmax_forward() [local]
    370: constexpr int threads_per_block = 128;   in dispatch_softmax_backward() [local]
  Shape.cu
    56:  constexpr unsigned int threads_per_block = 256;   in getCatGridRocm() [local]
    78:  constexpr unsigned int threads_per_block = 128;   in getCatGridContig() [local]
  SegmentReduce.cu
    299: constexpr int threads_per_block = 256;   in _segment_reduce_lengths_offsets_backward_cuda_kernel() [local]
    427: constexpr int threads_per_block = 256;   in _segment_reduce_lengths_offsets_cuda_kernel() [local]
  EmbeddingBag.cu
    530: const int threads_per_block = 512;   in _embedding_bag_per_sample_weights_backward_cuda() [local]
/aosp_15_r20/external/tensorflow/tensorflow/stream_executor/rocm/
  rocm_helpers.cu.cc
    36:   const int threads_per_block = 256;   in rocm_MakeBatchPointers() [local]
  rocm_driver.cc
    1339: GpuContext* context, hipFunction_t kernel, int threads_per_block,   in GetMaxOccupiedBlocksPerCore()
/aosp_15_r20/external/tensorflow/tensorflow/core/kernels/image/
  adjust_saturation_op_gpu.cu.cc
    36: const int threads_per_block = config.thread_per_block;   in operator()() [local]
  adjust_hue_op_gpu.cu.cc
    35: const int threads_per_block = config.thread_per_block;   in operator()() [local]
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/
  device_description.cc
    143: int64_t element_count, int64_t *threads_per_block,   in CalculateDimensionality()
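CalculateDimensionality() above takes an element count and fills in a threads_per_block / block count pair. A minimal host-side sketch of that kind of calculation, assuming one thread per element and a cap at the device's threads-per-block limit (the struct and function names are illustrative, not the StreamExecutor API):

    #include <algorithm>
    #include <cstdint>

    struct LaunchDims {
      int64_t threads_per_block;
      int64_t block_count;
    };

    // Pick a block size no larger than the device limit, then cover all
    // elements with ceil-divided blocks (one thread per element).
    LaunchDims CalculateLaunchDims(int64_t element_count,
                                   int64_t threads_per_block_limit) {
      const int64_t threads_per_block =
          std::max<int64_t>(1, std::min(threads_per_block_limit, element_count));
      const int64_t block_count =
          (element_count + threads_per_block - 1) / threads_per_block;
      return {threads_per_block, block_count};
    }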
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/gpu/
  launch_dimensions.cc
    39:   int64_t threads_per_block = gpu_device_info.threads_per_block_limit;   in ThreadsPerBlockLimit() [local]
  elemental_ir_emitter.cc
    329:  llvm::Value* threads_per_block = IntCast(   in EmitThreadId() [local]
  ir_emitter_unnested.cc
    3666: int threads_per_block, int num_results_per_warp) {   in EmitFullWarpShuffleDownLoopForReduce()
    4055: llvm::Value* IrEmitterUnnested::EmitThreadId(int64_t threads_per_block,   in EmitThreadId()
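Both EmitThreadId() hits above build a linear thread id from the block index and threads_per_block. The arithmetic the emitted IR encodes looks like this in CUDA terms (a sketch of the computation, not the generated IR):

    #include <cstdint>

    // Linear (global) thread id across the whole launch: the block index
    // scaled by threads_per_block, plus the thread's index within its block.
    __device__ int64_t LinearThreadId() {
      const int64_t threads_per_block = blockDim.x;
      return static_cast<int64_t>(blockIdx.x) * threads_per_block + threadIdx.x;
    }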
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/gpu/
  redzone_allocator.cc
    224: int64_t threads_per_block = std::min(   in RunRedzoneChecker() [local]
/aosp_15_r20/external/mesa3d/src/gallium/drivers/radeonsi/
  si_get.c
    1242: unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type);   in si_get_compute_param() [local]
/aosp_15_r20/external/pytorch/c10/macros/
  Macros.h
    304: #define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \   [macro argument]
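C10_MIN_BLOCKS_PER_SM() above maps a per-block thread count and a desired blocks-per-SM figure to the kind of value that can be fed to CUDA's __launch_bounds__ attribute. A minimal sketch of that attribute in use, assuming illustrative bounds of 256 threads per block and 4 blocks per SM (the kernel and the literal bounds are not taken from Macros.h):

    #include <cuda_runtime.h>

    // __launch_bounds__(max_threads_per_block, min_blocks_per_multiprocessor)
    // promises the compiler the kernel is never launched with more than 256
    // threads per block and asks it to keep register usage low enough that
    // at least 4 blocks can be resident on one SM.
    __global__ void __launch_bounds__(256, 4)
    scale_kernel(float* data, float factor, size_t n) {
      size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
      if (i < n) {
        data[i] *= factor;
      }
    }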
/aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/
  r600_pipe_common.c
    932: unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);   in r600_get_compute_param() [local]
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/
  cuda_driver.cc
    1637: GpuContext* context, CUfunction kernel, int threads_per_block,   in GetMaxOccupiedBlocksPerCore()
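GetMaxOccupiedBlocksPerCore() in both the CUDA and ROCm drivers above wraps the occupancy query for a given kernel handle and block size. A minimal runtime-API sketch of the same query, assuming an illustrative kernel (the driver code itself uses the driver-API equivalent on a CUfunction/hipFunction_t):

    #include <cstdio>
    #include <cuda_runtime.h>

    __global__ void dummy_kernel(float* data, size_t n) {
      size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
      if (i < n) data[i] += 1.0f;
    }

    int main() {
      const int threads_per_block = 256;
      const size_t dynamic_smem_bytes = 0;
      int max_blocks_per_sm = 0;
      // How many blocks of this kernel, at this block size and dynamic shared
      // memory usage, can be resident on one multiprocessor at a time.
      cudaError_t err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
          &max_blocks_per_sm, dummy_kernel, threads_per_block, dynamic_smem_bytes);
      if (err != cudaSuccess) {
        std::fprintf(stderr, "occupancy query failed: %s\n", cudaGetErrorString(err));
        return 1;
      }
      std::printf("max resident blocks per SM: %d\n", max_blocks_per_sm);
      return 0;
    }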