/aosp_15_r20/external/tensorflow/tensorflow/compiler/mlir/tensorflow/transforms/ |
H A D | tpu_parallel_execute_sink_resource_write.cc | 52 tf_device::ParallelExecuteOp parallel_execute, Value result) { in GetSingleUseResourceWrite() argument 62 if (resource_handle_op == parallel_execute) return nullptr; in GetSingleUseResourceWrite() 66 parallel_execute.getOperation()->getBlock() && in GetSingleUseResourceWrite() 67 parallel_execute.getOperation()->isBeforeInBlock(resource_handle_op)) in GetSingleUseResourceWrite() 77 tf_device::ParallelExecuteOp parallel_execute) { in SinkResourceWritesIntoParallelExecute() argument 79 const int num_regions = parallel_execute.getNumRegions(); in SinkResourceWritesIntoParallelExecute() 88 Block& block = parallel_execute.GetRegionBlockWithIndex(i); in SinkResourceWritesIntoParallelExecute() 89 auto results = parallel_execute.GetRegionOutputs(i); in SinkResourceWritesIntoParallelExecute() 95 GetSingleUseResourceWrite(parallel_execute, result.value()); in SinkResourceWritesIntoParallelExecute() 118 parallel_execute.GetRegionBlockWithIndex(results_to_remove.index()); in SinkResourceWritesIntoParallelExecute() [all …]
|
H A D | tpu_merge_variables_with_execute.cc | 167 auto parallel_execute = llvm::dyn_cast<tf_device::ParallelExecuteOp>( in BuildVariableAccessInfo() local 170 parallel_execute ? parallel_execute.getOperation() : execute_launch; in BuildVariableAccessInfo() 261 parallel_execute in BuildVariableAccessInfo() 262 ? parallel_execute.GetRegionOutputs( in BuildVariableAccessInfo() 346 tf_device::ParallelExecuteOp parallel_execute, int start, in AppendTypes() argument 350 Block& block = parallel_execute.GetRegionBlockWithIndex(index); in AppendTypes() 361 tf_device::ParallelExecuteOp parallel_execute, in ReplaceParallelExecute() argument 365 Operation* parallel_execute_op = parallel_execute.getOperation(); in ReplaceParallelExecute() 375 AppendTypes(&output_types, parallel_execute, 0, region_index); in ReplaceParallelExecute() 381 &output_types, parallel_execute, region_index + 1, num_regions); in ReplaceParallelExecute() [all …]
|
H A D | tpu_rewrite_pass.cc | 615 tf_device::ParallelExecuteOp* parallel_execute) { in BuildSingletonParallelExecuteOp() argument 616 if (!*parallel_execute) { in BuildSingletonParallelExecuteOp() 619 *parallel_execute = builder->create<tf_device::ParallelExecuteOp>( in BuildSingletonParallelExecuteOp() 622 auto& block = parallel_execute->GetRegionBlockWithIndex(0); in BuildSingletonParallelExecuteOp() 625 cluster_func.replaceAllUsesWith(*parallel_execute); in BuildSingletonParallelExecuteOp() 633 tf_device::ParallelExecuteOp parallel_execute, OpBuilder* builder) { in RemoveSingletonParallelExecuteOp() argument 634 if (parallel_execute.regions().size() == 1) { in RemoveSingletonParallelExecuteOp() 635 builder->setInsertionPoint(parallel_execute); in RemoveSingletonParallelExecuteOp() 636 auto& block = parallel_execute.GetRegionBlockWithIndex(0); in RemoveSingletonParallelExecuteOp() 642 parallel_execute.emitError() << "Expected 2 ops in parallel_execute."; in RemoveSingletonParallelExecuteOp() [all …]
|
H A D | tf_passes.td | 1083 … = "Extracts TPU outside compilation computation to a separate tf_device.parallel_execute region."; 1089 a tf_device.parallel_execute region. The TPU cluster is also moved to a 1090 tf_device.parallel_execute region. Communication ops between device and host are 1107 will become a tf_device.parallel_execute op with a CPU/host region and 1112 %0 = "tf_device.parallel_execute"() ( { 1457 %1 = "tf_device.parallel_execute"() ( { 1984 let summary = "Moves tf.AssignVariableOp consumers of tf_device.parallel_execute " 1985 "into tf_device.parallel_execute regions"; 2103 let summary = "Lowers device parallel_execute to executor islands";
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/mlir/tensorflow/tests/ |
H A D | tpu_parallel_execute_sink_resource_write.mlir | 6 // CHECK: [[PARALLEL_EXECUTE:%.+]]:2 = "tf_device.parallel_execute" 7 %0:2 = "tf_device.parallel_execute"() ({ 22 // CHECK: [[PARALLEL_EXECUTE:%.+]]:2 = "tf_device.parallel_execute" 23 %0:2 = "tf_device.parallel_execute"() ({ 37 // CHECK: [[PARALLEL_EXECUTE:%.+]]:2 = "tf_device.parallel_execute" 38 %0:2 = "tf_device.parallel_execute"() ({ 51 // CHECK: [[PARALLEL_EXECUTE:%.+]]:2 = "tf_device.parallel_execute" 52 %0:2 = "tf_device.parallel_execute"() ({ 64 // CHECK: [[PARALLEL_EXECUTE:%.+]]:2 = "tf_device.parallel_execute" 65 %0:2 = "tf_device.parallel_execute"() ({ [all …]
|
H A D | tf_device_ops_invalid.mlir | 199 // Check that a parallel_execute op with a single region is not allowed. 201 "tf_device.parallel_execute"() {} : () -> () 202 // expected-error@-1 {{'tf_device.parallel_execute' op must have at least one region.}} 208 // Check that a parallel_execute op with empty regions are not allowed. 210 "tf_device.parallel_execute"() ({ 211 // expected-error@-1 {{'tf_device.parallel_execute' op region #0 ('regions') failed to verify const… 221 // Check that a parallel_execute ops with invalid number of output types are 224 "tf_device.parallel_execute"() ({ 225 // expected-error@-1 {{'tf_device.parallel_execute' op number of output types (3) must match the to… 239 // Check that a parallel_execute ops with mismatching output types are not [all …]
|
H A D | tpu_extract_outside_compilation.mlir | 4 // Tests that TPU cluster with no outside compilation does not generate parallel_execute. 8 // CHECK-NOT: "tf_device.parallel_execute" 33 // CHECK: "tf_device.parallel_execute" 55 // CHECK: "tf_device.parallel_execute" 80 // CHECK: "tf_device.parallel_execute" 103 // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" 131 // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:2 = "tf_device.parallel_execute" 156 // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" 189 // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" 211 // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" [all …]
|
H A D | parallel_execute_to_islands.mlir | 7 "tf_device.parallel_execute"() ({ 35 %4:2 = "tf_device.parallel_execute"() ({ 68 %3:2 = "tf_device.parallel_execute"() ({ 96 %1:2 = "tf_device.parallel_execute"() ({ 128 %2:2 = "tf_device.parallel_execute"() ({ 163 %4 = "tf_device.parallel_execute"() ({
|
H A D | tpu-merge-variables-with-execute.mlir | 288 // with TPUExecutes in a tf_device.parallel_execute. 290 // CHECK-LABEL: func @parallel_execute 294 func.func @parallel_execute( 301 // CHECK: "tf_device.parallel_execute" 302 %pe:2 = "tf_device.parallel_execute"() ({ 332 // tf_device.parallel_execute that is replicated (tf_device.replicate). 354 // CHECK: "tf_device.parallel_execute" 355 %pe:2 = "tf_device.parallel_execute"() ({
|
H A D | tf_device_ops.mlir | 167 "tf_device.parallel_execute"() ({ 178 "tf_device.parallel_execute"() ({ 192 "tf_device.parallel_execute"() ({
|
H A D | tpu_rewrite.mlir | 1167 // CHECK: [[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:2 = "tf_device.parallel_execute" 1205 // CHECK: [[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:2 = "tf_device.parallel_execute" 1301 "tf_device.parallel_execute"() ({ 1351 // parallel_execute. 1363 // CHECK: "tf_device.parallel_execute" 1369 %3 = "tf_device.parallel_execute"() ({ 1396 // parallelism and parallel_execute. 1408 // CHECK: "tf_device.parallel_execute" 1416 %3 = "tf_device.parallel_execute"() ({ 1439 // parallelism and parallel_execute with 2 parallel_execute children after the [all …]
|
H A D | tpu-dynamic-layout-pass.mlir | 410 // CHECK-LABEL: func @parallel_execute 411 func.func @parallel_execute(%arg0: tensor<*x!tf_type.resource> {tf.device = "/device:CPU:0"}) { 427 // CHECK: "tf_device.parallel_execute" 428 "tf_device.parallel_execute"() ({ 502 // CHECK: "tf_device.parallel_execute" 503 "tf_device.parallel_execute"() ({
|
H A D | tpu-variable-runtime-reformatting.mlir | 204 "tf_device.parallel_execute"() ({
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/mlir/tensorflow/ir/ |
H A D | tf_device_ops.td | 132 def TfDevice_ParallelExecuteOp : TfDevice_Op<"parallel_execute", 167 // Checks if a tf_device.parallel_execute index'th region block wraps a 192 A tf_device.parallel_execute inside the tf_device.replicate op region may be 195 concurrent execution (i.e. region) defined by tf_device.parallel_execute op.
|