Skip to content

Commit 3556114

Browse files
committed Oct 29, 2020
[mlir][gpu] Allow gpu.launch_func to be async.
This is a roll-forward of rGec7780ebdab4, now that the remaining gpu.launch_func have been converted to custom form in rGb22f111023ba. Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D90420
1 parent 20b386a commit 3556114

File tree

4 files changed

+45
-17
lines changed

4 files changed

+45
-17
lines changed
 

‎mlir/include/mlir/Dialect/GPU/GPUOps.td

+28-13
Original file line numberDiff line numberDiff line change
@@ -291,12 +291,14 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">,
291291
let parser = [{ return parseGPUFuncOp(parser, result); }];
292292
}
293293

294-
def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
295-
Arguments<(ins SymbolRefAttr:$kernel,
294+
def GPU_LaunchFuncOp : GPU_Op<"launch_func",
295+
[GPU_AsyncOpInterface, AttrSizedOperandSegments]>,
296+
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
297+
SymbolRefAttr:$kernel,
296298
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
297299
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
298300
Variadic<AnyType>:$operands)>,
299-
Results<(outs)> {
301+
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
300302
let summary = "Launches a function as a GPU kernel";
301303

302304
let description = [{
@@ -308,14 +310,22 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
308310
function is required to be a gpu.module. And finally, the module containing
309311
the kernel module (which thus cannot be the top-level module) is required
310312
to have the `gpu.container_module` attribute. The `gpu.launch_func`
311-
operation has a symbol attribute named `kernel` to identify the fully
313+
operation has a symbol attribute named `kernel` to identify the fully
312314
specified kernel function to launch (both the gpu.module and func).
313315

314-
The operation takes at least six operands, with the first three operands
315-
being grid sizes along x,y,z dimensions and the following three being block
316-
sizes along x,y,z dimensions. When a lower-dimensional kernel is required,
317-
unused sizes must be explicitly set to `1`. The remaining operands are
318-
passed as arguments to the kernel function.
316+
The `gpu.launch_func` supports async dependencies: the kernel does not start
317+
executing until the ops producing those async dependencies have completed.
318+
319+
By default, the host implicitly blocks until kernel execution has
320+
completed. If the `async` keyword is present, the host does not block but
321+
instead a `!gpu.async.token` is returned. Other async GPU ops can take this
322+
token as a dependency.
323+
324+
The operation requires at least the grid and block sizes along the x,y,z
325+
dimensions as arguments. When a lower-dimensional kernel is required,
326+
unused sizes must be explicitly set to `1`.
327+
328+
The remaining operands are passed as arguments to the kernel function.
319329

320330
Example:
321331

@@ -351,11 +361,15 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
351361
}
352362
}
353363

364+
%t0 = gpu.wait async
354365
gpu.launch_func
355-
@kernels::@kernel_1 // Kernel function.
356-
blocks in (%cst, %cst, %cst) // Grid size.
357-
threads in (%cst, %cst, %cst) // Block size.
358-
args(%arg0 : f32, %arg1 : memref<?xf32, 1>) // Kernel arguments.
366+
async // (Optional) Don't block host, return token.
367+
[%t0] // (Optional) Execute only after %t0 has completed.
368+
@kernels::@kernel_1 // Kernel function.
369+
blocks in (%cst, %cst, %cst) // Grid size.
370+
threads in (%cst, %cst, %cst) // Block size.
371+
args(%arg0 : f32, // (Optional) Kernel arguments.
372+
%arg1 : memref<?xf32, 1>)
359373
}
360374
```
361375
}];
@@ -401,6 +415,7 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
401415

402416
let verifier = [{ return ::verify(*this); }];
403417
let assemblyFormat = [{
418+
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
404419
$kernel
405420
`blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`
406421
`threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`

‎mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

+11-4
Original file line numberDiff line numberDiff line change
@@ -438,10 +438,15 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
438438
auto kernelSymbol = builder.getSymbolRefAttr(
439439
kernelModule.getName(), {builder.getSymbolRefAttr(kernelFunc.getName())});
440440
result.addAttribute(getKernelAttrName(), kernelSymbol);
441+
SmallVector<int32_t, 8> segmentSizes(8, 1);
442+
segmentSizes.front() = 0; // Initially no async dependencies.
443+
segmentSizes.back() = static_cast<int32_t>(kernelOperands.size());
444+
result.addAttribute(getOperandSegmentSizeAttr(),
445+
builder.getI32VectorAttr(segmentSizes));
441446
}
442447

443448
unsigned LaunchFuncOp::getNumKernelOperands() {
444-
return getNumOperands() - kNumConfigOperands;
449+
return getNumOperands() - asyncDependencies().size() - kNumConfigOperands;
445450
}
446451

447452
StringRef LaunchFuncOp::getKernelModuleName() {
@@ -451,15 +456,17 @@ StringRef LaunchFuncOp::getKernelModuleName() {
451456
StringRef LaunchFuncOp::getKernelName() { return kernel().getLeafReference(); }
452457

453458
Value LaunchFuncOp::getKernelOperand(unsigned i) {
454-
return getOperation()->getOperand(i + kNumConfigOperands);
459+
return getOperand(asyncDependencies().size() + kNumConfigOperands + i);
455460
}
456461

457462
KernelDim3 LaunchFuncOp::getGridSizeOperandValues() {
458-
return KernelDim3{getOperand(0), getOperand(1), getOperand(2)};
463+
auto operands = getOperands().drop_front(asyncDependencies().size());
464+
return KernelDim3{operands[0], operands[1], operands[2]};
459465
}
460466

461467
KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {
462-
return KernelDim3{getOperand(3), getOperand(4), getOperand(5)};
468+
auto operands = getOperands().drop_front(asyncDependencies().size());
469+
return KernelDim3{operands[3], operands[4], operands[5]};
463470
}
464471

465472
static LogicalResult verify(LaunchFuncOp op) {

‎mlir/test/Dialect/GPU/invalid.mlir

+2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ func @launch_requires_gpu_return(%sz : index) {
3737
func @launch_func_too_few_operands(%sz : index) {
3838
// expected-error@+1 {{expected 6 or more operands}}
3939
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)
40+
{operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 0, 0]> : vector<8xi32>}
4041
: (index, index, index, index, index) -> ()
4142
return
4243
}
@@ -55,6 +56,7 @@ module attributes {gpu.container_module} {
5556
func @launch_func_missing_callee_attribute(%sz : index) {
5657
// expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}}
5758
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
59+
{operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 1, 0]> : vector<8xi32>}
5860
: (index, index, index, index, index, index) -> ()
5961
return
6062
}

‎mlir/test/Dialect/GPU/ops.mlir

+4
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,17 @@ module attributes {gpu.container_module} {
7373
%1 = "op"() : () -> (memref<?xf32, 1>)
7474
// CHECK: %{{.*}} = constant 8
7575
%cst = constant 8 : index
76+
%t0 = gpu.wait async
7677

7778
// CHECK: gpu.launch_func @kernels::@kernel_1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) args(%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)
7879
gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%0 : f32, %1 : memref<?xf32, 1>)
7980

8081
// CHECK: gpu.launch_func @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}})
8182
gpu.launch_func @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
8283

84+
// CHECK: %{{.*}} = gpu.launch_func async [%{{.*}}] @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}})
85+
%t1 = gpu.launch_func async [%t0] @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
86+
8387
return
8488
}
8589

0 commit comments

Comments
 (0)