diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index bfd7d65912bdb..0c34126667324 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -33,7 +33,9 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include +#include #include +#include #include #include @@ -2037,7 +2039,7 @@ llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type, if (auto boundOp = mlir::dyn_cast_if_present( bounds.getDefiningOp())) { // The below calculation for the size to be mapped calculated from the - // map_info's bounds is: (elemCount * [UB - LB] + 1), later we + // map.info's bounds is: (elemCount * [UB - LB] + 1), later we // multiply by the underlying element types byte size to get the full // size to be offloaded based on the bounds elementCount = builder.CreateMul( @@ -2089,9 +2091,9 @@ void collectMapDataFromMapOperands(MapInfoData &mapData, mapData.BaseType.push_back( moduleTranslation.convertType(mapOp.getVarType())); - mapData.Sizes.push_back(getSizeInBytes( - dl, mapOp.getVarType(), mapOp, mapData.BasePointers.back(), - mapData.BaseType.back(), builder, moduleTranslation)); + mapData.Sizes.push_back( + getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(), + mapData.BaseType.back(), builder, moduleTranslation)); mapData.MapClause.push_back(mapOp.getOperation()); mapData.Types.push_back( llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value())); @@ -2122,6 +2124,67 @@ void collectMapDataFromMapOperands(MapInfoData &mapData, } } +static int getMapDataMemberIdx(MapInfoData &mapData, + mlir::omp::MapInfoOp memberOp) { + auto *res = llvm::find(mapData.MapClause, memberOp); + assert(res != mapData.MapClause.end() && + "MapInfoOp for member not found in MapData, cannot return index"); + return std::distance(mapData.MapClause.begin(), res); +} + 
+static mlir::omp::MapInfoOp +getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) { + mlir::DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr(); + + // Only 1 member has been mapped, we can return it. + if (indexAttr.size() == 1) + if (auto mapOp = mlir::dyn_cast( + mapInfo.getMembers()[0].getDefiningOp())) + return mapOp; + + llvm::ArrayRef shape = indexAttr.getShapedType().getShape(); + llvm::SmallVector indices(shape[0]); + std::iota(indices.begin(), indices.end(), 0); + + llvm::sort( + indices.begin(), indices.end(), [&](const size_t a, const size_t b) { + auto indexValues = indexAttr.getValues(); + for (int i = 0; + i < shape[1]; + ++i) { + int aIndex = indexValues[a * shape[1] + i]; + int bIndex = indexValues[b * shape[1] + i]; + + if (aIndex != -1 && bIndex == -1) + return false; + + if (aIndex == -1 && bIndex != -1) + return true; + + if (aIndex == -1) + return first; + + if (bIndex == -1) + return !first; + + // A is earlier in the record type layout than B + if (aIndex < bIndex) + return first; + + if (bIndex < aIndex) + return !first; + } + + // iterated the entire list and couldn't make a decision, all elements + // were likely the same, return true for now similar to reaching the end + // of both and finding invalid indices. + return true; + }); + + return llvm::cast( + mapInfo.getMembers()[indices.front()].getDefiningOp()); +} + /// This function calculates the array/pointer offset for map data provided /// with bounds operations, e.g. when provided something like the following: /// @@ -2227,6 +2290,9 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, // which is utilised in subsequent member mappings (by modifying there map type // with it) to indicate that a member is part of this parent and should be // treated by the runtime as such. Important to achieve the correct mapping. 
+// +// This function borrows a lot from Clang's emitCombinedEntry function +// inside of CGOpenMPRuntime.cpp static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers( LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, @@ -2242,7 +2308,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers( combinedInfo.Names.emplace_back(LLVM::createMappingInformation( mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder)); combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); - combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]); // Calculate size of the parent object being mapped based on the // addresses at runtime, highAddr - lowAddr = size. This of course @@ -2251,42 +2316,68 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers( // Fortran pointers and allocatables, the mapping of the pointed to // data by the descriptor (which itself, is a structure containing // runtime information on the dynamically allocated data). 
- llvm::Value *lowAddr = builder.CreatePointerCast( - mapData.Pointers[mapDataIndex], builder.getPtrTy()); - llvm::Value *highAddr = builder.CreatePointerCast( - builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex], - mapData.Pointers[mapDataIndex], 1), - builder.getPtrTy()); + auto parentClause = + llvm::cast(mapData.MapClause[mapDataIndex]); + + llvm::Value *lowAddr, *highAddr; + if (!parentClause.getPartialMap()) { + lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex], + builder.getPtrTy()); + highAddr = builder.CreatePointerCast( + builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex], + mapData.Pointers[mapDataIndex], 1), + builder.getPtrTy()); + combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]); + } else { + auto mapOp = + mlir::dyn_cast(mapData.MapClause[mapDataIndex]); + int firstMemberIdx = getMapDataMemberIdx( + mapData, getFirstOrLastMappedMemberPtr(mapOp, true)); + lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx], + builder.getPtrTy()); + int lastMemberIdx = getMapDataMemberIdx( + mapData, getFirstOrLastMappedMemberPtr(mapOp, false)); + highAddr = builder.CreatePointerCast( + builder.CreateGEP(mapData.BaseType[lastMemberIdx], + mapData.Pointers[lastMemberIdx], builder.getInt64(1)), + builder.getPtrTy()); + combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]); + } + llvm::Value *size = builder.CreateIntCast( builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr), builder.getInt64Ty(), /*isSigned=*/false); combinedInfo.Sizes.push_back(size); - // This creates the initial MEMBER_OF mapping that consists of - // the parent/top level container (same as above effectively, except - // with a fixed initial compile time size and seperate maptype which - // indicates the true mape type (tofrom etc.) and that it is a part - // of a larger mapping and indicating the link between it and it's - // members that are also explicitly mapped). 
+ // TODO: This will need to be expanded to include the whole host of logic for + // the map flags that Clang currently supports (e.g. it should take the map + // flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some + // further case specific flag modifications). For the moment, it handles what + // we support as expected. llvm::omp::OpenMPOffloadMappingFlags mapFlag = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; - if (isTargetParams) - mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; llvm::omp::OpenMPOffloadMappingFlags memberOfFlag = ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1); ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag); - combinedInfo.Types.emplace_back(mapFlag); - combinedInfo.DevicePointers.emplace_back( - llvm::OpenMPIRBuilder::DeviceInfoTy::None); - combinedInfo.Names.emplace_back(LLVM::createMappingInformation( - mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder)); - combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); - combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]); - combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]); - + // This creates the initial MEMBER_OF mapping that consists of + // the parent/top level container (same as above effectively, except + // with a fixed initial compile time size and separate maptype which + // indicates the true map type (tofrom etc.). This parent mapping is + // only relevant if the structure in its totality is being mapped, + // otherwise the above suffices. 
+ if (!parentClause.getPartialMap()) { + combinedInfo.Types.emplace_back(mapFlag); + combinedInfo.DevicePointers.emplace_back( + llvm::OpenMPIRBuilder::DeviceInfoTy::None); + combinedInfo.Names.emplace_back(LLVM::createMappingInformation( + mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder)); + combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); + combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]); + combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]); + } return memberOfFlag; } @@ -2319,21 +2410,17 @@ static void processMapMembersWithParent( uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) { auto parentClause = - mlir::dyn_cast(mapData.MapClause[mapDataIndex]); + llvm::cast(mapData.MapClause[mapDataIndex]); for (auto mappedMembers : parentClause.getMembers()) { auto memberClause = - mlir::dyn_cast(mappedMembers.getDefiningOp()); - int memberDataIdx = -1; - for (size_t i = 0; i < mapData.MapClause.size(); ++i) { - if (mapData.MapClause[i] == memberClause) - memberDataIdx = i; - } + llvm::cast(mappedMembers.getDefiningOp()); + int memberDataIdx = getMapDataMemberIdx(mapData, memberClause); assert(memberDataIdx >= 0 && "could not find mapped member of structure"); // Same MemberOfFlag to indicate its link with parent and other members - // of, and we flag that it's part of a pointer and object coupling. + // of. 
auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value()); mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; @@ -2347,18 +2434,81 @@ static void processMapMembersWithParent( llvm::OpenMPIRBuilder::DeviceInfoTy::None); combinedInfo.Names.emplace_back( LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder)); - - combinedInfo.BasePointers.emplace_back(mapData.BasePointers[memberDataIdx]); + combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]); combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]); } } +static void +processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, + llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, + bool isTargetParams, int mapDataParentIdx = -1) { + // Declare Target Mappings are excluded from being marked as + // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're + // marked with OMP_MAP_PTR_AND_OBJ instead. + auto mapFlag = mapData.Types[mapDataIdx]; + auto mapInfoOp = + llvm::cast(mapData.MapClause[mapDataIdx]); + + bool isPtrTy = checkIfPointerMap(mapInfoOp); + if (isPtrTy) + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; + + if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx]) + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + + if (mapInfoOp.getMapCaptureType().value() == + mlir::omp::VariableCaptureKind::ByCopy && + !isPtrTy) + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL; + + // if we're provided a mapDataParentIdx, then the data being mapped is + // part of a larger object (in a parent <-> member mapping) and in this + // case our BasePointer should be the parent. 
+ if (mapDataParentIdx >= 0) + combinedInfo.BasePointers.emplace_back( + mapData.BasePointers[mapDataParentIdx]); + else + combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]); + + combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]); + combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]); + combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]); + combinedInfo.Types.emplace_back(mapFlag); + combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]); +} + static void processMapWithMembersOf( LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) { + auto parentClause = + llvm::cast(mapData.MapClause[mapDataIndex]); + + // If we have a partial map (no parent referenced in the map clauses of the + // directive, only members) and only a single member, we do not need to bind + // the map of the member to the parent, we can pass the member separately. + if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) { + auto memberClause = llvm::cast( + parentClause.getMembers()[0].getDefiningOp()); + int memberDataIdx = getMapDataMemberIdx(mapData, memberClause); + // Note: Clang treats arrays with explicit bounds that fall into this + // category as a parent with map case, however, it seems this isn't a + // requirement, and processing them as an individual map is fine. So, + // we will handle them as individual maps for the moment, as it's + // difficult for us to check this as we always require bounds to be + // specified currently and it's also marginally more optimal (single + // map rather than two). The difference may come from the fact that + // Clang maps array without bounds as pointers (which we do not + // currently do), whereas we treat them as arrays in all cases + // currently. 
+ processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams, + mapDataIndex); + return; + } + llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag = mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl, combinedInfo, mapData, mapDataIndex, isTargetParams); @@ -2477,12 +2627,8 @@ static void genMapInfos(llvm::IRBuilderBase &builder, // utilise the size from any component of MapInfoData, if we can't // something is missing from the initial MapInfoData construction. for (size_t i = 0; i < mapData.MapClause.size(); ++i) { - // NOTE/TODO: We currently do not handle member mapping seperately from it's - // parent or explicit mapping of a parent and member in the same operation, - // this will need to change in the near future, for now we primarily handle - // descriptor mapping from fortran, generalised as mapping record types - // with implicit member maps. This lowering needs further generalisation to - // fully support fortran derived types, and C/C++ structures and classes. + // NOTE/TODO: We currently do not support arbitrary depth record + // type mapping. if (mapData.IsAMember[i]) continue; @@ -2493,28 +2639,7 @@ static void genMapInfos(llvm::IRBuilderBase &builder, continue; } - auto mapFlag = mapData.Types[i]; - bool isPtrTy = checkIfPointerMap(mapInfoOp); - if (isPtrTy) - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; - - // Declare Target Mappings are excluded from being marked as - // OMP_MAP_TARGET_PARAM as they are not passed as parameters. 
- if (isTargetParams && !mapData.IsDeclareTarget[i]) - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; - - if (auto mapInfoOp = dyn_cast(mapData.MapClause[i])) - if (mapInfoOp.getMapCaptureType().value() == - mlir::omp::VariableCaptureKind::ByCopy && - !isPtrTy) - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL; - - combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]); - combinedInfo.Pointers.emplace_back(mapData.Pointers[i]); - combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]); - combinedInfo.Names.emplace_back(mapData.Names[i]); - combinedInfo.Types.emplace_back(mapFlag); - combinedInfo.Sizes.emplace_back(mapData.Sizes[i]); + processIndividualMap(mapData, i, combinedInfo, isTargetParams); } auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) { diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir index 7cb22dbb10b18..429bb379ee1b8 100644 --- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir @@ -2,10 +2,9 @@ // This test checks the offload sizes, map types and base pointers and pointers // provided to the OpenMP kernel argument structure are correct when lowering -// to LLVM-IR from MLIR when the fortran allocatables flag is switched on and -// a fortran allocatable descriptor type is provided alongside the omp.map.info, -// the test utilises mapping of array sections, full arrays and individual -// allocated scalars. +// to LLVM-IR from MLIR when a fortran allocatable descriptor type is provided +// alongside the omp.map.info, the test utilises mapping of array sections, +// full arrays and individual allocated scalars. 
module attributes {omp.is_target_device = false} { llvm.func @_QQmain() { @@ -27,7 +26,7 @@ module attributes {omp.is_target_device = false} { %15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%14 : i64) extent(%11 : i64) stride(%13 : i64) start_idx(%9 : i64) {stride_in_bytes = true} %16 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> %17 = omp.map.info var_ptr(%3 : !llvm.ptr, f32) var_ptr_ptr(%16 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr {name = "full_arr"} - %18 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%17 : !llvm.ptr) -> !llvm.ptr {name = "full_arr"} + %18 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%17 : [0] : !llvm.ptr) -> !llvm.ptr {name = "full_arr"} %19 = llvm.getelementptr %6[0, 7, %7, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> %20 = llvm.load %19 : !llvm.ptr -> i64 %21 = llvm.getelementptr %6[0, 7, %7, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> @@ -39,10 +38,10 @@ module attributes {omp.is_target_device = false} { %27 = omp.map.bounds lower_bound(%25 : i64) upper_bound(%26 : i64) extent(%22 : i64) stride(%24 : i64) start_idx(%20 : i64) {stride_in_bytes = true} %28 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> %29 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) var_ptr_ptr(%28 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%27) -> !llvm.ptr {name = "sect_arr(2:5)"} - %30 = omp.map.info var_ptr(%6 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) 
map_clauses(tofrom) capture(ByRef) members(%29 : !llvm.ptr) -> !llvm.ptr {name = "sect_arr(2:5)"} + %30 = omp.map.info var_ptr(%6 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%29 : [0] : !llvm.ptr) -> !llvm.ptr {name = "sect_arr(2:5)"} %31 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> %32 = omp.map.info var_ptr(%5 : !llvm.ptr, f32) var_ptr_ptr(%31 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "scalar"} - %33 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%32 : !llvm.ptr) -> !llvm.ptr {name = "scalar"} + %33 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%32 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar"} omp.target map_entries(%17 -> %arg0, %18 -> %arg1, %29 -> %arg2, %30 -> %arg3, %32 -> %arg4, %33 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr, %arg4: !llvm.ptr, %arg5: !llvm.ptr): omp.terminator @@ -142,6 +141,6 @@ module attributes {omp.is_target_device = false} { // CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 7 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8 // CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8 -// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8 +// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8 // CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8 // CHECK: store ptr %[[SCALAR_PTR_LOAD]], ptr %[[OFFLOADPTRS]], align 8 diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir 
b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index 2f629675442d0..18189ea639816 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -69,18 +69,18 @@ llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { // CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds [1024 x i32], ptr %[[ARR_DATA:.*]], i64 0, i64 0 // CHECK: %[[VAL_5:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[ARR_DATA]], ptr %[[VAL_5]], align 8 -// CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 -// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[VAL_7]], align 8 -// CHECK: %[[VAL_8:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_2]], i64 0, i64 0 -// CHECK: store ptr null, ptr %[[VAL_8]], align 8 -// CHECK: %[[VAL_9:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 -// CHECK: %[[VAL_10:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 -// CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_9]], ptr %[[VAL_10]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) -// CHECK: %[[VAL_11:.*]] = getelementptr [1024 x i32], ptr %[[ARR_DATA]], i32 0, i64 0 -// CHECK: store i32 99, ptr %[[VAL_11]], align 4 -// CHECK: %[[VAL_12:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 -// CHECK: %[[VAL_13:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 -// CHECK: call void @__tgt_target_data_end_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_12]], ptr %[[VAL_13]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: %[[VAL_6:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[VAL_6]], align 8 +// CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_2]], i64 0, i64 0 +// CHECK: store ptr null, ptr %[[VAL_7]], align 8 +// CHECK: 
%[[VAL_8:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: %[[VAL_9:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_8]], ptr %[[VAL_9]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: %[[VAL_10:.*]] = getelementptr [1024 x i32], ptr %[[ARR_DATA]], i32 0, i64 0 +// CHECK: store i32 99, ptr %[[VAL_10]], align 4 +// CHECK: %[[VAL_11:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: %[[VAL_12:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_end_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_11]], ptr %[[VAL_12]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) // CHECK: ret void // ----- @@ -157,13 +157,13 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { // CHECK: %[[ARR_OFFSET1:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16:.*]], i64 0, i64 0 // CHECK: %[[ARR_OFFSET2:.*]] = getelementptr inbounds [512 x i32], ptr %[[VAL_20:.*]], i64 0, i64 0 // CHECK: %[[VAL_15:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0 -// CHECK: store ptr %[[VAL_16:.*]], ptr %[[VAL_15]], align 8 +// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_15]], align 8 // CHECK: %[[VAL_17:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 0 // CHECK: store ptr %[[ARR_OFFSET1]], ptr %[[VAL_17]], align 8 // CHECK: %[[VAL_18:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_5]], i64 0, i64 0 // CHECK: store ptr null, ptr %[[VAL_18]], align 8 // CHECK: %[[VAL_19:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 1 -// CHECK: store ptr %[[VAL_20:.*]], ptr %[[VAL_19]], align 8 +// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_19]], align 8 // CHECK: %[[VAL_21:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 1 // 
CHECK: store ptr %[[ARR_OFFSET2]], ptr %[[VAL_21]], align 8 // CHECK: %[[VAL_22:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_5]], i64 0, i64 1 diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir new file mode 100644 index 0000000000000..e4d82d4a58c89 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir @@ -0,0 +1,69 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// This test checks the offload sizes, map types and base pointers and pointers +// provided to the OpenMP kernel argument structure are correct when lowering +// to LLVM-IR from MLIR when performing explicit member mapping of a record type +// that includes another nested record type (C++/C class/structure, Fortran +// derived type) where members of both the nested and outer record type have +// members mapped. + +module attributes {omp.is_target_device = false} { +llvm.func @_QQmain() { + %0 = llvm.mlir.constant(10 : index) : i64 + %1 = llvm.mlir.constant(4 : index) : i64 + %2 = llvm.mlir.constant(1 : index) : i64 + %3 = llvm.mlir.constant(1 : i64) : i64 + %4 = llvm.alloca %3 x !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)> : (i64) -> !llvm.ptr + %5 = llvm.getelementptr %4[0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)> + %6 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr + %7 = llvm.getelementptr %4[0, 2, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)> + %8 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr + %9 = llvm.getelementptr %4[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)> + %10 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%0 : i64) stride(%2 : i64) start_idx(%2 : i64) + 
%11 = omp.map.info var_ptr(%9 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr + %12 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%6, %8, %11 : [3, -1], [2, 1], [1, -1] : !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} + omp.target map_entries(%6 -> %arg0, %8 -> %arg1, %11 -> %arg2, %12 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr): + omp.terminator + } + llvm.return + } +} + +// CHECK: @.offload_sizes = private unnamed_addr constant [4 x i64] [i64 0, i64 4, i64 4, i64 16] +// CHECK: @.offload_maptypes = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710659] + +// CHECK: define void @_QQmain() +// CHECK: %[[ALLOCA:.*]] = alloca { float, [10 x i32], { float, i32 }, i32 }, i64 1, align 8 +// CHECK: %[[MEMBER_ACCESS_1:.*]] = getelementptr { float, [10 x i32], { float, i32 }, i32 }, ptr %[[ALLOCA]], i32 0, i32 3 +// CHECK: %[[MEMBER_ACCESS_2:.*]] = getelementptr { float, [10 x i32], { float, i32 }, i32 }, ptr %[[ALLOCA]], i32 0, i32 2, i32 1 +// CHECK: %[[MEMBER_ACCESS_3:.*]] = getelementptr { float, [10 x i32], { float, i32 }, i32 }, ptr %[[ALLOCA]], i32 0, i32 1 + +// CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_3]], i64 0, i64 1 +// CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1 +// CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoint ptr %[[FIRST_MEMBER]] to i64 +// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[LAST_MEMBER]] to i64 +// CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]] +// CHECK: %[[OFFLOAD_SIZE:.*]] = sdiv exact i64 %[[MEMBER_DIFF]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) + +// CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr 
inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 +// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8 +// CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0 +// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8 +// CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0 +// CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8 + +// CHECK: %[[BASE_PTR_ARR_2:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 +// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_2]], align 8 +// CHECK: %[[PTR_ARR_2:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1 +// CHECK: store ptr %[[MEMBER_ACCESS_1]], ptr %[[PTR_ARR_2]], align 8 + +// CHECK: %[[BASE_PTR_ARR_3:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 +// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_3]], align 8 +// CHECK: %[[PTR_ARR_3:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2 +// CHECK: store ptr %[[MEMBER_ACCESS_2]], ptr %[[PTR_ARR_3]], align 8 + +// CHECK: %[[BASE_PTR_ARR_4:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3 +// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_4]], align 8 +// CHECK: %[[PTR_ARR_4:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3 +// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR_4]], align 8 diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir new file mode 100644 index 0000000000000..c7a87e44d6537 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir @@ -0,0 +1,62 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// This test checks the offload sizes, map types and base pointers and pointers +// provided to the OpenMP kernel argument structure are correct when lowering 
+// to LLVM-IR from MLIR when performing explicit member mapping of a record type +// (C++/C class/structure, Fortran derived type) where only members of the record +// type are mapped. + +module attributes {omp.is_target_device = false} { +llvm.func @_QQmain() { + %0 = llvm.mlir.constant(10 : index) : i64 + %1 = llvm.mlir.constant(4 : index) : i64 + %2 = llvm.mlir.constant(1 : index) : i64 + %3 = llvm.mlir.constant(1 : i64) : i64 + %4 = llvm.alloca %3 x !llvm.struct<(f32, array<10 x i32>, i32)> : (i64) -> !llvm.ptr + %5 = llvm.mlir.constant(2 : i32) : i32 + %6 = llvm.getelementptr %4[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, i32)> + %7 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr + %8 = llvm.mlir.constant(1 : i32) : i32 + %9 = llvm.getelementptr %4[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, i32)> + %10 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%0 : i64) stride(%2 : i64) start_idx(%2 : i64) + %11 = omp.map.info var_ptr(%9 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr + %12 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, i32)>) map_clauses(tofrom) capture(ByRef) members(%7, %11 : [2], [1] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} + omp.target map_entries(%7 -> %arg0, %11 -> %arg1, %12 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): + omp.terminator + } + llvm.return + } +} + +// CHECK: @.offload_sizes = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 16] +// CHECK: @.offload_maptypes = private unnamed_addr constant [3 x i64] [i64 32, i64 281474976710659, i64 281474976710659] + +// CHECK: define void @_QQmain() +// CHECK: %[[ALLOCA:.*]] = alloca { float, [10 x i32], i32 }, i64 1, align 8 +// CHECK: %[[MEMBER_ACCESS_1:.*]] = getelementptr { float, [10 x i32], i32 }, ptr 
%[[ALLOCA]], i32 0, i32 2 +// CHECK: %[[MEMBER_ACCESS_2:.*]] = getelementptr { float, [10 x i32], i32 }, ptr %[[ALLOCA]], i32 0, i32 1 + +// CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_2]], i64 0, i64 1 +// CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1 +// CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoint ptr %[[FIRST_MEMBER]] to i64 +// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[LAST_MEMBER]] to i64 +// CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]] +// CHECK: %[[OFFLOAD_SIZE:.*]] = sdiv exact i64 %[[MEMBER_DIFF]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) + +// CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 +// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8 +// CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 0 +// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8 +// CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0 +// CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8 + +// CHECK: %[[BASE_PTR_ARR_2:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 +// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_2]], align 8 +// CHECK: %[[PTR_ARR_2:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 1 +// CHECK: store ptr %[[MEMBER_ACCESS_1]], ptr %[[PTR_ARR_2]], align 8 + +// CHECK: %[[BASE_PTR_ARR_3:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 2 +// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_3]], align 8 +// CHECK: %[[PTR_ARR_3:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 2 +// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR_3]], align 8