From 4d58f013bc53904da2fcff0965ddadb829d1db8f Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Fri, 11 Nov 2022 05:30:43 -0800 Subject: [PATCH 1/7] Add kernel property to propagate compile options to backend Signed-off-by: Arvind Sudarsanam --- clang/include/clang/Basic/Attr.td | 13 ++- clang/lib/CodeGen/CodeGenFunction.cpp | 20 ++++- .../CompileTimePropertiesPass.cpp | 4 + llvm/tools/sycl-post-link/sycl-post-link.cpp | 81 +++++++++++++++++-- sycl/include/sycl/compile_options.hpp | 22 +++++ .../oneapi/kernel_properties/properties.hpp | 29 +++++++ .../sycl/ext/oneapi/properties/property.hpp | 3 +- sycl/include/sycl/info/compile_options.def | 4 + sycl/include/sycl/sycl.hpp | 1 + .../properties/prop_compile_options.cpp | 15 ++++ 10 files changed, 177 insertions(+), 15 deletions(-) create mode 100644 sycl/include/sycl/compile_options.hpp create mode 100644 sycl/include/sycl/info/compile_options.def create mode 100644 sycl/test/extensions/properties/prop_compile_options.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index d2c31a178d40c..89140b0197b10 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1309,12 +1309,12 @@ def SYCLType: InheritableAttr { "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", - "stream", "sampler"], + "compile_options", "stream", "sampler"], ["accessor", "local_accessor", "spec_constant", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", - "stream", "sampler"]>]; + "compile_options", "stream", "sampler"]>]; // Only used internally by SYCL implementation let Documentation = [InternalOnly]; } @@ -1328,6 +1328,15 @@ def SYCLDeviceHas : InheritableAttr { let SupportsNonconformingLambdaSyntax = 1; } +def SYCLDeviceCompileOptions : InheritableAttr { + let Spellings = [CXX11<"sycl", "device_compile_options">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Args = [VariadicExprArgument<"DeviceCompileOptions">]; + let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; + // Only used internally by SYCL implementation + let Documentation = [InternalOnly]; +} + def SYCLUsesAspects : InheritableAttr { let Spellings = [CXX11<"__sycl_detail__", "__uses_aspects__">]; let Subjects = SubjectList<[CXXRecord, Function], ErrorDiag>; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 789ea6b1e00ad..239427e74b611 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -582,11 +582,23 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, && !FD->hasAttr()) return; - // TODO Module identifier is not reliable for this purpose since two modules - // can have the same ID, needs improvement - if (getLangOpts().SYCLIsDevice) + + if (getLangOpts().SYCLIsDevice) { + // TODO Module identifier is not reliable for this purpose since two modules + // can have the same ID, needs improvement Fn->addFnAttr("sycl-module-id", Fn->getParent()->getModuleIdentifier()); - + int SYCLDeviceCompileOptLevel; + switch (CGM.getCodeGenOpts().OptimizationLevel) { + default: + llvm_unreachable("Invalid optimization level!"); + case 0: + case 1: + case 2: + case 3: + SYCLDeviceCompileOptLevel = CGM.getCodeGenOpts().OptimizationLevel; + } + Fn->addFnAttr("sycl-device-compile-optlevel", std::to_string(SYCLDeviceCompileOptLevel)); + } llvm::LLVMContext &Context = getLLVMContext(); if (FD->hasAttr() || FD->hasAttr()) diff --git a/llvm/tools/sycl-post-link/CompileTimePropertiesPass.cpp b/llvm/tools/sycl-post-link/CompileTimePropertiesPass.cpp index e6774c42876e1..47c6956d0292b 100644 --- a/llvm/tools/sycl-post-link/CompileTimePropertiesPass.cpp +++ b/llvm/tools/sycl-post-link/CompileTimePropertiesPass.cpp @@ -208,6 +208,10 @@ attributeToExecModeMetadata(Module &M, const Attribute &Attr) { MDNode::get(Ctx, MD)); } + if (AttrKindStr == "sycl-device-compile-options") { + auto Opt = Attr.getValueAsString(); + llvm::errs() << "ARV: Opt is -O" << Opt << "\n"; + } return None; } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 30ad2da61330c..7f791451b8ffd 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -89,6 +89,7 @@ cl::OptionCategory PostLinkCat{"sycl-post-link options"}; // clang/lib/Driver/Driver.cpp, sycl-post-link.cpp, ClangOffloadWrapper.cpp constexpr char COL_CODE[] = "Code"; constexpr char COL_SYM[] = "Symbols"; +constexpr char COL_OPTS[] = "Options"; constexpr char COL_PROPS[] = "Properties"; // InputFilename - The filename to read from. @@ -215,10 +216,11 @@ struct GlobalBinImageProps { bool EmitDeviceGlobalPropSet; }; -struct IrPropSymFilenameTriple { +struct IrPropSymFilenameQuad { std::string Ir; std::string Prop; std::string Sym; + std::string Opt; }; void writeToFile(const std::string &Filename, const std::string &Content) { @@ -466,6 +468,44 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, return SCFile; } +std::string getOptString(module_split::ModuleDesc &MD) { + auto &M = MD.getModule(); + // Process all properties on kernels. + for (Function &F : M) { + // Only consider kernels. + if (F.getCallingConv() != CallingConv::SPIR_KERNEL) + continue; + + SmallVector MDOps; + SmallVector, 8> NamedMDOps; + for (const Attribute &Attr : F.getAttributes().getFnAttrs()) { + // Currently, only string attributes are supported + if (!Attr.isStringAttribute()) + continue; + StringRef AttrKindStr = Attr.getKindAsString(); + if (AttrKindStr == "sycl-device-compile-optlevel") { + auto Opt = "-O" + Attr.getValueAsString(); + llvm::errs() << "ARV: Opt is " << Opt << "\n"; + return Opt.str(); + } + } + } + return ""; +} + +std::string saveModuleOptions(module_split::ModuleDesc &MD, + const std::string &Opts, int I, + StringRef Suff) { + std::error_code EC; + std::string SCFile = makeResultFileName(".opt", I, Suff); + raw_fd_ostream SCOut(SCFile, EC); + checkError(EC, "error opening file '" + SCFile + "'"); + SCOut << Opts; + + return SCFile; +} + + // Saves specified collection of symbols to a file. std::string saveModuleSymbolTable(const module_split::EntryPointSet &Es, int I, StringRef Suffix) { @@ -570,11 +610,11 @@ StringRef getModuleSuffix(const module_split::ModuleDesc &MD) { // @param IRFilename filename of already available IR component. If not empty, // IR component saving is skipped, and this file name is recorded as such in // the result. -// @return a triple of files where IR, Property and Symbols components of the -// Module descriptor are written respectively. -IrPropSymFilenameTriple saveModule(module_split::ModuleDesc &MD, int I, +// @return a quadruple of files where IR, Property, Symbols and Opts components +// of the Module descriptor are written respectively. +IrPropSymFilenameQuad saveModule(module_split::ModuleDesc &MD, int I, StringRef IRFilename = "") { - IrPropSymFilenameTriple Res; + IrPropSymFilenameQuad Res; StringRef Suffix = getModuleSuffix(MD); if (!IRFilename.empty()) { @@ -587,6 +627,9 @@ IrPropSymFilenameTriple saveModule(module_split::ModuleDesc &MD, int I, EmitExportedSymbols, DeviceGlobals}; Res.Prop = saveModuleProperties(MD, Props, I, Suffix); + std::string Opts = getOptString(MD); + Res.Opt = saveModuleOptions(MD, Opts, I, Suffix); + if (DoSymGen) { // save the names of the entry points - the symbol table Res.Sym = saveModuleSymbolTable(MD.entries(), I, Suffix); @@ -631,20 +674,37 @@ bool processSpecConstants(module_split::ModuleDesc &MD) { return MD.Props.SpecConstsMet; } -constexpr int MAX_COLUMNS_IN_FILE_TABLE = 3; +constexpr int MAX_COLUMNS_IN_FILE_TABLE = 4; +#if 0 void addTableRow(util::SimpleTable &Table, - const IrPropSymFilenameTriple &RowData) { + const IrPropSymFilenameQuad &RowData) { SmallVector Row; + for (const std::string *S : {&RowData.Ir, &RowData.Prop, &RowData.Opt, &RowData.Sym}) { + if (!S->empty()) { + Row.push_back(StringRef(*S)); + } + } + llvm::errs() << "ARV: " << static_cast(Table.getNumColumns()) << "," << Row.size() <<"\n"; + assert(static_cast(Table.getNumColumns()) == Row.size()); + Table.addRow(Row); +} +#else +void addTableRow(util::SimpleTable &Table, + const IrPropSymFilenameQuad &RowData) { + SmallVector Row; + for (const std::string *S : {&RowData.Ir, &RowData.Prop, &RowData.Sym}) { if (!S->empty()) { Row.push_back(StringRef(*S)); } } + //llvm::errs() << "ARV: " << static_cast(Table.getNumColumns()) << "," << Row.size() <<"\n"; assert(static_cast(Table.getNumColumns()) == Row.size()); Table.addRow(Row); } +#endif // Removes the global variable "llvm.used" and returns true on success. // "llvm.used" is a global constant array containing references to kernels @@ -693,8 +753,13 @@ static bool removeSYCLKernelsConstRefArray(Module &M) { std::unique_ptr processInputModule(std::unique_ptr M) { // Construct the resulting table which will accumulate all the outputs. +#if 0 + SmallVector ColumnTitles{ + StringRef(COL_CODE), StringRef(COL_PROPS), StringRef(COL_OPTS)}; +#else SmallVector ColumnTitles{ StringRef(COL_CODE), StringRef(COL_PROPS)}; +#endif if (DoSymGen) { ColumnTitles.push_back(COL_SYM); @@ -869,7 +934,7 @@ processInputModule(std::unique_ptr M) { "have been made\n"; } for (module_split::ModuleDesc &IrMD : MMs) { - IrPropSymFilenameTriple T = saveModule(IrMD, ID, OutIRFileName); + IrPropSymFilenameQuad T = saveModule(IrMD, ID, OutIRFileName); addTableRow(*Table, T); } } diff --git a/sycl/include/sycl/compile_options.hpp b/sycl/include/sycl/compile_options.hpp new file mode 100644 index 0000000000000..eed3cbe5bf474 --- /dev/null +++ b/sycl/include/sycl/compile_options.hpp @@ -0,0 +1,22 @@ +//==---- compile_options.hpp - SYCL compile options Enums -----*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // +#pragma once + +#include + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { + +#define __SYCL_COMPILE_OPTION(OPTION, ID) OPTION = ID, +enum class __SYCL_TYPE(compile_options) compile_options { +#include +}; +#undef __SYCL_COMPILE_OPTION + +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index 925ef721eea87..a5bbc82c4b9d0 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -56,6 +57,12 @@ struct device_has_key { std::integral_constant...>; }; +struct device_compile_options_key { + template + using value_t = property_value...>; +}; + template struct property_value, std::integral_constant...> { @@ -108,6 +115,14 @@ struct property_value value{Aspects...}; }; +template +struct property_value...> { + using key_t = device_compile_options_key; + static constexpr std::array value{CompileOptions...}; +}; + template inline constexpr work_group_size_key::value_t work_group_size; @@ -121,11 +136,15 @@ inline constexpr sub_group_size_key::value_t sub_group_size; template inline constexpr device_has_key::value_t device_has; +template +inline constexpr device_compile_options_key::value_t device_compile_options; + template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; +template <> struct is_property_key : std::true_type {}; namespace detail { template <> struct PropertyToKind { @@ -140,6 +159,9 @@ template <> struct PropertyToKind { template <> struct PropertyToKind { static constexpr PropKind Kind = PropKind::DeviceHas; }; +template <> struct PropertyToKind { + static constexpr PropKind Kind = PropKind::DeviceCompileOptions; +}; template <> struct IsCompileTimeProperty : std::true_type {}; @@ -148,6 +170,7 @@ struct IsCompileTimeProperty : std::true_type {}; template <> struct IsCompileTimeProperty : std::true_type {}; template <> struct IsCompileTimeProperty : std::true_type {}; +template <> struct IsCompileTimeProperty : std::true_type {}; template struct PropertyMetaInfo> { @@ -170,6 +193,12 @@ struct PropertyMetaInfo> { static constexpr const char *value = SizeListToStr(Aspects)...>::value; }; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-device-compile-options"; + static constexpr const char *value = + SizeListToStr(CompileOptions)...>::value; +}; template struct HasKernelPropertiesGetMethod : std::false_type {}; diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 08aa8a5f76dd4..6645d76d7acc5 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -173,8 +173,9 @@ enum PropKind : uint32_t { WorkGroupSizeHint = 7, SubGroupSize = 8, DeviceHas = 9, + DeviceCompileOptions = 10, // PropKindSize must always be the last value. - PropKindSize = 10, + PropKindSize = 11, }; // This trait must be specialized for all properties and must have a unique diff --git a/sycl/include/sycl/info/compile_options.def b/sycl/include/sycl/info/compile_options.def new file mode 100644 index 0000000000000..527bee4b7cce0 --- /dev/null +++ b/sycl/include/sycl/info/compile_options.def @@ -0,0 +1,4 @@ +__SYCL_COMPILE_OPTION(O0, 0) +__SYCL_COMPILE_OPTION(O1, 1) +__SYCL_COMPILE_OPTION(O2, 2) +__SYCL_COMPILE_OPTION(O3, 3) \ No newline at end of file diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 70a29d96d5097..cdab4ecd4e69a 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -19,6 +19,7 @@ #endif #include #include +#include #include #include #include diff --git a/sycl/test/extensions/properties/prop_compile_options.cpp b/sycl/test/extensions/properties/prop_compile_options.cpp new file mode 100644 index 0000000000000..9e3184e6042fa --- /dev/null +++ b/sycl/test/extensions/properties/prop_compile_options.cpp @@ -0,0 +1,15 @@ +// RUN: %clangxx -O0 -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -O0 -fsycl-device-only -Xclang -verify %s +// expected-no-diagnostics +// Tests for propagation of compile options + +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; + +int main() { + queue Q; + // CHECK-IR: spir_kernel void @{{.*}}Kernel0(){{.*}} #[[COAttr1:[0-9]+]] + Q.single_task([]() {}); +} From d73a1daf4f447ca1105a6a5ae64d62af5bd6f7b1 Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Thu, 15 Dec 2022 11:16:55 -0800 Subject: [PATCH 2/7] second stage of implementation Signed-off-by: Arvind Sudarsanam --- clang/include/clang/Basic/Attr.td | 13 +- clang/lib/CodeGen/CodeGenFunction.cpp | 2 +- .../CompileTimePropertiesPass.cpp | 4 - llvm/tools/sycl-post-link/ModuleSplitter.cpp | 64 ++++ llvm/tools/sycl-post-link/ModuleSplitter.h | 11 + llvm/tools/sycl-post-link/sycl-post-link.cpp | 279 +++++++----------- ...ropagateCompilerFlagsToLinkerAndRuntime.md | 15 + sycl/include/sycl/compile_options.hpp | 22 -- .../oneapi/kernel_properties/properties.hpp | 29 -- .../sycl/ext/oneapi/properties/property.hpp | 3 +- sycl/include/sycl/info/compile_options.def | 4 - sycl/include/sycl/sycl.hpp | 1 - .../program_manager/program_manager.cpp | 23 ++ .../properties/prop_compile_options.cpp | 15 - 14 files changed, 231 insertions(+), 254 deletions(-) create mode 100644 sycl/doc/design/PropagateCompilerFlagsToLinkerAndRuntime.md delete mode 100644 sycl/include/sycl/compile_options.hpp delete mode 100644 sycl/include/sycl/info/compile_options.def delete mode 100644 sycl/test/extensions/properties/prop_compile_options.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 89140b0197b10..d2c31a178d40c 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1309,12 +1309,12 @@ def SYCLType: InheritableAttr { "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", - "compile_options", "stream", "sampler"], + "stream", "sampler"], ["accessor", "local_accessor", "spec_constant", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", - "compile_options", "stream", "sampler"]>]; + "stream", "sampler"]>]; // Only used internally by SYCL implementation let Documentation = [InternalOnly]; } @@ -1328,15 +1328,6 @@ def SYCLDeviceHas : InheritableAttr { let SupportsNonconformingLambdaSyntax = 1; } -def SYCLDeviceCompileOptions : InheritableAttr { - let Spellings = [CXX11<"sycl", "device_compile_options">]; - let Subjects = SubjectList<[Function], ErrorDiag>; - let Args = [VariadicExprArgument<"DeviceCompileOptions">]; - let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; - // Only used internally by SYCL implementation - let Documentation = [InternalOnly]; -} - def SYCLUsesAspects : InheritableAttr { let Spellings = [CXX11<"__sycl_detail__", "__uses_aspects__">]; let Subjects = SubjectList<[CXXRecord, Function], ErrorDiag>; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 239427e74b611..8ea68fc0491ad 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -587,7 +587,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, // TODO Module identifier is not reliable for this purpose since two modules // can have the same ID, needs improvement Fn->addFnAttr("sycl-module-id", Fn->getParent()->getModuleIdentifier()); - int SYCLDeviceCompileOptLevel; + int SYCLDeviceCompileOptLevel = 2; switch (CGM.getCodeGenOpts().OptimizationLevel) { default: llvm_unreachable("Invalid optimization level!"); diff --git a/llvm/tools/sycl-post-link/CompileTimePropertiesPass.cpp b/llvm/tools/sycl-post-link/CompileTimePropertiesPass.cpp index 47c6956d0292b..e6774c42876e1 100644 --- a/llvm/tools/sycl-post-link/CompileTimePropertiesPass.cpp +++ b/llvm/tools/sycl-post-link/CompileTimePropertiesPass.cpp @@ -208,10 +208,6 @@ attributeToExecModeMetadata(Module &M, const Attribute &Attr) { MDNode::get(Ctx, MD)); } - if (AttrKindStr == "sycl-device-compile-options") { - auto Opt = Attr.getValueAsString(); - llvm::errs() << "ARV: Opt is -O" << Opt << "\n"; - } return None; } diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index ada6c5007ed7d..e8e83990a0527 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -43,6 +43,9 @@ constexpr char ESIMD_MARKER_MD[] = "sycl_explicit_simd"; constexpr char ATTR_SYCL_MODULE_ID[] = "sycl-module-id"; +// Similar copy in sycl-post-link.cpp +constexpr char ATTR_OPT_LEVEL[] = "sycl-device-compile-optlevel"; + bool hasIndirectFunctionsOrCalls(const Module &M) { for (const auto &F : M.functions()) { // There are functions marked with [[intel::device_indirectly_callable]] @@ -299,6 +302,44 @@ groupEntryPointsByAttribute(ModuleDesc &MD, StringRef AttrName, return EntryPointGroups; } +template +EntryPointGroupVec +groupEntryPointsByOptLevel(ModuleDesc &MD, StringRef AttrName, + bool EmitOnlyKernelsAsEntryPoints, + EntryPoinGroupFunc F) { + EntryPointGroupVec EntryPointGroups{}; + std::map EntryPointMap; + Module &M = MD.getModule(); + + // Only process module entry points: + for (auto &F : M.functions()) { + if (!isEntryPoint(F, EmitOnlyKernelsAsEntryPoints) || + !MD.isEntryPointCandidate(F)) { + continue; + } + if (F.hasFnAttribute(AttrName)) { + SmallString<16> stringConstName; + StringRef OptLevelStr = F.getFnAttribute(AttrName).getValueAsString(); + EntryPointMap[OptLevelStr].insert(&F); + } else { + EntryPointMap["2"].insert(&F); + } + } + if (!EntryPointMap.empty()) { + EntryPointGroups.reserve(EntryPointMap.size()); + for (auto &EPG : EntryPointMap) { + EntryPointGroups.emplace_back(EntryPointGroup{ + EPG.first, std::move(EPG.second), MD.getEntryPointGroup().Props}); + F(EntryPointGroups.back()); + } + } else { + // No entry points met, record this. + EntryPointGroups.push_back({GLOBAL_SCOPE_NAME, {}}); + F(EntryPointGroups.back()); + } + return EntryPointGroups; +} + // Represents a call graph between functions in a module. Nodes are functions, // edges are "calls" relation. class CallGraph { @@ -761,5 +802,28 @@ getLargeGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { return std::make_unique(std::move(MD), std::move(Groups)); } +std::unique_ptr +getOptLevelSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { + EntryPointGroupVec Groups = groupEntryPointsByOptLevel( + MD, ATTR_OPT_LEVEL, EmitOnlyKernelsAsEntryPoints, + [](EntryPointGroup &G) { + if (G.GroupId == "3") + G.Props.OptLevel = 3; + else if (G.GroupId == "2") + G.Props.OptLevel = 2; + else if (G.GroupId == "1") + G.Props.OptLevel = 1; + else if (G.GroupId == "0") + G.Props.OptLevel = 0; + }); + assert(!Groups.empty() && "At least one group is expected"); + assert(Groups.size() <= 2 && "At most 2 groups are expected"); + + if (Groups.size() > 1) + return std::make_unique(std::move(MD), std::move(Groups)); + else + return std::make_unique(std::move(MD), std::move(Groups)); +} + } // namespace module_split } // namespace llvm diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.h b/llvm/tools/sycl-post-link/ModuleSplitter.h index 7088909c8400a..1dc9ac1886bfb 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.h +++ b/llvm/tools/sycl-post-link/ModuleSplitter.h @@ -60,12 +60,16 @@ struct EntryPointGroup { // Scope represented by EPs in a group EntryPointsGroupScope Scope = Scope_Global; + // opt level + unsigned OptLevel = 2; + Properties merge(const Properties &Other) const { Properties Res; Res.HasESIMD = HasESIMD == Other.HasESIMD ? HasESIMD : SyclEsimdSplitStatus::SYCL_AND_ESIMD; Res.UsesLargeGRF = UsesLargeGRF || Other.UsesLargeGRF; + // Opt Level remains at '2' // Scope remains global return Res; } @@ -93,6 +97,9 @@ struct EntryPointGroup { // Tells if some entry points use large GRF mode. bool isLargeGRF() const { return Props.UsesLargeGRF; } + // Get opt level. + uint32_t getOptLevel() const { return Props.OptLevel; } + void saveNames(std::vector &Dest) const; void rebuildFromNames(const std::vector &Names, const Module &M); void rebuild(const Module &M); @@ -147,6 +154,7 @@ class ModuleDesc { bool isESIMD() const { return EntryPoints.isEsimd(); } bool isSYCL() const { return EntryPoints.isSycl(); } bool isLargeGRF() const { return EntryPoints.isLargeGRF(); } + uint32_t getOptLevel() const { return EntryPoints.getOptLevel(); } const EntryPointSet &entries() const { return EntryPoints.Functions; } const EntryPointGroup &getEntryPointGroup() const { return EntryPoints; } @@ -253,6 +261,9 @@ getSplitterByMode(ModuleDesc &&MD, IRSplitMode Mode, std::unique_ptr getLargeGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); +std::unique_ptr +getOptLevelSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); + #ifndef NDEBUG void dumpEntryPoints(const EntryPointSet &C, const char *msg = "", int Tab = 0); void dumpEntryPoints(const Module &M, bool OnlyKernelsAreEntryPoints = false, diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 7f791451b8ffd..a6e4dbc7a145f 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -89,9 +89,11 @@ cl::OptionCategory PostLinkCat{"sycl-post-link options"}; // clang/lib/Driver/Driver.cpp, sycl-post-link.cpp, ClangOffloadWrapper.cpp constexpr char COL_CODE[] = "Code"; constexpr char COL_SYM[] = "Symbols"; -constexpr char COL_OPTS[] = "Options"; constexpr char COL_PROPS[] = "Properties"; +// Similar copy in ModuleSplitter.cpp +constexpr char ATTR_OPT_LEVEL[] = "sycl-device-compile-optlevel"; + // InputFilename - The filename to read from. cl::opt InputFilename{cl::Positional, cl::desc(""), @@ -216,11 +218,10 @@ struct GlobalBinImageProps { bool EmitDeviceGlobalPropSet; }; -struct IrPropSymFilenameQuad { +struct IrPropSymFilenameTriple { std::string Ir; std::string Prop; std::string Sym; - std::string Opt; }; void writeToFile(const std::string &Filename, const std::string &Content) { @@ -441,6 +442,9 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, ProgramMetadata.insert({MetadataNames.back(), KernelReqdWorkGroupSize}); } } + + PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"OptLevel", MD.getOptLevel()}); + if (MD.isESIMD()) { PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isEsimdImage", true}); } @@ -468,44 +472,6 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, return SCFile; } -std::string getOptString(module_split::ModuleDesc &MD) { - auto &M = MD.getModule(); - // Process all properties on kernels. - for (Function &F : M) { - // Only consider kernels. - if (F.getCallingConv() != CallingConv::SPIR_KERNEL) - continue; - - SmallVector MDOps; - SmallVector, 8> NamedMDOps; - for (const Attribute &Attr : F.getAttributes().getFnAttrs()) { - // Currently, only string attributes are supported - if (!Attr.isStringAttribute()) - continue; - StringRef AttrKindStr = Attr.getKindAsString(); - if (AttrKindStr == "sycl-device-compile-optlevel") { - auto Opt = "-O" + Attr.getValueAsString(); - llvm::errs() << "ARV: Opt is " << Opt << "\n"; - return Opt.str(); - } - } - } - return ""; -} - -std::string saveModuleOptions(module_split::ModuleDesc &MD, - const std::string &Opts, int I, - StringRef Suff) { - std::error_code EC; - std::string SCFile = makeResultFileName(".opt", I, Suff); - raw_fd_ostream SCOut(SCFile, EC); - checkError(EC, "error opening file '" + SCFile + "'"); - SCOut << Opts; - - return SCFile; -} - - // Saves specified collection of symbols to a file. std::string saveModuleSymbolTable(const module_split::EntryPointSet &Es, int I, StringRef Suffix) { @@ -610,11 +576,11 @@ StringRef getModuleSuffix(const module_split::ModuleDesc &MD) { // @param IRFilename filename of already available IR component. If not empty, // IR component saving is skipped, and this file name is recorded as such in // the result. -// @return a quadruple of files where IR, Property, Symbols and Opts components -// of the Module descriptor are written respectively. -IrPropSymFilenameQuad saveModule(module_split::ModuleDesc &MD, int I, +// @return a triple of files where IR, Property and Symbols components of the +// Module descriptor are written respectively. +IrPropSymFilenameTriple saveModule(module_split::ModuleDesc &MD, int I, StringRef IRFilename = "") { - IrPropSymFilenameQuad Res; + IrPropSymFilenameTriple Res; StringRef Suffix = getModuleSuffix(MD); if (!IRFilename.empty()) { @@ -627,9 +593,6 @@ IrPropSymFilenameQuad saveModule(module_split::ModuleDesc &MD, int I, EmitExportedSymbols, DeviceGlobals}; Res.Prop = saveModuleProperties(MD, Props, I, Suffix); - std::string Opts = getOptString(MD); - Res.Opt = saveModuleOptions(MD, Opts, I, Suffix); - if (DoSymGen) { // save the names of the entry points - the symbol table Res.Sym = saveModuleSymbolTable(MD.entries(), I, Suffix); @@ -674,37 +637,20 @@ bool processSpecConstants(module_split::ModuleDesc &MD) { return MD.Props.SpecConstsMet; } -constexpr int MAX_COLUMNS_IN_FILE_TABLE = 4; +constexpr int MAX_COLUMNS_IN_FILE_TABLE = 3; -#if 0 void addTableRow(util::SimpleTable &Table, - const IrPropSymFilenameQuad &RowData) { + const IrPropSymFilenameTriple &RowData) { SmallVector Row; - for (const std::string *S : {&RowData.Ir, &RowData.Prop, &RowData.Opt, &RowData.Sym}) { - if (!S->empty()) { - Row.push_back(StringRef(*S)); - } - } - llvm::errs() << "ARV: " << static_cast(Table.getNumColumns()) << "," << Row.size() <<"\n"; - assert(static_cast(Table.getNumColumns()) == Row.size()); - Table.addRow(Row); -} -#else -void addTableRow(util::SimpleTable &Table, - const IrPropSymFilenameQuad &RowData) { - SmallVector Row; - for (const std::string *S : {&RowData.Ir, &RowData.Prop, &RowData.Sym}) { if (!S->empty()) { Row.push_back(StringRef(*S)); } } - //llvm::errs() << "ARV: " << static_cast(Table.getNumColumns()) << "," << Row.size() <<"\n"; assert(static_cast(Table.getNumColumns()) == Row.size()); Table.addRow(Row); } -#endif // Removes the global variable "llvm.used" and returns true on success. // "llvm.used" is a global constant array containing references to kernels @@ -753,13 +699,8 @@ static bool removeSYCLKernelsConstRefArray(Module &M) { std::unique_ptr processInputModule(std::unique_ptr M) { // Construct the resulting table which will accumulate all the outputs. -#if 0 - SmallVector ColumnTitles{ - StringRef(COL_CODE), StringRef(COL_PROPS), StringRef(COL_OPTS)}; -#else SmallVector ColumnTitles{ StringRef(COL_CODE), StringRef(COL_PROPS)}; -#endif if (DoSymGen) { ColumnTitles.push_back(COL_SYM); @@ -839,103 +780,111 @@ processInputModule(std::unique_ptr M) { module_split::ModuleDesc MDesc = ScopedSplitter->nextSplit(); DUMP_ENTRY_POINTS(MDesc.entries(), MDesc.Name.c_str(), 1); - std::unique_ptr LargeGRFSplitter = - module_split::getLargeGRFSplitter(std::move(MDesc), + std::unique_ptr OptLevelSplitter = + module_split::getOptLevelSplitter(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); - const bool SplitByLargeGRF = LargeGRFSplitter->totalSplits() > 1; - Modified |= SplitByLargeGRF; - - // Now split further by "large-grf" attribute. - while (LargeGRFSplitter->hasMoreSplits()) { - module_split::ModuleDesc MDesc1 = LargeGRFSplitter->nextSplit(); - DUMP_ENTRY_POINTS(MDesc1.entries(), MDesc1.Name.c_str(), 2); - MDesc1.fixupLinkageOfDirectInvokeSimdTargets(); - - // Do SYCL/ESIMD splitting. It happens always, as ESIMD and SYCL must - // undergo different set of LLVMIR passes. After this they are linked back - // together to form single module with disjoint SYCL and ESIMD call graphs - // unless -split-esimd option is specified. The graphs become disjoint - // when linked back because functions shared between graphs are cloned and - // renamed. - std::unique_ptr ESIMDSplitter = - module_split::getSplitterByKernelType(std::move(MDesc1), - EmitOnlyKernelsAsEntryPoints); - const bool SplitByESIMD = ESIMDSplitter->totalSplits() > 1; - Modified |= SplitByESIMD; - - if (SplitByESIMD && SplitByScope && - (SplitMode == module_split::SPLIT_PER_KERNEL) && !SplitEsimd) { - // Controversial state reached - SYCL and ESIMD entry points resulting - // from SYCL/ESIMD split (which is done always) are linked back, since - // -split-esimd is not specified, but per-kernel split is requested. - warning("SYCL and ESIMD entry points detected and split mode is " - "per-kernel, so " + - SplitEsimd.ValueStr + " must also be specified"); - } - SmallVector MMs; - - while (ESIMDSplitter->hasMoreSplits()) { - module_split::ModuleDesc MDesc2 = ESIMDSplitter->nextSplit(); - DUMP_ENTRY_POINTS(MDesc2.entries(), MDesc2.Name.c_str(), 3); - Modified |= processSpecConstants(MDesc2); - - // TODO: detach compile-time properties from device globals. - if (DeviceGlobals.getNumOccurrences() > 0) { - Modified |= - runModulePass(MDesc2.getModule()); + const bool SplitByOptLevel = OptLevelSplitter->totalSplits() > 1; + while (OptLevelSplitter->hasMoreSplits()) { + module_split::ModuleDesc MDesc1 = OptLevelSplitter->nextSplit(); + std::unique_ptr LargeGRFSplitter = + module_split::getLargeGRFSplitter(std::move(MDesc1), + EmitOnlyKernelsAsEntryPoints); + const bool SplitByLargeGRF = LargeGRFSplitter->totalSplits() > 1; + Modified |= SplitByLargeGRF; + + // Now split further by "large-grf" attribute. + while (LargeGRFSplitter->hasMoreSplits()) { + module_split::ModuleDesc MDesc1 = LargeGRFSplitter->nextSplit(); + DUMP_ENTRY_POINTS(MDesc1.entries(), MDesc1.Name.c_str(), 2); + MDesc1.fixupLinkageOfDirectInvokeSimdTargets(); + + // Do SYCL/ESIMD splitting. It happens always, as ESIMD and SYCL must + // undergo different set of LLVMIR passes. After this they are linked back + // together to form single module with disjoint SYCL and ESIMD call graphs + // unless -split-esimd option is specified. The graphs become disjoint + // when linked back because functions shared between graphs are cloned and + // renamed. + std::unique_ptr ESIMDSplitter = + module_split::getSplitterByKernelType(std::move(MDesc1), + EmitOnlyKernelsAsEntryPoints); + const bool SplitByESIMD = ESIMDSplitter->totalSplits() > 1; + Modified |= SplitByESIMD; + + if (SplitByESIMD && SplitByScope && + (SplitMode == module_split::SPLIT_PER_KERNEL) && !SplitEsimd) { + // Controversial state reached - SYCL and ESIMD entry points resulting + // from SYCL/ESIMD split (which is done always) are linked back, since + // -split-esimd is not specified, but per-kernel split is requested. + warning("SYCL and ESIMD entry points detected and split mode is " + "per-kernel, so " + + SplitEsimd.ValueStr + " must also be specified"); } - if (!MDesc2.isSYCL() && LowerEsimd) { - assert(MDesc2.isESIMD() && "NYI"); - // ESIMD lowering also detects large-GRF kernels, so it must happen - // before large-GRF split. - Modified |= lowerEsimdConstructs(MDesc2); + SmallVector MMs; + + while (ESIMDSplitter->hasMoreSplits()) { + module_split::ModuleDesc MDesc2 = ESIMDSplitter->nextSplit(); + DUMP_ENTRY_POINTS(MDesc2.entries(), MDesc2.Name.c_str(), 3); + Modified |= processSpecConstants(MDesc2); + + // TODO: detach compile-time properties from device globals. + if (DeviceGlobals.getNumOccurrences() > 0) { + Modified |= + runModulePass(MDesc2.getModule()); + } + if (!MDesc2.isSYCL() && LowerEsimd) { + assert(MDesc2.isESIMD() && "NYI"); + // ESIMD lowering also detects large-GRF kernels, so it must happen + // before large-GRF split. + Modified |= lowerEsimdConstructs(MDesc2); + } + MMs.emplace_back(std::move(MDesc2)); } - MMs.emplace_back(std::move(MDesc2)); - } - if (!SplitEsimd && (MMs.size() > 1)) { - // SYCL/ESIMD splitting is not requested, link back into single module. - assert(MMs.size() == 2); - assert((MMs[0].isESIMD() && MMs[1].isSYCL()) || - (MMs[1].isESIMD() && MMs[0].isSYCL())); - int ESIMDInd = MMs[0].isESIMD() ? 0 : 1; - int SYCLInd = MMs[0].isESIMD() ? 1 : 0; - // ... but before that, make sure no link conflicts will occur. - MMs[ESIMDInd].renameDuplicatesOf(MMs[SYCLInd].getModule(), ".esimd"); - module_split::ModuleDesc M2 = - link(std::move(MMs[0]), std::move(MMs[1])); - M2.restoreLinkageOfDirectInvokeSimdTargets(); - string_vector Names; - M2.saveEntryPointNames(Names); - M2.cleanup(); // may remove some entry points, need to save/rebuild - M2.rebuildEntryPoints(Names); - MMs.clear(); - MMs.emplace_back(std::move(M2)); - DUMP_ENTRY_POINTS(MMs.back().entries(), MMs.back().Name.c_str(), 3); - Modified = true; - } - bool SplitOccurred = SplitByScope || SplitByLargeGRF || SplitByESIMD; - - if (IROutputOnly) { - if (SplitOccurred) { - error("some modules had to be split, '-" + IROutputOnly.ArgStr + - "' can't be used"); + if (!SplitEsimd && (MMs.size() > 1)) { + // SYCL/ESIMD splitting is not requested, link back into single module. + assert(MMs.size() == 2); + assert((MMs[0].isESIMD() && MMs[1].isSYCL()) || + (MMs[1].isESIMD() && MMs[0].isSYCL())); + int ESIMDInd = MMs[0].isESIMD() ? 0 : 1; + int SYCLInd = MMs[0].isESIMD() ? 1 : 0; + // ... but before that, make sure no link conflicts will occur. + MMs[ESIMDInd].renameDuplicatesOf(MMs[SYCLInd].getModule(), ".esimd"); + module_split::ModuleDesc M2 = + link(std::move(MMs[0]), std::move(MMs[1])); + M2.restoreLinkageOfDirectInvokeSimdTargets(); + string_vector Names; + M2.saveEntryPointNames(Names); + M2.cleanup(); // may remove some entry points, need to save/rebuild + M2.rebuildEntryPoints(Names); + MMs.clear(); + MMs.emplace_back(std::move(M2)); + DUMP_ENTRY_POINTS(MMs.back().entries(), MMs.back().Name.c_str(), 3); + Modified = true; + } + bool SplitOccurred = SplitByScope || SplitByOptLevel || SplitByLargeGRF + || SplitByESIMD; + + if (IROutputOnly) { + if (SplitOccurred) { + error("some modules had to be split, '-" + IROutputOnly.ArgStr + + "' can't be used"); + } + saveModuleIR(MMs.front().getModule(), OutputFilename); + return Table; + } + // Empty IR file name directs saveModule to generate one and save IR to + // it: + std::string OutIRFileName = ""; + + if (!Modified && (OutputFilename.getNumOccurrences() == 0)) { + assert(!SplitOccurred); + OutIRFileName = InputFilename; // ... non-empty means "skip IR writing" + errs() << "sycl-post-link NOTE: no modifications to the input LLVM IR " + "have been made\n"; + } + for (module_split::ModuleDesc &IrMD : MMs) { + IrPropSymFilenameTriple T = saveModule(IrMD, ID, OutIRFileName); + addTableRow(*Table, T); } - saveModuleIR(MMs.front().getModule(), OutputFilename); - return Table; - } - // Empty IR file name directs saveModule to generate one and save IR to - // it: - std::string OutIRFileName = ""; - - if (!Modified && (OutputFilename.getNumOccurrences() == 0)) { - assert(!SplitOccurred); - OutIRFileName = InputFilename; // ... non-empty means "skip IR writing" - errs() << "sycl-post-link NOTE: no modifications to the input LLVM IR " - "have been made\n"; - } - for (module_split::ModuleDesc &IrMD : MMs) { - IrPropSymFilenameQuad T = saveModule(IrMD, ID, OutIRFileName); - addTableRow(*Table, T); } } ++ID; diff --git a/sycl/doc/design/PropagateCompilerFlagsToLinkerAndRuntime.md b/sycl/doc/design/PropagateCompilerFlagsToLinkerAndRuntime.md new file mode 100644 index 0000000000000..e473c536a82ac --- /dev/null +++ b/sycl/doc/design/PropagateCompilerFlagsToLinkerAndRuntime.md @@ -0,0 +1,15 @@ +# Propagation of optimization levels used by front-end compiler to linker and backend compiler + +In order to ease the process of debugging, there is a user requirement to compile different modules with different levels of optimization. This document proposes a compiler flow that will enable propagation of compiler options (limited to only optimization levels for now) specified for front-end to the linker and runtimes and eventually to the backend. + +**NOTE**: This is not a final version. The document is still in progress. + +## Background + +## Requirements + +## Use case + +## Proposed design + +## References \ No newline at end of file diff --git a/sycl/include/sycl/compile_options.hpp b/sycl/include/sycl/compile_options.hpp deleted file mode 100644 index eed3cbe5bf474..0000000000000 --- a/sycl/include/sycl/compile_options.hpp +++ /dev/null @@ -1,22 +0,0 @@ -//==---- compile_options.hpp - SYCL compile options Enums -----*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ===--------------------------------------------------------------------=== // -#pragma once - -#include - -namespace sycl { -__SYCL_INLINE_VER_NAMESPACE(_V1) { - -#define __SYCL_COMPILE_OPTION(OPTION, ID) OPTION = ID, -enum class __SYCL_TYPE(compile_options) compile_options { -#include -}; -#undef __SYCL_COMPILE_OPTION - -} // __SYCL_INLINE_VER_NAMESPACE(_V1) -} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index a5bbc82c4b9d0..925ef721eea87 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -9,7 +9,6 @@ #pragma once #include -#include #include #include #include @@ -57,12 +56,6 @@ struct device_has_key { std::integral_constant...>; }; -struct device_compile_options_key { - template - using value_t = property_value...>; -}; - template struct property_value, std::integral_constant...> { @@ -115,14 +108,6 @@ struct property_value value{Aspects...}; }; -template -struct property_value...> { - using key_t = device_compile_options_key; - static constexpr std::array value{CompileOptions...}; -}; - template inline constexpr work_group_size_key::value_t work_group_size; @@ -136,15 +121,11 @@ inline constexpr sub_group_size_key::value_t sub_group_size; template inline constexpr device_has_key::value_t device_has; -template -inline constexpr device_compile_options_key::value_t device_compile_options; - template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; -template <> struct is_property_key : std::true_type {}; namespace detail { template <> struct PropertyToKind { @@ -159,9 +140,6 @@ template <> struct PropertyToKind { template <> struct PropertyToKind { static constexpr PropKind Kind = PropKind::DeviceHas; }; -template <> struct PropertyToKind { - static constexpr PropKind Kind = PropKind::DeviceCompileOptions; -}; template <> struct IsCompileTimeProperty : std::true_type {}; @@ -170,7 +148,6 @@ struct IsCompileTimeProperty : std::true_type {}; template <> struct IsCompileTimeProperty : std::true_type {}; template <> struct IsCompileTimeProperty : std::true_type {}; -template <> struct IsCompileTimeProperty : std::true_type {}; template struct PropertyMetaInfo> { @@ -193,12 +170,6 @@ struct PropertyMetaInfo> { static constexpr const char *value = SizeListToStr(Aspects)...>::value; }; -template -struct PropertyMetaInfo> { - static constexpr const char *name = "sycl-device-compile-options"; - static constexpr const char *value = - SizeListToStr(CompileOptions)...>::value; -}; template struct HasKernelPropertiesGetMethod : std::false_type {}; diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 6645d76d7acc5..08aa8a5f76dd4 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -173,9 +173,8 @@ enum PropKind : uint32_t { WorkGroupSizeHint = 7, SubGroupSize = 8, DeviceHas = 9, - DeviceCompileOptions = 10, // PropKindSize must always be the last value. - PropKindSize = 11, + PropKindSize = 10, }; // This trait must be specialized for all properties and must have a unique diff --git a/sycl/include/sycl/info/compile_options.def b/sycl/include/sycl/info/compile_options.def deleted file mode 100644 index 527bee4b7cce0..0000000000000 --- a/sycl/include/sycl/info/compile_options.def +++ /dev/null @@ -1,4 +0,0 @@ -__SYCL_COMPILE_OPTION(O0, 0) -__SYCL_COMPILE_OPTION(O1, 1) -__SYCL_COMPILE_OPTION(O2, 2) -__SYCL_COMPILE_OPTION(O3, 3) \ No newline at end of file diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index cdab4ecd4e69a..70a29d96d5097 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -19,7 +19,6 @@ #endif #include #include -#include #include #include #include diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 7bea37764cd4f..1ceca904d9878 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -407,6 +407,24 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, // TODO: Remove isDoubleGRF check in next ABI break bool isLargeGRF = getUint32PropAsBool(Img, "isLargeGRF") || getUint32PropAsBool(Img, "isDoubleGRF"); + pi_device_binary_property Prop = Img.getProperty("OptLevel"); + uint32_t OptLevel = Prop ? DeviceBinaryProperty(Prop).asUint32() : 2; + std::string OptLevelStr = ""; + switch (OptLevel) { + case 0: + OptLevelStr = "-cl-opt-disable"; + break; + case 1: + OptLevelStr = "-O1"; + break; + case 2: + OptLevelStr = "-O2"; + break; + case 3: + OptLevelStr = "-O3"; + break; + } + // The -vc-codegen option is always preserved for ESIMD kernels, regardless // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable. if (isEsimdImage) { @@ -425,6 +443,11 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, // is fixed CompileOpts += isEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; } + if (!OptLevelStr.empty()) { + if (!CompileOpts.empty()) + CompileOpts += " "; + CompileOpts += OptLevelStr; + } } static void applyOptionsFromImage(std::string &CompileOpts, diff --git a/sycl/test/extensions/properties/prop_compile_options.cpp b/sycl/test/extensions/properties/prop_compile_options.cpp deleted file mode 100644 index 9e3184e6042fa..0000000000000 --- a/sycl/test/extensions/properties/prop_compile_options.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %clangxx -O0 -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR -// RUN: %clangxx -O0 -fsycl-device-only -Xclang -verify %s -// expected-no-diagnostics -// Tests for propagation of compile options - -#include - -using namespace sycl; -using namespace sycl::ext::oneapi::experimental; - -int main() { - queue Q; - // CHECK-IR: spir_kernel void @{{.*}}Kernel0(){{.*}} #[[COAttr1:[0-9]+]] - Q.single_task([]() {}); -} From f4eefc5e59b27d74baf4b138bdeb908f332542de Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Wed, 21 Dec 2022 12:45:32 -0800 Subject: [PATCH 3/7] minor corrections Signed-off-by: Arvind Sudarsanam --- llvm/tools/sycl-post-link/sycl-post-link.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index c83e912c3e2cf..ac66e51cfb3b5 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -806,12 +806,12 @@ processInputModule(std::unique_ptr M) { while (OptionalFeaturesSplitter->hasMoreSplits()) { // Here, we perform third-level splitting based on optimization level. // This step is mandatory, as optimization level is at module level. + module_split::ModuleDesc MDesc = OptionalFeaturesSplitter->nextSplit(); std::unique_ptr OptLevelSplitter = module_split::getOptLevelSplitter(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); - SplitByOptLevel |= OptLevelSplitter->totalSplits() > 1; + SplitByOptLevel |= OptLevelSplitter->remainingSplits() > 1; while (OptLevelSplitter->hasMoreSplits()) { - module_split::ModuleDesc MDesc1 = OptLevelSplitter->nextSplit(); TopLevelModules.emplace_back(OptLevelSplitter->nextSplit()); } } From 8c8e0e135215b6c0e02f5eaf0788edfdb7f130fa Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Wed, 21 Dec 2022 14:28:42 -0800 Subject: [PATCH 4/7] Change default opt level from O2 to unknown Signed-off-by: Arvind Sudarsanam --- clang/lib/CodeGen/CodeGenFunction.cpp | 5 +++-- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 2 +- llvm/tools/sycl-post-link/ModuleSplitter.h | 9 +++++---- llvm/tools/sycl-post-link/sycl-post-link.cpp | 4 +++- sycl/source/detail/program_manager/program_manager.cpp | 2 +- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index a908f85039042..aca68e407a07e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -588,7 +588,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, // TODO Module identifier is not reliable for this purpose since two modules // can have the same ID, needs improvement Fn->addFnAttr("sycl-module-id", Fn->getParent()->getModuleIdentifier()); - int SYCLDeviceCompileOptLevel = 2; + int SYCLDeviceCompileOptLevel = -1; switch (CGM.getCodeGenOpts().OptimizationLevel) { default: llvm_unreachable("Invalid optimization level!"); @@ -598,7 +598,8 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, case 3: SYCLDeviceCompileOptLevel = CGM.getCodeGenOpts().OptimizationLevel; } - Fn->addFnAttr("sycl-device-compile-optlevel", std::to_string(SYCLDeviceCompileOptLevel)); + if (SYCLDeviceCompileOptLevel > 0) + Fn->addFnAttr("sycl-device-compile-optlevel", std::to_string(SYCLDeviceCompileOptLevel)); } llvm::LLVMContext &Context = getLLVMContext(); diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 4bace9eed964e..6327c85ed050d 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -284,7 +284,7 @@ groupEntryPointsByOptLevel(ModuleDesc &MD, StringRef AttrName, StringRef OptLevelStr = F.getFnAttribute(AttrName).getValueAsString(); EntryPointMap[OptLevelStr].insert(&F); } else { - EntryPointMap["2"].insert(&F); + EntryPointMap["-1"].insert(&F); } } if (!EntryPointMap.empty()) { diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.h b/llvm/tools/sycl-post-link/ModuleSplitter.h index 3bef77af71c9c..16447d61911eb 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.h +++ b/llvm/tools/sycl-post-link/ModuleSplitter.h @@ -61,7 +61,7 @@ struct EntryPointGroup { EntryPointsGroupScope Scope = Scope_Global; // opt level - unsigned OptLevel = 2; + int OptLevel = -1; Properties merge(const Properties &Other) const { Properties Res; @@ -69,7 +69,8 @@ struct EntryPointGroup { ? HasESIMD : SyclEsimdSplitStatus::SYCL_AND_ESIMD; Res.UsesLargeGRF = UsesLargeGRF || Other.UsesLargeGRF; - // Opt Level remains at '2' + // TODO What do we do about optimization levels while merging? + // Opt Level remains at '-1' // Scope remains global return Res; } @@ -98,7 +99,7 @@ struct EntryPointGroup { bool isLargeGRF() const { return Props.UsesLargeGRF; } // Get opt level. - uint32_t getOptLevel() const { return Props.OptLevel; } + int getOptLevel() const { return Props.OptLevel; } void saveNames(std::vector &Dest) const; void rebuildFromNames(const std::vector &Names, const Module &M); @@ -154,7 +155,7 @@ class ModuleDesc { bool isESIMD() const { return EntryPoints.isEsimd(); } bool isSYCL() const { return EntryPoints.isSycl(); } bool isLargeGRF() const { return EntryPoints.isLargeGRF(); } - uint32_t getOptLevel() const { return EntryPoints.getOptLevel(); } + int getOptLevel() const { return EntryPoints.getOptLevel(); } const EntryPointSet &entries() const { return EntryPoints.Functions; } const EntryPointGroup &getEntryPointGroup() const { return EntryPoints; } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index ac66e51cfb3b5..ec0b92467a12f 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -453,7 +453,9 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, } } - PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"OptLevel", MD.getOptLevel()}); + auto OptLevel = MD.getOptLevel(); + if (OptLevel >= 0) + PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"OptLevel", OptLevel}); if (MD.isESIMD()) { PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isEsimdImage", true}); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index b39b6efdcb01c..d5148a12bfa41 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -408,7 +408,7 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, bool isLargeGRF = getUint32PropAsBool(Img, "isLargeGRF") || getUint32PropAsBool(Img, "isDoubleGRF"); pi_device_binary_property Prop = Img.getProperty("OptLevel"); - uint32_t OptLevel = Prop ? DeviceBinaryProperty(Prop).asUint32() : 2; + int OptLevel = Prop ? DeviceBinaryProperty(Prop).asUint32() : -1; std::string OptLevelStr = ""; switch (OptLevel) { case 0: From a997862cdc4b87ea0bf60797cb1ad0759d44b077 Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Wed, 21 Dec 2022 17:43:32 -0800 Subject: [PATCH 5/7] Fix more test failures Signed-off-by: Arvind Sudarsanam --- clang/lib/CodeGen/CodeGenFunction.cpp | 3 +- ...ropagateCompilerFlagsToLinkerAndRuntime.md | 60 ++++++++++++++++++- .../program_manager/program_manager.cpp | 18 ++---- 3 files changed, 63 insertions(+), 18 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index aca68e407a07e..79a679eab5d9f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -9,7 +9,6 @@ // This coordinates the per-function state used while generating code. // //===----------------------------------------------------------------------===// - #include "CodeGenFunction.h" #include "CGBlocks.h" #include "CGCUDARuntime.h" @@ -598,7 +597,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, case 3: SYCLDeviceCompileOptLevel = CGM.getCodeGenOpts().OptimizationLevel; } - if (SYCLDeviceCompileOptLevel > 0) + if (SYCLDeviceCompileOptLevel >= 0) Fn->addFnAttr("sycl-device-compile-optlevel", std::to_string(SYCLDeviceCompileOptLevel)); } llvm::LLVMContext &Context = getLLVMContext(); diff --git a/sycl/doc/design/PropagateCompilerFlagsToLinkerAndRuntime.md b/sycl/doc/design/PropagateCompilerFlagsToLinkerAndRuntime.md index e473c536a82ac..26e8d224bc1b5 100644 --- a/sycl/doc/design/PropagateCompilerFlagsToLinkerAndRuntime.md +++ b/sycl/doc/design/PropagateCompilerFlagsToLinkerAndRuntime.md @@ -1,15 +1,71 @@ # Propagation of optimization levels used by front-end compiler to linker and backend compiler -In order to ease the process of debugging, there is a user requirement to compile different modules with different levels of optimization. This document proposes a compiler flow that will enable propagation of compiler options (limited to only optimization levels for now) specified for front-end to the linker and runtimes and eventually to the backend. +In order to ease the process of debugging, there is a user requirement to compile different modules with different levels of optimization. This document proposes a compiler flow that will enable propagation of compiler options specified for front-end to the linker and runtimes and eventually to the backend. Currently, only O0/O1/O2/O3 options are handled. **NOTE**: This is not a final version. The document is still in progress. ## Background +When building an application with several source and object files, it should be possible to specify the optimization parameters individually for each source file/object file (for each invocation of the DPCPP compiler). The linker should pass the original optimization options (e.g. -O0 or -O2) used when building an object file to the device backend compiler (IGC compiler). This will improve the debugging experience by selectively disabling/enabling optimizations for each source file, and therefore achieving better debuggability and better performance as needed. + +The current behavior, is that the device backend optimization options are determined by the linker optimization options. If the -O0 option is specified for linker, the linker will pass -cl-opt-disable option to IGC for {*}all kernels{*}, essentially disabling optimizations globally. Otherwise, if the -O0 option is not specified for linker, it will not pass -cl-opt-disable option at all, therefore making the kernels mostly undebuggable, regardless of the original front-end compiler options. + +Here is an example that demonstrates this pain point: + +``` +icx -c -fsycl test1.c -o test1 +icx -c -O0 -fsycl test2.c -o test2 +icx -fsycl -o test test1.o test2.o +``` + +In this scenario, the fat binary is 'test' and there are no compilation flags sent across to the backend compiler. Though the user wanted to have full debuggability with test2.c module, some of the debuggablity is lost. + +Another scenario is shown below: + +``` +icpx -c -O0 -fsycl -g test.cpp -o test.o +icpx -fsycl test.o +``` + +In this scenario, the fat binary is 'test' and there are no compilation flags sent across to the backend compiler. Though the user wanted to have full debuggability with test.cpp module, some of the debuggablity is lost. The user was not able to set a breakpoint inside device code. + ## Requirements +In order to support module-level debuggability, the user will compile different module files with different levels of optimization. These optimization levels must be preserved and made use of during every stage of compilation. Following are the requirements for this feature. +- If the user specifies '-Ox' as a front-end compile option for a particular module, this option must be preserved during compilation, linking, AOT compilation as well as JIT compilation. +- If the user specifies '-Ox' option as a front-end linker option, this option will override any front-end compile options and the linker option will be preserved during AOT and JIT compilation. +- If the user specifies '-O0' option, we need to pass '-cl-opt-disable' to AOT and JIT compilation stages. + ## Use case +Following is a possible use case: + +``` +A list of modules: +test1.cpp +test2.cpp +test3.cpp +``` + +``` +Following are the compilation steps: +# compiling +icpx -c -O0 -fsycl test1.cpp -o test1.o +icpx -c -O3 -fsycl test2.cpp -o test2.o +icpx -c -fsycl test3.cpp -o test3.o +# linking +icpx -o test -fsycl test1.o test2.o test3.o +# JIT compilation (For GPU backends, this calls igc-standalone compiler in the background) +./test +``` + +Since we have three modules with three different compiler options, we will need to end up with three device binaries, each with their own compiler option specified. + ## Proposed design -## References \ No newline at end of file +Following are changes required in various stages of the compilation pipeline: +- Front-end code generation: For each SYCL kernel, identify the compilation option. Add an appropriate attribute to that kernel. Name of that attribute is 'sycl-device-compile-optlevel'. +- During the llvm-link stage, all modules are linked into a single module. This is an existing behavior. +- During sycl-post-link stage, we first split the kernels into multiple modules based on their optimization level. For each split module, an entry corresponding to its optimization level is made in its .props file. +- During ocloc call generation, the .props file will be parsed and appropriate option will be added to the list of compiler options. +- In SYCL runtime, logic will be added to program manager to parse the .props file, extract the optimization level, and add '-cl-opt-disable' if the optimization level is 0. Otherwise, we do nothing. diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index d5148a12bfa41..46a6bbaa2a1de 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -410,20 +410,10 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, pi_device_binary_property Prop = Img.getProperty("OptLevel"); int OptLevel = Prop ? DeviceBinaryProperty(Prop).asUint32() : -1; std::string OptLevelStr = ""; - switch (OptLevel) { - case 0: - OptLevelStr = "-cl-opt-disable"; - break; - case 1: - OptLevelStr = "-O1"; - break; - case 2: - OptLevelStr = "-O2"; - break; - case 3: - OptLevelStr = "-O3"; - break; - } + // Currently, we do not do anything for other opt levels + // TODO: Figure out a way to send some info across for other opt levels. + if (OptLevel == 0) + OptLevelStr = "-cl-opt-disable"; // The -vc-codegen option is always preserved for ESIMD kernels, regardless // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable. From fad07312f2fb2ef91d02aa38935d46c0116e052c Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Wed, 21 Dec 2022 18:48:40 -0800 Subject: [PATCH 6/7] Fixing doc issue Signed-off-by: Arvind Sudarsanam --- sycl/doc/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/doc/index.rst b/sycl/doc/index.rst index b589953df9a22..9f236bde2719c 100644 --- a/sycl/doc/index.rst +++ b/sycl/doc/index.rst @@ -44,6 +44,7 @@ Design Documents for the oneAPI DPC++ Compiler design/CompileTimeProperties design/ESIMDStatelesAccessors design/DeviceIf + design/PropagateCompilerFlagsToLinkerAndRuntime New OpenCL Extensions New SPIR-V Extensions From 4ea7a9cbf03a73a1fd0368cf34d5eace83ba2fa5 Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Wed, 21 Dec 2022 19:57:11 -0800 Subject: [PATCH 7/7] Add LIT test Signed-off-by: Arvind Sudarsanam --- sycl/test/basic_tests/sycl-opt-level.cpp | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 sycl/test/basic_tests/sycl-opt-level.cpp diff --git a/sycl/test/basic_tests/sycl-opt-level.cpp b/sycl/test/basic_tests/sycl-opt-level.cpp new file mode 100644 index 0000000000000..2aaafab47ddc8 --- /dev/null +++ b/sycl/test/basic_tests/sycl-opt-level.cpp @@ -0,0 +1,32 @@ +// RUN: %clangxx %s -O0 -S -o %t.ll -fsycl-device-only +// RUN: FileCheck %s --input-file %t.ll -check-prefixes=CHECK-IR +// CHECK-IR: define weak_odr dso_local spir_kernel void @{{.*}}main{{.*}}sycl{{.*}}handler{{.*}}() #[[ATTR:[0-9]+]] +// CHECK-IR: attributes #[[ATTR]] = { {{.*}} "sycl-device-compile-optlevel"="0" {{.*}}} + +// RUN: %clangxx %s -O0 -o %t.bc -fsycl-device-only +// RUN: sycl-post-link -split=source -symbols -S %t.bc -o %t.table +// RUN: FileCheck %s -input-file=%t.table +// RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-OPT-LEVEL-PROP + +// CHECK: [Code|Properties|Symbols] +// CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym + +// CHECK-OPT-LEVEL-PROP: OptLevel=1|0 + +// This test checks adding of the attribute 'sycl-device-compile-optlevel' +// by the clang front-end +// This test also checks parsing of the attribute 'sycl-device-compile-optlevel' +// by the sycl-post-link-tool: +// Splitting happens as usual. +// - sycl-post-link adds 'OptLevel' property to the device binary + +#include + +int main() { + sycl::queue q; + q.submit([&](sycl::handler &h) { + h.single_task([=]() {}); + }); + return 0; +} +