diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index a318ef0b6bd68..6d0ba466347c1 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1894,6 +1894,19 @@ class MCPlusBuilder {
     llvm_unreachable("not implemented");
   }

+  /// Checks whether the indirect call / jump is accepted by the landing pad
+  /// at the start of the target BasicBlock.
+  virtual bool isCallCoveredByBTI(MCInst &Call, MCInst &Pad) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
+  /// Inserts a BTI landing pad at the start of the BB that matches the
+  /// indirect call or jump instruction used to reach the BB.
+  virtual void insertBTI(BinaryBasicBlock &BB, MCInst &Call) const {
+    llvm_unreachable("not implemented");
+  }
+
   /// Store \p Target absolute address to \p RegName
   virtual InstructionListType materializeAddress(const MCSymbol *Target,
                                                  MCContext *Ctx,
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 5881d3fba70f6..729f854736f6c 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2806,6 +2806,81 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     Inst.addOperand(MCOperand::createImm(HintNum));
   }

+  bool isCallCoveredByBTI(MCInst &Call, MCInst &Pad) const override {
+    assert((isIndirectCall(Call) || isIndirectBranch(Call)) &&
+           "Not an indirect call or branch.");
+
+    // A BLR can be accepted by a BTI c.
+    if (isIndirectCall(Call))
+      return isBTILandingPad(Pad, true, false) ||
+             isBTILandingPad(Pad, true, true);
+
+    // A BR can be accepted by a BTI j or BTI c (and BTI jc) if the operand is
+    // x16 or x17. If the operand is not x16 or x17, it can be accepted by a
+    // BTI j or BTI jc (and not BTI c).
+    if (isIndirectBranch(Call)) {
+      assert(Call.getNumOperands() == 1 &&
+             "Indirect branch needs to have 1 operand.");
+      assert(Call.getOperand(0).isReg() &&
+             "Indirect branch does not have a register operand.");
+      MCPhysReg Reg = Call.getOperand(0).getReg();
+      if (Reg == AArch64::X16 || Reg == AArch64::X17)
+        return isBTILandingPad(Pad, true, false) ||
+               isBTILandingPad(Pad, false, true) ||
+               isBTILandingPad(Pad, true, true);
+      return isBTILandingPad(Pad, false, true) ||
+             isBTILandingPad(Pad, true, true);
+    }
+    return false;
+  }
+
+  void insertBTI(BinaryBasicBlock &BB, MCInst &Call) const override {
+    auto II = BB.getFirstNonPseudo();
+    // Only check the first instruction for non-empty BasicBlocks.
+    bool Empty = (II == BB.end());
+    if (!Empty && isCallCoveredByBTI(Call, *II))
+      return;
+    // A BLR can be accepted by a BTI c.
+    if (isIndirectCall(Call)) {
+      // If we have a BTI j at the start, extend it to a BTI jc;
+      // otherwise insert a new BTI c.
+      if (!Empty && isBTILandingPad(*II, false, true)) {
+        updateBTIVariant(*II, true, true);
+      } else {
+        MCInst BTIInst;
+        createBTI(BTIInst, true, false);
+        BB.insertInstruction(II, BTIInst);
+      }
+    }
+
+    // A BR can be accepted by a BTI j or BTI c (and BTI jc) if the operand is
+    // x16 or x17. If the operand is not x16 or x17, it can be accepted by a
+    // BTI j or BTI jc (and not BTI c).
+    if (isIndirectBranch(Call)) {
+      assert(Call.getNumOperands() == 1 &&
+             "Indirect branch needs to have 1 operand.");
+      assert(Call.getOperand(0).isReg() &&
+             "Indirect branch does not have a register operand.");
+      MCPhysReg Reg = Call.getOperand(0).getReg();
+      if (Reg == AArch64::X16 || Reg == AArch64::X17) {
+        // Add a new BTI c.
+        MCInst BTIInst;
+        createBTI(BTIInst, true, false);
+        BB.insertInstruction(II, BTIInst);
+      } else {
+        // If BB starts with a BTI c, extend it to BTI jc;
+        // otherwise insert a new BTI j.
+        if (!Empty && isBTILandingPad(*II, true, false)) {
+          updateBTIVariant(*II, true, true);
+        } else {
+          MCInst BTIInst;
+          createBTI(BTIInst, false, true);
+          BB.insertInstruction(II, BTIInst);
+        }
+      }
+    }
+  }
+
   InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx,
                                          MCPhysReg RegName,
                                          int64_t Addend = 0) const override {
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index 7b6f1620a3f2c..e8323e87fe148 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -198,6 +198,122 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
   ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
 }

+TEST_P(MCPlusBuilderTester, AArch64_insertBTI_empty) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+  MCInst CallInst = MCInstBuilder(AArch64::BR).addReg(AArch64::X16);
+  BC->MIB->insertBTI(*BB, CallInst);
+  // Check that BTI c is added to the empty block.
+  auto II = BB->begin();
+  ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_insertBTI_0) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+  MCInst Inst = MCInstBuilder(AArch64::RET).addReg(AArch64::LR);
+  BB->addInstruction(Inst);
+  // BR x16 needs BTI c or BTI j. We prefer adding a BTI c.
+  MCInst CallInst = MCInstBuilder(AArch64::BR).addReg(AArch64::X16);
+  BC->MIB->insertBTI(*BB, CallInst);
+  auto II = BB->begin();
+  ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_insertBTI_1) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+  MCInst BTIc;
+  BC->MIB->createBTI(BTIc, true, false);
+  BB->addInstruction(BTIc);
+  // BR x16 needs BTI c or BTI j. We have a BTI c, no change is needed.
+  MCInst CallInst = MCInstBuilder(AArch64::BR).addReg(AArch64::X16);
+  BC->MIB->insertBTI(*BB, CallInst);
+  auto II = BB->begin();
+  ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_insertBTI_2) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+  MCInst BTIc;
+  BC->MIB->createBTI(BTIc, true, false);
+  BB->addInstruction(BTIc);
+  // BR x5 needs BTI j.
+  // We have BTI c -> extend it to BTI jc.
+  MCInst CallInst = MCInstBuilder(AArch64::BR).addReg(AArch64::X5);
+  BC->MIB->insertBTI(*BB, CallInst);
+  auto II = BB->begin();
+  ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_insertBTI_3) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+  MCInst Inst = MCInstBuilder(AArch64::RET).addReg(AArch64::LR);
+  BB->addInstruction(Inst);
+  // BR x5 needs BTI j.
+  MCInst CallInst = MCInstBuilder(AArch64::BR).addReg(AArch64::X5);
+  BC->MIB->insertBTI(*BB, CallInst);
+  auto II = BB->begin();
+  ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_insertBTI_4) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+  MCInst Inst = MCInstBuilder(AArch64::RET).addReg(AArch64::LR);
+  BB->addInstruction(Inst);
+  // BLR needs BTI c, regardless of the register used.
+  MCInst CallInst = MCInstBuilder(AArch64::BLR).addReg(AArch64::X5);
+  BC->MIB->insertBTI(*BB, CallInst);
+  auto II = BB->begin();
+  ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_insertBTI_5) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+  MCInst BTIj;
+  BC->MIB->createBTI(BTIj, false, true);
+  BB->addInstruction(BTIj);
+  // BLR needs BTI c, regardless of the register used.
+  // We have a BTI j -> extend it to BTI jc.
+  MCInst CallInst = MCInstBuilder(AArch64::BLR).addReg(AArch64::X5);
+  BC->MIB->insertBTI(*BB, CallInst);
+  auto II = BB->begin();
+  ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_insertBTI_6) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+  MCInst Paciasp =
+      MCInstBuilder(AArch64::PACIASP).addReg(AArch64::LR).addReg(AArch64::SP);
+  BB->addInstruction(Paciasp);
+  // PACI(AB)SP are implicit BTI c, no change needed.
+  MCInst CallInst = MCInstBuilder(AArch64::BR).addReg(AArch64::X17);
+  BC->MIB->insertBTI(*BB, CallInst);
+  auto II = BB->begin();
+  ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+  ASSERT_TRUE(BC->MIB->isPSignOnLR(*II));
+}
+
 TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
   if (GetParam() != Triple::aarch64)
     GTEST_SKIP();
diff --git a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
index 4f33670a8500a..6618341296aaf 100644
--- a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
@@ -237,6 +237,12 @@ static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1,
         return false;
     return true;
   }
+  case Stmt::DeferStmtClass: {
+    const auto *DefStmt1 = cast<DeferStmt>(Stmt1);
+    const auto *DefStmt2 = cast<DeferStmt>(Stmt2);
+    return isIdenticalStmt(Ctx, DefStmt1->getBody(), DefStmt2->getBody(),
+                           IgnoreSideEffects);
+  }
   case Stmt::CompoundStmtClass: {
     const auto *CompStmt1 = cast<CompoundStmt>(Stmt1);
     const auto *CompStmt2 = cast<CompoundStmt>(Stmt2);
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f5bc3293427b0..661bb0f4d52df 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -208,6 +208,11 @@ Resolutions to C++ Defect Reports
 C Language Changes
 ------------------

+- Implemented the ``defer`` draft Technical Specification
+  (`WG14 N3734 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3734.pdf>`_);
+  it is enabled in C mode by passing ``-fdefer-ts``. Note that the details of
+  this feature are subject to change given that the Technical Specification is
+  not yet ratified.
+
 C2y Feature Support
 ^^^^^^^^^^^^^^^^^^^
 - No longer triggering ``-Wstatic-in-inline`` in C2y mode; use of a static
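As background for the frontend changes that follow: a minimal sketch of the semantics the TS prescribes, assuming the scope-exit, reverse-order behavior that the CodeGen changes further down implement via the cleanup stack (the function and strings here are illustrative, not taken from this patch):

    /* compiled with: clang -std=c23 -fdefer-ts */
    #include <stdio.h>

    void example(void) {
      puts("enter");
      _Defer puts("deferred 1"); /* runs when the enclosing block exits */
      _Defer puts("deferred 2"); /* deferred later, so it runs earlier  */
      puts("leave");
    } /* prints: enter, leave, deferred 2, deferred 1 */
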
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 8f427427d71ed..c3ac310bf5402 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2561,6 +2561,7 @@ DEF_TRAVERSE_STMT(DefaultStmt, {})
 DEF_TRAVERSE_STMT(DoStmt, {})
 DEF_TRAVERSE_STMT(ForStmt, {})
 DEF_TRAVERSE_STMT(GotoStmt, {})
+DEF_TRAVERSE_STMT(DeferStmt, {})
 DEF_TRAVERSE_STMT(IfStmt, {})
 DEF_TRAVERSE_STMT(IndirectGotoStmt, {})
 DEF_TRAVERSE_STMT(LabelStmt, {})
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index e1cca34d2212c..d56de08eaf279 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -317,6 +317,16 @@ class alignas(void *) Stmt {
     SourceLocation KeywordLoc;
   };

+  class DeferStmtBitfields {
+    friend class DeferStmt;
+
+    LLVM_PREFERRED_TYPE(StmtBitfields)
+    unsigned : NumStmtBits;
+
+    /// The location of the "defer".
+    SourceLocation DeferLoc;
+  };
+
   //===--- Expression bitfields classes ---===//

   class ExprBitfields {
@@ -1318,6 +1328,7 @@ class alignas(void *) Stmt {
     LoopControlStmtBitfields LoopControlStmtBits;
     ReturnStmtBitfields ReturnStmtBits;
     SwitchCaseBitfields SwitchCaseBits;
+    DeferStmtBitfields DeferStmtBits;

     // Expressions
     ExprBitfields ExprBits;
@@ -3211,6 +3222,47 @@ class ReturnStmt final
   }
 };

+/// DeferStmt - This represents a deferred statement.
+class DeferStmt : public Stmt {
+  friend class ASTStmtReader;
+
+  /// The deferred statement.
+  Stmt *Body;
+
+  DeferStmt(EmptyShell Empty);
+  DeferStmt(SourceLocation DeferLoc, Stmt *Body);
+
+public:
+  static DeferStmt *CreateEmpty(ASTContext &Context, EmptyShell Empty);
+  static DeferStmt *Create(ASTContext &Context, SourceLocation DeferLoc,
+                           Stmt *Body);
+
+  SourceLocation getDeferLoc() const { return DeferStmtBits.DeferLoc; }
+  void setDeferLoc(SourceLocation DeferLoc) {
+    DeferStmtBits.DeferLoc = DeferLoc;
+  }
+
+  Stmt *getBody() { return Body; }
+  const Stmt *getBody() const { return Body; }
+  void setBody(Stmt *S) {
+    assert(S && "defer body must not be null");
+    Body = S;
+  }
+
+  SourceLocation getBeginLoc() const { return getDeferLoc(); }
+  SourceLocation getEndLoc() const { return Body->getEndLoc(); }
+
+  child_range children() { return child_range(&Body, &Body + 1); }
+
+  const_child_range children() const {
+    return const_child_range(&Body, &Body + 1);
+  }
+
+  static bool classof(const Stmt *S) {
+    return S->getStmtClass() == DeferStmtClass;
+  }
+};
+
 /// AsmStmt is the base class for GCCAsmStmt and MSAsmStmt.
 class AsmStmt : public Stmt {
 protected:
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 9401377002223..442a90ec2472d 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -350,6 +350,8 @@ def err_address_of_label_outside_fn : Error<
   "use of address-of-label extension outside of a function body">;
 def err_asm_operand_wide_string_literal : Error<
   "cannot use %select{unicode|wide}0 string literal in 'asm'">;
+def err_defer_ts_labeled_stmt : Error<
+  "substatement of defer must not be a label">;
 def err_asm_expected_string : Error<
   "expected string literal %select{or parenthesized constant expression |}0in 'asm'">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 127818ec5767b..5444071fa8261 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6844,6 +6844,7 @@ def note_protected_by_objc_weak_init : Note<
   "jump bypasses initialization of __weak variable">;
 def note_protected_by_non_trivial_c_struct_init : Note<
   "jump bypasses initialization of variable of non-trivial C struct type">;
+def note_protected_by_defer_stmt : Note<"jump bypasses defer statement">;
 def note_enters_block_captures_cxx_obj : Note<
   "jump enters lifetime of block which captures a destructible C++ object">;
 def note_enters_block_captures_strong : Note<
@@ -6857,6 +6858,7 @@ def note_enters_compound_literal_scope : Note<
   "jump enters lifetime of a compound literal that is non-trivial to destruct">;
 def note_enters_statement_expression : Note<
   "jump enters a statement expression">;
+def note_enters_defer_stmt : Note<"jump enters a defer statement">;

 def note_exits_cleanup : Note<
   "jump exits scope of variable with __attribute__((cleanup))">;
@@ -6902,6 +6904,16 @@ def note_exits_block_captures_non_trivial_c_struct : Note<
   "to destroy">;
 def note_exits_compound_literal_scope : Note<
   "jump exits lifetime of a compound literal that is non-trivial to destruct">;
+def note_exits_defer_stmt : Note<"jump exits a defer statement">;
+def err_jump_out_of_defer_stmt : Error<
+  "cannot %enum_select<DeferJumpKind>{"
+  "%Break{break out of a}|"
+  "%Continue{continue loop outside of enclosing}|"
+  "%Return{return from a}|"
+  "%SEHLeave{__leave a}"
+  "}0 defer statement">;
+def err_defer_invalid_sjlj : Error<
+  "cannot use %0 inside a defer statement">;

 def err_func_returning_qualified_void : ExtWarn<
   "function cannot return qualified void type %0">,
@@ -11016,6 +11028,8 @@ def err_switch_explicit_conversion : Error<
 def err_switch_incomplete_class_type : Error<
   "switch condition has incomplete class type %0">;

+// TODO: It ought to be possible to refactor these to be a single warning that
+// uses %enum_select.
 def warn_empty_if_body : Warning<
   "if statement has empty body">, InGroup<EmptyBody>;
 def warn_empty_for_body : Warning<
@@ -11026,6 +11040,8 @@ def warn_empty_while_body : Warning<
   "while loop has empty body">, InGroup<EmptyBody>;
 def warn_empty_switch_body : Warning<
   "switch statement has empty body">, InGroup<EmptyBody>;
+def warn_empty_defer_body : Warning<
+  "defer statement has empty body">, InGroup<EmptyBody>;
 def note_empty_body_on_separate_line : Note<
   "put the semicolon on a separate line to silence this warning">;
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index b27492d19a65b..043c184323876 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -77,7 +77,8 @@ enum TokenKey : unsigned {
   KEYNOZOS = 0x4000000,
   KEYHLSL = 0x8000000,
   KEYFIXEDPOINT = 0x10000000,
-  KEYMAX = KEYFIXEDPOINT, // The maximum key
+  KEYDEFERTS = 0x20000000,
+  KEYMAX = KEYDEFERTS, // The maximum key
   KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,
   KEYALL = (KEYMAX | (KEYMAX - 1)) & ~KEYNOMS18 & ~KEYNOOPENCL & ~KEYNOZOS
   // KEYNOMS18, KEYNOOPENCL, KEYNOZOS are excluded.
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index bd3fb665b4a8d..2f0359c597613 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -194,6 +194,7 @@ LANGOPT(NoSignedZero      , 1, 0, Benign, "Permit Floating Point optimization wi
 LANGOPT(AllowRecip        , 1, 0, Benign, "Permit Floating Point reciprocal")
 LANGOPT(ApproxFunc        , 1, 0, Benign, "Permit Floating Point approximation")
 LANGOPT(NamedLoops        , 1, 0, Benign, "Permit named break/continue")
+LANGOPT(DeferTS           , 1, 0, Benign, "C '_Defer' Technical Specification")

 ENUM_LANGOPT(ComplexRange, ComplexRangeKind, 3, CX_None, NotCompatible,
              "Enable use of range reduction for complex arithmetics.")
@@ -515,6 +516,8 @@ LANGOPT(BoundsSafety, 1, 0, NotCompatible, "Bounds safety extension for C")

 LANGOPT(EnableLifetimeSafety, 1, 0, NotCompatible, "Experimental lifetime safety analysis for C++")

+LANGOPT(EnableLifetimeSafetyInference, 1, 0, NotCompatible, "Experimental lifetime safety inference analysis for C++")
+
 LANGOPT(PreserveVec3Type, 1, 0, NotCompatible, "Preserve 3-component vector type")

 #undef LANGOPT
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index bf3686bb372d5..2d740425a3cb0 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -17,6 +17,7 @@ def ForStmt : StmtNode<Stmt>;
 def GotoStmt : StmtNode<Stmt>;
 def IndirectGotoStmt : StmtNode<Stmt>;
 def ReturnStmt : StmtNode<Stmt>;
+def DeferStmt : StmtNode<Stmt>;
 def DeclStmt : StmtNode<Stmt>;
 def SwitchCase : StmtNode<Stmt, 1>;
 def CaseStmt : StmtNode<SwitchCase>;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 564d6010181cc..8240d395d3e8f 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -293,6 +293,7 @@ PUNCTUATOR(greatergreatergreater, ">>>")
 //   CHAR8SUPPORT - This is a keyword if 'char8_t' is a built-in type
 //   KEYFIXEDPOINT - This is a keyword according to the N1169 fixed point
 //                   extension.
+//   KEYDEFERTS - This is a keyword if the C '_Defer' TS is enabled
 //   KEYZOS - This is a keyword in C/C++ on z/OS
 //
 KEYWORD(auto                        , KEYALL)
@@ -441,6 +442,9 @@ KEYWORD(_Float16                    , KEYALL)
 C23_KEYWORD(typeof                  , KEYGNU)
 C23_KEYWORD(typeof_unqual           , 0)

+// '_Defer' TS
+KEYWORD(_Defer                      , KEYDEFERTS)
+
 // ISO/IEC JTC1 SC22 WG14 N1169 Extension
 KEYWORD(_Accum                      , KEYFIXEDPOINT)
 KEYWORD(_Fract                      , KEYFIXEDPOINT)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index c4ad6a0c0732c..74e0860762ec6 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -1080,6 +1080,12 @@ def CIR_SwitchOp : CIR_Op<"switch", [
     conditionally executing multiple regions of code. The operand to a switch
     is an integral condition value.

+    Besides taking an integer condition and CIR regions, it also accepts an
+    `all_enum_cases_covered` attribute indicating whether all enum cases are
+    handled by the operation. Note that the presence of a default CaseOp does
+    not imply `all_enum_cases_covered`. The original AST switch must
+    explicitly list every enum case.
+
     The set of `cir.case` operations and their enclosing `cir.switch`
     represent the semantics of a C/C++ switch statement. Users can use
     `collectCases(llvm::SmallVector<CaseOp> &cases)` to collect the `cir.case`
@@ -1206,7 +1212,10 @@ def CIR_SwitchOp : CIR_Op<"switch", [
     ```
   }];

-  let arguments = (ins CIR_IntType:$condition);
+  let arguments = (ins
+    CIR_IntType:$condition,
+    UnitAttr:$allEnumCasesCovered
+  );

   let regions = (region AnyRegion:$body);

  let ...
  ];

   let assemblyFormat = [{
-    custom<SwitchOp>(
-      $body, $condition, type($condition)
-    )
+    `(` $condition `:` qualified(type($condition)) `)`
+    (`allEnumCasesCovered` $allEnumCasesCovered^)?
+    $body attr-dict
   }];
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 84ae211bed0a4..a6b2cee072413 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -1677,6 +1677,14 @@ defm named_loops
     PosFlag,
     NegFlag>;

+// C '_Defer' TS
+defm defer_ts : BoolFOption<"defer-ts",
+  LangOpts<"DeferTS">, DefaultFalse,
+  PosFlag,
+  NegFlag>,
+  ShouldParseIf;
+
 // C++ Coroutines
 defm coroutines : BoolFOption<"coroutines",
   LangOpts<"Coroutines">, Default,
@@ -1962,6 +1970,14 @@ defm lifetime_safety : BoolFOption<
     BothFlags<[], [CC1Option],
               " experimental lifetime safety for C++">>;

+defm lifetime_safety_inference
+    : BoolFOption<"experimental-lifetime-safety-inference",
+                  LangOpts<"EnableLifetimeSafetyInference">, DefaultFalse,
+                  PosFlag,
+                  NegFlag,
+                  BothFlags<[], [CC1Option],
+                            " experimental lifetime safety inference for C++">>;
+
 defm addrsig : BoolFOption<"addrsig",
   CodeGenOpts<"Addrsig">, DefaultFalse,
   PosFlag,
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 58eb1c0a7c114..47eedf216a44b 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -7500,6 +7500,16 @@ class Parser : public CodeCompletionHandler {

   StmtResult ParseBreakOrContinueStatement(bool IsContinue);

+  /// ParseDeferStatement
+  /// \verbatim
+  ///       defer-statement:
+  ///         '_Defer' deferred-block
+  ///
+  ///       deferred-block:
+  ///         unlabeled-statement
+  /// \endverbatim
+  StmtResult ParseDeferStatement(SourceLocation *TrailingElseLoc);
+
   StmtResult ParsePragmaLoopHint(StmtVector &Stmts, ParsedStmtContext StmtCtx,
                                  SourceLocation *TrailingElseLoc,
                                  ParsedAttributes &Attrs,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d14b5dc5ffaa4..97b6bb3d1b3a8 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10935,6 +10935,10 @@ class Sema final : public SemaBase {
   /// Stack of active SEH __finally scopes. Can be empty.
   SmallVector<Scope *, 2> CurrentSEHFinally;

+  /// Stack of '_Defer' statements that are currently being parsed, as well
+  /// as the locations of their '_Defer' keywords. Can be empty.
+  SmallVector<std::pair<Scope *, SourceLocation>, 2> CurrentDefer;
+
   StmtResult ActOnExprStmt(ExprResult Arg, bool DiscardedValue = true);
   StmtResult ActOnExprStmtError();

@@ -11081,6 +11085,10 @@ class Sema final : public SemaBase {
   StmtResult ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope,
                             LabelDecl *Label, SourceLocation LabelLoc);

+  void ActOnStartOfDeferStmt(SourceLocation DeferLoc, Scope *CurScope);
+  void ActOnDeferStmtError(Scope *CurScope);
+  StmtResult ActOnEndOfDeferStmt(Stmt *Body, Scope *CurScope);
+
   struct NamedReturnInfo {
     const VarDecl *Candidate;
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 0f868c1c7c5da..f6e08d7a8a995 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -2061,6 +2061,7 @@ enum StmtCode {

   // HLSL Constructs
   EXPR_HLSL_OUT_ARG,
+  STMT_DEFER,
 };

 /// The kinds of designators that can occur in a
diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp
index 11ece494490de..10aacd75a650a 100644
--- a/clang/lib/AST/Stmt.cpp
+++ b/clang/lib/AST/Stmt.cpp
@@ -1499,3 +1499,19 @@ const Stmt *LoopControlStmt::getNamedLoopOrSwitch() const {
     return nullptr;
   return getLabelDecl()->getStmt()->getInnermostLabeledStmt();
 }
+
+DeferStmt::DeferStmt(EmptyShell Empty) : Stmt(DeferStmtClass, Empty) {}
+DeferStmt::DeferStmt(SourceLocation DeferLoc, Stmt *Body)
+    : Stmt(DeferStmtClass) {
+  setDeferLoc(DeferLoc);
+  setBody(Body);
+}
+
+DeferStmt *DeferStmt::CreateEmpty(ASTContext &Context, EmptyShell Empty) {
+  return new (Context) DeferStmt(Empty);
+}
+
+DeferStmt *DeferStmt::Create(ASTContext &Context, SourceLocation DeferLoc,
+                             Stmt *Body) {
+  return new (Context) DeferStmt(DeferLoc, Body);
+}
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index ff8ca01ec5477..9bc5ee0c7f40e 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -491,6 +491,11 @@ void StmtPrinter::VisitBreakStmt(BreakStmt *Node) {
   if (Policy.IncludeNewlines) OS << NL;
 }

+void StmtPrinter::VisitDeferStmt(DeferStmt *Node) {
+  Indent() << "_Defer";
+  PrintControlledStmt(Node->getBody());
+}
+
 void StmtPrinter::VisitReturnStmt(ReturnStmt *Node) {
   Indent() << "return";
   if (Node->getRetValue()) {
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 4a8c638c85331..b6395a17547f7 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -323,6 +323,8 @@ void StmtProfiler::VisitReturnStmt(const ReturnStmt *S) {
   VisitStmt(S);
 }

+void StmtProfiler::VisitDeferStmt(const DeferStmt *S) { VisitStmt(S); }
+
 void StmtProfiler::VisitGCCAsmStmt(const GCCAsmStmt *S) {
   VisitStmt(S);
   ID.AddBoolean(S->isVolatile());
diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp
index 74792768e2c57..99071d6b46c1e 100644
--- a/clang/lib/Analysis/LifetimeSafety/Checker.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/Checker.cpp
@@ -55,13 +55,14 @@ class LifetimeChecker {
   const LiveOriginsAnalysis &LiveOrigins;
   const FactManager &FactMgr;
   LifetimeSafetyReporter *Reporter;
+  ASTContext &AST;

 public:
   LifetimeChecker(const LoanPropagationAnalysis &LoanPropagation,
                   const LiveOriginsAnalysis &LiveOrigins, const FactManager &FM,
                   AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter)
       : LoanPropagation(LoanPropagation), LiveOrigins(LiveOrigins), FactMgr(FM),
-        Reporter(Reporter) {
+        Reporter(Reporter), AST(ADC.getASTContext()) {
     for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>())
       for (const Fact *F : FactMgr.getFacts(B))
         if (const auto *EF = F->getAs<ExpireFact>())
@@ -70,6 +71,11 @@ class LifetimeChecker {
     checkAnnotations(OEF);
     issuePendingWarnings();
     suggestAnnotations();
+    // Annotation inference is currently guarded by a frontend flag. In the
+    // future, this might be replaced by a design that differentiates between
+    // explicit and inferred findings with separate warning groups.
+    if (AST.getLangOpts().EnableLifetimeSafetyInference)
+      inferAnnotations();
   }

   /// Checks if an escaping origin holds a placeholder loan, indicating a
@@ -160,6 +166,20 @@ class LifetimeChecker {
     for (const auto &[PVD, EscapeExpr] : AnnotationWarningsMap)
       Reporter->suggestAnnotation(PVD, EscapeExpr);
   }
+
+  void inferAnnotations() {
+    // FIXME: To maximise inference propagation, functions should be analyzed
+    // in post-order of the call graph, allowing inferred annotations to
+    // propagate through the call chain.
+    // FIXME: Add the inferred attribute to all redeclarations of the function,
+    // not just the definition being analyzed.
+    for (const auto &[ConstPVD, EscapeExpr] : AnnotationWarningsMap) {
+      ParmVarDecl *PVD = const_cast<ParmVarDecl *>(ConstPVD);
+      if (!PVD->hasAttr<LifetimeBoundAttr>())
+        PVD->addAttr(
+            LifetimeBoundAttr::CreateImplicit(AST, PVD->getLocation()));
+    }
+  }
 };

 } // namespace
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index d1c959b9687c4..9b4019834c4be 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -164,6 +164,8 @@ static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
     return KS_Unknown;
   case KEYFIXEDPOINT:
     return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
+  case KEYDEFERTS:
+    return LangOpts.DeferTS ? KS_Enabled : KS_Disabled;
   default:
     llvm_unreachable("Unknown KeywordStatus flag");
   }
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 8313826d88500..664c9e15d8d18 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -368,7 +368,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
   bool supportsCpuSupports() const override {
     llvm::Triple Triple = getTriple();
     // AIX 7.2 is the minimum requirement to support __builtin_cpu_supports().
-    return Triple.isOSGlibc() ||
+    return Triple.isOSGlibc() || Triple.isMusl() ||
            (Triple.isOSAIX() && !Triple.isOSVersionLT(MINIMUM_AIX_OS_MAJOR,
                                                       MINIMUM_AIX_OS_MINOR));
   }
@@ -376,7 +376,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
   bool supportsCpuIs() const override {
     llvm::Triple Triple = getTriple();
     // AIX 7.2 is the minimum requirement to support __builtin_cpu_is().
-    return Triple.isOSGlibc() ||
+    return Triple.isOSGlibc() || Triple.isMusl() ||
            (Triple.isOSAIX() && !Triple.isOSVersionLT(MINIMUM_AIX_OS_MAJOR,
                                                       MINIMUM_AIX_OS_MINOR));
   }
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index f13e7cb32c71e..b7bd405bf4df4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -1105,6 +1105,8 @@ mlir::LogicalResult CIRGenFunction::emitSwitchStmt(const clang::SwitchStmt &s) {
     terminateBody(builder, caseOp.getCaseRegion(), caseOp.getLoc());
   terminateBody(builder, swop.getBody(), swop.getLoc());

+  swop.setAllEnumCasesCovered(s.isAllEnumCasesCovered());
+
   return res;
 }
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 8077dc6597047..d888fdcf081e7 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1359,44 +1359,6 @@ void cir::CaseOp::build(OpBuilder &builder, OperationState &result,
 // SwitchOp
 //===----------------------------------------------------------------------===//

-static ParseResult parseSwitchOp(OpAsmParser &parser, mlir::Region &regions,
-                                 mlir::OpAsmParser::UnresolvedOperand &cond,
-                                 mlir::Type &condType) {
-  cir::IntType intCondType;
-
-  if (parser.parseLParen())
-    return mlir::failure();
-
-  if (parser.parseOperand(cond))
-    return mlir::failure();
-  if (parser.parseColon())
-    return mlir::failure();
-  if (parser.parseCustomTypeWithFallback(intCondType))
-    return mlir::failure();
-  condType = intCondType;
-
-  if (parser.parseRParen())
-    return mlir::failure();
-  if (parser.parseRegion(regions, /*arguments=*/{}, /*argTypes=*/{}))
-    return failure();
-
-  return mlir::success();
-}
-
-static void printSwitchOp(OpAsmPrinter &p, cir::SwitchOp op,
-                          mlir::Region &bodyRegion, mlir::Value condition,
-                          mlir::Type condType) {
-  p << "(";
-  p << condition;
-  p << " : ";
-  p.printStrippedAttrOrType(condType);
-  p << ")";
-
-  p << ' ';
-  p.printRegion(bodyRegion, /*printEntryBlockArgs=*/false,
-                /*printBlockTerminators=*/true);
-}
-
 void cir::SwitchOp::getSuccessorRegions(
     mlir::RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &region) {
   if (!point.isParent()) {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 5643c58ca52f4..6e30ace8c791c 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1190,6 +1190,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
   case Stmt::ContinueStmtClass:
   case Stmt::DefaultStmtClass:
   case Stmt::CaseStmtClass:
+  case Stmt::DeferStmtClass:
   case Stmt::SEHLeaveStmtClass:
   case Stmt::SYCLKernelCallStmtClass:
     llvm_unreachable("should have emitted these statements as simple");
@@ -1616,6 +1617,9 @@ bool CodeGenFunction::EmitSimpleStmt(const Stmt *S,
   case Stmt::CaseStmtClass:
     EmitCaseStmt(cast<CaseStmt>(*S), Attrs);
     break;
+  case Stmt::DeferStmtClass:
+    EmitDeferStmt(cast<DeferStmt>(*S));
+    break;
   case Stmt::SEHLeaveStmtClass:
     EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S));
     break;
@@ -3264,6 +3268,87 @@ void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S,
   EmitStmt(S.getSubStmt());
 }

+namespace {
+struct EmitDeferredStatement final : EHScopeStack::Cleanup {
+  const DeferStmt &Stmt;
+  EmitDeferredStatement(const DeferStmt *Stmt) : Stmt(*Stmt) {}
+
+  void Emit(CodeGenFunction &CGF, Flags) override {
+    // Take care that any cleanups pushed by the body of a '_Defer' statement
+    // don't clobber the current cleanup slot value.
+    //
+    // Assume we have a scope that pushes a cleanup; when that scope is exited,
+    // we need to run that cleanup; this is accomplished by emitting the
+    // cleanup into a separate block and then branching to that block at scope
+    // exit.
+    //
+    // Where this gets complicated is if we exit the scope in multiple
+    // different ways; e.g. in a 'for' loop, we may exit the scope of its body
+    // by falling off the end (in which case we need to run the cleanup and
+    // then branch to the increment), or by 'break'ing out of the loop (in
+    // which case we need to run the cleanup and then branch to the loop exit
+    // block); in both cases we first branch to the cleanup block to run the
+    // cleanup, but the block we need to jump to *after* running the cleanup
+    // is different.
+    //
+    // This is accomplished using a local integer variable called the 'cleanup
+    // slot': before branching to the cleanup block, we store a value into
+    // that slot. Then, in the cleanup block, after running the cleanup, we
+    // load the value of that variable and 'switch' on it to branch to the
+    // appropriate continuation block.
+    //
+    // The problem that arises once '_Defer' statements are involved is that
+    // the body of a '_Defer' is an arbitrary statement which itself can
+    // create more cleanups. This means we may end up overwriting the cleanup
+    // slot before we ever have a chance to 'switch' on it, which means that
+    // once we *do* get to the 'switch', we end up in whatever block the
+    // cleanup code happened to pick as the default 'switch' exit label!
+    //
+    // That is, what is normally supposed to happen is something like:
+    //
+    //   1. Store 'X' to cleanup slot.
+    //   2. Branch to cleanup block.
+    //   3. Execute cleanup.
+    //   4. Read value from cleanup slot.
+    //   5. Branch to the block associated with 'X'.
+    //
+    // But if we encounter a '_Defer' statement that contains a cleanup, then
+    // what might instead happen is:
+    //
+    //   1. Store 'X' to cleanup slot.
+    //   2. Branch to cleanup block.
+    //   3. Execute cleanup; this ends up pushing another cleanup, so:
+    //      3a. Store 'Y' to cleanup slot.
+    //      3b. Run steps 2–5 recursively.
+    //   4. Read value from cleanup slot, which is now 'Y' instead of 'X'.
+    //   5. Branch to the block associated with 'Y'... which doesn't even
+    //      exist because the value 'Y' is only meaningful for the inner
+    //      cleanup. The result is we just branch 'somewhere random'.
+    //
+    // The rest of the cleanup code simply isn't prepared to handle this case
+    // because most other cleanups can't push more cleanups, and thus,
+    // emitting other cleanups generally cannot clobber the cleanup slot.
+    //
+    // To prevent this from happening, save the current cleanup slot value and
+    // restore it after emitting the '_Defer' statement.
+    llvm::Value *SavedCleanupDest = nullptr;
+    if (CGF.NormalCleanupDest.isValid())
+      SavedCleanupDest =
+          CGF.Builder.CreateLoad(CGF.NormalCleanupDest, "cleanup.dest.saved");
+
+    CGF.EmitStmt(Stmt.getBody());
+
+    if (SavedCleanupDest && CGF.HaveInsertPoint())
+      CGF.Builder.CreateStore(SavedCleanupDest, CGF.NormalCleanupDest);
+
+    // Cleanups must end with an insert point.
+    CGF.EnsureInsertPoint();
+  }
+};
+} // namespace
+
+void CodeGenFunction::EmitDeferStmt(const DeferStmt &S) {
+  EHStack.pushCleanup<EmitDeferredStatement>(NormalAndEHCleanup, &S);
+}
+
 /// CollectStatementsForCase - Given the body of a 'switch' statement and a
 /// constant value that is being switched on, see if we can dead code eliminate
 /// the body of the switch to a simple series of statements to emit. Basically,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 5e028371eee6d..a8b9b0d109357 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3689,6 +3689,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitDefaultStmt(const DefaultStmt &S, ArrayRef<const Attr *> Attrs);
   void EmitCaseStmt(const CaseStmt &S, ArrayRef<const Attr *> Attrs);
   void EmitCaseStmtRange(const CaseStmt &S, ArrayRef<const Attr *> Attrs);
+  void EmitDeferStmt(const DeferStmt &S);
   void EmitAsmStmt(const AsmStmt &S);
   const BreakContinue *GetDestForLoopControlStmt(const LoopControlStmt &S);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index e55df74244234..22cf31b7d30c3 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -382,15 +382,11 @@ static void checkDataLayoutConsistency(const TargetInfo &Target,
   Check("bfloat", llvm::Type::getBFloatTy(Context), Target.BFloat16Align);
   Check("float", llvm::Type::getFloatingPointTy(Context, *Target.FloatFormat),
         Target.FloatAlign);
-  // FIXME: AIX specifies wrong double alignment in DataLayout
-  if (!Triple.isOSAIX()) {
-    Check("double",
-          llvm::Type::getFloatingPointTy(Context, *Target.DoubleFormat),
-          Target.DoubleAlign);
-    Check("long double",
-          llvm::Type::getFloatingPointTy(Context, *Target.LongDoubleFormat),
-          Target.LongDoubleAlign);
-  }
+  Check("double", llvm::Type::getFloatingPointTy(Context, *Target.DoubleFormat),
+        Target.DoubleAlign);
+  Check("long double",
+        llvm::Type::getFloatingPointTy(Context, *Target.LongDoubleFormat),
+        Target.LongDoubleAlign);
   if (Target.hasFloat128Type())
     Check("__float128", llvm::Type::getFP128Ty(Context), Target.Float128Align);
   if (Target.hasIbm128Type())
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 613438dcacee5..6b5f536f59d34 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7178,6 +7178,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       types::isCXX(InputType))
     CmdArgs.push_back("-fcoro-aligned-allocation");

+  if (Args.hasFlag(options::OPT_fdefer_ts, options::OPT_fno_defer_ts,
+                   /*Default=*/false))
+    CmdArgs.push_back("-fdefer-ts");
+
   Args.AddLastArg(CmdArgs, options::OPT_fdouble_square_bracket_attributes,
                   options::OPT_fno_double_square_bracket_attributes);
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index a54f863754046..3b9475a646452 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -498,6 +498,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
     Builder.defineMacro("__STDC_EMBED_EMPTY__",
                         llvm::itostr(static_cast<int>(EmbedResult::Empty)));

+  // We define this to '1' here to indicate that we only support '_Defer'
+  // as a keyword.
+  if (LangOpts.DeferTS)
+    Builder.defineMacro("__STDC_DEFER_TS25755__", "1");
+
   if (LangOpts.ObjC)
     Builder.defineMacro("__OBJC__");
diff --git a/clang/lib/Headers/stddefer.h b/clang/lib/Headers/stddefer.h
new file mode 100644
index 0000000000000..162876ddfa395
--- /dev/null
+++ b/clang/lib/Headers/stddefer.h
@@ -0,0 +1,19 @@
+/*===---- stddefer.h - Standard header for 'defer' -------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_STDDEFER_H
+#define __CLANG_STDDEFER_H
+
+/* Provide 'defer' if '_Defer' is supported. */
+#ifdef __STDC_DEFER_TS25755__
+#define __STDC_VERSION_STDDEFER_H__ 202602L
+#define defer _Defer
+#endif
+
+#endif /* __CLANG_STDDEFER_H */
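A usage sketch for the new header (illustrative; the file name and messages are made up), relying on the `__STDC_DEFER_TS25755__` macro defined in InitPreprocessor above:

    #include <stddefer.h>
    #include <stdio.h>

    void write_log(void) {
    #ifdef __STDC_DEFER_TS25755__
      FILE *fp = fopen("log.txt", "w");
      if (!fp)
        return;
      defer fclose(fp); /* 'defer' expands to '_Defer' */
      fprintf(fp, "hello\n");
    #endif
    } /* fclose(fp) runs here, after fprintf */
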
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index 7e73d89c2a18c..78ce4b76d29ae 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -28,6 +28,7 @@
 #include "clang/Sema/SemaOpenMP.h"
 #include "clang/Sema/TypoCorrection.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include <optional>

 using namespace clang;
@@ -312,6 +313,8 @@ StmtResult Parser::ParseStatementOrDeclarationAfterAttributes(
     Res = ParseReturnStatement();
     SemiError = "co_return";
     break;
+  case tok::kw__Defer: // C defer TS: defer-statement
+    return ParseDeferStatement(TrailingElseLoc);

   case tok::kw_asm: {
     for (const ParsedAttr &AL : CXX11Attrs)
@@ -2370,6 +2373,29 @@ StmtResult Parser::ParseReturnStatement() {
   return Actions.ActOnReturnStmt(ReturnLoc, R.get(), getCurScope());
 }

+StmtResult Parser::ParseDeferStatement(SourceLocation *TrailingElseLoc) {
+  assert(Tok.is(tok::kw__Defer));
+  SourceLocation DeferLoc = ConsumeToken();
+
+  Actions.ActOnStartOfDeferStmt(DeferLoc, getCurScope());
+
+  auto OnError = llvm::make_scope_exit(
+      [&] { Actions.ActOnDeferStmtError(getCurScope()); });
+
+  StmtResult Res = ParseStatement(TrailingElseLoc);
+  if (!Res.isUsable())
+    return StmtError();
+
+  // The grammar specifically calls for an unlabeled-statement here.
+  if (auto *L = dyn_cast<LabelStmt>(Res.get())) {
+    Diag(L->getIdentLoc(), diag::err_defer_ts_labeled_stmt);
+    return StmtError();
+  }
+
+  OnError.release();
+  return Actions.ActOnEndOfDeferStmt(Res.get(), getCurScope());
+}
+
 StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts,
                                        ParsedStmtContext StmtCtx,
                                        SourceLocation *TrailingElseLoc,
diff --git a/clang/lib/Sema/JumpDiagnostics.cpp b/clang/lib/Sema/JumpDiagnostics.cpp
index 2c6ae89513241..1dcbf7dd1eb90 100644
--- a/clang/lib/Sema/JumpDiagnostics.cpp
+++ b/clang/lib/Sema/JumpDiagnostics.cpp
@@ -595,6 +595,27 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
     break;
   }

+  case Stmt::DeferStmtClass: {
+    auto *D = cast<DeferStmt>(S);
+
+    {
+      // Disallow jumps over defer statements.
+      unsigned NewParentScope = Scopes.size();
+      Scopes.emplace_back(ParentScope, diag::note_protected_by_defer_stmt, 0,
+                          D->getDeferLoc());
+      origParentScope = NewParentScope;
+    }
+
+    // Disallow jumps into or out of defer statements.
+    {
+      unsigned NewParentScope = Scopes.size();
+      Scopes.emplace_back(ParentScope, diag::note_enters_defer_stmt,
+                          diag::note_exits_defer_stmt, D->getDeferLoc());
+      BuildScopeInformation(D->getBody(), NewParentScope);
+    }
+    return;
+  }
+
   case Stmt::CaseStmtClass:
   case Stmt::DefaultStmtClass:
   case Stmt::LabelStmtClass:
@@ -977,7 +998,7 @@ void JumpScopeChecker::CheckJump(Stmt *From, Stmt *To, SourceLocation DiagLoc,
   // Common case: exactly the same scope, which is fine.
   if (FromScope == ToScope) return;

-  // Warn on gotos out of __finally blocks.
+  // Warn on gotos out of __finally blocks and defer statements.
   if (isa<GotoStmt>(From) || isa<GCCAsmStmt>(From)) {
     // If FromScope > ToScope, FromScope is more nested and the jump goes to a
     // less nested scope. Check if it crosses a __finally along the way.
@@ -995,6 +1016,10 @@ void JumpScopeChecker::CheckJump(Stmt *From, Stmt *To, SourceLocation DiagLoc,
         S.Diag(From->getBeginLoc(), diag::err_goto_into_protected_scope);
         S.Diag(Scopes[I].Loc, diag::note_acc_branch_out_of_compute_construct);
         return;
+      } else if (Scopes[I].OutDiag == diag::note_exits_defer_stmt) {
+        S.Diag(From->getBeginLoc(), diag::err_goto_into_protected_scope);
+        S.Diag(Scopes[I].Loc, diag::note_exits_defer_stmt);
+        return;
       }
     }
   }
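A sketch of the jumps these new scopes reject (hypothetical code; the notes named in the comments are the diagnostics added above):

    #include <stdio.h>

    void f(int c) {
      if (c)
        goto out;       /* error: jump bypasses defer statement */
      _Defer puts("d"); /* note_protected_by_defer_stmt         */
      goto in;          /* error: jump enters a defer statement */
      _Defer { in:; }   /* note_enters_defer_stmt               */
    out:;
    }
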
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index a0483c3027199..b5ff1dbd26d68 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1538,6 +1538,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Stmt::SEHTryStmtClass:
   case Stmt::SwitchStmtClass:
   case Stmt::WhileStmtClass:
+  case Stmt::DeferStmtClass:
     return canSubStmtsThrow(*this, S);

   case Stmt::DeclStmtClass: {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 39ffa69a4535a..70d4f4191267d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6877,6 +6877,34 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
   FunctionDecl *FDecl = dyn_cast_or_null<FunctionDecl>(NDecl);
   unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0);

+  auto IsSJLJ = [&] {
+    switch (BuiltinID) {
+    case Builtin::BI__builtin_longjmp:
+    case Builtin::BI__builtin_setjmp:
+    case Builtin::BI__sigsetjmp:
+    case Builtin::BI_longjmp:
+    case Builtin::BI_setjmp:
+    case Builtin::BIlongjmp:
+    case Builtin::BIsetjmp:
+    case Builtin::BIsiglongjmp:
+    case Builtin::BIsigsetjmp:
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  // Forbid any call to setjmp/longjmp and friends inside a '_Defer' statement.
+  if (!CurrentDefer.empty() && IsSJLJ()) {
+    // Note: If we ever start supporting '_Defer' in C++ we'll have to check
+    // for more than just blocks (e.g. lambdas, nested classes...).
+    Scope *DeferParent = CurrentDefer.back().first;
+    Scope *Block = CurScope->getBlockParent();
+    if (DeferParent->Contains(*CurScope) &&
+        (!Block || !DeferParent->Contains(*Block)))
+      Diag(Fn->getExprLoc(), diag::err_defer_invalid_sjlj) << FDecl;
+  }
+
   // Functions with 'interrupt' attribute cannot be called directly.
   if (FDecl) {
     if (FDecl->hasAttr<AnyX86InterruptAttr>()) {
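The check above rejects code along these lines (a sketch, assuming <setjmp.h>'s setjmp maps to one of the builtin IDs listed in IsSJLJ):

    #include <setjmp.h>

    static jmp_buf env;

    void f(void) {
      _Defer {
        if (setjmp(env)) /* error: cannot use 'setjmp' inside a defer statement */
          ;
      }
    }
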
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 6bb1a27d1800c..1b1643250d05e 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -3267,12 +3267,23 @@ Sema::ActOnIndirectGotoStmt(SourceLocation GotoLoc, SourceLocation StarLoc,
   return new (Context) IndirectGotoStmt(GotoLoc, StarLoc, E);
 }

-static void CheckJumpOutOfSEHFinally(Sema &S, SourceLocation Loc,
-                                     const Scope &DestScope) {
+static void CheckJumpOutOfSEHFinallyOrDefer(Sema &S, SourceLocation Loc,
+                                            const Scope &DestScope,
+                                            unsigned DeferJumpKind) {
   if (!S.CurrentSEHFinally.empty() &&
       DestScope.Contains(*S.CurrentSEHFinally.back())) {
     S.Diag(Loc, diag::warn_jump_out_of_seh_finally);
   }
+
+  if (!S.CurrentDefer.empty()) {
+    Scope *Parent = S.CurrentDefer.back().first;
+    assert(Parent);
+
+    // Note: We don't create a new scope for defer statements, so 'Parent'
+    // is actually the scope that contains the '_Defer'.
+    if (DestScope.Contains(*Parent) || &DestScope == Parent)
+      S.Diag(Loc, diag::err_jump_out_of_defer_stmt) << DeferJumpKind;
+  }
 }

 static Scope *FindLabeledBreakContinueScope(Sema &S, Scope *CurScope,
@@ -3346,7 +3357,8 @@ StmtResult Sema::ActOnContinueStmt(SourceLocation ContinueLoc, Scope *CurScope,
         Diag(ContinueLoc, diag::err_acc_branch_in_out_compute_construct)
             << /*branch*/ 0 << /*out of */ 0);

-  CheckJumpOutOfSEHFinally(*this, ContinueLoc, *S);
+  CheckJumpOutOfSEHFinallyOrDefer(*this, ContinueLoc, *S,
+                                  diag::DeferJumpKind::Continue);

   return new (Context) ContinueStmt(ContinueLoc, LabelLoc, Target);
 }
@@ -3387,7 +3399,8 @@ StmtResult Sema::ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope,
         Diag(BreakLoc, diag::err_acc_branch_in_out_compute_construct)
             << /*branch*/ 0 << /*out of */ 0);

-  CheckJumpOutOfSEHFinally(*this, BreakLoc, *S);
+  CheckJumpOutOfSEHFinallyOrDefer(*this, BreakLoc, *S,
+                                  diag::DeferJumpKind::Break);

   return new (Context) BreakStmt(BreakLoc, LabelLoc, Target);
 }
@@ -3932,11 +3945,30 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp,

     CurScope->updateNRVOCandidate(VD);

-  CheckJumpOutOfSEHFinally(*this, ReturnLoc, *CurScope->getFnParent());
+  CheckJumpOutOfSEHFinallyOrDefer(*this, ReturnLoc, *CurScope->getFnParent(),
+                                  diag::DeferJumpKind::Return);

   return R;
 }

+void Sema::ActOnStartOfDeferStmt(SourceLocation DeferLoc, Scope *CurScope) {
+  CurrentDefer.emplace_back(CurScope, DeferLoc);
+}
+
+void Sema::ActOnDeferStmtError([[maybe_unused]] Scope *CurScope) {
+  assert(!CurrentDefer.empty() && CurrentDefer.back().first == CurScope);
+  CurrentDefer.pop_back();
+}
+
+StmtResult Sema::ActOnEndOfDeferStmt(Stmt *Body,
+                                     [[maybe_unused]] Scope *CurScope) {
+  assert(!CurrentDefer.empty() && CurrentDefer.back().first == CurScope);
+  SourceLocation DeferLoc = CurrentDefer.pop_back_val().second;
+
+  DiagnoseEmptyStmtBody(DeferLoc, Body, diag::warn_empty_defer_body);
+
+  setFunctionHasBranchProtectedScope();
+  return DeferStmt::Create(Context, DeferLoc, Body);
+}
+
 static bool CheckSimplerImplicitMovesMSVCWorkaround(const Sema &S,
                                                     const Expr *E) {
   if (!E || !S.getLangOpts().CPlusPlus23 || !S.getLangOpts().MSVCCompat)
@@ -4554,7 +4586,8 @@ Sema::ActOnSEHLeaveStmt(SourceLocation Loc, Scope *CurScope) {
     SEHTryParent = SEHTryParent->getParent();
   if (!SEHTryParent)
     return StmtError(Diag(Loc, diag::err_ms___leave_not_in___try));
-  CheckJumpOutOfSEHFinally(*this, Loc, *SEHTryParent);
+  CheckJumpOutOfSEHFinallyOrDefer(*this, Loc, *SEHTryParent,
+                                  diag::DeferJumpKind::SEHLeave);

   return new (Context) SEHLeaveStmt(Loc);
 }
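The control-flow restrictions enforced here reject, for example (a sketch; the messages come from the %enum_select cases of err_jump_out_of_defer_stmt):

    void f(void) {
      for (int i = 0; i < 10; i++) {
        _Defer {
          if (i == 0)
            continue; /* error: cannot continue loop outside of enclosing
                         defer statement */
          break;      /* error: cannot break out of a defer statement */
        }
        _Defer return; /* error: cannot return from a defer statement */
      }
    }
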
{ void ASTStmtReader::VisitBreakStmt(BreakStmt *S) { VisitLoopControlStmt(S); } +void ASTStmtReader::VisitDeferStmt(DeferStmt *S) { + VisitStmt(S); + S->setDeferLoc(readSourceLocation()); + S->setBody(Record.readSubStmt()); +} + void ASTStmtReader::VisitReturnStmt(ReturnStmt *S) { VisitStmt(S); @@ -3146,6 +3152,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = new (Context) BreakStmt(Empty); break; + case STMT_DEFER: + S = DeferStmt::CreateEmpty(Context, Empty); + break; + case STMT_RETURN: S = ReturnStmt::CreateEmpty( Context, /* HasNRVOCandidate=*/Record[ASTStmtReader::NumStmtFields]); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index acf345392aa1a..a457e627799c9 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -330,6 +330,13 @@ void ASTStmtWriter::VisitBreakStmt(BreakStmt *S) { Code = serialization::STMT_BREAK; } +void ASTStmtWriter::VisitDeferStmt(DeferStmt *S) { + VisitStmt(S); + Record.AddSourceLocation(S->getDeferLoc()); + Record.AddStmt(S->getBody()); + Code = serialization::STMT_DEFER; +} + void ASTStmtWriter::VisitReturnStmt(ReturnStmt *S) { VisitStmt(S); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index a759aee47b8ea..d3de632179e1d 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1874,6 +1874,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::NullStmtClass: case Stmt::SwitchStmtClass: case Stmt::WhileStmtClass: + case Stmt::DeferStmtClass: case Expr::MSDependentExistsStmtClass: llvm_unreachable("Stmt should not be in analyzer evaluation loop"); case Stmt::ImplicitValueInitExprClass: diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp index 1d55f615de8a9..46b2cc1ac99c1 100644 --- a/clang/lib/Tooling/Tooling.cpp +++ b/clang/lib/Tooling/Tooling.cpp @@ -97,7 +97,7 @@ static bool ignoreExtraCC1Commands(const driver::Compilation *Compilation) { OffloadCompilation = true; if (Jobs.size() > 1) { - for (auto *A : Actions){ + for (auto *A : Actions) { // On MacOSX real actions may end up being wrapped in BindArchAction if (isa(A)) A = *A->input_begin(); @@ -414,8 +414,8 @@ bool ToolInvocation::run() { Driver->BuildCompilation(llvm::ArrayRef(Argv))); if (!Compilation) return false; - const llvm::opt::ArgStringList *const CC1Args = getCC1Arguments( - &*Diagnostics, Compilation.get()); + const llvm::opt::ArgStringList *const CC1Args = + getCC1Arguments(&*Diagnostics, Compilation.get()); if (!CC1Args) return false; std::unique_ptr Invocation( @@ -498,9 +498,7 @@ void ClangTool::appendArgumentsAdjuster(ArgumentsAdjuster Adjuster) { ArgsAdjuster = combineAdjusters(std::move(ArgsAdjuster), std::move(Adjuster)); } -void ClangTool::clearArgumentsAdjusters() { - ArgsAdjuster = nullptr; -} +void ClangTool::clearArgumentsAdjusters() { ArgsAdjuster = nullptr; } static void injectResourceDir(CommandLineArguments &Args, const char *Argv0, void *MainAddr) { @@ -555,8 +553,9 @@ int ClangTool::run(ToolAction *Action) { } size_t NumOfTotalFiles = AbsolutePaths.size(); - unsigned ProcessedFileCounter = 0; + unsigned CurrentFileIndex = 0; for (llvm::StringRef File : AbsolutePaths) { + ++CurrentFileIndex; // Currently implementations of CompilationDatabase::getCompileCommands can // change the state of the file system (e.g. 
prepare generated headers), so // this method needs to run right before we invoke the tool, as the next @@ -571,6 +570,7 @@ int ClangTool::run(ToolAction *Action) { FileSkipped = true; continue; } + unsigned CurrentCommandIndexForFile = 0; for (CompileCommand &CompileCommand : CompileCommandsForFile) { // If the 'directory' field of the compilation database is empty, display // an error and use the working directory instead. @@ -617,13 +617,20 @@ int ClangTool::run(ToolAction *Action) { // pass in made-up names here. Make sure this works on other platforms. injectResourceDir(CommandLine, "clang_tool", &StaticSymbol); + ++CurrentCommandIndexForFile; + // FIXME: We need a callback mechanism for the tool writer to output a // customized message for each file. - if (NumOfTotalFiles > 1) - llvm::errs() << "[" + std::to_string(++ProcessedFileCounter) + "/" + - std::to_string(NumOfTotalFiles) + - "] Processing file " + File - << ".\n"; + if (NumOfTotalFiles > 1 || CompileCommandsForFile.size() > 1) { + llvm::errs() << "[" << std::to_string(CurrentFileIndex) << "/" + << std::to_string(NumOfTotalFiles) << "]"; + if (CompileCommandsForFile.size() > 1) { + llvm::errs() << " (" << std::to_string(CurrentCommandIndexForFile) + << "/" << std::to_string(CompileCommandsForFile.size()) + << ")"; + } + llvm::errs() << " Processing file " << File << ".\n"; + } ToolInvocation Invocation(std::move(CommandLine), Action, Files.get(), PCHContainerOps); Invocation.setDiagnosticConsumer(DiagConsumer); diff --git a/clang/test/AST/ast-dump-defer-ts.c b/clang/test/AST/ast-dump-defer-ts.c new file mode 100644 index 0000000000000..eba057f93c9c2 --- /dev/null +++ b/clang/test/AST/ast-dump-defer-ts.c @@ -0,0 +1,27 @@ +// Test without serialization: +// RUN: %clang_cc1 -std=c23 -fdefer-ts -ast-dump %s -triple x86_64-linux-gnu \ +// RUN: | FileCheck %s +// +// Test with serialization: +// RUN: %clang_cc1 -std=c23 -fdefer-ts -triple x86_64-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -std=c23 -fdefer-ts -triple x86_64-linux-gnu -include-pch %t -ast-dump-all /dev/null \ +// RUN: | FileCheck %s + +static inline void f() { + _Defer 3; + _Defer { 4; } + _Defer _Defer if (true) {} +} + +// CHECK-LABEL: f 'void (void)' static inline +// CHECK-NEXT: `-CompoundStmt {{.*}} +// CHECK-NEXT: |-DeferStmt {{.*}} +// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 3 +// CHECK-NEXT: |-DeferStmt {{.*}} +// CHECK-NEXT: | `-CompoundStmt {{.*}} +// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 4 +// CHECK-NEXT: `-DeferStmt {{.*}} +// CHECK-NEXT: `-DeferStmt {{.*}} +// CHECK-NEXT: `-IfStmt {{.*}} +// CHECK-NEXT: |-CXXBoolLiteralExpr {{.*}} 'bool' true +// CHECK-NEXT: `-CompoundStmt {{.*}} diff --git a/clang/test/AST/ast-print-defer-ts.c b/clang/test/AST/ast-print-defer-ts.c new file mode 100644 index 0000000000000..bcc217a597778 --- /dev/null +++ b/clang/test/AST/ast-print-defer-ts.c @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -std=c23 -fdefer-ts -ast-print %s | FileCheck %s + +void g(); + +// CHECK: void f +void f() { + // CHECK-NEXT: _Defer + // CHECK-NEXT: g(); + // CHECK-NEXT: _Defer + // CHECK-NEXT: _Defer + // CHECK-NEXT: g(); + // CHECK-NEXT: _Defer { + // CHECK-NEXT: } + // CHECK-NEXT: _Defer { + // CHECK-NEXT: int x; + // CHECK-NEXT: } + // CHECK-NEXT: _Defer + // CHECK-NEXT: if (1) { + // CHECK-NEXT: } + _Defer + g(); + _Defer + _Defer + g(); + _Defer { + } + _Defer { + int x; + } + _Defer + if (1) { + } +} diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c index 64e0961fe20d9..5fbbee0e88a15 100644 --- 
a/clang/test/CIR/CodeGen/atomic.c +++ b/clang/test/CIR/CodeGen/atomic.c @@ -1143,7 +1143,7 @@ int atomic_load_dynamic_order(int *ptr, int order) { // CIR: %[[PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr // CIR-NEXT: %[[ORDER:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i - // CIR-NEXT: cir.switch (%[[ORDER]] : !s32i) { + // CIR-NEXT: cir.switch(%[[ORDER]] : !s32i) { // CIR-NEXT: cir.case(default, []) { // CIR-NEXT: %[[RES:.+]] = cir.load align(4) syncscope(system) atomic(relaxed) %[[PTR]] : !cir.ptr, !s32i // CIR-NEXT: cir.store align(4) %[[RES]], %[[RES_SLOT:.+]] : !s32i, !cir.ptr @@ -1219,7 +1219,7 @@ void atomic_store_dynamic_order(int *ptr, int order) { // CIR: %[[PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr // CIR-NEXT: %[[ORDER:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i - // CIR: cir.switch (%[[ORDER]] : !s32i) { + // CIR: cir.switch(%[[ORDER]] : !s32i) { // CIR-NEXT: cir.case(default, []) { // CIR-NEXT: %[[VALUE:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i // CIR-NEXT: cir.store align(4) atomic(relaxed) %[[VALUE]], %[[PTR]] : !s32i, !cir.ptr @@ -1288,7 +1288,7 @@ int atomic_load_and_store_dynamic_order(int *ptr, int order) { // CIR: %[[PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr // CIR-NEXT: %[[ORDER:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i - // CIR: cir.switch (%[[ORDER]] : !s32i) { + // CIR: cir.switch(%[[ORDER]] : !s32i) { // CIR-NEXT: cir.case(default, []) { // CIR-NEXT: %[[LIT:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i // CIR-NEXT: %[[RES:.+]] = cir.atomic.xchg relaxed %[[PTR]], %[[LIT]] : (!cir.ptr, !s32i) -> !s32i diff --git a/clang/test/CIR/CodeGen/switch.cpp b/clang/test/CIR/CodeGen/switch.cpp index 3824be0d08c2f..b7bd2da5e39b8 100644 --- a/clang/test/CIR/CodeGen/switch.cpp +++ b/clang/test/CIR/CodeGen/switch.cpp @@ -20,7 +20,7 @@ void sw1(int a) { } // CIR: cir.func{{.*}} @_Z3sw1i -// CIR: cir.switch (%[[COND:.*]] : !s32i) { +// CIR: cir.switch(%[[COND:.*]] : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) { // CIR: cir.break // CIR: cir.case(equal, [#cir.int<1> : !s32i]) { @@ -101,7 +101,7 @@ void sw2(int a) { // CIR: cir.scope { // CIR-NEXT: %[[YOLO:.*]] = cir.alloca !s32i, !cir.ptr, ["yolo", init] // CIR-NEXT: %[[FOMO:.*]] = cir.alloca !s32i, !cir.ptr, ["fomo", init] -// CIR: cir.switch (%[[COND:.*]] : !s32i) { +// CIR: cir.switch(%[[COND:.*]] : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CIR-NEXT: cir.store{{.*}} %[[ZERO]], %[[FOMO]] : !s32i, !cir.ptr @@ -154,7 +154,7 @@ void sw3(int a) { // CIR: cir.func{{.*}} @_Z3sw3i // CIR: cir.scope { // CIR-NEXT: %[[COND:.*]] = cir.load{{.*}} %[[A:.*]] : !cir.ptr, !s32i -// CIR-NEXT: cir.switch (%[[COND]] : !s32i) { +// CIR-NEXT: cir.switch(%[[COND]] : !s32i) { // CIR-NEXT: cir.case(default, []) { // CIR-NEXT: cir.break // CIR-NEXT: } @@ -196,7 +196,7 @@ int sw4(int a) { } // CIR: cir.func{{.*}} @_Z3sw4i -// CIR: cir.switch (%[[COND:.*]] : !s32i) { +// CIR: cir.switch(%[[COND:.*]] : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<42> : !s32i]) { // CIR-NEXT: cir.scope { // CIR-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i @@ -264,7 +264,7 @@ void sw5(int a) { } // CIR: cir.func{{.*}} @_Z3sw5i -// CIR: cir.switch (%[[A:.*]] : !s32i) { +// CIR: cir.switch(%[[A:.*]] : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) { // CIR-NEXT: cir.yield // CIR-NEXT: } @@ -314,7 +314,7 @@ void sw6(int a) { } // CIR: cir.func{{.*}} @_Z3sw6i -// CIR: cir.switch 
(%[[A:.*]] : !s32i) { +// CIR: cir.switch(%[[A:.*]] : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) { // CIR-NEXT: cir.yield // CIR-NEXT: } @@ -406,7 +406,7 @@ void sw7(int a) { // CIR: cir.func{{.*}} @_Z3sw7i // CIR: %[[X:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x"] -// CIR: cir.switch (%[[A:.*]] : !s32i) +// CIR: cir.switch(%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) { // CIR-NEXT: cir.yield // CIR-NEXT: } @@ -499,7 +499,7 @@ void sw8(int a) { } // CIR: cir.func{{.*}} @_Z3sw8i -// CIR: cir.switch (%[[A:.*]] : !s32i) +// CIR: cir.switch(%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: cir.break // CIR-NEXT: } @@ -557,7 +557,7 @@ void sw9(int a) { } // CIR: cir.func{{.*}} @_Z3sw9i -// CIR: cir.switch (%[[A:.*]] : !s32i) +// CIR: cir.switch(%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: cir.break // CIR-NEXT: } @@ -616,7 +616,7 @@ void sw10(int a) { } // CIR: cir.func{{.*}} @_Z4sw10i -// CIR: cir.switch (%[[A:.*]] : !s32i) +// CIR: cir.switch(%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: cir.break // CIR-NEXT: } @@ -688,7 +688,7 @@ void sw11(int a) { } // CIR: cir.func{{.*}} @_Z4sw11i -// CIR: cir.switch (%[[A:.*]] : !s32i) +// CIR: cir.switch(%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: cir.break // CIR-NEXT: } @@ -1063,7 +1063,7 @@ int nested_switch(int a) { return 0; } -// CIR: cir.switch (%[[COND:.*]] : !s32i) { +// CIR: cir.switch(%[[COND:.*]] : !s32i) { // CIR: cir.case(equal, [#cir.int<0> : !s32i]) { // CIR: cir.yield // CIR: } @@ -1198,7 +1198,7 @@ int sw_return_multi_cases(int x) { } // CIR-LABEL: cir.func{{.*}} @_Z21sw_return_multi_casesi -// CIR: cir.switch (%{{.*}} : !s32i) { +// CIR: cir.switch(%{{.*}} : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) { // CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CIR: cir.store{{.*}} %[[ZERO]], %{{.*}} : !s32i, !cir.ptr<!s32i> @@ -1270,3 +1270,25 @@ int sw_return_multi_cases(int x) { // OGCG: [[RETURN]]: // OGCG: %[[RETVAL_LOAD:.*]] = load i32, ptr %[[RETVAL]], align 4 // OGCG: ret i32 %[[RETVAL_LOAD]] + +enum M { + Six, + Seven +}; + +void testSwitchCoverAllCase(M m) { + switch (m) { + case Six:case Seven: + break; + } +} +// CIR: cir.switch(%[[ARG:.*]] : !s32i) allEnumCasesCovered { + +void testSwitchNotCoverAllCase(M m) { + switch (m) { + case Six: + default: + break; + } +} +// CIR: cir.switch(%[[ARG:.*]] : !s32i) { diff --git a/clang/test/CIR/CodeGen/switch_flat_op.cpp b/clang/test/CIR/CodeGen/switch_flat_op.cpp index a3ea7e7a15547..ba0a82da52c70 100644 --- a/clang/test/CIR/CodeGen/switch_flat_op.cpp +++ b/clang/test/CIR/CodeGen/switch_flat_op.cpp @@ -21,7 +21,7 @@ void swf(int a) { // BEFORE: cir.func{{.*}} @_Z3swfi // BEFORE: %[[VAR_B:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64} // BEFORE: %[[CONST_3:.*]] = cir.const #cir.int<3> : !s32i -// BEFORE: cir.switch (%[[COND:.*]] : !s32i) { +// BEFORE: cir.switch(%[[COND:.*]] : !s32i) { // BEFORE: cir.case(equal, [#cir.int<3> : !s32i]) { // BEFORE: %[[LOAD_B_EQ:.*]] = cir.load{{.*}} %[[VAR_B]] : !cir.ptr<!s32i>, !s32i // BEFORE: %[[CONST_2:.*]] = cir.const #cir.int<2> : !s32i diff --git a/clang/test/CIR/IR/switch.cir b/clang/test/CIR/IR/switch.cir index 87d45bf1f5219..89614480e43cd 100644 --- a/clang/test/CIR/IR/switch.cir +++ b/clang/test/CIR/IR/switch.cir @@ -21,7 +21,7 @@ cir.func @s0() { cir.return } -// CHECK: cir.switch (%0 : !s32i) { +// CHECK: cir.switch(%0 : 
!s32i) { // CHECK-NEXT: cir.case(default, []) { // CHECK-NEXT: cir.return // CHECK-NEXT: } @@ -36,3 +36,33 @@ cir.func @s0() { // CHECK-NEXT: } // CHECK-NEXT: cir.yield // CHECK-NEXT: } + + +// Pretend that this was lowered from a C file and tagged with allEnumCasesCovered = true +cir.func @s1(%1 : !s32i) { + cir.switch (%1 : !s32i) allEnumCasesCovered { + cir.case (default, []) { + cir.return + } + cir.case (equal, [#cir.int<1> : !s32i]) { + cir.yield + } + cir.case (equal, [#cir.int<2> : !s32i]) { + cir.yield + } + cir.yield + } { } + cir.return +} +// CHECK: cir.switch(%[[ARG:.*]] : !s32i) allEnumCasesCovered { +// CHECK-NEXT: cir.case(default, []) { +// CHECK-NEXT: cir.return +// CHECK-NEXT: } +// CHECK-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: cir.case(equal, [#cir.int<2> : !s32i]) { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } diff --git a/clang/test/CIR/Transforms/switch-fold.cir b/clang/test/CIR/Transforms/switch-fold.cir index 62a94f4fde2c3..c348a05128671 100644 --- a/clang/test/CIR/Transforms/switch-fold.cir +++ b/clang/test/CIR/Transforms/switch-fold.cir @@ -27,7 +27,7 @@ module { cir.return } //CHECK: cir.func @foldCascade - //CHECK: cir.switch (%[[COND:.*]] : !s32i) { + //CHECK: cir.switch(%[[COND:.*]] : !s32i) { //CHECK-NEXT: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]) { //CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i //CHECK-NEXT: cir.store %[[TWO]], %[[ARG0:.*]] : !s32i, !cir.ptr<!s32i> @@ -66,7 +66,7 @@ module { cir.return } //CHECK: @foldCascade2 - //CHECK: cir.switch (%[[COND2:.*]] : !s32i) { + //CHECK: cir.switch(%[[COND2:.*]] : !s32i) { //CHECK: cir.case(anyof, [#cir.int<0> : !s32i, #cir.int<2> : !s32i, #cir.int<4> : !s32i]) { //CHECK: cir.break //CHECK: } @@ -106,7 +106,7 @@ module { cir.return } //CHECK: cir.func @foldCascade3 - //CHECK: cir.switch (%[[COND3:.*]] : !s32i) { + //CHECK: cir.switch(%[[COND3:.*]] : !s32i) { //CHECK: cir.case(anyof, [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i]) { //CHECK: cir.break //CHECK: } @@ -142,7 +142,7 @@ module { cir.return } //CHECK: cir.func @foldCascadeWithDefault - //CHECK: cir.switch (%[[COND:.*]] : !s32i) { + //CHECK: cir.switch(%[[COND:.*]] : !s32i) { //CHECK: cir.case(equal, [#cir.int<3> : !s32i]) { //CHECK: cir.break //CHECK: } @@ -187,7 +187,7 @@ module { cir.return } //CHECK: cir.func @foldAllCascade - //CHECK: cir.switch (%[[COND:.*]] : !s32i) { + //CHECK: cir.switch(%[[COND:.*]] : !s32i) { //CHECK: cir.case(anyof, [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i]) { //CHECK: cir.yield //CHECK: } diff --git a/clang/test/CodeGen/2007-05-07-PaddingElements.c b/clang/test/CodeGen/2007-05-07-PaddingElements.c index f8ec2483a8d61..28d24800abbe6 100644 --- a/clang/test/CodeGen/2007-05-07-PaddingElements.c +++ b/clang/test/CodeGen/2007-05-07-PaddingElements.c @@ -1,6 +1,9 @@ // PR 1278 -// RUN: %clang_cc1 %s -emit-llvm -o - | grep struct.s | not grep "4 x i8] zeroinitializer" -// RUN: %clang_cc1 %s -emit-llvm -o - | not grep "i32 0, i32 2" +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-ibm-aix %s -emit-llvm -o - | FileCheck %s --check-prefix=AIX + +// CHECK: %struct.s = type { double, i32 } +// AIX: %struct.s = type { double, i32, [4 x i8] }
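+// On AIX, the four bytes of tail padding after the i32 member are materialized as an explicit [4 x i8] array, hence the separate AIX check prefix.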
struct s { double d1; int s1; diff --git a/clang/test/CodeGen/defer-ts-musttail.c b/clang/test/CodeGen/defer-ts-musttail.c new file mode 100644 index 0000000000000..5622fecbb4fed --- /dev/null +++ b/clang/test/CodeGen/defer-ts-musttail.c @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux -std=c23 -fdefer-ts -emit-llvm %s -o /dev/null -verify + +int bar() { return 12; } +int foo() { + _Defer {}; + [[clang::musttail]] return bar(); // expected-error {{cannot compile this tail call skipping over cleanups yet}} +} diff --git a/clang/test/CodeGen/defer-ts-nested-cleanups.c b/clang/test/CodeGen/defer-ts-nested-cleanups.c new file mode 100644 index 0000000000000..d831b4380b929 --- /dev/null +++ b/clang/test/CodeGen/defer-ts-nested-cleanups.c @@ -0,0 +1,179 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux -std=c23 -fdefer-ts -emit-llvm %s -o - -O1 -disable-llvm-passes | FileCheck %s + +// Test that cleanups emitted in a '_Defer' don't clobber the cleanup slot; we +// test this using lifetime intrinsics, which are emitted starting at -O1. + +void g(); + +// CHECK-LABEL: define {{.*}} void @f1() +// CHECK: entry: +// CHECK-NEXT: %i = alloca i32, align 4 +// CHECK-NEXT: %cleanup.dest.slot = alloca i32, align 4 +// CHECK-NEXT: %j = alloca i32, align 4 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr %i) +// CHECK-NEXT: store i32 0, ptr %i, align 4 +// CHECK-NEXT: br label %for.cond +// CHECK: for.cond: +// CHECK-NEXT: %0 = load i32, ptr %i, align 4 +// CHECK-NEXT: %cmp = icmp eq i32 %0, 1 +// CHECK-NEXT: br i1 %cmp, label %if.then, label %if.end +// CHECK: if.then: +// CHECK-NEXT: store i32 2, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: if.end: +// CHECK-NEXT: store i32 0, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: cleanup: +// CHECK-NEXT: %cleanup.dest.saved = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr %j) +// CHECK-NEXT: store i32 0, ptr %j, align 4 +// CHECK-NEXT: br label %for.cond1 +// CHECK: for.cond1: +// CHECK-NEXT: %1 = load i32, ptr %j, align 4 +// CHECK-NEXT: %cmp2 = icmp ne i32 %1, 1 +// CHECK-NEXT: br i1 %cmp2, label %for.body, label %for.cond.cleanup +// CHECK: for.cond.cleanup: +// CHECK-NEXT: store i32 5, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr %j) +// CHECK-NEXT: br label %for.end +// CHECK: for.body: +// CHECK-NEXT: call void @g() +// CHECK-NEXT: br label %for.inc +// CHECK: for.inc: +// CHECK-NEXT: %2 = load i32, ptr %j, align 4 +// CHECK-NEXT: %inc = add nsw i32 %2, 1 +// CHECK-NEXT: store i32 %inc, ptr %j, align 4 +// CHECK-NEXT: br label %for.cond1 +// CHECK: for.end: +// CHECK-NEXT: store i32 %cleanup.dest.saved, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: %cleanup.dest = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: switch i32 %cleanup.dest, label %cleanup6 [ +// CHECK-NEXT: i32 0, label %cleanup.cont +// CHECK-NEXT: ] +// CHECK: cleanup.cont: +// CHECK-NEXT: br label %for.inc4 +// CHECK: for.inc4: +// CHECK-NEXT: %3 = load i32, ptr %i, align 4 +// CHECK-NEXT: %inc5 = add nsw i32 %3, 1 +// CHECK-NEXT: store i32 %inc5, ptr %i, align 4 +// CHECK-NEXT: br label %for.cond +// CHECK: cleanup6: +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr %i) +// CHECK-NEXT: br label %for.end7 +// CHECK: for.end7: +// CHECK-NEXT: ret void +void f1() { + for (int i = 0;; i++) { + _Defer { + for (int j = 0; j != 1; j++) { + g(); + } + } + if (i == 1) break; + } +} + +// CHECK-LABEL: define {{.*}} 
void @f2() +// CHECK: entry: +// CHECK-NEXT: %i = alloca i32, align 4 +// CHECK-NEXT: %cleanup.dest.slot = alloca i32, align 4 +// CHECK-NEXT: %j = alloca i32, align 4 +// CHECK-NEXT: %k = alloca i32, align 4 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr %i) +// CHECK-NEXT: store i32 0, ptr %i, align 4 +// CHECK-NEXT: br label %for.cond +// CHECK: for.cond: +// CHECK-NEXT: %0 = load i32, ptr %i, align 4 +// CHECK-NEXT: %cmp = icmp eq i32 %0, 1 +// CHECK-NEXT: br i1 %cmp, label %if.then, label %if.end +// CHECK: if.then: +// CHECK-NEXT: store i32 2, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: if.end: +// CHECK-NEXT: store i32 0, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: cleanup: +// CHECK-NEXT: %cleanup.dest.saved = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr %j) +// CHECK-NEXT: store i32 0, ptr %j, align 4 +// CHECK-NEXT: br label %for.cond1 +// CHECK: for.cond1: +// CHECK-NEXT: %1 = load i32, ptr %j, align 4 +// CHECK-NEXT: %cmp2 = icmp eq i32 %1, 1 +// CHECK-NEXT: br i1 %cmp2, label %if.then3, label %if.end4 +// CHECK: if.then3: +// CHECK-NEXT: store i32 5, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup5 +// CHECK: if.end4: +// CHECK-NEXT: store i32 0, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup5 +// CHECK: cleanup5: +// CHECK-NEXT: %cleanup.dest.saved6 = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr %k) +// CHECK-NEXT: store i32 0, ptr %k, align 4 +// CHECK-NEXT: br label %for.cond7 +// CHECK: for.cond7: +// CHECK-NEXT: %2 = load i32, ptr %k, align 4 +// CHECK-NEXT: %cmp8 = icmp ne i32 %2, 1 +// CHECK-NEXT: br i1 %cmp8, label %for.body, label %for.cond.cleanup +// CHECK: for.cond.cleanup: +// CHECK-NEXT: store i32 8, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr %k) +// CHECK-NEXT: br label %for.end +// CHECK: for.body: +// CHECK-NEXT: call void @g() +// CHECK-NEXT: br label %for.inc +// CHECK: for.inc: +// CHECK-NEXT: %3 = load i32, ptr %k, align 4 +// CHECK-NEXT: %inc = add nsw i32 %3, 1 +// CHECK-NEXT: store i32 %inc, ptr %k, align 4 +// CHECK-NEXT: br label %for.cond7 +// CHECK: for.end: +// CHECK-NEXT: store i32 %cleanup.dest.saved6, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: %cleanup.dest = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: switch i32 %cleanup.dest, label %cleanup12 [ +// CHECK-NEXT: i32 0, label %cleanup.cont +// CHECK-NEXT: ] +// CHECK: cleanup.cont: +// CHECK-NEXT: br label %for.inc10 +// CHECK: for.inc10: +// CHECK-NEXT: %4 = load i32, ptr %j, align 4 +// CHECK-NEXT: %inc11 = add nsw i32 %4, 1 +// CHECK-NEXT: store i32 %inc11, ptr %j, align 4 +// CHECK-NEXT: br label %for.cond1 +// CHECK: cleanup12: +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr %j) +// CHECK-NEXT: br label %for.end13 +// CHECK: for.end13: +// CHECK-NEXT: store i32 %cleanup.dest.saved, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: %cleanup.dest14 = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: switch i32 %cleanup.dest14, label %cleanup18 [ +// CHECK-NEXT: i32 0, label %cleanup.cont15 +// CHECK-NEXT: ] +// CHECK: cleanup.cont15: +// CHECK-NEXT: br label %for.inc16 +// CHECK: for.inc16: +// CHECK-NEXT: %5 = load i32, ptr %i, align 4 +// CHECK-NEXT: %inc17 = add nsw i32 %5, 1 +// CHECK-NEXT: store i32 %inc17, ptr %i, align 4 +// CHECK-NEXT: br label %for.cond +// CHECK: cleanup18: +// CHECK-NEXT: call void 
@llvm.lifetime.end.p0(ptr %i) +// CHECK-NEXT: br label %for.end19 +// CHECK: for.end19: +// CHECK-NEXT: ret void +void f2() { + for (int i = 0;; i++) { + _Defer { + for (int j = 0;; j++) { + _Defer { + for (int k = 0; k != 1; k++) { + g(); + } + } + if (j == 1) break; + } + } + if (i == 1) break; + } +} diff --git a/clang/test/CodeGen/defer-ts-seh.c b/clang/test/CodeGen/defer-ts-seh.c new file mode 100644 index 0000000000000..a91816f50d8d5 --- /dev/null +++ b/clang/test/CodeGen/defer-ts-seh.c @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -triple x86_64-windows-msvc -std=c23 -fdefer-ts -fms-compatibility -emit-llvm %s -o - | FileCheck %s + +void g(); +void h(); + +void f() { + __try { + _Defer h(); + g(); + } __finally { + + } +} + +// CHECK-LABEL: define {{.*}} void @f() {{.*}} personality ptr @__C_specific_handler +// CHECK: entry: +// CHECK: invoke void @g() #4 +// CHECK: to label %invoke.cont unwind label %ehcleanup +// CHECK: invoke.cont: +// CHECK: invoke void @h() #4 +// CHECK: to label %invoke.cont1 unwind label %ehcleanup3 +// CHECK: invoke.cont1: +// CHECK: %0 = call ptr @llvm.localaddress() +// CHECK: call void @"?fin$0@0@f@@"(i8 {{.*}} 0, ptr {{.*}} %0) +// CHECK: ret void +// CHECK: ehcleanup: +// CHECK: %1 = cleanuppad within none [] +// CHECK: invoke void @h() #4 [ "funclet"(token %1) ] +// CHECK: to label %invoke.cont2 unwind label %ehcleanup3 +// CHECK: invoke.cont2: +// CHECK: cleanupret from %1 unwind label %ehcleanup3 +// CHECK: ehcleanup3: +// CHECK: %2 = cleanuppad within none [] +// CHECK: %3 = call ptr @llvm.localaddress() +// CHECK: call void @"?fin$0@0@f@@"(i8 {{.*}} 1, ptr {{.*}} %3) [ "funclet"(token %2) ] +// CHECK: cleanupret from %2 unwind to caller + +// CHECK-LABEL: define {{.*}} void @"?fin$0@0@f@@"(i8 {{.*}} %abnormal_termination, ptr {{.*}} %frame_pointer) +// CHECK: entry: +// CHECK: %frame_pointer.addr = alloca ptr, align 8 +// CHECK: %abnormal_termination.addr = alloca i8, align 1 +// CHECK: store ptr %frame_pointer, ptr %frame_pointer.addr, align 8 +// CHECK: store i8 %abnormal_termination, ptr %abnormal_termination.addr, align 1 +// CHECK: ret void diff --git a/clang/test/CodeGen/defer-ts.c b/clang/test/CodeGen/defer-ts.c new file mode 100644 index 0000000000000..79b09064d330c --- /dev/null +++ b/clang/test/CodeGen/defer-ts.c @@ -0,0 +1,652 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux -std=c23 -fdefer-ts -emit-llvm %s -o - | FileCheck %s + +#define defer _Defer + +void a(); +void b(); +void c(); +void x(int q); +bool q(int q); +[[noreturn]] void noreturn(); + +// CHECK-LABEL: define {{.*}} void @f1() +void f1() { + // CHECK: call void @c() + // CHECK: call void @b() + // CHECK: call void @a() + defer a(); + defer b(); + defer c(); +} + +// CHECK-LABEL: define {{.*}} void @f2() +void f2() { + // CHECK: call void @x(i32 {{.*}} 1) + // CHECK: call void @x(i32 {{.*}} 2) + // CHECK: call void @x(i32 {{.*}} 3) + // CHECK: call void @x(i32 {{.*}} 4) + // CHECK: call void @x(i32 {{.*}} 5) + defer x(5); + { + defer x(4); + { + defer x(2); + defer x(1); + } + x(3); + } +} + +// CHECK-LABEL: define {{.*}} void @f3(i1 {{.*}} %ret) +void f3(bool ret) { + // CHECK: entry: + // CHECK: %ret.addr = alloca i8, align 1 + // CHECK: %cleanup.dest.slot = alloca i32, align 4 + // CHECK: %storedv = zext i1 %ret to i8 + // CHECK: store i8 %storedv, ptr %ret.addr, align 1 + // CHECK: %0 = load i8, ptr %ret.addr, align 1 + // CHECK: %loadedv = trunc i8 %0 to i1 + // CHECK: br i1 %loadedv, label %if.then, label %if.end + // CHECK: if.then: + // CHECK: store i32 1, ptr 
%cleanup.dest.slot, align 4 + // CHECK: br label %cleanup + // CHECK: if.end: + // CHECK: %cleanup.dest.saved = load i32, ptr %cleanup.dest.slot, align 4 + // CHECK: call void @x(i32 {{.*}} 1) + // CHECK: store i32 %cleanup.dest.saved, ptr %cleanup.dest.slot, align 4 + // CHECK: store i32 0, ptr %cleanup.dest.slot, align 4 + // CHECK: br label %cleanup + // CHECK: cleanup: + // CHECK: %cleanup.dest.saved1 = load i32, ptr %cleanup.dest.slot, align 4 + // CHECK: call void @x(i32 {{.*}} 2) + // CHECK: store i32 %cleanup.dest.saved1, ptr %cleanup.dest.slot, align 4 + // CHECK: %cleanup.dest = load i32, ptr %cleanup.dest.slot, align 4 + // CHECK: switch i32 %cleanup.dest, label %unreachable [ + // CHECK: i32 0, label %cleanup.cont + // CHECK: i32 1, label %cleanup.cont + // CHECK: ] + // CHECK: cleanup.cont: + // CHECK: ret void + // CHECK: unreachable: + // CHECK: unreachable + defer x(2); + if (ret) return; + defer x(1); +} + +// CHECK-LABEL: define {{.*}} void @ts_g() +void ts_g() { + // CHECK-NEXT: entry: + // CHECK-NEXT: ret void + // CHECK-NEXT: } + return; + defer x(42); +} + +// CHECK-LABEL: define {{.*}} void @ts_h() +void ts_h() { + // CHECK-NEXT: entry: + // CHECK-NEXT: br label %b + // CHECK-EMPTY: + goto b; + { + defer x(42); + } + + // CHECK-NEXT: b: + // CHECK-NEXT: ret void + // CHECK-NEXT: } + b: +} + +// CHECK-LABEL: define {{.*}} void @ts_i() +void ts_i() { + // CHECK: entry: + // CHECK: %cleanup.dest.slot = alloca i32, align 4 + // CHECK: store i32 2, ptr %cleanup.dest.slot, align 4 + // CHECK: %cleanup.dest.saved = load i32, ptr %cleanup.dest.slot, align 4 + // CHECK: call void @x(i32 {{.*}} 42) + // CHECK: store i32 %cleanup.dest.saved, ptr %cleanup.dest.slot, align 4 + // CHECK: %cleanup.dest = load i32, ptr %cleanup.dest.slot, align 4 + // CHECK: switch i32 %cleanup.dest, label %unreachable [ + // CHECK: i32 2, label %b + // CHECK: ] + // CHECK: b: + // CHECK: ret void + // CHECK: unreachable: + // CHECK: unreachable + { + defer { x(42); } + goto b; + } + b: +} + + +// CHECK-LABEL: define {{.*}} void @ts_m() +void ts_m() { + // CHECK: entry: + // CHECK: br label %b + // CHECK: b: + // CHECK: call void @x(i32 {{.*}} 1) + // CHECK: ret void + goto b; + { + b: + defer x(1); + } +} + +// CHECK-LABEL: define {{.*}} void @ts_p() +void ts_p() { + // CHECK: entry: + // CHECK: br label %b + // CHECK: b: + // CHECK: ret void + { + goto b; + defer x(42); + } + b: +} + +// CHECK-LABEL: define {{.*}} void @ts_r() +void ts_r() { + // CHECK: entry: + // CHECK: br label %b + // CHECK: b: + // CHECK: call void @x(i32 {{.*}} 42) + // CHECK: br label %b + { + b: + defer x(42); + } + goto b; +} + +// CHECK-LABEL: define {{.*}} i32 @return_value() +int return_value() { + // CHECK: entry: + // CHECK: %r = alloca i32, align 4 + // CHECK: %p = alloca ptr, align 8 + // CHECK: store i32 4, ptr %r, align 4 + // CHECK: store ptr %r, ptr %p, align 8 + // CHECK: %0 = load ptr, ptr %p, align 8 + // CHECK: %1 = load i32, ptr %0, align 4 + // CHECK: %2 = load ptr, ptr %p, align 8 + // CHECK: store i32 5, ptr %2, align 4 + // CHECK: ret i32 %1 + int r = 4; + int* p = &r; + defer { *p = 5; } + return *p; +} + +void* malloc(__SIZE_TYPE__ size); +void free(void* ptr); +int use_buffer(__SIZE_TYPE__ size, void* ptr); + +// CHECK-LABEL: define {{.*}} i32 @malloc_free_example() +int malloc_free_example() { + // CHECK: entry: + // CHECK: %size = alloca i32, align 4 + // CHECK: %buf = alloca ptr, align 8 + // CHECK: store i32 20, ptr %size, align 4 + // CHECK: %call = call ptr @malloc(i64 {{.*}} 20) + // CHECK: 
store ptr %call, ptr %buf, align 8 + // CHECK: %0 = load ptr, ptr %buf, align 8 + // CHECK: %call1 = call i32 @use_buffer(i64 {{.*}} 20, ptr {{.*}} %0) + // CHECK: %1 = load ptr, ptr %buf, align 8 + // CHECK: call void @free(ptr {{.*}} %1) + // CHECK: ret i32 %call1 + const int size = 20; + void* buf = malloc(size); + defer { free(buf); } + return use_buffer(size, buf); +} + +// CHECK-LABEL: define {{.*}} void @sequencing_1() +void sequencing_1() { + // CHECK: entry: + // CHECK: call void @x(i32 {{.*}} 1) + // CHECK: call void @x(i32 {{.*}} 2) + // CHECK: call void @x(i32 {{.*}} 3) + // CHECK: ret void + { + defer { + x(3); + } + if (true) + defer x(1); + x(2); + } +} + +// CHECK-LABEL: define {{.*}} void @sequencing_2() +void sequencing_2() { + // CHECK: entry: + // CHECK: %arr = alloca [3 x i32], align 4 + // CHECK: %i = alloca i32, align 4 + // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %arr, ptr align 4 @__const.sequencing_2.arr, i64 12, i1 false) + // CHECK: store i32 0, ptr %i, align 4 + // CHECK: br label %for.cond + // CHECK: for.cond: + // CHECK: %0 = load i32, ptr %i, align 4 + // CHECK: %cmp = icmp ult i32 %0, 3 + // CHECK: br i1 %cmp, label %for.body, label %for.end + // CHECK: for.body: + // CHECK: %1 = load i32, ptr %i, align 4 + // CHECK: %idxprom = zext i32 %1 to i64 + // CHECK: %arrayidx = getelementptr inbounds nuw [3 x i32], ptr %arr, i64 0, i64 %idxprom + // CHECK: %2 = load i32, ptr %arrayidx, align 4 + // CHECK: call void @x(i32 {{.*}} %2) + // CHECK: br label %for.inc + // CHECK: for.inc: + // CHECK: %3 = load i32, ptr %i, align 4 + // CHECK: %inc = add i32 %3, 1 + // CHECK: store i32 %inc, ptr %i, align 4 + // CHECK: br label %for.cond + // CHECK: for.end: + // CHECK: call void @x(i32 {{.*}} 4) + // CHECK: call void @x(i32 {{.*}} 5) + // CHECK: ret void + { + int arr[] = {1, 2, 3}; + defer { + x(5); + } + for (unsigned i = 0; i < 3; ++i) + defer x(arr[i]); + x(4); + } +} + +// CHECK-LABEL: define {{.*}} void @sequencing_3() +void sequencing_3() { + // CHECK: entry: + // CHECK: %r = alloca i32, align 4 + // CHECK: store i32 0, ptr %r, align 4 + // CHECK: %0 = load i32, ptr %r, align 4 + // CHECK: %add = add nsw i32 %0, 1 + // CHECK: store i32 %add, ptr %r, align 4 + // CHECK: %1 = load i32, ptr %r, align 4 + // CHECK: %mul = mul nsw i32 %1, 2 + // CHECK: store i32 %mul, ptr %r, align 4 + // CHECK: %2 = load i32, ptr %r, align 4 + // CHECK: %add1 = add nsw i32 %2, 3 + // CHECK: store i32 %add1, ptr %r, align 4 + // CHECK: %3 = load i32, ptr %r, align 4 + // CHECK: %mul2 = mul nsw i32 %3, 4 + // CHECK: store i32 %mul2, ptr %r, align 4 + // CHECK: ret void + int r = 0; + { + defer { + defer r *= 4; + r *= 2; + defer { + r += 3; + } + } + defer r += 1; + } +} + +// CHECK-LABEL: define {{.*}} void @defer_stmt(i32 {{.*}} %q) +void defer_stmt(int q) { + // CHECK: entry: + // CHECK: %q.addr = alloca i32, align 4 + // CHECK: store i32 %q, ptr %q.addr, align 4 + // CHECK: %0 = load i32, ptr %q.addr, align 4 + // CHECK: %cmp = icmp eq i32 %0, 3 + // CHECK: br i1 %cmp, label %if.then, label %if.end + // CHECK: if.then: + // CHECK: call void @x(i32 {{.*}} 42) + // CHECK: br label %if.end + // CHECK: if.end: + // CHECK: ret void + defer if (q == 3) x(42); +} + +// CHECK-LABEL: define {{.*}} void @defer_defer() +void defer_defer() { + // CHECK: entry: + // CHECK: call void @x(i32 {{.*}} 0) + // CHECK: call void @x(i32 {{.*}} 1) + // CHECK: call void @x(i32 {{.*}} 2) + // CHECK: call void @x(i32 {{.*}} 3) + // CHECK: call void @x(i32 {{.*}} 4) + // CHECK: ret void + defer 
x(4); + defer defer x(3); + defer defer defer x(2); + defer defer defer defer x(1); + x(0); +} + +// CHECK-LABEL: define {{.*}} i32 @vla(ptr {{.*}} %p, i32 {{.*}} %x) +int vla(int* p, int x) { + // CHECK: entry: + // CHECK: %retval = alloca i32, align 4 + // CHECK: %p.addr = alloca ptr, align 8 + // CHECK: %x.addr = alloca i32, align 4 + // CHECK: %cleanup.dest.slot = alloca i32, align 4 + // CHECK: %saved_stack = alloca ptr, align 8 + // CHECK: %__vla_expr0 = alloca i64, align 8 + // CHECK: %saved_stack3 = alloca ptr, align 8 + // CHECK: %__vla_expr1 = alloca i64, align 8 + // CHECK: store ptr %p, ptr %p.addr, align 8 + // CHECK: store i32 %x, ptr %x.addr, align 4 + // CHECK: %0 = load i32, ptr %x.addr, align 4 + // CHECK: %cmp = icmp slt i32 %0, 5 + // CHECK: br i1 %cmp, label %if.then, label %if.end + // CHECK: if.then: + // CHECK: store i32 10, ptr %retval, align 4 + // CHECK: store i32 1, ptr %cleanup.dest.slot, align 4 + // CHECK: br label %cleanup + // CHECK: if.end: + // CHECK: store i32 7, ptr %retval, align 4 + // CHECK: store i32 1, ptr %cleanup.dest.slot, align 4 + // CHECK: %cleanup.dest.saved = load i32, ptr %cleanup.dest.slot, align 4 + // CHECK: %1 = load i32, ptr %x.addr, align 4 + // CHECK: %2 = zext i32 %1 to i64 + // CHECK: %3 = call ptr @llvm.stacksave.p0() + // CHECK: store ptr %3, ptr %saved_stack, align 8 + // CHECK: %vla = alloca i32, i64 %2, align 16 + // CHECK: store i64 %2, ptr %__vla_expr0, align 8 + // CHECK: %arrayidx = getelementptr inbounds i32, ptr %vla, i64 2 + // CHECK: store i32 4, ptr %arrayidx, align 8 + // CHECK: %arrayidx1 = getelementptr inbounds i32, ptr %vla, i64 2 + // CHECK: %4 = load i32, ptr %arrayidx1, align 8 + // CHECK: %5 = load ptr, ptr %p.addr, align 8 + // CHECK: store i32 %4, ptr %5, align 4 + // CHECK: %6 = load ptr, ptr %saved_stack, align 8 + // CHECK: call void @llvm.stackrestore.p0(ptr %6) + // CHECK: store i32 %cleanup.dest.saved, ptr %cleanup.dest.slot, align 4 + // CHECK: br label %cleanup + // CHECK: cleanup: + // CHECK: %cleanup.dest.saved2 = load i32, ptr %cleanup.dest.slot, align 4 + // CHECK: %7 = load i32, ptr %x.addr, align 4 + // CHECK: %8 = zext i32 %7 to i64 + // CHECK: %9 = call ptr @llvm.stacksave.p0() + // CHECK: store ptr %9, ptr %saved_stack3, align 8 + // CHECK: %vla4 = alloca i32, i64 %8, align 16 + // CHECK: store i64 %8, ptr %__vla_expr1, align 8 + // CHECK: %arrayidx5 = getelementptr inbounds i32, ptr %vla4, i64 2 + // CHECK: store i32 3, ptr %arrayidx5, align 8 + // CHECK: %arrayidx6 = getelementptr inbounds i32, ptr %vla4, i64 2 + // CHECK: %10 = load i32, ptr %arrayidx6, align 8 + // CHECK: %11 = load ptr, ptr %p.addr, align 8 + // CHECK: store i32 %10, ptr %11, align 4 + // CHECK: %12 = load ptr, ptr %saved_stack3, align 8 + // CHECK: call void @llvm.stackrestore.p0(ptr %12) + // CHECK: store i32 %cleanup.dest.saved2, ptr %cleanup.dest.slot, align 4 + // CHECK: %13 = load i32, ptr %retval, align 4 + // CHECK: ret i32 %13 + defer { + int a[x]; + a[2] = 3; + *p = a[2]; + } + if (x < 5) { return 10; } + defer { + int b[x]; + b[2] = 4; + *p = b[2]; + } + return 7; +} + +[[noreturn]] void exit(); +[[noreturn]] void _Exit(); +[[noreturn]] void foobar(); + +// CHECK-LABEL: define {{.*}} i32 @call_exit() +int call_exit() { + // CHECK: entry: + // CHECK: call void @exit() + // CHECK: unreachable + defer x(1); + exit(); +} + +// CHECK-LABEL: define {{.*}} i32 @call__Exit() +int call__Exit() { + // CHECK: entry: + // CHECK: call void @_Exit() + // CHECK: unreachable + defer x(1); + _Exit(); +} + +// CHECK-LABEL: 
define {{.*}} i32 @call_foobar() +int call_foobar() { + // CHECK: entry: + // CHECK: call void @foobar() + // CHECK: unreachable + defer x(1); + foobar(); +} + +// CHECK-LABEL: define {{.*}} i32 @main() +int main() { + // CHECK: entry: + // CHECK: %retval = alloca i32, align 4 + // CHECK: store i32 0, ptr %retval, align 4 + // CHECK: store i32 5, ptr %retval, align 4 + // CHECK: call void @x(i32 {{.*}} 42) + // CHECK: %0 = load i32, ptr %retval, align 4 + // CHECK: ret i32 %0 + defer x(42); + return 5; +} + +// CHECK-LABEL: define {{.*}} void @t() +// CHECK: entry: +// CHECK-NEXT: %count = alloca i32, align 4 +// CHECK-NEXT: %cleanup.dest.slot = alloca i32, align 4 +// CHECK-NEXT: store i32 0, ptr %count, align 4 +// CHECK-NEXT: br label %target +// CHECK: target: +// CHECK-NEXT: %0 = load i32, ptr %count, align 4 +// CHECK-NEXT: %inc = add nsw i32 %0, 1 +// CHECK-NEXT: store i32 %inc, ptr %count, align 4 +// CHECK-NEXT: %1 = load i32, ptr %count, align 4 +// CHECK-NEXT: %cmp = icmp sle i32 %1, 2 +// CHECK-NEXT: br i1 %cmp, label %if.then, label %if.end +// CHECK: if.then: +// CHECK-NEXT: store i32 2, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: if.end: +// CHECK-NEXT: store i32 0, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: cleanup: +// CHECK-NEXT: %cleanup.dest.saved = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: call void @x(i32 {{.*}} 1) +// CHECK-NEXT: store i32 %cleanup.dest.saved, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: %cleanup.dest = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: switch i32 %cleanup.dest, label %unreachable [ +// CHECK-NEXT: i32 0, label %cleanup.cont +// CHECK-NEXT: i32 2, label %target +// CHECK-NEXT: ] +// CHECK: cleanup.cont: +// CHECK-NEXT: call void @x(i32 {{.*}} 2) +// CHECK-NEXT: ret void +// CHECK: unreachable: +// CHECK-NEXT: unreachable +void t() { + int count = 0; + + { + target: + _Defer { x(1); } + ++count; + if (count <= 2) { + goto target; + } + } + + x(2); +} + +// CHECK-LABEL: define {{.*}} void @stmt_expr() +// CHECK: entry: +// CHECK-NEXT: %tmp = alloca i32, align 4 +// CHECK-NEXT: call void @x(i32 {{.*}} 1) +// CHECK-NEXT: call void @x(i32 {{.*}} 2) +// CHECK-NEXT: call void @x(i32 {{.*}} 3) +// CHECK-NEXT: call void @x(i32 {{.*}} 4) +// CHECK-NEXT: store i32 6, ptr %tmp, align 4 +// CHECK-NEXT: call void @x(i32 {{.*}} 5) +// CHECK-NEXT: %0 = load i32, ptr %tmp, align 4 +// CHECK-NEXT: call void @x(i32 {{.*}} %0) +// CHECK-NEXT: ret void +void stmt_expr() { + ({ + _Defer x(4); + _Defer ({ + _Defer x(3); + x(2); + }); + x(1); + }); + + x(({ + _Defer x(5); + 6; + })); +} + +// CHECK-LABEL: define {{.*}} void @cleanup_no_insert_point() +// CHECK: entry: +// CHECK-NEXT: %cleanup.dest.slot = alloca i32, align 4 +// CHECK-NEXT: br label %while.cond +// CHECK: while.cond: +// CHECK-NEXT: %call = call {{.*}} i1 @q(i32 {{.*}} 1) +// CHECK-NEXT: br i1 %call, label %while.body, label %while.end +// CHECK: while.body: +// CHECK-NEXT: %call1 = call {{.*}} i1 @q(i32 {{.*}} 2) +// CHECK-NEXT: br i1 %call1, label %if.then, label %if.end +// CHECK: if.then: +// CHECK-NEXT: store i32 2, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: if.end: +// CHECK-NEXT: %call2 = call {{.*}} i1 @q(i32 {{.*}} 3) +// CHECK-NEXT: br i1 %call2, label %if.then3, label %if.end4 +// CHECK: if.then3: +// CHECK-NEXT: store i32 3, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: if.end4: +// CHECK-NEXT: store i32 0, ptr 
%cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: cleanup: +// CHECK-NEXT: %cleanup.dest.saved = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: call void @noreturn() +// CHECK-NEXT: unreachable +// CHECK: 0: +// CHECK-NEXT: %cleanup.dest = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: switch i32 %cleanup.dest, label %unreachable [ +// CHECK-NEXT: i32 0, label %cleanup.cont +// CHECK-NEXT: i32 2, label %while.cond +// CHECK-NEXT: i32 3, label %while.end +// CHECK-NEXT: ] +// CHECK: cleanup.cont: +// CHECK-NEXT: br label %while.cond +// CHECK: while.end: +// CHECK-NEXT: ret void +// CHECK: unreachable: +// CHECK-NEXT: unreachable +void cleanup_no_insert_point() { + while (q(1)) { + _Defer { + noreturn(); + }; + if (q(2)) continue; + if (q(3)) break; + } +} + +// CHECK-LABEL: define {{.*}} void @cleanup_nested() +// CHECK: entry: +// CHECK-NEXT: %cleanup.dest.slot = alloca i32, align 4 +// CHECK-NEXT: br label %while.cond +// CHECK: while.cond: +// CHECK-NEXT: %call = call {{.*}} i1 @q(i32 {{.*}} 1) +// CHECK-NEXT: br i1 %call, label %while.body, label %while.end19 +// CHECK: while.body: +// CHECK-NEXT: %call1 = call {{.*}} i1 @q(i32 {{.*}} 6) +// CHECK-NEXT: br i1 %call1, label %if.then, label %if.end +// CHECK: if.then: +// CHECK-NEXT: store i32 2, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: if.end: +// CHECK-NEXT: %call2 = call {{.*}} i1 @q(i32 {{.*}} 7) +// CHECK-NEXT: br i1 %call2, label %if.then3, label %if.end4 +// CHECK: if.then3: +// CHECK-NEXT: store i32 3, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: if.end4: +// CHECK-NEXT: store i32 0, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup +// CHECK: cleanup: +// CHECK-NEXT: %cleanup.dest.saved = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %while.cond5 +// CHECK: while.cond5: +// CHECK-NEXT: %call6 = call {{.*}} i1 @q(i32 {{.*}} 2) +// CHECK-NEXT: br i1 %call6, label %while.body7, label %while.end +// CHECK: while.body7: +// CHECK-NEXT: %call8 = call {{.*}} i1 @q(i32 {{.*}} 4) +// CHECK-NEXT: br i1 %call8, label %if.then9, label %if.end10 +// CHECK: if.then9: +// CHECK-NEXT: store i32 4, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup14 +// CHECK: if.end10: +// CHECK-NEXT: %call11 = call {{.*}} i1 @q(i32 {{.*}} 5) +// CHECK-NEXT: br i1 %call11, label %if.then12, label %if.end13 +// CHECK: if.then12: +// CHECK-NEXT: store i32 5, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup14 +// CHECK: if.end13: +// CHECK-NEXT: store i32 0, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: br label %cleanup14 +// CHECK: cleanup14: +// CHECK-NEXT: %cleanup.dest.saved15 = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: %call16 = call {{.*}} i1 @q(i32 {{.*}} 3) +// CHECK-NEXT: store i32 %cleanup.dest.saved15, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: %cleanup.dest = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: switch i32 %cleanup.dest, label %unreachable [ +// CHECK-NEXT: i32 0, label %cleanup.cont +// CHECK-NEXT: i32 4, label %while.cond5 +// CHECK-NEXT: i32 5, label %while.end +// CHECK-NEXT: ] +// CHECK: cleanup.cont: +// CHECK-NEXT: br label %while.cond5 +// CHECK: while.end: +// CHECK-NEXT: store i32 %cleanup.dest.saved, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: %cleanup.dest17 = load i32, ptr %cleanup.dest.slot, align 4 +// CHECK-NEXT: switch i32 %cleanup.dest17, label %unreachable [ +// CHECK-NEXT: i32 0, label %cleanup.cont18 +// 
CHECK-NEXT: i32 2, label %while.cond +// CHECK-NEXT: i32 3, label %while.end19 +// CHECK-NEXT: ] +// CHECK: cleanup.cont18: +// CHECK-NEXT: br label %while.cond +// CHECK: while.end19: +// CHECK-NEXT: ret void +// CHECK: unreachable: +// CHECK-NEXT: unreachable +void cleanup_nested() { + while (q(1)) { + _Defer { + while (q(2)) { + _Defer { + q(3); + } + if (q(4)) continue; + if (q(5)) break; + } + }; + if (q(6)) continue; + if (q(7)) break; + } +} diff --git a/clang/test/Lexer/defer-keyword.cpp b/clang/test/Lexer/defer-keyword.cpp new file mode 100644 index 0000000000000..929f2c58f974a --- /dev/null +++ b/clang/test/Lexer/defer-keyword.cpp @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsyntax-only -verify -fdefer-ts %s + +// expected-no-diagnostics +int _Defer; diff --git a/clang/test/Parser/defer-ts.c b/clang/test/Parser/defer-ts.c new file mode 100644 index 0000000000000..118fe9ee3cc8f --- /dev/null +++ b/clang/test/Parser/defer-ts.c @@ -0,0 +1,58 @@ +// RUN: %clang_cc1 -std=c11 -fsyntax-only -fdefer-ts -verify %s +// RUN: %clang_cc1 -std=c23 -fsyntax-only -fdefer-ts -verify %s + +#define defer _Defer + +int g(void); +int h(int x); + +void f1(void) { + defer 1; // expected-warning {{expression result unused}} + defer 1 + 1; // expected-warning {{expression result unused}} + defer "a"; // expected-warning {{expression result unused}} + defer "a" "b" "c"; // expected-warning {{expression result unused}} + defer defer 1; // expected-warning {{expression result unused}} + defer defer defer defer 1; // expected-warning {{expression result unused}} + defer (int) 4; // expected-warning {{expression result unused}} + defer g(); + + defer {} + defer { defer {} } + defer { defer {} defer {} } + + defer if (g()) g(); + defer while (g()) g(); + defer for (int i = 0; i < 10; i++) h(i); + defer switch (g()) { case 1: g(); } + + defer; // expected-warning {{defer statement has empty body}} expected-note {{put the semicolon on a separate line}} + defer + ; + + defer a: g(); // expected-error {{substatement of defer must not be a label}} + defer b: {} // expected-error {{substatement of defer must not be a label}} + defer { c: g(); } + + if (g()) defer g(); + while (g()) defer g(); + defer ({}); + ({ defer g(); }); + + defer int x; // expected-error {{expected expression}} + defer void q() {} // expected-error {{expected expression}} +} + +void f2(void) { + [[some, attributes]] defer g(); // expected-warning 2 {{unknown attribute}} + __attribute__((some_attribute)) defer g(); // expected-warning {{unknown attribute}} + [[some, attributes]] defer { g(); } // expected-warning 2 {{unknown attribute}} + __attribute__((some_attribute)) defer { g(); } // expected-warning {{unknown attribute}} +} + +void f3(void) { + _Defer 1; // expected-warning {{expression result unused}} + _Defer {} + _Defer _Defer {} + _Defer { defer {} _Defer {} } + _Defer if (g()) g(); +} diff --git a/clang/test/Parser/defer-ts.cpp b/clang/test/Parser/defer-ts.cpp new file mode 100644 index 0000000000000..fa25cac8575f6 --- /dev/null +++ b/clang/test/Parser/defer-ts.cpp @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fdefer-ts -verify %s + +void f() { + _Defer {} // expected-error {{use of undeclared identifier '_Defer'}} +} diff --git a/clang/test/Preprocessor/defer-ts.c b/clang/test/Preprocessor/defer-ts.c new file mode 100644 index 0000000000000..e4995ac9b23ea --- /dev/null +++ b/clang/test/Preprocessor/defer-ts.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fsyntax-only -fdefer-ts 
-verify=enabled %s +// RUN: %clang_cc1 -fsyntax-only -verify=disabled %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -fdefer-ts -verify=disabled %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -verify=disabled %s +// enabled-no-diagnostics +#if __STDC_DEFER_TS25755__ != 1 +// disabled-error@+1 {{Should have defined __STDC_DEFER_TS25755__ to 1}} +# error Should have defined __STDC_DEFER_TS25755__ to 1 +#endif diff --git a/clang/test/Sema/defer-ts-seh.c b/clang/test/Sema/defer-ts-seh.c new file mode 100644 index 0000000000000..4b773ed3f09a0 --- /dev/null +++ b/clang/test/Sema/defer-ts-seh.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -std=c23 -fdefer-ts -fms-compatibility -triple x86_64-windows-msvc -fsyntax-only -verify %s + +void f() { + __try { + _Defer { + __leave; // expected-error {{cannot __leave a defer statement}} + } + } __finally {} + + __try { + _Defer { + __try { + __leave; + } __finally {} + } + } __finally {} +} diff --git a/clang/test/Sema/defer-ts-sjlj.c b/clang/test/Sema/defer-ts-sjlj.c new file mode 100644 index 0000000000000..49230fa721e0f --- /dev/null +++ b/clang/test/Sema/defer-ts-sjlj.c @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -triple x86_64-windows-msvc -std=gnu23 -fdefer-ts -fsyntax-only -fblocks -verify %s + +typedef void** jmp_buf; +typedef void** sigjmp_buf; + +int setjmp(jmp_buf env); +int _setjmp(jmp_buf env); +int sigsetjmp(sigjmp_buf env, int savesigs); +int __sigsetjmp(sigjmp_buf env, int savesigs); +void longjmp(jmp_buf env, int val); +void _longjmp(jmp_buf env, int val); +void siglongjmp(sigjmp_buf env, int val); + +jmp_buf x; +sigjmp_buf y; +void f() { + _Defer { + __builtin_setjmp(x); // expected-error {{cannot use '__builtin_setjmp' inside a defer statement}} + __builtin_longjmp(x, 1); // expected-error {{cannot use '__builtin_longjmp' inside a defer statement}} + setjmp(x); // expected-error {{cannot use 'setjmp' inside a defer statement}} + _setjmp(x); // expected-error {{cannot use '_setjmp' inside a defer statement}} + sigsetjmp(y, 0); // expected-error {{cannot use 'sigsetjmp' inside a defer statement}} + __sigsetjmp(y, 0); // expected-error {{cannot use '__sigsetjmp' inside a defer statement}} + longjmp(x, 0); // expected-error {{cannot use 'longjmp' inside a defer statement}} + _longjmp(x, 0); // expected-error {{cannot use '_longjmp' inside a defer statement}} + siglongjmp(y, 0); // expected-error {{cannot use 'siglongjmp' inside a defer statement}} + + (void) ^{ + __builtin_setjmp(x); + __builtin_longjmp(x, 1); + setjmp(x); + _setjmp(x); + sigsetjmp(y, 0); + __sigsetjmp(y, 0); + longjmp(x, 0); + _longjmp(x, 0); + siglongjmp(y, 0); + + _Defer { + __builtin_setjmp(x); // expected-error {{cannot use '__builtin_setjmp' inside a defer statement}} + __builtin_longjmp(x, 1); // expected-error {{cannot use '__builtin_longjmp' inside a defer statement}} + setjmp(x); // expected-error {{cannot use 'setjmp' inside a defer statement}} + _setjmp(x); // expected-error {{cannot use '_setjmp' inside a defer statement}} + sigsetjmp(y, 0); // expected-error {{cannot use 'sigsetjmp' inside a defer statement}} + __sigsetjmp(y, 0); // expected-error {{cannot use '__sigsetjmp' inside a defer statement}} + longjmp(x, 0); // expected-error {{cannot use 'longjmp' inside a defer statement}} + _longjmp(x, 0); // expected-error {{cannot use '_longjmp' inside a defer statement}} + siglongjmp(y, 0); // expected-error {{cannot use 'siglongjmp' inside a defer statement}} + } + }; + } +} diff --git a/clang/test/Sema/defer-ts.c b/clang/test/Sema/defer-ts.c new file mode 100644 index 
0000000000000..95c68fa213eaa --- /dev/null +++ b/clang/test/Sema/defer-ts.c @@ -0,0 +1,172 @@ +// RUN: %clang_cc1 -std=c23 -fdefer-ts -fsyntax-only -verify %s + +#define defer _Defer + +void a(); + +void f1() { + defer { + goto l1; + l1: + } + + defer { + l2: + goto l2; + } +} + +void f2() { + goto l1; // expected-error {{cannot jump from this goto statement to its label}} + defer { // expected-note {{jump enters a defer statement}} + l1: + } + + goto l2; // expected-error {{cannot jump from this goto statement to its label}} + defer {} // expected-note {{jump bypasses defer statement}} + l2: +} + +void f3() { + x: + defer { // expected-note {{jump exits a defer statement}} + goto x; // expected-error {{cannot jump from this goto statement to its label}} + } +} + +void f4() { + defer { // expected-note {{jump exits a defer statement}} + goto y; // expected-error {{cannot jump from this goto statement to its label}} + } + y: +} + +void f5() { + defer { // expected-note {{jump enters a defer statement}} + l2: + } + goto l2; // expected-error {{cannot jump from this goto statement to its label}} +} + +void f6() { + goto b; // expected-error {{cannot jump from this goto statement to its label}} + { + defer {} // expected-note {{jump bypasses defer statement}} + b: + } + + { + defer {} // expected-note {{jump bypasses defer statement}} + b2: + } + goto b2; // expected-error {{cannot jump from this goto statement to its label}} +} + +void f7() { + defer { // expected-note {{jump bypasses defer statement}} + goto cross1; // expected-error {{cannot jump from this goto statement to its label}} + cross2: + } + defer { // expected-note {{jump exits a defer statement}} expected-note {{jump enters a defer statement}} + goto cross2; // expected-error {{cannot jump from this goto statement to its label}} + cross1: + } +} + +void f8() { + defer { + return; // expected-error {{cannot return from a defer statement}} + } + + { + defer { + return; // expected-error {{cannot return from a defer statement}} + } + } + + switch (1) { + case 1: defer { + break; // expected-error {{cannot break out of a defer statement}} + } + } + + for (;;) { + defer { + break; // expected-error {{cannot break out of a defer statement}} + } + } + + for (;;) { + defer { + continue; // expected-error {{cannot continue loop outside of enclosing defer statement}} + } + } + + switch (1) { + defer {} // expected-note {{jump bypasses defer statement}} + default: // expected-error {{cannot jump from switch statement to this case label}} + defer {} + break; + } + + switch (1) { + case 1: { + defer { // expected-note {{jump enters a defer statement}} + case 2: {} // expected-error {{cannot jump from switch statement to this case label}} + } + } + } + + switch (1) { + case 1: defer { + switch (2) { case 2: break; } + } + } + + for (;;) { + defer { for (;;) break; } + } + + for (;;) { + defer { for (;;) continue; } + } +} + +void f9() { + { + defer {} + goto l1; + } + l1: + + { + goto l2; + defer {} + } + l2: + + { + { defer {} } + goto l3; + } + l3: + + { + defer {} + { goto l4; } + } + l4: +} + +void f10(int i) { + switch (i) { + defer case 12: break; // expected-error {{cannot break out of a defer statement}} \ + expected-error {{cannot jump from switch statement to this case label}} \ + expected-note {{jump enters a defer statement}} \ + expected-note {{jump bypasses defer statement}} + + defer default: break; // expected-error {{cannot break out of a defer statement}} \ + expected-error {{cannot jump from switch statement to this case 
label}} \ + expected-note {{jump enters a defer statement}} + } +} diff --git a/clang/test/Sema/warn-lifetime-safety-suggestions.cpp b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp index c0f675a301d14..9f3ccb7fca770 100644 --- a/clang/test/Sema/warn-lifetime-safety-suggestions.cpp +++ b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety-suggestions -verify %s +// RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -fexperimental-lifetime-safety-inference -Wexperimental-lifetime-safety-suggestions -Wexperimental-lifetime-safety -verify %s struct MyObj { int id; @@ -89,6 +89,98 @@ void test_getView_on_temporary() { (void)sv; } +//===----------------------------------------------------------------------===// +// Annotation Inference Test Cases +//===----------------------------------------------------------------------===// + +namespace correct_order_inference { +View return_view_by_func (View a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return return_view_directly(a); // expected-note {{param returned here}} +} + +MyObj* return_pointer_by_func (MyObj* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return return_pointer_object(a); // expected-note {{param returned here}} +} +} // namespace correct_order_inference + +namespace incorrect_order_inference_view { +View return_view_callee(View a); + +// FIXME: No lifetime annotation suggestion when functions are not present in the callee-before-caller pattern +View return_view_caller(View a) { + return return_view_callee(a); +} + +View return_view_callee(View a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} +} // namespace incorrect_order_inference_view + +namespace incorrect_order_inference_object { +MyObj* return_object_callee(MyObj* a); + +// FIXME: No lifetime annotation suggestion warning when functions are not present in the callee-before-caller pattern +MyObj* return_object_caller(MyObj* a) { + return return_object_callee(a); +} + +MyObj* return_object_callee(MyObj* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} +} // namespace incorrect_order_inference_object + +namespace simple_annotation_inference { +View inference_callee_return_identity(View a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} + +View inference_caller_forwards_callee(View a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return inference_callee_return_identity(a); // expected-note {{param returned here}} +} + +View inference_top_level_return_stack_view() { + MyObj local_stack; + return inference_caller_forwards_callee(local_stack); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} +} // namespace simple_annotation_inference + +namespace inference_in_order_with_redecls { +View inference_callee_return_identity(View a); +View inference_callee_return_identity(View a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. 
+ return a; // expected-note {{param returned here}} +} + +View inference_caller_forwards_callee(View a); +View inference_caller_forwards_callee(View a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return inference_callee_return_identity(a); // expected-note {{param returned here}} +} + +View inference_top_level_return_stack_view() { + MyObj local_stack; + return inference_caller_forwards_callee(local_stack); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} +} // namespace inference_in_order_with_redecls + +namespace inference_with_templates { +template <typename T> +T* template_identity(T* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} + +template <typename T> +T* template_caller(T* a) { + return template_identity(a); // expected-note {{in instantiation of function template specialization 'inference_with_templates::template_identity<MyObj>' requested here}} +} + +// FIXME: Fails to detect UAR as template instantiations are deferred to the end of the Translation Unit. +MyObj* test_template_inference_with_stack() { + MyObj local_stack; + return template_caller(&local_stack); // expected-note {{in instantiation of function template specialization 'inference_with_templates::template_caller<MyObj>' requested here}} +} +} // namespace inference_with_templates + //===----------------------------------------------------------------------===// // Negative Test Cases //===----------------------------------------------------------------------===// diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 0a43d73063c1f..c49ca567049c7 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -224,6 +224,11 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, K = CXCursor_ReturnStmt; break; + // Not exposed for now because '_Defer' is currently just a TS. 
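+ // Until then, clients observe defer statements as generic CXCursor_UnexposedStmt cursors.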
+ case Stmt::DeferStmtClass: + K = CXCursor_UnexposedStmt; + break; + case Stmt::GCCAsmStmtClass: K = CXCursor_GCCAsmStmt; break; diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp index 25e1d67eb2294..9a7559405c43c 100644 --- a/clang/unittests/Tooling/ToolingTest.cpp +++ b/clang/unittests/Tooling/ToolingTest.cpp @@ -20,8 +20,10 @@ #include "clang/Testing/CommandLineArgs.h" #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" +#include "clang/Tooling/JSONCompilationDatabase.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/Path.h" #include "llvm/Support/TargetSelect.h" #include "llvm/TargetParser/Host.h" @@ -1034,5 +1036,136 @@ TEST(runToolOnCode, TestResetDiagnostics) { "void func() { long x; Foo f(x); }")); } +namespace { +struct TestCommand { + llvm::StringRef File; + llvm::StringRef Command; +}; + +std::string runToolWithProgress(llvm::ArrayRef<TestCommand> Commands, + llvm::StringRef BaseDir) { + std::string ErrorMessage; + + llvm::json::Array Entries; + for (const auto &Cmd : Commands) { + Entries.push_back(llvm::json::Object{ + {"directory", BaseDir}, {"command", Cmd.Command}, {"file", Cmd.File}}); + } + std::string DatabaseContent; + llvm::raw_string_ostream OS(DatabaseContent); + OS << llvm::json::Value(std::move(Entries)); + + std::unique_ptr<CompilationDatabase> Database( + JSONCompilationDatabase::loadFromBuffer(DatabaseContent, ErrorMessage, + JSONCommandLineSyntax::Gnu)); + if (!Database) { + ADD_FAILURE() << "Failed to load compilation database: " << ErrorMessage; + return ""; + } + + std::vector<std::string> AbsoluteFiles; + for (const auto &Cmd : Commands) { + SmallString<32> NativeFile(BaseDir); + llvm::sys::path::append(NativeFile, Cmd.File); + llvm::sys::path::native(NativeFile); + std::string AbsPath = std::string(NativeFile); + if (AbsoluteFiles.empty() || AbsoluteFiles.back() != AbsPath) { + AbsoluteFiles.push_back(AbsPath); + } + } + + ClangTool Tool(*Database, AbsoluteFiles); + for (const auto &F : AbsoluteFiles) { + Tool.mapVirtualFile(F, "int x;"); + } + + testing::internal::CaptureStderr(); + Tool.run(newFrontendActionFactory<SyntaxOnlyAction>().get()); + return testing::internal::GetCapturedStderr(); +} +} // namespace + +TEST(ClangToolTest, ProgressReportSingleFile) { + SmallString<32> BaseDir; + llvm::sys::path::system_temp_directory(false, BaseDir); + llvm::sys::path::native(BaseDir, llvm::sys::path::Style::posix); + + EXPECT_TRUE( + runToolWithProgress({{"test.cpp", "clang++ -c test.cpp"}}, BaseDir) + .empty()); +} + +TEST(ClangToolTest, ProgressReportMultipleFiles) { + SmallString<32> BaseDir; + llvm::sys::path::system_temp_directory(false, BaseDir); + llvm::sys::path::native(BaseDir, llvm::sys::path::Style::posix); + + std::string Output = + runToolWithProgress({{"test1.cpp", "clang++ -c test1.cpp"}, + {"test2.cpp", "clang++ -c test2.cpp"}}, + BaseDir); + + SmallString<32> NativeFile1(BaseDir); + llvm::sys::path::append(NativeFile1, "test1.cpp"); + llvm::sys::path::native(NativeFile1); + SmallString<32> NativeFile2(BaseDir); + llvm::sys::path::append(NativeFile2, "test2.cpp"); + llvm::sys::path::native(NativeFile2); + + std::string Expected = "[1/2] Processing file " + std::string(NativeFile1) + + ".\n" + "[2/2] Processing file " + + std::string(NativeFile2) + ".\n"; + EXPECT_EQ(Output, Expected); +} + +TEST(ClangToolTest, ProgressReportMultipleCommands) { + SmallString<32> BaseDir; + llvm::sys::path::system_temp_directory(false, BaseDir); + 
llvm::sys::path::native(BaseDir, llvm::sys::path::Style::posix); + + std::string Output = + runToolWithProgress({{"test.cpp", "clang++ -c test.cpp -DCMD1"}, + {"test.cpp", "clang++ -c test.cpp -DCMD2"}}, + BaseDir); + + SmallString<32> NativeFile(BaseDir); + llvm::sys::path::append(NativeFile, "test.cpp"); + llvm::sys::path::native(NativeFile); + std::string Expected = + "[1/1] (1/2) Processing file " + std::string(NativeFile) + ".\n" + + "[1/1] (2/2) Processing file " + std::string(NativeFile) + ".\n"; + EXPECT_EQ(Output, Expected); +} + +TEST(ClangToolTest, ProgressReportMixed) { + SmallString<32> BaseDir; + llvm::sys::path::system_temp_directory(false, BaseDir); + llvm::sys::path::native(BaseDir, llvm::sys::path::Style::posix); + + std::string Output = + runToolWithProgress({{"test1.cpp", "clang++ -c test1.cpp"}, + {"test2.cpp", "clang++ -c test2.cpp -DCMD1"}, + {"test2.cpp", "clang++ -c test2.cpp -DCMD2"}, + {"test3.cpp", "clang++ -c test3.cpp"}}, + BaseDir); + + SmallString<32> NativeFile1(BaseDir); + llvm::sys::path::append(NativeFile1, "test1.cpp"); + llvm::sys::path::native(NativeFile1); + SmallString<32> NativeFile2(BaseDir); + llvm::sys::path::append(NativeFile2, "test2.cpp"); + llvm::sys::path::native(NativeFile2); + SmallString<32> NativeFile3(BaseDir); + llvm::sys::path::append(NativeFile3, "test3.cpp"); + llvm::sys::path::native(NativeFile3); + + std::string Expected = + "[1/3] Processing file " + std::string(NativeFile1) + ".\n" + + "[2/3] (1/2) Processing file " + std::string(NativeFile2) + ".\n" + + "[2/3] (2/2) Processing file " + std::string(NativeFile2) + ".\n" + + "[3/3] Processing file " + std::string(NativeFile3) + ".\n"; + EXPECT_EQ(Output, Expected); +} + } // end namespace tooling } // end namespace clang diff --git a/compiler-rt/lib/orc/CMakeLists.txt b/compiler-rt/lib/orc/CMakeLists.txt index b8d1b03b788c9..649d988d9d608 100644 --- a/compiler-rt/lib/orc/CMakeLists.txt +++ b/compiler-rt/lib/orc/CMakeLists.txt @@ -119,6 +119,7 @@ else() # not Apple elfnix_tls.x86-64.S elfnix_tls.aarch64.S elfnix_tls.ppc64.S + elfnix_tls.systemz.S sysv_reenter.arm64.S sysv_reenter.x86-64.S ) diff --git a/compiler-rt/lib/orc/elfnix_tls.systemz.S b/compiler-rt/lib/orc/elfnix_tls.systemz.S new file mode 100644 index 0000000000000..4e116c92a5a88 --- /dev/null +++ b/compiler-rt/lib/orc/elfnix_tls.systemz.S @@ -0,0 +1,42 @@ +//===-- elfnix_tls.systemz.S --------------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of the ORC runtime support library. +// +//===----------------------------------------------------------------------===// + +// The special thing about the s390 TLS ABI is that we do not have the +// standard __tls_get_addr function but rather the __tls_get_offset +// function, which differs in two important aspects: +// 1) __tls_get_offset gets a GOT offset instead of a pointer to the +// tls_index structure +// 2) __tls_get_offset returns the offset of the requested variable +// relative to the thread descriptor instead of a pointer to the variable. 

// The content of this file is systemz-only

#if defined(__s390x__) + + .text + // returns offset of TLV from TP in %r2. 
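+ // Note: on s390x the 64-bit thread pointer lives in access registers %a0 (high half) and %a1 (low half); the ear/sllg/ear sequence below reassembles it so the address returned by the helper can be converted into a TP-relative offset.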
+  .globl ___orc_rt_elfnix_tls_get_offset
+___orc_rt_elfnix_tls_get_offset:
+  stmg %r14, %r15, 112(%r15)
+  aghi %r15, -160
+  // Pass pointer to tls_index.
+  la %r2, 0(%r2, %r12)
+  brasl %r14, __orc_rt_elfnix_tls_get_addr_impl
+  // Return offset from TP.
+  ear %r0, %a0
+  sllg %r0, %r0, 32
+  ear %r0, %a1
+  sgr %r2, %r0
+  lmg %r14, %r15, 272(%r15)
+  br %r14
+
+#endif // defined(__s390x__)
diff --git a/compiler-rt/test/builtins/Unit/ppc/fixtfti_test.c b/compiler-rt/test/builtins/Unit/ppc/fixtfti_test.c
index 8b86d7879f7e7..f0c9fcd30c07e 100644
--- a/compiler-rt/test/builtins/Unit/ppc/fixtfti_test.c
+++ b/compiler-rt/test/builtins/Unit/ppc/fixtfti_test.c
@@ -1,3 +1,4 @@
+// XFAIL: *
 // REQUIRES: target-is-powerpc64le
 // RUN: %clang_builtins %s %librt -o %t && %run %t
diff --git a/compiler-rt/test/builtins/Unit/ppc/fixunstfti_test.c b/compiler-rt/test/builtins/Unit/ppc/fixunstfti_test.c
index 0eee31db1b5dd..fe706986a345d 100644
--- a/compiler-rt/test/builtins/Unit/ppc/fixunstfti_test.c
+++ b/compiler-rt/test/builtins/Unit/ppc/fixunstfti_test.c
@@ -1,3 +1,4 @@
+// XFAIL: *
 // REQUIRES: target-is-powerpc64le
 // RUN: %clang_builtins %s %librt -o %t && %run %t
diff --git a/compiler-rt/test/orc/TestCases/Linux/systemz/trivial-tls.S b/compiler-rt/test/orc/TestCases/Linux/systemz/trivial-tls.S
new file mode 100644
index 0000000000000..4bf1c578bd1d7
--- /dev/null
+++ b/compiler-rt/test/orc/TestCases/Linux/systemz/trivial-tls.S
@@ -0,0 +1,67 @@
+// RUN: %clang -c -o %t %s
+// RUN: %llvm_jitlink %t
+//
+// Test that basic ELF TLS works by adding together TLS variables with values
+// 0, 1, and -1, and returning the result (0 for success). This setup
+// tests both zero-initialized (.tbss) and non-zero-initialized
+// (.tdata) sections.
+
+  .section .data.rel.ro,"aw",@progbits
+  .p2align 3, 0x0
+.LCPI0_0:
+  .quad x@TLSGD
+.LCPI0_1:
+  .quad y@TLSGD
+.LCPI0_2:
+  .quad z@TLSGD
+
+  .text
+  .globl main
+  .p2align 4
+  .type main,@function
+main:
+  stmg %r10, %r15, 80(%r15)
+  aghi %r15, -160
+  lgrl %r2, .LCPI0_0
+  larl %r12, _GLOBAL_OFFSET_TABLE_
+  brasl %r14, __tls_get_offset@PLT:tls_gdcall:x
+  lgr %r13, %r2
+  lgrl %r2, .LCPI0_1
+  brasl %r14, __tls_get_offset@PLT:tls_gdcall:y
+  ear %r0, %a0
+  sllg %r11, %r0, 32
+  ear %r11, %a1
+  l %r10, 0(%r2,%r11)
+  lgrl %r2, .LCPI0_2
+  a %r10, 0(%r13,%r11)
+  brasl %r14, __tls_get_offset@PLT:tls_gdcall:z
+  a %r10, 0(%r2,%r11)
+  lgfr %r2, %r10
+  lmg %r10, %r15, 240(%r15)
+  br %r14
+.Lfunc_end0:
+  .size main, .Lfunc_end0-main
+
+
+  .type x,@object                 # @x
+  .section .tbss,"awT",@nobits
+  .globl x
+  .p2align 2, 0x0
+x:
+  .long 0                         # 0x0
+  .size x, 4
+
+  .type y,@object                 # @y
+  .section .tdata,"awT",@progbits
+  .globl y
+  .p2align 2, 0x0
+y:
+  .long 1                         # 0x1
+  .size y, 4
+
+  .type z,@object                 # @z
+  .globl z
+  .p2align 2, 0x0
+z:
+  .long 4294967295                # 0xffffffff
+  .size z, 4
diff --git a/flang-rt/lib/cuda/allocatable.cpp b/flang-rt/lib/cuda/allocatable.cpp
index ff1a225d66ce9..662703dfb6321 100644
--- a/flang-rt/lib/cuda/allocatable.cpp
+++ b/flang-rt/lib/cuda/allocatable.cpp
@@ -57,26 +57,34 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
 int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
     const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
-    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+    const Descriptor *errMsg, const char *sourceFile, int sourceLine,
+    bool sourceIsDevice) {
   int stat{RTNAME(CUFAllocatableAllocate)(
       alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
   if (stat == StatOk) {
    Terminator
terminator{sourceFile, sourceLine}; - Fortran::runtime::DoFromSourceAssign( - alloc, source, terminator, &MemmoveHostToDevice); + Fortran::runtime::DoFromSourceAssign(alloc, source, terminator, + sourceIsDevice ? &MemmoveDeviceToHost : &MemmoveHostToDevice); } return stat; } int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { - int stat{RTNAME(CUFAllocatableAllocateSync)( - alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; + const Descriptor *errMsg, const char *sourceFile, int sourceLine, + bool sourceIsDevice) { + int stat; + if (sourceIsDevice) { + stat = RTNAME(CUFAllocatableAllocate)( + alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine); + } else { + stat = RTNAME(CUFAllocatableAllocateSync)( + alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine); + } if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; - Fortran::runtime::DoFromSourceAssign( - alloc, source, terminator, &MemmoveHostToDevice); + Fortran::runtime::DoFromSourceAssign(alloc, source, terminator, + sourceIsDevice ? &MemmoveDeviceToHost : &MemmoveHostToDevice); } return stat; } diff --git a/flang-rt/lib/cuda/pointer.cpp b/flang-rt/lib/cuda/pointer.cpp index d3f5cfe8e96a1..f07b1a9b60924 100644 --- a/flang-rt/lib/cuda/pointer.cpp +++ b/flang-rt/lib/cuda/pointer.cpp @@ -56,26 +56,28 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t *stream, int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer, const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { + const Descriptor *errMsg, const char *sourceFile, int sourceLine, + bool sourceIsDevice) { int stat{RTNAME(CUFPointerAllocate)( pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; - Fortran::runtime::DoFromSourceAssign( - pointer, source, terminator, &MemmoveHostToDevice); + Fortran::runtime::DoFromSourceAssign(pointer, source, terminator, + sourceIsDevice ? &MemmoveDeviceToHost : &MemmoveHostToDevice); } return stat; } int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer, const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { + const Descriptor *errMsg, const char *sourceFile, int sourceLine, + bool sourceIsDevice) { int stat{RTNAME(CUFPointerAllocateSync)( pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; - Fortran::runtime::DoFromSourceAssign( - pointer, source, terminator, &MemmoveHostToDevice); + Fortran::runtime::DoFromSourceAssign(pointer, source, terminator, + sourceIsDevice ? 
&MemmoveDeviceToHost : &MemmoveHostToDevice); } return stat; } diff --git a/flang/include/flang/Optimizer/Analysis/TBAAForest.h b/flang/include/flang/Optimizer/Analysis/TBAAForest.h index b4932594114a1..0b70778eba3af 100644 --- a/flang/include/flang/Optimizer/Analysis/TBAAForest.h +++ b/flang/include/flang/Optimizer/Analysis/TBAAForest.h @@ -99,11 +99,25 @@ struct TBAATree { // |- "any data access" // | // |- "dummy arg data" - // |- "target data" - // | - // |- "allocated data" - // |- "direct data" - // |- "global data" + // | + // |- + // |- + // |- "target data" <-- Any POINTER variable or TARGET dummy arg + // | + // |- <--- any TARGET variable which isn't a dummy arg + // |- + // |- "allocated data" + // | + // |- + // |- + // |- "direct data" + // | + // |- + // |- + // |- "global data" + // | + // |- + // |- static TBAATree buildTree(mlir::StringAttr functionName); private: diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 636879f28a2fb..34ac21c51b933 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -100,8 +100,9 @@ def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments, Optional:$stream, Arg, "", [MemWrite]>:$pinned, Arg, "", [MemRead]>:$source, - cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat, - UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer); + OptionalAttr:$data_attr, UnitAttr:$hasStat, + UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer, + UnitAttr:$device_source); let results = (outs AnyIntegerType:$stat); diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h index 6c97afa9e10e8..97f24bc34bfb8 100644 --- a/flang/include/flang/Runtime/CUDA/allocatable.h +++ b/flang/include/flang/Runtime/CUDA/allocatable.h @@ -34,14 +34,16 @@ int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t *stream = nullptr, int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + const char *sourceFile = nullptr, int sourceLine = 0, + bool sourceIsDevice = false); /// Perform allocation of the descriptor with synchronization of it when /// necessary. Assign data from source. int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + const char *sourceFile = nullptr, int sourceLine = 0, + bool sourceIsDevice = false); /// Perform deallocation of the descriptor with synchronization of it when /// necessary. 
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
index bdfc3268e0814..b845fd59114d4 100644
--- a/flang/include/flang/Runtime/CUDA/pointer.h
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -34,14 +34,16 @@ int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t *stream = nullptr,
 int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
     const Descriptor &source, int64_t *stream = nullptr,
     bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr,
-    const char *sourceFile = nullptr, int sourceLine = 0);
+    const char *sourceFile = nullptr, int sourceLine = 0,
+    bool sourceIsDevice = false);
 
 /// Perform allocation of the descriptor with synchronization of it when
 /// necessary. Assign data from source.
 int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
     const Descriptor &source, int64_t *stream = nullptr,
     bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr,
-    const char *sourceFile = nullptr, int sourceLine = 0);
+    const char *sourceFile = nullptr, int sourceLine = 0,
+    bool sourceIsDevice = false);
 
 } // extern "C"
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 5fc47107f0e88..bdf651f49f76e 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -682,9 +682,10 @@ class AllocateStmtHelper {
     bool isAMDMemoryAllocatorEnabled = langFeatures.IsEnabled(
         Fortran::common::LanguageFeature::AmdMemoryAllocator);
 
+    bool sourceIsDevice = false;
     if (const Fortran::semantics::Symbol *sym{GetLastSymbol(sourceExpr)})
       if (Fortran::semantics::IsCUDADevice(*sym))
-        TODO(loc, "CUDA Fortran: allocate with device source");
+        sourceIsDevice = true;
 
     // Generate a sequence of runtime calls.
     errorManager.genStatCheck(builder, loc);
@@ -704,7 +705,7 @@ class AllocateStmtHelper {
     genSetDeferredLengthParameters(alloc, box);
     genAllocateObjectBounds(alloc, box);
     mlir::Value stat;
-    if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) {
+    if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) || sourceIsDevice) {
       stat =
           genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol());
     } else {
@@ -853,13 +854,19 @@ class AllocateStmtHelper {
     // Keep return type the same as a standard AllocatableAllocate call.
     mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
+
+    bool isSourceDevice = false;
+    if (const Fortran::semantics::Symbol *sym{GetLastSymbol(sourceExpr)})
+      if (Fortran::semantics::IsCUDADevice(*sym))
+        isSourceDevice = true;
+
     bool doubleDescriptors = Fortran::lower::hasDoubleDescriptor(box.getAddr());
     return cuf::AllocateOp::create(
                builder, loc, retTy, box.getAddr(), errmsg, stream, pinned,
                source, cudaAttr,
                errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
                doubleDescriptors ? builder.getUnitAttr() : nullptr,
-               box.isPointer() ? builder.getUnitAttr() : nullptr)
+               box.isPointer() ? builder.getUnitAttr() : nullptr,
+               isSourceDevice ? builder.getUnitAttr() : nullptr)
         .getResult();
   }
diff --git a/flang/lib/Optimizer/Analysis/TBAAForest.cpp b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
index 44a0348da3a6f..7154785c62c75 100644
--- a/flang/lib/Optimizer/Analysis/TBAAForest.cpp
+++ b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
@@ -66,12 +66,9 @@ fir::TBAATree::TBAATree(mlir::LLVM::TBAATypeDescriptorAttr anyAccess,
                         mlir::LLVM::TBAATypeDescriptorAttr dataRoot,
                         mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc)
     : targetDataTree(dataRoot.getContext(), "target data", dataRoot),
-      globalDataTree(dataRoot.getContext(), "global data",
-                     targetDataTree.getRoot()),
-      allocatedDataTree(dataRoot.getContext(), "allocated data",
-                        targetDataTree.getRoot()),
+      globalDataTree(dataRoot.getContext(), "global data", dataRoot),
+      allocatedDataTree(dataRoot.getContext(), "allocated data", dataRoot),
       dummyArgDataTree(dataRoot.getContext(), "dummy arg data", dataRoot),
-      directDataTree(dataRoot.getContext(), "direct data",
-                     targetDataTree.getRoot()),
+      directDataTree(dataRoot.getContext(), "direct data", dataRoot),
       anyAccessDesc(anyAccess), boxMemberTypeDesc(boxMemberTypeDesc),
       anyDataTypeDesc(dataRoot) {}
diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index 0221c7a8184d7..558ffa1a80bcf 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -60,6 +60,9 @@ static llvm::cl::opt<unsigned> localAllocsThreshold(
     llvm::cl::desc("If present, stops generating TBAA tags for accesses of "
                    "local allocations after N accesses in a module"));
 
+// Defined in AliasAnalysis.cpp
+extern llvm::cl::opt<bool> supportCrayPointers;
+
 namespace {
 
 // Return the size and alignment (in bytes) for the given type.
@@ -668,6 +671,7 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
   LLVM_DEBUG(llvm::dbgs() << "Analysing " << op << "\n");
 
   const fir::AliasAnalysis::Source &source = state.getSource(memref);
+  LLVM_DEBUG(llvm::dbgs() << "Got source " << source << "\n");
 
   // Process the scopes, if not processed yet.
   state.processFunctionScopes(func);
@@ -686,14 +690,22 @@
   }
 
   mlir::LLVM::TBAATagAttr tag;
-  // TBAA for dummy arguments
-  if (enableDummyArgs &&
-      source.kind == fir::AliasAnalysis::SourceKind::Argument) {
+  // Cray pointer/pointee is a special case. These might alias with any data.
+  if (supportCrayPointers && source.isCrayPointerOrPointee()) {
+    LLVM_DEBUG(llvm::dbgs().indent(2)
+               << "Found reference to Cray pointer/pointee at " << *op << "\n");
+    mlir::LLVM::TBAATypeDescriptorAttr anyDataDesc =
+        state.getFuncTreeWithScope(func, scopeOp).anyDataTypeDesc;
+    tag = mlir::LLVM::TBAATagAttr::get(anyDataDesc, anyDataDesc, /*offset=*/0);
+    // TBAA for dummy arguments
+  } else if (enableDummyArgs &&
+             source.kind == fir::AliasAnalysis::SourceKind::Argument) {
     LLVM_DEBUG(llvm::dbgs().indent(2)
                << "Found reference to dummy argument at " << *op << "\n");
     std::string name =
         getFuncArgName(llvm::cast<mlir::Value>(source.origin.u));
-    // If it is a TARGET or POINTER, then we do not care about the name,
-    // because the tag points to the root of the subtree currently.
+    // POINTERS can alias with any POINTER or TARGET. Assume that TARGET dummy
+    // arguments might alias with each other (because of the "TARGET" hole for
+    // dummy arguments). See flang/docs/Aliasing.md.
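+    // In both cases the tag below is the shared root tag of the
+    // "target data" subtree, so such dummy arguments are never
+    // disambiguated from one another.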
     if (source.isTargetOrPointer()) {
       tag = state.getFuncTreeWithScope(func, scopeOp).targetDataTree.getTag();
     } else if (!name.empty()) {
@@ -715,13 +727,10 @@
     LLVM_DEBUG(llvm::dbgs().indent(2)
                << "Found reference to global " << globalName.str() << " at "
                << *op << "\n");
-    if (source.isPointer()) {
-      tag = state.getFuncTreeWithScope(func, scopeOp).targetDataTree.getTag();
-    } else {
-      // In general, place the tags under the "global data" root.
-      fir::TBAATree::SubtreeState *subTree =
-          &state.getMutableFuncTreeWithScope(func, scopeOp).globalDataTree;
+    // Add a named tag inside the given subtree, disambiguating members of a
+    // common block.
+    auto addTagUsingStorageDesc = [&](fir::TBAATree::SubtreeState *subTree) {
       mlir::Operation *instantiationPoint = source.origin.instantiationPoint;
       auto storageIface =
          mlir::dyn_cast_or_null(
@@ -766,6 +775,19 @@
         LLVM_DEBUG(llvm::dbgs() << "Tagged under '" << globalName << "' root\n");
       }
+    };
+
+    if (source.isPointer()) {
+      // Pointers can alias with any pointer or target.
+      tag = state.getFuncTreeWithScope(func, scopeOp).targetDataTree.getTag();
+    } else if (source.isTarget()) {
+      // Targets could alias with any pointer but not with each other.
+      addTagUsingStorageDesc(
+          &state.getMutableFuncTreeWithScope(func, scopeOp).targetDataTree);
+    } else {
+      // In general, place the tags under the "global data" root.
+      addTagUsingStorageDesc(
+          &state.getMutableFuncTreeWithScope(func, scopeOp).globalDataTree);
     }
 
   // TBAA for global variables with descriptors
@@ -776,9 +798,17 @@
     const char *name = glbl.getRootReference().data();
     LLVM_DEBUG(llvm::dbgs().indent(2)
                << "Found reference to direct " << name << " at " << *op << "\n");
+    // A pointer can alias with any pointer or target, so it gets the root
+    // tag.
     if (source.isPointer())
       tag = state.getFuncTreeWithScope(func, scopeOp).targetDataTree.getTag();
+    // Targets could alias with any pointer but not with each other, so each
+    // gets its own node inside the target data tree.
+    else if (source.isTarget())
+      tag = state.getFuncTreeWithScope(func, scopeOp)
+                .targetDataTree.getTag(name);
     else
+      // Boxes that are not pointers or targets cannot alias with those that
+      // are. Put them under the "direct data" root.
tag = state.getFuncTreeWithScope(func, scopeOp) .directDataTree.getTag(name); } else { @@ -815,8 +845,13 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op, << "\n"); } else if (source.isPointer() && state.attachLocalAllocTag()) { LLVM_DEBUG(llvm::dbgs().indent(2) - << "Found reference to allocation at " << *op << "\n"); + << "Found reference to POINTER allocation at " << *op << "\n"); tag = state.getFuncTreeWithScope(func, scopeOp).targetDataTree.getTag(); + } else if (source.isTarget() && state.attachLocalAllocTag()) { + LLVM_DEBUG(llvm::dbgs().indent(2) + << "Found reference to TARGET allocation at " << *op << "\n"); + tag = state.getFuncTreeWithScope(func, scopeOp) + .targetDataTree.getTag(*name); } else if (name && state.attachLocalAllocTag()) { LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to allocation " << name << " at " << *op << "\n"); diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp index 6579c2362cd87..4444fc61239ea 100644 --- a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp @@ -99,7 +99,6 @@ static mlir::LogicalResult convertOpToCall(OpTy op, mlir::Value hasStat = op.getHasStat() ? builder.createBool(loc, true) : builder.createBool(loc, false); - mlir::Value errmsg; if (op.getErrmsg()) { errmsg = op.getErrmsg(); @@ -116,12 +115,15 @@ static mlir::LogicalResult convertOpToCall(OpTy op, loc, fir::ReferenceType::get( mlir::IntegerType::get(op.getContext(), 1))); if (op.getSource()) { + mlir::Value isDeviceSource = op.getDeviceSource() + ? builder.createBool(loc, true) + : builder.createBool(loc, false); mlir::Value stream = op.getStream() ? op.getStream() : builder.createNullConstant(loc, fTy.getInput(2)); args = fir::runtime::createArguments( builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned, - hasStat, errmsg, sourceFile, sourceLine); + hasStat, errmsg, sourceFile, sourceLine, isDeviceSource); } else { mlir::Value stream = op.getStream() ? 
op.getStream() diff --git a/flang/test/Driver/tco-test-gen.fir b/flang/test/Driver/tco-test-gen.fir index b39295d72918f..438804ce42b76 100644 --- a/flang/test/Driver/tco-test-gen.fir +++ b/flang/test/Driver/tco-test-gen.fir @@ -77,13 +77,13 @@ func.func @_QPtest(%arg0: !fir.ref {fir.bindc_name = "num"}, %arg1: !fir.re // CHECK: llvm.cond_br %[[VAL_17]], ^bb2, ^bb3 // CHECK: ^bb2: -// AA: llvm.store %[[VAL_15]], %[[VAL_1]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : i32, !llvm.ptr +// AA: llvm.store %[[VAL_15]], %[[VAL_1]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : i32, !llvm.ptr // NOAA: llvm.store %[[VAL_15]], %{{.*}} : i32, !llvm.ptr // AA: %[[VAL_18:.*]] = llvm.load %[[ARG0]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : !llvm.ptr -> i32 // NOAA: %[[VAL_18:.*]] = llvm.load %[[ARG0]] : !llvm.ptr -> i32 -// AA: %[[VAL_19:.*]] = llvm.load %[[VAL_1]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : !llvm.ptr -> i32 +// AA: %[[VAL_19:.*]] = llvm.load %[[VAL_1]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : !llvm.ptr -> i32 // NOAA: %[[VAL_19:.*]] = llvm.load %{{.*}} : !llvm.ptr -> i32 // CHECK: %[[VAL_20:.*]] = llvm.add %[[VAL_18]], %[[VAL_19]] : i32 @@ -92,7 +92,7 @@ func.func @_QPtest(%arg0: !fir.ref {fir.bindc_name = "num"}, %arg1: !fir.re // CHECK: %[[VAL_21:.*]] = llvm.trunc %[[VAL_10]] : i64 to i32 -// AA: %[[VAL_22:.*]] = llvm.load %[[VAL_1]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : !llvm.ptr -> i32 +// AA: %[[VAL_22:.*]] = llvm.load %[[VAL_1]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : !llvm.ptr -> i32 // NOAA: %[[VAL_22:.*]] = llvm.load %{{.*}} : !llvm.ptr -> i32 // CHECK: %[[VAL_23:.*]] = llvm.add %[[VAL_22]], %[[VAL_21]] overflow : i32 @@ -100,7 +100,7 @@ func.func @_QPtest(%arg0: !fir.ref {fir.bindc_name = "num"}, %arg1: !fir.re // CHECK: llvm.br ^bb1(%[[VAL_23]], %[[VAL_24]] : i32, i64) // CHECK: ^bb3: -// AA: llvm.store %[[VAL_15]], %[[VAL_1]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : i32, !llvm.ptr +// AA: llvm.store %[[VAL_15]], %[[VAL_1]] {tbaa = [#llvm.tbaa_tag, 0>}>, 0>}>, 0>}>, 0>}>, access_type = , 0>}>, 0>}>, 0>}>, 0>}>, offset = 0>]} : i32, !llvm.ptr // NOAA: llvm.store %[[VAL_15]], %{{.*}} : i32, !llvm.ptr // CHECK: llvm.return diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index 9d0d181609ada..5184561a03e67 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -128,11 +128,14 @@ func.func @_QPallocate_source() { %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFallocate_sourceEa"} + %devsource = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFallocate_sourceEa"} %4 = fir.declare %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFallocate_sourceEa"} : (!fir.ref>>>) -> !fir.ref>>> %5 = cuf.alloc !fir.box>> {bindc_name = "a_d", data_attr = #cuf.cuda, uniq_name = "_QFallocate_sourceEa_d"} -> !fir.ref>>> %7 = fir.declare %5 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, 
uniq_name = "_QFallocate_sourceEa_d"} : (!fir.ref>>>) -> !fir.ref>>> %8 = fir.load %4 : !fir.ref>>> %22 = cuf.allocate %7 : !fir.ref>>> source(%8 : !fir.box>>) {data_attr = #cuf.cuda} -> i32 + %9 = fir.load %devsource : !fir.ref>>> + %23 = cuf.allocate %7 : !fir.ref>>> source(%9 : !fir.box>>) {device_source} -> i32 return } @@ -142,8 +145,8 @@ func.func @_QPallocate_source() { // CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref>>> // CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box>>) -> !fir.box -// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 - +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32, i1) -> i32 +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %true{{.*}}) fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda} : !fir.box>> { %c0 = arith.constant 0 : index diff --git a/flang/test/Fir/tbaa-codegen2.fir b/flang/test/Fir/tbaa-codegen2.fir index 4907aa03ec5a5..071d3ec89394c 100644 --- a/flang/test/Fir/tbaa-codegen2.fir +++ b/flang/test/Fir/tbaa-codegen2.fir @@ -114,4 +114,3 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ // CHECK: ![[TMP_DATA_ACCESS_TAG]] = !{![[TMP_DATA_ACCESS_TYPE:.*]], ![[TMP_DATA_ACCESS_TYPE]], i64 0} // CHECK: ![[TMP_DATA_ACCESS_TYPE]] = !{!"allocated data/", ![[TMP_ACCESS_TYPE:.*]], i64 0} // CHECK: ![[TMP_ACCESS_TYPE]] = !{!"allocated data", ![[TARGET_ACCESS_TAG:.*]], i64 0} -// CHECK: ![[TARGET_ACCESS_TAG]] = !{!"target data", ![[DATA_ACCESS_TYPE]], i64 0} diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf deleted file mode 100644 index 3e59e2f01119e..0000000000000 --- a/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf +++ /dev/null @@ -1,9 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s - -program main - implicit none - integer, device, allocatable :: a_d(:) - integer, allocatable :: a(:) -! CHECK: not yet implemented: CUDA Fortran: allocate with device source - allocate(a, source=a_d) -end program diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf index 43e716532ecca..52303d126b8dc 100644 --- a/flang/test/Lower/CUDA/cuda-allocatable.cuf +++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf @@ -261,3 +261,12 @@ end subroutine ! CHECK: cuf.deallocate %{{.*}} : !fir.ref>>> {data_attr = #cuf.cuda, hasDoubleDescriptor} -> i32 ! CHECK: cuf.deallocate %{{.*}} : !fir.ref>>> {data_attr = #cuf.cuda, hasDoubleDescriptor} -> i32 ! CHECK: cuf.deallocate %{{.*}} : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 + +attributes(global) subroutine from_device_source() + real, device, allocatable :: a(:) + real, allocatable :: b(:) + allocate(b, source=a) +end subroutine + +! CHECK-LABEL: func.func @_QPfrom_device_source() +! 
CHECK: cuf.allocate{{.*}}device_source diff --git a/flang/test/Transforms/tbaa-cray-pointer.fir b/flang/test/Transforms/tbaa-cray-pointer.fir new file mode 100644 index 0000000000000..54406271aaa58 --- /dev/null +++ b/flang/test/Transforms/tbaa-cray-pointer.fir @@ -0,0 +1,43 @@ +// RUN: fir-opt -funsafe-cray-pointers --fir-add-alias-tags %s | FileCheck %s + +// Fortran source: +// subroutine test() +// real :: a, b +// pointer(p, a) +// p = loc(b) +// b = 2 +// end subroutine + +// CHECK: #[[TBAA_ROOT:.*]] = #llvm.tbaa_root +// CHECK-NEXT: #[[ANY_ACCESS:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[ANY_DATA:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[ANY_DATA_TAG:.*]] = #llvm.tbaa_tag +// CHECK-NEXT: #[[ALLOCATED_DATA:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[B:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[B_TAG:.*]] = #llvm.tbaa_tag + +module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i8 = dense<[8, 32]> : vector<2xi64>, i16 = dense<[16, 32]> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64, "dlti.function_pointer_alignment" = #dlti.function_pointer_alignment<32, function_dependent = true>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"} { +// CHECK-LABEL: func.func @_QPtest() + func.func @_QPtest() { + %cst = arith.constant 2.000000e+00 : f32 + %0 = fir.alloca !fir.box> + %1 = fir.dummy_scope : !fir.dscope + %2 = fir.alloca i64 {bindc_name = "p", uniq_name = "_QFtestEp"} + %3 = fir.declare %2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestEp"} : (!fir.ref) -> !fir.ref + %4 = fir.alloca f32 {bindc_name = "b", uniq_name = "_QFtestEb"} + %5 = fir.declare %4 {uniq_name = "_QFtestEb"} : (!fir.ref) -> !fir.ref + %6 = fir.declare %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestEa"} : (!fir.ref>>) -> !fir.ref>> + %7 = fir.zero_bits !fir.ptr + %8 = fir.embox %7 : (!fir.ptr) -> !fir.box> + fir.store %8 to %6 : !fir.ref>> +// Descriptor tagged in codegen +// CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref> + %9 = fir.convert %5 : (!fir.ref) -> i64 + fir.store %9 to %3 : !fir.ref +// CHECK: fir.store {{.*}} to {{.*}} {tbaa = [#[[ANY_DATA_TAG]]]} : !fir.ref + fir.store %cst to %5 : !fir.ref +// CHECK: fir.store {{.*}} to {{.*}} {tbaa = [#[[B_TAG]]]} : !fir.ref + return + } +} + diff --git a/flang/test/Transforms/tbaa-for-common-vars.fir b/flang/test/Transforms/tbaa-for-common-vars.fir index a8dd86bff72ed..992658ee2387f 100644 --- a/flang/test/Transforms/tbaa-for-common-vars.fir +++ b/flang/test/Transforms/tbaa-for-common-vars.fir @@ -28,8 +28,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[$ATTR_0:.+]] = #llvm.tbaa_root // CHECK: #[[$ATTR_1:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_2:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_3:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_4:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_4:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_5:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_6:.+]] = 
#llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_7:.+]] = #llvm.tbaa_type_desc}> @@ -66,8 +65,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[$ATTR_10:.+]] = #llvm.tbaa_root // CHECK: #[[$ATTR_11:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_12:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_13:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_14:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_14:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_15:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_16:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_18:.+]] = #llvm.tbaa_tag @@ -118,14 +116,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[ANYACC3INNER:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[ANYDATA3:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[ANYDATA3INNER:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TARGETDATA3:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[GLOBALDATA3:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[DUMMYARG3INNER:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[GLOBALDATA3:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[GLOBALDATA3COMMON3:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[DUMMYD:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[DUMMYC:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[DUMMYDTAG:.+]] = #llvm.tbaa_tag // CHECK: #[[DUMMYCTAG:.+]] = #llvm.tbaa_tag -// CHECK: #[[GLOBALDATA3COMMON3:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLOBALB:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLOBALA:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLOBALBTAG:.+]] = #llvm.tbaa_tag @@ -180,10 +177,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[INNER4ANYACC:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[TEST4ANYDATA:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[INNER4ANYDATA:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TEST4TARGET:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[INNER4TARGET:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TEST4GLOBAL:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[INNER4GLOBAL:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[TEST4GLOBAL:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[INNER4GLOBAL:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[TEST4COMMON:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[INNER4COMMON:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[TEST4B:.+]] = #llvm.tbaa_type_desc}> @@ -229,8 +224,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[TEST5ROOT:.+]] = #llvm.tbaa_root // CHECK: #[[TEST5ANYACC:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[TEST5ANYDATA:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TEST5TARGET:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TEST5GLOBAL:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[TEST5GLOBAL:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[TEST5COMMON5:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[TEST5COMMON5TAG:.+]] = #llvm.tbaa_tag // CHECK: #[[TEST5A:.+]] = #llvm.tbaa_type_desc}> @@ -288,8 +282,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[$ATTR_0:.+]] = #llvm.tbaa_root // CHECK: #[[$ATTR_1:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_2:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_3:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_4:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_4:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_5:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_6:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_7:.+]] = #llvm.tbaa_tag @@ -354,8 +347,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[$ATTR_74:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_75:.+]] = 
#llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_76:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_78:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_77:.+]] = #llvm.tbaa_tag -// CHECK: #[[$ATTR_78:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_79:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_80:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_81:.+]] = #llvm.tbaa_tag @@ -425,12 +418,61 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[$ATTR_82:.+]] = #llvm.tbaa_root // CHECK: #[[$ATTR_83:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_84:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_87:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_85:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_86:.+]] = #llvm.tbaa_tag -// CHECK: #[[$ATTR_87:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_88:.+]] = #llvm.tbaa_tag +// CHECK: #[[$ATTR_86:.+]] = #llvm.tbaa_tag // CHECK-LABEL: func.func @_QPtest8() { // CHECK: fir.load %{{[0-9]+}} : !fir.ref>> // CHECK: fir.load %{{[0-9]+}} {tbaa = [#[[$ATTR_86]]]} : !fir.ptr // CHECK: fir.load %{{[0-9]+}} : !fir.ref // CHECK: fir.store %{{[0-9]+}} to %{{[0-9]+}} : !fir.ref + +// ----- + +// Fortran source: +// subroutine target_comon_tbaa() +// real :: a +// real, target :: b, c +// common /common1/ a,b,c +// a = b +// end subroutine +// +// Test generation of tbaa tags where some members of a common block are TARGET +module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, i64 = dense<[32, 64]> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little">, llvm.data_layout = ""} { fir.global common @block_(dense<0> : vector<44xi8>) {alignment = 4 : i64} : !fir.array<44xi8> + fir.global common @common1_(dense<0> : vector<12xi8>) {alignment = 4 : i64} : !fir.array<12xi8> + func.func @_QPtarget_common_tbaa() { + %c8 = arith.constant 8 : index + %c4 = arith.constant 4 : index + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.address_of(@common1_) : !fir.ref> + %2 = fir.coordinate_of %1, %c0 : (!fir.ref>, index) -> !fir.ref + %3 = fir.convert %2 : (!fir.ref) -> !fir.ref + %4 = fir.declare %3 storage(%1[0]) {uniq_name = "_QFtarget_comon_tbaaEa"} : (!fir.ref, !fir.ref>) -> !fir.ref + %5 = fir.coordinate_of %1, %c4 : (!fir.ref>, index) -> !fir.ref + %6 = fir.convert %5 : (!fir.ref) -> !fir.ref + %7 = fir.declare %6 storage(%1[4]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtarget_comon_tbaaEb"} : (!fir.ref, !fir.ref>) -> !fir.ref + %8 = fir.coordinate_of %1, %c8 : (!fir.ref>, index) -> !fir.ref + %9 = fir.convert %8 : (!fir.ref) -> !fir.ref + %10 = fir.declare %9 storage(%1[8]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtarget_comon_tbaaEc"} : (!fir.ref, !fir.ref>) -> !fir.ref + %11 = fir.load %7 : !fir.ref + fir.store %11 to %4 : !fir.ref + return + } +} +// CHECK: #[[TBAA_FUNC_ROOT:.*]] = #llvm.tbaa_root +// CHECK-NEXT: #[[ANY_ACCESS:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[ANY_DATA:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[TARGET_DATA:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[GLOBAL_DATA:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[TARGET_COMMON:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[GLOBAL_COMMON:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[B:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: #[[A:.*]] = #llvm.tbaa_type_desc}> +// CHECK-NEXT: 
#[[B_TAG:.*]] = #llvm.tbaa_tag +// CHECK-NEXT: #[[A_TAG:.*]] = #llvm.tbaa_tag + +// CHECK-LABEL: func.func @_QPtarget_common_tbaa() +// CHECK: %[[LOAD:.*]] = fir.load %{{.*}} {tbaa = [#[[B_TAG]]]} +// CHECK: fir.store %[[LOAD]] to %{{.*}} {tbaa = [#[[A_TAG]]]} diff --git a/flang/test/Transforms/tbaa-for-global-equiv-vars.fir b/flang/test/Transforms/tbaa-for-global-equiv-vars.fir index dbefa3f8e3f5f..0d082c7504024 100644 --- a/flang/test/Transforms/tbaa-for-global-equiv-vars.fir +++ b/flang/test/Transforms/tbaa-for-global-equiv-vars.fir @@ -30,8 +30,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[ROOT1:.+]] = #llvm.tbaa_root // CHECK: #[[ANYACC1:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[ANYDATA1:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TARGETDATA1:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[GLOBALDATA1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[GLOBALDATA1:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLOB1COMMON:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLOB1:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[TAG:.+]] = #llvm.tbaa_tag @@ -74,8 +73,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[ROOT2:.+]] = #llvm.tbaa_root // CHECK: #[[ANYACC2:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[ANYDATA2:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TARGETDATA2:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[GLOBALDATA2:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[GLOBALDATA2:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLOB1COMMON:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLOB1GLOB2:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLOB3:.+]] = #llvm.tbaa_type_desc}> diff --git a/flang/test/Transforms/tbaa-for-local-vars.fir b/flang/test/Transforms/tbaa-for-local-vars.fir index 4eb6b2ecf31c4..fde5c400c75ed 100644 --- a/flang/test/Transforms/tbaa-for-local-vars.fir +++ b/flang/test/Transforms/tbaa-for-local-vars.fir @@ -35,18 +35,22 @@ // scope's TBAA tree. 
// RUN: fir-opt --fir-add-alias-tags %s | FileCheck %s -// CHECK: #[[$ATTR_0:.+]] = #llvm.tbaa_root -// CHECK: #[[$ATTR_1:.+]] = #llvm.tbaa_root -// CHECK: #[[$ATTR_2:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_3:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_4:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_5:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_6:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_7:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_9:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_10:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_12:.+]] = #llvm.tbaa_tag -// CHECK: #[[$ATTR_13:.+]] = #llvm.tbaa_tag +// CHECK: #[[$SCOPE_2:.+]] = #llvm.tbaa_root +// CHECK: #[[$SCOPE_1:.+]] = #llvm.tbaa_root +// CHECK: #[[$ANY_ACCESS2:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ANY_ACCESS1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ANY_DATA2:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ANY_DATA1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$DUMMY_ARG2:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ALLOCATED_DATA1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$DUMMY_ARG1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ALLOCATED_DATA1_TAG:.+]] = #llvm.tbaa_tag +// CHECK: #[[$BAR_THIS2:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$TEST_VAR1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$TEST_ARG1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$BAR_THIS2_TAG:.+]] = #llvm.tbaa_tag +// CHECK: #[[$TEST_VAR1_TAG:.+]] = #llvm.tbaa_tag +// CHECK: #[[$TEST_ARG2_TAG:.+]] = #llvm.tbaa_tag // CHECK-LABEL: func.func @_QMmPtest( // CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "arg"}) { @@ -61,10 +65,10 @@ // CHECK: %[[VAL_10:.*]] = fir.dummy_scope : !fir.dscope // CHECK: %[[VAL_11:.*]] = fir.declare %[[VAL_9]] dummy_scope %[[VAL_10]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMmFbarEthis"} : (!fir.class>, !fir.dscope) -> !fir.class> // CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_11]], x : (!fir.class>) -> !fir.ref -// CHECK: fir.store %[[VAL_0]] to %[[VAL_12]] {tbaa = [#[[$ATTR_12]]]} : !fir.ref +// CHECK: fir.store %[[VAL_0]] to %[[VAL_12]] {tbaa = [#[[$BAR_THIS2_TAG]]]} : !fir.ref // CHECK: %[[VAL_13:.*]] = fir.declare %[[VAL_1]] {uniq_name = ".tmp.func_result"} : (!fir.ref>) -> !fir.ref> // CHECK: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_13]], x : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_14]] {tbaa = [#[[$ATTR_13]]]} : !fir.ref +// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_14]] {tbaa = [#[[$ALLOCATED_DATA1_TAG]]]} : !fir.ref module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, i64 = dense<[32, 64]> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little">, llvm.data_layout = ""} { func.func @_QMmPtest(%arg0: !fir.ref {fir.bindc_name = "arg"}) { %cst = arith.constant 1.000000e+00 : f32 diff --git a/flang/test/Transforms/tbaa-with-dummy-scope.fir b/flang/test/Transforms/tbaa-with-dummy-scope.fir index 4ae2b8efe2581..d7f33776150ae 100644 --- a/flang/test/Transforms/tbaa-with-dummy-scope.fir +++ b/flang/test/Transforms/tbaa-with-dummy-scope.fir @@ -24,7 +24,7 @@ // CHECK: #[[TARGETDATA:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_6:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_7:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TARGETTAG:.+]] = #llvm.tbaa_tag +// CHECK: #[[TARGETDATA_TAG:.+]] = #llvm.tbaa_tag // CHECK: 
#[[$ATTR_8:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_9:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_10:.+]] = #llvm.tbaa_type_desc}> @@ -34,8 +34,8 @@ // CHECK: #[[$ATTR_14:.+]] = #llvm.tbaa_tag // CHECK: #[[$ATTR_15:.+]] = #llvm.tbaa_tag // CHECK: func.func @test1( -// CHECK: %[[VAL_5:.*]] = fir.load %{{.*}} {tbaa = [#[[TARGETTAG]]]} : !fir.ref -// CHECK: fir.store %{{.*}} {tbaa = [#[[TARGETTAG]]]} : !fir.ref +// CHECK: %[[VAL_5:.*]] = fir.load %{{.*}} {tbaa = [#[[TARGETDATA_TAG]]]} : !fir.ref +// CHECK: fir.store %{{.*}} {tbaa = [#[[TARGETDATA_TAG]]]} : !fir.ref // CHECK: %[[VAL_6:.*]] = fir.dummy_scope : !fir.dscope // CHECK: %[[VAL_9:.*]] = fir.load %{{.*}} {tbaa = [#[[$ATTR_12]]]} : !fir.ref // CHECK: fir.store %{{.*}} {tbaa = [#[[$ATTR_13]]]} : !fir.ref @@ -83,23 +83,21 @@ func.func @test1(%arg0: !fir.ref {fir.bindc_name = "x", fir.target}, %arg1: // CHECK: #[[$ATTR_33:.+]] = #llvm.tbaa_root // CHECK: #[[$ATTR_34:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_35:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_36:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_37:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[CALLERTARGETDATA:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[CALLEETARGETDATA:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_40:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_38:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_39:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_45:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_50:.+]] = #llvm.tbaa_tag +// CHECK: #[[$CALLERANYDATA:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$CALLEEANYDATA:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_38:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_39:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_40:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_41:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_42:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_43:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_44:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_45:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_46:.+]] = #llvm.tbaa_tag // CHECK: #[[$ATTR_47:.+]] = #llvm.tbaa_tag // CHECK: #[[$ATTR_48:.+]] = #llvm.tbaa_tag // CHECK: #[[$ATTR_49:.+]] = #llvm.tbaa_tag +// CHECK: #[[$ATTR_50:.+]] = #llvm.tbaa_tag // CHECK: func.func @_QMtestPcaller( // CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "z"}) { // CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope diff --git a/flang/test/Transforms/tbaa-with-dummy-scope2.fir b/flang/test/Transforms/tbaa-with-dummy-scope2.fir index 54902ca7d41e1..6f5ed69fbc9c6 100644 --- a/flang/test/Transforms/tbaa-with-dummy-scope2.fir +++ b/flang/test/Transforms/tbaa-with-dummy-scope2.fir @@ -44,16 +44,15 @@ func.func @_QPtest1() attributes {noinline} { } // CHECK: #[[$ATTR_0:.+]] = #llvm.tbaa_root // CHECK: #[[$ATTR_1:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_2:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$TARGETDATA:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_3:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$LOCAL_ATTR_0:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_5:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_4:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_7:.+]] = #llvm.tbaa_tag +// CHECK: #[[$ANYDATA:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$LOCAL_ATTR_0:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_3:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_4:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$LOCAL_ATTR_1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_5:.+]] = #llvm.tbaa_type_desc}> // 
CHECK: #[[$ATTR_6:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$LOCAL_ATTR_2:.+]] = #llvm.tbaa_tag +// CHECK: #[[$ATTR_7:.+]] = #llvm.tbaa_tag // CHECK: #[[$ATTR_8:.+]] = #llvm.tbaa_tag // CHECK-LABEL: func.func @_QPtest1() attributes {noinline} { // CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFtest1FinnerEy"} @@ -90,19 +89,18 @@ func.func @_QPtest2() attributes {noinline} { } // CHECK: #[[$ATTR_0:.+]] = #llvm.tbaa_root // CHECK: #[[$ATTR_1:.+]] = #llvm.tbaa_root -// CHECK: #[[$ATTR_2:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_3:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_4:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_5:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$TARGETDATA_0:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_6:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$LOCAL_ATTR_0:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_8:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_7:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[$ATTR_10:.+]] = #llvm.tbaa_tag +// CHECK: #[[$ANY_ACCESS_0:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ANY_ACCESS_1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ANY_DATA_0:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ANY_DATA_1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$LOCAL_ATTR_0:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_6:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_7:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$LOCAL_ATTR_1:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[$ATTR_8:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$ATTR_9:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[$LOCAL_ATTR_2:.+]] = #llvm.tbaa_tag +// CHECK: #[[$ATTR_10:.+]] = #llvm.tbaa_tag // CHECK: #[[$ATTR_11:.+]] = #llvm.tbaa_tag // CHECK-LABEL: func.func @_QPtest2() attributes {noinline} { // CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFtest2FinnerEy"} diff --git a/flang/test/Transforms/tbaa2.fir b/flang/test/Transforms/tbaa2.fir index a594e6b32fdac..9b5307ba69d17 100644 --- a/flang/test/Transforms/tbaa2.fir +++ b/flang/test/Transforms/tbaa2.fir @@ -48,18 +48,10 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[ROOT:.+]] = #llvm.tbaa_root // CHECK: #[[ANY_ACCESS:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[ANY_DATA:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[TARGETDATA:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[ANY_GLBL:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[ANY_LOCAL:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[ANY_ARG:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[ANY_GLBL:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[ANY_LOCAL:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[ANY_DIRECT:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc}> -// CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc}> - -// CHECK: #[[ARG_LOW_TAG:.+]] = #llvm.tbaa_tag -// CHECK: #[[ARG_Z_TAG:.+]] = #llvm.tbaa_tag -// CHECK: #[[ARG_Y_TAG:.+]] = #llvm.tbaa_tag +// CHECK: #[[ANY_DIRECT:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLBL_ZSTART:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLBL_ZSTOP:.+]] = #llvm.tbaa_type_desc}> @@ -69,10 +61,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[LOCAL2_ALLOC:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLBL_XSTART:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[LOCAL3_ALLOC:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[LOCAL4_ALLOC:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[DIRECT_A:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[DIRECT_B:.+]] = 
#llvm.tbaa_type_desc}> +// CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLBL_DYINV:.+]] = #llvm.tbaa_type_desc}> +// CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[LOCAL5_ALLOC:.+]] = #llvm.tbaa_type_desc}> // CHECK: #[[GLBL_ZSTART_TAG:.+]] = #llvm.tbaa_tag @@ -83,10 +78,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // CHECK: #[[LOCAL2_ALLOC_TAG:.+]] = #llvm.tbaa_tag // CHECK: #[[GLBL_XSTART_TAG:.+]] = #llvm.tbaa_tag // CHECK: #[[LOCAL3_ALLOC_TAG:.+]] = #llvm.tbaa_tag +// CHECK: #[[ARG_LOW_TAG:.+]] = #llvm.tbaa_tag // CHECK: #[[LOCAL4_ALLOC_TAG:.+]] = #llvm.tbaa_tag // CHECK: #[[DIRECT_A_TAG:.+]] = #llvm.tbaa_tag // CHECK: #[[DIRECT_B_TAG:.+]] = #llvm.tbaa_tag +// CHECK: #[[ARG_Z_TAG:.+]] = #llvm.tbaa_tag // CHECK: #[[GLBL_DYINV_TAG:.+]] = #llvm.tbaa_tag +// CHECK: #[[ARG_Y_TAG:.+]] = #llvm.tbaa_tag // CHECK: #[[LOCAL5_ALLOC_TAG:.+]] = #llvm.tbaa_tag func.func @_QMmodPcallee(%arg0: !fir.box> {fir.bindc_name = "z"}, %arg1: !fir.box> {fir.bindc_name = "y"}, %arg2: !fir.ref>>> {fir.bindc_name = "low"}) { diff --git a/flang/test/Transforms/tbaa3.fir b/flang/test/Transforms/tbaa3.fir index abcb7e000bac1..79f79cb6ca26b 100644 --- a/flang/test/Transforms/tbaa3.fir +++ b/flang/test/Transforms/tbaa3.fir @@ -1,5 +1,4 @@ -// RUN: fir-opt --fir-add-alias-tags %s | FileCheck --check-prefixes=ALL,DEFAULT %s -// RUN: fir-opt --fir-add-alias-tags --local-alloc-tbaa %s | FileCheck --check-prefixes=ALL,LOCAL %s +// RUN: fir-opt --fir-add-alias-tags %s | FileCheck --check-prefixes=ALL %s // Test AddAliasTagsPass creating sub-tree for TARGET/POINTER variables. @@ -56,56 +55,57 @@ // | |- "dummy arg data/_QFtest1Edummyas" // | |- "dummy arg data/_QFtest1Edummya" // | -// |- "target data" <- all pointers and taget dummys -// | -// |- "global data" -// | | -// | |- "global data/_QMdataEglob" -// | |- "global data/_QMdataEglobt" -// | -// |- "direct data" -// | | -// | |- "direct data/_QMdataEgloba" -// | |- "direct data/_QMdataEglobat" +// |- "target data" <--- all pointers and target dummy arguments go here +// | |- "target data/_QMdataEglobt" +// | |- "target data/_QMdataEglobat" +// | |- "target data/_QFtest1Elocalt" +// | |- "target data/_QFtest1Elocalat" +// | +// |- "global data" +// | | +// | |- "global data/_QMdataEglob" +// | +// |- "direct data" +// | | +// | |- "direct data/_QMdataEgloba" +// | +// |- "allocated data" // | -// |- "allocated data" -// | -// |- "allocated data/_QFtest1Elocal" -// |- "allocated data/_QFtest1Elocalt" -// |- "allocated data/_QFtest1Elocala" -// |- "allocated data/_QFtest1Elocalat" +// |- "allocated data/_QFtest1Elocal" +// |- "allocated data/_QFtest1Elocala" // ALL: #[[FUNCROOT:.+]] = #llvm.tbaa_root // ALL: #[[ANYACCESS:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[ANYDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[GLOBALDATA:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[TARGETDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[DIRECTDATA:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[DUMMYDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[LOCALDATA:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[TARGETTAG:.+]] = #llvm.tbaa_tag -// ALL: #[[GLOBALDATA:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[DIRECTDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[GLOBVAR:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[GLOBTVAR:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[GLOBAVAR:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[GLOBATVAR:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[DUMMYFVAR:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[DUMMYASVAR:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[DUMMYAVAR:.+]] = 
#llvm.tbaa_type_desc}> -// LOCAL: #[[LOCALDATA:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[DUMMYFTAG:.+]] = #llvm.tbaa_tag -// ALL: #[[DUMMYASTAG:.+]] = #llvm.tbaa_tag -// ALL: #[[DUMMYATAG:.+]] = #llvm.tbaa_tag -// ALL: #[[GLOBVAR:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[GLOBTVAR:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[GLOBAVAR:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[GLOBATVAR:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[LOCALVAR:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[LOCALTVAR:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[LOCALAVAR:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[LOCALATVAR:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[LOCALVAR:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[LOCALTVAR:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[LOCALAVAR:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[LOCALATVAR:.+]] = #llvm.tbaa_type_desc}> + // ALL: #[[GLOBTAG:.+]] = #llvm.tbaa_tag // ALL: #[[GLOBTTAG:.+]] = #llvm.tbaa_tag // ALL: #[[GLOBATAG:.+]] = #llvm.tbaa_tag // ALL: #[[GLOBATTAG:.+]] = #llvm.tbaa_tag -// LOCAL: #[[LOCALTAG:.+]] = #llvm.tbaa_tag -// LOCAL: #[[LOCALTTAG:.+]] = #llvm.tbaa_tag -// LOCAL: #[[LOCALATAG:.+]] = #llvm.tbaa_tag -// LOCAL: #[[LOCALATTAG:.+]] = #llvm.tbaa_tag +// ALL: #[[DUMMYFTAG:.+]] = #llvm.tbaa_tag +// ALL: #[[DUMMYASTAG:.+]] = #llvm.tbaa_tag +// ALL: #[[DUMMYATAG:.+]] = #llvm.tbaa_tag +// ALL: #[[LOCALTAG:.+]] = #llvm.tbaa_tag +// ALL: #[[LOCALTTAG:.+]] = #llvm.tbaa_tag +// ALL: #[[LOCALATAG:.+]] = #llvm.tbaa_tag +// ALL: #[[LOCALATTAG:.+]] = #llvm.tbaa_tag module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, i64 = dense<[32, 64]> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little">, llvm.data_layout = ""} { fir.global @_QMdataEglob : !fir.array<10xf32> { @@ -263,13 +263,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 fir.store %cst to %67 : !fir.ref %68 = fir.array_coor %20(%5) %c1 : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref // real :: local(10) -// DEFAULT: fir.store{{.*}}tbaa -// LOCAL: fir.store{{.*}}{tbaa = [#[[LOCALTAG]]]} : !fir.ref +// ALL: fir.store{{.*}}{tbaa = [#[[LOCALTAG]]]} : !fir.ref fir.store %cst to %68 : !fir.ref %69 = fir.array_coor %33(%5) %c1 : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref // real, target :: localt(10) -// DEFAULT: fir.store{{.*}}tbaa -// LOCAL: fir.store{{.*}}{tbaa = [#[[LOCALTTAG]]]} : !fir.ref +// ALL: fir.store{{.*}}{tbaa = [#[[LOCALTTAG]]]} : !fir.ref fir.store %cst to %69 : !fir.ref // ALL-NOT: fir.load{{.*}}tbaa %70 = fir.load %25 : !fir.ref>>> @@ -278,8 +276,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 %73 = fir.shape_shift %72#0, %72#1 : (index, index) -> !fir.shapeshift<1> %74 = fir.array_coor %71(%73) %c1 : (!fir.heap>, !fir.shapeshift<1>, index) -> !fir.ref // real, allocatable :: locala(:) -// DEFAULT: fir.store{{.*}}tbaa -// LOCAL: fir.store{{.*}}{tbaa = [#[[LOCALATAG]]]} : !fir.ref +// ALL: fir.store{{.*}}{tbaa = [#[[LOCALATAG]]]} : !fir.ref fir.store %cst to %74 : !fir.ref // ALL-NOT: fir.load{{.*}}tbaa %75 = fir.load %27 : !fir.ref>>> @@ -288,8 +285,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 %78 = fir.shape_shift %77#0, %77#1 : (index, index) -> !fir.shapeshift<1> %79 = fir.array_coor %76(%78) %c1 : (!fir.heap>, !fir.shapeshift<1>, index) -> !fir.ref // real, allocatable, target :: localat(:) -// DEFAULT: fir.store{{.*}}tbaa 
-// LOCAL: fir.store{{.*}}{tbaa = [#[[LOCALATTAG]]]} : !fir.ref +// ALL: fir.store{{.*}}{tbaa = [#[[LOCALATTAG]]]} : !fir.ref fir.store %cst to %79 : !fir.ref // ALL-NOT: fir.load{{.*}}tbaa %80 = fir.load %31 : !fir.ref>>> @@ -297,8 +293,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 %82 = fir.shift %81#0 : (index) -> !fir.shift<1> %83 = fir.array_coor %80(%82) %c1 : (!fir.box>>, !fir.shift<1>, index) -> !fir.ref // real, pointer :: localp(:) -// DEFAULT: fir.store{{.*}}tbaa -// LOCAL: fir.store{{.*}}{tbaa = [#[[TARGETTAG]]]} : !fir.ref +// ALL: fir.store{{.*}}{tbaa = [#[[TARGETTAG]]]} : !fir.ref fir.store %cst to %83 : !fir.ref // ALL-NOT: fir.load{{.*}}tbaa %84 = fir.load %27 : !fir.ref>>> diff --git a/flang/test/Transforms/tbaa4.fir b/flang/test/Transforms/tbaa4.fir index c368a3d06c2ba..5e29014af8935 100644 --- a/flang/test/Transforms/tbaa4.fir +++ b/flang/test/Transforms/tbaa4.fir @@ -1,12 +1,10 @@ // Test TBAA tags for common and equivalence. -// RUN: fir-opt --fir-add-alias-tags --split-input-file %s | FileCheck --check-prefixes=ALL,DEFAULT %s -// RUN: fir-opt --fir-add-alias-tags --local-alloc-tbaa --split-input-file %s | FileCheck --check-prefixes=ALL,LOCAL %s +// RUN: fir-opt --fir-add-alias-tags --split-input-file %s | FileCheck --check-prefixes=ALL %s // ALL: #[[ROOT:.+]] = #llvm.tbaa_root // ALL: #[[ANY:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[ANYDATA:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[TARGETDATA:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[GLOBALDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[GLOBALDATA:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[BLK:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[BLK_A:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[BLK_C:.+]] = #llvm.tbaa_type_desc}> @@ -54,19 +52,17 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // ----- -// LOCAL: #[[ROOT:.+]] = #llvm.tbaa_root -// LOCAL: #[[ANY:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[ANYDATA:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[TARGETDATA:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[ALLOCATEDDATA:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[EQUIV:.+]] = #llvm.tbaa_type_desc}> -// LOCAL: #[[TAG:.+]] = #llvm.tbaa_tag +// ALL: #[[ROOT:.+]] = #llvm.tbaa_root +// ALL: #[[ANY:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[ANYDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[ALLOCATEDDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[EQUIV:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[TAG:.+]] = #llvm.tbaa_tag // ALL-LABEL: func.func @_QPtest_local_equiv() { -// LOCAL: fir.store{{.*}}{tbaa = [#[[TAG]]]} : !fir.ptr -// LOCAL: fir.store{{.*}}{tbaa = [#[[TAG]]]} : !fir.ref -// LOCAL: fir.store{{.*}}{tbaa = [#[[TAG]]]} : !fir.ptr -// DEFAULT-NOT: fir.store{{.}}tbaa +// ALL: fir.store{{.*}}{tbaa = [#[[TAG]]]} : !fir.ptr +// ALL: fir.store{{.*}}{tbaa = [#[[TAG]]]} : !fir.ref +// ALL: fir.store{{.*}}{tbaa = [#[[TAG]]]} : !fir.ptr module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, i64 = dense<[32, 64]> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little">, llvm.data_layout = ""} { func.func @_QPtest_local_equiv() { %c1 = arith.constant 1 : index @@ -98,8 +94,7 @@ func.func @_QPtest_local_equiv() { // ALL: #[[ROOT:.+]] = #llvm.tbaa_root // ALL: #[[ANY:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[ANYDATA:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[TARGETDATA:.+]] = 
#llvm.tbaa_type_desc}> -// ALL: #[[GLOBALDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[GLOBALDATA:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[EQUIV:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[TAG:.+]] = #llvm.tbaa_tag module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, i64 = dense<[32, 64]> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little">, llvm.data_layout = ""} { @@ -143,8 +138,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 // ALL: #[[ROOT:.+]] = #llvm.tbaa_root // ALL: #[[ANY:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[ANYDATA:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[TARGETDATA:.+]] = #llvm.tbaa_type_desc}> -// ALL: #[[GLOBALDATA:.+]] = #llvm.tbaa_type_desc}> +// ALL: #[[GLOBALDATA:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[EQUIV:.+]] = #llvm.tbaa_type_desc}> // ALL: #[[TAG:.+]] = #llvm.tbaa_tag diff --git a/libclc/opencl/lib/clspv/shared/vstore_half.cl b/libclc/opencl/lib/clspv/shared/vstore_half.cl index 341ec3e251719..cfcbf55caeae7 100644 --- a/libclc/opencl/lib/clspv/shared/vstore_half.cl +++ b/libclc/opencl/lib/clspv/shared/vstore_half.cl @@ -8,7 +8,13 @@ #include #include +#include +#include +#include #include +#include +#include +#include #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable @@ -48,32 +54,32 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rtz(float x) { /* Handle nan corner case */ - if (isnan(x)) + if (__clc_isnan(x)) return x; /* RTZ does not produce Inf for large numbers */ - if (fabs(x) > 65504.0f && !isinf(x)) - return copysign(65504.0f, x); + if (__clc_fabs(x) > 65504.0f && !__clc_isinf(x)) + return __clc_copysign(65504.0f, x); const int exp = (__clc_as_uint(x) >> 23 & 0xff) - 127; /* Manage range rounded to +- zero explicitely */ if (exp < -24) - return copysign(0.0f, x); + return __clc_copysign(0.0f, x); /* Remove lower 13 bits to make sure the number is rounded down */ int mask = 0xffffe000; /* Denormals cannot be flushed, and they use different bit for rounding */ if (exp < -14) - mask <<= min(-(exp + 14), 10); + mask <<= __clc_min(-(exp + 14), 10); return __clc_as_float(__clc_as_uint(x) & mask); } _CLC_DEF _CLC_OVERLOAD float __clc_rti(float x) { /* Handle nan corner case */ - if (isnan(x)) + if (__clc_isnan(x)) return x; - const float inf = copysign(INFINITY, x); + const float inf = __clc_copysign(INFINITY, x); uint ux = __clc_as_uint(x); /* Manage +- infinity explicitely */ @@ -82,23 +88,23 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rti(float x) { } /* Manage +- zero explicitely */ if ((ux & 0x7fffffff) == 0) { - return copysign(0.0f, x); + return __clc_copysign(0.0f, x); } const int exp = (__clc_as_uint(x) >> 23 & 0xff) - 127; /* Manage range rounded to smallest half denormal explicitely */ if (exp < -24) { - return copysign(0x1.0p-24f, x); + return __clc_copysign(0x1.0p-24f, x); } /* Set lower 13 bits */ int mask = (1 << 13) - 1; /* Denormals cannot be flushed, and they use different bit for rounding */ if (exp < -14) { - mask = (1 << (13 + min(-(exp + 14), 10))) - 1; + mask = (1 << (13 + __clc_min(-(exp + 14), 10))) - 1; } - const float next = nextafter(__clc_as_float(ux | mask), inf); + const float next = __clc_nextafter(__clc_as_float(ux | mask), inf); return ((ux & mask) == 0) ? 
__clc_as_float(ux) : next; } _CLC_DEF _CLC_OVERLOAD float __clc_rtn(float x) { @@ -116,7 +122,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rte(float x) { /* The default assumes lower 13 bits are rounded, * but it might be more for denormals. * Shifting beyond last == 0b, and qr == 00b is not necessary */ - shift += min(-(exp + 14), 15); + shift += __clc_min(-(exp + 14), 15); } int mask = (1 << shift) - 1; const uint grs = mantissa & mask; diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst index 56eb0e588d81d..f1912668e4013 100644 --- a/libcxx/docs/ReleaseNotes/22.rst +++ b/libcxx/docs/ReleaseNotes/22.rst @@ -122,5 +122,9 @@ ABI Affecting Changes - ``ranges::iota_view`` is now aware of ``__int128``. This causes ``iota_view::difference_type`` to change from ``long long`` to ``__int128`` in some cases. +- ``std::allocator`` is now trivially default constructible. The behaviour can be reverted by defining + ``_LIBCPP_DEPRECATED_ABI_NON_TRIVIAL_ALLOCATOR``. Please inform the libc++ team if you need this flag, since it will + be removed in LLVM 24 if there is no evidence that it's required. + Build System Changes -------------------- diff --git a/libcxx/include/__algorithm/copy_backward.h b/libcxx/include/__algorithm/copy_backward.h index 6c9eba672e154..8758d2c9e7b5d 100644 --- a/libcxx/include/__algorithm/copy_backward.h +++ b/libcxx/include/__algorithm/copy_backward.h @@ -11,6 +11,7 @@ #include <__algorithm/copy_move_common.h> #include <__algorithm/copy_n.h> +#include <__algorithm/for_each_segment.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> @@ -173,27 +174,10 @@ struct __copy_backward_impl { template , int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { - using _Traits = __segmented_iterator_traits<_InIter>; - auto __sfirst = _Traits::__segment(__first); - auto __slast = _Traits::__segment(__last); - if (__sfirst == __slast) { - auto __iters = - std::__copy_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__local(__last), std::move(__result)); - return std::make_pair(__last, __iters.second); - } - - __result = - std::__copy_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__local(__last), std::move(__result)) - .second; - --__slast; - while (__sfirst != __slast) { - __result = - std::__copy_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__end(__slast), std::move(__result)) - .second; - --__slast; - } - __result = std::__copy_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__end(__slast), std::move(__result)) - .second; + using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator; + std::__for_each_segment_backward(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) { + __result = std::__copy_backward<_AlgPolicy>(std::move(__lfirst), std::move(__llast), std::move(__result)).second; + }); return std::make_pair(__last, std::move(__result)); } diff --git a/libcxx/include/__algorithm/find_end.h b/libcxx/include/__algorithm/find_end.h index 86b4a3e2e3689..84b43e31a3a59 100644 --- a/libcxx/include/__algorithm/find_end.h +++ b/libcxx/include/__algorithm/find_end.h @@ -76,6 +76,111 @@ _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> } } +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter1> __find_end_impl( + _Iter1 __first1, + _Sent1 __sent1, + _Iter2 __first2, + _Sent2 
__sent2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + bidirectional_iterator_tag, + bidirectional_iterator_tag) { + auto __last1 = _IterOps<_AlgPolicy>::next(__first1, __sent1); + auto __last2 = _IterOps<_AlgPolicy>::next(__first2, __sent2); + // modeled after search algorithm (in reverse) + if (__first2 == __last2) + return std::make_pair(__last1, __last1); // Everything matches an empty sequence + _Iter1 __l1 = __last1; + _Iter2 __l2 = __last2; + --__l2; + while (true) { + // Find last element in sequence 1 that matches *(__last2-1), with a minimum of loop checks + while (true) { + if (__first1 == __l1) // return __last1 if no element matches *__first2 + return std::make_pair(__last1, __last1); + if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2))) + break; + } + // *__l1 matches *__l2, now match elements before here + _Iter1 __match_last = __l1; + _Iter1 __m1 = __l1; + _Iter2 __m2 = __l2; + while (true) { + if (__m2 == __first2) // If pattern exhausted, __m1 is the answer (works for 1 element pattern) + return std::make_pair(__m1, ++__match_last); + if (__m1 == __first1) // Otherwise if source exhausted, pattern not found + return std::make_pair(__last1, __last1); + + // if there is a mismatch, restart with a new __l1 + if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) { + break; + } // else there is a match, check next elements + } + } +} + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> __find_end_impl( + _Iter1 __first1, + _Sent1 __sent1, + _Iter2 __first2, + _Sent2 __sent2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + random_access_iterator_tag, + random_access_iterator_tag) { + typedef typename iterator_traits<_Iter1>::difference_type _D1; + auto __last1 = _IterOps<_AlgPolicy>::next(__first1, __sent1); + auto __last2 = _IterOps<_AlgPolicy>::next(__first2, __sent2); + // Take advantage of knowing source and pattern lengths. 
Stop short when source is smaller than pattern + auto __len2 = __last2 - __first2; + if (__len2 == 0) + return std::make_pair(__last1, __last1); + auto __len1 = __last1 - __first1; + if (__len1 < __len2) + return std::make_pair(__last1, __last1); + const _Iter1 __s = __first1 + _D1(__len2 - 1); // End of pattern match can't go before here + _Iter1 __l1 = __last1; + _Iter2 __l2 = __last2; + --__l2; + while (true) { + while (true) { + if (__s == __l1) + return std::make_pair(__last1, __last1); + if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2))) + break; + } + _Iter1 __last_match = __l1; + _Iter1 __m1 = __l1; + _Iter2 __m2 = __l2; + while (true) { + if (__m2 == __first2) + return std::make_pair(__m1, ++__last_match); + // no need to check range on __m1 because __s guarantees we have enough source + if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) { + break; + } + } + } +} + template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic( _ForwardIterator1 __first1, diff --git a/libcxx/include/__algorithm/for_each_segment.h b/libcxx/include/__algorithm/for_each_segment.h index 93aa8259b2f7f..c02436c9aa33c 100644 --- a/libcxx/include/__algorithm/for_each_segment.h +++ b/libcxx/include/__algorithm/for_each_segment.h @@ -48,6 +48,32 @@ __for_each_segment(_SegmentedIterator __first, _SegmentedIterator __last, _Funct __func(_Traits::__begin(__sfirst), _Traits::__local(__last)); } +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void +__for_each_segment_backward(_SegmentedIterator __first, _SegmentedIterator __last, _Functor __func) { + using _Traits = __segmented_iterator_traits<_SegmentedIterator>; + + auto __sfirst = _Traits::__segment(__first); + auto __slast = _Traits::__segment(__last); + + // We are in a single segment, so we might not be at the beginning or end + if (__sfirst == __slast) { + __func(_Traits::__local(__first), _Traits::__local(__last)); + return; + } + + // We have more than one segment. 
Iterate over the last segment, since we might not start at the end + __func(_Traits::__begin(__slast), _Traits::__local(__last)); + --__slast; + // iterate over the segments which are guaranteed to be completely in the range + while (__sfirst != __slast) { + __func(_Traits::__begin(__slast), _Traits::__end(__slast)); + --__slast; + } + // iterate over the first segment + __func(_Traits::__local(__first), _Traits::__end(__slast)); +} + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_FOR_EACH_SEGMENT_H diff --git a/libcxx/include/__algorithm/move_backward.h b/libcxx/include/__algorithm/move_backward.h index a4698327b474d..43b72057a5eca 100644 --- a/libcxx/include/__algorithm/move_backward.h +++ b/libcxx/include/__algorithm/move_backward.h @@ -11,6 +11,7 @@ #include <__algorithm/copy_backward.h> #include <__algorithm/copy_move_common.h> +#include <__algorithm/for_each_segment.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> @@ -54,27 +55,10 @@ struct __move_backward_impl { template , int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { - using _Traits = __segmented_iterator_traits<_InIter>; - auto __sfirst = _Traits::__segment(__first); - auto __slast = _Traits::__segment(__last); - if (__sfirst == __slast) { - auto __iters = - std::__move_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__local(__last), std::move(__result)); - return std::make_pair(__last, __iters.second); - } - - __result = - std::__move_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__local(__last), std::move(__result)) - .second; - --__slast; - while (__sfirst != __slast) { - __result = - std::__move_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__end(__slast), std::move(__result)) - .second; - --__slast; - } - __result = std::__move_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__end(__slast), std::move(__result)) - .second; + using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator; + std::__for_each_segment_backward(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) { + __result = std::__move_backward<_AlgPolicy>(std::move(__lfirst), std::move(__llast), std::move(__result)).second; + }); return std::make_pair(__last, std::move(__result)); } diff --git a/libcxx/include/__memory/allocator.h b/libcxx/include/__memory/allocator.h index 52f4122a9bf5f..1c96a2ab64578 100644 --- a/libcxx/include/__memory/allocator.h +++ b/libcxx/include/__memory/allocator.h @@ -14,7 +14,6 @@ #include <__cstddef/ptrdiff_t.h> #include <__cstddef/size_t.h> #include <__memory/addressof.h> -#include <__memory/allocate_at_least.h> #include <__memory/allocator_traits.h> #include <__new/allocate.h> #include <__new/exceptions.h> @@ -51,33 +50,21 @@ class allocator { }; #endif // _LIBCPP_STD_VER <= 17 -// This class provides a non-trivial default constructor to the class that derives from it -// if the condition is satisfied. -// -// The second template parameter exists to allow giving a unique type to __non_trivial_if, -// which makes it possible to avoid breaking the ABI when making this a base class of an -// existing class. Without that, imagine we have classes D1 and D2, both of which used to -// have no base classes, but which now derive from __non_trivial_if. 
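The conditional-base trick this removed comment describes (renamed by the patch to __non_trivially_default_constructible_if) is easy to demonstrate in isolation. Below is a minimal standalone sketch; the names are made up for illustration and are not libc++'s internals:

#include <type_traits>

// Stand-in for the conditional base; hypothetical names, not libc++'s.
template <bool _NonTrivial, class _UniqueTag>
struct non_trivial_if {};

template <class _UniqueTag>
struct non_trivial_if<true, _UniqueTag> {
  constexpr non_trivial_if() noexcept {}
};

// A user-provided constructor in the base is enough to make the derived
// class non-trivially default constructible.
struct Trivial : non_trivial_if<false, Trivial> {};
struct NonTrivial : non_trivial_if<true, NonTrivial> {};
static_assert(std::is_trivially_default_constructible<Trivial>::value, "");
static_assert(!std::is_trivially_default_constructible<NonTrivial>::value, "");

// With a shared (non-unique) tag, two classes pick up base subobjects of
// the *same* type; same-type subobjects may not share an address, so a
// class deriving from both can no longer be empty.
struct E1 : non_trivial_if<true, void> {};
struct E2 : non_trivial_if<true, void> {};
struct BothShared : E1, E2 {};
static_assert(sizeof(BothShared) > 1, "");

int main() {}

The last assertion is guaranteed by the language, which is exactly why the real class takes a unique tag as its second template parameter, as the rest of this comment explains.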
The layout of a class -// that inherits from both D1 and D2 will change because the two __non_trivial_if base -// classes are not allowed to share the same address. -// -// By making those __non_trivial_if base classes unique, we work around this problem and -// it is safe to start deriving from __non_trivial_if in existing classes. -template -struct __non_trivial_if {}; +template +struct __non_trivially_default_constructible_if {}; template -struct __non_trivial_if { - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __non_trivial_if() _NOEXCEPT {} +struct __non_trivially_default_constructible_if { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __non_trivially_default_constructible_if() {} }; -// allocator -// -// Note: For ABI compatibility between C++20 and previous standards, we make -// allocator trivial in C++20. - template -class allocator : private __non_trivial_if::value, allocator<_Tp> > { +class allocator +// TODO(LLVM 24): Remove the opt-out +#ifdef _LIBCPP_DEPRECATED_ABI_NON_TRIVIAL_ALLOCATOR + : __non_trivially_default_constructible_if::value, allocator<_Tp> > +#endif +{ static_assert(!is_const<_Tp>::value, "std::allocator does not support const types"); static_assert(!is_volatile<_Tp>::value, "std::allocator does not support volatile types"); diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer index 1e05e4df8ba0f..d6176f8ca2749 100644 --- a/libcxx/include/__split_buffer +++ b/libcxx/include/__split_buffer @@ -33,7 +33,6 @@ #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_destructible.h> #include <__type_traits/is_trivially_relocatable.h> -#include <__type_traits/remove_reference.h> #include <__utility/forward.h> #include <__utility/move.h> @@ -54,8 +53,7 @@ class __split_buffer_pointer_layout { protected: using value_type = _Tp; using allocator_type = _Allocator; - using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t; - using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits; using reference = value_type&; using const_reference = const value_type&; using size_type = typename __alloc_traits::size_type; @@ -159,9 +157,9 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { return *(__end_ - 1); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator( - __split_buffer_pointer_layout<__split_buffer, + __split_buffer_pointer_layout<__split_buffer, value_type, - __alloc_rr&>& __other) _NOEXCEPT { + allocator_type>& __other) _NOEXCEPT { std::swap(__front_cap_, __other.__front_cap_); std::swap(__begin_, __other.__begin_); std::swap(__back_cap_, __other.__back_cap_); @@ -207,8 +205,7 @@ class __split_buffer_size_layout { protected: using value_type = _Tp; using allocator_type = _Allocator; - using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t; - using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits; using reference = value_type&; using const_reference = const value_type&; using size_type = typename __alloc_traits::size_type; @@ -316,9 +313,9 @@ public: } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator( - __split_buffer_pointer_layout<__split_buffer, + __split_buffer_pointer_layout<__split_buffer, value_type, - __alloc_rr&>& __other) _NOEXCEPT { + allocator_type>& __other) _NOEXCEPT { std::swap(__front_cap_, __other.__front_cap_); std::swap(__begin_, __other.__begin_); 
std::swap(__cap_, __other.__cap_); @@ -386,8 +383,7 @@ private: // protected: // using value_type = _Tp; // using allocator_type = _Allocator; -// using __alloc_rr = __libcpp_remove_reference_t; -// using __alloc_traits = allocator_traits<__alloc_rr>; +// using __alloc_traits = allocator_traits; // using reference = value_type&; // using const_reference = const value_type&; // using size_type = typename __alloc_traits::size_type; @@ -462,7 +458,6 @@ public: using __base_type::__set_sentinel; using __base_type::__set_valid_range; - using typename __base_type::__alloc_rr; using typename __base_type::__alloc_traits; using typename __base_type::allocator_type; using typename __base_type::const_iterator; @@ -489,18 +484,18 @@ public: _LIBCPP_HIDE_FROM_ABI __split_buffer() = default; - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(__alloc_rr& __a) : __base_type(__a) {} + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(allocator_type& __a) : __base_type(__a) {} - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(const __alloc_rr& __a) + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(const allocator_type& __a) : __base_type(__a) {} _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI - __split_buffer(size_type __cap, size_type __start, __alloc_rr& __a); + __split_buffer(size_type __cap, size_type __start, allocator_type& __a); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c) _NOEXCEPT_(is_nothrow_move_constructible::value); - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c, const __alloc_rr& __a); + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c, const allocator_type& __a); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer& operator=(__split_buffer&& __c) _NOEXCEPT_((__alloc_traits::propagate_on_container_move_assignment::value && @@ -560,7 +555,7 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __destruct_at_end(pointer __new_last, true_type) _NOEXCEPT; _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer& __x) - _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>); + _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const { if (__front_cap() == nullptr) { @@ -589,7 +584,7 @@ public: } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void - __swap_without_allocator(__split_buffer& __other) _NOEXCEPT { + __swap_without_allocator(__split_buffer& __other) _NOEXCEPT { __base_type::__swap_without_allocator(__other); } @@ -653,7 +648,7 @@ template class _Lay template _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) { - __alloc_rr& __a = __get_allocator(); + allocator_type& __a = __get_allocator(); for (; __first != __last; ++__first) { if (__back_spare() == 0) { size_type __old_cap = capacity(); @@ -718,7 +713,7 @@ __split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_end(pointer __new_last, template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 -__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(size_type __cap, size_type __start, __alloc_rr& __a) +__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(size_type __cap, size_type __start, 
allocator_type& __a) : __base_type(__a) { _LIBCPP_ASSERT_INTERNAL(__cap >= __start, "can't have a start point outside the capacity"); if (__cap > 0) { @@ -748,7 +743,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::__split_ template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 -__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c, const __alloc_rr& __a) +__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c, const allocator_type& __a) : __base_type(__a) { if (__a == __c.__get_allocator()) { __set_data(__c.__front_cap()); @@ -781,7 +776,7 @@ __split_buffer<_Tp, _Allocator, _Layout>::operator=(__split_buffer&& __c) template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::swap(__split_buffer& __x) - _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>) { + _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v) { __base_type::swap(__x); } @@ -791,7 +786,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::shr #if _LIBCPP_HAS_EXCEPTIONS try { #endif // _LIBCPP_HAS_EXCEPTIONS - __split_buffer __t(size(), 0, __get_allocator()); + __split_buffer __t(size(), 0, __get_allocator()); if (__t.capacity() < capacity()) { __t.__construct_at_end(move_iterator(begin()), move_iterator(end())); __t.__set_sentinel(size()); @@ -818,7 +813,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emp __set_valid_range(std::move_backward(begin(), __end, __new_end), __new_end); } else { size_type __c = std::max(2 * capacity(), 1); - __split_buffer __t(__c, (__c + 3) / 4, __get_allocator()); + __split_buffer __t(__c, (__c + 3) / 4, __get_allocator()); __t.__construct_at_end(move_iterator(begin()), move_iterator(__end)); __base_type::__swap_without_allocator(__t); } @@ -840,7 +835,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emp __set_valid_range(begin() - __d, __end); } else { size_type __c = std::max(2 * capacity(), 1); - __split_buffer __t(__c, __c / 4, __get_allocator()); + __split_buffer __t(__c, __c / 4, __get_allocator()); __t.__construct_at_end(move_iterator(begin()), move_iterator(__end)); __base_type::__swap_without_allocator(__t); } diff --git a/libcxx/include/__tree b/libcxx/include/__tree index ceae22bb48702..f8064106de075 100644 --- a/libcxx/include/__tree +++ b/libcxx/include/__tree @@ -902,8 +902,6 @@ public: _LIBCPP_HIDE_FROM_ABI __tree& operator=(const __tree& __t); template _LIBCPP_HIDE_FROM_ABI void __assign_unique(_ForwardIterator __first, _ForwardIterator __last); - template - _LIBCPP_HIDE_FROM_ABI void __assign_multi(_InputIterator __first, _InputIterator __last); _LIBCPP_HIDE_FROM_ABI __tree(__tree&& __t) _NOEXCEPT_( is_nothrow_move_constructible<__node_allocator>::value&& is_nothrow_move_constructible::value); _LIBCPP_HIDE_FROM_ABI __tree(__tree&& __t, const allocator_type& __a); @@ -1036,11 +1034,6 @@ public: } } - _LIBCPP_HIDE_FROM_ABI pair __node_assign_unique(const value_type& __v, __node_pointer __dest); - - _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(__node_pointer __nd); - _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(const_iterator __p, __node_pointer __nd); - template _LIBCPP_HIDE_FROM_ABI void __insert_range_unique(_InIter __first, _Sent __last) { if (__first == __last) @@ -1311,43 +1304,6 @@ private: __lhs = std::forward<_From>(__rhs); } - struct _DetachedTreeCache { - 
_LIBCPP_HIDE_FROM_ABI explicit _DetachedTreeCache(__tree* __t) _NOEXCEPT - : __t_(__t), - __cache_root_(__detach_from_tree(__t)) { - __advance(); - } - - _LIBCPP_HIDE_FROM_ABI __node_pointer __get() const _NOEXCEPT { return __cache_elem_; } - - _LIBCPP_HIDE_FROM_ABI void __advance() _NOEXCEPT { - __cache_elem_ = __cache_root_; - if (__cache_root_) { - __cache_root_ = __detach_next(__cache_root_); - } - } - - _LIBCPP_HIDE_FROM_ABI ~_DetachedTreeCache() { - __t_->destroy(__cache_elem_); - if (__cache_root_) { - while (__cache_root_->__parent_ != nullptr) - __cache_root_ = static_cast<__node_pointer>(__cache_root_->__parent_); - __t_->destroy(__cache_root_); - } - } - - _DetachedTreeCache(_DetachedTreeCache const&) = delete; - _DetachedTreeCache& operator=(_DetachedTreeCache const&) = delete; - - private: - _LIBCPP_HIDE_FROM_ABI static __node_pointer __detach_from_tree(__tree* __t) _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI static __node_pointer __detach_next(__node_pointer) _NOEXCEPT; - - __tree* __t_; - __node_pointer __cache_root_; - __node_pointer __cache_elem_; - }; - class __tree_deleter { __node_allocator& __alloc_; @@ -1486,47 +1442,6 @@ private: } }; -// Precondition: __size_ != 0 -template -typename __tree<_Tp, _Compare, _Allocator>::__node_pointer -__tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_from_tree(__tree* __t) _NOEXCEPT { - __node_pointer __cache = static_cast<__node_pointer>(__t->__begin_node_); - __t->__begin_node_ = __t->__end_node(); - __t->__end_node()->__left_->__parent_ = nullptr; - __t->__end_node()->__left_ = nullptr; - __t->__size_ = 0; - // __cache->__left_ == nullptr - if (__cache->__right_ != nullptr) - __cache = static_cast<__node_pointer>(__cache->__right_); - // __cache->__left_ == nullptr - // __cache->__right_ == nullptr - return __cache; -} - -// Precondition: __cache != nullptr -// __cache->left_ == nullptr -// __cache->right_ == nullptr -// This is no longer a red-black tree -template -typename __tree<_Tp, _Compare, _Allocator>::__node_pointer -__tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_next(__node_pointer __cache) _NOEXCEPT { - if (__cache->__parent_ == nullptr) - return nullptr; - if (std::__tree_is_left_child(static_cast<__node_base_pointer>(__cache))) { - __cache->__parent_->__left_ = nullptr; - __cache = static_cast<__node_pointer>(__cache->__parent_); - if (__cache->__right_ == nullptr) - return __cache; - return static_cast<__node_pointer>(std::__tree_leaf(__cache->__right_)); - } - // __cache is right child - __cache->__parent_unsafe()->__right_ = nullptr; - __cache = static_cast<__node_pointer>(__cache->__parent_); - if (__cache->__left_ == nullptr) - return __cache; - return static_cast<__node_pointer>(std::__tree_leaf(__cache->__left_)); -} - template __tree<_Tp, _Compare, _Allocator>& __tree<_Tp, _Compare, _Allocator>::operator=(const __tree& __t) { if (this == std::addressof(__t)) @@ -1549,46 +1464,6 @@ __tree<_Tp, _Compare, _Allocator>& __tree<_Tp, _Compare, _Allocator>::operator=( return *this; } -template -template -void __tree<_Tp, _Compare, _Allocator>::__assign_unique(_ForwardIterator __first, _ForwardIterator __last) { - using _ITraits = iterator_traits<_ForwardIterator>; - using _ItValueType = typename _ITraits::value_type; - static_assert( - is_same<_ItValueType, value_type>::value, "__assign_unique may only be called with the containers value type"); - static_assert( - __has_forward_iterator_category<_ForwardIterator>::value, "__assign_unique requires a forward iterator"); - if (__size_ != 0) { - 
_DetachedTreeCache __cache(this); - for (; __cache.__get() != nullptr && __first != __last; ++__first) { - if (__node_assign_unique(*__first, __cache.__get()).second) - __cache.__advance(); - } - } - for (; __first != __last; ++__first) - __emplace_unique(*__first); -} - -template -template -void __tree<_Tp, _Compare, _Allocator>::__assign_multi(_InputIterator __first, _InputIterator __last) { - using _ITraits = iterator_traits<_InputIterator>; - using _ItValueType = typename _ITraits::value_type; - static_assert( - is_same<_ItValueType, value_type>::value, "__assign_multi may only be called with the containers value_type"); - if (__size_ != 0) { - _DetachedTreeCache __cache(this); - for (; __cache.__get() && __first != __last; ++__first) { - __assign_value(__cache.__get()->__get_value(), *__first); - __node_insert_multi(__cache.__get()); - __cache.__advance(); - } - } - const_iterator __e = end(); - for (; __first != __last; ++__first) - __emplace_hint_multi(__e, *__first); -} - template __tree<_Tp, _Compare, _Allocator>::__tree(const __tree& __t) : __begin_node_(__end_node()), @@ -1942,39 +1817,6 @@ __tree<_Tp, _Compare, _Allocator>::__emplace_hint_multi(const_iterator __p, _Arg return iterator(static_cast<__node_pointer>(__h.release())); } -template -pair::iterator, bool> -__tree<_Tp, _Compare, _Allocator>::__node_assign_unique(const value_type& __v, __node_pointer __nd) { - auto [__parent, __child] = __find_equal(__v); - __node_pointer __r = static_cast<__node_pointer>(__child); - bool __inserted = false; - if (__child == nullptr) { - __assign_value(__nd->__get_value(), __v); - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); - __r = __nd; - __inserted = true; - } - return pair(iterator(__r), __inserted); -} - -template -typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::__node_insert_multi(__node_pointer __nd) { - __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf_high(__parent, __nd->__get_value()); - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); - return iterator(__nd); -} - -template -typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::__node_insert_multi(const_iterator __p, __node_pointer __nd) { - __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf(__p, __parent, __nd->__get_value()); - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); - return iterator(__nd); -} - template typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::__remove_node_pointer(__node_pointer __ptr) _NOEXCEPT { diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h index 4961a5fcb2067..93358d863492e 100644 --- a/libcxx/include/__vector/vector.h +++ b/libcxx/include/__vector/vector.h @@ -687,9 +687,9 @@ class vector { } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void - __swap_out_circular_buffer(__split_buffer& __v); + __swap_out_circular_buffer(__split_buffer& __v); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer - __swap_out_circular_buffer(__split_buffer& __v, pointer __p); + __swap_out_circular_buffer(__split_buffer& __v, pointer __p); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_range(pointer __from_s, pointer __from_e, pointer __to); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign(vector& __c, true_type) @@ -810,7 +810,7 @@ class vector { return __p; } - 
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_layouts(__split_buffer<_Tp, allocator_type&>& __sb) { + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_layouts(__split_buffer<_Tp, allocator_type>& __sb) { auto __vector_begin = __begin_; auto __vector_sentinel = __end_; auto __vector_cap = __cap_; @@ -855,7 +855,7 @@ vector(from_range_t, _Range&&, _Alloc = _Alloc()) -> vector _LIBCPP_CONSTEXPR_SINCE_CXX20 void -vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v) { +vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v) { __annotate_delete(); auto __new_begin = __v.begin() - size(); std::__uninitialized_allocator_relocate( @@ -874,7 +874,7 @@ vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer _LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer -vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v, pointer __p) { +vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v, pointer __p) { __annotate_delete(); pointer __ret = __v.begin(); @@ -1074,7 +1074,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::reserve(size_type __ if (__n > capacity()) { if (__n > max_size()) this->__throw_length_error(); - __split_buffer __v(__n, size(), this->__alloc_); + __split_buffer __v(__n, size(), this->__alloc_); __swap_out_circular_buffer(__v); } } @@ -1085,7 +1085,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::shrink_to_fit() _NOE #if _LIBCPP_HAS_EXCEPTIONS try { #endif // _LIBCPP_HAS_EXCEPTIONS - __split_buffer __v(size(), size(), this->__alloc_); + __split_buffer __v(size(), size(), this->__alloc_); // The Standard mandates shrink_to_fit() does not increase the capacity. // With equal capacity keep the existing buffer. This avoids extra work // due to swapping the elements. @@ -1102,7 +1102,7 @@ template template _LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) { - __split_buffer __v(__recommend(size() + 1), size(), this->__alloc_); + __split_buffer __v(__recommend(size() + 1), size(), this->__alloc_); // __v.emplace_back(std::forward<_Args>(__args)...); pointer __end = __v.end(); __alloc_traits::construct(this->__alloc_, std::__to_address(__end), std::forward<_Args>(__args)...); @@ -1205,7 +1205,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, const_reference __x) *__p = *__xr; } } else { - __split_buffer __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_); + __split_buffer __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_); __v.emplace_back(__x); __p = __swap_out_circular_buffer(__v, __p); } @@ -1224,7 +1224,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, value_type&& __x) { *__p = std::move(__x); } } else { - __split_buffer __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_); + __split_buffer __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_); __v.emplace_back(std::move(__x)); __p = __swap_out_circular_buffer(__v, __p); } @@ -1245,7 +1245,7 @@ vector<_Tp, _Allocator>::emplace(const_iterator __position, _Args&&... 
__args) { *__p = std::move(__tmp.get()); } } else { - __split_buffer __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_); + __split_buffer __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_); __v.emplace_back(std::forward<_Args>(__args)...); __p = __swap_out_circular_buffer(__v, __p); } @@ -1273,7 +1273,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, size_type __n, const_ std::fill_n(__p, __n, *__xr); } } else { - __split_buffer __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_); + __split_buffer __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_); __v.__construct_at_end(__n, __x); __p = __swap_out_circular_buffer(__v, __p); } @@ -1294,11 +1294,11 @@ vector<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __position, _Inpu if (__first == __last) (void)std::rotate(__p, __old_last, this->__end_); else { - __split_buffer __v(__alloc_); + __split_buffer __v(__alloc_); auto __guard = std::__make_exception_guard( _AllocatorDestroyRangeReverse(__alloc_, __old_last, this->__end_)); __v.__construct_at_end_with_sentinel(std::move(__first), std::move(__last)); - __split_buffer __merged( + __split_buffer __merged( __recommend(size() + __v.size()), __off, __alloc_); // has `__off` positions available at the front std::__uninitialized_allocator_relocate( __alloc_, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.end())); @@ -1344,7 +1344,7 @@ vector<_Tp, _Allocator>::__insert_with_size( __insert_assign_n_unchecked<_AlgPolicy>(std::move(__first), __n, __p); } } else { - __split_buffer __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_); + __split_buffer __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_); __v.__construct_at_end_with_size(std::move(__first), __n); __p = __swap_out_circular_buffer(__v, __p); } @@ -1359,7 +1359,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::resize(size_type __n if (__new_size <= capacity()) { __construct_at_end(__new_size - __current_size); } else { - __split_buffer __v(__recommend(__new_size), __current_size, __alloc_); + __split_buffer __v(__recommend(__new_size), __current_size, __alloc_); __v.__construct_at_end(__new_size - __current_size); __swap_out_circular_buffer(__v); } @@ -1375,7 +1375,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::resize(size_type __n if (__new_size <= capacity()) __construct_at_end(__new_size - __current_size, __x); else { - __split_buffer __v(__recommend(__new_size), __current_size, __alloc_); + __split_buffer __v(__recommend(__new_size), __current_size, __alloc_); __v.__construct_at_end(__new_size - __current_size, __x); __swap_out_circular_buffer(__v); } diff --git a/libcxx/include/deque b/libcxx/include/deque index ad2d759e1fcac..befe27bb4282d 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -1785,9 +1785,9 @@ template template _LIBCPP_HIDE_FROM_ABI typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __p, _Iterator __f, _Sentinel __l) { - __split_buffer __buf(__alloc()); + __split_buffer __buf(__alloc()); __buf.__construct_at_end_with_sentinel(std::move(__f), std::move(__l)); - typedef typename __split_buffer::iterator __bi; + typedef typename __split_buffer::iterator __bi; return insert(__p, move_iterator<__bi>(__buf.begin()), move_iterator<__bi>(__buf.end())); } @@ -1802,9 +1802,9 @@ template template _LIBCPP_HIDE_FROM_ABI typename deque<_Tp, _Allocator>::iterator deque<_Tp, 
_Allocator>::__insert_with_size(const_iterator __p, _Iterator __f, size_type __n) { - __split_buffer __buf(__n, 0, __alloc()); + __split_buffer __buf(__n, 0, __alloc()); __buf.__construct_at_end_with_size(__f, __n); - typedef typename __split_buffer::iterator __fwd; + typedef typename __split_buffer::iterator __fwd; return insert(__p, move_iterator<__fwd>(__buf.begin()), move_iterator<__fwd>(__buf.end())); } @@ -1982,7 +1982,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity() { } // Else need to allocate 1 buffer, *and* we need to reallocate __map_. else { - __split_buffer __buf( + __split_buffer __buf( std::max(2 * __map_.capacity(), 1), 0, __map_.__get_allocator()); typedef __allocator_destructor<_Allocator> _Dp; @@ -2042,7 +2042,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity(size_type __n) { // Else need to allocate __nb buffers, *and* we need to reallocate __map_. else { size_type __ds = (__nb + __back_capacity) * __block_size - __map_.empty(); - __split_buffer __buf( + __split_buffer __buf( std::max(2 * __map_.capacity(), __nb + __map_.size()), 0, __map_.__get_allocator()); auto __guard = std::__make_exception_guard([&] { __annotate_delete(); @@ -2094,7 +2094,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity() { } // Else need to allocate 1 buffer, *and* we need to reallocate __map_. else { - __split_buffer __buf( + __split_buffer __buf( std::max(2 * __map_.capacity(), 1), __map_.size(), __map_.__get_allocator()); typedef __allocator_destructor<_Allocator> _Dp; @@ -2154,7 +2154,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity(size_type __n) { // Else need to allocate __nb buffers, *and* we need to reallocate __map_. else { size_type __ds = __front_capacity * __block_size; - __split_buffer __buf( + __split_buffer __buf( std::max(2 * __map_.capacity(), __nb + __map_.size()), __map_.size() - __front_capacity, __map_.__get_allocator()); diff --git a/libcxx/include/map b/libcxx/include/map index 0dca11cabd12e..e67f7cef5861d 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -1015,7 +1015,8 @@ public: # endif _LIBCPP_HIDE_FROM_ABI map& operator=(initializer_list __il) { - __tree_.__assign_unique(__il.begin(), __il.end()); + clear(); + insert(__il.begin(), __il.end()); return *this; } @@ -1689,7 +1690,8 @@ public: # endif _LIBCPP_HIDE_FROM_ABI multimap& operator=(initializer_list __il) { - __tree_.__assign_multi(__il.begin(), __il.end()); + clear(); + insert(__il.begin(), __il.end()); return *this; } diff --git a/libcxx/include/set b/libcxx/include/set index 3d6f571a42a1a..f333d97defac1 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -692,7 +692,8 @@ public: # endif _LIBCPP_HIDE_FROM_ABI set& operator=(initializer_list __il) { - __tree_.__assign_unique(__il.begin(), __il.end()); + clear(); + insert(__il.begin(), __il.end()); return *this; } @@ -1136,7 +1137,8 @@ public: # endif _LIBCPP_HIDE_FROM_ABI multiset& operator=(initializer_list __il) { - __tree_.__assign_multi(__il.begin(), __il.end()); + clear(); + insert(__il.begin(), __il.end()); return *this; } diff --git a/libcxx/test/libcxx/memory/allocator_triviality.compile.pass.cpp b/libcxx/test/libcxx/memory/allocator_triviality.compile.pass.cpp new file mode 100644 index 0000000000000..ff298963e074a --- /dev/null +++ b/libcxx/test/libcxx/memory/allocator_triviality.compile.pass.cpp @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +// Make sure that std::allocator is trivial. + +// + +#include +#include +#include + +static_assert(std::is_trivially_default_constructible >::value, ""); +static_assert(std::is_trivially_default_constructible >::value, ""); +static_assert(std::is_trivially_default_constructible >::value, ""); + +static_assert(std::is_trivially_copyable >::value, ""); +static_assert(std::is_trivially_copyable >::value, ""); +static_assert(std::is_trivially_copyable >::value, ""); diff --git a/libcxx/test/libcxx/memory/allocator_triviality.deprecated_abi.compile.pass.cpp b/libcxx/test/libcxx/memory/allocator_triviality.deprecated_abi.compile.pass.cpp new file mode 100644 index 0000000000000..be2a1840ec903 --- /dev/null +++ b/libcxx/test/libcxx/memory/allocator_triviality.deprecated_abi.compile.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +// Make sure that std::allocator is not trivial if _LIBCPP_DEPRECATED_ABI_NON_TRIVIAL_ALLOCATOR is defined. +// std::allocator _should_ still be trivial, since it has always been trivial. + +// + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEPRECATED_ABI_NON_TRIVIAL_ALLOCATOR + +#include +#include +#include + +static_assert(!std::is_trivially_default_constructible >::value, ""); +static_assert(!std::is_trivially_default_constructible >::value, ""); +static_assert(std::is_trivially_default_constructible >::value, ""); + +static_assert(std::is_trivially_copyable >::value, ""); +static_assert(std::is_trivially_copyable >::value, ""); +static_assert(std::is_trivially_copyable >::value, ""); diff --git a/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp b/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp deleted file mode 100644 index b7dfc190e8e91..0000000000000 --- a/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Make sure that std::allocator is trivial. This was the case before C++20 -// with the std::allocator explicit specialization, and this test makes sure -// that we maintain that property across all standards. -// -// This is important since triviality has implications on how the type is passed -// as a function argument in the ABI. 
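The same guarantees can be checked outside the test suite. A minimal sketch, assuming a libc++ built with this patch; the opt-out macro is the one introduced in the release note above, and must be defined before any include (e.g. on the command line):

// Build normally, or with -D_LIBCPP_DEPRECATED_ABI_NON_TRIVIAL_ALLOCATOR
// to restore the old, non-trivial default constructor.
#include <memory>
#include <type_traits>

#ifdef _LIBCPP_DEPRECATED_ABI_NON_TRIVIAL_ALLOCATOR
static_assert(!std::is_trivially_default_constructible<std::allocator<int> >::value, "");
#else
static_assert(std::is_trivially_default_constructible<std::allocator<int> >::value, "");
#endif

// Trivial copyability is unaffected by the macro.
static_assert(std::is_trivially_copyable<std::allocator<int> >::value, "");

int main() {}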
- -#include -#include - -typedef std::allocator A1; -struct A2 : std::allocator { }; - -static_assert(std::is_trivially_default_constructible::value, ""); -static_assert(std::is_trivially_copyable::value, ""); - -static_assert(std::is_trivially_default_constructible::value, ""); -static_assert(std::is_trivially_copyable::value, ""); diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp index 5dd1d6f0b3380..b08fce2b701e2 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp @@ -12,9 +12,6 @@ // template // constexpr optional make_optional(Args&&... args); -// GCC crashes on this file, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120577 -// XFAIL: gcc-15 - #include #include #include diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit_initializer_list.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit_initializer_list.pass.cpp index 5ddb229ad9268..80371d6333712 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit_initializer_list.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit_initializer_list.pass.cpp @@ -12,9 +12,6 @@ // template // constexpr optional make_optional(initializer_list il, Args&&... args); -// GCC crashes on this file, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120577 -// XFAIL: gcc-15 - #include #include #include diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 33fcd841b2ab0..afa0cae790377 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -41,7 +41,8 @@ #define _LIBUNWIND_CHECK_LINUX_SIGRETURN 1 #endif -#if defined(_LIBUNWIND_TARGET_HAIKU) && defined(_LIBUNWIND_TARGET_X86_64) +#if defined(_LIBUNWIND_TARGET_HAIKU) && \ + (defined(_LIBUNWIND_TARGET_I386) || defined(_LIBUNWIND_TARGET_X86_64)) #include #include #define _LIBUNWIND_CHECK_HAIKU_SIGRETURN 1 @@ -1366,7 +1367,7 @@ class UnwindCursor : public AbstractUnwindCursor{ bool _unwindInfoMissing; bool _isSignalFrame; #if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) || \ - defined(_LIBUNWIND_TARGET_HAIKU) + defined(_LIBUNWIND_CHECK_HAIKU_SIGRETURN) bool _isSigReturn = false; #endif #ifdef _LIBUNWIND_TRACE_RET_INJECT diff --git a/lldb/include/lldb/Host/Terminal.h b/lldb/include/lldb/Host/Terminal.h index 3d66515c18812..da0d05e8bd265 100644 --- a/lldb/include/lldb/Host/Terminal.h +++ b/lldb/include/lldb/Host/Terminal.h @@ -68,18 +68,6 @@ class Terminal { llvm::Error SetHardwareFlowControl(bool enabled); - /// Returns whether or not the current terminal supports Unicode rendering. - /// - /// The value is cached after the first computation. - /// - /// On POSIX systems, we check if the LANG environment variable contains the - /// substring "UTF-8", case insensitive. - /// - /// On Windows, we always return true since we use the `WriteConsoleW` API - /// internally. Note that the default Windows codepage (437) does not support - /// all Unicode characters. This function does not check the codepage. 
- static bool SupportsUnicode(); - protected: struct Data; diff --git a/lldb/include/lldb/Host/common/DiagnosticsRendering.h b/lldb/include/lldb/Host/common/DiagnosticsRendering.h index 3eea0647da37e..dd33d671c24a5 100644 --- a/lldb/include/lldb/Host/common/DiagnosticsRendering.h +++ b/lldb/include/lldb/Host/common/DiagnosticsRendering.h @@ -59,27 +59,10 @@ struct DiagnosticDetail { StructuredData::ObjectSP Serialize(llvm::ArrayRef details); -/// Renders an array of DiagnosticDetail instances. -/// -/// \param[in] stream -/// The stream to render the diagnostics to. -/// \param offset_in_command -/// An optional offset to the column position of the diagnostic in the -/// source. -/// \param show_inline -/// Whether to show the diagnostics inline. -/// \param details -/// The array of DiagnosticsDetail to render. -/// \param force_ascii -/// Whether to force ascii rendering. If false, Unicode characters will be -/// used if the output file supports them. -/// -/// \see lldb_private::Terminal::SupportsUnicode void RenderDiagnosticDetails(Stream &stream, std::optional offset_in_command, bool show_inline, - llvm::ArrayRef details, - bool force_ascii = false); + llvm::ArrayRef details); class DiagnosticError : public llvm::ErrorInfo { diff --git a/lldb/source/Host/common/DiagnosticsRendering.cpp b/lldb/source/Host/common/DiagnosticsRendering.cpp index 2c9d33a6c325c..f2cd3968967fb 100644 --- a/lldb/source/Host/common/DiagnosticsRendering.cpp +++ b/lldb/source/Host/common/DiagnosticsRendering.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/Host/common/DiagnosticsRendering.h" -#include "lldb/Host/Terminal.h" - #include using namespace lldb_private; @@ -87,8 +85,7 @@ static llvm::raw_ostream &PrintSeverity(Stream &stream, void RenderDiagnosticDetails(Stream &stream, std::optional offset_in_command, bool show_inline, - llvm::ArrayRef details, - bool force_ascii) { + llvm::ArrayRef details) { if (details.empty()) return; @@ -100,8 +97,12 @@ void RenderDiagnosticDetails(Stream &stream, return; } + // Since there is no other way to find this out, use the color + // attribute as a proxy for whether the terminal supports Unicode + // characters. In the future it might make sense to move this into + // Host so it can be customized for a specific platform. 
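A rough standalone sketch of the heuristic this new comment describes, using the same raw_ostream API the patched code calls (assumes an LLVM build environment):

#include "llvm/Support/raw_ostream.h"

// If the stream reports color support, assume it can also render the
// Unicode box-drawing glyphs; otherwise fall back to plain ASCII.
static void drawMarker(llvm::raw_ostream &OS) {
  bool Unicode = OS.colors_enabled();
  const char *Cursor = Unicode ? "˄" : "^";
  const char *Underline = Unicode ? "˜" : "~";
  OS << Cursor << Underline << '\n';
}

int main() { drawMarker(llvm::outs()); }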
llvm::StringRef cursor, underline, vbar, joint, hbar, spacer; - if (Terminal::SupportsUnicode() && !force_ascii) { + if (stream.AsRawOstream().colors_enabled()) { cursor = "˄"; underline = "˜"; vbar = "│"; diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp index d3647835e3937..436dfd8130d9b 100644 --- a/lldb/source/Host/common/Terminal.cpp +++ b/lldb/source/Host/common/Terminal.cpp @@ -400,22 +400,6 @@ llvm::Error Terminal::SetHardwareFlowControl(bool enabled) { #endif // LLDB_ENABLE_TERMIOS } -bool Terminal::SupportsUnicode() { - static std::optional g_result; - if (g_result) - return g_result.value(); -#ifdef _WIN32 - return true; -#else - const char *lang_var = std::getenv("LANG"); - if (!lang_var) - return false; - g_result = - llvm::StringRef(lang_var).lower().find("utf-8") != std::string::npos; -#endif - return g_result.value(); -} - TerminalState::TerminalState(Terminal term, bool save_process_group) : m_tty(term) { Save(term, save_process_group); diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index afc1753e21c46..0198ddcfa31e0 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -316,6 +316,11 @@ void CommandInterpreter::Initialize() { AddAlias("continue", cmd_obj_sp); } + // At this point, I'm leaving the "b" command aliased to "_regexp-break". There's + // a catch-all regexp in the command that takes any unrecognized input and + // runs it as `break set `, and switching the command to `break add` + // would change that behavior. People who want to use `break add` for the + // "b" alias can do so in their .lldbinit. cmd_obj_sp = GetCommandSPExact("_regexp-break"); if (cmd_obj_sp) AddAlias("b", cmd_obj_sp)->SetSyntax(cmd_obj_sp->GetSyntax()); @@ -668,6 +673,89 @@ void CommandInterpreter::LoadCommandDictionary() { } } + // clang-format off + // FIXME: It would be simpler to just use the linespecs directly here, but + // the `b` alias allows "foo.c : 12 : 45" while the linespec parser + // is more rigorous and doesn't strip spaces, so the two are not equivalent. 
+  const char *break_add_regexes[][2] = {
+      {"^(.*[^[:space:]])[[:space:]]*:[[:space:]]*([[:digit:]]+)[[:space:]]*:[[:space:]]*([[:digit:]]+)[[:space:]]*$",
+       "breakpoint add file --file '%1' --line %2 --column %3"},
+      {"^(.*[^[:space:]])[[:space:]]*:[[:space:]]*([[:digit:]]+)[[:space:]]*$",
+       "breakpoint add file --file '%1' --line %2"},
+      {"^/([^/]+)/$", "breakpoint add pattern -- %1"},
+      {"^([[:digit:]]+)[[:space:]]*$",
+       "breakpoint add file --line %1"},
+      {"^\\*?(0x[[:xdigit:]]+)[[:space:]]*$",
+       "breakpoint add address %1"},
+      {"^[\"']?([-+]?\\[.*\\])[\"']?[[:space:]]*$",
+       "breakpoint add name '%1'"},
+      {"^(-.*)$",
+       "breakpoint add name '%1'"},
+      {"^(.*[^[:space:]])`(.*[^[:space:]])[[:space:]]*$",
+       "breakpoint add name '%2' --shlib '%1'"},
+      {"^\\&(.*[^[:space:]])[[:space:]]*$",
+       "breakpoint add name '%1' --skip-prologue=0"},
+      {"^[\"']?(.*[^[:space:]\"'])[\"']?[[:space:]]*$",
+       "breakpoint add name '%1'"}};
+  // clang-format on
+
+  size_t num_add_regexes = std::size(break_add_regexes);
+
+  std::unique_ptr<CommandObjectRegexCommand> break_add_regex_cmd_up(
+      new CommandObjectRegexCommand(
+          *this, "_regexp-break-add",
+          "Set a breakpoint using one of several shorthand formats, or list "
+          "the existing breakpoints if no arguments are provided.",
+          "\n"
+          "_regexp-break-add <filename>:<linenum>:<colnum>\n"
+          "              main.c:12:21          // Break at line 12 and column "
+          "21 of main.c\n\n"
+          "_regexp-break-add <filename>:<linenum>\n"
+          "              main.c:12             // Break at line 12 of "
+          "main.c\n\n"
+          "_regexp-break-add <linenum>\n"
+          "              12                    // Break at line 12 of current "
+          "file\n\n"
+          "_regexp-break-add 0x<address>\n"
+          "              0x1234000             // Break at address "
+          "0x1234000\n\n"
+          "_regexp-break-add <name>\n"
+          "              main                  // Break in 'main' after the "
+          "prologue\n\n"
+          "_regexp-break-add &<name>\n"
+          "              &main                 // Break at first instruction "
+          "in 'main'\n\n"
+          "_regexp-break-add <module>`<name>\n"
+          "              libc.so`malloc        // Break in 'malloc' from "
+          "'libc.so'\n\n"
+          "_regexp-break-add /<source-regex>/\n"
+          "              /break here/          // Break on source lines in "
+          "current file\n"
+          "                                    // containing text 'break "
+          "here'.\n"
+          "_regexp-break-add\n"
+          "                                    // List the existing "
+          "breakpoints\n",
+          lldb::eSymbolCompletion | lldb::eSourceFileCompletion, false));
+
+  if (break_add_regex_cmd_up) {
+    bool success = true;
+    for (size_t i = 0; i < num_add_regexes; i++) {
+      success = break_add_regex_cmd_up->AddRegexCommand(
+          break_add_regexes[i][0], break_add_regexes[i][1]);
+      if (!success)
+        break;
+    }
+    success =
+        break_add_regex_cmd_up->AddRegexCommand("^$", "breakpoint list --full");
+
+    if (success) {
+      CommandObjectSP break_add_regex_cmd_sp(break_add_regex_cmd_up.release());
+      m_command_dict[std::string(break_add_regex_cmd_sp->GetCommandName())] =
+          break_add_regex_cmd_sp;
+    }
+  }
+
   std::unique_ptr<CommandObjectRegexCommand> tbreak_regex_cmd_up(
       new CommandObjectRegexCommand(
           *this, "_regexp-tbreak",
diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_command/TestRegexpBreakCommand.py b/lldb/test/API/functionalities/breakpoint/breakpoint_command/TestRegexpBreakCommand.py
index 235a41d1adef3..930d497032171 100644
--- a/lldb/test/API/functionalities/breakpoint/breakpoint_command/TestRegexpBreakCommand.py
+++ b/lldb/test/API/functionalities/breakpoint/breakpoint_command/TestRegexpBreakCommand.py
@@ -10,10 +10,15 @@
 class RegexpBreakCommandTestCase(TestBase):
-    def test(self):
+    def test_set_version(self):
         """Test _regexp-break command."""
         self.build()
-        self.regexp_break_command()
+        self.regexp_break_command("_regexp-break")
+
+    def test_add_version(self):
+        """Test _regexp-break-add command."""
+        self.build()
+        self.regexp_break_command("_regexp-break-add")
 
     def setUp(self):
         # Call super's setUp().
@@ -22,12 +27,12 @@ def setUp(self):
         self.source = "main.c"
         self.line = line_number(self.source, "// Set break point at this line.")
 
-    def regexp_break_command(self):
+    def regexp_break_command(self, cmd_name):
         """Test the super concise "b" command, which is an alias for _regexp-break."""
         exe = self.getBuildArtifact("a.out")
         self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
 
-        break_results = lldbutil.run_break_set_command(self, "b %d" % self.line)
+        break_results = lldbutil.run_break_set_command(self, f"{cmd_name} {self.line}")
         lldbutil.check_breakpoint_result(
             self,
             break_results,
@@ -37,7 +42,7 @@ def regexp_break_command(self):
         )
 
         break_results = lldbutil.run_break_set_command(
-            self, "b %s:%d" % (self.source, self.line)
+            self, f"{cmd_name} {self.source}:{self.line}"
         )
         lldbutil.check_breakpoint_result(
             self,
@@ -50,7 +55,7 @@ def regexp_break_command(self):
         # Check breakpoint with full file path.
         full_path = os.path.join(self.getSourceDir(), self.source)
         break_results = lldbutil.run_break_set_command(
-            self, "b %s:%d" % (full_path, self.line)
+            self, f"{cmd_name} {full_path}:{self.line}"
         )
         lldbutil.check_breakpoint_result(
             self,
@@ -60,6 +65,17 @@ def regexp_break_command(self):
             num_locations=1,
         )
 
+        # Check breakpoint with symbol name. I'm also passing in
+        # the module so I can check the number of locations.
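        # (Illustrative, based on the regex table above: with exe_filename
        # equal to "a.out", the command below becomes
        # "_regexp-break-add a.out`main", which the <module>`<name> pattern
        # rewrites to "breakpoint add name 'main' --shlib 'a.out'".)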
+ exe_spec = lldb.SBFileSpec(exe) + exe_filename = exe_spec.basename + cmd = f"{cmd_name} {exe_filename}`main" + print(f"About to run: '{cmd}'") + break_results = lldbutil.run_break_set_command(self, cmd) + lldbutil.check_breakpoint_result( + self, break_results, symbol_name="main", num_locations=1 + ) + self.runCmd("run", RUN_SUCCEEDED) # The stop reason of the thread should be breakpoint. diff --git a/lldb/test/API/terminal/TestEditlineCompletions.py b/lldb/test/API/terminal/TestEditlineCompletions.py index b4ea0f39ec10c..ac1d3f90e2970 100644 --- a/lldb/test/API/terminal/TestEditlineCompletions.py +++ b/lldb/test/API/terminal/TestEditlineCompletions.py @@ -72,11 +72,11 @@ def test_completion_pagination(self): self.child.expect("Available completions:") self.child.expect(" _regexp-attach") self.child.expect(" _regexp-break") + self.child.expect(" _regexp-break-add") self.child.expect(" _regexp-bt") self.child.expect(" _regexp-display") self.child.expect(" _regexp-down") self.child.expect(" _regexp-env") - self.child.expect(" _regexp-jump") self.child.expect("More") @skipIfAsan diff --git a/lldb/test/Shell/BuildScript/toolchain-msvc.test b/lldb/test/Shell/BuildScript/toolchain-msvc.test index dce87d5aee2af..bde895fa6dbaf 100644 --- a/lldb/test/Shell/BuildScript/toolchain-msvc.test +++ b/lldb/test/Shell/BuildScript/toolchain-msvc.test @@ -23,15 +23,15 @@ RUN: | FileCheck --check-prefix=64BIT %s 32BIT: Cleaning {{.*}}toolchain-msvc.test.tmp\foo.pdb 32BIT: Cleaning {{.*}}toolchain-msvc.test.tmp\foo.exe 32BIT: compiling foobar.c -> foo.exe-foobar.obj -32BIT: Command Line: {{.*}}\{{[Hh]ost[Xx](64|86)}}\{{(x86|arm)}}\cl.{{EXE|exe}} +32BIT: Command Line: {{.*}}\{{[Hh]ost([Xx](64|86)|(arm64|ARM64))}}\{{(x86|arm)}}\cl.{{EXE|exe}} 32BIT: linking foo.exe-foobar.obj -> foo.exe -32BIT: Command Line: {{.*}}\{{[Hh]ost[Xx](64|86)}}\{{(x86|arm)}}\link.{{EXE|exe}} +32BIT: Command Line: {{.*}}\{{[Hh]ost([Xx](64|86)|(arm64|ARM64))}}\{{(x86|arm)}}\link.{{EXE|exe}} 32BIT: Env 32BIT: LIB = {{.*}}\ATLMFC\lib\{{(x86|arm)}} 32BIT: {{.*}}\lib\{{(x86|arm)}} 32BIT: {{.*}}\ucrt\{{(x86|arm)}} 32BIT: {{.*}}\um\{{(x86|arm)}} -32BIT: PATH = {{.*}}\bin\{{[Hh]ost[Xx](64|86)}}\{{(x86|x64)}} +32BIT: PATH = {{.*}}\bin\{{[Hh]ost([Xx](64|86)|(arm64|ARM64))}}\{{(x86|x64|arm64)}} 64BIT: Script Arguments: @@ -51,12 +51,12 @@ RUN: | FileCheck --check-prefix=64BIT %s 64BIT: Cleaning {{.*}}toolchain-msvc.test.tmp\foo.pdb 64BIT: Cleaning {{.*}}toolchain-msvc.test.tmp\foo.exe 64BIT: compiling foobar.c -> foo.exe-foobar.obj -64BIT: Command Line: {{.*}}\{{[Hh]ost[Xx](64|86)}}\{{(x64|arm64)}}\cl.{{EXE|exe}} +64BIT: Command Line: {{.*}}\{{[Hh]ost([Xx](64|86)|(arm64|ARM64))}}\{{(x64|arm64)}}\cl.{{EXE|exe}} 64BIT: linking foo.exe-foobar.obj -> foo.exe -64BIT: Command Line: {{.*}}\{{[Hh]ost[Xx](64|86)}}\{{(x64|arm64)}}\link.{{EXE|exe}} +64BIT: Command Line: {{.*}}\{{[Hh]ost([Xx](64|86)|(arm64|ARM64))}}\{{(x64|arm64)}}\link.{{EXE|exe}} 64BIT: Env 64BIT: LIB = {{.*}}\ATLMFC\lib\{{(x64|arm64)}} 64BIT: {{.*}}\lib\{{(x64|arm64)}} 64BIT: {{.*}}\ucrt\{{(x64|arm64)}} 64BIT: {{.*}}\um\{{(x64|arm64)}} -64BIT: PATH = {{.*}}\bin\{{[Hh]ost[Xx](64|86)}}\{{(x86|x64)}} +64BIT: PATH = {{.*}}\bin\{{[Hh]ost([Xx](64|86)|(arm64|ARM64))}}\{{(x86|x64|arm64)}} diff --git a/lldb/test/Shell/Commands/command-dwim-print.test b/lldb/test/Shell/Commands/command-dwim-print.test index 88e7314976ad8..9153edbd21791 100644 --- a/lldb/test/Shell/Commands/command-dwim-print.test +++ b/lldb/test/Shell/Commands/command-dwim-print.test @@ -1,16 +1,16 @@ # RUN: echo quit | %lldb -o 
"dwim-print a" \ # RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK1 # (lldb) dwim-print a -# CHECK1:{{^ (\^|˄)}} +# CHECK1:{{^ \^}} # CHECK1: {{^ error: use of undeclared identifier 'a'}} # RUN: echo quit | %lldb -o "p a" \ # RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK2 # (lldb) p a -# CHECK2:{{^ (\^|˄)}} +# CHECK2:{{^ \^}} # RUN: echo quit | %lldb -o "dwim-print -- a" \ # RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK3 # (lldb) dwim-print -- a -# CHECK3:{{^ (\^|˄)}} +# CHECK3:{{^ \^}} # RUN: echo quit | %lldb -o "settings set show-inline-diagnostics false" \ # RUN: -o "dwim-print a" 2>&1 | FileCheck %s --check-prefix=CHECK4 # CHECK4: error: :1:1: use of undeclared identifier diff --git a/lldb/test/Shell/Commands/command-expr-diagnostics.test b/lldb/test/Shell/Commands/command-expr-diagnostics.test index cde0e6c6768f7..3c827fb4516ec 100644 --- a/lldb/test/Shell/Commands/command-expr-diagnostics.test +++ b/lldb/test/Shell/Commands/command-expr-diagnostics.test @@ -2,19 +2,19 @@ # RUN: echo quit | %lldb -o "expression a+b" \ # RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK1 # (lldb) expression a+b -# CHECK1:{{^ (\^|˄) (\^|˄)}} -# CHECK1: {{^ (\||│) error: use of undeclared identifier 'b'}} +# CHECK1:{{^ \^ \^}} +# CHECK1: {{^ | error: use of undeclared identifier 'b'}} # CHECK1: {{^ error: use of undeclared identifier 'a'}} # RUN: echo quit | %lldb -o "expr a" \ # RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK2 # (lldb) expr a -# CHECK2:{{^ (\^|˄)}} +# CHECK2:{{^ \^}} # RUN: echo quit | %lldb -o "expr -i 0 -o 0 -- a" \ # RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK3 # (lldb) expr -i 0 -o 0 -- a -# CHECK3:{{^ (\^|˄)}} +# CHECK3:{{^ \^}} # CHECK3: {{^ error: use of undeclared identifier 'a'}} # RUN: echo "int main(){return 0;}">%t.c @@ -23,7 +23,7 @@ # RUN: "expr --top-level -- template T FOO(T x) { return x/2;}" -o \ # RUN: "expression -- FOO(\"\")" 2>&1 | FileCheck %s --check-prefix=CHECK4 # (lldb) expression -- FOO("") -# CHECK4:{{^ (\^|˄)}} +# CHECK4:{{^ \^}} # CHECK4: {{^ note: in instantiation of function template}} # CHECK4: error: details) { StreamString stream; - RenderDiagnosticDetails(stream, 0, true, details, /*force_ascii=*/true); + RenderDiagnosticDetails(stream, 0, true, details); return stream.GetData(); } } // namespace diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/systemz.h b/llvm/include/llvm/ExecutionEngine/JITLink/systemz.h index 09ec56db6826f..bfd22ec753074 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/systemz.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/systemz.h @@ -507,6 +507,21 @@ enum EdgeKind_systemz : Edge::Kind { /// RequestGOTAndTransformToDelta32dbl, + /// A TLSInfo entry getter/constructor, transformed to Delta64FromGOT. + /// + /// Indicates that this edge should be transformed into a Delta64FromGOT + /// targeting the TLSInfo entry for the edge's current target. A TLSInfo + /// entry for the target should be created if one does not already exist. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestTLSDescInGOTAndTransformToDelta64FromGOT, + /// A 32-bit Delta to GOT base. 
/// /// Fixup expression: diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 11b76cd183108..9480e7b36dc2c 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -769,7 +769,7 @@ class Triple { bool isOSGlibc() const { return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD || getOS() == Triple::Hurd) && - !isAndroid(); + !isAndroid() && !isMusl(); } /// Tests whether the OS is AIX. diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp index a1a79e5685f80..a6d0d3ff4fcd4 100644 --- a/llvm/lib/Analysis/CmpInstAnalysis.cpp +++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp @@ -75,7 +75,7 @@ Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy, std::optional llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, - bool LookThruTrunc, bool AllowNonZeroC, + bool LookThroughTrunc, bool AllowNonZeroC, bool DecomposeAnd) { using namespace PatternMatch; @@ -173,7 +173,7 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, Result.Pred = ICmpInst::getInversePredicate(Result.Pred); Value *X; - if (LookThruTrunc && match(LHS, m_Trunc(m_Value(X)))) { + if (LookThroughTrunc && match(LHS, m_Trunc(m_Value(X)))) { Result.X = X; Result.Mask = Result.Mask.zext(X->getType()->getScalarSizeInBits()); Result.C = Result.C.zext(X->getType()->getScalarSizeInBits()); @@ -185,7 +185,7 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, } std::optional llvm::decomposeBitTest(Value *Cond, - bool LookThruTrunc, + bool LookThroughTrunc, bool AllowNonZeroC, bool DecomposeAnd) { using namespace PatternMatch; @@ -194,7 +194,7 @@ std::optional llvm::decomposeBitTest(Value *Cond, if (!ICmp->getOperand(0)->getType()->isIntOrIntVectorTy()) return std::nullopt; return decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), - ICmp->getPredicate(), LookThruTrunc, + ICmp->getPredicate(), LookThroughTrunc, AllowNonZeroC, DecomposeAnd); } Value *X; diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index b39b32042dd2f..a9b51065a1d99 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1223,11 +1223,12 @@ Constant *llvm::ConstantFoldCompareInstOperands( } } - // Only do this transformation if the int is intptrty in size, otherwise - // there is a truncation or extension that we aren't modeling. - if (CE0->getOpcode() == Instruction::PtrToInt) { - Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType()); - if (CE0->getType() == IntPtrTy) { + // icmp only compares the address part of the pointer, so only do this + // transform if the integer size matches the address size. + if (CE0->getOpcode() == Instruction::PtrToInt || + CE0->getOpcode() == Instruction::PtrToAddr) { + Type *AddrTy = DL.getAddressType(CE0->getOperand(0)->getType()); + if (CE0->getType() == AddrTy) { Constant *C = CE0->getOperand(0); Constant *Null = Constant::getNullValue(C->getType()); return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI); @@ -1250,11 +1251,12 @@ Constant *llvm::ConstantFoldCompareInstOperands( return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI); } - // Only do this transformation if the int is intptrty in size, otherwise - // there is a truncation or extension that we aren't modeling. 
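A hedged illustration of the fold guarded here (IR notation, not taken from the patch): with 64-bit pointers and 64-bit addresses,

    ; icmp eq (ptrtoint (ptr @g to i64)), 0   -->   icmp eq ptr @g, null
    ; An i32 result type would imply a truncation of the address that the
    ; fold does not model, so the transform is skipped in that case.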
- if (CE0->getOpcode() == Instruction::PtrToInt) { - Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType()); - if (CE0->getType() == IntPtrTy && + // icmp only compares the address part of the pointer, so only do this + // transform if the integer size matches the address size. + if (CE0->getOpcode() == Instruction::PtrToInt || + CE0->getOpcode() == Instruction::PtrToAddr) { + Type *AddrTy = DL.getAddressType(CE0->getOperand(0)->getType()); + if (CE0->getType() == AddrTy && CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) { return ConstantFoldCompareInstOperands( Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 9cb6f19b9340c..92577cd7517e6 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -7286,15 +7286,15 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const SimplifyQuery &SQ, bool IsNSW) { - KnownBits LHSKnown = computeKnownBits(LHS, SQ); - KnownBits RHSKnown = computeKnownBits(RHS, SQ); + ConstantRange LHSRange = + computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); + ConstantRange RHSRange = + computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); // mul nsw of two non-negative numbers is also nuw. - if (IsNSW && LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) + if (IsNSW && LHSRange.isAllNonNegative() && RHSRange.isAllNonNegative()) return OverflowResult::NeverOverflows; - ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false); - ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false); return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange)); } diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_systemz.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_systemz.cpp index 29eeecceea766..50acd6ea2e542 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_systemz.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_systemz.cpp @@ -27,12 +27,67 @@ using namespace llvm::jitlink; namespace { constexpr StringRef ELFGOTSymbolName = "_GLOBAL_OFFSET_TABLE_"; +constexpr StringRef ELFTLSInfoSectionName = "$__TLSINFO"; + +// TLS Info Builder. +class TLSInfoTableManager_ELF_systemz + : public TableManager { +public: + static StringRef getSectionName() { return ELFTLSInfoSectionName; } + + static const uint8_t TLSInfoEntryContent[16]; + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getKind() == + systemz::RequestTLSDescInGOTAndTransformToDelta64FromGOT) { + LLVM_DEBUG({ + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << formatv("{0:x}", B->getFixupAddress(E)) << " (" + << formatv("{0:x}", B->getAddress()) << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setKind(systemz::Delta64FromGOT); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + return false; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + // the TLS Info entry's key value will be written by the fixTLVSectionByName + // pass, so create mutable content. 
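    // Entry layout (16 zero-initialized bytes, per TLSInfoEntryContent
    // below): bytes 0..7 hold the TLS key, filled in later by the
    // fixTLVSectionByName pass; bytes 8..15 receive a Pointer64 edge to the
    // target symbol, which is what the offset-8 addEdge below sets up.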
+ auto &TLSInfoEntry = G.createMutableContentBlock( + getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), + orc::ExecutorAddr(), 8, 0); + TLSInfoEntry.addEdge(systemz::Pointer64, 8, Target, 0); + return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false); + } + +private: + Section &getTLSInfoSection(LinkGraph &G) { + if (!TLSInfoTable) + TLSInfoTable = &G.createSection(getSectionName(), orc::MemProt::Read); + return *TLSInfoTable; + } + + ArrayRef getTLSInfoEntryContent() const { + return {reinterpret_cast(TLSInfoEntryContent), + sizeof(TLSInfoEntryContent)}; + } + + Section *TLSInfoTable = nullptr; +}; + +const uint8_t TLSInfoTableManager_ELF_systemz::TLSInfoEntryContent[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; Error buildTables_ELF_systemz(LinkGraph &G) { LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); systemz::GOTTableManager GOT; systemz::PLTTableManager PLT(GOT); - visitExistingEdges(G, GOT, PLT); + TLSInfoTableManager_ELF_systemz TLSInfo; + visitExistingEdges(G, GOT, PLT, TLSInfo); return Error::success(); } @@ -329,6 +384,15 @@ class ELFLinkGraphBuilder_systemz Kind = systemz::Delta32dblGOTBase; break; } + // Tag for function call in general dynamic TLS code. + case ELF::R_390_TLS_GDCALL: { + break; + } + // Direct 64 bit for general dynamic thread local data. + case ELF::R_390_TLS_GD64: { + Kind = systemz::RequestTLSDescInGOTAndTransformToDelta64FromGOT; + break; + } default: return make_error( "In " + G->getName() + ": Unsupported systemz relocation type " + diff --git a/llvm/lib/ExecutionEngine/JITLink/systemz.cpp b/llvm/lib/ExecutionEngine/JITLink/systemz.cpp index f6cc29fa6e6a1..dbb924c3f9291 100644 --- a/llvm/lib/ExecutionEngine/JITLink/systemz.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/systemz.cpp @@ -104,6 +104,8 @@ const char *getEdgeKindName(Edge::Kind R) { return "RequestGOTAndTransformToDelta12FromGOT"; case RequestGOTAndTransformToDelta32dbl: return "RequestGOTAndTransformToDelta32dbl"; + case RequestTLSDescInGOTAndTransformToDelta64FromGOT: + return "RequestTLSDescInGOTAndTransformToDelta64FromGOT"; default: return getGenericEdgeKindName(static_cast(R)); } diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index 7dc1ae520f132..0a761290373aa 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -988,6 +988,7 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::fixTLVSectionsAndEdges( jitlink::LinkGraph &G, JITDylib &JD) { auto TLSGetAddrSymbolName = G.intern("__tls_get_addr"); auto TLSDescResolveSymbolName = G.intern("__tlsdesc_resolver"); + auto TLSGetOffsetSymbolName = G.intern("__tls_get_offset"); for (auto *Sym : G.external_symbols()) { if (Sym->getName() == TLSGetAddrSymbolName) { auto TLSGetAddr = @@ -997,6 +998,10 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::fixTLVSectionsAndEdges( auto TLSGetAddr = MP.getExecutionSession().intern("___orc_rt_elfnix_tlsdesc_resolver"); Sym->setName(std::move(TLSGetAddr)); + } else if (Sym->getName() == TLSGetOffsetSymbolName) { + auto TLSGetAddr = + MP.getExecutionSession().intern("___orc_rt_elfnix_tls_get_offset"); + Sym->setName(std::move(TLSGetAddr)); } } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d47aafb31ebdf..c54c428926bf8 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -6469,7 +6469,13 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef 
TT) { if (Pos != size_t(-1)) Res.insert(Pos + I64.size(), I128); } - return Res; + } + + if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) { + size_t Pos = Res.find("-S128"); + if (Pos == StringRef::npos) + Pos = Res.size(); + Res.insert(Pos, "-f64:32:64"); } if (!T.isX86()) diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 33ca46ca1c2c6..b95c1466871bc 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -1271,6 +1271,7 @@ bool Instruction::isAssociative() const { switch (Opcode) { case FMul: + return cast(this)->hasAllowReassoc(); case FAdd: return cast(this)->hasAllowReassoc() && cast(this)->hasNoSignedZeros(); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 054c85d115970..7e2dfbbb4772b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2692,6 +2692,9 @@ void Verifier::verifyFunctionMetadata( } void Verifier::visitConstantExprsRecursively(const Constant *EntryC) { + if (EntryC->getNumOperands() == 0) + return; + if (!ConstantExprVisited.insert(EntryC).second) return; @@ -5626,14 +5629,8 @@ void Verifier::visitInstruction(Instruction &I) { } else if (isa(I.getOperand(i))) { Check(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i), "Cannot take the address of an inline asm!", &I); - } else if (auto *CPA = dyn_cast(I.getOperand(i))) { - visitConstantExprsRecursively(CPA); - } else if (ConstantExpr *CE = dyn_cast(I.getOperand(i))) { - if (CE->getType()->isPtrOrPtrVectorTy()) { - // If we have a ConstantExpr pointer, we need to see if it came from an - // illegal bitcast. - visitConstantExprsRecursively(CE); - } + } else if (auto *C = dyn_cast(I.getOperand(i))) { + visitConstantExprsRecursively(C); } } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 28a52ab8b1ae6..87256352faccd 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -2423,7 +2423,7 @@ static bool targetSupportsPAuthRelocation(const Triple &TT, const MCExpr *Target, const MCExpr *DSExpr) { // No released version of glibc supports PAuth relocations. - if (TT.isOSGlibc()) + if (TT.isOSGlibc() || TT.isMusl()) return false; // We emit PAuth constants as IRELATIVE relocations in cases where the diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 30eb19036ddda..41caa817c11a4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -24759,7 +24759,12 @@ static SDValue performPostLD1Combine(SDNode *N, static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { - APInt DemandedMask = APInt::getLowBitsSet(64, 56); + const auto &Subtarget = DAG.getSubtarget(); + // If MTE is enabled, TBI only applies to the top 4 bits. + // Both arm64 and arm64e processes on Darwin may run with MTE enabled. + unsigned NumIgnoreBits = + Subtarget.hasMTE() || Subtarget.isTargetDarwin() ? 
4 : 8; + APInt DemandedMask = APInt::getLowBitsSet(64, 64 - NumIgnoreBits); KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); @@ -31792,12 +31797,12 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE( unsigned OperandOrder; if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult, OperandOrder) && - WhichResult == 0) - return convertFromScalableVector( - DAG, VT, - DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, - OperandOrder == 0 ? Op1 : Op2, - OperandOrder == 0 ? Op2 : Op1)); + WhichResult == 0) { + SDValue ZIP = DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, + OperandOrder == 0 ? Op1 : Op2, + OperandOrder == 0 ? Op2 : Op1); + return convertFromScalableVector(DAG, VT, ZIP); + } if (isTRNMask(ShuffleMask, VT.getVectorNumElements(), WhichResult, OperandOrder)) { @@ -31847,12 +31852,12 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE( if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult, OperandOrder) && - WhichResult != 0) - return convertFromScalableVector( - DAG, VT, - DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, - OperandOrder == 0 ? Op1 : Op2, - OperandOrder == 0 ? Op2 : Op1)); + WhichResult != 0) { + SDValue ZIP = DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, + OperandOrder == 0 ? Op1 : Op2, + OperandOrder == 0 ? Op2 : Op1); + return convertFromScalableVector(DAG, VT, ZIP); + } if (isUZPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) { unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 4d2e740779961..892b8ee1ed3cb 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -4386,7 +4386,7 @@ multiclass BaseLoadUnscaleV84 sz, bits<2> opc, DAGOperand regtype > { def i : BaseLoadStoreUnscale, - Sched<[WriteST]> { + Sched<[WriteLD]> { let Inst{29} = 0; let Inst{24} = 1; } diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h index c7d6b31291197..12a53aad08aa8 100644 --- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h +++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h @@ -6631,43 +6631,44 @@ inline bool isZIPMask(ArrayRef M, unsigned NumElts, if (NumElts % 2 != 0) return false; - // "Variant" refers to the distinction bwetween zip1 and zip2, while - // "Order" refers to sequence of input registers (matching vs flipped). - bool Variant0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0 - bool Variant1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0 - bool Variant0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1 - bool Variant1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1 + // "Result" corresponds to "WhichResultOut", selecting between zip1 and zip2. + // "Order" corresponds to "OperandOrderOut", selecting the order of operands + // for the instruction (flipped or not). + bool Result0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0 + bool Result1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0 + bool Result0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1 + bool Result1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1 // Check all elements match. 
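  // Worked example for NumElts = 4 (illustrative): the four candidate masks
  // checked below are
  //   Result0Order0: <0, 4, 1, 5>   -> zip1 op1, op2
  //   Result1Order0: <2, 6, 3, 7>   -> zip2 op1, op2
  //   Result0Order1: <4, 0, 5, 1>   -> zip1 op2, op1
  //   Result1Order1: <6, 2, 7, 3>   -> zip2 op2, op1
  // Exactly one flag may survive the scan for the mask to count as a ZIP.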
for (unsigned i = 0; i != NumElts; i += 2) { if (M[i] >= 0) { unsigned EvenElt = (unsigned)M[i]; if (EvenElt != i / 2) - Variant0Order0 = false; + Result0Order0 = false; if (EvenElt != NumElts / 2 + i / 2) - Variant1Order0 = false; + Result1Order0 = false; if (EvenElt != NumElts + i / 2) - Variant0Order1 = false; + Result0Order1 = false; if (EvenElt != NumElts + NumElts / 2 + i / 2) - Variant1Order1 = false; + Result1Order1 = false; } if (M[i + 1] >= 0) { unsigned OddElt = (unsigned)M[i + 1]; if (OddElt != NumElts + i / 2) - Variant0Order0 = false; + Result0Order0 = false; if (OddElt != NumElts + NumElts / 2 + i / 2) - Variant1Order0 = false; + Result1Order0 = false; if (OddElt != i / 2) - Variant0Order1 = false; + Result0Order1 = false; if (OddElt != NumElts / 2 + i / 2) - Variant1Order1 = false; + Result1Order1 = false; } } - if (Variant0Order0 + Variant1Order0 + Variant0Order1 + Variant1Order1 != 1) + if (Result0Order0 + Result1Order0 + Result0Order1 + Result1Order1 != 1) return false; - WhichResultOut = (Variant0Order0 || Variant0Order1) ? 0 : 1; - OperandOrderOut = (Variant0Order0 || Variant1Order0) ? 0 : 1; + WhichResultOut = (Result0Order0 || Result0Order1) ? 0 : 1; + OperandOrderOut = (Result0Order0 || Result1Order0) ? 0 : 1; return true; } diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp index d4058fac4304a..584b45b4111cd 100644 --- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp @@ -157,6 +157,29 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, return MCDisassembler::Success; } +// Decode AMSWAP.W and UD, which share the same base encoding. +// If rk == 1 and rd == rj, interpret the instruction as UD; +// otherwise decode as AMSWAP.W. 
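// Worked example (derived from the FmtUD field layout, not spelled out in
// the patch): 0x38600400 has Rd=0, Rj=0, Rk=1, so Rd == Rj and it decodes
// as `ud 0`; 0x38600401 has Rd=1, Rj=0, Rk=1, so Rd != Rj and it decodes
// as `amswap.w $r1, $r1, $r0`.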
+static DecodeStatus DecodeAMOrUDInstruction(MCInst &Inst, unsigned Insn,
+                                            uint64_t Address,
+                                            const MCDisassembler *Decoder) {
+  unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+  unsigned Rj = fieldFromInstruction(Insn, 5, 5);
+  unsigned Rk = fieldFromInstruction(Insn, 10, 5);
+
+  if (Rk == 1 && Rd == Rj) {
+    Inst.setOpcode(LoongArch::UD);
+    Inst.addOperand(MCOperand::createImm(Rd));
+  } else {
+    Inst.setOpcode(LoongArch::AMSWAP_W);
+    Inst.addOperand(MCOperand::createReg(LoongArch::R0 + Rd));
+    Inst.addOperand(MCOperand::createReg(LoongArch::R0 + Rk));
+    Inst.addOperand(MCOperand::createReg(LoongArch::R0 + Rj));
+  }
+
+  return MCDisassembler::Success;
+}
+
 #include "LoongArchGenDisassemblerTables.inc"
 
 DecodeStatus LoongArchDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td
index 419e20431c59f..fa049fcbc2d21 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td
@@ -401,3 +401,16 @@ class FmtLDPTE pattern = []>
   let Inst{9-5} = rj;
   let Inst{4-0} = 0b00000;
 }
+
+// FmtUD
+// <0b0011100001100000000001 | I5 | I5>
+class FmtUD<dag outs, dag ins, string opnstr, list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<5> imm5;
+
+  let Inst{31-10} = 0b0011100001100000000001;
+  let Inst{9-5} = imm5;
+  let Inst{4-0} = imm5;
+
+  let DecoderMethod = "DecodeAMOrUDInstruction";
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 2e6653e1a09ac..d971f8bc1986b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -800,6 +800,10 @@ class AM_3R op>
   let Constraints = "@earlyclobber $rd";
 }
 
+class AU_3R<bits<32> op> : AM_3R<op> {
+  let DecoderMethod = "DecodeAMOrUDInstruction";
+}
+
 class AMCAS_3R op> : Fmt3R {
@@ -923,6 +927,9 @@ def BREAK : MISC_I15<0x002a0000>;
 def RDTIMEL_W : RDTIME_2R<0x00006000>;
 def RDTIMEH_W : RDTIME_2R<0x00006400>;
 
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+def UD : FmtUD<(outs), (ins uimm5:$imm5), "$imm5">;
+
 // The CPUCFG instruction offers a reliable way to probe CPU features.
 // Although support is not guaranteed on LA32R, having compiler support
 // nevertheless enables applications to rely on its presence, potentially
@@ -1087,7 +1094,7 @@ def STLE_D : STORE_3R<0x387f8000>;
 // Atomic Memory Access Instructions for 64-bits
 def AMSWAP_B : AM_3R<0x385c0000>;
 def AMSWAP_H : AM_3R<0x385c8000>;
-def AMSWAP_W : AM_3R<0x38600000>;
+def AMSWAP_W : AU_3R<0x38600000>;
 def AMSWAP_D : AM_3R<0x38608000>;
 def AMADD_B : AM_3R<0x385d0000>;
 def AMADD_H : AM_3R<0x385d8000>;
@@ -1410,12 +1417,8 @@ def : Pat<(and GPR:$rj, BstrinsImm:$imm),
 
 /// Traps
 
-// We lower `trap` to `amswap.w rd:$r0, rk:$r1, rj:$r0`, as this is guaranteed
-// to trap with an INE (non-existent on LA32, explicitly documented to INE on
-// LA64). And the resulting signal is different from `debugtrap` like on some
-// other existing ports so programs/porters might have an easier time.
-def PseudoUNIMP : Pseudo<(outs), (ins), [(trap)]>,
-                  PseudoInstExpansion<(AMSWAP_W R0, R1, R0)>;
+// We lower `trap` to `ud 0`, which is an alias for `amswap.w $r0, $r1, $r0`.
+def PseudoUNIMP : Pseudo<(outs), (ins), [(trap)]>, PseudoInstExpansion<(UD 0)>;
 
 // We lower `debugtrap` to `break 0`, as this is guaranteed to exist and work,
 // even for LA32 Primary.
Also, because so far the ISA does not provide a diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 366a7b6d0135a..99bef417eaa89 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3180,7 +3180,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::PPCLdFixedAddr: { - assert(Subtarget.getTargetTriple().isOSGlibc() && + assert((Subtarget.getTargetTriple().isOSGlibc() || + Subtarget.getTargetTriple().isMusl()) && "Only targets with Glibc expected to contain PPCLdFixedAddr"); int64_t Offset = 0; const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 40b46f503ca53..74066c86d6ebe 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -433,6 +433,8 @@ enum OperandType : unsigned { OPERAND_RTZARG, // Condition code used by select and short forward branch pseudos. OPERAND_COND_CODE, + // Ordering for atomic pseudos. + OPERAND_ATOMIC_ORDERING, // Vector policy operand. OPERAND_VEC_POLICY, // Vector SEW operand. Stores in log2(SEW). diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 1a5bb837a4318..39228a11e1309 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -908,6 +908,7 @@ def HasVInstructionsF16Minimal : Predicate<"Subtarget->hasVInstructionsF16Minima def HasVInstructionsBF16Minimal : Predicate<"Subtarget->hasVInstructionsBF16Minimal()">; def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">; +def HasVInstructionsBF16 : Predicate<"Subtarget->hasVInstructionsBF16()">; def HasVInstructionsF64 : Predicate<"Subtarget->hasVInstructionsF64()">; def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasVInstructionsFullMultiply()">; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 8bfdbef39708a..b6b716be35c3e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2608,8 +2608,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { MachineSDNode *TileLoad = CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands); - if (auto *MemOp = dyn_cast(Node)) - CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()}); + CurDAG->setNodeMemRefs(TileLoad, + {cast(Node)->getMemOperand()}); ReplaceNode(Node, TileLoad); return; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 29fc2ddb818b5..2c0a02ae396c7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -526,7 +526,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, VTs, Legal); setOperationAction({ISD::ABDS, ISD::ABDU}, VTs, Legal); setOperationAction(ISD::SPLAT_VECTOR, VTs, Legal); - setOperationAction(ISD::SHL, VTs, Custom); + setOperationAction({ISD::SHL, ISD::SRL, ISD::SRA}, VTs, Custom); setOperationAction(ISD::BITCAST, VTs, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom); } @@ -2196,6 +2196,60 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5, /*IsStore*/ true, /*IsUnitStrided*/ false); + case 
Intrinsic::riscv_sf_vlte8: + case Intrinsic::riscv_sf_vlte16: + case Intrinsic::riscv_sf_vlte32: + case Intrinsic::riscv_sf_vlte64: + Info.opc = ISD::INTRINSIC_VOID; + Info.ptrVal = I.getArgOperand(1); + switch (Intrinsic) { + case Intrinsic::riscv_sf_vlte8: + Info.memVT = MVT::i8; + Info.align = Align(1); + break; + case Intrinsic::riscv_sf_vlte16: + Info.memVT = MVT::i16; + Info.align = Align(2); + break; + case Intrinsic::riscv_sf_vlte32: + Info.memVT = MVT::i32; + Info.align = Align(4); + break; + case Intrinsic::riscv_sf_vlte64: + Info.memVT = MVT::i64; + Info.align = Align(8); + break; + } + Info.size = MemoryLocation::UnknownSize; + Info.flags |= MachineMemOperand::MOLoad; + return true; + case Intrinsic::riscv_sf_vste8: + case Intrinsic::riscv_sf_vste16: + case Intrinsic::riscv_sf_vste32: + case Intrinsic::riscv_sf_vste64: + Info.opc = ISD::INTRINSIC_VOID; + Info.ptrVal = I.getArgOperand(1); + switch (Intrinsic) { + case Intrinsic::riscv_sf_vste8: + Info.memVT = MVT::i8; + Info.align = Align(1); + break; + case Intrinsic::riscv_sf_vste16: + Info.memVT = MVT::i16; + Info.align = Align(2); + break; + case Intrinsic::riscv_sf_vste32: + Info.memVT = MVT::i32; + Info.align = Align(4); + break; + case Intrinsic::riscv_sf_vste64: + Info.memVT = MVT::i64; + Info.align = Align(8); + break; + } + Info.size = MemoryLocation::UnknownSize; + Info.flags |= MachineMemOperand::MOStore; + return true; } } @@ -8608,22 +8662,21 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VSELECT: return lowerToScalableOp(Op, DAG); case ISD::SHL: - if (Subtarget.enablePExtCodeGen() && - Op.getSimpleValueType().isFixedLengthVector()) { - // We have patterns for scalar/immediate shift amount, so no lowering - // needed. - if (Op.getOperand(1)->getOpcode() == ISD::SPLAT_VECTOR) - return Op; - - // There's no vector-vector version of shift instruction in P extension so - // we need to unroll to scalar computation and pack them back. - return DAG.UnrollVectorOp(Op.getNode()); - } - [[fallthrough]]; - case ISD::SRA: case ISD::SRL: - if (Op.getSimpleValueType().isFixedLengthVector()) + case ISD::SRA: + if (Op.getSimpleValueType().isFixedLengthVector()) { + if (Subtarget.enablePExtCodeGen()) { + // We have patterns for scalar/immediate shift amount, so no lowering + // needed. + if (Op.getOperand(1)->getOpcode() == ISD::SPLAT_VECTOR) + return Op; + + // There's no vector-vector version of shift instruction in P extension + // so we need to unroll to scalar computation and pack them back. + return DAG.UnrollVectorOp(Op.getNode()); + } return lowerToScalableOp(Op, DAG); + } // This can be called for an i32 shift amount that needs to be promoted. 
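      // For example (illustrative): a splat shift such as
      //   (shl v4i8:$v, (splat_vector 3))
      // is returned unchanged for the PSLLI/PSRLI/PSRAI patterns to match,
      // while a vector-vector shift such as (srl v4i8:$a, v4i8:$b) is
      // unrolled into scalar shifts and repacked.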
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index ce5a67bd23a9a..76dc57c45fb0b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3025,6 +3025,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_COND_CODE: Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID; break; + case RISCVOp::OPERAND_ATOMIC_ORDERING: + Ok = isValidAtomicOrdering(Imm); + break; case RISCVOp::OPERAND_VEC_POLICY: Ok = (Imm & (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)) == Imm; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 5c81a0990a64f..f5fd9acd8b303 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -11,6 +11,14 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. +//===----------------------------------------------------------------------===// + +def ordering : RISCVOp { + let OperandType = "OPERAND_ATOMIC_ORDERING"; +} + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -244,7 +252,7 @@ defm : AMOPat<"atomic_load_umin_i64", "AMOMINU_D", i64, [IsRV64]>; /// Pseudo AMOs class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch), - (ins GPR:$addr, GPR:$incr, ixlenimm:$ordering), []> { + (ins GPR:$addr, GPR:$incr, ordering:$ordering), []> { let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; let mayLoad = 1; let mayStore = 1; @@ -253,7 +261,7 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch), class PseudoMaskedAMO : Pseudo<(outs GPR:$res, GPR:$scratch), - (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> { + (ins GPR:$addr, GPR:$incr, GPR:$mask, ordering:$ordering), []> { let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; let mayLoad = 1; let mayStore = 1; @@ -262,8 +270,8 @@ class PseudoMaskedAMO class PseudoMaskedAMOMinMax : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2), - (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$sextshamt, - ixlenimm:$ordering), []> { + (ins GPR:$addr, GPR:$incr, GPR:$mask, GPR:$sextshamt, + ordering:$ordering), []> { let Constraints = "@earlyclobber $res,@earlyclobber $scratch1," "@earlyclobber $scratch2"; let mayLoad = 1; @@ -273,7 +281,7 @@ class PseudoMaskedAMOMinMax class PseudoMaskedAMOUMinUMax : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2), - (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> { + (ins GPR:$addr, GPR:$incr, GPR:$mask, ordering:$ordering), []> { let Constraints = "@earlyclobber $res,@earlyclobber $scratch1," "@earlyclobber $scratch2"; let mayLoad = 1; @@ -419,7 +427,7 @@ defm : PseudoAMOPat<"atomic_load_nand_i64", PseudoAtomicLoadNand64, i64>; class PseudoCmpXchg : Pseudo<(outs GPR:$res, GPR:$scratch), - (ins GPR:$addr, GPR:$cmpval, GPR:$newval, ixlenimm:$ordering), []> { + (ins GPR:$addr, GPR:$cmpval, GPR:$newval, ordering:$ordering), []> { let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; let mayLoad = 1; let mayStore = 1; @@ -457,7 +465,7 @@ let Predicates = [HasStdExtZalrsc] in { def PseudoMaskedCmpXchg32 : Pseudo<(outs 
GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, - ixlenimm:$ordering), []> { + ordering:$ordering), []> { let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; let mayLoad = 1; let mayStore = 1; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index bba9f961b9639..da4a3a6022337 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -437,7 +437,7 @@ class RVPTernary_rrr f, bits<2> w, bits<3> funct3, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVPWideningTernary_rrr f, bits<2> w, string opcodestr> : RVPWideningBase { + (ins GPRPairRV32:$rd, GPR:$rs1, GPR:$rs2), opcodestr> { let Inst{30-27} = f; let Constraints = "$rd = $rd_wb"; @@ -1513,26 +1513,55 @@ let Predicates = [HasStdExtP] in { def: Pat<(XLenVecI16VT (abds GPR:$rs1, GPR:$rs2)), (PABD_H GPR:$rs1, GPR:$rs2)>; def: Pat<(XLenVecI16VT (abdu GPR:$rs1, GPR:$rs2)), (PABDU_H GPR:$rs1, GPR:$rs2)>; - // 8-bit logical shift left patterns + // 8-bit logical shift left/right patterns def: Pat<(XLenVecI8VT (shl GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))), (PSLLI_B GPR:$rs1, uimm3:$shamt)>; + def: Pat<(XLenVecI8VT (srl GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))), + (PSRLI_B GPR:$rs1, uimm3:$shamt)>; - // 16-bit logical shift left patterns + // 16-bit logical shift left/right patterns def: Pat<(XLenVecI16VT (shl GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))), (PSLLI_H GPR:$rs1, uimm4:$shamt)>; + def: Pat<(XLenVecI16VT (srl GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))), + (PSRLI_H GPR:$rs1, uimm4:$shamt)>; + + // 8-bit arithmetic shift right patterns + def: Pat<(XLenVecI8VT (sra GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))), + (PSRAI_B GPR:$rs1, uimm3:$shamt)>; + + // 16-bit arithmetic shift right patterns + def: Pat<(XLenVecI16VT (sra GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))), + (PSRAI_H GPR:$rs1, uimm4:$shamt)>; // 16-bit signed saturation shift left patterns def: Pat<(XLenVecI16VT (sshlsat GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))), (PSSLAI_H GPR:$rs1, uimm4:$shamt)>; - // 8-bit logical shift left + // 8-bit logical shift left/right def: Pat<(XLenVecI8VT (shl GPR:$rs1, (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))), (PSLL_BS GPR:$rs1, GPR:$rs2)>; - // 16-bit logical shift left + def: Pat<(XLenVecI8VT (srl GPR:$rs1, + (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))), + (PSRL_BS GPR:$rs1, GPR:$rs2)>; + + // 8-bit arithmetic shift left/right + def: Pat<(XLenVecI8VT (sra GPR:$rs1, + (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))), + (PSRA_BS GPR:$rs1, GPR:$rs2)>; + + // 16-bit logical shift left/right def: Pat<(XLenVecI16VT (shl GPR:$rs1, (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))), (PSLL_HS GPR:$rs1, GPR:$rs2)>; + def: Pat<(XLenVecI16VT (srl GPR:$rs1, + (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))), + (PSRL_HS GPR:$rs1, GPR:$rs2)>; + + // 16-bit arithmetic shift left/right + def: Pat<(XLenVecI16VT (sra GPR:$rs1, + (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))), + (PSRA_HS GPR:$rs1, GPR:$rs2)>; // 8-bit PLI SD node pattern def: Pat<(XLenVecI8VT (splat_vector simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>; @@ -1580,16 +1609,28 @@ let Predicates = [HasStdExtP, IsRV64] in { def: Pat<(v2i32 (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_W GPR:$rs1, GPR:$rs2)>; def: Pat<(v2i32 (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_W GPR:$rs1, GPR:$rs2)>; - // 32-bit logical shift left + // 32-bit logical shift 
left/right def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))), (PSLL_WS GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (srl GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))), + (PSRL_WS GPR:$rs1, GPR:$rs2)>; + + // 32-bit arithmetic shift left/right + def: Pat<(v2i32 (sra GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))), + (PSRA_WS GPR:$rs1, GPR:$rs2)>; // splat pattern def: Pat<(v2i32 (splat_vector (XLenVT GPR:$rs2))), (PADD_WS (XLenVT X0), GPR:$rs2)>; - // 32-bit logical shift left patterns + // 32-bit logical shift left/right patterns def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), (PSLLI_W GPR:$rs1, uimm5:$shamt)>; + def: Pat<(v2i32 (srl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), + (PSRLI_W GPR:$rs1, uimm5:$shamt)>; + + // 32-bit arithmetic shift left/right patterns + def: Pat<(v2i32 (sra GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), + (PSRAI_W GPR:$rs1, uimm5:$shamt)>; // 32-bit signed saturation shift left patterns def: Pat<(v2i32 (sshlsat GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 594a75a4746d4..9354b63bced53 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1840,3 +1840,6 @@ let Predicates = [HasVInstructionsI64, IsRV64] in { include "RISCVInstrInfoVPseudos.td" include "RISCVInstrInfoZvfbf.td" +// Include the non-intrinsic ISel patterns +include "RISCVInstrInfoVVLPatterns.td" +include "RISCVInstrInfoVSDPatterns.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index e36204c536c0d..cdbeb0c1046d2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -473,17 +473,27 @@ defset list AllWidenableIntVectors = { def : VTypeInfoToWide; } -defset list AllWidenableFloatVectors = { - def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; +defset list AllWidenableFloatAndBF16Vectors = { + defset list AllWidenableFloatVectors = { + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + } + + defset list AllWidenableBF16ToFloatVectors = { + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + } } defset list AllFractionableVF2IntVectors = { @@ -543,14 +553,6 @@ defset list AllWidenableIntToFloatVectors = { def : VTypeInfoToWide; } -defset list AllWidenableBF16ToFloatVectors = { - def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; - def : VTypeInfoToWide; -} - // This class holds the record of the RISCVVPseudoTable below. // This represents the information we need in codegen for each pseudo. 
// The definition should be consistent with `struct PseudoInfo` in @@ -780,7 +782,7 @@ class GetVRegNoV0 { class GetVTypePredicates { list Predicates = !cond(!eq(vti.Scalar, f16) : [HasVInstructionsF16], - !eq(vti.Scalar, bf16) : [HasVInstructionsBF16Minimal], + !eq(vti.Scalar, bf16) : [HasVInstructionsBF16], !eq(vti.Scalar, f32) : [HasVInstructionsAnyF], !eq(vti.Scalar, f64) : [HasVInstructionsF64], !eq(vti.SEW, 64) : [HasVInstructionsI64], @@ -7326,7 +7328,3 @@ defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16", // 16.5. Vector Compress Instruction //===----------------------------------------------------------------------===// defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllVectors>; - -// Include the non-intrinsic ISel patterns -include "RISCVInstrInfoVVLPatterns.td" -include "RISCVInstrInfoVSDPatterns.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index a67112b9981b8..14ad7ca0eb35a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -215,13 +215,17 @@ multiclass VPatBinaryFPSDNode_VV_VF { - foreach vti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in { + list vtilist = AllFloatVectors, + bit isSEWAware = 0> { + foreach vti = vtilist in { let Predicates = GetVTypePredicates.Predicates in { - def : VPatBinarySDNode_VV_RM; - def : VPatBinarySDNode_VF_RM; @@ -246,14 +250,17 @@ multiclass VPatBinaryFPSDNode_R_VF { - foreach fvti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in + list vtilist = AllFloatVectors, + bit isSEWAware = 0> { + foreach fvti = vtilist in let Predicates = GetVTypePredicates.Predicates in def : Pat<(fvti.Vector (vop (fvti.Vector (SplatFPOp fvti.Scalar:$rs2)), (fvti.Vector fvti.RegClass:$rs1))), (!cast( !if(isSEWAware, - instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW, + instruction_name# + !if(!eq(fvti.Scalar, bf16), "_ALT", "")# + "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW, instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)) (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, @@ -664,11 +671,10 @@ multiclass VPatWidenFPMulAccSDNode_VV_VF_RM.Predicates, - GetVTypePredicates.Predicates, + let Predicates = !listconcat(GetVTypePredicates.Predicates, !if(!eq(vti.Scalar, bf16), [HasStdExtZvfbfwma], - [])) in { + GetVTypePredicates.Predicates)) in { def : Pat<(fma (wti.Vector (riscv_fpextend_vl_sameuser (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), (XLenVT srcvalue))), @@ -676,7 +682,9 @@ multiclass VPatWidenFPMulAccSDNode_VV_VF_RM(instruction_name#"_VV_"#suffix) + (!cast(instruction_name# + !if(!eq(vti.Scalar, bf16), "BF16", "")# + "_VV_"#suffix) wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR @@ -688,7 +696,9 @@ multiclass VPatWidenFPMulAccSDNode_VV_VF_RM(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix) + (!cast(instruction_name# + !if(!eq(vti.Scalar, bf16), "BF16", "")# + "_V"#vti.ScalarSuffix#"_"#suffix) wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR @@ -1201,16 +1211,20 @@ foreach mti = AllMasks in { // 13. Vector Floating-Point Instructions // 13.2. 
Vector Single-Width Floating-Point Add/Subtract Instructions -defm : VPatBinaryFPSDNode_VV_VF_RM; -defm : VPatBinaryFPSDNode_VV_VF_RM; -defm : VPatBinaryFPSDNode_R_VF_RM; +defm : VPatBinaryFPSDNode_VV_VF_RM; +defm : VPatBinaryFPSDNode_VV_VF_RM; +defm : VPatBinaryFPSDNode_R_VF_RM; // 13.3. Vector Widening Floating-Point Add/Subtract Instructions defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM; defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM; // 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions -defm : VPatBinaryFPSDNode_VV_VF_RM; +defm : VPatBinaryFPSDNode_VV_VF_RM; defm : VPatBinaryFPSDNode_VV_VF_RM; defm : VPatBinaryFPSDNode_R_VF_RM; @@ -1314,14 +1328,15 @@ foreach fvti = AllFloatVectors in { // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACC", - AllWidenableFloatVectors>; + AllWidenableFloatAndBF16Vectors>; defm : VPatWidenFPNegMulAccSDNode_VV_VF_RM<"PseudoVFWNMACC">; defm : VPatWidenFPMulSacSDNode_VV_VF_RM<"PseudoVFWMSAC">; defm : VPatWidenFPNegMulSacSDNode_VV_VF_RM<"PseudoVFWNMSAC">; -foreach vti = AllFloatVectors in { +foreach vti = AllFloatAndBF16Vectors in { let Predicates = GetVTypePredicates.Predicates in { // 13.8. Vector Floating-Point Square-Root Instruction + if !ne(vti.Scalar, bf16) then def : Pat<(any_fsqrt (vti.Vector vti.RegClass:$rs2)), (!cast("PseudoVFSQRT_V_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), @@ -1333,34 +1348,46 @@ foreach vti = AllFloatVectors in { // 13.12. Vector Floating-Point Sign-Injection Instructions def : Pat<(fabs (vti.Vector vti.RegClass:$rs)), - (!cast("PseudoVFSGNJX_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (!cast("PseudoVFSGNJX"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; // Handle fneg with VFSGNJN using the same input for both operands. 
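// For example (illustrative): vfsgnjn.vv vd, vs2, vs1 produces the
// magnitude of vs2 with the negated sign of vs1, so passing the same
// register for both operands yields vd = -vs, i.e. a plain fneg.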
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)), - (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (!cast("PseudoVFSGNJN"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2))), - (!cast("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (!cast("PseudoVFSGNJ"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))), - (!cast("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) + (!cast("PseudoVFSGNJ"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (fneg vti.RegClass:$rs2)))), - (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (!cast("PseudoVFSGNJN"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))), - (!cast("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) + (!cast("PseudoVFSGNJN"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; } @@ -1446,13 +1473,28 @@ defm : VPatNConvertFP2ISDNode_W; defm : VPatNConvertFP2ISDNode_W; defm : VPatNConvertI2FPSDNode_W_RM; defm : VPatNConvertI2FPSDNode_W_RM; -foreach fvtiToFWti = AllWidenableFloatVectors in { +foreach fvtiToFWti = AllWidenableFloatAndBF16Vectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; - let Predicates = !listconcat(GetVTypeMinimalPredicates.Predicates, - GetVTypeMinimalPredicates.Predicates) in + let Predicates = !listconcat(GetVTypeMinimalPredicates.Predicates, + !if(!eq(fvti.Scalar, bf16), + [HasStdExtZvfbfmin], + GetVTypeMinimalPredicates.Predicates)) in + def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))), + (!cast("PseudoVFNCVT"# + !if(!eq(fvti.Scalar, bf16), "BF16", "")# + "_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW) + (fvti.Vector (IMPLICIT_DEF)), + fwti.RegClass:$rs1, + // Value to indicate no rounding mode change in + // RISCVInsertReadWriteCSR + FRM_DYN, + fvti.AVL, fvti.Log2SEW, TA_MA)>; + // Define vfncvt.f.f.w for bf16 when Zvfbfa is enabled. 
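// (Illustrative: for a bf16 fvti with LMUL=1 and SEW=16, the pattern above
// selects PseudoVFNCVTBF16_F_F_W_M1_E16, while the Zvfbfa variant defined
// next, under the HasVInstructionsBF16 predicate, selects
// PseudoVFNCVT_F_F_ALT_W_M1_E16.)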
+ if !eq(fvti.Scalar, bf16) then + let Predicates = [HasVInstructionsBF16] in def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))), - (!cast("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW) + (!cast("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW) (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, // Value to indicate no rounding mode change in @@ -1464,10 +1506,10 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { //===----------------------------------------------------------------------===// // Vector Element Extracts //===----------------------------------------------------------------------===// -foreach vti = NoGroupFloatVectors in { - defvar vfmv_f_s_inst = !cast(!strconcat("PseudoVFMV_", - vti.ScalarSuffix, - "_S")); +foreach vti = !listconcat(NoGroupFloatVectors, NoGroupBF16Vectors) in { + defvar vfmv_f_s_inst = + !cast(!strconcat("PseudoVFMV_", vti.ScalarSuffix, + "_S", !if(!eq(vti.Scalar, bf16), "_ALT", ""))); // Only pattern-match extract-element operations where the index is 0. Any // other index will have been custom-lowered to slide the vector correctly // into place. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 38edab5400291..9273ce094eb0a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1058,14 +1058,18 @@ multiclass VPatBinaryFPVL_VV_VF { - foreach vti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in { + list vtilist = AllFloatVectors, + bit isSEWAware = 0> { + foreach vti = vtilist in { let Predicates = GetVTypePredicates.Predicates in { - def : VPatBinaryVL_V_RM; - def : VPatBinaryVL_VF_RM; @@ -1093,8 +1097,9 @@ multiclass VPatBinaryFPVL_R_VF { - foreach fvti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in { + list vtilist = AllFloatVectors, + bit isSEWAware = 0> { + foreach fvti = vtilist in { let Predicates = GetVTypePredicates.Predicates in def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2), fvti.RegClass:$rs1, @@ -1103,7 +1108,9 @@ multiclass VPatBinaryFPVL_R_VF_RM( !if(isSEWAware, - instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK", + instruction_name# + !if(!eq(fvti.Scalar, bf16), "_ALT", "")# + "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK", instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")) fvti.RegClass:$passthru, fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, @@ -1832,16 +1839,17 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM.Predicates, - GetVTypePredicates.Predicates, + let Predicates = !listconcat(GetVTypePredicates.Predicates, !if(!eq(vti.Scalar, bf16), [HasStdExtZvfbfwma], - [])) in { + GetVTypePredicates.Predicates)) in { def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), VLOpFrag), - (!cast(instruction_name#"_VV_"#suffix#"_MASK") + (!cast(instruction_name# + !if(!eq(vti.Scalar, bf16), "BF16", "")# + "_VV_"#suffix#"_MASK") wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in @@ -1852,7 +1860,9 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix#"_MASK") + (!cast(instruction_name# + !if(!eq(vti.Scalar, bf16), "BF16", "")# + "_V"#vti.ScalarSuffix#"_"#suffix#"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in @@ -2296,9 +2306,12 @@ 
foreach vtiTowti = AllWidenableIntVectors in { // 13. Vector Floating-Point Instructions // 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions -defm : VPatBinaryFPVL_VV_VF_RM; -defm : VPatBinaryFPVL_VV_VF_RM; -defm : VPatBinaryFPVL_R_VF_RM; +defm : VPatBinaryFPVL_VV_VF_RM; +defm : VPatBinaryFPVL_VV_VF_RM; +defm : VPatBinaryFPVL_R_VF_RM; // 13.3. Vector Widening Floating-Point Add/Subtract Instructions defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM; // 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions -defm : VPatBinaryFPVL_VV_VF_RM; +defm : VPatBinaryFPVL_VV_VF_RM; defm : VPatBinaryFPVL_VV_VF_RM; defm : VPatBinaryFPVL_R_VF_RM; @@ -2321,7 +2335,8 @@ defm : VPatFPMulAddVL_VV_VF_RM; defm : VPatFPMulAddVL_VV_VF_RM; // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions -defm : VPatWidenFPMulAccVL_VV_VF_RM; +defm : VPatWidenFPMulAccVL_VV_VF_RM; defm : VPatWidenFPMulAccVL_VV_VF_RM; defm : VPatWidenFPMulAccVL_VV_VF_RM; defm : VPatWidenFPMulAccVL_VV_VF_RM; @@ -2423,6 +2438,66 @@ foreach vti = AllFloatVectors in { } } +foreach vti = AllBF16Vectors in { + let Predicates = GetVTypePredicates.Predicates in { + // 13.12. Vector Floating-Point Sign-Injection Instructions + def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVFSGNJX"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, + vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, + TA_MA)>; + // Handle fneg with VFSGNJN using the same input for both operands. + def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVFSGNJN"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, + vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, + TA_MA)>; + + def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2), + vti.RegClass:$passthru, + (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVFSGNJ"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") + vti.RegClass:$passthru, vti.RegClass:$rs1, + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, + TAIL_AGNOSTIC)>; + + def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), + (riscv_fneg_vl vti.RegClass:$rs2, + (vti.Mask true_mask), + VLOpFrag), + srcvalue, + (vti.Mask true_mask), + VLOpFrag), + (!cast("PseudoVFSGNJN"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>; + + def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), + (SplatFPOp vti.ScalarRegClass:$rs2), + vti.RegClass:$passthru, + (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVFSGNJ"# + !if(!eq(vti.Scalar, bf16), "_ALT", "")# + "_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") + vti.RegClass:$passthru, vti.RegClass:$rs1, + vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, + TAIL_AGNOSTIC)>; + } +} + // Floating-point vselects: // 11.15. Vector Integer Merge Instructions // 13.15. Vector Floating-Point Merge Instruction @@ -2476,7 +2551,7 @@ foreach fvti = AllFloatVectors in { } } -foreach fvti = AllFloatVectors in { +foreach fvti = AllFloatAndBF16Vectors in { defvar ivti = GetIntVTypeInfo.Vti; let Predicates = GetVTypePredicates.Predicates in { // 13.16. 
Vector Floating-Point Move Instruction @@ -2492,11 +2567,13 @@ foreach fvti = AllFloatVectors in { } } -foreach fvti = AllFloatVectors in { +foreach fvti = AllFloatAndBF16Vectors in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), - (!cast("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # + (!cast("PseudoVFMV_V" # + !if(!eq(fvti.Scalar, bf16), "_ALT_", "_") # + fvti.ScalarSuffix # "_" # fvti.LMul.MX) $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl, fvti.Log2SEW, TU_MU)>; @@ -2526,20 +2603,37 @@ defm : VPatWConvertFP2IVL_V; defm : VPatWConvertI2FPVL_V; -foreach fvtiToFWti = AllWidenableFloatVectors in { +foreach fvtiToFWti = AllWidenableFloatAndBF16Vectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; - // Define vfwcvt.f.f.v for f16 when Zvfhmin is enable. - let Predicates = !listconcat(GetVTypeMinimalPredicates.Predicates, - GetVTypeMinimalPredicates.Predicates) in + // Define vfwcvt.f.f.v for f16 when Zvfhmin is enabled. + // Define vfwcvtbf16.f.f.v for bf16 when Zvfbfmin is enabled. + let Predicates = !listconcat(GetVTypeMinimalPredicates.Predicates, + !if(!eq(fvti.Scalar, bf16), + [HasStdExtZvfbfmin], + GetVTypeMinimalPredicates.Predicates)) in { def : Pat<(fwti.Vector (any_riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1), (fvti.Mask VMV0:$vm), VLOpFrag)), - (!cast("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") + (!cast("PseudoVFWCVT"# + !if(!eq(fvti.Scalar, bf16), "BF16", "")# + "_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; + + // Define vfwcvt.f.f.v for bf16 when Zvfbfa is enabled. + if !eq(fvti.Scalar, bf16) then + let Predicates = [HasVInstructionsBF16] in + def : Pat<(fwti.Vector (any_riscv_fpextend_vl + (fvti.Vector fvti.RegClass:$rs1), + (fvti.Mask VMV0:$vm), + VLOpFrag)), + (!cast("PseudoVFWCVT_F_F_ALT_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") + (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, + (fvti.Mask VMV0:$vm), + GPR:$vl, fvti.Log2SEW, TA_MA)>; } // 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions @@ -2555,16 +2649,21 @@ defm : VPatNConvertI2FPVL_W_RM; defm : VPatNConvertI2FP_RM_VL_W; defm : VPatNConvertI2FP_RM_VL_W; -foreach fvtiToFWti = AllWidenableFloatVectors in { +foreach fvtiToFWti = AllWidenableFloatAndBF16Vectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; - // Define vfncvt.f.f.w for f16 when Zvfhmin is enable. - let Predicates = !listconcat(GetVTypeMinimalPredicates.Predicates, - GetVTypeMinimalPredicates.Predicates) in { + // Define vfncvt.f.f.w for f16 when Zvfhmin is enabled. + // Define vfncvtbf16.f.f.w for bf16 when Zvfbfmin is enabled. 
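// (Editorial sketch, not part of the patch: the !cast name below is assembled
// as "PseudoVFNCVT" # !if(bf16, "BF16", "") # "_F_F_W_" # LMUL # "_E" # SEW #
// "_MASK". For a bf16 narrow type at LMUL=1 (SEW=16) it resolves to
// PseudoVFNCVTBF16_F_F_W_M1_E16_MASK; in the f16/Zvfhmin case the BF16 infix
// is dropped, giving PseudoVFNCVT_F_F_W_M1_E16_MASK.)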
+ let Predicates = !listconcat(GetVTypeMinimalPredicates.Predicates, + !if(!eq(fvti.Scalar, bf16), + [HasStdExtZvfbfmin], + GetVTypeMinimalPredicates.Predicates)) in def : Pat<(fvti.Vector (any_riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1), (fwti.Mask VMV0:$vm), VLOpFrag)), - (!cast("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") + (!cast("PseudoVFNCVT"# + !if(!eq(fvti.Scalar, bf16), "BF16", "")# + "_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, (fwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in @@ -2581,6 +2680,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, (fwti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } + + // Define vfncvt.f.f.w for bf16 when Zvfbfa is enabled. + if !eq(fvti.Scalar, bf16) then + let Predicates = [HasVInstructionsBF16] in + def : Pat<(fvti.Vector (any_riscv_fpround_vl + (fwti.Vector fwti.RegClass:$rs1), + (fwti.Mask VMV0:$vm), VLOpFrag)), + (!cast("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, + (fwti.Mask VMV0:$vm), + // Value to indicate no rounding mode change in + // RISCVInsertReadWriteCSR + FRM_DYN, + GPR:$vl, fvti.Log2SEW, TA_MA)>; } // 14. Vector Reduction Operations @@ -2751,7 +2864,7 @@ foreach vti = AllIntegerVectors in { } // 16.2. Floating-Point Scalar Move Instructions -foreach vti = NoGroupFloatVectors in { +foreach vti = !listconcat(NoGroupFloatVectors, NoGroupBF16Vectors) in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), (vti.Scalar (fpimm0)), @@ -2764,7 +2877,8 @@ foreach vti = NoGroupFloatVectors in { def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), vti.ScalarRegClass:$rs1, VLOpFrag)), - (!cast("PseudoVFMV_S_"#vti.ScalarSuffix) + (!cast("PseudoVFMV_S_"#vti.ScalarSuffix# + !if(!eq(vti.Scalar, bf16), "_ALT", "")) vti.RegClass:$passthru, (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td index 3a6ce3ce1d469..39a7aeda94707 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td @@ -156,7 +156,7 @@ foreach m = MxList in { let BaseInstr = RI_VEXTRACT in def PseudoRI_VEXTRACT_ # mx : RISCVVPseudo<(outs GPR:$rd), - (ins m.vrclass:$rs2, uimm5:$idx, ixlenimm:$sew), + (ins m.vrclass:$rs2, uimm5:$idx, sew:$sew), []>; let HasVLOp = 1, BaseInstr = RI_VINSERT, HasVecPolicyOp = 1, @@ -164,7 +164,7 @@ foreach m = MxList in { def PseudoRI_VINSERT_ # mx : RISCVVPseudo<(outs m.vrclass:$rd), (ins m.vrclass:$rs1, GPR:$rs2, uimm5:$idx, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), + sew:$sew, vec_policy:$policy), []>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td index e24e4a33288f7..866e831fdcd94 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td @@ -406,47 +406,11 @@ let Predicates = [HasStdExtZvfbfmin] in { "PseudoVFWCVTBF16_F_F", isSEWAware=1>; defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w", "PseudoVFNCVTBF16_F_F", isSEWAware=1>; - - foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in { - defvar fvti = fvtiToFWti.Vti; - defvar fwti = fvtiToFWti.Wti; - def : Pat<(fwti.Vector (any_riscv_fpextend_vl - (fvti.Vector fvti.RegClass:$rs1), - 
(fvti.Mask VMV0:$vm), - VLOpFrag)), - (!cast("PseudoVFWCVTBF16_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") - (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask VMV0:$vm), - GPR:$vl, fvti.Log2SEW, TA_MA)>; - - def : Pat<(fvti.Vector (any_riscv_fpround_vl - (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask VMV0:$vm), VLOpFrag)), - (!cast("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") - (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask VMV0:$vm), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, fvti.Log2SEW, TA_MA)>; - def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))), - (!cast("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW) - (fvti.Vector (IMPLICIT_DEF)), - fwti.RegClass:$rs1, - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - fvti.AVL, fvti.Log2SEW, TA_MA)>; - } } let Predicates = [HasStdExtZvfbfwma] in { defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmaccbf16", "PseudoVFWMACCBF16", AllWidenableBF16ToFloatVectors, isSEWAware=1>; - defm : VPatWidenFPMulAccVL_VV_VF_RM; - defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACCBF16", - AllWidenableBF16ToFloatVectors>; } multiclass VPatConversionVI_VF_BF16 { @@ -614,191 +578,4 @@ defm : VPatConversionVF_WF_BF16<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_ isSEWAware=1>; defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP_ALT", AllBF16Vectors>; defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN_ALT", AllBF16Vectors>; - -foreach fvti = AllBF16Vectors in { - defvar ivti = GetIntVTypeInfo.Vti; - let Predicates = GetVTypePredicates.Predicates in { - // 13.16. Vector Floating-Point Move Instruction - // If we're splatting fpimm0, use vmv.v.x vd, x0. - def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)), - (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) - $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>; - def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)), - (!cast("PseudoVMV_V_X_"#fvti.LMul.MX) - $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>; - } - - let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), - (!cast("PseudoVFMV_V_ALT_" # fvti.ScalarSuffix # "_" # - fvti.LMul.MX) - $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), - GPR:$vl, fvti.Log2SEW, TU_MU)>; - } -} - -foreach vti = NoGroupBF16Vectors in { - let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), - (vti.Scalar (fpimm0)), - VLOpFrag)), - (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), - (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), - VLOpFrag)), - (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), - vti.ScalarRegClass:$rs1, - VLOpFrag)), - (!cast("PseudoVFMV_S_"#vti.ScalarSuffix#"_ALT") - vti.RegClass:$passthru, - (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>; - } - - defvar vfmv_f_s_inst = !cast(!strconcat("PseudoVFMV_", - vti.ScalarSuffix, - "_S_ALT")); - // Only pattern-match extract-element operations where the index is 0. 
Any - // other index will have been custom-lowered to slide the vector correctly - // into place. - let Predicates = GetVTypePredicates.Predicates in - def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)), - (vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>; -} - -let Predicates = [HasStdExtZvfbfa] in { - foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in { - defvar fvti = fvtiToFWti.Vti; - defvar fwti = fvtiToFWti.Wti; - def : Pat<(fwti.Vector (any_riscv_fpextend_vl - (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask VMV0:$vm), - VLOpFrag)), - (!cast("PseudoVFWCVT_F_F_ALT_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") - (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask VMV0:$vm), - GPR:$vl, fvti.Log2SEW, TA_MA)>; - - def : Pat<(fvti.Vector (any_riscv_fpround_vl - (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask VMV0:$vm), VLOpFrag)), - (!cast("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") - (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask VMV0:$vm), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, fvti.Log2SEW, TA_MA)>; - def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))), - (!cast("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW) - (fvti.Vector (IMPLICIT_DEF)), - fwti.RegClass:$rs1, - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - fvti.AVL, fvti.Log2SEW, TA_MA)>; - } - - foreach vti = AllBF16Vectors in { - // 13.12. Vector Floating-Point Sign-Injection Instructions - def : Pat<(fabs (vti.Vector vti.RegClass:$rs)), - (!cast("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; - // Handle fneg with VFSGNJN using the same input for both operands. - def : Pat<(fneg (vti.Vector vti.RegClass:$rs)), - (!cast("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; - - def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2))), - (!cast("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; - def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))), - (!cast("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; - - def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), - (vti.Vector (fneg vti.RegClass:$rs2)))), - (!cast("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; - def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), - (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))), - (!cast("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; - - // 13.12. 
Vector Floating-Point Sign-Injection Instructions - def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), - VLOpFrag), - (!cast("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK") - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, - vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, - TA_MA)>; - // Handle fneg with VFSGNJN using the same input for both operands. - def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), - VLOpFrag), - (!cast("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK") - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, - vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, - TA_MA)>; - - def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2), - vti.RegClass:$passthru, - (vti.Mask VMV0:$vm), - VLOpFrag), - (!cast("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") - vti.RegClass:$passthru, vti.RegClass:$rs1, - vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, - TAIL_AGNOSTIC)>; - - def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), - (riscv_fneg_vl vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag), - srcvalue, - (vti.Mask true_mask), - VLOpFrag), - (!cast("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, - vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>; - - def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), - (SplatFPOp vti.ScalarRegClass:$rs2), - vti.RegClass:$passthru, - (vti.Mask VMV0:$vm), - VLOpFrag), - (!cast("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") - vti.RegClass:$passthru, vti.RegClass:$rs1, - vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, - TAIL_AGNOSTIC)>; - } - } - - defm : VPatBinaryFPSDNode_VV_VF_RM; - defm : VPatBinaryFPSDNode_VV_VF_RM; - defm : VPatBinaryFPSDNode_VV_VF_RM; - defm : VPatBinaryFPSDNode_R_VF_RM; - - defm : VPatBinaryFPVL_VV_VF_RM; - defm : VPatBinaryFPVL_VV_VF_RM; - defm : VPatBinaryFPVL_VV_VF_RM; - defm : VPatBinaryFPVL_R_VF_RM; } // Predicates = [HasStdExtZvfbfa] diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp index a22ab6bfc04b8..f1827dcf174f3 100644 --- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp @@ -70,6 +70,12 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass { // Convert load/store pairs to single instructions. bool tryConvertToLdStPair(MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second); + bool tryConvertToXqcilsmLdStPair(MachineFunction *MF, + MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second); + bool tryConvertToMIPSLdStPair(MachineFunction *MF, + MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second); // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. 
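A minimal standalone sketch of the pairing legality test that the new tryConvertToXqcilsmLdStPair hunk below encodes, included here for orientation only; the helper name and free-function shape are illustrative and not part of the patch. Two adjacent word loads or stores can fold into QC_LWMI/QC_SWMI only when they share a base register, are at least word aligned, cover consecutive offsets whose lower value encodes as a 5-bit unsigned immediate scaled by 4, and use consecutive data registers.

#include "llvm/Support/MathExtras.h" // for llvm::isShiftedUInt

// Illustrative mirror of the checks added in tryConvertToXqcilsmLdStPair;
// plain unsigned register numbers stand in for MCRegister.
static bool canPairWordsForXqcilsm(bool IsLoad, unsigned Base1, unsigned Base2,
                                   int64_t Off1, int64_t Off2,
                                   unsigned StartReg, unsigned NextReg,
                                   unsigned AlignBytes) {
  if (Base1 != Base2 || AlignBytes < 4) // same base and word alignment
    return false;
  if (Off2 - Off1 != 4)                 // the two words must be adjacent
    return false;
  if (!llvm::isShiftedUInt<5, 2>(Off1)) // low offset must fit uimm5 << 2
    return false;
  if (NextReg != StartReg + 1)          // data regs form a consecutive pair
    return false;
  if (IsLoad && (StartReg == Base1 || NextReg == Base1))
    return false;                       // a load must not clobber its base
  return true;
}

The patch additionally canonicalizes the pair so Off1 is the lower offset and rejects X0 data registers before applying these checks.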
@@ -114,7 +120,7 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { ModifiedRegUnits.init(*TRI); UsedRegUnits.init(*TRI); - if (Subtarget.useMIPSLoadStorePairs()) { + if (Subtarget.useMIPSLoadStorePairs() || Subtarget.hasVendorXqcilsm()) { for (MachineBasicBlock &MBB : Fn) { LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); @@ -168,14 +174,93 @@ bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { return false; } -// Merge two adjacent load/store instructions into a paired instruction -// (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of -// SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the -// appropriate paired opcode, verifies that the memory operand is properly -// aligned, and checks that the offset is valid. If all conditions are met, it -// builds and inserts the paired instruction. -bool RISCVLoadStoreOpt::tryConvertToLdStPair( - MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { +bool RISCVLoadStoreOpt::tryConvertToXqcilsmLdStPair( + MachineFunction *MF, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second) { + unsigned Opc = First->getOpcode(); + if ((Opc != RISCV::LW && Opc != RISCV::SW) || Second->getOpcode() != Opc) + return false; + + const auto &FirstOp1 = First->getOperand(1); + const auto &SecondOp1 = Second->getOperand(1); + const auto &FirstOp2 = First->getOperand(2); + const auto &SecondOp2 = Second->getOperand(2); + + // Require simple reg+imm addressing for both. + if (!FirstOp1.isReg() || !SecondOp1.isReg() || !FirstOp2.isImm() || + !SecondOp2.isImm()) + return false; + + Register Base1 = FirstOp1.getReg(); + Register Base2 = SecondOp1.getReg(); + + if (Base1 != Base2) + return false; + + const MachineMemOperand *MMO = *First->memoperands_begin(); + Align MMOAlign = MMO->getAlign(); + + if (MMOAlign < Align(4)) + return false; + + auto &FirstOp0 = First->getOperand(0); + auto &SecondOp0 = Second->getOperand(0); + + int64_t Off1 = FirstOp2.getImm(); + int64_t Off2 = SecondOp2.getImm(); + + if (Off2 < Off1) { + std::swap(FirstOp0, SecondOp0); + std::swap(Off1, Off2); + } + + Register StartReg = FirstOp0.getReg(); + Register NextReg = SecondOp0.getReg(); + + if (StartReg == RISCV::X0 || NextReg == RISCV::X0) + return false; + + // If the base reg gets overwritten by one of the loads then bail out. + if (Opc == RISCV::LW && (StartReg == Base1 || NextReg == Base1)) + return false; + + if (!isShiftedUInt<5, 2>(Off1) || (Off2 - Off1 != 4)) + return false; + + if (NextReg != StartReg + 1) + return false; + + unsigned XqciOpc = (Opc == RISCV::LW) ? RISCV::QC_LWMI : RISCV::QC_SWMI; + + auto StartRegState = (Opc == RISCV::LW) ? RegState::Define + : getKillRegState(FirstOp0.isKill()); + auto NextRegState = + (Opc == RISCV::LW) + ? RegState::ImplicitDefine + : (RegState::Implicit | getKillRegState(SecondOp0.isKill())); + + DebugLoc DL = + First->getDebugLoc() ? 
First->getDebugLoc() : Second->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(XqciOpc)); + MIB.addReg(StartReg, StartRegState) + .addReg(Base1, getKillRegState(FirstOp1.isKill() || SecondOp1.isKill())) + .addImm(2) + .addImm(Off1) + .cloneMergedMemRefs({&*First, &*Second}) + .addReg(NextReg, NextRegState); + + First->getParent()->insert(First, MIB); + First->removeFromParent(); + Second->removeFromParent(); + + return true; +} + +bool RISCVLoadStoreOpt::tryConvertToMIPSLdStPair( + MachineFunction *MF, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second) { + // Try converting to SWP/LWP/LDP/SDP. + // SWP/LWP requires 8-byte alignment whereas LDP/SDP needs 16-byte alignment. unsigned PairOpc; Align RequiredAlignment; switch (First->getOpcode()) { @@ -199,7 +284,6 @@ bool RISCVLoadStoreOpt::tryConvertToLdStPair( break; } - MachineFunction *MF = First->getMF(); const MachineMemOperand *MMO = *First->memoperands_begin(); Align MMOAlign = MMO->getAlign(); @@ -227,6 +311,24 @@ return true; } +// Merge two adjacent load/store instructions into a paired instruction. +// This function calls the vendor-specific implementation that selects the +// appropriate paired opcode, verifies that the memory operand is properly +// aligned, and checks that the offset is valid. If all conditions are met, it +// builds and inserts the paired instruction. +bool RISCVLoadStoreOpt::tryConvertToLdStPair( + MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { + MachineFunction *MF = First->getMF(); + const RISCVSubtarget &STI = MF->getSubtarget(); + + // Try converting to QC_LWMI/QC_SWMI if the XQCILSM extension is enabled. + if (!STI.is64Bit() && STI.hasVendorXqcilsm()) + return tryConvertToXqcilsmLdStPair(MF, First, Second); + + // Else try to convert them into MIPS Paired Loads/Stores. + return tryConvertToMIPSLdStPair(MF, First, Second); +} + static bool mayAlias(MachineInstr &MIa, SmallVectorImpl &MemInsns, AliasAnalysis *AA) { diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index e5ba0201c0cc1..b111909fc25cc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -1154,10 +1154,63 @@ static unsigned getNumSizeComponents(SPIRVType *imgType) { return arrayed ?
numComps + 1 : numComps; } +static bool builtinMayNeedPromotionToVec(uint32_t BuiltinNumber) { + switch (BuiltinNumber) { + case SPIRV::OpenCLExtInst::s_min: + case SPIRV::OpenCLExtInst::u_min: + case SPIRV::OpenCLExtInst::s_max: + case SPIRV::OpenCLExtInst::u_max: + case SPIRV::OpenCLExtInst::fmax: + case SPIRV::OpenCLExtInst::fmin: + case SPIRV::OpenCLExtInst::fmax_common: + case SPIRV::OpenCLExtInst::fmin_common: + case SPIRV::OpenCLExtInst::s_clamp: + case SPIRV::OpenCLExtInst::fclamp: + case SPIRV::OpenCLExtInst::u_clamp: + case SPIRV::OpenCLExtInst::mix: + case SPIRV::OpenCLExtInst::step: + case SPIRV::OpenCLExtInst::smoothstep: + return true; + default: + break; + } + return false; +} + //===----------------------------------------------------------------------===// // Implementation functions for each builtin group //===----------------------------------------------------------------------===// +static SmallVector +getBuiltinCallArguments(const SPIRV::IncomingCall *Call, uint32_t BuiltinNumber, + MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { + + Register ReturnTypeId = GR->getSPIRVTypeID(Call->ReturnType); + unsigned ResultElementCount = + GR->getScalarOrVectorComponentCount(ReturnTypeId); + bool MayNeedPromotionToVec = + builtinMayNeedPromotionToVec(BuiltinNumber) && ResultElementCount > 1; + + if (!MayNeedPromotionToVec) + return {Call->Arguments.begin(), Call->Arguments.end()}; + + SmallVector Arguments; + for (Register Argument : Call->Arguments) { + Register VecArg = Argument; + SPIRVType *ArgumentType = GR->getSPIRVTypeForVReg(Argument); + if (ArgumentType != Call->ReturnType) { + VecArg = createVirtualRegister(Call->ReturnType, GR, MIRBuilder); + auto VecSplat = MIRBuilder.buildInstr(SPIRV::OpCompositeConstruct) + .addDef(VecArg) + .addUse(ReturnTypeId); + for (unsigned I = 0; I != ResultElementCount; ++I) + VecSplat.addUse(Argument); + } + Arguments.push_back(VecArg); + } + return Arguments; +} + static bool generateExtInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR, const CallBase &CB) { @@ -1179,16 +1232,21 @@ static bool generateExtInst(const SPIRV::IncomingCall *Call, : SPIRV::OpenCLExtInst::fmax; } + Register ReturnTypeId = GR->getSPIRVTypeID(Call->ReturnType); + SmallVector Arguments = + getBuiltinCallArguments(Call, Number, MIRBuilder, GR); + // Build extended instruction. 
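// (Editorial illustration, not part of the patch: by this point Arguments has
// been vector-promoted by getBuiltinCallArguments, so a mixed call such as
//   %r = call <2 x float> @fmax(<2 x float> %v, float %s)
// reaches OpExtInst with the scalar splatted first:
//   %splat = OpCompositeConstruct %v2float %s %s
//   %r = OpExtInst %v2float %ext_set fmax %v %splat)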
auto MIB = MIRBuilder.buildInstr(SPIRV::OpExtInst) .addDef(Call->ReturnRegister) - .addUse(GR->getSPIRVTypeID(Call->ReturnType)) + .addUse(ReturnTypeId) .addImm(static_cast(SPIRV::InstructionSet::OpenCL_std)) .addImm(Number); - for (auto Argument : Call->Arguments) + for (Register Argument : Arguments) MIB.addUse(Argument); + MIB.getInstr()->copyIRFlags(CB); if (OrigNumber == SPIRV::OpenCLExtInst::fmin_common || OrigNumber == SPIRV::OpenCLExtInst::fmax_common) { diff --git a/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp b/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp index 1b95f09974c61..653c9ad53e888 100644 --- a/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp @@ -12,11 +12,10 @@ //===----------------------------------------------------------------------===// #include "SPIRV.h" -#include "llvm/Demangle/Demangle.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Utils/Cloning.h" #include @@ -25,9 +24,7 @@ using namespace llvm; namespace { -struct SPIRVRegularizer : public FunctionPass, InstVisitor { - DenseMap Old2NewFuncs; - +struct SPIRVRegularizer : public FunctionPass { public: static char ID; SPIRVRegularizer() : FunctionPass(ID) {} @@ -37,11 +34,8 @@ struct SPIRVRegularizer : public FunctionPass, InstVisitor { void getAnalysisUsage(AnalysisUsage &AU) const override { FunctionPass::getAnalysisUsage(AU); } - void visitCallInst(CallInst &CI); private: - void visitCallScalToVec(CallInst *CI, StringRef MangledName, - StringRef DemangledName); void runLowerConstExpr(Function &F); }; } // namespace @@ -157,98 +151,8 @@ void SPIRVRegularizer::runLowerConstExpr(Function &F) { } } -// It fixes calls to OCL builtins that accept vector arguments and one of them -// is actually a scalar splat. -void SPIRVRegularizer::visitCallInst(CallInst &CI) { - auto F = CI.getCalledFunction(); - if (!F) - return; - - auto MangledName = F->getName(); - char *NameStr = itaniumDemangle(F->getName().data()); - if (!NameStr) - return; - StringRef DemangledName(NameStr); - - // TODO: add support for other builtins. - if (DemangledName.starts_with("fmin") || DemangledName.starts_with("fmax") || - DemangledName.starts_with("min") || DemangledName.starts_with("max")) - visitCallScalToVec(&CI, MangledName, DemangledName); - free(NameStr); -} - -void SPIRVRegularizer::visitCallScalToVec(CallInst *CI, StringRef MangledName, - StringRef DemangledName) { - // Check if all arguments have the same type - it's simple case. 
- auto Uniform = true; - Type *Arg0Ty = CI->getOperand(0)->getType(); - auto IsArg0Vector = isa(Arg0Ty); - for (unsigned I = 1, E = CI->arg_size(); Uniform && (I != E); ++I) - Uniform = isa(CI->getOperand(I)->getType()) == IsArg0Vector; - if (Uniform) - return; - - auto *OldF = CI->getCalledFunction(); - Function *NewF = nullptr; - auto [It, Inserted] = Old2NewFuncs.try_emplace(OldF); - if (Inserted) { - AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - SmallVector ArgTypes = {OldF->getArg(0)->getType(), Arg0Ty}; - auto *NewFTy = - FunctionType::get(OldF->getReturnType(), ArgTypes, OldF->isVarArg()); - NewF = Function::Create(NewFTy, OldF->getLinkage(), OldF->getName(), - *OldF->getParent()); - ValueToValueMapTy VMap; - auto NewFArgIt = NewF->arg_begin(); - for (auto &Arg : OldF->args()) { - auto ArgName = Arg.getName(); - NewFArgIt->setName(ArgName); - VMap[&Arg] = &(*NewFArgIt++); - } - SmallVector Returns; - CloneFunctionInto(NewF, OldF, VMap, - CloneFunctionChangeType::LocalChangesOnly, Returns); - NewF->setAttributes(Attrs); - It->second = NewF; - } else { - NewF = It->second; - } - assert(NewF); - - // This produces an instruction sequence that implements a splat of - // CI->getOperand(1) to a vector Arg0Ty. However, we use InsertElementInst - // and ShuffleVectorInst to generate the same code as the SPIR-V translator. - // For instance (transcoding/OpMin.ll), this call - // call spir_func <2 x i32> @_Z3minDv2_ii(<2 x i32> , i32 5) - // is translated to - // %8 = OpUndef %v2uint - // %14 = OpConstantComposite %v2uint %uint_1 %uint_10 - // ... - // %10 = OpCompositeInsert %v2uint %uint_5 %8 0 - // %11 = OpVectorShuffle %v2uint %10 %8 0 0 - // %call = OpExtInst %v2uint %1 s_min %14 %11 - auto ConstInt = ConstantInt::get(IntegerType::get(CI->getContext(), 32), 0); - PoisonValue *PVal = PoisonValue::get(Arg0Ty); - Instruction *Inst = InsertElementInst::Create( - PVal, CI->getOperand(1), ConstInt, "", CI->getIterator()); - ElementCount VecElemCount = cast(Arg0Ty)->getElementCount(); - Constant *ConstVec = ConstantVector::getSplat(VecElemCount, ConstInt); - Value *NewVec = - new ShuffleVectorInst(Inst, PVal, ConstVec, "", CI->getIterator()); - CI->setOperand(1, NewVec); - CI->replaceUsesOfWith(OldF, NewF); - CI->mutateFunctionType(NewF->getFunctionType()); -} - bool SPIRVRegularizer::runOnFunction(Function &F) { runLowerConstExpr(F); - visit(F); - for (auto &OldNew : Old2NewFuncs) { - Function *OldF = OldNew.first; - Function *NewF = OldNew.second; - NewF->takeName(OldF); - OldF->eraseFromParent(); - } return true; } diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index e7903a72d85bb..9791c1999086b 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1875,8 +1875,8 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, // For more information see http://people.redhat.com/drepper/tls.pdf if (isNullConstant(Address) && AM.Segment.getNode() == nullptr && !IndirectTlsSegRefs && - (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() || - Subtarget->isTargetFuchsia())) { + (Subtarget->isTargetGlibc() || Subtarget->isTargetMusl() || + Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())) { if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32) return true; switch (N->getPointerInfo().getAddrSpace()) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3b3b20edbbe84..ec746843f8ea8 
100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33171,7 +33171,14 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG, // For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE // is enabled. if (VT == MVT::i64) { - if (Subtarget.hasSSE1()) { + SDValue BCValue = peekThroughBitcasts(Node->getVal()); + if (BCValue.getValueType() == MVT::f64 && + (Subtarget.hasX87() || Subtarget.hasSSE2())) { + // If the i64 was bitcast from an f64 then we can do the f64 atomic store + // directly with FSTPL/MOVSD. + Chain = DAG.getStore(Node->getChain(), dl, BCValue, Node->getBasePtr(), + Node->getMemOperand()); + } else if (Subtarget.hasSSE1()) { SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal()); MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32; diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 8db3e501f9b7e..ae9d0a162011f 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -546,8 +546,8 @@ unsigned X86TargetLowering::getAddressSpace() const { } static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { - return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || - TargetTriple.isAndroid(); + return TargetTriple.isOSGlibc() || TargetTriple.isMusl() || + TargetTriple.isOSFuchsia() || TargetTriple.isAndroid(); } static Constant* SegmentOffset(IRBuilderBase &IRB, diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 868f41375b96b..3b920bc4ef7c1 100644 @@ -293,6 +293,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); } bool isTargetHurd() const { return TargetTriple.isOSHurd(); } bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } + bool isTargetMusl() const { return TargetTriple.isMusl(); } bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp index cbcbb5e40fdfa..981c5561211db 100644 --- a/llvm/lib/TargetParser/TargetDataLayout.cpp +++ b/llvm/lib/TargetParser/TargetDataLayout.cpp @@ -247,6 +247,10 @@ static std::string computePowerDataLayout(const Triple &T, StringRef ABIName) { else Ret += "-n32"; + // The ABI alignment for doubles on AIX is 4 bytes. + if (T.isOSAIX()) + Ret += "-f64:32:64"; + // Specify the vector alignment explicitly. For v256i1 and v512i1, the // calculated alignment would be 256*alignment(i1) and 512*alignment(i1), // which is 256 and 512 bytes - way over aligned. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index ba5568b00441b..9cf382f8020fa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -186,9 +186,9 @@ static unsigned conjugateICmpMask(unsigned Mask) { return NewMask; } -// Adapts the external decomposeBitTestICmp for local use. -static bool decomposeBitTestICmp(Value *Cond, CmpInst::Predicate &Pred, - Value *&X, Value *&Y, Value *&Z) { +// Adapts the external decomposeBitTest for local use.
+static bool decomposeBitTest(Value *Cond, CmpInst::Predicate &Pred, Value *&X, + Value *&Y, Value *&Z) { auto Res = llvm::decomposeBitTest(Cond, /*LookThroughTrunc=*/true, /*AllowNonZeroC=*/true); if (!Res) @@ -220,7 +220,7 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, Value *&D, Value *&E, // Check whether the icmp can be decomposed into a bit test. Value *L1, *L11, *L12, *L2, *L21, *L22; - if (decomposeBitTestICmp(LHS, PredL, L11, L12, L2)) { + if (decomposeBitTest(LHS, PredL, L11, L12, L2)) { L21 = L22 = L1 = nullptr; } else { auto *LHSCMP = dyn_cast(LHS); @@ -253,7 +253,7 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, Value *&D, Value *&E, return std::nullopt; Value *R11, *R12, *R2; - if (decomposeBitTestICmp(RHS, PredR, R11, R12, R2)) { + if (decomposeBitTest(RHS, PredR, R11, R12, R2)) { if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { A = R11; D = R12; @@ -3890,7 +3890,7 @@ static std::optional matchBitmaskMul(Value *V) { // Decompose ((A & N) ? 0 : N * C) into BitMaskMul if (match(Op, m_Select(m_Value(Cond), m_APInt(EqZero), m_APInt(NeZero)))) { auto ICmpDecompose = - decomposeBitTest(Cond, /*LookThruTrunc=*/true, + decomposeBitTest(Cond, /*LookThroughTrunc=*/true, /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true); if (!ICmpDecompose.has_value()) return std::nullopt; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index abf4381ebd794..1859dad4ec00b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6290,7 +6290,7 @@ Instruction *InstCombinerImpl::foldICmpWithTrunc(ICmpInst &ICmp) { // This matches patterns corresponding to tests of the signbit as well as: // (trunc X) pred C2 --> (X & Mask) == C - if (auto Res = decomposeBitTestICmp(Op0, Op1, Pred, /*WithTrunc=*/true, + if (auto Res = decomposeBitTestICmp(Op0, Op1, Pred, /*LookThroughTrunc=*/true, /*AllowNonZeroC=*/true)) { Value *And = Builder.CreateAnd(Res->X, Res->Mask); Constant *C = ConstantInt::get(Res->X->getType(), Res->C); diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 4521352e8bcb0..02bf3ef1f765e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -265,7 +265,7 @@ function(runtime_default_target) list(APPEND test_targets runtimes-test-depends check-runtimes check-builtins) # The default runtimes target can run tests the default builtins target - list(APPEND ARG_CMAKE_ARGS "-DCOMPILER_RT_FORCE_TEST_BUILTINS_DIR=${LLVM_BINARY_DIR}/runtimes/builtins-bins/") + list(APPEND ARG_CMAKE_ARGS "-DCOMPILER_RT_TEST_BUILTINS_DIR=${LLVM_BINARY_DIR}/runtimes/builtins-bins/") endif() set_enable_per_target_runtime_dir() @@ -376,7 +376,7 @@ function(runtime_register_target name) # If a builtins-${name} target exists, we'll test those builtins # with this runtimes build if(TARGET builtins-${name}) - list(APPEND ARG_CMAKE_ARGS "-DCOMPILER_RT_FORCE_TEST_BUILTINS_DIR=${LLVM_BINARY_DIR}/runtimes/builtins-${name}-bins/") + list(APPEND ARG_CMAKE_ARGS "-DCOMPILER_RT_TEST_BUILTINS_DIR=${LLVM_BINARY_DIR}/runtimes/builtins-${name}-bins/") set(check-builtins-${name} check-builtins) list(APPEND ${name}_test_targets check-builtins-${name}) list(APPEND test_targets check-builtins-${name}) diff --git a/llvm/test/Assembler/ptrtoaddr-invalid-constexpr.ll b/llvm/test/Assembler/ptrtoaddr-invalid-constexpr.ll index 665deff4cd04b..2857f77ff695b 100644 --- 
a/llvm/test/Assembler/ptrtoaddr-invalid-constexpr.ll +++ b/llvm/test/Assembler/ptrtoaddr-invalid-constexpr.ll @@ -51,6 +51,20 @@ @g = global i32 ptrtoaddr (ptr @g to i32) ; DST_NOT_ADDR_SIZE-NEXT: PtrToAddr result must be address width ; DST_NOT_ADDR_SIZE-NEXT: i32 ptrtoaddr (ptr @g to i32) -@g_vec = global <4 x i32> ptrtoaddr (<4 x ptr> to <4 x i32>) -; TODO: Verifier.cpp does not visit ConstantVector/ConstantStruct values -; TODO-DST_NOT_ADDR_SIZE: PtrToAddr result must be address width +@g_vec = global <4 x i32> ptrtoaddr (<4 x ptr> to <4 x i32>) +; DST_NOT_ADDR_SIZE-NEXT: PtrToAddr result must be address width +; DST_NOT_ADDR_SIZE-NEXT: i32 ptrtoaddr (ptr @g_vec to i32) + +;--- dst_not_addr_size_in_inst.ll +; RUN: not llvm-as %t/dst_not_addr_size_in_inst.ll -o /dev/null 2>&1 | FileCheck -check-prefix=DST_NOT_ADDR_SIZE_IN_INST %s --implicit-check-not="error:" +; DST_NOT_ADDR_SIZE_IN_INST: PtrToAddr result must be address width +; DST_NOT_ADDR_SIZE_IN_INST-NEXT: i32 ptrtoaddr (ptr @fn to i32) +define i32 @fn() { + ret i32 ptrtoaddr (ptr @fn to i32) +} + +; DST_NOT_ADDR_SIZE_IN_INST: PtrToAddr result must be address width +; DST_NOT_ADDR_SIZE_IN_INST-NEXT: i32 ptrtoaddr (ptr @fn2 to i32) +define <2 x i32> @fn2() { + ret <2 x i32> +} diff --git a/llvm/test/CodeGen/AArch64/tbi.ll b/llvm/test/CodeGen/AArch64/tbi.ll index 285726a485b87..a7c9b4cddf808 100644 --- a/llvm/test/CodeGen/AArch64/tbi.ll +++ b/llvm/test/CodeGen/AArch64/tbi.ll @@ -7,7 +7,7 @@ ; TBI-NOT: and x ; NO_TBI: and x define i32 @ld_and32(i64 %p) { - %and = and i64 %p, 72057594037927935 + %and = and i64 %p, 1152921504606846975 %cast = inttoptr i64 %and to ptr %load = load i32, ptr %cast ret i32 %load @@ -18,7 +18,7 @@ define i32 @ld_and32(i64 %p) { ; TBI-NOT: and x ; NO_TBI: and x define i32 @ld_and_plus_offset(i64 %p) { - %and = and i64 %p, 72057594037927935 + %and = and i64 %p, 1152921504606846975 %cast = inttoptr i64 %and to ptr %gep = getelementptr i32, ptr %cast, i64 4 %load = load i32, ptr %gep @@ -40,7 +40,7 @@ define i32 @ld_and32_wider(i64 %p) { ; TBI-NOT: and x ; NO_TBI: and x define i64 @ld_and64(i64 %p) { - %and = and i64 %p, 72057594037927935 + %and = and i64 %p, 1152921504606846975 %cast = inttoptr i64 %and to ptr %load = load i64, ptr %cast ret i64 %load @@ -50,7 +50,7 @@ define i64 @ld_and64(i64 %p) { ; TBI-NOT: and x ; NO_TBI: and x define void @st_and32(i64 %p, i32 %v) { - %and = and i64 %p, 72057594037927935 + %and = and i64 %p, 1152921504606846975 %cast = inttoptr i64 %and to ptr store i32 %v, ptr %cast ret void @@ -62,7 +62,7 @@ define void @st_and32(i64 %p, i32 %v) { ; NO_TBI: and x define i32 @ld_ro(i64 %a, i64 %b) { %p = add i64 %a, %b - %and = and i64 %p, 72057594037927935 + %and = and i64 %p, 1152921504606846975 %cast = inttoptr i64 %and to ptr %load = load i32, ptr %cast ret i32 %load @@ -73,7 +73,7 @@ define i32 @ld_ro(i64 %a, i64 %b) { ; TBI-NOT: and x ; NO_TBI: and x define i32 @ld_ro2(i64 %a, i64 %b) { - %and = and i64 %a, 72057594037927935 + %and = and i64 %a, 1152921504606846975 %p = add i64 %and, %b %cast = inttoptr i64 %p to ptr %load = load i32, ptr %cast @@ -85,7 +85,7 @@ define i32 @ld_ro2(i64 %a, i64 %b) { ; TBI-NOT: and x ; NO_TBI: and x define i32 @ld_indirect_and(i64 %r1, i64 %r2) { - %and = and i64 %r1, 72057594037927935 + %and = and i64 %r1, 1152921504606846975 %p = or i64 %and, %r2 %cast = inttoptr i64 %p to ptr %load = load i32, ptr %cast diff --git a/llvm/test/CodeGen/LoongArch/trap.ll b/llvm/test/CodeGen/LoongArch/trap.ll index 15a7ad82bd7a8..d433266b47e47 100644 --- 
a/llvm/test/CodeGen/LoongArch/trap.ll +++ b/llvm/test/CodeGen/LoongArch/trap.ll @@ -10,7 +10,7 @@ declare void @llvm.debugtrap() define void @test_trap() nounwind { ; CHECK-LABEL: test_trap: ; CHECK: # %bb.0: -; CHECK-NEXT: amswap.w $zero, $ra, $zero +; CHECK-NEXT: ud 0 ; CHECK-NEXT: ret tail call void @llvm.trap() ret void diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll index 258ddf60088c1..02994811dc8af 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll @@ -636,7 +636,7 @@ define i32 @test_mix(float %f, i32 signext %i, double %d, i8 signext %c) { ; 32BIT-NEXT: renamable $f0 = nofpexcept FADDS killed renamable $f0, killed renamable $f1, implicit $rm ; 32BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm ; 32BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) - ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 ; ; 64BIT-LABEL: name: test_mix @@ -655,7 +655,7 @@ define i32 @test_mix(float %f, i32 signext %i, double %d, i8 signext %c) { ; 64BIT-NEXT: renamable $f0 = nofpexcept FADDS killed renamable $f0, killed renamable $f1, implicit $rm ; 64BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm ; 64BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) - ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4) ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 entry: %conv = fpext float %f to double @@ -956,11 +956,7 @@ define void @call_test_stackarg_float() { ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f) ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r4 :: (dereferenceable load (s64) from @d) ; 32BIT-NEXT: ADJCALLSTACKDOWN 68, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store (s64) into %stack.0) - ; 32BIT-NEXT: STFS renamable $f1, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) - ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) - ; 32BIT-NEXT: STW killed renamable $r3, 64, $r1 :: (store (s32) into stack + 64, align 16) - ; 32BIT-NEXT: renamable $r11 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) + ; 32BIT-NEXT: STFD renamable $f2, 60, $r1 :: (store (s64) into stack + 60, align 4, basealign 16) ; 32BIT-NEXT: $r3 = LI 1 ; 32BIT-NEXT: $r4 = LI 2 ; 32BIT-NEXT: $r5 = LI 3 @@ -969,8 +965,8 @@ define void @call_test_stackarg_float() { ; 32BIT-NEXT: $r8 = LI 6 ; 32BIT-NEXT: $r9 = LI 7 ; 32BIT-NEXT: $r10 = LI 8 - ; 32BIT-NEXT: STW killed renamable $r11, 60, $r1 :: (store (s32) into stack + 60, basealign 16) - ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: STFS renamable $f1, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit 
killed $r8, implicit killed $r9, implicit killed $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 68, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm ; @@ -1057,11 +1053,7 @@ define void @call_test_stackarg_float3() { ; 32BIT-NEXT: renamable $r10 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) ; 32BIT-NEXT: renamable $f2 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f) ; 32BIT-NEXT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store (s64) into %stack.1) ; 32BIT-NEXT: STFS renamable $f2, 60, $r1 :: (store (s32) into stack + 60, basealign 16) - ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.1 :: (load (s32) from %stack.1 + 4) - ; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) - ; 32BIT-NEXT: renamable $r11 = LWZ 0, %stack.1 :: (load (s32) from %stack.1, align 8) ; 32BIT-NEXT: $r3 = LI 1 ; 32BIT-NEXT: $r4 = LI 2 ; 32BIT-NEXT: $r5 = LI 3 @@ -1069,8 +1061,8 @@ define void @call_test_stackarg_float3() { ; 32BIT-NEXT: $r7 = LI 5 ; 32BIT-NEXT: $r8 = LI 6 ; 32BIT-NEXT: $r9 = LI 7 - ; 32BIT-NEXT: STW killed renamable $r11, 52, $r1 :: (store (s32) into stack + 52, basealign 16) - ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: STFD renamable $f1, 52, $r1 :: (store (s64) into stack + 52, align 4, basealign 16) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm ; @@ -1372,7 +1364,7 @@ define double @test_fpr_stack(double %d1, double %d2, double %d3, double %d4, do ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 ; 32BIT-NEXT: {{ $}} - ; 32BIT-NEXT: renamable $f0 = LFD 0, %fixed-stack.1 :: (load (s64) from %fixed-stack.1) + ; 32BIT-NEXT: renamable $f0 = LFD 0, %fixed-stack.1 :: (load (s64) from %fixed-stack.1, align 4) ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm ; 32BIT-NEXT: renamable $f2 = LFS 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 16) ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm @@ -1449,92 +1441,88 @@ define void @caller_fpr_stack() { ; 32BIT-NEXT: renamable $r3 = LWZtoc @d15, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $r4 = LWZtoc @f14, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $f0 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d15) - ; 32BIT-NEXT: renamable $r3 = LWZtoc @f16, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $r4 = LWZ 0, killed renamable $r4 :: (dereferenceable load (s32) from @f14) - ; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r3 :: (dereferenceable load (s32) from @f16) + ; 32BIT-NEXT: renamable $r5 = LWZtoc @f16, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r4 :: (dereferenceable load (s32) from @f14) + ; 
32BIT-NEXT: renamable $r4 = LWZ 0, killed renamable $r5 :: (dereferenceable load (s32) from @f16) ; 32BIT-NEXT: ADJCALLSTACKDOWN 144, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) ; 32BIT-NEXT: renamable $r5 = LI 0 ; 32BIT-NEXT: renamable $r6 = LIS 16352 - ; 32BIT-NEXT: STW killed renamable $r3, 140, $r1 :: (store (s32) into stack + 140, basealign 16) - ; 32BIT-NEXT: renamable $r3 = LIS 13107 - ; 32BIT-NEXT: STW killed renamable $r4, 128, $r1 :: (store (s32) into stack + 128, align 16) - ; 32BIT-NEXT: renamable $r4 = LIS 16355 ; 32BIT-NEXT: STW killed renamable $r5, 60, $r1 :: (store (s32) into stack + 60, basealign 16) - ; 32BIT-NEXT: renamable $r5 = LIS 26214 + ; 32BIT-NEXT: renamable $r5 = LIS 13107 ; 32BIT-NEXT: STW killed renamable $r6, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r6 = LIS 16355 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 13107 + ; 32BIT-NEXT: STW killed renamable $r5, 68, $r1 :: (store (s32) into stack + 68, basealign 16) + ; 32BIT-NEXT: renamable $r5 = LIS 26214 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 13107 + ; 32BIT-NEXT: STW killed renamable $r6, 64, $r1 :: (store (s32) into stack + 64, align 16) ; 32BIT-NEXT: renamable $r6 = LIS 16358 - ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 13107 - ; 32BIT-NEXT: STW killed renamable $r3, 68, $r1 :: (store (s32) into stack + 68, basealign 16) - ; 32BIT-NEXT: renamable $r3 = LIS 39321 - ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 13107 - ; 32BIT-NEXT: STW killed renamable $r4, 64, $r1 :: (store (s32) into stack + 64, align 16) - ; 32BIT-NEXT: renamable $r4 = LIS 16361 ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 26214 ; 32BIT-NEXT: STW killed renamable $r5, 76, $r1 :: (store (s32) into stack + 76, basealign 16) - ; 32BIT-NEXT: renamable $r5 = LIS 52428 + ; 32BIT-NEXT: renamable $r5 = LIS 39321 ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 26214 ; 32BIT-NEXT: STW killed renamable $r6, 72, $r1 :: (store (s32) into stack + 72, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r6 = LIS 16361 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 39321 + ; 32BIT-NEXT: STW killed renamable $r6, 80, $r1 :: (store (s32) into stack + 80, align 16) + ; 32BIT-NEXT: renamable $r6 = LIS 52428 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 52429 + ; 32BIT-NEXT: STW killed renamable $r6, 92, $r1 :: (store (s32) into stack + 92, basealign 16) ; 32BIT-NEXT: renamable $r6 = LIS 16364 - ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 39321 - ; 32BIT-NEXT: STW killed renamable $r4, 80, $r1 :: (store (s32) into stack + 80, align 16) - ; 32BIT-NEXT: renamable $r4 = LIS 16313 - ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 52429 - ; 32BIT-NEXT: STW killed renamable $r5, 92, $r1 :: (store (s32) into stack + 92, basealign 16) - ; 32BIT-NEXT: renamable $r5 = LIS 49807 - ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 39322 - ; 32BIT-NEXT: STW renamable $r3, 84, $r1 :: (store (s32) into stack + 84, basealign 16) + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 39322 + ; 32BIT-NEXT: STW renamable $r5, 84, $r1 :: (store (s32) into stack + 84, basealign 16) ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 52428 ; 32BIT-NEXT: STW killed renamable $r6, 88, $r1 :: (store (s32) into stack + 88, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r6 = LIS 16313 + ; 32BIT-NEXT: STW killed renamable $r5, 100, 
$r1 :: (store (s32) into stack + 100, basealign 16) + ; 32BIT-NEXT: renamable $r5 = LIS 49807 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 39321 + ; 32BIT-NEXT: STW killed renamable $r6, 96, $r1 :: (store (s32) into stack + 96, align 16) ; 32BIT-NEXT: renamable $r6 = LIS 16316 - ; 32BIT-NEXT: STW killed renamable $r3, 100, $r1 :: (store (s32) into stack + 100, basealign 16) - ; 32BIT-NEXT: renamable $r3 = LIS 60293 - ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 39321 - ; 32BIT-NEXT: STW killed renamable $r4, 96, $r1 :: (store (s32) into stack + 96, align 16) - ; 32BIT-NEXT: renamable $r4 = LIS 16318 ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 23593 ; 32BIT-NEXT: STW killed renamable $r5, 108, $r1 :: (store (s32) into stack + 108, basealign 16) - ; 32BIT-NEXT: renamable $r5 = LIS 2621 + ; 32BIT-NEXT: renamable $r5 = LIS 60293 ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 10485 ; 32BIT-NEXT: STW killed renamable $r6, 104, $r1 :: (store (s32) into stack + 104, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r6 = LIS 16318 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 7864 + ; 32BIT-NEXT: STW killed renamable $r5, 116, $r1 :: (store (s32) into stack + 116, basealign 16) + ; 32BIT-NEXT: renamable $r5 = LIS 2621 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 47185 + ; 32BIT-NEXT: STW killed renamable $r6, 112, $r1 :: (store (s32) into stack + 112, align 16) ; 32BIT-NEXT: renamable $r6 = LIS 16320 - ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 7864 - ; 32BIT-NEXT: STW killed renamable $r3, 116, $r1 :: (store (s32) into stack + 116, basealign 16) - ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.0, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 47185 - ; 32BIT-NEXT: STW killed renamable $r4, 112, $r1 :: (store (s32) into stack + 112, align 16) - ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r5, 28836 - ; 32BIT-NEXT: STW killed renamable $r4, 124, $r1 :: (store (s32) into stack + 124, basealign 16) - ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r6, 41943 - ; 32BIT-NEXT: STW killed renamable $r4, 120, $r1 :: (store (s32) into stack + 120, align 8, basealign 16) - ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.1, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) - ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.2, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.3, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f3 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.4, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f4 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 28836 + ; 32BIT-NEXT: STW killed renamable $r5, 124, $r1 :: (store (s32) into stack + 124, basealign 16) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 41943 + ; 32BIT-NEXT: STW killed renamable $r6, 120, $r1 :: (store (s32) into stack + 120, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.2, $r2 :: (load (s32) 
from got) + ; 32BIT-NEXT: renamable $f3 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.3, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f4 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.4, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f6 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.5, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f6 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.6, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f7 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.7, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f7 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.6, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $f8 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.8, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f9 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.9, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.10, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f11 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.7, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f9 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.8, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.9, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f11 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.10, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f12 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.11, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f12 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $f13 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $f13 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $f5 = LFS 0, killed renamable $r6 :: (load (s32) from constant-pool) - ; 32BIT-NEXT: STW killed renamable $r5, 136, $r1 :: (store (s32) into stack + 136, align 8, basealign 16) - ; 32BIT-NEXT: renamable $r3 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) + ; 32BIT-NEXT: STW killed renamable $r4, 140, $r1 :: (store (s32) into stack + 140, basealign 16) + ; 32BIT-NEXT: STFD killed renamable $f0, 132, $r1 :: (store (s64) into stack + 132, align 4, basealign 16) ; 32BIT-NEXT: $f10 = COPY renamable $f1 - ; 32BIT-NEXT: STW killed renamable $r3, 132, $r1 :: (store (s32) into stack + 132, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r3, 128, $r1 :: (store (s32) into stack + 128, align 16) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, 
implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1 ; 32BIT-NEXT: ADJCALLSTACKUP 144, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm @@ -1647,7 +1635,7 @@ define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroex ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm ; 32BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm ; 32BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) - ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 ; ; 64BIT-LABEL: name: mix_callee @@ -1671,7 +1659,7 @@ define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroex ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm ; 64BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm ; 64BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) - ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4) ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 entry: %add = fadd double %d1, %d2 @@ -1791,7 +1779,7 @@ define void @caller_mix() { ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f30, implicit $rm ; 32BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm ; 32BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) - ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 ; ; 64BIT-LABEL: name: mix_floats @@ -1826,7 +1814,7 @@ define void @caller_mix() { ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f0, implicit $rm ; 64BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm ; 64BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) - ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4) ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 entry: %add = add nsw i32 %i1, %i2 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 03770d22d9f4f..5ed0dfb258f73 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -1012,22 +1012,18 @@ define void @call_test_stackarg_float() { ; ASM32PWR4-NEXT: lwz 3, L..C8(2) # @f ; ASM32PWR4-NEXT: stw 0, 88(1) ; ASM32PWR4-NEXT: li 4, 2 +; ASM32PWR4-NEXT: li 5, 3 ; ASM32PWR4-NEXT: li 6, 4 ; ASM32PWR4-NEXT: li 7, 5 -; ASM32PWR4-NEXT: li 8, 6 ; ASM32PWR4-NEXT: lfs 1, 0(3) ; ASM32PWR4-NEXT: lwz 3, L..C9(2) # @d +; ASM32PWR4-NEXT: li 8, 6 ; ASM32PWR4-NEXT: li 9, 7 -; ASM32PWR4-NEXT: li 10, 8 ; ASM32PWR4-NEXT: lfd 2, 0(3) ; ASM32PWR4-NEXT: li 3, 1 -; ASM32PWR4-NEXT: stfd 2, 72(1) -; 
ASM32PWR4-NEXT: lwz 5, 76(1) -; ASM32PWR4-NEXT: lwz 11, 72(1) -; ASM32PWR4-NEXT: stw 5, 64(1) -; ASM32PWR4-NEXT: li 5, 3 +; ASM32PWR4-NEXT: li 10, 8 +; ASM32PWR4-NEXT: stfd 2, 60(1) ; ASM32PWR4-NEXT: stfs 1, 56(1) -; ASM32PWR4-NEXT: stw 11, 60(1) ; ASM32PWR4-NEXT: bl .test_stackarg_float[PR] ; ASM32PWR4-NEXT: nop ; ASM32PWR4-NEXT: addi 1, 1, 80 @@ -1130,24 +1126,20 @@ define void @call_test_stackarg_float3() { ; ASM32PWR4-NEXT: stwu 1, -80(1) ; ASM32PWR4-NEXT: lwz 3, L..C9(2) # @d ; ASM32PWR4-NEXT: stw 0, 88(1) +; ASM32PWR4-NEXT: li 4, 2 ; ASM32PWR4-NEXT: li 5, 3 ; ASM32PWR4-NEXT: li 6, 4 ; ASM32PWR4-NEXT: li 7, 5 -; ASM32PWR4-NEXT: li 8, 6 ; ASM32PWR4-NEXT: lfd 1, 0(3) ; ASM32PWR4-NEXT: lwz 3, L..C8(2) # @f +; ASM32PWR4-NEXT: li 8, 6 ; ASM32PWR4-NEXT: li 9, 7 ; ASM32PWR4-NEXT: stfd 1, 72(1) +; ASM32PWR4-NEXT: lwz 10, 72(1) ; ASM32PWR4-NEXT: lfs 2, 0(3) ; ASM32PWR4-NEXT: li 3, 1 -; ASM32PWR4-NEXT: stfd 1, 64(1) -; ASM32PWR4-NEXT: lwz 4, 68(1) -; ASM32PWR4-NEXT: lwz 10, 72(1) -; ASM32PWR4-NEXT: lwz 11, 64(1) -; ASM32PWR4-NEXT: stw 4, 56(1) -; ASM32PWR4-NEXT: li 4, 2 ; ASM32PWR4-NEXT: stfs 2, 60(1) -; ASM32PWR4-NEXT: stw 11, 52(1) +; ASM32PWR4-NEXT: stfd 1, 52(1) ; ASM32PWR4-NEXT: bl .test_stackarg_float3[PR] ; ASM32PWR4-NEXT: nop ; ASM32PWR4-NEXT: addi 1, 1, 80 @@ -1570,99 +1562,95 @@ define void @caller_fpr_stack() { ; ASM32PWR4-LABEL: caller_fpr_stack: ; ASM32PWR4: # %bb.0: # %entry ; ASM32PWR4-NEXT: mflr 0 -; ASM32PWR4-NEXT: stwu 1, -160(1) +; ASM32PWR4-NEXT: stwu 1, -144(1) ; ASM32PWR4-NEXT: lwz 3, L..C19(2) # @d15 -; ASM32PWR4-NEXT: stw 0, 168(1) -; ASM32PWR4-NEXT: lwz 5, L..C20(2) # %const.1 -; ASM32PWR4-NEXT: lwz 4, L..C21(2) # @f14 +; ASM32PWR4-NEXT: lwz 4, L..C20(2) # @f14 +; ASM32PWR4-NEXT: lwz 5, L..C21(2) # @f16 +; ASM32PWR4-NEXT: stw 0, 152(1) +; ASM32PWR4-NEXT: lis 6, 16361 +; ASM32PWR4-NEXT: ori 6, 6, 39321 ; ASM32PWR4-NEXT: lfd 0, 0(3) -; ASM32PWR4-NEXT: lwz 3, L..C22(2) # @f16 -; ASM32PWR4-NEXT: lwz 3, 0(3) -; ASM32PWR4-NEXT: stw 3, 140(1) -; ASM32PWR4-NEXT: li 3, 0 -; ASM32PWR4-NEXT: stw 3, 60(1) -; ASM32PWR4-NEXT: lis 3, 16352 -; ASM32PWR4-NEXT: stw 3, 56(1) -; ASM32PWR4-NEXT: lis 3, 13107 -; ASM32PWR4-NEXT: ori 3, 3, 13107 -; ASM32PWR4-NEXT: stw 3, 68(1) -; ASM32PWR4-NEXT: lis 3, 16355 -; ASM32PWR4-NEXT: ori 3, 3, 13107 -; ASM32PWR4-NEXT: stw 3, 64(1) -; ASM32PWR4-NEXT: lis 3, 26214 -; ASM32PWR4-NEXT: ori 3, 3, 26214 -; ASM32PWR4-NEXT: stw 3, 76(1) -; ASM32PWR4-NEXT: lis 3, 16358 -; ASM32PWR4-NEXT: ori 3, 3, 26214 -; ASM32PWR4-NEXT: stw 3, 72(1) -; ASM32PWR4-NEXT: lis 3, -26215 -; ASM32PWR4-NEXT: ori 3, 3, 39322 -; ASM32PWR4-NEXT: stw 3, 84(1) -; ASM32PWR4-NEXT: stw 3, 100(1) -; ASM32PWR4-NEXT: lis 3, 16313 -; ASM32PWR4-NEXT: ori 3, 3, 39321 -; ASM32PWR4-NEXT: stw 3, 96(1) -; ASM32PWR4-NEXT: lis 3, -15729 -; ASM32PWR4-NEXT: ori 3, 3, 23593 -; ASM32PWR4-NEXT: stw 3, 108(1) -; ASM32PWR4-NEXT: lis 3, 16316 -; ASM32PWR4-NEXT: ori 3, 3, 10485 -; ASM32PWR4-NEXT: stw 3, 104(1) -; ASM32PWR4-NEXT: lis 3, -5243 -; ASM32PWR4-NEXT: ori 3, 3, 7864 -; ASM32PWR4-NEXT: stw 3, 116(1) -; ASM32PWR4-NEXT: lis 3, 16318 -; ASM32PWR4-NEXT: ori 3, 3, 47185 -; ASM32PWR4-NEXT: stw 3, 112(1) -; ASM32PWR4-NEXT: lis 3, 2621 -; ASM32PWR4-NEXT: ori 3, 3, 28836 -; ASM32PWR4-NEXT: stw 3, 124(1) -; ASM32PWR4-NEXT: lis 3, 16320 -; ASM32PWR4-NEXT: ori 3, 3, 41943 -; ASM32PWR4-NEXT: stw 3, 120(1) -; ASM32PWR4-NEXT: lwz 3, L..C23(2) # %const.0 -; ASM32PWR4-NEXT: lfd 2, 0(3) -; ASM32PWR4-NEXT: lwz 3, L..C24(2) # %const.2 +; ASM32PWR4-NEXT: lwz 3, 0(4) +; ASM32PWR4-NEXT: lwz 4, 0(5) +; ASM32PWR4-NEXT: li 5, 0 
+; ASM32PWR4-NEXT: stw 5, 60(1) +; ASM32PWR4-NEXT: lis 5, 16352 +; ASM32PWR4-NEXT: stw 5, 56(1) +; ASM32PWR4-NEXT: lis 5, 13107 +; ASM32PWR4-NEXT: ori 5, 5, 13107 +; ASM32PWR4-NEXT: stw 5, 68(1) +; ASM32PWR4-NEXT: lis 5, 16355 +; ASM32PWR4-NEXT: ori 5, 5, 13107 +; ASM32PWR4-NEXT: stw 5, 64(1) +; ASM32PWR4-NEXT: lis 5, 26214 +; ASM32PWR4-NEXT: ori 5, 5, 26214 +; ASM32PWR4-NEXT: stw 5, 76(1) +; ASM32PWR4-NEXT: lis 5, 16358 +; ASM32PWR4-NEXT: ori 5, 5, 26214 +; ASM32PWR4-NEXT: stw 5, 72(1) +; ASM32PWR4-NEXT: lis 5, -26215 +; ASM32PWR4-NEXT: ori 5, 5, 39322 +; ASM32PWR4-NEXT: stw 5, 84(1) +; ASM32PWR4-NEXT: stw 5, 100(1) +; ASM32PWR4-NEXT: lis 5, 16313 +; ASM32PWR4-NEXT: ori 5, 5, 39321 +; ASM32PWR4-NEXT: stw 5, 96(1) +; ASM32PWR4-NEXT: lis 5, -15729 +; ASM32PWR4-NEXT: ori 5, 5, 23593 +; ASM32PWR4-NEXT: stw 5, 108(1) +; ASM32PWR4-NEXT: lis 5, 16316 +; ASM32PWR4-NEXT: ori 5, 5, 10485 +; ASM32PWR4-NEXT: stw 5, 104(1) +; ASM32PWR4-NEXT: lis 5, -5243 +; ASM32PWR4-NEXT: ori 5, 5, 7864 +; ASM32PWR4-NEXT: stw 5, 116(1) +; ASM32PWR4-NEXT: lis 5, 16318 +; ASM32PWR4-NEXT: ori 5, 5, 47185 +; ASM32PWR4-NEXT: stw 6, 80(1) +; ASM32PWR4-NEXT: lis 6, -13108 +; ASM32PWR4-NEXT: ori 6, 6, 52429 +; ASM32PWR4-NEXT: stw 5, 112(1) +; ASM32PWR4-NEXT: lis 5, 2621 +; ASM32PWR4-NEXT: ori 5, 5, 28836 +; ASM32PWR4-NEXT: stw 6, 92(1) +; ASM32PWR4-NEXT: lis 6, 16364 +; ASM32PWR4-NEXT: ori 6, 6, 52428 +; ASM32PWR4-NEXT: stw 5, 124(1) +; ASM32PWR4-NEXT: lis 5, 16320 +; ASM32PWR4-NEXT: ori 5, 5, 41943 +; ASM32PWR4-NEXT: stw 6, 88(1) +; ASM32PWR4-NEXT: lwz 6, L..C22(2) # %const.0 +; ASM32PWR4-NEXT: stw 5, 120(1) +; ASM32PWR4-NEXT: lwz 5, L..C23(2) # %const.1 +; ASM32PWR4-NEXT: lfd 2, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C24(2) # %const.2 ; ASM32PWR4-NEXT: lfd 3, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C25(2) # %const.3 -; ASM32PWR4-NEXT: lfd 4, 0(3) -; ASM32PWR4-NEXT: lwz 3, L..C26(2) # %const.4 +; ASM32PWR4-NEXT: lfd 4, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C26(2) # %const.4 ; ASM32PWR4-NEXT: lfd 6, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C27(2) # %const.5 -; ASM32PWR4-NEXT: lwz 4, 0(4) -; ASM32PWR4-NEXT: lfd 7, 0(3) -; ASM32PWR4-NEXT: lwz 3, L..C28(2) # %const.6 +; ASM32PWR4-NEXT: lfd 7, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C28(2) # %const.6 ; ASM32PWR4-NEXT: lfd 8, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C29(2) # %const.7 -; ASM32PWR4-NEXT: stw 4, 128(1) -; ASM32PWR4-NEXT: lis 4, 16361 -; ASM32PWR4-NEXT: ori 4, 4, 39321 -; ASM32PWR4-NEXT: lfd 9, 0(3) -; ASM32PWR4-NEXT: lwz 3, L..C30(2) # %const.8 +; ASM32PWR4-NEXT: lfd 9, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C30(2) # %const.8 ; ASM32PWR4-NEXT: lfd 1, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C31(2) # %const.9 -; ASM32PWR4-NEXT: stw 4, 80(1) -; ASM32PWR4-NEXT: lis 4, -13108 +; ASM32PWR4-NEXT: lfd 11, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C32(2) # %const.10 ; ASM32PWR4-NEXT: fmr 10, 1 -; ASM32PWR4-NEXT: ori 4, 4, 52429 -; ASM32PWR4-NEXT: lfd 11, 0(3) -; ASM32PWR4-NEXT: lwz 3, L..C32(2) # %const.10 ; ASM32PWR4-NEXT: lfd 12, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C33(2) # %const.11 -; ASM32PWR4-NEXT: stw 4, 92(1) -; ASM32PWR4-NEXT: lis 4, 16364 -; ASM32PWR4-NEXT: ori 4, 4, 52428 -; ASM32PWR4-NEXT: stfd 0, 152(1) -; ASM32PWR4-NEXT: stw 4, 88(1) -; ASM32PWR4-NEXT: lwz 4, 156(1) -; ASM32PWR4-NEXT: lfd 13, 0(3) +; ASM32PWR4-NEXT: lfd 13, 0(6) ; ASM32PWR4-NEXT: lfs 5, 0(5) -; ASM32PWR4-NEXT: lwz 3, 152(1) -; ASM32PWR4-NEXT: stw 4, 136(1) -; ASM32PWR4-NEXT: stw 3, 132(1) +; ASM32PWR4-NEXT: stfd 0, 132(1) +; ASM32PWR4-NEXT: stw 4, 140(1) +; ASM32PWR4-NEXT: stw 3, 128(1) ; ASM32PWR4-NEXT: bl .test_fpr_stack ; ASM32PWR4-NEXT: nop -; 
ASM32PWR4-NEXT: addi 1, 1, 160 +; ASM32PWR4-NEXT: addi 1, 1, 144 ; ASM32PWR4-NEXT: lwz 0, 8(1) ; ASM32PWR4-NEXT: mtlr 0 ; ASM32PWR4-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll index 2827155dc1845..8c4f81b65144e 100644 --- a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll +++ b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll @@ -160,7 +160,7 @@ entry: ; CHECK-ASM-LABEL: .main:{{[[:space:]] *}}# %bb.0: ; CHECK-FUNC-LABEL: .csect .main[PR],5{{[[:space:]] *}}# %bb.0 ; COMMON-NEXT: mflr 0 -; COMMON: stw 0, 168(1) +; COMMON: stw 0, 152(1) ; COMMON: mtlr 0 ; COMMON-NEXT: blr ; COMMON-NEXT: L..main0: diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll index 468303d8d9cbc..88db5cd1c8af0 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll @@ -29,8 +29,8 @@ %struct.anon = type <{ i32, double }> @astruct = global [1 x %struct.anon] [%struct.anon <{ i32 1, double 7.000000e+00 }>], align 1 -%struct.anon2 = type { double, i32 } -@bstruct = global [1 x %struct.anon2] [%struct.anon2 { double 7.000000e+00 , i32 1}], align 8 +%struct.anon2 = type { double, i32, [4 x i8] } +@bstruct = global [1 x %struct.anon2] [%struct.anon2 { double 7.000000e+00 , i32 1, [4 x i8] undef }], align 8 @a = common global i32 0, align 4 @b = common global i64 0, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll index 682c2b7afe34d..7218c814b30b8 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll @@ -114,24 +114,18 @@ define double @double_va_arg(double %a, ...) 
local_unnamed_addr { ; CHECK: bb.0.entry: ; CHECK-NEXT: liveins: $f1, $r5, $r6, $r7, $r8, $r9, $r10 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $r3 = ADDI %fixed-stack.0, 0 + ; CHECK-NEXT: STW killed renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) + ; CHECK-NEXT: STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) ; CHECK-NEXT: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8) - ; CHECK-NEXT: STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) - ; CHECK-NEXT: STW renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) ; CHECK-NEXT: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12) + ; CHECK-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64) from %ir.argp.cur2, align 16) ; CHECK-NEXT: STW killed renamable $r9, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16, align 16) ; CHECK-NEXT: STW killed renamable $r10, 20, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 20) - ; CHECK-NEXT: STW renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) - ; CHECK-NEXT: STW killed renamable $r3, 0, %stack.1.arg2 :: (store (s32) into %ir.arg2) - ; CHECK-NEXT: STW renamable $r5, 0, %stack.2 :: (store (s32) into %stack.2, align 8) - ; CHECK-NEXT: STW renamable $r6, 4, %stack.2 :: (store (s32) into %stack.2 + 4) - ; CHECK-NEXT: renamable $f0 = LFD 0, %stack.2 :: (load (s64) from %stack.2) - ; CHECK-NEXT: STW killed renamable $r5, 0, %stack.3 :: (store (s32) into %stack.3, align 8) - ; CHECK-NEXT: STW killed renamable $r6, 4, %stack.3 :: (store (s32) into %stack.3 + 4) - ; CHECK-NEXT: renamable $f2 = LFD 0, %stack.3 :: (load (s64) from %stack.3) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm - ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm - ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD renamable $f0, killed renamable $f1, implicit $rm + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, renamable $f0, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f0, implicit $rm + ; CHECK-NEXT: renamable $r3 = ADDI %fixed-stack.0, 0 + ; CHECK-NEXT: STW killed renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) ; CHECK-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 entry: %arg1 = alloca ptr, align 4 @@ -163,31 +157,24 @@ define double @double_stack_va_arg(double %one, double %two, double %three, doub ; CHECK: bb.0.entry: ; CHECK-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64) from %ir.argp.cur142, align 16) + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f5, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f6, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable 
$f1, killed renamable $f7, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f8, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f9, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f10, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f11, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f12, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f13, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, renamable $f0, implicit $rm + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, renamable $f0, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f0, implicit $rm ; CHECK-NEXT: renamable $r3 = ADDI %fixed-stack.0, 0 ; CHECK-NEXT: STW killed renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) - ; CHECK-NEXT: renamable $r3 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142, align 16) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm - ; CHECK-NEXT: STW renamable $r3, 0, %stack.2 :: (store (s32) into %stack.2, align 8) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f4, implicit $rm - ; CHECK-NEXT: renamable $r4 = LWZ 4, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142 + 4) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f5, implicit $rm - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f6, implicit $rm - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f7, implicit $rm - ; CHECK-NEXT: STW renamable $r4, 4, %stack.2 :: (store (s32) into %stack.2 + 4) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f8, implicit $rm - ; CHECK-NEXT: renamable $f1 = LFD 0, %stack.2 :: (load (s64) from %stack.2) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f9, implicit $rm - ; CHECK-NEXT: STW killed renamable $r3, 0, %stack.3 :: (store (s32) into %stack.3, align 8) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f10, implicit $rm - ; CHECK-NEXT: STW killed renamable $r4, 4, %stack.3 :: (store (s32) into %stack.3 + 4) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f11, implicit $rm - ; CHECK-NEXT: renamable $f2 = LFD 0, %stack.3 :: (load (s64) from %stack.3) - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f12, implicit $rm - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f13, implicit $rm - ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm - ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm - ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm ; CHECK-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 entry: %arg1 = alloca ptr, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll 
b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll index 9cf1e45607042..30727b8d4fe94 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll @@ -108,24 +108,18 @@ entry: define double @double_va_arg(double %a, ...) local_unnamed_addr { ; CHECK-LABEL: double_va_arg: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stw 5, -16(1) -; CHECK-NEXT: addi 3, 1, 32 -; CHECK-NEXT: stw 6, -12(1) -; CHECK-NEXT: lfd 0, -16(1) -; CHECK-NEXT: stw 5, -24(1) -; CHECK-NEXT: fadd 0, 0, 1 -; CHECK-NEXT: stw 6, -20(1) -; CHECK-NEXT: lfd 1, -24(1) -; CHECK-NEXT: fadd 1, 1, 1 -; CHECK-NEXT: stw 7, 40(1) -; CHECK-NEXT: fadd 1, 0, 1 ; CHECK-NEXT: stw 5, 32(1) +; CHECK-NEXT: addi 3, 1, 32 ; CHECK-NEXT: stw 6, 36(1) +; CHECK-NEXT: lfd 0, 32(1) +; CHECK-NEXT: fadd 1, 0, 1 +; CHECK-NEXT: fadd 0, 0, 0 +; CHECK-NEXT: stw 7, 40(1) ; CHECK-NEXT: stw 8, 44(1) +; CHECK-NEXT: fadd 1, 1, 0 ; CHECK-NEXT: stw 9, 48(1) ; CHECK-NEXT: stw 10, 52(1) ; CHECK-NEXT: stw 3, -4(1) -; CHECK-NEXT: stw 3, -8(1) ; CHECK-NEXT: blr entry: %arg1 = alloca ptr, align 4 @@ -155,31 +149,24 @@ entry: define double @double_stack_va_arg(double %one, double %two, double %three, double %four, double %five, double %six, double %seven, double %eight, double %nine, double %ten, double %eleven, double %twelve, double %thirteen, ...) local_unnamed_addr { ; CHECK-LABEL: double_stack_va_arg: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fadd 0, 1, 2 +; CHECK-NEXT: fadd 1, 1, 2 +; CHECK-NEXT: lfd 0, 128(1) ; CHECK-NEXT: addi 3, 1, 128 -; CHECK-NEXT: lwz 4, 132(1) -; CHECK-NEXT: fadd 0, 0, 3 +; CHECK-NEXT: fadd 1, 1, 3 ; CHECK-NEXT: stw 3, -4(1) -; CHECK-NEXT: fadd 0, 0, 4 -; CHECK-NEXT: lwz 3, 128(1) -; CHECK-NEXT: fadd 0, 0, 5 -; CHECK-NEXT: stw 3, -16(1) -; CHECK-NEXT: fadd 0, 0, 6 -; CHECK-NEXT: stw 4, -12(1) -; CHECK-NEXT: fadd 0, 0, 7 -; CHECK-NEXT: lfd 1, -16(1) -; CHECK-NEXT: fadd 0, 0, 8 -; CHECK-NEXT: stw 3, -24(1) -; CHECK-NEXT: fadd 0, 0, 9 -; CHECK-NEXT: stw 4, -20(1) -; CHECK-NEXT: fadd 0, 0, 10 -; CHECK-NEXT: fadd 0, 0, 11 -; CHECK-NEXT: fadd 0, 0, 12 -; CHECK-NEXT: fadd 0, 0, 13 -; CHECK-NEXT: fadd 0, 0, 1 -; CHECK-NEXT: lfd 1, -24(1) -; CHECK-NEXT: fadd 1, 1, 1 -; CHECK-NEXT: fadd 1, 0, 1 +; CHECK-NEXT: fadd 1, 1, 4 +; CHECK-NEXT: fadd 1, 1, 5 +; CHECK-NEXT: fadd 1, 1, 6 +; CHECK-NEXT: fadd 1, 1, 7 +; CHECK-NEXT: fadd 1, 1, 8 +; CHECK-NEXT: fadd 1, 1, 9 +; CHECK-NEXT: fadd 1, 1, 10 +; CHECK-NEXT: fadd 1, 1, 11 +; CHECK-NEXT: fadd 1, 1, 12 +; CHECK-NEXT: fadd 1, 1, 13 +; CHECK-NEXT: fadd 1, 1, 0 +; CHECK-NEXT: fadd 0, 0, 0 +; CHECK-NEXT: fadd 1, 1, 0 ; CHECK-NEXT: blr entry: %arg1 = alloca ptr, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll index dc62e18378e72..af13552ed5949 100644 --- a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll @@ -113,10 +113,10 @@ define double @double_va_arg(double %a, ...) 
local_unnamed_addr { ; CHECK-NEXT: renamable $x5 = ADDI8 %fixed-stack.0, 8 ; CHECK-NEXT: STD killed renamable $x3, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) ; CHECK-NEXT: STD killed renamable $x5, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) - ; CHECK-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64)) + ; CHECK-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64) from %fixed-stack.0) ; CHECK-NEXT: renamable $x3 = ADDI8 renamable $x4, 8 ; CHECK-NEXT: STD killed renamable $x3, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) - ; CHECK-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (load (s64)) + ; CHECK-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (load (s64), align 4) ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm @@ -145,7 +145,7 @@ define double @double_stack_va_arg(double %one, double %two, double %three, doub ; CHECK: bb.0.entry: ; CHECK-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64)) + ; CHECK-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64) from %fixed-stack.0) ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll index cd59aa03597e2..1e1110f0a30b8 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll @@ -638,6 +638,60 @@ define void @test_psslai_h(ptr %ret_ptr, ptr %a_ptr) { ret void } +; Test logical shift right immediate +define void @test_psrli_h(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrli_h: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: psrli.h a1, a1, 2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %res = lshr <2 x i16> %a, splat(i16 2) + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_psrli_b(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrli_b: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: psrli.b a1, a1, 2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %res = lshr <4 x i8> %a, splat(i8 2) + store <4 x i8> %res, ptr %ret_ptr + ret void +} + +; Test arithmetic shift right immediate +define void @test_psrai_h(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrai_h: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: psrai.h a1, a1, 2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %res = ashr <2 x i16> %a, splat(i16 2) + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_psrai_b(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrai_b: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: psrai.b a1, a1, 2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %res = ashr <4 x i8> %a, splat(i8 2) + store <4 x i8> %res, ptr %ret_ptr + ret void +} + ; Test logical shift left(scalar shamt) define void @test_psll_hs(ptr %ret_ptr, ptr 
%a_ptr, i16 %shamt) { ; CHECK-LABEL: test_psll_hs: @@ -746,3 +800,243 @@ define void @test_psll_bs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { store <4 x i8> %res, ptr %ret_ptr ret void } + +; Test logical shift right(scalar shamt) +define void @test_psrl_hs(ptr %ret_ptr, ptr %a_ptr, i16 %shamt) { +; CHECK-LABEL: test_psrl_hs: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: psrl.hs a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %insert = insertelement <2 x i16> poison, i16 %shamt, i32 0 + %b = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer + %res = lshr <2 x i16> %a, %b + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_psrl_bs(ptr %ret_ptr, ptr %a_ptr, i8 %shamt) { +; CHECK-LABEL: test_psrl_bs: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: psrl.bs a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %insert = insertelement <4 x i8> poison, i8 %shamt, i32 0 + %b = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer + %res = lshr <4 x i8> %a, %b + store <4 x i8> %res, ptr %ret_ptr + ret void +} + +; Test arithmetic shift right(scalar shamt) +define void @test_psra_hs(ptr %ret_ptr, ptr %a_ptr, i16 %shamt) { +; CHECK-LABEL: test_psra_hs: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: psra.hs a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %insert = insertelement <2 x i16> poison, i16 %shamt, i32 0 + %b = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer + %res = ashr <2 x i16> %a, %b + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_psra_bs(ptr %ret_ptr, ptr %a_ptr, i8 %shamt) { +; CHECK-LABEL: test_psra_bs: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: psra.bs a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %insert = insertelement <4 x i8> poison, i8 %shamt, i32 0 + %b = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer + %res = ashr <4 x i8> %a, %b + store <4 x i8> %res, ptr %ret_ptr + ret void +} + +; Test logical shift right(vector shamt) +define void @test_psrl_hs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { +; CHECK-RV32-LABEL: test_psrl_hs_vec_shamt: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: lw a2, 0(a2) +; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: srli a3, a2, 16 +; CHECK-RV32-NEXT: srli a4, a1, 16 +; CHECK-RV32-NEXT: slli a1, a1, 16 +; CHECK-RV32-NEXT: srl a3, a4, a3 +; CHECK-RV32-NEXT: srli a1, a1, 16 +; CHECK-RV32-NEXT: srl a1, a1, a2 +; CHECK-RV32-NEXT: pack a1, a1, a3 +; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_psrl_hs_vec_shamt: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: lw a2, 0(a2) +; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: srli a3, a2, 16 +; CHECK-RV64-NEXT: srliw a4, a1, 16 +; CHECK-RV64-NEXT: slli a1, a1, 48 +; CHECK-RV64-NEXT: srl a3, a4, a3 +; CHECK-RV64-NEXT: srli a1, a1, 48 +; CHECK-RV64-NEXT: srl a1, a1, a2 +; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 +; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %b = load <2 x i16>, ptr %shamt_ptr + %res = lshr <2 x i16> %a, %b + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_psrl_bs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { +; CHECK-RV32-LABEL: test_psrl_bs_vec_shamt: +; CHECK-RV32: # %bb.0: +; 
CHECK-RV32-NEXT: lw a2, 0(a2) +; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: srli a3, a2, 24 +; CHECK-RV32-NEXT: srli a4, a1, 24 +; CHECK-RV32-NEXT: srli a5, a2, 8 +; CHECK-RV32-NEXT: slli a6, a1, 16 +; CHECK-RV32-NEXT: srl a7, a4, a3 +; CHECK-RV32-NEXT: srli a3, a6, 24 +; CHECK-RV32-NEXT: srl a6, a3, a5 +; CHECK-RV32-NEXT: zext.b a3, a1 +; CHECK-RV32-NEXT: srli a4, a2, 16 +; CHECK-RV32-NEXT: slli a1, a1, 8 +; CHECK-RV32-NEXT: srl a2, a3, a2 +; CHECK-RV32-NEXT: srli a1, a1, 24 +; CHECK-RV32-NEXT: srl a3, a1, a4 +; CHECK-RV32-NEXT: ppaire.db a2, a2, a6 +; CHECK-RV32-NEXT: pack a1, a2, a3 +; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_psrl_bs_vec_shamt: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: lw a2, 0(a2) +; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: srli a3, a2, 24 +; CHECK-RV64-NEXT: srliw a4, a1, 24 +; CHECK-RV64-NEXT: srli a5, a2, 16 +; CHECK-RV64-NEXT: srl a3, a4, a3 +; CHECK-RV64-NEXT: slli a4, a1, 40 +; CHECK-RV64-NEXT: srli a4, a4, 56 +; CHECK-RV64-NEXT: srl a4, a4, a5 +; CHECK-RV64-NEXT: zext.b a5, a1 +; CHECK-RV64-NEXT: srl a5, a5, a2 +; CHECK-RV64-NEXT: srli a2, a2, 8 +; CHECK-RV64-NEXT: slli a1, a1, 48 +; CHECK-RV64-NEXT: srli a1, a1, 56 +; CHECK-RV64-NEXT: srl a1, a1, a2 +; CHECK-RV64-NEXT: ppaire.b a2, a4, a3 +; CHECK-RV64-NEXT: ppaire.b a1, a5, a1 +; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 +; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %b = load <4 x i8>, ptr %shamt_ptr + %res = lshr <4 x i8> %a, %b + store <4 x i8> %res, ptr %ret_ptr + ret void +} + +; Test arithmetic shift right(vector shamt) +define void @test_psra_hs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { +; CHECK-RV32-LABEL: test_psra_hs_vec_shamt: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: lw a2, 0(a2) +; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: srli a3, a2, 16 +; CHECK-RV32-NEXT: srai a4, a1, 16 +; CHECK-RV32-NEXT: slli a1, a1, 16 +; CHECK-RV32-NEXT: sra a3, a4, a3 +; CHECK-RV32-NEXT: srai a1, a1, 16 +; CHECK-RV32-NEXT: sra a1, a1, a2 +; CHECK-RV32-NEXT: pack a1, a1, a3 +; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_psra_hs_vec_shamt: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: lw a2, 0(a2) +; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: srli a3, a2, 16 +; CHECK-RV64-NEXT: sraiw a4, a1, 16 +; CHECK-RV64-NEXT: slli a1, a1, 48 +; CHECK-RV64-NEXT: sra a3, a4, a3 +; CHECK-RV64-NEXT: srai a1, a1, 48 +; CHECK-RV64-NEXT: sra a1, a1, a2 +; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 +; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %b = load <2 x i16>, ptr %shamt_ptr + %res = ashr <2 x i16> %a, %b + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_psra_bs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { +; CHECK-RV32-LABEL: test_psra_bs_vec_shamt: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: lw a2, 0(a2) +; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: srli a3, a2, 24 +; CHECK-RV32-NEXT: srai a4, a1, 24 +; CHECK-RV32-NEXT: srli a5, a2, 8 +; CHECK-RV32-NEXT: slli a6, a1, 16 +; CHECK-RV32-NEXT: sra a7, a4, a3 +; CHECK-RV32-NEXT: srai a3, a6, 24 +; CHECK-RV32-NEXT: sra a6, a3, a5 +; CHECK-RV32-NEXT: srli a3, a2, 16 +; CHECK-RV32-NEXT: slli a4, a1, 8 +; CHECK-RV32-NEXT: slli a1, a1, 24 +; CHECK-RV32-NEXT: srai a4, a4, 24 +; CHECK-RV32-NEXT: sra a3, a4, a3 +; CHECK-RV32-NEXT: srai a1, a1, 24 +; CHECK-RV32-NEXT: sra a2, a1, a2 +; CHECK-RV32-NEXT: ppaire.db a2, a2, a6 +; CHECK-RV32-NEXT: pack a1, a2, a3 +; 
CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_psra_bs_vec_shamt: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: lw a2, 0(a2) +; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: srli a3, a2, 24 +; CHECK-RV64-NEXT: sraiw a4, a1, 24 +; CHECK-RV64-NEXT: srli a5, a2, 16 +; CHECK-RV64-NEXT: slli a6, a1, 40 +; CHECK-RV64-NEXT: sra a3, a4, a3 +; CHECK-RV64-NEXT: srli a4, a2, 8 +; CHECK-RV64-NEXT: srai a6, a6, 56 +; CHECK-RV64-NEXT: sra a5, a6, a5 +; CHECK-RV64-NEXT: slli a6, a1, 48 +; CHECK-RV64-NEXT: srai a6, a6, 56 +; CHECK-RV64-NEXT: sra a4, a6, a4 +; CHECK-RV64-NEXT: slli a1, a1, 56 +; CHECK-RV64-NEXT: srai a1, a1, 56 +; CHECK-RV64-NEXT: sra a1, a1, a2 +; CHECK-RV64-NEXT: ppaire.b a2, a5, a3 +; CHECK-RV64-NEXT: ppaire.b a1, a1, a4 +; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 +; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %b = load <4 x i8>, ptr %shamt_ptr + %res = ashr <4 x i8> %a, %b + store <4 x i8> %res, ptr %ret_ptr + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll index c7fb891cdd996..3e0f431d67f41 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll @@ -791,6 +791,86 @@ define void @test_pslli_w(ptr %ret_ptr, ptr %a_ptr) { store <2 x i32> %res, ptr %ret_ptr ret void } +; Test logical shift right immediate +define void @test_psrli_w(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrli_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: psrli.w a1, a1, 2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %res = lshr <2 x i32> %a, splat(i32 2) + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +define void @test_psrli_h(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrli_h: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: psrli.h a1, a1, 2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i16>, ptr %a_ptr + %res = lshr <4 x i16> %a, splat(i16 2) + store <4 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_psrli_b(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrli_b: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: psrli.b a1, a1, 2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <8 x i8>, ptr %a_ptr + %res = lshr <8 x i8> %a, splat(i8 2) + store <8 x i8> %res, ptr %ret_ptr + ret void +} + +; Test arithmetic shift right immediate +define void @test_psrai_w(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrai_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: psrai.w a1, a1, 2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %res = ashr <2 x i32> %a, splat(i32 2) + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +define void @test_psrai_h(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrai_h: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: psrai.h a1, a1, 2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i16>, ptr %a_ptr + %res = ashr <4 x i16> %a, splat(i16 2) + store <4 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_psrai_b(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_psrai_b: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: psrai.b a1, a1, 2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <8 x i8>, ptr %a_ptr + %res = ashr <8 x i8> %a, splat(i8 2) + store <8 x i8> %res, ptr %ret_ptr + ret void +} + ; Test arithmetic saturation shift left immediate for v2i32 define void 
@test_psslai_w(ptr %ret_ptr, ptr %a_ptr) { @@ -841,3 +921,75 @@ define void @test_psll_ws_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { store <2 x i32> %res, ptr %ret_ptr ret void } + +; Test logical shift right(scalar shamt) +define void @test_psrl_ws(ptr %ret_ptr, ptr %a_ptr, i32 %shamt) { +; CHECK-LABEL: test_psrl_ws: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: psrl.ws a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %insert = insertelement <2 x i32> poison, i32 %shamt, i32 0 + %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer + %res = lshr <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +; Test arithmetic shift right(scalar shamt) +define void @test_psra_ws(ptr %ret_ptr, ptr %a_ptr, i32 %shamt) { +; CHECK-LABEL: test_psra_ws: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: psra.ws a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %insert = insertelement <2 x i32> poison, i32 %shamt, i32 0 + %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer + %res = ashr <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +; Test logical shift right(vector shamt) +define void @test_psrl_ws_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { +; CHECK-LABEL: test_psrl_ws_vec_shamt: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: srlw a3, a1, a2 +; CHECK-NEXT: srli a2, a2, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srlw a1, a1, a2 +; CHECK-NEXT: pack a1, a3, a1 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %shamt_ptr + %res = lshr <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +; Test arithmetic shift right(vector shamt) +define void @test_psra_ws_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { +; CHECK-LABEL: test_psra_ws_vec_shamt: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: sraw a3, a1, a2 +; CHECK-NEXT: srli a2, a2, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sraw a1, a1, a2 +; CHECK-NEXT: pack a1, a3, a1 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %shamt_ptr + %res = ashr <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir b/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir new file mode 100644 index 0000000000000..396f67326a7ca --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir @@ -0,0 +1,315 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcilsm -run-pass=riscv-load-store-opt %s -o - | FileCheck %s + +--- | + + define void @pair_two_lw_into_qc_lwmi() nounwind { ret void } + define void @pair_two_lw_into_qc_lwmi_reversed() nounwind { ret void } + define void @pair_two_sw_into_qc_swmi_reversed() nounwind { ret void } + define void @no_pair_if_different_base_regs() nounwind { ret void } + define void @no_pair_if_alignment_lt_4() nounwind { ret void } + define void @pair_two_sw_into_qc_swmi() nounwind { ret void } + define void @no_pair_if_misaligned() nounwind { ret void } + define void @pair_at_upper_boundary_lw() nounwind { ret void } + define void @pair_at_upper_boundary_sw() nounwind { ret void } + define void 
@no_pair_if_offset_out_of_range_lw() nounwind { ret void } + define void @no_pair_if_offset_out_of_range_sw() nounwind { ret void } + define void @no_pair_if_non_consecutive_regs() nounwind { ret void } + define void @no_pair_if_rd_is_x0() nounwind { ret void } + define void @no_pair_if_lw_rd_equals_base() nounwind { ret void } + define void @pair_if_not_adjacent() nounwind { ret void } + define void @pair_if_not_adjacent_use() nounwind { ret void } + define void @no_pair_if_not_adjacent_use() nounwind { ret void } +--- +name: pair_two_lw_into_qc_lwmi +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: pair_two_lw_into_qc_lwmi + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 0, implicit-def $x13 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 0 :: (load (s32), align 4) + $x13 = LW $x10, 4 :: (load (s32), align 4) + PseudoRET + +... +--- +# FIXME: Kill flags are not propagated correctly for the base register +name: pair_two_lw_into_qc_lwmi_reversed +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: pair_two_lw_into_qc_lwmi_reversed + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 0, implicit-def $x13 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x13 = LW $x10, 4 :: (load (s32)) + $x12 = LW killed $x10, 0 :: (load (s32)) + PseudoRET + +... +--- +name: pair_two_sw_into_qc_swmi_reversed +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + ; CHECK-LABEL: name: pair_two_sw_into_qc_swmi_reversed + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: QC_SWMI killed $x12, $x10, 2, 0, implicit killed $x13 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW killed $x13, $x10, 4 :: (store (s32)) + SW killed $x12, $x10, 0 :: (store (s32)) + PseudoRET + +... +--- +name: no_pair_if_different_base_regs +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x11 + ; CHECK-LABEL: name: no_pair_if_different_base_regs + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = LW $x10, 0 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x11, 4 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 0 :: (load (s32)) + $x13 = LW $x11, 4 :: (load (s32)) + PseudoRET + +... +--- +name: no_pair_if_alignment_lt_4 +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_alignment_lt_4 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = LW $x10, 0 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x10, 3 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 0 :: (load (s32)) + $x13 = LW $x10, 3 :: (load (s32)) + PseudoRET + +... +--- +name: pair_two_sw_into_qc_swmi +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + ; CHECK-LABEL: name: pair_two_sw_into_qc_swmi + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: QC_SWMI killed $x12, $x10, 2, 0, implicit killed $x13 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW killed $x12, $x10, 0 :: (store (s32), align 4) + SW killed $x13, $x10, 4 :: (store (s32), align 4) + PseudoRET + +... 
+--- +name: no_pair_if_misaligned +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_misaligned + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = LW $x10, 2 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x10, 6 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 2 :: (load (s32), align 4) + $x13 = LW $x10, 6 :: (load (s32), align 4) + PseudoRET + +... +--- +# FIXME: Kill flags are not propagated correctly for the base register +name: pair_at_upper_boundary_lw +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: pair_at_upper_boundary_lw + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 124, implicit-def $x13 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 124 :: (load (s32), align 4) + $x13 = LW killed $x10, 128 :: (load (s32), align 4) + PseudoRET + +... +--- +# FIXME: Kill flags are not propagated correctly for the base register +name: pair_at_upper_boundary_sw +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + ; CHECK-LABEL: name: pair_at_upper_boundary_sw + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: QC_SWMI $x12, $x10, 2, 124, implicit $x13 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW $x12, $x10, 124 :: (store (s32), align 4) + SW $x13, killed $x10, 128 :: (store (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_offset_out_of_range_lw +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_offset_out_of_range_lw + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = LW $x10, 128 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x10, 132 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 128 :: (load (s32), align 4) + $x13 = LW $x10, 132 :: (load (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_offset_out_of_range_sw +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + ; CHECK-LABEL: name: no_pair_if_offset_out_of_range_sw + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SW $x12, $x10, 128 :: (store (s32)) + ; CHECK-NEXT: SW $x13, $x10, 132 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW $x12, $x10, 128 :: (store (s32), align 4) + SW $x13, $x10, 132 :: (store (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_non_consecutive_regs +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_non_consecutive_regs + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x11 = LW $x10, 0 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x10, 4 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x11 = LW $x10, 0 :: (load (s32), align 4) + $x13 = LW $x10, 4 :: (load (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_rd_is_x0 +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_rd_is_x0 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x0 = LW $x10, 0 :: (load (s32)) + ; CHECK-NEXT: $x1 = LW $x10, 4 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x0 = LW $x10, 0 :: (load (s32), align 4) + $x1 = LW $x10, 4 :: (load (s32), align 4) + PseudoRET + +... 
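The pairing legality rules these MIR cases probe are easiest to read side by side, so here is a minimal, self-contained C++ restatement inferred purely from the test names and CHECK lines: same base register, word-aligned accesses at adjacent word offsets, the low offset inside the 0-124 immediate range, consecutive destination registers other than x0, and no load whose destination is its own base. The MemOp record and canPairIntoLwmiSwmi are invented for this sketch; this is not the riscv-load-store-opt implementation, just an executable summary of the conditions the cases exercise.

#include <cstddef>
#include <cstdint>
#include <iostream>

struct MemOp {
  unsigned Rd;     // destination (LW) or source (SW) register number
  unsigned Base;   // base address register number
  int64_t Offset;  // immediate byte offset
  unsigned Align;  // alignment of the memory access
};

// Lo is the access at the smaller offset, Hi the one 4 bytes above it.
bool canPairIntoLwmiSwmi(const MemOp &Lo, const MemOp &Hi, bool IsLoad) {
  if (Lo.Base != Hi.Base) return false;               // same base register
  if (Lo.Align < 4 || Hi.Align < 4) return false;     // word-aligned accesses
  if (Lo.Offset % 4 != 0) return false;               // no misaligned offsets
  if (Hi.Offset != Lo.Offset + 4) return false;       // adjacent words
  if (Lo.Offset < 0 || Lo.Offset > 124) return false; // immediate range seen in the boundary tests
  if (Hi.Rd != Lo.Rd + 1) return false;               // consecutive register pair
  if (Lo.Rd == 0) return false;                       // x0 cannot anchor the pair
  if (IsLoad && (Lo.Rd == Lo.Base || Hi.Rd == Lo.Base))
    return false;                                     // load must not clobber its base
  return true;
}

int main() {
  // pair_two_lw_into_qc_lwmi: LW x12, 0(x10) + LW x13, 4(x10) -> pairs (prints 1).
  std::cout << canPairIntoLwmiSwmi({12, 10, 0, 4}, {13, 10, 4, 4}, true) << '\n';
  // no_pair_if_offset_out_of_range_lw: offsets 128/132 -> does not pair (prints 0).
  std::cout << canPairIntoLwmiSwmi({12, 10, 128, 4}, {13, 10, 132, 4}, true) << '\n';
}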
+--- +name: no_pair_if_lw_rd_equals_base +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_lw_rd_equals_base + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x10 = LW $x10, 20 :: (load (s32)) + ; CHECK-NEXT: $x11 = LW $x10, 24 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x10 = LW $x10, 20 :: (load (s32), align 4) + $x11 = LW $x10, 24 :: (load (s32), align 4) + PseudoRET + +... +--- +# FIXME: Kill flags are not propagated correctly for the base register +name: pair_if_not_adjacent +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: pair_if_not_adjacent + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x1 = QC_LWMI $x10, 2, 20, implicit-def $x2 :: (load (s32)) + ; CHECK-NEXT: $x3 = ADDI $x1, 10 + ; CHECK-NEXT: PseudoRET + $x1 = LW $x10, 20 :: (load (s32), align 4) + $x3 = ADDI $x1, 10 + $x2 = LW killed $x10, 24 :: (load (s32), align 4) + PseudoRET + +... +--- +name: pair_if_not_adjacent_use +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x1, $x2 + ; CHECK-LABEL: name: pair_if_not_adjacent_use + ; CHECK: liveins: $x10, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x2 = ADDI $x2, 10 + ; CHECK-NEXT: QC_SWMI $x1, $x10, 2, 20, implicit $x2 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW $x1, $x10, 20 :: (store (s32), align 4) + $x2 = ADDI $x2, 10 + SW $x2, $x10, 24 :: (store (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_not_adjacent_use +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x2 + ; CHECK-LABEL: name: no_pair_if_not_adjacent_use + ; CHECK: liveins: $x10, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x1 = LW $x10, 20 :: (load (s32)) + ; CHECK-NEXT: $x1 = ADDI $x1, 10 + ; CHECK-NEXT: SW $x2, $x10, 40 :: (store (s32)) + ; CHECK-NEXT: $x2 = LW $x10, 24 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x1 = LW $x10, 20 :: (load (s32), align 4) + $x1 = ADDI $x1, 10 + SW $x2, $x10, 40 :: (store (s32), align 4) + $x2 = LW $x10, 24 :: (load (s32), align 4) + PseudoRET + +... diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpExtInst_vector_promotion.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpExtInst_vector_promotion.ll new file mode 100644 index 0000000000000..b406f8b71f7e6 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpExtInst_vector_promotion.ll @@ -0,0 +1,179 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown < %s | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown < %s -filetype=obj | spirv-val %} +; +; Some OpenCL builtins have mixed vector-scalar variants, but OpExtInst only supports +; versions where all the arguments have the same type. +; +; We generate code, but it is invalid. +; We should generate vector versions for these cases.
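To make the intended promotion concrete, the following self-contained C++ sketch models what the CHECK lines below expect: each scalar operand of a mixed vector/scalar builtin is splatted via OpCompositeConstruct so that every OpExtInst operand shares the result's vector type. Builder, emitMixedBuiltin, and the numeric ids are invented stand-ins that only print the instructions they would emit; this is not the SPIR-V backend's actual API.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Minimal stand-in for a SPIR-V instruction stream: each emit prints the
// instruction it would add and returns a fresh result id.
struct Builder {
  uint32_t NextId = 100;
  // Emits OpCompositeConstruct %VecTy %Scalar %Scalar ... (NumElts copies).
  uint32_t emitSplat(uint32_t VecTy, uint32_t Scalar, unsigned NumElts) {
    uint32_t R = NextId++;
    std::cout << "%" << R << " = OpCompositeConstruct %" << VecTy;
    for (unsigned I = 0; I < NumElts; ++I)
      std::cout << " %" << Scalar;
    std::cout << "\n";
    return R;
  }
  // Emits the extended instruction once all operands are uniformly typed.
  uint32_t emitExtInst(uint32_t VecTy, const char *Op,
                       const std::vector<uint32_t> &Ops) {
    uint32_t R = NextId++;
    std::cout << "%" << R << " = OpExtInst %" << VecTy << " %ClSet " << Op;
    for (uint32_t O : Ops)
      std::cout << " %" << O;
    std::cout << "\n";
    return R;
  }
};

// Splat every scalar operand to the result's vector type first, so the
// final OpExtInst has same-typed operands, as the CHECK lines verify.
uint32_t emitMixedBuiltin(Builder &B, uint32_t VecTy, unsigned NumElts,
                          const char *Op, std::vector<uint32_t> Ops,
                          const std::vector<bool> &IsScalar) {
  for (std::size_t I = 0; I < Ops.size(); ++I)
    if (IsScalar[I])
      Ops[I] = B.emitSplat(VecTy, Ops[I], NumElts);
  return B.emitExtInst(VecTy, Op, Ops);
}

int main() {
  Builder B;
  // min(<2 x i32> %v, i32 %s): the i32 operand is splatted before s_min.
  emitMixedBuiltin(B, /*VecTy=*/7, /*NumElts=*/2, "s_min", {20, 21},
                   {false, true});
}

Running it prints an OpCompositeConstruct splat followed by an s_min whose operands are uniformly of the vector type, which is the shape the S_MIN kernel below checks for.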
+ +define spir_kernel void @S_MIN() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function S_MIN +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR:[0-9]+]] %[[SCALAR]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} s_min %{{[0-9]+}} %[[VEC]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x i32> @_Z3minDv2_ii(<2 x i32> , i32 5) + ret void +} + +define spir_kernel void @U_MIN() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function U_MIN +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR:[0-9]+]] %[[SCALAR]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} u_min %{{[0-9]+}} %[[VEC]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x i32> @_Z3minDv2_jj(<2 x i32> , i32 5) + ret void +} + +define spir_kernel void @S_MAX() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function S_MAX +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR:[0-9]+]] %[[SCALAR]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} s_max %{{[0-9]+}} %[[VEC]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x i32> @_Z3maxDv2_ii(<2 x i32> , i32 5) + ret void +} + +define spir_kernel void @F_MIN() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function F_MIN +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR:[0-9]+]] %[[SCALAR]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} fmin %{{[0-9]+}} %[[VEC]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x float> @_Z3minDv2_ff(<2 x float> , float 5.0) + ret void +} + +define spir_kernel void @F_MAX() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function F_MAX +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR:[0-9]+]] %[[SCALAR]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} fmax %{{[0-9]+}} %[[VEC]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x float> @_Z3maxDv2_ff(<2 x float> , float 5.0) + ret void +} + +define spir_kernel void @F_FMIN() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function F_FMIN +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR:[0-9]+]] %[[SCALAR]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} fmin %{{[0-9]+}} %[[VEC]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x float> @_Z4fminDv2_ff(<2 x float> , float 5.0) + ret void +} + +define spir_kernel void @F_FMAX() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function F_FMAX +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR:[0-9]+]] %[[SCALAR]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} fmax %{{[0-9]+}} %[[VEC]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; 
-- End function +entry: + %call = tail call spir_func <2 x float> @_Z4fmaxDv2_ff(<2 x float> , float 5.0) + ret void +} + +define spir_kernel void @S_CLAMP() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function S_CLAMP +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC_0:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR_0:[0-9]+]] %[[SCALAR_0]] +; CHECK-NEXT: %[[VEC_1:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR_1:[0-9]+]] %[[SCALAR_1]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} s_clamp %{{[0-9]+}} %[[VEC_0]] %[[VEC_1]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x i32> @_Z5clampDv2_iii(<2 x i32> , i32 5, i32 6) + ret void +} + +define spir_kernel void @F_CLAMP() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function F_CLAMP +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC_0:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR_0:[0-9]+]] %[[SCALAR_0]] +; CHECK-NEXT: %[[VEC_1:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR_1:[0-9]+]] %[[SCALAR_1]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} fclamp %{{[0-9]+}} %[[VEC_0]] %[[VEC_1]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x float> @_Z5clampDv2_fff(<2 x float> , float 5.0, float 6.0) + ret void +} + +define spir_kernel void @MIX() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function MIX +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR:[0-9]+]] %[[SCALAR]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} mix %{{[0-9]+}} %{{[0-9]+}} %[[VEC]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x float> @_Z3mixDv2_fS_f(<2 x float> , <2 x float> , float 0.5) + ret void +} + +define spir_kernel void @SMOOTHSTEP() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function SMOOTHSTEP +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[VEC_0:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR_0:[0-9]+]] %[[SCALAR_0]] +; CHECK-NEXT: %[[VEC_1:[0-9]+]] = OpCompositeConstruct %[[VECTYPE:[0-9]+]] %[[SCALAR_1:[0-9]+]] %[[SCALAR_1]] +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %[[VECTYPE]] %{{[0-9]+}} smoothstep %[[VEC_0]] %[[VEC_1]] %{{[0-9]+}} +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + %call = tail call spir_func <2 x float> @_Z10smoothstepffDv2_f(float 1.0, float 0.5, <2 x float> ) + ret void +} + +define spir_kernel void @ill_0() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function ill_0 +; CHECK-NEXT: OpLabel +; CHECK-NEXT: OpFunctionCall %{{[0-9]+}} %{{[0-9]+}} +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + tail call spir_func void @_Z3minv() + ret void +} + +declare spir_func <2 x i32> @_Z3minDv2_ii(<2 x i32>, i32) +declare spir_func <2 x i32> @_Z3minDv2_jj(<2 x i32>, i32) +declare spir_func <2 x i32> @_Z3maxDv2_ii(<2 x i32>, i32) +declare spir_func <2 x float> @_Z3minDv2_ff(<2 x float>, float) +declare spir_func <2 x float> @_Z3maxDv2_ff(<2 x float>, float) +declare spir_func <2 x float> @_Z4fminDv2_ff(<2 x float>, float) +declare spir_func <2 x float> @_Z4fmaxDv2_ff(<2 x float>, float) +declare spir_func <2 x i32> @_Z5clampDv2_iii(<2 x i32>, i32, i32) 
+declare spir_func <2 x float> @_Z5clampDv2_fff(<2 x float>, float, float) +declare spir_func <2 x float> @_Z3mixDv2_fS_f(<2 x float>, <2 x float>, float) +declare spir_func <2 x float> @_Z10smoothstepffDv2_f(float, float, <2 x float>) +declare spir_func void @_Z3minv() diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpExtInst_vector_promotion_bug.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpExtInst_vector_promotion_bug.ll new file mode 100644 index 0000000000000..b81f373be33c3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpExtInst_vector_promotion_bug.ll @@ -0,0 +1,21 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown < %s | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown < %s -filetype=obj | not spirv-val 2>&1 | FileCheck %s --check-prefix=VALIDATOR %} +; +; _Z3miniii is not a valid OpenCL intrinsic; do not treat it like one. +; +; VALIDATOR: Invalid instruction OpExtInst starting at word {{[0-9]+}}: expected no more operands after 7 words, but stated word count is 8 + +define spir_kernel void @ill_1() { +; CHECK-LABEL: OpFunction %{{[0-9]+}} None %{{[0-9]+}} ; -- Begin function ill_1 +; CHECK-NEXT: OpLabel +; This is wrong; we should generate a regular call +; CHECK-NEXT: %{{[0-9]+}} = OpExtInst %{{[0-9]+}} %{{[0-9]+}} s_min %{{[0-9]+}} %{{[0-9]+}} %{{[0-9]+}} +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +; CHECK-NEXT: ; -- End function +entry: + tail call spir_func void @_Z3miniii(i32 1, i32 2, i32 3) + ret void +} + +declare spir_func i32 @_Z3miniii(i32, i32, i32) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpMin.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpMin.ll deleted file mode 100644 index 5cc3ea01e5191..0000000000000 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpMin.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV - -; CHECK-SPIRV: %[[#SetInstID:]] = OpExtInstImport "OpenCL.std" -; CHECK-SPIRV: %[[#IntTypeID:]] = OpTypeInt 32 [[#]] -; CHECK-SPIRV: %[[#Int2TypeID:]] = OpTypeVector %[[#IntTypeID]] 2 -; CHECK-SPIRV: %[[#CompositeID:]] = OpCompositeInsert %[[#Int2TypeID]] %[[#]] %[[#]] [[#]] -; CHECK-SPIRV: %[[#ShuffleID:]] = OpVectorShuffle %[[#Int2TypeID]] %[[#CompositeID]] %[[#]] [[#]] [[#]] -; CHECK-SPIRV: %[[#]] = OpExtInst %[[#Int2TypeID]] %[[#SetInstID]] s_min %[[#]] %[[#ShuffleID]] - -define spir_kernel void @test() { -entry: - %call = tail call spir_func <2 x i32> @_Z3minDv2_ii(<2 x i32> , i32 5) #2 - ret void -} - -declare spir_func <2 x i32> @_Z3minDv2_ii(<2 x i32>, i32) diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll index fe79dfe39f645..2dee1d12e7255 100644 --- a/llvm/test/CodeGen/X86/atomic-fp.ll +++ b/llvm/test/CodeGen/X86/atomic-fp.ll @@ -80,23 +80,17 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: movl 8(%ebp), %eax ; X86-NOSSE-NEXT: fildll (%eax) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: faddl 12(%ebp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl 
%ecx, (%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll (%eax) +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: faddl 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%eax) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -109,16 +103,13 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind { ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: movl 8(%ebp), %eax ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: faddl 12(%ebp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, (%eax) +; X86-SSE1-NEXT: fstpl (%eax) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -132,9 +123,7 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind { ; X86-SSE2-NEXT: movl 8(%ebp), %eax ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, (%eax) +; X86-SSE2-NEXT: movsd %xmm0, (%eax) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -148,9 +137,7 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind { ; X86-AVX-NEXT: movl 8(%ebp), %eax ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, (%eax) +; X86-AVX-NEXT: vmovsd %xmm0, (%eax) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -246,22 +233,16 @@ define dso_local void @fadd_64g() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: fildll glob64 ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fld1 -; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll glob64 +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: faddl (%esp) +; X86-NOSSE-NEXT: fstpl glob64 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -273,16 +254,13 @@ define dso_local void @fadd_64g() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; 
X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fld1 ; X86-SSE1-NEXT: faddl (%esp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, glob64 +; X86-SSE1-NEXT: fstpl glob64 ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -295,9 +273,7 @@ define dso_local void @fadd_64g() nounwind { ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, glob64 +; X86-SSE2-NEXT: movsd %xmm0, glob64 ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -310,9 +286,7 @@ define dso_local void @fadd_64g() nounwind { ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, glob64 +; X86-AVX-NEXT: vmovsd %xmm0, glob64 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -409,22 +383,16 @@ define dso_local void @fadd_64imm() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: fildll -559038737 ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fld1 -; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll -559038737 +; X86-NOSSE-NEXT: fld1 +; X86-NOSSE-NEXT: faddl (%esp) +; X86-NOSSE-NEXT: fstpl -559038737 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -436,16 +404,13 @@ define dso_local void @fadd_64imm() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fld1 ; X86-SSE1-NEXT: faddl (%esp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, -559038737 +; X86-SSE1-NEXT: fstpl -559038737 ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -458,9 +423,7 @@ define dso_local void @fadd_64imm() nounwind { ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = 
mem[0],zero ; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, -559038737 +; X86-SSE2-NEXT: movsd %xmm0, -559038737 ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -473,9 +436,7 @@ define dso_local void @fadd_64imm() nounwind { ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, -559038737 +; X86-AVX-NEXT: vmovsd %xmm0, -559038737 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -577,22 +538,16 @@ define dso_local void @fadd_64stack() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fld1 -; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: faddl (%esp) ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -604,16 +559,13 @@ define dso_local void @fadd_64stack() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $24, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fld1 ; X86-SSE1-NEXT: faddl (%esp) ; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -627,8 +579,6 @@ define dso_local void @fadd_64stack() nounwind { ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -642,8 +592,6 @@ define dso_local void @fadd_64stack() nounwind { ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -677,7 +625,7 @@ define dso_local void @fadd_array(ptr 
%arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: movl 20(%ebp), %eax ; X86-NOSSE-NEXT: movl 8(%ebp), %ecx ; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) @@ -685,16 +633,10 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: faddl 12(%ebp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %edx, (%esp) -; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: faddl 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8) ; X86-NOSSE-NEXT: leal -4(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %ebp @@ -709,16 +651,13 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-SSE1-NEXT: movl 20(%ebp), %eax ; X86-SSE1-NEXT: movl 8(%ebp), %ecx ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: faddl 12(%ebp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE1-NEXT: fstpl (%ecx,%eax,8) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -733,9 +672,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-SSE2-NEXT: movl 8(%ebp), %ecx ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -750,9 +687,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-AVX-NEXT: movl 8(%ebp), %ecx ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) +; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -852,23 +787,17 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: movl 8(%ebp), %eax ; X86-NOSSE-NEXT: fildll (%eax) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; 
X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fsubl 12(%ebp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %ecx, (%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll (%eax) +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fsubl 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%eax) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -881,16 +810,13 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind { ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: movl 8(%ebp), %eax ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fsubl 12(%ebp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, (%eax) +; X86-SSE1-NEXT: fstpl (%eax) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -904,9 +830,7 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind { ; X86-SSE2-NEXT: movl 8(%ebp), %eax ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, (%eax) +; X86-SSE2-NEXT: movsd %xmm0, (%eax) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -920,9 +844,7 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind { ; X86-AVX-NEXT: movl 8(%ebp), %eax ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, (%eax) +; X86-AVX-NEXT: vmovsd %xmm0, (%eax) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -1018,23 +940,17 @@ define dso_local void @fsub_64g() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: fildll glob64 ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fld1 ; X86-NOSSE-NEXT: fchs -; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll glob64 +; X86-NOSSE-NEXT: faddl (%esp) +; X86-NOSSE-NEXT: fstpl glob64 ; X86-NOSSE-NEXT: movl %ebp, %esp ; 
X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -1046,17 +962,14 @@ define dso_local void @fsub_64g() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fld1 ; X86-SSE1-NEXT: fchs ; X86-SSE1-NEXT: faddl (%esp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, glob64 +; X86-SSE1-NEXT: fstpl glob64 ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -1069,9 +982,7 @@ define dso_local void @fsub_64g() nounwind { ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, glob64 +; X86-SSE2-NEXT: movsd %xmm0, glob64 ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -1084,9 +995,7 @@ define dso_local void @fsub_64g() nounwind { ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, glob64 +; X86-AVX-NEXT: vmovsd %xmm0, glob64 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -1184,23 +1093,17 @@ define dso_local void @fsub_64imm() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: fildll -559038737 ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fld1 ; X86-NOSSE-NEXT: fchs -; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll -559038737 +; X86-NOSSE-NEXT: faddl (%esp) +; X86-NOSSE-NEXT: fstpl -559038737 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -1212,17 +1115,14 @@ define dso_local void @fsub_64imm() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, 
{{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fld1 ; X86-SSE1-NEXT: fchs ; X86-SSE1-NEXT: faddl (%esp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, -559038737 +; X86-SSE1-NEXT: fstpl -559038737 ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -1235,9 +1135,7 @@ define dso_local void @fsub_64imm() nounwind { ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, -559038737 +; X86-SSE2-NEXT: movsd %xmm0, -559038737 ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -1250,9 +1148,7 @@ define dso_local void @fsub_64imm() nounwind { ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, -559038737 +; X86-AVX-NEXT: vmovsd %xmm0, -559038737 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -1354,22 +1250,16 @@ define dso_local void @fsub_64stack() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fld1 -; X86-NOSSE-NEXT: fsubl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fsubl (%esp) ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -1381,16 +1271,13 @@ define dso_local void @fsub_64stack() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $24, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fld1 ; X86-SSE1-NEXT: fsubl (%esp) ; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -1405,8 +1292,6 @@ define dso_local void @fsub_64stack() nounwind { ; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] ; X86-SSE2-NEXT: subsd %xmm0, %xmm1 ; X86-SSE2-NEXT: movsd %xmm1, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl 
%ebp ; X86-SSE2-NEXT: retl @@ -1421,8 +1306,6 @@ define dso_local void @fsub_64stack() nounwind { ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] ; X86-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -1456,7 +1339,7 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: movl 20(%ebp), %eax ; X86-NOSSE-NEXT: movl 8(%ebp), %ecx ; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) @@ -1464,16 +1347,10 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fsubl 12(%ebp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %edx, (%esp) -; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fsubl 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8) ; X86-NOSSE-NEXT: leal -4(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %ebp @@ -1488,16 +1365,13 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-SSE1-NEXT: movl 20(%ebp), %eax ; X86-SSE1-NEXT: movl 8(%ebp), %ecx ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fsubl 12(%ebp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE1-NEXT: fstpl (%ecx,%eax,8) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -1512,9 +1386,7 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-SSE2-NEXT: movl 8(%ebp), %ecx ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -1529,9 +1401,7 @@ define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-AVX-NEXT: movl 8(%ebp), %ecx ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) +; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8) ; X86-AVX-NEXT: movl 
%ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -1631,23 +1501,17 @@ define dso_local void @fmul_64r(ptr %loc, double %val) nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: movl 8(%ebp), %eax ; X86-NOSSE-NEXT: fildll (%eax) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fmull 12(%ebp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %ecx, (%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll (%eax) +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fmull 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%eax) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -1660,16 +1524,13 @@ define dso_local void @fmul_64r(ptr %loc, double %val) nounwind { ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: movl 8(%ebp), %eax ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fmull 12(%ebp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, (%eax) +; X86-SSE1-NEXT: fstpl (%eax) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -1683,9 +1544,7 @@ define dso_local void @fmul_64r(ptr %loc, double %val) nounwind { ; X86-SSE2-NEXT: movl 8(%ebp), %eax ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: mulsd 12(%ebp), %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, (%eax) +; X86-SSE2-NEXT: movsd %xmm0, (%eax) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -1699,9 +1558,7 @@ define dso_local void @fmul_64r(ptr %loc, double %val) nounwind { ; X86-AVX-NEXT: movl 8(%ebp), %eax ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmulsd 12(%ebp), %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, (%eax) +; X86-AVX-NEXT: vmovsd %xmm0, (%eax) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -1794,22 +1651,16 @@ define dso_local void @fmul_64g() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: fildll glob64 ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl 
{{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll glob64 +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl glob64 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -1821,16 +1672,13 @@ define dso_local void @fmul_64g() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, glob64 +; X86-SSE1-NEXT: fstpl glob64 ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -1843,9 +1691,7 @@ define dso_local void @fmul_64g() nounwind { ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, glob64 +; X86-SSE2-NEXT: movsd %xmm0, glob64 ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -1858,9 +1704,7 @@ define dso_local void @fmul_64g() nounwind { ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, glob64 +; X86-AVX-NEXT: vmovsd %xmm0, glob64 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -1957,22 +1801,16 @@ define dso_local void @fmul_64imm() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: fildll -559038737 ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll -559038737 +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl -559038737 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -1984,16 +1822,13 @@ define dso_local void @fmul_64imm() nounwind { ; 
X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, -559038737 +; X86-SSE1-NEXT: fstpl -559038737 ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -2006,9 +1841,7 @@ define dso_local void @fmul_64imm() nounwind { ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, -559038737 +; X86-SSE2-NEXT: movsd %xmm0, -559038737 ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -2021,9 +1854,7 @@ define dso_local void @fmul_64imm() nounwind { ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, -559038737 +; X86-AVX-NEXT: vmovsd %xmm0, -559038737 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -2125,22 +1956,16 @@ define dso_local void @fmul_64stack() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: fldl (%esp) ; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -2152,16 +1977,13 @@ define dso_local void @fmul_64stack() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $24, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} ; 
X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -2175,8 +1997,6 @@ define dso_local void @fmul_64stack() nounwind { ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -2190,8 +2010,6 @@ define dso_local void @fmul_64stack() nounwind { ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -2225,7 +2043,7 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: movl 20(%ebp), %eax ; X86-NOSSE-NEXT: movl 8(%ebp), %ecx ; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) @@ -2233,16 +2051,10 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fmull 12(%ebp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %edx, (%esp) -; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fmull 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8) ; X86-NOSSE-NEXT: leal -4(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %ebp @@ -2257,16 +2069,13 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-SSE1-NEXT: movl 20(%ebp), %eax ; X86-SSE1-NEXT: movl 8(%ebp), %ecx ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fmull 12(%ebp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE1-NEXT: fstpl (%ecx,%eax,8) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -2281,9 +2090,7 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-SSE2-NEXT: movl 8(%ebp), %ecx ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: mulsd 12(%ebp), %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = 
mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -2298,9 +2105,7 @@ define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-AVX-NEXT: movl 8(%ebp), %ecx ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmulsd 12(%ebp), %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) +; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -2400,23 +2205,17 @@ define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: movl 8(%ebp), %eax ; X86-NOSSE-NEXT: fildll (%eax) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fdivl 12(%ebp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %ecx, (%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll (%eax) +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fdivl 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%eax) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -2429,16 +2228,13 @@ define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind { ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: movl 8(%ebp), %eax ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fdivl 12(%ebp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, (%eax) +; X86-SSE1-NEXT: fstpl (%eax) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -2452,9 +2248,7 @@ define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind { ; X86-SSE2-NEXT: movl 8(%ebp), %eax ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: divsd 12(%ebp), %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, (%eax) +; X86-SSE2-NEXT: movsd %xmm0, (%eax) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -2468,9 +2262,7 @@ define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind { ; X86-AVX-NEXT: movl 8(%ebp), %eax ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vdivsd 12(%ebp), %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, (%eax) +; X86-AVX-NEXT: 
vmovsd %xmm0, (%eax) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -2565,22 +2357,16 @@ define dso_local void @fdiv_64g() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: fildll glob64 ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll glob64 +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl glob64 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -2592,16 +2378,13 @@ define dso_local void @fdiv_64g() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, glob64 +; X86-SSE1-NEXT: fstpl glob64 ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -2614,9 +2397,7 @@ define dso_local void @fdiv_64g() nounwind { ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, glob64 +; X86-SSE2-NEXT: movsd %xmm0, glob64 ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -2629,9 +2410,7 @@ define dso_local void @fdiv_64g() nounwind { ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, glob64 +; X86-AVX-NEXT: vmovsd %xmm0, glob64 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -2728,22 +2507,16 @@ define dso_local void @fdiv_64imm() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: fildll -559038737 ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 
-; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll -559038737 +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NOSSE-NEXT: fstpl -559038737 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -2755,16 +2528,13 @@ define dso_local void @fdiv_64imm() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; X86-SSE1-NEXT: subl $16, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, -559038737 +; X86-SSE1-NEXT: fstpl -559038737 ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -2777,9 +2547,7 @@ define dso_local void @fdiv_64imm() nounwind { ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, -559038737 +; X86-SSE2-NEXT: movsd %xmm0, -559038737 ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -2792,9 +2560,7 @@ define dso_local void @fdiv_64imm() nounwind { ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, -559038737 +; X86-AVX-NEXT: vmovsd %xmm0, -559038737 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -2896,22 +2662,16 @@ define dso_local void @fdiv_64stack() nounwind { ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fld1 -; X86-NOSSE-NEXT: fdivl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fdivl (%esp) ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl @@ -2923,16 +2683,13 @@ define dso_local void @fdiv_64stack() nounwind { ; X86-SSE1-NEXT: andl $-8, %esp ; 
X86-SSE1-NEXT: subl $24, %esp ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fld1 ; X86-SSE1-NEXT: fdivl (%esp) ; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -2947,8 +2704,6 @@ define dso_local void @fdiv_64stack() nounwind { ; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] ; X86-SSE2-NEXT: divsd %xmm0, %xmm1 ; X86-SSE2-NEXT: movsd %xmm1, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -2963,8 +2718,6 @@ define dso_local void @fdiv_64stack() nounwind { ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] ; X86-AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -2998,7 +2751,7 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: movl 20(%ebp), %eax ; X86-NOSSE-NEXT: movl 8(%ebp), %ecx ; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) @@ -3006,16 +2759,10 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fdivl 12(%ebp) -; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %edx, (%esp) -; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) -; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) +; X86-NOSSE-NEXT: fldl (%esp) +; X86-NOSSE-NEXT: fdivl 12(%ebp) +; X86-NOSSE-NEXT: fstpl (%ecx,%eax,8) ; X86-NOSSE-NEXT: leal -4(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %ebp @@ -3030,16 +2777,13 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-SSE1-NEXT: movl 20(%ebp), %eax ; X86-SSE1-NEXT: movl 8(%ebp), %ecx ; X86-SSE1-NEXT: xorps %xmm0, %xmm0 -; X86-SSE1-NEXT: xorps %xmm1, %xmm1 -; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] -; X86-SSE1-NEXT: movss %xmm1, (%esp) -; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; X86-SSE1-NEXT: movss %xmm0, (%esp) +; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: fldl (%esp) ; X86-SSE1-NEXT: fdivl 12(%ebp) -; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) -; 
X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] -; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE1-NEXT: fstpl (%ecx,%eax,8) ; X86-SSE1-NEXT: movl %ebp, %esp ; X86-SSE1-NEXT: popl %ebp ; X86-SSE1-NEXT: retl @@ -3054,9 +2798,7 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-SSE2-NEXT: movl 8(%ebp), %ecx ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: divsd 12(%ebp), %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) +; X86-SSE2-NEXT: movsd %xmm0, (%ecx,%eax,8) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -3071,9 +2813,7 @@ define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind { ; X86-AVX-NEXT: movl 8(%ebp), %ecx ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vdivsd 12(%ebp), %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) +; X86-AVX-NEXT: vmovsd %xmm0, (%ecx,%eax,8) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl diff --git a/llvm/test/MC/LoongArch/Basic/Integer/misc.s b/llvm/test/MC/LoongArch/Basic/Integer/misc.s index 182d1da9b237e..26a9205d8e17d 100644 --- a/llvm/test/MC/LoongArch/Basic/Integer/misc.s +++ b/llvm/test/MC/LoongArch/Basic/Integer/misc.s @@ -7,7 +7,7 @@ # RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s +# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-OBJ,CHECK64-ASM-AND-OBJ %s ############################################################# ## Instructions for both loongarch32 and loongarch64 @@ -33,6 +33,13 @@ rdtimeh.w $a7, $a1 # CHECK-ASM: encoding: [0x03,0x6d,0x00,0x00] cpucfg $sp, $a4 +# CHECK-ASM-AND-OBJ: ud 0 +# CHECK-ASM: encoding: [0x00,0x04,0x60,0x38] +ud 0 + +# CHECK-ASM-AND-OBJ: ud 31 +# CHECK-ASM: encoding: [0xff,0x07,0x60,0x38] +ud 31 ############################################################# ## Instructions only for loongarch64 @@ -40,6 +47,11 @@ cpucfg $sp, $a4 .ifdef LA64 +# CHECK64-OBJ: ud 0 +# CHECK64-ASM: amswap.w $zero, $ra, $zero +# CHECK64-ASM: encoding: [0x00,0x04,0x60,0x38] +amswap.w $zero, $ra, $zero + # CHECK64-ASM-AND-OBJ: asrtle.d $t0, $t5 # CHECK64-ASM: encoding: [0x80,0x45,0x01,0x00] asrtle.d $t0, $t5 diff --git a/llvm/test/TableGen/RegClassByHwModeErrors.td b/llvm/test/TableGen/RegClassByHwModeErrors.td index 0ee6370ccd0ce..c7731312e28a6 100644 --- a/llvm/test/TableGen/RegClassByHwModeErrors.td +++ b/llvm/test/TableGen/RegClassByHwModeErrors.td @@ -9,6 +9,8 @@ // RUN: %t/compress-regclass-by-hwmode-2.td -o /dev/null 2>&1 | FileCheck %t/compress-regclass-by-hwmode-2.td --implicit-check-not="error:" // RUN: not llvm-tblgen --gen-dag-isel -I %p/../../include -I %t -I %S \ // RUN: %t/vt-by-hwmode-missing.td -o /dev/null 2>&1 | FileCheck %t/vt-by-hwmode-missing.td --implicit-check-not="error:" +// RUN: not llvm-tblgen --gen-dag-isel -I %p/../../include -I %t -I %S \ +// RUN: %t/multiple-entries-for-same-mode.td -o /dev/null 2>&1 | FileCheck %t/multiple-entries-for-same-mode.td --implicit-check-not="error:" //--- Common.td include "Common/RegClassByHwModeCommon.td" @@ -119,3 +121,22 @@ def TEST : TestInstruction 
{ def MyTargetISA : InstrInfo; def MyTarget : Target { let InstructionSet = MyTargetISA; } + + +//--- multiple-entries-for-same-mode.td +include "Common.td" +/// We should get an error if the same mode is listed more than once. +defvar Ptr64Alias = Ptr64; +def BadRegClass : RegClassByHwMode<[Ptr32, Ptr64, Ptr64Alias], [XRegs, YRegs, YRegs]>; +// CHECK: [[#@LINE-1]]:5: error: duplicate RegisterClass entry for HwMode Ptr64: YRegs +// Need at least one instruction pattern using the bad reg class to trigger the error: +def USE_BAD_REG_CLASS : TestInstruction { + let OutOperandList = (outs BadRegClass:$dst); + let InOperandList = (ins BadRegClass:$src1, BadRegClass:$src2); + let AsmString = "bad $dst"; + let Pattern = [(set BadRegClass:$dst, (add BadRegClass:$src1, BadRegClass:$src2))]; +} +def MyTargetISA : InstrInfo; +def MyTarget : Target { + let InstructionSet = MyTargetISA; +} diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll index fb860a5e7bdf3..6509797e0d3dc 100644 --- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll +++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll @@ -35,12 +35,10 @@ define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) { } ; (V * C1) * C2 => V * (C1 * C2) -; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 } define <4 x float> @test_fmul_reassoc(<4 x float> %V) { ; CHECK-LABEL: @test_fmul_reassoc( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], -; CHECK-NEXT: ret <4 x float> [[TMP2]] +; CHECK: [[TMP1:%.*]] = fmul reassoc <4 x float> %V, +; CHECK-NEXT: ret <4 x float> [[TMP1]] %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ret <4 x float> %Z diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index 54b0bf8c50ac7..3465781e3af9d 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -525,8 +525,7 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) { define <2 x float> @div_constant_dividend3(<2 x float> %x) { ; CHECK-LABEL: @div_constant_dividend3( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], -; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], +; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], ; CHECK-NEXT: ret <2 x float> [[T2]] ; %t1 = fdiv <2 x float> , %x diff --git a/llvm/test/Transforms/InstCombine/issue64967-reassoc-fmul.ll b/llvm/test/Transforms/InstCombine/issue64967-reassoc-fmul.ll new file mode 100644 index 0000000000000..5d064234bf609 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/issue64967-reassoc-fmul.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +; Show that unlike fadd, fmul does not require nsz to be reassociated.
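The point this new test file makes rests on a simple IEEE-754 fact: the sign of a (non-NaN) product is the XOR of its operands' signs, so folding (x * C1) * C2 into x * (C1 * C2) preserves signed zeros and, unlike the analogous fadd fold, needs no nsz. A minimal standalone C++ check of that claim (illustrative only, not part of the patch):

#include <cmath>
#include <cstdio>

int main() {
  // Whatever the sign of the zero input, the two-multiply chain and the
  // folded single multiply yield a zero of the same sign.
  for (float x : {+0.0f, -0.0f}) {
    float separate = (x * 2.0f) * 4.0f;
    float folded = x * 8.0f;
    std::printf("x=%+.1f separate_neg=%d folded_neg=%d\n", x,
                (int)std::signbit(separate), (int)std::signbit(folded));
  }
  return 0;
}

The negative-constant tests that follow exercise the same identity with sign flips; the product's sign still depends only on x's sign XOR'd with the constants' signs, so the fold stays exact.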
+ +; Can't reassociate anyway +define float @fmul(float %x) { +; CHECK-LABEL: define float @fmul( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: [[FMUL0:%.*]] = fmul float [[X]], 2.000000e+00 +; CHECK-NEXT: [[FMUL1:%.*]] = fmul float [[FMUL0]], 4.000000e+00 +; CHECK-NEXT: ret float [[FMUL1]] +; + %fmul0 = fmul float %x, 2.0 + %fmul1 = fmul float %fmul0, 4.0 + ret float %fmul1 +} + +; Should be able to reassociate without nsz +; (+0 * 2) * 4 = +0 +; (-0 * 2) * 4 = -0 + +; (+0 * 8) = +0 +; (-0 * 8) = -0 +define float @fmul_reassoc(float %x) { +; CHECK-LABEL: define float @fmul_reassoc( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: [[FMUL1:%.*]] = fmul reassoc float [[X]], 8.000000e+00 +; CHECK-NEXT: ret float [[FMUL1]] +; + %fmul0 = fmul reassoc float %x, 2.0 + %fmul1 = fmul reassoc float %fmul0, 4.0 + ret float %fmul1 +} + +define <2 x float> @fmul_reassoc_v2(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @fmul_reassoc_v2( +; CHECK-SAME: <2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[FMUL1:%.*]] = fmul reassoc <2 x float> [[X]], splat (float 8.000000e+00) +; CHECK-NEXT: ret <2 x float> [[FMUL1]] +; + %fmul0 = fmul reassoc <2 x float> %x, splat (float 2.0) + %fmul1 = fmul reassoc <2 x float> %fmul0, splat (float 4.0) + ret <2 x float> %fmul1 +} + +; (+0 * 2) * -4 = -0 +; (-0 * 2) * -4 = +0 + +; (+0 * -8) = -0 +; (-0 * -8) = +0 +define float @fmul_reassoc_negative_0(float %x) { +; CHECK-LABEL: define float @fmul_reassoc_negative_0( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: [[FMUL1:%.*]] = fmul reassoc float [[X]], -8.000000e+00 +; CHECK-NEXT: ret float [[FMUL1]] +; + %fmul0 = fmul reassoc float %x, 2.0 + %fmul1 = fmul reassoc float %fmul0, -4.0 + ret float %fmul1 +} + +; (+0 * -2) * 4 = -0 +; (-0 * -2) * 4 = +0 + +; (+0 * -8) = -0 +; (-0 * -8) = +0 +define float @fmul_reassoc_negative_1(float %x) { +; CHECK-LABEL: define float @fmul_reassoc_negative_1( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: [[FMUL1:%.*]] = fmul reassoc float [[X]], -8.000000e+00 +; CHECK-NEXT: ret float [[FMUL1]] +; + %fmul0 = fmul reassoc float %x, -2.0 + %fmul1 = fmul reassoc float %fmul0, 4.0 + ret float %fmul1 +} + +; Does reassociate already, but unnecessarily requires nsz on both multiplies.
+define float @fmul_reassoc_nsz(float %x) { +; CHECK-LABEL: define float @fmul_reassoc_nsz( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: [[FMUL1:%.*]] = fmul reassoc nsz float [[X]], 8.000000e+00 +; CHECK-NEXT: ret float [[FMUL1]] +; + %fmul0 = fmul nsz reassoc float %x, 2.0 + %fmul1 = fmul nsz reassoc float %fmul0, 4.0 + ret float %fmul1 +} + +define float @fmul_reassoc_posk_neg0(float %x) { +; CHECK-LABEL: define float @fmul_reassoc_posk_neg0( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: [[FMUL1:%.*]] = fmul reassoc float [[X]], -0.000000e+00 +; CHECK-NEXT: ret float [[FMUL1]] +; + %fmul0 = fmul reassoc float %x, 4.0 + %fmul1 = fmul reassoc float %fmul0, -0.0 + ret float %fmul1 +} + +define float @fmul_reassoc_neg0_posk(float %x) { +; CHECK-LABEL: define float @fmul_reassoc_neg0_posk( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: [[FMUL0:%.*]] = fmul reassoc float [[X]], -0.000000e+00 +; CHECK-NEXT: ret float [[FMUL0]] +; + %fmul0 = fmul reassoc float %x, -0.0 + %fmul1 = fmul reassoc float %fmul0, 4.0 + ret float %fmul1 +} diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 0f3137cdd0be3..615f905b7e58a 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -2202,3 +2202,31 @@ define i8 @mul_not_nsw_nonneg(i8 %x, i8 %y) { %mul = mul i8 %x, %y ret i8 %mul } + +define i16 @mul_udiv_zext(i8 %x) { +; CHECK-LABEL: @mul_udiv_zext( +; CHECK-NEXT: [[X_FR:%.*]] = freeze i8 [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X_FR]], 15 +; CHECK-NEXT: [[NARROW:%.*]] = sub nuw i8 [[X_FR]], [[TMP1]] +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[NARROW]] to i16 +; CHECK-NEXT: ret i16 [[ZEXT]] +; + %div = udiv i8 %x, 15 + %zext = zext i8 %div to i16 + %mul = mul i16 %zext, 15 + ret i16 %mul +} + +define i16 @mul_udiv_zext_uneq(i8 %x) { +; CHECK-LABEL: @mul_udiv_zext_uneq( +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[X:%.*]], 20 +; CHECK-NEXT: [[NARROW:%.*]] = mul nuw i8 [[DIV]], 15 +; CHECK-NEXT: [[MUL:%.*]] = zext i8 [[NARROW]] to i16 +; CHECK-NEXT: ret i16 [[MUL]] +; + %div = udiv i8 %x, 20 + %zext = zext i8 %div to i16 + %mul = mul i16 %zext, 15 + ret i16 %mul +} + diff --git a/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll b/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll index d06b520931b92..eaccf15cd80f6 100644 --- a/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstSimplify/ptrtoaddr.ll @@ -316,3 +316,85 @@ define ptr @gep_gep_inv_ptrtoaddr(ptr %p) { %gep2 = getelementptr i8, ptr %gep1, i64 %p.addr.inv ret ptr %gep2 } + +define i1 @icmp_ptrtoaddr_0() { +; CHECK-LABEL: define i1 @icmp_ptrtoaddr_0() { +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne i64 ptrtoaddr (ptr @g to i64), 0 + ret i1 %cmp +} + +; This fails to fold because we currently don't assume that globals are located +; at a non-null address for non-default address spaces. 
+define i1 @icmp_ptrtoaddr_0_addrsize() { +; CHECK-LABEL: define i1 @icmp_ptrtoaddr_0_addrsize() { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), 0 + ret i1 %cmp +} + +define i1 @icmp_ptrtoint_0_addrsize() { +; CHECK-LABEL: define i1 @icmp_ptrtoint_0_addrsize() { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 ptrtoint (ptr addrspace(1) @g.as1 to i64), 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne i64 ptrtoint (ptr addrspace(1) @g.as1 to i64), 0 + ret i1 %cmp +} + +define i1 @icmp_ptrtoaddr_ptrtoaddr() { +; CHECK-LABEL: define i1 @icmp_ptrtoaddr_ptrtoaddr() { +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne i64 ptrtoaddr (ptr @g to i64), ptrtoaddr (ptr @g2 to i64) + ret i1 %cmp +} + +define i1 @icmp_ptrtoaddr_ptrtoaddr_addrsize() { +; CHECK-LABEL: define i1 @icmp_ptrtoaddr_ptrtoaddr_addrsize() { +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), ptrtoaddr (ptr addrspace(1) @g2.as1 to i32) + ret i1 %cmp +} + +; This could still be folded because the address being non-equal also implies +; that all pointer bits together are non-equal. +define i1 @icmp_ptrtoint_ptrtoint_addrsize() { +; CHECK-LABEL: define i1 @icmp_ptrtoint_ptrtoint_addrsize() { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 ptrtoint (ptr addrspace(1) @g.as1 to i64), ptrtoint (ptr addrspace(1) @g2.as1 to i64) +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne i64 ptrtoint (ptr addrspace(1) @g.as1 to i64), ptrtoint (ptr addrspace(1) @g2.as1 to i64) + ret i1 %cmp +} + +define i1 @icmp_relational_ptrtoaddr_ptrtoaddr() { +; CHECK-LABEL: define i1 @icmp_relational_ptrtoaddr_ptrtoaddr() { +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ult i64 ptrtoaddr (ptr @g to i64), ptrtoaddr (ptr getelementptr inbounds (i8, ptr @g, i64 1) to i64) + ret i1 %cmp +} + +define i1 @icmp_relational_ptrtoaddr_ptrtoaddr_addrsize() { +; CHECK-LABEL: define i1 @icmp_relational_ptrtoaddr_ptrtoaddr_addrsize() { +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ult i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), ptrtoaddr (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @g.as1, i32 1) to i32) + ret i1 %cmp +} + +; This could still be folded because we know that the non-address bits must be +; the same, as GEP does not modify them. 
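The folding arguments in these ptrtoaddr/ptrtoint comments can be pictured with a toy fat-pointer model in which only the low bits are the address: distinct addresses force distinct full bit patterns, and a GEP moves only the address bits. A standalone C++ sketch of that argument (the 64/32 bit split and all names are invented for illustration; the relational ptrtoint test it mirrors follows):

#include <cassert>
#include <cstdint>

// Toy pointer for an address space with 64-bit pointers but a 32-bit
// address: low 32 bits hold the address, high 32 bits hold metadata.
struct FatPtr {
  uint64_t bits;
  uint32_t addr() const { return static_cast<uint32_t>(bits); }
  FatPtr gep(uint32_t off) const {
    // A GEP advances only the address bits; metadata is untouched.
    return {(bits & 0xFFFFFFFF00000000ull) |
            (uint64_t)(uint32_t)(addr() + off)};
  }
};

int main() {
  FatPtr g{0xAABBCCDD00001000ull}, g2{0xAABBCCDD00002000ull};
  // Distinct addresses imply distinct full bit patterns, so `ne` on the
  // full ptrtoint values could fold just like `ne` on the address alone.
  assert(g.addr() != g2.addr());
  assert(g.bits != g2.bits);
  // An inbounds GEP of +1 leaves the metadata bits equal, so `ult` on
  // the full value agrees with `ult` on the address alone.
  FatPtr g1 = g.gep(1);
  assert((g.bits >> 32) == (g1.bits >> 32));
  assert(g.addr() < g1.addr() && g.bits < g1.bits);
  return 0;
}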
+define i1 @icmp_relational_ptrtoint_ptrtoint_addrsize() { +; CHECK-LABEL: define i1 @icmp_relational_ptrtoint_ptrtoint_addrsize() { +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 ptrtoint (ptr addrspace(1) @g.as1 to i64), ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @g.as1, i64 1) to i64) +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ult i64 ptrtoint (ptr addrspace(1) @g.as1 to i64), ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @g.as1, i64 1) to i64) + ret i1 %cmp +} diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-rcpc-immo-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-rcpc-immo-instructions.s index cd3d7e0bf1b57..d9943f342b827 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-rcpc-immo-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-rcpc-immo-instructions.s @@ -10,15 +10,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 1 0.50 * ldapur w7, [x24] -# CHECK-NEXT: 2 1 0.50 * ldapur x20, [x13] -# CHECK-NEXT: 2 1 0.50 * ldapurb w13, [x17] -# CHECK-NEXT: 2 1 0.50 * ldapurh w3, [x22] -# CHECK-NEXT: 2 1 0.50 U ldapursb w7, [x8] -# CHECK-NEXT: 2 1 0.50 U ldapursb x29, [x7] -# CHECK-NEXT: 2 1 0.50 U ldapursh w17, [x19] -# CHECK-NEXT: 2 1 0.50 U ldapursh x3, [x3] -# CHECK-NEXT: 2 1 0.50 U ldapursw x3, [x18] +# CHECK-NEXT: 1 4 0.33 * ldapur w7, [x24] +# CHECK-NEXT: 1 4 0.33 * ldapur x20, [x13] +# CHECK-NEXT: 1 4 0.33 * ldapurb w13, [x17] +# CHECK-NEXT: 1 4 0.33 * ldapurh w3, [x22] +# CHECK-NEXT: 1 4 0.33 U ldapursb w7, [x8] +# CHECK-NEXT: 1 4 0.33 U ldapursb x29, [x7] +# CHECK-NEXT: 1 4 0.33 U ldapursh w17, [x19] +# CHECK-NEXT: 1 4 0.33 U ldapursh x3, [x3] +# CHECK-NEXT: 1 4 0.33 U ldapursw x3, [x18] # CHECK-NEXT: 2 1 0.50 * stlur w3, [x27] # CHECK-NEXT: 2 1 0.50 * stlur x23, [x25] # CHECK-NEXT: 2 1 0.50 * stlurb w30, [x17] @@ -41,19 +41,19 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] -# CHECK-NEXT: - - 6.50 6.50 - 6.50 6.50 - - - - - - +# CHECK-NEXT: - - 2.00 2.00 3.00 5.00 5.00 - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions: -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapur w7, [x24] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapur x20, [x13] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapurb w13, [x17] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapurh w3, [x22] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursb w7, [x8] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursb x29, [x7] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursh w17, [x19] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursh x3, [x3] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursw x3, [x18] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapur w7, [x24] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapur x20, [x13] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapurb w13, [x17] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapurh w3, [x22] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursb w7, [x8] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursb x29, [x7] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursh w17, [x19] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursh x3, [x3] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursw x3, [x18] 
# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - stlur w3, [x27] # CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - stlur x23, [x25] # CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - stlurb w30, [x17] diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-rcpc-immo-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-rcpc-immo-instructions.s index 6faa5e1f4db1b..d5302e96edf4e 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-rcpc-immo-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-rcpc-immo-instructions.s @@ -10,15 +10,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 1 0.50 * ldapur w7, [x24] -# CHECK-NEXT: 2 1 0.50 * ldapur x20, [x13] -# CHECK-NEXT: 2 1 0.50 * ldapurb w13, [x17] -# CHECK-NEXT: 2 1 0.50 * ldapurh w3, [x22] -# CHECK-NEXT: 2 1 0.50 U ldapursb w7, [x8] -# CHECK-NEXT: 2 1 0.50 U ldapursb x29, [x7] -# CHECK-NEXT: 2 1 0.50 U ldapursh w17, [x19] -# CHECK-NEXT: 2 1 0.50 U ldapursh x3, [x3] -# CHECK-NEXT: 2 1 0.50 U ldapursw x3, [x18] +# CHECK-NEXT: 1 4 0.33 * ldapur w7, [x24] +# CHECK-NEXT: 1 4 0.33 * ldapur x20, [x13] +# CHECK-NEXT: 1 4 0.33 * ldapurb w13, [x17] +# CHECK-NEXT: 1 4 0.33 * ldapurh w3, [x22] +# CHECK-NEXT: 1 4 0.33 U ldapursb w7, [x8] +# CHECK-NEXT: 1 4 0.33 U ldapursb x29, [x7] +# CHECK-NEXT: 1 4 0.33 U ldapursh w17, [x19] +# CHECK-NEXT: 1 4 0.33 U ldapursh x3, [x3] +# CHECK-NEXT: 1 4 0.33 U ldapursw x3, [x18] # CHECK-NEXT: 2 1 0.50 * stlur w3, [x27] # CHECK-NEXT: 2 1 0.50 * stlur x23, [x25] # CHECK-NEXT: 2 1 0.50 * stlurb w30, [x17] @@ -41,19 +41,19 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] -# CHECK-NEXT: - - 6.50 6.50 - 6.50 6.50 - - - - - - +# CHECK-NEXT: - - 2.00 2.00 3.00 5.00 5.00 - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions: -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapur w7, [x24] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapur x20, [x13] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapurb w13, [x17] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapurh w3, [x22] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursb w7, [x8] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursb x29, [x7] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursh w17, [x19] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursh x3, [x3] -# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - ldapursw x3, [x18] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapur w7, [x24] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapur x20, [x13] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapurb w13, [x17] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapurh w3, [x22] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursb w7, [x8] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursb x29, [x7] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursh w17, [x19] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursh x3, [x3] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - - - ldapursw x3, [x18] # CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - stlur w3, [x27] # CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - stlur x23, [x25] # CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - - stlurb w30, [x17] diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-rcpc-immo-instructions.s 
b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-rcpc-immo-instructions.s index 5c9b43a0e5121..dcea382de5fa9 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-rcpc-immo-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-rcpc-immo-instructions.s @@ -10,15 +10,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 1 0.50 * ldapur w7, [x24] -# CHECK-NEXT: 2 1 0.50 * ldapur x20, [x13] -# CHECK-NEXT: 2 1 0.50 * ldapurb w13, [x17] -# CHECK-NEXT: 2 1 0.50 * ldapurh w3, [x22] -# CHECK-NEXT: 2 1 0.50 U ldapursb w7, [x8] -# CHECK-NEXT: 2 1 0.50 U ldapursb x29, [x7] -# CHECK-NEXT: 2 1 0.50 U ldapursh w17, [x19] -# CHECK-NEXT: 2 1 0.50 U ldapursh x3, [x3] -# CHECK-NEXT: 2 1 0.50 U ldapursw x3, [x18] +# CHECK-NEXT: 1 4 0.33 * ldapur w7, [x24] +# CHECK-NEXT: 1 4 0.33 * ldapur x20, [x13] +# CHECK-NEXT: 1 4 0.33 * ldapurb w13, [x17] +# CHECK-NEXT: 1 4 0.33 * ldapurh w3, [x22] +# CHECK-NEXT: 1 4 0.33 U ldapursb w7, [x8] +# CHECK-NEXT: 1 4 0.33 U ldapursb x29, [x7] +# CHECK-NEXT: 1 4 0.33 U ldapursh w17, [x19] +# CHECK-NEXT: 1 4 0.33 U ldapursh x3, [x3] +# CHECK-NEXT: 1 4 0.33 U ldapursw x3, [x18] # CHECK-NEXT: 2 1 0.50 * stlur w3, [x27] # CHECK-NEXT: 2 1 0.50 * stlur x23, [x25] # CHECK-NEXT: 2 1 0.50 * stlurb w30, [x17] @@ -46,19 +46,19 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11] -# CHECK-NEXT: - - 6.50 6.50 - - - - 6.50 6.50 - - - - - - - - +# CHECK-NEXT: - - 2.00 2.00 - - - 3.00 5.00 5.00 - - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11] Instructions: -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapur w7, [x24] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapur x20, [x13] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapurb w13, [x17] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapurh w3, [x22] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapursb w7, [x8] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapursb x29, [x7] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapursh w17, [x19] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapursh x3, [x3] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - ldapursw x3, [x18] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapur w7, [x24] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapur x20, [x13] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapurb w13, [x17] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapurh w3, [x22] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapursb w7, [x8] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapursb x29, [x7] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapursh w17, [x19] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapursh x3, [x3] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldapursw x3, [x18] # CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - stlur w3, [x27] # CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - stlur x23, [x25] # CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - stlurb w30, [x17] diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-rcpc-immo-instructions.s 
b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-rcpc-immo-instructions.s index 71fd689522215..dfcc202192392 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-rcpc-immo-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-rcpc-immo-instructions.s @@ -10,15 +10,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 1 0.50 * ldapur w7, [x24] -# CHECK-NEXT: 2 1 0.50 * ldapur x20, [x13] -# CHECK-NEXT: 2 1 0.50 * ldapurb w13, [x17] -# CHECK-NEXT: 2 1 0.50 * ldapurh w3, [x22] -# CHECK-NEXT: 2 1 0.50 U ldapursb w7, [x8] -# CHECK-NEXT: 2 1 0.50 U ldapursb x29, [x7] -# CHECK-NEXT: 2 1 0.50 U ldapursh w17, [x19] -# CHECK-NEXT: 2 1 0.50 U ldapursh x3, [x3] -# CHECK-NEXT: 2 1 0.50 U ldapursw x3, [x18] +# CHECK-NEXT: 1 4 0.33 * ldapur w7, [x24] +# CHECK-NEXT: 1 4 0.33 * ldapur x20, [x13] +# CHECK-NEXT: 1 4 0.33 * ldapurb w13, [x17] +# CHECK-NEXT: 1 4 0.33 * ldapurh w3, [x22] +# CHECK-NEXT: 1 4 0.33 U ldapursb w7, [x8] +# CHECK-NEXT: 1 4 0.33 U ldapursb x29, [x7] +# CHECK-NEXT: 1 4 0.33 U ldapursh w17, [x19] +# CHECK-NEXT: 1 4 0.33 U ldapursh x3, [x3] +# CHECK-NEXT: 1 4 0.33 U ldapursw x3, [x18] # CHECK-NEXT: 2 1 0.50 * stlur w3, [x27] # CHECK-NEXT: 2 1 0.50 * stlur x23, [x25] # CHECK-NEXT: 2 1 0.50 * stlurb w30, [x17] @@ -48,19 +48,19 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - 6.50 6.50 - - - - 6.50 6.50 - - - - - - - - - - +# CHECK-NEXT: - - 2.00 2.00 - - - 3.00 5.00 5.00 - - - - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] Instructions: -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapur w7, [x24] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapur x20, [x13] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapurb w13, [x17] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapurh w3, [x22] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapursb w7, [x8] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapursb x29, [x7] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapursh w17, [x19] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapursh x3, [x3] -# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - ldapursw x3, [x18] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapur w7, [x24] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapur x20, [x13] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapurb w13, [x17] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapurh w3, [x22] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapursb w7, [x8] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapursb x29, [x7] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapursh w17, [x19] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapursh x3, [x3] +# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - ldapursw x3, [x18] # CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - stlur w3, [x27] # CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - stlur x23, [x25] # CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - - - stlurb w30, [x17] diff 
--git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-rcpc-immo-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-rcpc-immo-instructions.s index a48978ce8b94d..4fff7670058bb 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-rcpc-immo-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-rcpc-immo-instructions.s @@ -10,15 +10,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 1 0.50 * ldapur w7, [x24] -# CHECK-NEXT: 2 1 0.50 * ldapur x20, [x13] -# CHECK-NEXT: 2 1 0.50 * ldapurb w13, [x17] -# CHECK-NEXT: 2 1 0.50 * ldapurh w3, [x22] -# CHECK-NEXT: 2 1 0.50 U ldapursb w7, [x8] -# CHECK-NEXT: 2 1 0.50 U ldapursb x29, [x7] -# CHECK-NEXT: 2 1 0.50 U ldapursh w17, [x19] -# CHECK-NEXT: 2 1 0.50 U ldapursh x3, [x3] -# CHECK-NEXT: 2 1 0.50 U ldapursw x3, [x18] +# CHECK-NEXT: 1 4 0.33 * ldapur w7, [x24] +# CHECK-NEXT: 1 4 0.33 * ldapur x20, [x13] +# CHECK-NEXT: 1 4 0.33 * ldapurb w13, [x17] +# CHECK-NEXT: 1 4 0.33 * ldapurh w3, [x22] +# CHECK-NEXT: 1 4 0.33 U ldapursb w7, [x8] +# CHECK-NEXT: 1 4 0.33 U ldapursb x29, [x7] +# CHECK-NEXT: 1 4 0.33 U ldapursh w17, [x19] +# CHECK-NEXT: 1 4 0.33 U ldapursh x3, [x3] +# CHECK-NEXT: 1 4 0.33 U ldapursw x3, [x18] # CHECK-NEXT: 2 1 0.50 * stlur w3, [x27] # CHECK-NEXT: 2 1 0.50 * stlur x23, [x25] # CHECK-NEXT: 2 1 0.50 * stlurb w30, [x17] @@ -53,19 +53,19 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] -# CHECK-NEXT: - - - 6.50 6.50 - - - - - - 6.50 - - - - - - - - 6.50 - - - - +# CHECK-NEXT: - - - 2.00 2.00 - - - - 3.00 3.00 5.00 - - - - - - - - 2.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions: -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapur w7, [x24] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapur x20, [x13] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapurb w13, [x17] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapurh w3, [x22] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapursb w7, [x8] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapursb x29, [x7] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapursh w17, [x19] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapursh x3, [x3] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - ldapursw x3, [x18] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapur w7, [x24] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapur x20, [x13] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapurb w13, [x17] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapurh w3, [x22] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapursb w7, [x8] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapursb x29, [x7] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapursh w17, [x19] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapursh x3, [x3] +# CHECK-NEXT: 
- - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldapursw x3, [x18] # CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlur w3, [x27] # CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlur x23, [x25] # CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlurb w30, [x17] diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-rcpc-immo-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-rcpc-immo-instructions.s index f801a18bc7a06..dc064d6ea3f3f 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-rcpc-immo-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-rcpc-immo-instructions.s @@ -10,15 +10,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 1 0.50 * ldapur w7, [x24] -# CHECK-NEXT: 2 1 0.50 * ldapur x20, [x13] -# CHECK-NEXT: 2 1 0.50 * ldapurb w13, [x17] -# CHECK-NEXT: 2 1 0.50 * ldapurh w3, [x22] -# CHECK-NEXT: 2 1 0.50 U ldapursb w7, [x8] -# CHECK-NEXT: 2 1 0.50 U ldapursb x29, [x7] -# CHECK-NEXT: 2 1 0.50 U ldapursh w17, [x19] -# CHECK-NEXT: 2 1 0.50 U ldapursh x3, [x3] -# CHECK-NEXT: 2 1 0.50 U ldapursw x3, [x18] +# CHECK-NEXT: 1 4 0.33 * ldapur w7, [x24] +# CHECK-NEXT: 1 4 0.33 * ldapur x20, [x13] +# CHECK-NEXT: 1 4 0.33 * ldapurb w13, [x17] +# CHECK-NEXT: 1 4 0.33 * ldapurh w3, [x22] +# CHECK-NEXT: 1 4 0.33 U ldapursb w7, [x8] +# CHECK-NEXT: 1 4 0.33 U ldapursb x29, [x7] +# CHECK-NEXT: 1 4 0.33 U ldapursh w17, [x19] +# CHECK-NEXT: 1 4 0.33 U ldapursh x3, [x3] +# CHECK-NEXT: 1 4 0.33 U ldapursw x3, [x18] # CHECK-NEXT: 2 1 0.50 * stlur w3, [x27] # CHECK-NEXT: 2 1 0.50 * stlur x23, [x25] # CHECK-NEXT: 2 1 0.50 * stlurb w30, [x17] @@ -51,19 +51,19 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] -# CHECK-NEXT: - - - 6.50 6.50 - - - - - - 6.50 - - - - - - - - 6.50 - - +# CHECK-NEXT: - - - 2.00 2.00 - - - - 3.00 3.00 5.00 - - - - - - - - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions: -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapur w7, [x24] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapur x20, [x13] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapurb w13, [x17] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapurh w3, [x22] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapursb w7, [x8] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapursb x29, [x7] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapursh w17, [x19] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapursh x3, [x3] -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - ldapursw x3, [x18] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapur w7, [x24] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapur x20, [x13] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapurb w13, [x17] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapurh w3, [x22] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapursb w7, [x8] +# 
CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapursb x29, [x7] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapursh w17, [x19] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapursh x3, [x3] +# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldapursw x3, [x18] # CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlur w3, [x27] # CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlur x23, [x25] # CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlurb w30, [x17] diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index 57e15a48c0bff..898141cbcf978 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -105,7 +105,7 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { "E-m:e-Fn32-i64:64-i128:128-n32:64"); EXPECT_EQ( UpgradeDataLayoutString("E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"), - "E-m:a-Fi64-i64:64-i128:128-n32:64"); + "E-m:a-Fi64-i64:64-i128:128-n32:64-f64:32:64"); // Check that WebAssembly targets add -i128:128. EXPECT_EQ( @@ -189,6 +189,16 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { "E-m:e-Fn32-i64:64-n32"); EXPECT_EQ(UpgradeDataLayoutString("E-m:a-Fi64-i64:64-n32", "powerpc-aix"), "E-m:a-Fi64-i64:64-n32"); + + EXPECT_EQ(UpgradeDataLayoutString("E-m:a-p:32:32-Fi32-i64:64-n32", + "powerpc-unknown-aix"), + "E-m:a-p:32:32-Fi32-i64:64-n32-f64:32:64"); + EXPECT_EQ( + UpgradeDataLayoutString( + "E-m:a-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512", + "powerpc64-unknown-aix"), + "E-m:a-Fi64-i64:64-i128:128-n32:64-f64:32:64-S128-v256:256:256-v512:512:" + "512"); } TEST(DataLayoutUpgradeTest, EmptyDataLayout) { diff --git a/llvm/utils/TableGen/Common/InfoByHwMode.cpp b/llvm/utils/TableGen/Common/InfoByHwMode.cpp index a3f8909c36090..4ab27a610249d 100644 --- a/llvm/utils/TableGen/Common/InfoByHwMode.cpp +++ b/llvm/utils/TableGen/Common/InfoByHwMode.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include @@ -32,10 +33,12 @@ std::string llvm::getModeName(unsigned Mode) { ValueTypeByHwMode::ValueTypeByHwMode(const Record *R, const CodeGenHwModes &CGH) : InfoByHwMode(R) { const HwModeSelect &MS = CGH.getHwModeSelect(R); - for (const HwModeSelect::PairType &P : MS.Items) { - auto I = Map.try_emplace(P.first, MVT(llvm::getValueType(P.second))); - assert(I.second && "Duplicate entry?"); - (void)I; + for (auto [ModeID, VT] : MS.Items) { + assert(VT && VT->isSubClassOf("ValueType")); + if (!Map.try_emplace(ModeID, MVT(llvm::getValueType(VT))).second) + PrintFatalError(R->getLoc(), "duplicate ValueType entry for HwMode " + + CGH.getModeName(ModeID, true) + ": " + + VT->getName()); } if (R->isSubClassOf("PtrValueType")) PtrAddrSpace = R->getValueAsInt("AddrSpace"); @@ -143,10 +146,12 @@ RegSizeInfoByHwMode::RegSizeInfoByHwMode(const Record *R, const CodeGenHwModes &CGH) : InfoByHwMode(R) { const HwModeSelect &MS = CGH.getHwModeSelect(R); - for (const HwModeSelect::PairType &P : MS.Items) { - auto I = Map.try_emplace(P.first, RegSizeInfo(P.second)); - assert(I.second && "Duplicate entry?"); - (void)I; + for (auto [ModeID, RegInfo] : MS.Items) { + assert(RegInfo && RegInfo->isSubClassOf("RegInfo")); + if (!Map.try_emplace(ModeID, RegSizeInfo(RegInfo)).second) + 
PrintFatalError(R->getLoc(), "duplicate RegInfo entry for HwMode " + + CGH.getModeName(ModeID, true) + ": " + + RegInfo->getName()); } } @@ -198,7 +203,9 @@ RegClassByHwMode::RegClassByHwMode(const Record *R, const CodeGenHwModes &CGH, "Register class must subclass RegisterClass"); const CodeGenRegisterClass *RegClass = RegBank.getRegClass(RegClassRec); if (!Map.try_emplace(ModeID, RegClass).second) - llvm_unreachable("duplicate entry"); + PrintFatalError(R->getLoc(), "duplicate RegisterClass entry for HwMode " + + CGH.getModeName(ModeID, true) + ": " + + RegClass->getName()); } } @@ -211,10 +218,12 @@ SubRegRangeByHwMode::SubRegRangeByHwMode(const Record *R, const CodeGenHwModes &CGH) : InfoByHwMode(R) { const HwModeSelect &MS = CGH.getHwModeSelect(R); - for (const HwModeSelect::PairType &P : MS.Items) { - auto I = Map.try_emplace(P.first, SubRegRange(P.second)); - assert(I.second && "Duplicate entry?"); - (void)I; + for (auto [ModeID, Range] : MS.Items) { + assert(Range && Range->isSubClassOf("SubRegRange")); + if (!Map.try_emplace(ModeID, SubRegRange(Range)).second) + PrintFatalError(R->getLoc(), "duplicate SubRegRange entry for HwMode " + + CGH.getModeName(ModeID, true) + ": " + + Range->getName()); } } @@ -222,12 +231,14 @@ EncodingInfoByHwMode::EncodingInfoByHwMode(const Record *R, const CodeGenHwModes &CGH) : InfoByHwMode(R) { const HwModeSelect &MS = CGH.getHwModeSelect(R); - for (const HwModeSelect::PairType &P : MS.Items) { - assert(P.second && P.second->isSubClassOf("InstructionEncoding") && + for (auto [ModeID, Encoding] : MS.Items) { + assert(Encoding && Encoding->isSubClassOf("InstructionEncoding") && "Encoding must subclass InstructionEncoding"); - auto I = Map.try_emplace(P.first, P.second); - assert(I.second && "Duplicate entry?"); - (void)I; + if (!Map.try_emplace(ModeID, Encoding).second) + PrintFatalError(R->getLoc(), + "duplicate InstructionEncoding entry for HwMode " + + CGH.getModeName(ModeID, true) + ": " + + Encoding->getName()); } } diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index a0a00513d7da5..51d310970fda9 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -543,7 +543,7 @@ def NVVM_NanosleepOp : NVVM_Op<"nanosleep">, // NVVM Performance Monitor events //===----------------------------------------------------------------------===// -def NVVM_PMEventOp : NVVM_PTXBuilder_Op<"pmevent">, +def NVVM_PMEventOp : NVVM_Op<"pmevent">, Arguments<(ins OptionalAttr:$maskedEventId, OptionalAttr:$eventId)> { let summary = "Trigger one or more Performance Monitor events."; @@ -561,20 +561,20 @@ def NVVM_PMEventOp : NVVM_PTXBuilder_Op<"pmevent">, [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent) }]; - string llvmBuilder = [{ - llvm::Value *mId = builder.getInt16(* $maskedEventId); - createIntrinsicCall(builder, llvm::Intrinsic::nvvm_pm_event_mask, {mId}); - }]; - let assemblyFormat = "attr-dict (`id` `=` $eventId^)? 
(`mask` `=` $maskedEventId^)?"; + let hasVerifier = 1; let extraClassDeclaration = [{ - bool hasIntrinsic() { return !getEventId(); } + static mlir::NVVM::IDArgPair + getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase& builder); }]; - let extraClassDefinition = [{ - std::string $cppClass::getPtx() { return std::string("pmevent %0;"); } + + string llvmBuilder = [{ + auto [id, args] = NVVM::PMEventOp::getIntrinsicIDAndArgs( + *op, moduleTranslation, builder); + createIntrinsicCall(builder, id, args); }]; - let hasVerifier = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 89197ec2f50b6..fd84ed6399d5d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -2476,6 +2476,25 @@ mlir::NVVM::IDArgPair NVVM::BarrierOp::getIntrinsicIDAndArgs( return {id, std::move(args)}; } +mlir::NVVM::IDArgPair +PMEventOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase &builder) { + auto thisOp = cast(op); + llvm::Type *i16Ty = llvm::Type::getInt16Ty(mt.getLLVMContext()); + + // With event-id, mask is generated as (1 << event-id) + llvm::Value *maskVal; + if (auto eventAttr = thisOp.getEventIdAttr()) { + uint16_t mask = static_cast(1u << eventAttr.getInt()); + maskVal = llvm::ConstantInt::get(i16Ty, mask); + } else { + maskVal = + llvm::ConstantInt::get(i16Ty, thisOp.getMaskedEventIdAttr().getValue()); + } + + return {llvm::Intrinsic::nvvm_pm_event_mask, {maskVal}}; +} + mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs( Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { auto thisOp = cast(op); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp index c2485a08932dd..bbfbd2e9736a1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp @@ -279,6 +279,17 @@ static FailureOr specializeLinalgConvolutions(RewriterBase &rewriter, CONV_OP_SPECIALIZER(linalg::Conv1DNwcWcfOp); CONV_OP_SPECIALIZER(linalg::Conv1DNcwFcwOp); CONV_OP_SPECIALIZER(linalg::Conv2DOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNhwcHwcfOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNhwcHwcfQOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNhwcFhwcOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNhwcFhwcQOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNchwFchwOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNchwFchwQOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNgchwFgchwOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNgchwGfchwOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNgchwGfchwQOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNhwgcGfhwcOp); + CONV_OP_SPECIALIZER(linalg::Conv2DNhwgcGfhwcQOp); CONV_OP_SPECIALIZER(linalg::Conv3DOp); // ----------------------------- // Depthwise Convolution ops. @@ -287,6 +298,10 @@ static FailureOr specializeLinalgConvolutions(RewriterBase &rewriter, CONV_OP_SPECIALIZER(linalg::DepthwiseConv1DNwcWcOp); CONV_OP_SPECIALIZER(linalg::DepthwiseConv1DNwcWcmOp); CONV_OP_SPECIALIZER(linalg::DepthwiseConv2DNchwChwOp); + CONV_OP_SPECIALIZER(linalg::DepthwiseConv2DNhwcHwcOp); + CONV_OP_SPECIALIZER(linalg::DepthwiseConv2DNhwcHwcQOp); + CONV_OP_SPECIALIZER(linalg::DepthwiseConv2DNhwcHwcmOp); + CONV_OP_SPECIALIZER(linalg::DepthwiseConv2DNhwcHwcmQOp); CONV_OP_SPECIALIZER(linalg::DepthwiseConv3DNdhwcDhwcmOp); // ----------------------------- // Pooling ops. 
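Looping back to the PMEventOp lowering above: getIntrinsicIDAndArgs now always selects the nvvm_pm_event_mask intrinsic, synthesizing a one-hot mask from a plain event id and passing a pre-built mask through untouched. A compact standalone C++ sketch of just that mask selection (the exactly-one-attribute precondition is an assumption mirroring what the op's verifier is expected to enforce):

#include <cstdint>
#include <cstdio>
#include <optional>

// Mirrors the mask logic in PMEventOp::getIntrinsicIDAndArgs: a plain
// event id becomes a one-hot mask, a pre-built mask is used as-is.
static uint16_t pmEventMask(std::optional<uint16_t> maskedEventId,
                            std::optional<unsigned> eventId) {
  if (eventId)
    return static_cast<uint16_t>(1u << *eventId); // e.g. id 3 -> 0x0008
  return *maskedEventId;
}

int main() {
  std::printf("0x%04x\n", (unsigned)pmEventMask(std::nullopt, 3u));
  std::printf("0x%04x\n", (unsigned)pmEventMask(0x00a1, std::nullopt));
  return 0;
}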
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 01e6e1e248658..1244be90390e2 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -240,8 +240,8 @@ bool isReductionIterator(utils::IteratorType iteratorType) { //===----------------------------------------------------------------------===// /// Returns the BlockArgument that leads to `val`, if any. Traverses optional -/// ext* ops. -static BlockArgument getBlockArgumentWithOptionalExtOps(Value val) { +/// ext*/sitofp ops. +static BlockArgument getBlockArgumentWithOptionalCastOps(Value val) { BlockArgument blockArg = dyn_cast(val); if ((blockArg)) return blockArg; @@ -249,18 +249,82 @@ static BlockArgument getBlockArgumentWithOptionalExtOps(Value val) { Operation *defOp = val.getDefiningOp(); if (!dyn_cast_if_present(defOp) && !dyn_cast_if_present(defOp) && - !dyn_cast_if_present(defOp)) { + !dyn_cast_if_present(defOp) && + !dyn_cast_if_present(defOp)) { return nullptr; } return dyn_cast(defOp->getOperand(0)); } +/// Utility function to match the zero point offset body of quantized +/// convolution ops. +/// +/// Quantized convolutions have a body of the form: +/// %out + ((%input - %inputZp) * (%filter - %filterZp)) +/// where: +/// - %input is the input tensor element (block arg 0) +/// - %filter is the filter tensor element (block arg 1) +/// - %inputZp is the input zero-point scalar (block arg 2) +/// - %filterZp is the filter zero-point scalar (block arg 3) +/// - %out is the output accumulator (block arg 4) +/// +/// This function verifies that the multiplication operands are subtraction +/// operations matching this pattern. +static bool bodyMatcherForZeroPointOffsets(Operation *addOp, Operation *mulOp, + Block *body) { + // The multiplication should have two subtraction operands: + // one for (input - inputZp) and one for (filter - filterZp). + Operation *inputSubOp = mulOp->getOperand(0).getDefiningOp(); + if (!isa_and_present(inputSubOp)) + return false; + + Operation *filterSubOp = mulOp->getOperand(1).getDefiningOp(); + if (!isa_and_present(filterSubOp)) + return false; + + // Extract block arguments from subtraction operands. + BlockArgument inputBlockArg = + getBlockArgumentWithOptionalCastOps(inputSubOp->getOperand(0)); + BlockArgument inputZpBlockArg = + getBlockArgumentWithOptionalCastOps(inputSubOp->getOperand(1)); + BlockArgument filterBlockArg = + getBlockArgumentWithOptionalCastOps(filterSubOp->getOperand(0)); + BlockArgument filterZpBlockArg = + getBlockArgumentWithOptionalCastOps(filterSubOp->getOperand(1)); + BlockArgument outBlockArg = + getBlockArgumentWithOptionalCastOps(addOp->getOperand(0)); + + // Verify all block arguments are valid. + if (!inputBlockArg || !inputZpBlockArg || !filterBlockArg || + !filterZpBlockArg || !outBlockArg) + return false; + + // Verify all block arguments belong to the convolution body. 
+ if (inputBlockArg.getOwner() != body || inputZpBlockArg.getOwner() != body || + filterBlockArg.getOwner() != body || + filterZpBlockArg.getOwner() != body || outBlockArg.getOwner() != body) + return false; + + // Verify block arguments have expected indices: + // arg0: input, arg1: filter, arg2: inputZp, arg3: filterZp, arg4: output + if (inputBlockArg.getArgNumber() != 0 || filterBlockArg.getArgNumber() != 1 || + inputZpBlockArg.getArgNumber() != 2 || + filterZpBlockArg.getArgNumber() != 3 || outBlockArg.getArgNumber() != 4) + return false; + + return true; +} + /// Utility to match block body for convolution ops. /// The body is thus expected to yield :- /// %out + (%lhs * %rhs) /// where: %lhs, %rhs and %out are block arguments and /// %lhs and %rhs can have optional upcast operation. -static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body) { +/// NOTE: In case of zero point offset convolution ops, %lhs and %rhs would be :- +/// %input - %input_scalar +/// where %input_scalar can have an optional upcast operation. +static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body, + bool containsZeroPointOffset = false) { Operation *addOp = yieldVal.getDefiningOp(); if (!isa_and_present(addOp)) return false; @@ -269,12 +333,15 @@ static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body) { if (!isa_and_present(mulOp)) return false; + if (containsZeroPointOffset) { + return bodyMatcherForZeroPointOffsets(addOp, mulOp, body); + } BlockArgument lhsBlockArg = - getBlockArgumentWithOptionalExtOps(mulOp->getOperand(0)); + getBlockArgumentWithOptionalCastOps(mulOp->getOperand(0)); BlockArgument rhsBlockArg = - getBlockArgumentWithOptionalExtOps(mulOp->getOperand(1)); + getBlockArgumentWithOptionalCastOps(mulOp->getOperand(1)); BlockArgument outBlockArg = - getBlockArgumentWithOptionalExtOps(addOp->getOperand(0)); + getBlockArgumentWithOptionalCastOps(addOp->getOperand(0)); if (!lhsBlockArg || !rhsBlockArg || !outBlockArg || lhsBlockArg.getOwner() != body || rhsBlockArg.getOwner() != body || outBlockArg.getOwner() != body || lhsBlockArg.getArgNumber() != 0 || @@ -291,9 +358,9 @@ static bool bodyMatcherForPoolOps(Value yieldVal, Block *body) { return false; BlockArgument lhsArg = - getBlockArgumentWithOptionalExtOps(defOp->getOperand(0)); + getBlockArgumentWithOptionalCastOps(defOp->getOperand(0)); BlockArgument rhsArg = - getBlockArgumentWithOptionalExtOps(defOp->getOperand(1)); + getBlockArgumentWithOptionalCastOps(defOp->getOperand(1)); if (!lhsArg || !rhsArg || lhsArg.getOwner() != body || rhsArg.getOwner() != body || lhsArg.getArgNumber() != 2 || rhsArg.getArgNumber() != 0) @@ -502,14 +569,15 @@ class ConvMatcherBuilder { } /// Match body pattern. This should be called last.
- bool matchBody() { + bool matchBody(bool zeroPointOffset = false) { if (!matched) return false; Block *body = op.getBlock(); auto yieldOp = cast(body->getTerminator()); switch (poolingType) { case PoolingType::None: - return bodyMatcherForConvolutionOps(yieldOp.getOperand(0), body); + return bodyMatcherForConvolutionOps(yieldOp.getOperand(0), body, + zeroPointOffset); case PoolingType::MaxSigned: return bodyMatcherForMaxSignedPoolOps(yieldOp.getOperand(0), body); case PoolingType::MaxUnsigned: @@ -634,6 +702,361 @@ bool isaConvolutionOpOfType(LinalgOp op, .matchBody(); } +// #inputMap = affine_map<(N, H, W, F, h, w, c) -> (N, H + h, W + w, c)> +// #filterMap = affine_map<(N, H, W, F, h, w, c) -> (h, w, c, F)> +// #outputMap = affine_map<(N, H, W, F, h, w, c) -> (N, H, W, F)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides); + AffineExpr N = m.dim(0); + AffineExpr H = m.dim(1); + AffineExpr W = m.dim(2); + AffineExpr F = m.dim(3); + AffineExpr h = m.dim(4); + AffineExpr w = m.dim(5); + AffineExpr c = m.dim(6); + + return m.matchStride(/*iDim=*/1, /*fDim=*/0, /*oDim=*/1, /*idx=*/0) + .matchStride(/*iDim=*/2, /*fDim=*/1, /*oDim=*/2, /*idx=*/1) + .matchMaps({/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), c}, + /*filterMap=*/{h, w, c, F}, + /*outputMap=*/{N, H, W, F}}) + .matchBody(); +} + +// #inputMap = affine_map<(N, H, W, F, h, w, c) -> (N, H + h, W + w, c)> +// #filterMap = affine_map<(N, H, W, F, h, w, c) -> (h, w, c, F)> +// #scalarMap = affine_map<(N, H, W, F, h, w, c) -> ()> +// #outputMap = affine_map<(N, H, W, F, h, w, c) -> (N, H, W, F)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides); + AffineExpr N = m.dim(0); + AffineExpr H = m.dim(1); + AffineExpr W = m.dim(2); + AffineExpr F = m.dim(3); + AffineExpr h = m.dim(4); + AffineExpr w = m.dim(5); + AffineExpr c = m.dim(6); + + return m.matchStride(/*iDim=*/1, /*fDim=*/0, /*oDim=*/1, /*idx=*/0) + .matchStride(/*iDim=*/2, /*fDim=*/1, /*oDim=*/2, /*idx=*/1) + .matchMaps({/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), c}, + /*filterMap=*/{h, w, c, F}, + /*scalarMap=*/{}, + /*scalarMap=*/{}, + /*outputMap=*/{N, H, W, F}}) + .matchBody(/*zeroPointOffset=*/true); +} + +// #inputMap = affine_map<(N, H, W, F, h, w, c) -> (N, H + h, W + w, c)> +// #filterMap = affine_map<(N, H, W, F, h, w, c) -> (F, h, w, c)> +// #outputMap = affine_map<(N, H, W, F, h, w, c) -> (N, H, W, F)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides); + AffineExpr N = m.dim(0); + AffineExpr H = m.dim(1); + AffineExpr W = m.dim(2); + AffineExpr F = m.dim(3); + AffineExpr h = m.dim(4); + AffineExpr w = m.dim(5); + AffineExpr c = m.dim(6); + + return m.matchStride(/*iDim=*/1, /*fDim=*/1, /*oDim=*/1, /*idx=*/0) + .matchStride(/*iDim=*/2, /*fDim=*/2, /*oDim=*/2, /*idx=*/1) + .matchMaps({/*inputMap=*/{N, 
+// #inputMap = affine_map<(N, H, W, F, h, w, c) -> (N, H + h, W + w, c)>
+// #filterMap = affine_map<(N, H, W, F, h, w, c) -> (F, h, w, c)>
+// #scalarMap = affine_map<(N, H, W, F, h, w, c) -> ()>
+// #outputMap = affine_map<(N, H, W, F, h, w, c) -> (N, H, W, F)>
+template <>
+bool isaConvolutionOpOfType<linalg::Conv2DNhwcFhwcQOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::Conv2DNhwcFhwcQOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr H = m.dim(1);
+  AffineExpr W = m.dim(2);
+  AffineExpr F = m.dim(3);
+  AffineExpr h = m.dim(4);
+  AffineExpr w = m.dim(5);
+  AffineExpr c = m.dim(6);
+
+  return m.matchStride(/*iDim=*/1, /*fDim=*/1, /*oDim=*/1, /*idx=*/0)
+      .matchStride(/*iDim=*/2, /*fDim=*/2, /*oDim=*/2, /*idx=*/1)
+      .matchMaps({/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), c},
+                  /*filterMap=*/{F, h, w, c},
+                  /*scalarMap=*/{},
+                  /*scalarMap=*/{},
+                  /*outputMap=*/{N, H, W, F}})
+      .matchBody(/*zeroPointOffset=*/true);
+}
+
+// #inputMap = affine_map<(N, F, H, W, c, h, w) -> (N, c, H + h, W + w)>
+// #filterMap = affine_map<(N, F, H, W, c, h, w) -> (F, c, h, w)>
+// #outputMap = affine_map<(N, F, H, W, c, h, w) -> (N, F, H, W)>
+template <>
+bool isaConvolutionOpOfType<linalg::Conv2DNchwFchwOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::Conv2DNchwFchwOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr F = m.dim(1);
+  AffineExpr H = m.dim(2);
+  AffineExpr W = m.dim(3);
+  AffineExpr c = m.dim(4);
+  AffineExpr h = m.dim(5);
+  AffineExpr w = m.dim(6);
+
+  return m.matchStride(/*iDim=*/2, /*fDim=*/2, /*oDim=*/2, /*idx=*/0)
+      .matchStride(/*iDim=*/3, /*fDim=*/3, /*oDim=*/3, /*idx=*/1)
+      .matchMaps({/*inputMap=*/{N, c, m.strided(H, h, 0), m.strided(W, w, 1)},
+                  /*filterMap=*/{F, c, h, w},
+                  /*outputMap=*/{N, F, H, W}})
+      .matchBody();
+}
+
+// #inputMap = affine_map<(N, F, H, W, c, h, w) -> (N, c, H + h, W + w)>
+// #filterMap = affine_map<(N, F, H, W, c, h, w) -> (F, c, h, w)>
+// #scalarMap = affine_map<(N, F, H, W, c, h, w) -> ()>
+// #outputMap = affine_map<(N, F, H, W, c, h, w) -> (N, F, H, W)>
+template <>
+bool isaConvolutionOpOfType<linalg::Conv2DNchwFchwQOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::Conv2DNchwFchwQOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr F = m.dim(1);
+  AffineExpr H = m.dim(2);
+  AffineExpr W = m.dim(3);
+  AffineExpr c = m.dim(4);
+  AffineExpr h = m.dim(5);
+  AffineExpr w = m.dim(6);
+
+  return m.matchStride(/*iDim=*/2, /*fDim=*/2, /*oDim=*/2, /*idx=*/0)
+      .matchStride(/*iDim=*/3, /*fDim=*/3, /*oDim=*/3, /*idx=*/1)
+      .matchMaps({/*inputMap=*/{N, c, m.strided(H, h, 0), m.strided(W, w, 1)},
+                  /*filterMap=*/{F, c, h, w},
+                  /*scalarMap=*/{},
+                  /*scalarMap=*/{},
+                  /*outputMap=*/{N, F, H, W}})
+      .matchBody(/*zeroPointOffset=*/true);
+}
+
+// #inputMap = affine_map<(N, G, F, H, W, c, h, w) -> (N, G, c, H + h, W + w)>
+// #filterMap = affine_map<(N, G, F, H, W, c, h, w) -> (F, G, c, h, w)>
+// #outputMap = affine_map<(N, G, F, H, W, c, h, w) -> (N, G, F, H, W)>
+template <>
+bool isaConvolutionOpOfType<linalg::Conv2DNgchwFgchwOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::Conv2DNgchwFgchwOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr G = m.dim(1);
+  AffineExpr F = m.dim(2);
+  AffineExpr H = m.dim(3);
+  AffineExpr W = m.dim(4);
+  AffineExpr c = m.dim(5);
+  AffineExpr h = m.dim(6);
+  AffineExpr w = m.dim(7);
+
+  return m.matchStride(/*iDim=*/3, /*fDim=*/3, /*oDim=*/3, /*idx=*/0)
+      .matchStride(/*iDim=*/4, /*fDim=*/4, /*oDim=*/4, /*idx=*/1)
+      .matchMaps(
+          {/*inputMap=*/{N, G, c, m.strided(H, h, 0), m.strided(W, w, 1)},
+           /*filterMap=*/{F, G, c, h, w},
+           /*outputMap=*/{N, G, F, H, W}})
+      .matchBody();
+}
+
+// #inputMap = affine_map<(N, G, F, H, W, c, h, w) -> (N, G, c, H + h, W + w)>
+// #filterMap = affine_map<(N, G, F, H, W, c, h, w) -> (G, F, c, h, w)>
+// #outputMap = affine_map<(N, G, F, H, W, c, h, w) -> (N, G, F, H, W)>
+template <>
+bool isaConvolutionOpOfType<linalg::Conv2DNgchwGfchwOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::Conv2DNgchwGfchwOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr G = m.dim(1);
+  AffineExpr F = m.dim(2);
+  AffineExpr H = m.dim(3);
+  AffineExpr W = m.dim(4);
+  AffineExpr c = m.dim(5);
+  AffineExpr h = m.dim(6);
+  AffineExpr w = m.dim(7);
+
+  return m.matchStride(/*iDim=*/3, /*fDim=*/3, /*oDim=*/3, /*idx=*/0)
+      .matchStride(/*iDim=*/4, /*fDim=*/4, /*oDim=*/4, /*idx=*/1)
+      .matchMaps(
+          {/*inputMap=*/{N, G, c, m.strided(H, h, 0), m.strided(W, w, 1)},
+           /*filterMap=*/{G, F, c, h, w},
+           /*outputMap=*/{N, G, F, H, W}})
+      .matchBody();
+}
+
+// #inputMap = affine_map<(N, G, F, H, W, c, h, w) -> (N, G, c, H + h, W + w)>
+// #filterMap = affine_map<(N, G, F, H, W, c, h, w) -> (G, F, c, h, w)>
+// #scalarMap = affine_map<(N, G, F, H, W, c, h, w) -> ()>
+// #outputMap = affine_map<(N, G, F, H, W, c, h, w) -> (N, G, F, H, W)>
+template <>
+bool isaConvolutionOpOfType<linalg::Conv2DNgchwGfchwQOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::Conv2DNgchwGfchwQOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr G = m.dim(1);
+  AffineExpr F = m.dim(2);
+  AffineExpr H = m.dim(3);
+  AffineExpr W = m.dim(4);
+  AffineExpr c = m.dim(5);
+  AffineExpr h = m.dim(6);
+  AffineExpr w = m.dim(7);
+
+  return m.matchStride(/*iDim=*/3, /*fDim=*/3, /*oDim=*/3, /*idx=*/0)
+      .matchStride(/*iDim=*/4, /*fDim=*/4, /*oDim=*/4, /*idx=*/1)
+      .matchMaps(
+          {/*inputMap=*/{N, G, c, m.strided(H, h, 0), m.strided(W, w, 1)},
+           /*filterMap=*/{G, F, c, h, w},
+           /*scalarMap=*/{},
+           /*scalarMap=*/{},
+           /*outputMap=*/{N, G, F, H, W}})
+      .matchBody(/*zeroPointOffset=*/true);
+}
+
+// #inputMap = affine_map<(N, H, W, G, F, h, w, c) -> (N, H + h, W + w, G, c)>
+// #filterMap = affine_map<(N, H, W, G, F, h, w, c) -> (G, F, h, w, c)>
+// #outputMap = affine_map<(N, H, W, G, F, h, w, c) -> (N, H, W, G, F)>
+template <>
+bool isaConvolutionOpOfType<linalg::Conv2DNhwgcGfhwcOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::Conv2DNhwgcGfhwcOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr H = m.dim(1);
+  AffineExpr W = m.dim(2);
+  AffineExpr G = m.dim(3);
+  AffineExpr F = m.dim(4);
+  AffineExpr h = m.dim(5);
+  AffineExpr w = m.dim(6);
+  AffineExpr c = m.dim(7);
+
+  return m.matchStride(/*iDim=*/1, /*fDim=*/2, /*oDim=*/1, /*idx=*/0)
+      .matchStride(/*iDim=*/2, /*fDim=*/3, /*oDim=*/2, /*idx=*/1)
+      .matchMaps(
+          {/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), G, c},
+           /*filterMap=*/{G, F, h, w, c},
+           /*outputMap=*/{N, H, W, G, F}})
+      .matchBody();
+}
+
+// #inputMap = affine_map<(N, H, W, G, F, h, w, c) -> (N, H + h, W + w, G, c)>
+// #filterMap = affine_map<(N, H, W, G, F, h, w, c) -> (G, F, h, w, c)>
+// #scalarMap = affine_map<(N, H, W, G, F, h, w, c) -> ()>
+// #outputMap = affine_map<(N, H, W, G, F, h, w, c) -> (N, H, W, G, F)>
+template <>
+bool isaConvolutionOpOfType<linalg::Conv2DNhwgcGfhwcQOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::Conv2DNhwgcGfhwcQOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr H = m.dim(1);
+  AffineExpr W = m.dim(2);
+  AffineExpr G = m.dim(3);
+  AffineExpr F = m.dim(4);
+  AffineExpr h = m.dim(5);
+  AffineExpr w = m.dim(6);
+  AffineExpr c = m.dim(7);
+
+  return m.matchStride(/*iDim=*/1, /*fDim=*/2, /*oDim=*/1, /*idx=*/0)
+      .matchStride(/*iDim=*/2, /*fDim=*/3, /*oDim=*/2, /*idx=*/1)
+      .matchMaps(
+          {/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), G, c},
+           /*filterMap=*/{G, F, h, w, c},
+           /*scalarMap=*/{},
+           /*scalarMap=*/{},
+           /*outputMap=*/{N, H, W, G, F}})
+      .matchBody(/*zeroPointOffset=*/true);
+}
+
 // #inputMap = affine_map<(D, H, W, d, h, w) -> (D + d, H + h, W + w)>
 // #filterMap = affine_map<(D, H, W, d, h, w) -> (d, h, w)>
 // #outputMap = affine_map<(D, H, W, d, h, w) -> (D, H, W)>
@@ -773,6 +1196,130 @@ bool isaConvolutionOpOfType
       .matchBody();
 }
 
+// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
+// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w, C)>
+// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
+template <>
+bool isaConvolutionOpOfType<linalg::DepthwiseConv2DNhwcHwcOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::DepthwiseConv2DNhwcHwcOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr H = m.dim(1);
+  AffineExpr W = m.dim(2);
+  AffineExpr C = m.dim(3);
+  AffineExpr h = m.dim(4);
+  AffineExpr w = m.dim(5);
+
+  return m.matchStride(/*iDim=*/1, /*fDim=*/0, /*oDim=*/1, /*idx=*/0)
+      .matchStride(/*iDim=*/2, /*fDim=*/1, /*oDim=*/2, /*idx=*/1)
+      .matchMaps({/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), C},
+                  /*filterMap=*/{h, w, C},
+                  /*outputMap=*/{N, H, W, C}})
+      .matchBody();
+}
+
+// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
+// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w, C)>
+// #scalarMap = affine_map<(N, H, W, C, h, w) -> ()>
+// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
+template <>
+bool isaConvolutionOpOfType<linalg::DepthwiseConv2DNhwcHwcQOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::DepthwiseConv2DNhwcHwcQOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr H = m.dim(1);
+  AffineExpr W = m.dim(2);
+  AffineExpr C = m.dim(3);
+  AffineExpr h = m.dim(4);
+  AffineExpr w = m.dim(5);
+
+  return m.matchStride(/*iDim=*/1, /*fDim=*/0, /*oDim=*/1, /*idx=*/0)
+      .matchStride(/*iDim=*/2, /*fDim=*/1, /*oDim=*/2, /*idx=*/1)
+      .matchMaps({/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), C},
+                  /*filterMap=*/{h, w, C},
+                  /*scalarMap=*/{},
+                  /*scalarMap=*/{},
+                  /*outputMap=*/{N, H, W, C}})
+      .matchBody(/*zeroPointOffset=*/true);
+}
+
+// #inputMap = affine_map<(N, H, W, C, CM, h, w) -> (N, H + h, W + w, C)>
+// #filterMap = affine_map<(N, H, W, C, CM, h, w) -> (h, w, C, CM)>
+// #outputMap = affine_map<(N, H, W, C, CM, h, w) -> (N, H, W, C, CM)>
+template <>
+bool isaConvolutionOpOfType<linalg::DepthwiseConv2DNhwcHwcmOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::DepthwiseConv2DNhwcHwcmOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr H = m.dim(1);
+  AffineExpr W = m.dim(2);
+  AffineExpr C = m.dim(3);
+  AffineExpr CM = m.dim(4);
+  AffineExpr h = m.dim(5);
+  AffineExpr w = m.dim(6);
+
+  return m.matchStride(/*iDim=*/1, /*fDim=*/0, /*oDim=*/1, /*idx=*/0)
+      .matchStride(/*iDim=*/2, /*fDim=*/1, /*oDim=*/2, /*idx=*/1)
+      .matchMaps({/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), C},
+                  /*filterMap=*/{h, w, C, CM},
+                  /*outputMap=*/{N, H, W, C, CM}})
+      .matchBody();
+}
+
+// #inputMap = affine_map<(N, H, W, C, CM, h, w) -> (N, H + h, W + w, C)>
+// #filterMap = affine_map<(N, H, W, C, CM, h, w) -> (h, w, C, CM)>
+// #scalarMap = affine_map<(N, H, W, C, CM, h, w) -> ()>
+// #outputMap = affine_map<(N, H, W, C, CM, h, w) -> (N, H, W, C, CM)>
+template <>
+bool isaConvolutionOpOfType<linalg::DepthwiseConv2DNhwcHwcmQOp>(
+    LinalgOp op, SmallVector<int64_t> *dilations,
+    SmallVector<int64_t> *strides) {
+  if (isa<linalg::DepthwiseConv2DNhwcHwcmQOp>(op))
+    return true;
+
+  assert(isaConvolutionOpInterface(op) &&
+         "expected op to implement ConvolutionOpInterface");
+
+  ConvMatcherBuilder m(op, /*spatialRank=*/2, dilations, strides);
+  AffineExpr N = m.dim(0);
+  AffineExpr H = m.dim(1);
+  AffineExpr W = m.dim(2);
+  AffineExpr C = m.dim(3);
+  AffineExpr CM = m.dim(4);
+  AffineExpr h = m.dim(5);
+  AffineExpr w = m.dim(6);
+
+  return m.matchStride(/*iDim=*/1, /*fDim=*/0, /*oDim=*/1, /*idx=*/0)
+      .matchStride(/*iDim=*/2, /*fDim=*/1, /*oDim=*/2, /*idx=*/1)
+      .matchMaps({/*inputMap=*/{N, m.strided(H, h, 0), m.strided(W, w, 1), C},
+                  /*filterMap=*/{h, w, C, CM},
+                  /*scalarMap=*/{},
+                  /*scalarMap=*/{},
+                  /*outputMap=*/{N, H, W, C, CM}})
+      .matchBody(/*zeroPointOffset=*/true);
+}
+
 // #inputMap = affine_map<(N, D, H, W, CM, d, h, w, C)
 //               -> (N, D + d, H + h, W + w, C)>
 // #filterMap = affine_map<(N, D, H, W, CM, d, h, w, C)
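The `zeroPointOffset` flag passed to `matchBody` corresponds to the quantized `*_q` body, which subtracts the two scalar zero points before the multiply-accumulate. A scalar C++ model of the accumulation such a body computes (a sketch under the assumption of i8 data widened to i32; the actual ops express this with linalg body ops and signed casts):

#include <cstdint>

// acc += (in - zpInput) * (filter - zpFilter), with operands widened to i32.
static int32_t accumulateQuantized(int32_t acc, int8_t in, int8_t filt,
                                   int32_t zpInput, int32_t zpFilter) {
  return acc + (static_cast<int32_t>(in) - zpInput) *
                   (static_cast<int32_t>(filt) - zpFilter);
}
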
diff --git a/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp
index 410a6bffd345e..496a7b036e65d 100644
--- a/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp
@@ -17,19 +17,36 @@ namespace mlir {
 namespace scf {
 namespace {
 
+static AffineExpr getTripCountExpr(OpFoldResult lb, OpFoldResult ub,
+                                   OpFoldResult step,
+                                   ValueBoundsConstraintSet &cstr) {
+  AffineExpr lbExpr = cstr.getExpr(lb);
+  AffineExpr ubExpr = cstr.getExpr(ub);
+  AffineExpr stepExpr = cstr.getExpr(step);
+  AffineExpr tripCountExpr =
+      AffineExpr(ubExpr - lbExpr).ceilDiv(stepExpr); // (ub - lb) / step
+  return tripCountExpr;
+}
+
+static void populateIVBounds(OpFoldResult lb, OpFoldResult ub,
+                             OpFoldResult step, Value iv,
+                             ValueBoundsConstraintSet &cstr) {
+  cstr.bound(iv) >= cstr.getExpr(lb);
+  cstr.bound(iv) < cstr.getExpr(ub);
+  // iv <= lb + ((ub-lb)/step - 1) * step
+  // This bound does not replace the `iv < ub` constraint mentioned above,
+  // since constraints involving the multiplication of two constraint set
+  // dimensions are not supported.
+  AffineExpr tripCountMinusOne =
+      getTripCountExpr(lb, ub, step, cstr) - cstr.getExpr(1);
+  AffineExpr computedUpperBound =
+      cstr.getExpr(lb) + AffineExpr(tripCountMinusOne * cstr.getExpr(step));
+  cstr.bound(iv) <= computedUpperBound;
+}
+
 struct ForOpInterface
     : public ValueBoundsOpInterface::ExternalModel<ForOpInterface, ForOp> {
 
-  static AffineExpr getTripCountExpr(scf::ForOp forOp,
-                                     ValueBoundsConstraintSet &cstr) {
-    AffineExpr lbExpr = cstr.getExpr(forOp.getLowerBound());
-    AffineExpr ubExpr = cstr.getExpr(forOp.getUpperBound());
-    AffineExpr stepExpr = cstr.getExpr(forOp.getStep());
-    AffineExpr tripCountExpr =
-        AffineExpr(ubExpr - lbExpr).ceilDiv(stepExpr); // (ub - lb) / step
-    return tripCountExpr;
-  }
-
   /// Populate bounds of values/dimensions for iter_args/OpResults. If the
   /// value/dimension size does not change in an iteration, we can deduce that
   /// it is the same as the initial value/dimension.
@@ -87,7 +104,8 @@ struct ForOpInterface
     // `value` is result of `forOp`, we can prove that:
     // %result == %init_arg + trip_count * (%yielded_value - %iter_arg).
     // Where trip_count is (ub - lb) / step.
-    AffineExpr tripCountExpr = getTripCountExpr(forOp, cstr);
+    AffineExpr tripCountExpr = getTripCountExpr(
+        forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep(), cstr);
     AffineExpr oneIterAdvanceExpr =
         cstr.getExpr(yieldedValue) - cstr.getExpr(iterArg);
     cstr.bound(value) ==
@@ -99,19 +117,8 @@ struct ForOpInterface
     auto forOp = cast<ForOp>(op);
 
     if (value == forOp.getInductionVar()) {
-      cstr.bound(value) >= forOp.getLowerBound();
-      cstr.bound(value) < forOp.getUpperBound();
-      // iv <= lb + ((ub-lb)/step - 1) * step
-      // This bound does not replace the `iv < ub` constraint mentioned above,
-      // since constraints involving the multiplication of two constraint set
-      // dimensions are not supported.
-      AffineExpr tripCountMinusOne =
-          getTripCountExpr(forOp, cstr) - cstr.getExpr(1);
-      AffineExpr computedUpperBound =
-          cstr.getExpr(forOp.getLowerBound()) +
-          AffineExpr(tripCountMinusOne * cstr.getExpr(forOp.getStep()));
-      cstr.bound(value) <= computedUpperBound;
-      return;
+      return populateIVBounds(forOp.getLowerBound(), forOp.getUpperBound(),
+                              forOp.getStep(), value, cstr);
     }
 
     // Handle iter_args and OpResults.
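To see why the extra bound is useful, take the values from the new `@scf_forall_computed_upper_bound` test further below: lb = 0, ub = 8, step = 3. The trip count is ceildiv(8 - 0, 3) = 3, so the last induction variable value is 0 + (3 - 1) * 3 = 6, which is tighter than `iv < 8` alone. A standalone arithmetic check (assumes a positive step, as the ceildiv formula does):

#include <cassert>

// Last IV value: lb + (ceildiv(ub - lb, step) - 1) * step.
static long lastIVValue(long lb, long ub, long step) {
  long tripCount = (ub - lb + step - 1) / step; // ceildiv for positive step
  return lb + (tripCount - 1) * step;
}

int main() {
  assert(lastIVValue(0, 8, 3) == 6); // matches the `iv <= 6` expectation
  return 0;
}
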
@@ -141,11 +148,9 @@ struct ForallOpInterface
     assert(blockArg.getArgNumber() < forallOp.getInductionVars().size() &&
            "expected index value to be an induction var");
     int64_t idx = blockArg.getArgNumber();
-    // TODO: Take into account step size.
-    AffineExpr lb = cstr.getExpr(forallOp.getMixedLowerBound()[idx]);
-    AffineExpr ub = cstr.getExpr(forallOp.getMixedUpperBound()[idx]);
-    cstr.bound(value) >= lb;
-    cstr.bound(value) < ub;
+    return populateIVBounds(forallOp.getMixedLowerBound()[idx],
+                            forallOp.getMixedUpperBound()[idx],
+                            forallOp.getMixedStep()[idx], value, cstr);
   }
 
   void populateBoundsForShapedValueDim(Operation *op, Value value, int64_t dim,
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
index 8fb36ace2c463..c4b8e93b6a9f9 100644
--- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
+++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
@@ -678,21 +678,6 @@ llvm.func @inline_ptx_multi_rw_r(%a : i32, %b : i32, %rw_c : f32, %rw_d : f32)
   llvm.return %r5 : f32
 }
 
-// -----
-
-// CHECK-LABEL: @nvvm_pmevent
-llvm.func @nvvm_pmevent() {
-  // CHECK: %[[S0:.+]] = llvm.mlir.constant(10 : i32) : i32
-  // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "pmevent $0;", "n" %[[S0]] : (i32) -> ()
-
-  nvvm.pmevent id = 10
-  // CHECK: %[[S1:.+]] = llvm.mlir.constant(4 : i32) : i32
-  // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "pmevent $0;", "n" %[[S1]] : (i32) -> ()
-  nvvm.pmevent id = 4
-  llvm.return
-}
-
 // -----
 
 llvm.func @inline_ptx_pack_4i8(%src : vector<4xi8>, %mask : i32, %zero: i32) {
diff --git a/mlir/test/Dialect/Linalg/convolution/roundtrip-convolution.mlir b/mlir/test/Dialect/Linalg/convolution/roundtrip-convolution.mlir
index 4b2d42a3ae4e0..432fdd12f540d 100644
--- a/mlir/test/Dialect/Linalg/convolution/roundtrip-convolution.mlir
+++ b/mlir/test/Dialect/Linalg/convolution/roundtrip-convolution.mlir
@@ -5,8 +5,9 @@
 // RUN: mlir-opt %s -linalg-generalize-named-ops | mlir-opt --linalg-specialize-generic-ops | FileCheck %s --implicit-check-not=linalg.generic
 
 // -----------------------------
-// Convolution ops.
+// Convolution ops - 1D.
 // -----------------------------
+
 func.func @conv_1d(%in : tensor<?xf32>, %filter : tensor<?xf32>, %out : tensor<?xf32>) -> tensor<?xf32> {
   %0 = linalg.conv_1d ins(%in, %filter : tensor<?xf32>, tensor<?xf32>)
@@ -44,6 +45,10 @@ func.func @conv_1d_ncw_fcw(%input: tensor<?x?x?xf32>, %filter: tensor<?x?x?xf32
 // -----
 
+// -----------------------------
+// Convolution ops - 2D.
+// -----------------------------
+
 func.func @conv_2d(%in : tensor<?x?xf32>, %filter : tensor<?x?xf32>, %out : tensor<?x?xf32>) -> tensor<?x?xf32> {
   %0 = linalg.conv_2d ins(%in, %filter : tensor<?x?xf32>, tensor<?x?xf32>)
                       outs(%out : tensor<?x?xf32>) -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
@@ -55,6 +60,153 @@ func.func @conv_2d(%in : tensor<?x?xf32>, %filter : tensor<?x?xf32>, %out : tens
 // -----
 
+func.func @conv_2d_nhwc_hwcf(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?xf32>, %output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
+  %0 = linalg.conv_2d_nhwc_hwcf
+    {dilations = dense<2> : tensor<2xi64>, strides = dense<3> : tensor<2xi64>}
+    ins (%input, %filter: tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
+    outs (%output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?xf32>
+}
+// CHECK: @conv_2d_nhwc_hwcf
+// CHECK: linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME: dilations = dense<2> : tensor<2xi64>, strides = dense<3> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_nhwc_hwcf_q(%input: tensor<?x?x?x?xi8>, %filter: tensor<?x?x?x?xi8>, %output: tensor<?x?x?x?xi32>, %zp_input: i32, %zp_filter: i32) -> tensor<?x?x?x?xi32> {
+  %0 = linalg.conv_2d_nhwc_hwcf_q
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter, %zp_input, %zp_filter : tensor<?x?x?x?xi8>, tensor<?x?x?x?xi8>, i32, i32)
+    outs (%output: tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32>
+  return %0 : tensor<?x?x?x?xi32>
+}
+// CHECK: @conv_2d_nhwc_hwcf_q
+// CHECK: linalg.conv_2d_nhwc_hwcf_q
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_nhwc_fhwc(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?xf32>, %output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
+  %0 = linalg.conv_2d_nhwc_fhwc
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
+    ins (%input, %filter: tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
+    outs (%output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?xf32>
+}
+// CHECK: @conv_2d_nhwc_fhwc
+// CHECK: linalg.conv_2d_nhwc_fhwc
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_nhwc_fhwc_q(%input: tensor<?x?x?x?xi8>, %filter: tensor<?x?x?x?xi8>, %output: tensor<?x?x?x?xi32>, %zp_input: i32, %zp_filter: i32) -> tensor<?x?x?x?xi32> {
+  %0 = linalg.conv_2d_nhwc_fhwc_q
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter, %zp_input, %zp_filter : tensor<?x?x?x?xi8>, tensor<?x?x?x?xi8>, i32, i32)
+    outs (%output: tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32>
+  return %0 : tensor<?x?x?x?xi32>
+}
+// CHECK: @conv_2d_nhwc_fhwc_q
+// CHECK: linalg.conv_2d_nhwc_fhwc_q
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_nchw_fchw(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?xf32>, %output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
+  %0 = linalg.conv_2d_nchw_fchw
+    {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[3, 4]> : tensor<2xi64>}
+    ins (%input, %filter: tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
+    outs (%output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?xf32>
+}
+// CHECK: @conv_2d_nchw_fchw
+// CHECK: linalg.conv_2d_nchw_fchw
+// CHECK-SAME: dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[3, 4]> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_nchw_fchw_q(%input: tensor<?x?x?x?xi8>, %filter: tensor<?x?x?x?xi8>, %output: tensor<?x?x?x?xi32>, %zp_input: i32, %zp_filter: i32) -> tensor<?x?x?x?xi32> {
+  %0 = linalg.conv_2d_nchw_fchw_q
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter, %zp_input, %zp_filter : tensor<?x?x?x?xi8>, tensor<?x?x?x?xi8>, i32, i32)
+    outs (%output: tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32>
+  return %0 : tensor<?x?x?x?xi32>
+}
+// CHECK: @conv_2d_nchw_fchw_q
+// CHECK: linalg.conv_2d_nchw_fchw_q
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_ngchw_fgchw(%input: tensor<?x?x?x?x?xf32>, %filter: tensor<?x?x?x?x?xf32>, %output: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32> {
+  %0 = linalg.conv_2d_ngchw_fgchw
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter: tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>)
+    outs (%output: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?x?xf32>
+}
+// CHECK: @conv_2d_ngchw_fgchw
+// CHECK: linalg.conv_2d_ngchw_fgchw
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_ngchw_gfchw(%input: tensor<?x?x?x?x?xf32>, %filter: tensor<?x?x?x?x?xf32>, %output: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32> {
+  %0 = linalg.conv_2d_ngchw_gfchw
+    {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter: tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>)
+    outs (%output: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?x?xf32>
+}
+// CHECK: @conv_2d_ngchw_gfchw
+// CHECK: linalg.conv_2d_ngchw_gfchw
+// CHECK-SAME: dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_ngchw_gfchw_q(%input: tensor<?x?x?x?x?xi8>, %filter: tensor<?x?x?x?x?xi8>, %output: tensor<?x?x?x?x?xi32>, %zp_input: i32, %zp_filter: i32) -> tensor<?x?x?x?x?xi32> {
+  %0 = linalg.conv_2d_ngchw_gfchw_q
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter, %zp_input, %zp_filter : tensor<?x?x?x?x?xi8>, tensor<?x?x?x?x?xi8>, i32, i32)
+    outs (%output: tensor<?x?x?x?x?xi32>) -> tensor<?x?x?x?x?xi32>
+  return %0 : tensor<?x?x?x?x?xi32>
+}
+// CHECK: @conv_2d_ngchw_gfchw_q
+// CHECK: linalg.conv_2d_ngchw_gfchw_q
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_nhwgc_gfhwc(%input: tensor<?x?x?x?x?xf32>, %filter: tensor<?x?x?x?x?xf32>, %output: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32> {
+  %0 = linalg.conv_2d_nhwgc_gfhwc
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
+    ins (%input, %filter: tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>)
+    outs (%output: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?x?xf32>
+}
+// CHECK: @conv_2d_nhwgc_gfhwc
+// CHECK: linalg.conv_2d_nhwgc_gfhwc
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>
+
+// -----
+
+func.func @conv_2d_nhwgc_gfhwc_q(%input: tensor<?x?x?x?x?xi8>, %filter: tensor<?x?x?x?x?xi8>, %output: tensor<?x?x?x?x?xi32>, %zp_input: i32, %zp_filter: i32) -> tensor<?x?x?x?x?xi32> {
+  %0 = linalg.conv_2d_nhwgc_gfhwc_q
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter, %zp_input, %zp_filter : tensor<?x?x?x?x?xi8>, tensor<?x?x?x?x?xi8>, i32, i32)
+    outs (%output: tensor<?x?x?x?x?xi32>) -> tensor<?x?x?x?x?xi32>
+  return %0 : tensor<?x?x?x?x?xi32>
+}
+// CHECK: @conv_2d_nhwgc_gfhwc_q
+// CHECK: linalg.conv_2d_nhwgc_gfhwc_q
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+// -----------------------------
+// Convolution ops - 3D.
+// -----------------------------
+
 func.func @conv_3d(%in : tensor<?x?x?xf32>, %filter : tensor<?x?x?xf32>, %out : tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
   %0 = linalg.conv_3d ins(%in, %filter : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
@@ -66,9 +218,10 @@ func.func @conv_3d(%in : tensor<?x?x?xf32>, %filter : tensor<?x?x?xf32>, %out :
 // -----
 
-// -----------------------------
-// Depthwise Convolution ops.
-// -----------------------------
+// -------------------------------
+// Depthwise Convolution ops - 1D.
+// -------------------------------
+
 func.func @depthwise_conv_1d_ncw_cw(%input: tensor<?x?x?xf32>, %filter: tensor<?x?xf32>, %output: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
   %0 = linalg.depthwise_conv_1d_ncw_cw
     {dilations = dense<3> : tensor<1xi64>, strides = dense<2> : tensor<1xi64>}
@@ -108,6 +261,10 @@ func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<?x?x?xf32>, %filter: tensor<
 // -----
 
+// -------------------------------
+// Depthwise Convolution ops - 2D.
+// -------------------------------
+
 func.func @depthwise_conv_2d_nchw_chw(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?xf32>, %output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
   %0 = linalg.depthwise_conv_2d_nchw_chw
     {dilations = dense<[2,3]> : vector<2xi64>, strides = dense<[4,5]> : vector<2xi64>}
@@ -121,6 +278,62 @@ func.func @depthwise_conv_2d_nchw_chw(%input: tensor<?x?x?x?xf32>, %filter: tens
 // -----
 
+func.func @depthwise_conv_2d_nhwc_hwc(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?xf32>, %output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
+  %0 = linalg.depthwise_conv_2d_nhwc_hwc
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
+    ins (%input, %filter: tensor<?x?x?x?xf32>, tensor<?x?x?xf32>)
+    outs (%output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?xf32>
+}
+// CHECK: @depthwise_conv_2d_nhwc_hwc
+// CHECK: linalg.depthwise_conv_2d_nhwc_hwc
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>
+
+// -----
+
+func.func @depthwise_conv_2d_nhwc_hwc_q(%input: tensor<?x?x?x?xi8>, %filter: tensor<?x?x?xi8>, %output: tensor<?x?x?x?xi32>, %zp_input: i32, %zp_filter: i32) -> tensor<?x?x?x?xi32> {
+  %0 = linalg.depthwise_conv_2d_nhwc_hwc_q
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter, %zp_input, %zp_filter : tensor<?x?x?x?xi8>, tensor<?x?x?xi8>, i32, i32)
+    outs (%output: tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32>
+  return %0 : tensor<?x?x?x?xi32>
+}
+// CHECK: @depthwise_conv_2d_nhwc_hwc_q
+// CHECK: linalg.depthwise_conv_2d_nhwc_hwc_q
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+func.func @depthwise_conv_2d_nhwc_hwcm(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?xf32>, %output: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32> {
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm
+    {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[3, 1]> : tensor<2xi64>}
+    ins (%input, %filter: tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
+    outs (%output: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?x?xf32>
+}
+// CHECK: @depthwise_conv_2d_nhwc_hwcm
+// CHECK: linalg.depthwise_conv_2d_nhwc_hwcm
+// CHECK-SAME: dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[3, 1]> : tensor<2xi64>
+
+// -----
+
+func.func @depthwise_conv_2d_nhwc_hwcm_q(%input: tensor<?x?x?x?xi8>, %filter: tensor<?x?x?x?xi8>, %output: tensor<?x?x?x?x?xi32>, %zp_input: i32, %zp_filter: i32) -> tensor<?x?x?x?x?xi32> {
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins (%input, %filter, %zp_input, %zp_filter : tensor<?x?x?x?xi8>, tensor<?x?x?x?xi8>, i32, i32)
+    outs (%output: tensor<?x?x?x?x?xi32>) -> tensor<?x?x?x?x?xi32>
+  return %0 : tensor<?x?x?x?x?xi32>
+}
+// CHECK: @depthwise_conv_2d_nhwc_hwcm_q
+// CHECK: linalg.depthwise_conv_2d_nhwc_hwcm_q
+// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>
+
+// -----
+
+// -------------------------------
+// Depthwise Convolution ops - 3D.
+// -------------------------------
+
 func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<?x?x?x?x?xf32>, %filter: tensor<?x?x?x?x?xf32>, %output: tensor<?x?x?x?x?x?xf32>) -> tensor<?x?x?x?x?x?xf32> {
   %0 = linalg.depthwise_conv_3d_ndhwc_dhwcm
     {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
@@ -137,6 +350,7 @@ func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<?x?x?x?x?xf32>, %filter:
 // -----------------------------
 // Pooling ops.
 // -----------------------------
+
 func.func @pooling_nhwc_max(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?xf32>, %output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
   %0 = linalg.pooling_nhwc_max
     {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
diff --git a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir
index 339d97df001c5..60fe96d52d20b 100644
--- a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir
@@ -379,3 +379,12 @@ func.func @scf_for_result_infer_dynamic_init_big_step(%i : index) {
   "test.compare"(%0, %7) {cmp = "LE"} : (index, index) -> ()
   return
 }
+
+func.func @scf_forall_computed_upper_bound(%x: index) {
+  %c6 = arith.constant 6 : index
+  scf.forall (%iv) = (0) to (8) step (3) {
+    // expected-remark @below{{true}}
+    "test.compare"(%iv, %c6) {cmp = "LE"} : (index, index) -> ()
+  }
+  return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/pm_event.mlir b/mlir/test/Target/LLVMIR/nvvm/pm_event.mlir
new file mode 100644
index 0000000000000..0092d32319a83
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/pm_event.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @nvvm_pmevent_mask() {
+  // CHECK-LABEL: define void @nvvm_pmevent_mask() {
+  // CHECK-NEXT: call void @llvm.nvvm.pm.event.mask(i16 15000)
+  // CHECK-NEXT: call void @llvm.nvvm.pm.event.mask(i16 4)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  nvvm.pmevent mask = 15000
+  nvvm.pmevent mask = 4
+  llvm.return
+}
+
+llvm.func @nvvm_pmevent_id() {
+  // CHECK-LABEL: define void @nvvm_pmevent_id() {
+  // CHECK-NEXT: call void @llvm.nvvm.pm.event.mask(i16 1024)
+  // CHECK-NEXT: call void @llvm.nvvm.pm.event.mask(i16 16)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  nvvm.pmevent id = 10
+  nvvm.pmevent id = 4
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/pm_event_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/pm_event_invalid.mlir
new file mode 100644
index 0000000000000..783988fb36368
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/pm_event_invalid.mlir
@@ -0,0 +1,21 @@
+// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
+
+llvm.func @pmevent_no_id() {
+  // expected-error @below {{either `id` or `mask` must be set}}
+  nvvm.pmevent
+}
+
+// -----
+
+llvm.func @pmevent_bigger15() {
+  // expected-error @below {{`id` must be between 0 and 15}}
+  nvvm.pmevent id = 16
+}
+
+// -----
+
+llvm.func @pmevent_many_ids() {
+  // expected-error @below {{`id` and `mask` cannot be set at the same time}}
+  nvvm.pmevent id = 1 mask = 1
+}
+
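The id-form cases in `pm_event.mlir` rely on `nvvm.pmevent id = N` lowering to the mask intrinsic with only bit N set, i.e. mask = 1 << id: id = 10 yields `i16 1024` and id = 4 yields `i16 16` in the CHECK lines above. A compile-time sanity check of that arithmetic:

// 1 << 10 == 1024 and 1 << 4 == 16, matching the expected i16 immediates.
static_assert((1 << 10) == 1024 && (1 << 4) == 16,
              "pmevent id N lowers to the single-bit mask 1 << N");
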
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index d5868ee73cc50..c0fe0fa11f497 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -1,26 +1,5 @@
 // RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
 
-llvm.func @pmevent_no_id() {
-  // expected-error @below {{either `id` or `mask` must be set}}
-  nvvm.pmevent
-}
-
-// -----
-
-llvm.func @pmevent_bigger15() {
-  // expected-error @below {{`id` must be between 0 and 15}}
-  nvvm.pmevent id = 141
-}
-
-// -----
-
-llvm.func @pmevent_many_ids() {
-  // expected-error @below {{`id` and `mask` cannot be set at the same time}}
-  nvvm.pmevent id = 1 mask = 1
-}
-
-// -----
-
 llvm.func @kernel_func(%numberOfThreads : i32) {
   // expected-error @below {{'nvvm.barrier' op barrier id is missing, it should be set between 0 to 15}}
   nvvm.barrier number_of_threads = %numberOfThreads
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index c4a69097692cb..9e4aadac69896 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -903,17 +903,6 @@ llvm.func @nvvm_dot_accumulate_2way(%a: vector<2xi16>, %b: vector<4xi8>, %c: i32
 
 // -----
 
-// CHECK-LABEL: @nvvm_pmevent
-llvm.func @nvvm_pmevent() {
-  // CHECK: call void @llvm.nvvm.pm.event.mask(i16 15000)
-  nvvm.pmevent mask = 15000
-  // CHECK: call void @llvm.nvvm.pm.event.mask(i16 4)
-  nvvm.pmevent mask = 4
-  llvm.return
-}
-
-// -----
-
 // CHECK-LABEL: @nanosleep
 llvm.func @nanosleep(%duration: i32) {
   // CHECK: call void @llvm.nvvm.nanosleep(i32 %{{.*}})