diff --git a/CHANGELOG.md b/CHANGELOG.md index 312df60..b2a0a83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ - **(BREAKING)** Updated to ANTLR runtime `4.13.2`, using the ANTLR tool to generate both target languages. +- Enabled `caseInsensitive` option for lexers (added in ANTLR 4.10). + - As a result, `CaseInsensitiveInputStream` is deprecated and no longer required. + - Added `ApexParserFactory` class to create parsers, token streams, and lexers. - Primarily for TS to avoid directly creating `antlr4` class instances. - In Java, it still requires passing a `CharStream` or `CommonTokenStream` to create parsers. @@ -39,7 +42,7 @@ - **(BREAKING)** Updated output to `ES2020` and increased min node version to 16. -- `CaseInsensitiveInputStream` type now extends `CharStream` and can be constructed from `string`. +- `CaseInsensitiveInputStream` (deprecated) type now extends `CharStream` and can be constructed from `string`. - Constructor passing in `CharStream` retained to match Java version. - Removed `node-dir` dependency - replaced with node fs api. diff --git a/README.md b/README.md index 413432c..4d410ff 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Parser for Salesforce Apex (including Triggers & inline SOQL/SOQL). This is based on an [ANTLR4](https://www.antlr.org/) grammar, see [`antlr/BaseApexParser.g4`](./antlr/BaseApexParser.g4). Currently packaged for Java and JavaScript/TypeScript targets. -With the ANTLR4 generated types, a `CaseInsensitiveInputStream` is included (and required) for the lexer. Type aliases and abstractions like `ApexParserFactory` and `ApexErrorListener` are also available for quick start. There are minimal examples in the test classes. +The packages include ANTLR4-generated types plus optional extras for convenience. The TypeScript package exports type aliases for ANTLR types, while both packages have abstractions like `ApexParserFactory` and `ApexErrorListener`. 
There are minimal examples below and in the test classes. ## Installation diff --git a/antlr/BaseApexLexer.g4 b/antlr/BaseApexLexer.g4 index 82df5d3..96c304b 100644 --- a/antlr/BaseApexLexer.g4 +++ b/antlr/BaseApexLexer.g4 @@ -299,16 +299,16 @@ IntegerLiteral ; LongLiteral - : Digit Digit* [lL] + : Digit Digit* [l] ; NumberLiteral - : Digit* '.' Digit Digit* [dD]? + : Digit* '.' Digit Digit* [d]? ; fragment HexCharacter - : [0-9a-fA-F] + : [0-9a-f] ; fragment @@ -424,7 +424,7 @@ Identifier // so we can give better error messages fragment JavaLetter - : [a-zA-Z$_] // these are the "java letters" below 0xFF + : [a-z$_] // these are the "java letters" below 0xFF | // covers all characters above 0xFF which are not a surrogate ~[\u0000-\u00FF\uD800-\uDBFF] | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF @@ -433,7 +433,7 @@ JavaLetter fragment JavaLetterOrDigit - : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0xFF + : [a-z0-9$_] // these are the "java letters or digits" below 0xFF | // covers all characters above 0xFF which are not a surrogate ~[\u0000-\u00FF\uD800-\uDBFF] | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF diff --git a/jvm/antlr/ApexLexer.g4 b/jvm/antlr/ApexLexer.g4 index 4361415..338b403 100644 --- a/jvm/antlr/ApexLexer.g4 +++ b/jvm/antlr/ApexLexer.g4 @@ -1,7 +1,8 @@ lexer grammar ApexLexer; +options { caseInsensitive = true; } @lexer::members { -public void clearCache() {_interp.clearDFA();} +public void clearCache() { _interp.clearDFA(); } } import BaseApexLexer; diff --git a/jvm/src/main/java/io/github/apexdevtools/apexparser/ApexParserFactory.java b/jvm/src/main/java/io/github/apexdevtools/apexparser/ApexParserFactory.java index 564f114..3f381af 100644 --- a/jvm/src/main/java/io/github/apexdevtools/apexparser/ApexParserFactory.java +++ b/jvm/src/main/java/io/github/apexdevtools/apexparser/ApexParserFactory.java @@ -51,6 +51,10 @@ public static CommonTokenStream createTokenStream(CharStream 
stream) { } public static ApexLexer createLexer(CharStream stream) { - return new ApexLexer(new CaseInsensitiveInputStream(stream)); + ApexLexer lexer = new ApexLexer(stream); + + // always remove default console listener + lexer.removeErrorListeners(); + return lexer; } } diff --git a/jvm/src/main/java/io/github/apexdevtools/apexparser/CaseInsensitiveInputStream.java b/jvm/src/main/java/io/github/apexdevtools/apexparser/CaseInsensitiveInputStream.java index b473a58..6e95393 100644 --- a/jvm/src/main/java/io/github/apexdevtools/apexparser/CaseInsensitiveInputStream.java +++ b/jvm/src/main/java/io/github/apexdevtools/apexparser/CaseInsensitiveInputStream.java @@ -37,8 +37,12 @@ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT /** * ANTLR4 stream handler that allows use of case insensitive handling. + * + * @deprecated since 5.0.0. ApexLexer is generated with ANTLR 4.10 + * `caseInsensitive` option enabled. */ -@SuppressWarnings({ "unused" }) +@SuppressWarnings("all") +@Deprecated public class CaseInsensitiveInputStream implements CharStream { private final CharStream src; diff --git a/jvm/src/test/java/io/github/apexdevtools/apexparser/ApexLexerTest.java b/jvm/src/test/java/io/github/apexdevtools/apexparser/ApexLexerTest.java index d3e4f2d..67c591e 100644 --- a/jvm/src/test/java/io/github/apexdevtools/apexparser/ApexLexerTest.java +++ b/jvm/src/test/java/io/github/apexdevtools/apexparser/ApexLexerTest.java @@ -25,8 +25,7 @@ public class ApexLexerTest { @Test void testLexerGeneratesTokens() { Map.Entry lexerAndCounter = createLexer( - "public class Hello {}", - false + "public class Hello {}" ); CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey()); assertEquals(6, tokens.getNumberOfOnChannelTokens()); @@ -36,8 +35,7 @@ void testLexerGeneratesTokens() { @Test void testCaseInsensitivityLowerCase() { Map.Entry lexerAndCounter = createLexer( - "public", - true + "public" ); CommonTokenStream tokens = new 
CommonTokenStream(lexerAndCounter.getKey()); assertEquals(2, tokens.getNumberOfOnChannelTokens()); @@ -47,8 +45,7 @@ void testCaseInsensitivityLowerCase() { @Test void testCaseInsensitivityUpperCase() { Map.Entry lexerAndCounter = createLexer( - "PUBLIC", - true + "PUBLIC" ); CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey()); assertEquals(2, tokens.getNumberOfOnChannelTokens()); @@ -58,19 +55,33 @@ void testCaseInsensitivityUpperCase() { @Test void testCaseInsensitivityMixedCase() { Map.Entry lexerAndCounter = createLexer( - "PuBliC", - true + "PuBliC" ); CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey()); assertEquals(2, tokens.getNumberOfOnChannelTokens()); assertEquals(0, lexerAndCounter.getValue().getNumErrors()); } + @Test + @SuppressWarnings("deprecation") + void testCaseInsensitivityDeprecated() { + // intentionally testing the deprecated type for backward compatibility + ApexLexer lexer = new ApexLexer( + new CaseInsensitiveInputStream(CharStreams.fromString("PuBliC")) + ); + lexer.removeErrorListeners(); + SyntaxErrorCounter errorCounter = new SyntaxErrorCounter(); + lexer.addErrorListener(errorCounter); + + CommonTokenStream tokens = new CommonTokenStream(lexer); + assertEquals(2, tokens.getNumberOfOnChannelTokens()); + assertEquals(0, errorCounter.getNumErrors()); + } + @Test void testLexerUnicodeEscapes() { Map.Entry lexerAndCounter = createLexer( - "'Fran\\u00E7ois'", - false + "'Fran\\u00E7ois'" ); CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey()); assertEquals(2, tokens.getNumberOfOnChannelTokens()); diff --git a/jvm/src/test/java/io/github/apexdevtools/apexparser/SyntaxErrorCounter.java b/jvm/src/test/java/io/github/apexdevtools/apexparser/SyntaxErrorCounter.java index 4c5106d..a70e438 100644 --- a/jvm/src/test/java/io/github/apexdevtools/apexparser/SyntaxErrorCounter.java +++ b/jvm/src/test/java/io/github/apexdevtools/apexparser/SyntaxErrorCounter.java @@ -38,15 +38,11 @@ public int 
getNumErrors() { } public static Map.Entry createLexer( - String input, - Boolean caseInsensitive + String input ) { - CharStream stream = CharStreams.fromString(input); - ApexLexer lexer = new ApexLexer( - caseInsensitive ? new CaseInsensitiveInputStream(stream) : stream + ApexLexer lexer = ApexParserFactory.createLexer( + CharStreams.fromString(input) ); - - lexer.removeErrorListeners(); SyntaxErrorCounter errorCounter = new SyntaxErrorCounter(); lexer.addErrorListener(errorCounter); diff --git a/npm/antlr/ApexLexer.g4 b/npm/antlr/ApexLexer.g4 index debb38a..2632e4c 100644 --- a/npm/antlr/ApexLexer.g4 +++ b/npm/antlr/ApexLexer.g4 @@ -1,3 +1,4 @@ lexer grammar ApexLexer; +options { caseInsensitive = true; } import BaseApexLexer; diff --git a/npm/src/ApexParserFactory.ts b/npm/src/ApexParserFactory.ts index 2e10411..167a563 100644 --- a/npm/src/ApexParserFactory.ts +++ b/npm/src/ApexParserFactory.ts @@ -27,6 +27,7 @@ */ import { + CharStreams, CommonTokenStream, ErrorNode, ParserRuleContext, @@ -41,7 +42,6 @@ import { import ApexParserListener from "./antlr/ApexParserListener"; import ApexParserVisitor from "./antlr/ApexParserVisitor"; import ApexLexer from "./antlr/ApexLexer"; -import { CaseInsensitiveInputStream } from "./CaseInsensitiveInputStream"; import ApexParser from "./antlr/ApexParser"; import { ThrowingErrorListener } from "./ApexErrorListener"; @@ -85,7 +85,11 @@ export class ApexParserFactory { } static createLexer(source: string): ApexLexer { - return new ApexLexer(new CaseInsensitiveInputStream(source)); + const lexer = new ApexLexer(CharStreams.fromString(source)); + + // always remove default console listener + lexer.removeErrorListeners(); + return lexer; } } diff --git a/npm/src/CaseInsensitiveInputStream.ts b/npm/src/CaseInsensitiveInputStream.ts index 3af5938..050705d 100644 --- a/npm/src/CaseInsensitiveInputStream.ts +++ b/npm/src/CaseInsensitiveInputStream.ts @@ -28,6 +28,12 @@ import { CharStream } from "antlr4"; +/** + * Converts char 
stream to lower case for case-insensitive usage. + * + * @deprecated since 5.0.0. ApexLexer is generated with ANTLR 4.10 + * `caseInsensitive` option enabled. + */ export class CaseInsensitiveInputStream extends CharStream { constructor(data: string, decodeToUnicodeCodePoints?: boolean); constructor(stream: CharStream, decodeToUnicodeCodePoints?: boolean); diff --git a/npm/src/__tests__/ApexLexerTest.ts b/npm/src/__tests__/ApexLexerTest.ts index 2d7ea03..730e713 100644 --- a/npm/src/__tests__/ApexLexerTest.ts +++ b/npm/src/__tests__/ApexLexerTest.ts @@ -12,7 +12,9 @@ derived from this software without specific prior written permission. */ import { CommonTokenStream } from "antlr4"; -import { createLexer } from "./SyntaxErrorCounter"; +import { createLexer, SyntaxErrorCounter } from "./SyntaxErrorCounter"; +import { CaseInsensitiveInputStream } from "../CaseInsensitiveInputStream"; +import ApexLexer from "../antlr/ApexLexer"; type ExtCommonTokenStream = CommonTokenStream & { // This method is present but not available @@ -21,7 +23,7 @@ type ExtCommonTokenStream = CommonTokenStream & { }; test("Lexer generates tokens", () => { - const [lexer, errorCounter] = createLexer("public class Hello {}", false); + const [lexer, errorCounter] = createLexer("public class Hello {}"); const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream; expect(tokens.getNumberOfOnChannelTokens()).toBe(6); expect(errorCounter.getNumErrors()).toEqual(0); @@ -48,8 +50,20 @@ test("Case insensitivity (mixed case)", () => { expect(errorCounter.getNumErrors()).toEqual(0); }); +test("Case insensitivity (deprecated stream)", () => { + // intentionally testing the deprecated type for backward compatibility + const lexer = new ApexLexer(new CaseInsensitiveInputStream("PuBliC")); + lexer.removeErrorListeners(); + const errorCounter = new SyntaxErrorCounter(); + lexer.addErrorListener(errorCounter); + + const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream; + 
expect(tokens.getNumberOfOnChannelTokens()).toBe(2); + expect(errorCounter.getNumErrors()).toEqual(0); +}); + test("Lexer unicode escapes", () => { - const [lexer, errorCounter] = createLexer("'Fran\\u00E7ois'", false); + const [lexer, errorCounter] = createLexer("'Fran\\u00E7ois'"); const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream; expect(tokens.getNumberOfOnChannelTokens()).toBe(2); expect(errorCounter.getNumErrors()).toEqual(0); diff --git a/npm/src/__tests__/SyntaxErrorCounter.ts b/npm/src/__tests__/SyntaxErrorCounter.ts index 1418963..e7540f3 100644 --- a/npm/src/__tests__/SyntaxErrorCounter.ts +++ b/npm/src/__tests__/SyntaxErrorCounter.ts @@ -11,16 +11,9 @@ 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. */ -import { - ErrorListener, - CharStreams, - RecognitionException, - Recognizer, - Token, -} from "antlr4"; +import { ErrorListener, RecognitionException, Recognizer, Token } from "antlr4"; import ApexLexer from "../antlr/ApexLexer"; import ApexParser from "../antlr/ApexParser"; -import { CaseInsensitiveInputStream } from "../CaseInsensitiveInputStream"; import { ApexParserFactory } from "../ApexParserFactory"; export class SyntaxErrorCounter extends ErrorListener { @@ -43,16 +36,9 @@ export class SyntaxErrorCounter extends ErrorListener { } export function createLexer( - input: string, - caseInsensitive: boolean = true + input: string ): [ApexLexer, SyntaxErrorCounter] { - const lexer = new ApexLexer( - caseInsensitive - ? new CaseInsensitiveInputStream(input) - : CharStreams.fromString(input) - ); - - lexer.removeErrorListeners(); + const lexer = ApexParserFactory.createLexer(input); const errorCounter = new SyntaxErrorCounter(); lexer.addErrorListener(errorCounter);