Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

- **(BREAKING)** Updated to ANTLR runtime `4.13.2`, using the ANTLR tool to generate both target languages.

- Enabled `caseInsensitive` option for lexers (added in ANTLR 4.10).
- As a result, `CaseInsensitiveInputStream` is deprecated and should no longer be required.

- Added `ApexParserFactory` class to create parsers, token streams, and lexers.
- Primarily for TS to avoid directly creating `antlr4` class instances.
- In Java, it still requires passing a `CharStream` or `CommonTokenStream` to create parsers.
Expand Down Expand Up @@ -39,7 +42,7 @@

- **(BREAKING)** Updated output to `ES2020` and increased min node version to 16.

- `CaseInsensitiveInputStream` type now extends `CharStream` and can be constructed from `string`.
- `CaseInsensitiveInputStream` (deprecated) type now extends `CharStream` and can be constructed from `string`.
- Constructor passing in `CharStream` retained to match Java version.

- Removed `node-dir` dependency - replaced with node fs api.
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Parser for Salesforce Apex (including Triggers & inline SOQL/SOSL). This is based on an [ANTLR4](https://www.antlr.org/) grammar, see [`antlr/BaseApexParser.g4`](./antlr/BaseApexParser.g4). Currently packaged for Java and JavaScript/TypeScript targets.

With the ANTLR4 generated types, a `CaseInsensitiveInputStream` is included (and required) for the lexer. Type aliases and abstractions like `ApexParserFactory` and `ApexErrorListener` are also available for quick start. There are minimal examples in the test classes.
The packages include ANTLR4 generated types plus optional extras for convenience. The TypeScript package exports type aliases for ANTLR types, while both packages have abstractions like `ApexParserFactory` and `ApexErrorListener`. There are minimal examples below and in the test classes.

## Installation

Expand Down
10 changes: 5 additions & 5 deletions antlr/BaseApexLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -299,16 +299,16 @@ IntegerLiteral
;

LongLiteral
: Digit Digit* [lL]
: Digit Digit* [l]
;

NumberLiteral
: Digit* '.' Digit Digit* [dD]?
: Digit* '.' Digit Digit* [d]?
;

fragment
HexCharacter
: [0-9a-fA-F]
: [0-9a-f]
;

fragment
Expand Down Expand Up @@ -424,7 +424,7 @@ Identifier
// so we can give better error messages
fragment
JavaLetter
: [a-zA-Z$_] // these are the "java letters" below 0xFF
: [a-z$_] // these are the "java letters" below 0xFF
| // covers all characters above 0xFF which are not a surrogate
~[\u0000-\u00FF\uD800-\uDBFF]
| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
Expand All @@ -433,7 +433,7 @@ JavaLetter

fragment
JavaLetterOrDigit
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0xFF
: [a-z0-9$_] // these are the "java letters or digits" below 0xFF
| // covers all characters above 0xFF which are not a surrogate
~[\u0000-\u00FF\uD800-\uDBFF]
| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
Expand Down
3 changes: 2 additions & 1 deletion jvm/antlr/ApexLexer.g4
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
lexer grammar ApexLexer;
options { caseInsensitive = true; }

@lexer::members {
public void clearCache() {_interp.clearDFA();}
public void clearCache() { _interp.clearDFA(); }
}

import BaseApexLexer;
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ public static CommonTokenStream createTokenStream(CharStream stream) {
}

public static ApexLexer createLexer(CharStream stream) {
return new ApexLexer(new CaseInsensitiveInputStream(stream));
ApexLexer lexer = new ApexLexer(stream);

// always remove default console listener
lexer.removeErrorListeners();
return lexer;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,12 @@ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

/**
* ANTLR4 stream handler that allows use of case insensitive handling.
*
* @deprecated since 5.0.0. ApexLexer is now generated with the ANTLR
* `caseInsensitive` option (added in ANTLR 4.10) enabled.
*/
@SuppressWarnings({ "unused" })
@SuppressWarnings("all")
@Deprecated
public class CaseInsensitiveInputStream implements CharStream {

private final CharStream src;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ public class ApexLexerTest {
@Test
void testLexerGeneratesTokens() {
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
"public class Hello {}",
false
"public class Hello {}"
);
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
assertEquals(6, tokens.getNumberOfOnChannelTokens());
Expand All @@ -36,8 +35,7 @@ void testLexerGeneratesTokens() {
@Test
void testCaseInsensitivityLowerCase() {
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
"public",
true
"public"
);
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
assertEquals(2, tokens.getNumberOfOnChannelTokens());
Expand All @@ -47,8 +45,7 @@ void testCaseInsensitivityLowerCase() {
@Test
void testCaseInsensitivityUpperCase() {
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
"PUBLIC",
true
"PUBLIC"
);
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
assertEquals(2, tokens.getNumberOfOnChannelTokens());
Expand All @@ -58,19 +55,33 @@ void testCaseInsensitivityUpperCase() {
@Test
void testCaseInsensitivityMixedCase() {
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
"PuBliC",
true
"PuBliC"
);
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
assertEquals(2, tokens.getNumberOfOnChannelTokens());
assertEquals(0, lexerAndCounter.getValue().getNumErrors());
}

@Test
@SuppressWarnings("deprecation")
void testCaseInsensitivityDeprecated() {
// intentionally testing the deprecated type for backward compatibility
ApexLexer lexer = new ApexLexer(
new CaseInsensitiveInputStream(CharStreams.fromString("PuBliC"))
);
lexer.removeErrorListeners();
SyntaxErrorCounter errorCounter = new SyntaxErrorCounter();
lexer.addErrorListener(errorCounter);

CommonTokenStream tokens = new CommonTokenStream(lexer);
assertEquals(2, tokens.getNumberOfOnChannelTokens());
assertEquals(0, errorCounter.getNumErrors());
}

@Test
void testLexerUnicodeEscapes() {
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
"'Fran\\u00E7ois'",
false
"'Fran\\u00E7ois'"
);
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
assertEquals(2, tokens.getNumberOfOnChannelTokens());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,11 @@ public int getNumErrors() {
}

public static Map.Entry<ApexLexer, SyntaxErrorCounter> createLexer(
String input,
Boolean caseInsensitive
String input
) {
CharStream stream = CharStreams.fromString(input);
ApexLexer lexer = new ApexLexer(
caseInsensitive ? new CaseInsensitiveInputStream(stream) : stream
ApexLexer lexer = ApexParserFactory.createLexer(
CharStreams.fromString(input)
);

lexer.removeErrorListeners();
SyntaxErrorCounter errorCounter = new SyntaxErrorCounter();
lexer.addErrorListener(errorCounter);

Expand Down
1 change: 1 addition & 0 deletions npm/antlr/ApexLexer.g4
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
lexer grammar ApexLexer;
options { caseInsensitive = true; }

import BaseApexLexer;
8 changes: 6 additions & 2 deletions npm/src/ApexParserFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
*/

import {
CharStreams,
CommonTokenStream,
ErrorNode,
ParserRuleContext,
Expand All @@ -41,7 +42,6 @@ import {
import ApexParserListener from "./antlr/ApexParserListener";
import ApexParserVisitor from "./antlr/ApexParserVisitor";
import ApexLexer from "./antlr/ApexLexer";
import { CaseInsensitiveInputStream } from "./CaseInsensitiveInputStream";
import ApexParser from "./antlr/ApexParser";
import { ThrowingErrorListener } from "./ApexErrorListener";

Expand Down Expand Up @@ -85,7 +85,11 @@ export class ApexParserFactory {
}

static createLexer(source: string): ApexLexer {
return new ApexLexer(new CaseInsensitiveInputStream(source));
const lexer = new ApexLexer(CharStreams.fromString(source));

// always remove default console listener
lexer.removeErrorListeners();
return lexer;
}
}

Expand Down
6 changes: 6 additions & 0 deletions npm/src/CaseInsensitiveInputStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@

import { CharStream } from "antlr4";

/**
* Converts char stream to lower case for case insensitive usage.
*
* @deprecated since 5.0.0. ApexLexer is now generated with the ANTLR
* `caseInsensitive` option (added in ANTLR 4.10) enabled.
*/
export class CaseInsensitiveInputStream extends CharStream {
constructor(data: string, decodeToUnicodeCodePoints?: boolean);
constructor(stream: CharStream, decodeToUnicodeCodePoints?: boolean);
Expand Down
20 changes: 17 additions & 3 deletions npm/src/__tests__/ApexLexerTest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
derived from this software without specific prior written permission.
*/
import { CommonTokenStream } from "antlr4";
import { createLexer } from "./SyntaxErrorCounter";
import { createLexer, SyntaxErrorCounter } from "./SyntaxErrorCounter";
import { CaseInsensitiveInputStream } from "../CaseInsensitiveInputStream";
import ApexLexer from "../antlr/ApexLexer";

type ExtCommonTokenStream = CommonTokenStream & {
// This method is present but not available
Expand All @@ -21,7 +23,7 @@ type ExtCommonTokenStream = CommonTokenStream & {
};

test("Lexer generates tokens", () => {
const [lexer, errorCounter] = createLexer("public class Hello {}", false);
const [lexer, errorCounter] = createLexer("public class Hello {}");
const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream;
expect(tokens.getNumberOfOnChannelTokens()).toBe(6);
expect(errorCounter.getNumErrors()).toEqual(0);
Expand All @@ -48,8 +50,20 @@ test("Case insensitivity (mixed case)", () => {
expect(errorCounter.getNumErrors()).toEqual(0);
});

test("Case insensitivity (deprecated stream)", () => {
// intentionally testing the deprecated type for backward compatibility
const lexer = new ApexLexer(new CaseInsensitiveInputStream("PuBliC"));
lexer.removeErrorListeners();
const errorCounter = new SyntaxErrorCounter<number>();
lexer.addErrorListener(errorCounter);

const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream;
expect(tokens.getNumberOfOnChannelTokens()).toBe(2);
expect(errorCounter.getNumErrors()).toEqual(0);
});

test("Lexer unicode escapes", () => {
const [lexer, errorCounter] = createLexer("'Fran\\u00E7ois'", false);
const [lexer, errorCounter] = createLexer("'Fran\\u00E7ois'");
const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream;
expect(tokens.getNumberOfOnChannelTokens()).toBe(2);
expect(errorCounter.getNumErrors()).toEqual(0);
Expand Down
20 changes: 3 additions & 17 deletions npm/src/__tests__/SyntaxErrorCounter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,9 @@
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
*/
import {
ErrorListener,
CharStreams,
RecognitionException,
Recognizer,
Token,
} from "antlr4";
import { ErrorListener, RecognitionException, Recognizer, Token } from "antlr4";
import ApexLexer from "../antlr/ApexLexer";
import ApexParser from "../antlr/ApexParser";
import { CaseInsensitiveInputStream } from "../CaseInsensitiveInputStream";
import { ApexParserFactory } from "../ApexParserFactory";

export class SyntaxErrorCounter<T = Token> extends ErrorListener<T> {
Expand All @@ -43,16 +36,9 @@ export class SyntaxErrorCounter<T = Token> extends ErrorListener<T> {
}

export function createLexer(
input: string,
caseInsensitive: boolean = true
input: string
): [ApexLexer, SyntaxErrorCounter<number>] {
const lexer = new ApexLexer(
caseInsensitive
? new CaseInsensitiveInputStream(input)
: CharStreams.fromString(input)
);

lexer.removeErrorListeners();
const lexer = ApexParserFactory.createLexer(input);
const errorCounter = new SyntaxErrorCounter<number>();
lexer.addErrorListener(errorCounter);

Expand Down