From caa6e9150ab1910fd2dc7381a7e14fae262cedef Mon Sep 17 00:00:00 2001 From: Elliot Hillary Date: Tue, 22 Jul 2025 15:53:08 +1000 Subject: [PATCH] Add full support for `HIGHCHARUNICODE` --- CHANGELOG.md | 4 ++ .../antlr/ast/node/TextLiteralNodeImpl.java | 41 +++++++++++------ .../ast/node/TextLiteralNodeImplTest.java | 44 ++++++++++++++++--- 3 files changed, 69 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78f638e11..d3a1ba20c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Full support for the `HIGHCHARUNICODE` compiler directive. + ## [1.17.2] - 2025-07-03 ### Fixed diff --git a/delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java b/delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java index 1be02f6ad..8c2504473 100644 --- a/delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java +++ b/delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java @@ -20,6 +20,8 @@ import au.com.integradev.delphi.antlr.ast.visitors.DelphiParserVisitor; import au.com.integradev.delphi.preprocessor.TextBlockLineEndingMode; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; import java.util.ArrayDeque; import java.util.Deque; import java.util.stream.Collectors; @@ -28,6 +30,7 @@ import org.apache.commons.lang3.Strings; import org.sonar.plugins.communitydelphi.api.ast.DelphiNode; import org.sonar.plugins.communitydelphi.api.ast.TextLiteralNode; +import org.sonar.plugins.communitydelphi.api.directive.SwitchDirective.SwitchKind; import org.sonar.plugins.communitydelphi.api.token.DelphiTokenType; import org.sonar.plugins.communitydelphi.api.type.IntrinsicType; import org.sonar.plugins.communitydelphi.api.type.Type; @@ -167,26 +170,38 @@ private String createSingleLineValue() { return imageBuilder.toString(); } - private static char characterEscapeToChar(String image) { + private boolean isHighCharUnicode() { + return getAst() + .getDelphiFile() + .getCompilerSwitchRegistry() + .isActiveSwitch(SwitchKind.HIGHCHARUNICODE, getTokenIndex()); + } + + public Charset getAnsiCharset() { + return Charset.forName(System.getProperty("native.encoding")); + } + + private char characterEscapeToChar(String image) { image = image.substring(1); int radix = 10; - switch (image.charAt(0)) { - case '$': - radix = 16; - image = image.substring(1); - break; - case '%': - radix = 2; - image = image.substring(1); - break; - default: - // do nothing + if (image.charAt(0) == '$') { + radix = 16; + image = image.substring(1); } image = StringUtils.remove(image, '_'); + char character = (char) Integer.parseInt(image, radix); - return (char) Integer.parseInt(image, radix); + if (isHighCharUnicode() || character > 255) { + // With HIGHCHARUNICODE ON, all escapes are interpreted as UTF-16. + // Escapes above 255 are always interpreted as UTF-16. + return character; + } else { + // With HIGHCHARUNICODE OFF, escapes between 0-255 are interpreted in the system code page. + var buffer = ByteBuffer.allocate(1).put((byte) character).flip(); + return getAnsiCharset().decode(buffer).get(); + } } @Override diff --git a/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java b/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java index 0ffa82f5a..d84253a65 100644 --- a/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java +++ b/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java @@ -20,17 +20,24 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; import au.com.integradev.delphi.antlr.DelphiLexer; import au.com.integradev.delphi.antlr.ast.DelphiAstImpl; import au.com.integradev.delphi.file.DelphiFile; +import au.com.integradev.delphi.preprocessor.CompilerSwitchRegistry; import au.com.integradev.delphi.preprocessor.TextBlockLineEndingMode; import au.com.integradev.delphi.preprocessor.TextBlockLineEndingModeRegistry; +import java.nio.charset.Charset; import org.antlr.runtime.CommonToken; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.sonar.plugins.communitydelphi.api.ast.DelphiNode; +import org.sonar.plugins.communitydelphi.api.directive.SwitchDirective.SwitchKind; class TextLiteralNodeImplTest { @Test @@ -59,9 +66,21 @@ void testMultilineImage() { assertThat(node.isMultiline()).isTrue(); } - @Test - void testGetImageWithCharacterEscapes() { - TextLiteralNodeImpl node = new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral); + @ParameterizedTest(name = "HIGHCHARUNICODE = {0}") + @ValueSource(booleans = {true, false}) + void testGetImageWithCharacterEscapes(boolean highCharUnicode) { + var registry = mock(CompilerSwitchRegistry.class); + when(registry.isActiveSwitch(eq(SwitchKind.HIGHCHARUNICODE), anyInt())) + .thenReturn(highCharUnicode); + var file = mock(DelphiFile.class); + when(file.getCompilerSwitchRegistry()).thenReturn(registry); + var ast = mock(DelphiAstImpl.class); + when(ast.getDelphiFile()).thenReturn(file); + + TextLiteralNodeImpl node = spy(new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral)); + when(node.getAnsiCharset()).thenReturn(Charset.forName("windows-1252")); + node.setParent(ast); + node.addChild(createNode(DelphiLexer.TkQuotedString, "'F'")); node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#111")); node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#111")); @@ -69,12 +88,23 @@ void testGetImageWithCharacterEscapes() { node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$61")); node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$72")); node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'")); - node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#%01100001")); - node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#%01111010")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$80")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$98")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$A3")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$20AC")); + node.addChild(createNode(DelphiLexer.TkQuotedString, "'az'")); - assertThat(node.getImage()).isEqualTo("'F'#111#111'B'#$61#$72'B'#%01100001#%01111010"); - assertThat(node.getValue()).isEqualTo(node.getImageWithoutQuotes()).isEqualTo("FooBarBaz"); assertThat(node.isMultiline()).isFalse(); + assertThat(node.getImage()).isEqualTo("'F'#111#111'B'#$61#$72'B'#$80#$98#$A3#$20AC'az'"); + if (highCharUnicode) { + assertThat(node.getValue()) + .isEqualTo(node.getImageWithoutQuotes()) + .isEqualTo("FooBarB\u0080\u0098£€az"); + } else { + assertThat(node.getValue()) + .isEqualTo(node.getImageWithoutQuotes()) + .isEqualTo("FooBarB€˜£€az"); + } } @Test