Skip to content

Commit 5ffd78f

Browse files
authored
Merge pull request #97 from yoeunes/dev
Adds strict ranges option to regex optimizer
2 parents 6d6d453 + afa7314 commit 5ffd78f

File tree

6 files changed

+58
-15
lines changed

6 files changed

+58
-15
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,7 @@ parameters:
499499
optimizationConfig:
500500
digits: true
501501
word: true
502+
strictRanges: true
502503
```
503504

504505
* Options mirror the PHPStan bridge:
@@ -509,6 +510,7 @@ parameters:
509510
* `suggestOptimizations` — surface shorter equivalent patterns when found (default: `false`).
510511
* `optimizationConfig.digits` — enable `[0-9]` → `\d` optimization suggestions (default: `true`).
511512
* `optimizationConfig.word` — enable `[a-zA-Z0-9_]` → `\w` optimization suggestions (default: `true`).
513+
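* `optimizationConfig.strictRanges` — keep adjacent characters from different categories (digits, uppercase letters, lowercase letters, other symbols) in separate ranges instead of merging them into a single range, so that, for example, `[0-9:]` is not rewritten as `[0-:]`; this keeps suggested patterns readable (default: `true`).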
512514

513515
### Psalm
514516

TODO.md

Lines changed: 0 additions & 7 deletions
This file was deleted.

extension.neon

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ parametersSchema:
1111
# Configuration for specific optimizations
1212
optimizationConfig: structure([
1313
digits: bool(),
14-
word: bool()
14+
word: bool(),
15+
strictRanges: bool()
1516
])
1617
])
1718

@@ -34,6 +35,7 @@ parameters:
3435
optimizationConfig:
3536
digits: true
3637
word: true
38+
strictRanges: true
3739

3840
services:
3941
# Main regex validation rule for PHPStan

src/Bridge/PHPStan/PregValidationRule.php

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,17 +85,17 @@ final class PregValidationRule implements Rule
8585
private ?ReDoSAnalyzer $redosAnalyzer = null;
8686

8787
/**
88-
* @param bool $ignoreParseErrors Ignore parse errors for partial regex strings
89-
* @param bool $reportRedos Report ReDoS vulnerability analysis
90-
* @param string $redosThreshold Minimum ReDoS severity level to report
91-
* @param array{digits: bool, word: bool} $optimizationConfig
88+
* @param bool $ignoreParseErrors Ignore parse errors for partial regex strings
89+
* @param bool $reportRedos Report ReDoS vulnerability analysis
90+
* @param string $redosThreshold Minimum ReDoS severity level to report
91+
* @param array{digits: bool, word: bool, strictRanges: bool} $optimizationConfig
9292
*/
9393
public function __construct(
9494
private readonly bool $ignoreParseErrors = true,
9595
private readonly bool $reportRedos = true,
9696
private readonly string $redosThreshold = 'high',
9797
private readonly bool $suggestOptimizations = false,
98-
private readonly array $optimizationConfig = ['digits' => true, 'word' => true],
98+
private readonly array $optimizationConfig = ['digits' => true, 'word' => true, 'strictRanges' => true],
9999
) {}
100100

101101
public function getNodeType(): string
@@ -268,6 +268,7 @@ private function validatePattern(string $pattern, int $lineNumber): array
268268
$optimizer = new \RegexParser\NodeVisitor\OptimizerNodeVisitor(
269269
optimizeDigits: (bool) ($this->optimizationConfig['digits'] ?? true),
270270
optimizeWord: (bool) ($this->optimizationConfig['word'] ?? true),
271+
strictRanges: (bool) ($this->optimizationConfig['strictRanges'] ?? true),
271272
);
272273
$optimizedAst = $ast->accept($optimizer);
273274
// Use compiler to get string back

src/NodeVisitor/OptimizerNodeVisitor.php

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ final class OptimizerNodeVisitor extends AbstractNodeVisitor
3434

3535
public function __construct(
3636
private readonly bool $optimizeDigits = true,
37-
private readonly bool $optimizeWord = true
37+
private readonly bool $optimizeWord = true,
38+
private readonly bool $strictRanges = true
3839
) {
3940
$this->charSetAnalyzer = new CharSetAnalyzer();
4041
}
@@ -573,6 +574,28 @@ private function isFullWordClass(array $parts): bool
573574
return !\in_array(false, $partsFound, true);
574575
}
575576

577+
/**
     * Map an ASCII code point to the coarse category used when deciding
     * whether two neighbouring characters may share a range.
     *
     * @param int $ord the ASCII ordinal of the character
     *
     * @return int category: 0=other, 1=digits, 2=uppercase, 3=lowercase
     */
    private function getCharCategory(int $ord): int
    {
        // First arm whose condition is true wins; anything outside the
        // three alphanumeric bands falls through to "other".
        return match (true) {
            $ord >= 48 && $ord <= 57 => 1,  // 0-9
            $ord >= 65 && $ord <= 90 => 2,  // A-Z
            $ord >= 97 && $ord <= 122 => 3, // a-z
            default => 0,                   // other
        };
    }
}
598+
576599
/**
577600
* @param array<Node\NodeInterface> $parts
578601
*
@@ -654,7 +677,7 @@ private function normalizeCharClassParts(array $parts): array
654677
continue;
655678
}
656679

657-
if ($ord === $rangeEnd + 1) {
680+
if ($ord === $rangeEnd + 1 && (!$this->strictRanges || $this->getCharCategory($ord) === $this->getCharCategory($rangeEnd))) {
658681
$rangeEnd = $ord;
659682
$rangeEndPos = max($rangeEndPos, $posEnd);
660683

tests/NodeVisitor/OptimizerNodeVisitorTest.php

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,4 +533,26 @@ public function test_optimizations_can_be_disabled(): void
533533
$resultWordEnabled = $optimizedWordEnabled->accept($compiler);
534534
$this->assertSame('/\w/', $resultWordEnabled, 'Word optimization should work when enabled');
535535
}
536+
537+
public function test_strict_ranges_option(): void
    {
        $parser = Regex::create();
        $toString = new CompilerNodeVisitor();

        // A digit range immediately followed by ':' (the code point right after '9').
        $tree = $parser->parse('/[0-9:]/');

        // Strict mode (the default): the digit range must not absorb the
        // neighbouring symbol, so the compiled class may not collapse to [0-:].
        $strictOutput = $tree
            ->accept(new OptimizerNodeVisitor(strictRanges: true))
            ->accept($toString);
        $this->assertStringStartsWith('/[', $strictOutput);
        $this->assertStringEndsWith(']/', $strictOutput);
        $this->assertNotSame('/[0-:]/', $strictOutput, 'Strict ranges should not merge digits and symbols');

        // Loose mode: adjacent code points merge regardless of category.
        $looseOutput = $tree
            ->accept(new OptimizerNodeVisitor(strictRanges: false))
            ->accept($toString);
        $this->assertSame('/[0-:]/', $looseOutput, 'Loose ranges should merge digits and symbols');
    }
536558
}

0 commit comments

Comments
 (0)