Skip to content

Commit 6d6d453

Browse files
authored
Merge pull request #96 from yoeunes/dev
Configures regex optimization suggestions
2 parents ebd1f57 + c13f3ee commit 6d6d453

File tree

5 files changed

+77
-10
lines changed

5 files changed

+77
-10
lines changed

README.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,16 +487,29 @@ services:
487487
* Validate regex syntax at analysis time.
488488
* Optionally report ReDoS risks as PHPStan errors or warnings.
489489

490-
Configuration is done via the provided `extension.neon`, with options such as:
490+
Configuration is done via the provided `extension.neon`, with options such as:
491491

492492
```neon
493493
parameters:
494494
regexParser:
495495
ignoreParseErrors: true
496496
reportRedos: true
497497
redosThreshold: 'high'
498+
suggestOptimizations: false
499+
optimizationConfig:
500+
digits: true
501+
word: true
498502
```
499503

504+
* Options mirror the PHPStan bridge:
505+
506+
* `ignoreParseErrors` — skip likely partial regex strings (default: `true`).
507+
* `reportRedos` — emit ReDoS issues (default: `true`).
508+
* `redosThreshold` — minimum severity to report (`low`, `medium`, `high`, `critical`; default: `high`).
509+
* `suggestOptimizations` — surface shorter equivalent patterns when found (default: `false`).
510+
* `optimizationConfig.digits` — enable `[0-9]` → `\d` optimization suggestions (default: `true`).
511+
* `optimizationConfig.word` — enable `[a-zA-Z0-9_]` → `\w` optimization suggestions (default: `true`).
512+
500513
### Psalm
501514

502515
* Psalm plugin uses the same RegexParser validation and ReDoS checks for `preg_*` calls (including `preg_replace_callback_array` keys).

extension.neon

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,12 @@ parametersSchema:
77
# Minimum severity level to report: 'low', 'medium', 'high', 'critical'
88
redosThreshold: string(),
99
# Enable/disable regex optimization suggestions
10-
suggestOptimizations: bool()
10+
suggestOptimizations: bool(),
11+
# Configuration for specific optimizations
12+
optimizationConfig: structure([
13+
digits: bool(),
14+
word: bool()
15+
])
1116
])
1217

1318
parameters:
@@ -25,6 +30,11 @@ parameters:
2530
# Enable regex optimization suggestions
2631
suggestOptimizations: false
2732

33+
# Configuration for specific optimizations (both default to true for backward compatibility)
34+
optimizationConfig:
35+
digits: true
36+
word: true
37+
2838
services:
2939
# Main regex validation rule for PHPStan
3040
-
@@ -34,5 +44,6 @@ services:
3444
reportRedos: %regexParser.reportRedos%
3545
redosThreshold: %regexParser.redosThreshold%
3646
suggestOptimizations: %regexParser.suggestOptimizations%
47+
optimizationConfig: %regexParser.optimizationConfig%
3748
tags:
3849
- phpstan.rules.rule

src/Bridge/PHPStan/PregValidationRule.php

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,17 @@ final class PregValidationRule implements Rule
8585
private ?ReDoSAnalyzer $redosAnalyzer = null;
8686

8787
/**
88-
* @param bool $ignoreParseErrors Ignore parse errors for partial regex strings
89-
* @param bool $reportRedos Report ReDoS vulnerability analysis
90-
* @param string $redosThreshold Minimum ReDoS severity level to report
88+
* @param bool $ignoreParseErrors Ignore parse errors for partial regex strings
89+
* @param bool $reportRedos Report ReDoS vulnerability analysis
90+
* @param string $redosThreshold Minimum ReDoS severity level to report
91+
* @param array{digits: bool, word: bool} $optimizationConfig Toggles for individual optimization suggestions (digits: [0-9] to \d, word: [a-zA-Z0-9_] to \w)
9192
*/
9293
public function __construct(
9394
private readonly bool $ignoreParseErrors = true,
9495
private readonly bool $reportRedos = true,
9596
private readonly string $redosThreshold = 'high',
9697
private readonly bool $suggestOptimizations = false,
98+
private readonly array $optimizationConfig = ['digits' => true, 'word' => true],
9799
) {}
98100

99101
public function getNodeType(): string
@@ -262,7 +264,15 @@ private function validatePattern(string $pattern, int $lineNumber): array
262264

263265
if ($this->suggestOptimizations) {
264266
try {
265-
$optimized = $this->getRegex()->optimize($pattern);
267+
$ast = $this->getRegex()->parse($pattern);
268+
$optimizer = new \RegexParser\NodeVisitor\OptimizerNodeVisitor(
269+
optimizeDigits: (bool) ($this->optimizationConfig['digits'] ?? true),
270+
optimizeWord: (bool) ($this->optimizationConfig['word'] ?? true),
271+
);
272+
$optimizedAst = $ast->accept($optimizer);
273+
// Compile the optimized AST back into a pattern string so it can be compared with the original
274+
$compiler = new \RegexParser\NodeVisitor\CompilerNodeVisitor();
275+
$optimized = $optimizedAst->accept($compiler);
266276
if ($optimized !== $pattern && \strlen($optimized) < \strlen($pattern)) {
267277
// Safeguard: Validate that the optimized pattern is still valid
268278
try {

src/NodeVisitor/OptimizerNodeVisitor.php

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ final class OptimizerNodeVisitor extends AbstractNodeVisitor
3232

3333
private bool $isInsideQuantifier = false;
3434

35-
public function __construct()
36-
{
35+
public function __construct(
36+
private readonly bool $optimizeDigits = true,
37+
private readonly bool $optimizeWord = true
38+
) {
3739
$this->charSetAnalyzer = new CharSetAnalyzer();
3840
}
3941

@@ -258,7 +260,7 @@ public function visitCharClass(Node\CharClassNode $node): Node\NodeInterface
258260
$isUnicode = str_contains($this->flags, 'u');
259261
$parts = $node->expression instanceof Node\AlternationNode ? $node->expression->alternatives : [$node->expression];
260262

261-
if (!$isUnicode && !$node->isNegated && 1 === \count($parts)) {
263+
if ($this->optimizeDigits && !$isUnicode && !$node->isNegated && 1 === \count($parts)) {
262264
$part = $parts[0];
263265
if ($part instanceof Node\RangeNode && $part->start instanceof Node\LiteralNode && $part->end instanceof Node\LiteralNode) {
264266
if ('0' === $part->start->value && '9' === $part->end->value) {
@@ -267,7 +269,7 @@ public function visitCharClass(Node\CharClassNode $node): Node\NodeInterface
267269
}
268270
}
269271

270-
if (!$isUnicode && !$node->isNegated && 4 === \count($parts)) {
272+
if ($this->optimizeWord && !$isUnicode && !$node->isNegated && 4 === \count($parts)) {
271273
if ($this->isFullWordClass($parts)) {
272274
return new Node\CharTypeNode('w', $node->startPosition, $node->endPosition);
273275
}

tests/NodeVisitor/OptimizerNodeVisitorTest.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,4 +502,35 @@ public static function optimizationProvider(): \Iterator
502502
// Inner group stays because in quantified context
503503
yield 'Nested Sequence' => ['/(?:(?:abc))?/', '/(?:(?:abc))?/'];
504504
}
505+
506+
public function test_optimizations_can_be_disabled(): void
507+
{
508+
$regex = Regex::create();
509+
$compiler = new CompilerNodeVisitor();
510+
511+
// Test disabling digits optimization
512+
$ast = $regex->parse('/[0-9]/');
513+
$optimizerDisabled = new OptimizerNodeVisitor(optimizeDigits: false);
514+
$optimizedDisabled = $ast->accept($optimizerDisabled);
515+
$resultDisabled = $optimizedDisabled->accept($compiler);
516+
$this->assertSame('/[0-9]/', $resultDisabled, 'Digits optimization should be disabled');
517+
518+
$optimizerEnabled = new OptimizerNodeVisitor(optimizeDigits: true);
519+
$optimizedEnabled = $ast->accept($optimizerEnabled);
520+
$resultEnabled = $optimizedEnabled->accept($compiler);
521+
$this->assertSame('/\d/', $resultEnabled, 'Digits optimization should work when enabled');
522+
523+
// Test disabling word optimization
524+
$ast2 = $regex->parse('/[a-zA-Z0-9_]/');
525+
$optimizerWordDisabled = new OptimizerNodeVisitor(optimizeWord: false);
526+
$optimizedWordDisabled = $ast2->accept($optimizerWordDisabled);
527+
$resultWordDisabled = $optimizedWordDisabled->accept($compiler);
528+
$this->assertNotSame('/\w/', $resultWordDisabled, 'Word optimization should be disabled');
529+
$this->assertStringStartsWith('/[', $resultWordDisabled, 'Should remain as char class');
530+
531+
$optimizerWordEnabled = new OptimizerNodeVisitor(optimizeWord: true);
532+
$optimizedWordEnabled = $ast2->accept($optimizerWordEnabled);
533+
$resultWordEnabled = $optimizedWordEnabled->accept($compiler);
534+
$this->assertSame('/\w/', $resultWordEnabled, 'Word optimization should work when enabled');
535+
}
505536
}

0 commit comments

Comments
 (0)