Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,13 @@ _Functions_:
— Replaces a suffix in a path with a new value.


### PCRE Functions

- [preg_escape()](https://usephul.empaphy.org/packages/PCRE.html#function_preg_escape)
— Escapes all instances of the given PCRE delimiter character in a raw
regular expression pattern.


### SPL Functions

- [class_parents_uses()](https://usephul.empaphy.org/packages/Other-SPL.html#function_class_parents_uses)
Expand Down
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"src/generators.php",
"src/Math/functions.php",
"src/other.php",
"src/pcre.php",
"src/Path/functions.php",
"src/Type/functions.php",
"src/Var/functions.php"
Expand Down
104 changes: 104 additions & 0 deletions src/pcre.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
<?php

/**
* @author Alwin Garside <alwin@garsi.de>
* @copyright 2025 The Empaphy Project
* @license MIT
* @package PCRE
*/

declare(strict_types=1);

namespace empaphy\usephul;

use ValueError;

use function assert;
use function preg_last_error_msg;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function sprintf;
use function str_contains;
use function strlen;

/**
 * Escapes all instances of the given PCRE delimiter character in a raw regular
 * expression pattern.
 *
 * {@see preg_escape()} takes __pattern__ and puts a backslash in front
 * of every unescaped __delimiter__. This is useful to prepare raw regular
 * expression patterns for use with PHP's PCRE functions.
 *
 * A delimiter is considered already escaped when it is directly preceded by
 * an odd number of backslashes; such occurrences are left untouched.
 *
 * For example:
 *
 *     preg_escape('foo_bar', '_');   // returns `foo\_bar`
 *     preg_escape('foo\\_bar', '_'); // returns `foo\_bar`
 *
 * @param  string  $pattern
 *   The input pattern.
 *
 * @param  non-empty-string  $delimiter
 *   The delimiter to be escaped. Must be a single non-alphanumeric,
 *   non-backslash, non-whitespace character.
 *
 *   This function doesn't support bracket style delimiters (`(`, `)`,
 *   `{`, `}`, `[`, `]`, `<`, and `>`).
 *
 * @return string
 *   The __pattern__ with all instances of __delimiter__ escaped where needed.
 *
 * @throws ValueError
 *   Thrown if __delimiter__ is not a single non-alphanumeric, non-backslash,
 *   non-whitespace character, or if it is a bracket style delimiter.
 */
function preg_escape(string $pattern, string $delimiter): string
{
    // A delimiter can be any non-alphanumeric, non-backslash, non-whitespace
    // character.
    //
    // The pattern is anchored with `\z` rather than `$`: `$` also matches right
    // before a trailing newline, which would let a two-byte value such as
    // "/\n" slip through the single-character check.
    $matched = preg_match('/^[^[:alnum:]\\\\[:space:](){}\\[\\]<>]\z/', $delimiter);
    if (! $matched) {
        assert($matched !== false, preg_last_error_msg());

        // Report the most specific reason first: length, then bracket style,
        // then the generic character class violation.
        if (strlen($delimiter) !== 1) {
            throw new ValueError(
                sprintf(
                    '%s(): Argument #2 ($delimiter) must be a single character',
                    __FUNCTION__,
                ),
            );
        }

        if (str_contains('(){}[]<>', $delimiter)) {
            throw new ValueError(
                sprintf(
                    '%s(): Argument #2 ($delimiter) cannot be a bracket style '
                    . 'delimiter',
                    __FUNCTION__,
                ),
            );
        }

        throw new ValueError(
            sprintf(
                '%s(): Argument #2 ($delimiter) must be a non-alphanumeric, '
                . 'non-backslash, non-whitespace character',
                __FUNCTION__,
            ),
        );
    }

    // Nothing to escape in an empty pattern.
    if ($pattern === '') {
        return $pattern;
    }

    // Match a delimiter that is preceded by an even number (possibly zero) of
    // backslashes: the lookbehind pins the start of the backslash run, the
    // atomic group consumes the run in pairs, and `\K` drops the run from the
    // match so only the delimiter itself gets replaced.
    $escaped = preg_replace(
        '/(?<!\\\\)(?>\\\\\\\\)*\K' . preg_quote($delimiter, '/') . '/',
        '\\\\' . $delimiter,
        $pattern,
    );

    assert($escaped !== null, preg_last_error_msg());

    return $escaped;
}
120 changes: 120 additions & 0 deletions tests/Unit/pcreTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
<?php

/**
* @author Alwin Garside <alwin@garsi.de>
* @copyright 2025 The Empaphy Project
* @license MIT
*
* @noinspection StaticClosureCanBeUsedInspection
*/

declare(strict_types=1);

namespace Pest\Unit\pcre;

use Generator;
use ValueError;

use function empaphy\usephul\generators\seq;
use function empaphy\usephul\preg_escape;

/**
 * Yields every ASCII alphanumeric character (`0-9`, `A-Z`, `a-z`), each
 * wrapped in a single-element array so it can be used as a dataset row.
 */
function alphanumeric_characters(): Generator
{
    // Inclusive ASCII code ranges: digits, uppercase, lowercase.
    $ranges = [
        [0x30, 0x39],
        [0x41, 0x5a],
        [0x61, 0x7a],
    ];

    foreach ($ranges as [$first, $last]) {
        foreach (range($first, $last) as $code) {
            yield [chr($code)];
        }
    }
}

/**
 * Yields every whitespace character, each wrapped in a single-element array
 * so it can be used as a dataset row.
 *
 * > The space characters are HT (9), LF (10), VT (11), FF (12),
 * > CR (13), and space (32). Notice that this list includes the VT
 * > character (code 11).
 * See: https://www.php.net/regexp.reference.character-classes
 */
function whitespace_characters(): Generator
{
    // HT, LF, VT, FF, CR, and space.
    foreach ([9, 10, 11, 12, 13, 32] as $code) {
        yield [chr($code)];
    }
}

/**
 * Yields the delimiter characters exercised by the tests, as produced by
 * {@see seq()} over the literal below.
 */
function delimiters(): Generator
{
    yield from seq('!"#$%&\'*+,-./:;=?@^_`|~');
}

describe('preg_escape()', function () {
    // Expected exception messages, shared by the validation tests below.
    $singleCharacterMessage = 'preg_escape(): Argument #2 ($delimiter) must be a single character';
    $invalidCharacterMessage = 'preg_escape(): Argument #2 ($delimiter) must be a '
        . 'non-alphanumeric, non-backslash, non-whitespace character';
    $bracketStyleMessage = 'preg_escape(): Argument #2 ($delimiter) cannot be a bracket style delimiter';

    // Unescaped delimiters get a backslash; escaped ones and empty patterns
    // come back unchanged.
    test('escapes delimiters when appropriate', function ($pattern, $delimiter, $expected) {
        expect(preg_escape($pattern, $delimiter))->toBe($expected);
    })->with([
        ['pattern' => 'foo/bar', 'delimiter' => '/', 'expected' => 'foo\\/bar'],
        ['pattern' => 'foo\\/bar', 'delimiter' => '/', 'expected' => 'foo\\/bar'],
        ['pattern' => '', 'delimiter' => '/', 'expected' => ''],
    ]);

    // The escaped expression, wrapped in its delimiter, must be a usable
    // pattern that matches the original expression literally.
    test('returns valid patterns for all delimiters', function ($expression, $delimiter, $subject) {
        $regex = $delimiter . preg_escape($expression, $delimiter) . $delimiter;
        $count = preg_match($regex, $subject, $found);

        expect($count)->toBe(1)
            ->and($found[0])->toBe($expression);
    })->with(function () {
        foreach (delimiters() as $delimiter) {
            yield ["foo{$delimiter}bar", $delimiter, "quxfoo{$delimiter}barbaz"];
        }
    });

    test('throws ValueError when $delimiter is empty', function () {
        preg_escape('foo', ''); // @phpstan-ignore argument.type
    })->throws(ValueError::class, $singleCharacterMessage);

    test('throws ValueError when $delimiter is longer than a single character', function () {
        preg_escape('foo', '//');
    })->throws(ValueError::class, $singleCharacterMessage);

    test('throws `ValueError` when the delimiter is alphanumeric', function ($delimiter) {
        preg_escape('foo', $delimiter);
    })
        ->throws(ValueError::class, $invalidCharacterMessage)
        ->with(alphanumeric_characters(...));

    test('throws `ValueError` when the delimiter is a backslash', function () {
        preg_escape('foo', '\\');
    })->throws(ValueError::class, $invalidCharacterMessage);

    test('throws `ValueError` when the delimiter is whitespace', function ($delimiter) {
        preg_escape('foo', $delimiter);
    })
        ->throws(ValueError::class, $invalidCharacterMessage)
        ->with(whitespace_characters(...));

    test('throws `ValueError` when given a bracket style `$delimiter`', function ($delimiter) {
        preg_escape('foo', $delimiter);
    })
        ->throws(ValueError::class, $bracketStyleMessage)
        ->with(function () {
            yield from seq('(){}[]<>');
        });
});