Skip to content

Commit d6b9c7f

Browse files
authored
Merge pull request #21 from aymanrb/feature-detailed-parsing-regex
Feature: allowing custom parsing regex for each variable
2 parents 47ff77f + 6ecbcf0 commit d6b9c7f

File tree

9 files changed

+72
-9
lines changed

9 files changed

+72
-9
lines changed

README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ Parsing Procedure
6161
----------
6262
1- Grab a single copy of the text you want to parse.
6363

64-
2- Replace every single varying text within it to a named variable in the form of ``{%VariableName%}``
64+
2- Replace every piece of varying text within it with a named variable in the form of ``{%VariableName%}`` if you want to match
65+
everything in this part of text or ``{%VariableName:Pattern%}`` if you want to match a specific set of characters or use a more
66+
precise pattern.
6567

6668
3- Add the template file to the templates directory (defined in the parsing code) with a txt extension ``fileName.txt``
6769

@@ -72,8 +74,9 @@ Template Example
7274
If the text documents you want to parse look like this:
7375

7476
```
75-
Hi GitHub-er,
77+
Hello,
7678
If you wish to parse message coming from a website that states info like:
79+
ID & Source: 12234432 Website Form
7780
Name: Pet Cat
7881
E-Mail: email@example.com
7982
Comment: Some text goes here
@@ -86,8 +89,9 @@ Admin
8689
Your Template file (``example_template.txt``) could be something like:
8790

8891
```
89-
Hi {%nameOfRecipient%},
92+
Hello,
9093
If you wish to parse message coming from a website that states info like:
94+
ID & Source: {%id:[0-9]+%} {%source%}
9195
Name: {%senderName%}
9296
E-Mail: {%senderEmail%}
9397
Comment: {%comment%}
@@ -101,7 +105,8 @@ The output of a successful parsing job would be:
101105

102106
```
103107
Array(
104-
'nameOfRecipient' => 'GitHub-er',
108+
'id' => '12234432',
109+
'source' => 'Website Form',
105110
'senderName' => 'Pet Cat',
106111
'senderEmail' => 'email@example.com',
107112
'comment' => 'Some text goes here'

examples/templates/t_8.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
~ New message received ~
2+
3+
Sender: {%id:[0-9]+%} {%name%}
4+
Sender-id: {%senderId%}
5+
Sender-full-name: {%senderName%}
6+
Sender-email: {%senderEmail%}
7+
Sender-website: {%senderSite%}
8+
Sender-nationality: {%senderCountry%}
9+
Message:
10+
{%senderMessage%}

examples/test_txt_files/m_8.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
~ New message received ~
2+
3+
Sender: 12345678 John Anthony Doe
4+
Sender-id: 12345678
5+
Sender-full-name: John Anthony Doe
6+
Sender-email: example@test.com
7+
Sender-website: www.example.com/something
8+
Sender-nationality: N/A
9+
Message:
10+
Some Text Goes Here - Some Text Goes Here

src/Helper/TemplatesHelper.php

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ private function findTemplate(string $text): array
4343
foreach ($this->directoryIterator as $fileInfo) {
4444
$templateContent = file_get_contents($fileInfo->getPathname());
4545

46-
//Compare template against text to decide on similarity percentage
46+
// compare template against text to decide on similarity percentage
4747
similar_text($text, $templateContent, $matchPercentage);
4848

4949
if ($matchPercentage > $maxMatch) {
@@ -74,7 +74,21 @@ private function getAllValidTemplates(): array
7474

7575
private function prepareTemplate(string $templateText): string
7676
{
77+
$templateText = preg_quote($templateText, '/');
78+
79+
// replace all {%Var:Pattern%} in the template with (?<Var>Pattern) regex vars
80+
$templateText = preg_replace('/\\\{%([^%]+)\\:(.*)%\\\}/U', '(?<$1>$2)', $templateText);
81+
82+
// remove the regex escaped characters of the provided patterns
83+
$templateText = preg_replace_callback(
84+
'/(\(\?[^\)]*)./',
85+
function ($matches) {
86+
return str_replace('\\', '', $matches[0]);
87+
},
88+
$templateText
89+
);
90+
7791
// replace all {%Var%} in the template with (?<Var>.*) regex vars
78-
return preg_replace('/\\\{%(.*)%\\\}/U', '(?<$1>.*)', preg_quote($templateText, '/'));
92+
return preg_replace('/\\\{%(.*)%\\\}/U', '(?<$1>.*)', $templateText);
7993
}
8094
}

tests/Helper/TemplatesHelperTest.php

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88

99
class TemplatesHelperTest extends TestCase
1010
{
11+
private const DIR_HELPER_TEMPLATES = '/helper_templates';
12+
const DIR_EXPECTED_TEMPLATES = '/expected_templates';
13+
1114
public function testExceptionIsRaisedForInvalidTemplatesDirectory()
1215
{
1316
$this->expectException(InvalidTemplatesDirectoryException::class);
@@ -16,15 +19,16 @@ public function testExceptionIsRaisedForInvalidTemplatesDirectory()
1619

1720
private function getTemplatesHelperInstance()
1821
{
19-
return new TemplatesHelper(__DIR__ . '/helper_templates');
22+
return new TemplatesHelper(__DIR__ . self::DIR_HELPER_TEMPLATES);
2023
}
2124

2225
public function testGetAllTemplates()
2326
{
2427
$templatesHelper = $this->getTemplatesHelperInstance();
28+
$expectedTemplatesCount = count(glob(__DIR__ . self::DIR_HELPER_TEMPLATES . "/*"));
2529

2630
$returnedTemplates = $templatesHelper->getTemplates('regardless of what comes here');
27-
$this->assertCount(2, $returnedTemplates);
31+
$this->assertCount($expectedTemplatesCount, $returnedTemplates);
2832
}
2933

3034
public function testGetAllTemplatesRegexIsPrepared()
@@ -54,7 +58,9 @@ public function testGetMostMatchingTemplateToTextRegexIsPrepared()
5458
private function checkPreparedTemplates(array $templatesArray): bool
5559
{
5660
foreach ($templatesArray as $templatePath => $template) {
57-
$this->assertStringContainsString('(?<variable>.*)', $template);
61+
$expectedTemplate = $this->getExpectedTemplate($templatePath);
62+
63+
$this->assertEquals($expectedTemplate, $template);
5864
$this->assertTrue($this->isValidRegex($template));
5965
}
6066

@@ -71,4 +77,16 @@ private function isValidRegex(string $pattern): bool
7177

7278
return true;
7379
}
80+
81+
private function getExpectedTemplate(string $templatePath)
82+
{
83+
return file_get_contents(
84+
str_replace(
85+
self::DIR_HELPER_TEMPLATES,
86+
self::DIR_EXPECTED_TEMPLATES,
87+
$templatePath
88+
)
89+
);
90+
}
91+
7492
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
\<htmlTag\>Simple Template 01 (?<variable>.*)\<\/htmlTag\>
2+
3+
Multi Line
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Simple Template 01 (?<variable>.*)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Template with specified pattern (?<variable>[0-9]+)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Template with specified pattern {%variable:[0-9]+%}

0 commit comments

Comments
 (0)