From fcc91ca93a700814399c85684bcc2b2aab8840fa Mon Sep 17 00:00:00 2001 From: Maxime HORCHOLLE Date: Sun, 22 Dec 2019 21:28:18 +0100 Subject: [PATCH] Fix #92: simplexml should not throw error on string parsing --- src/Parser/AbstractXmlOutputParser.php | 12 +- tests/Parser/MediaInfoOutputParserTest.php | 25 +++ .../mediainfo-output-invalid-encoding.xml | 155 ++++++++++++++++++ 3 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/mediainfo-output-invalid-encoding.xml diff --git a/src/Parser/AbstractXmlOutputParser.php b/src/Parser/AbstractXmlOutputParser.php index a36f5af..e5e825a 100644 --- a/src/Parser/AbstractXmlOutputParser.php +++ b/src/Parser/AbstractXmlOutputParser.php @@ -15,7 +15,17 @@ protected function transformXmlToArray(string $xmlString): array $xmlString = utf8_encode($xmlString); } - $xml = simplexml_load_string($xmlString); + libxml_use_internal_errors(true); + $dom = new \DOMDocument('1.0', 'UTF-8'); + $dom->strictErrorChecking = false; + $dom->validateOnParse = false; + $dom->recover = true; + $dom->loadXML($xmlString); + $xml = simplexml_import_dom($dom); + + libxml_clear_errors(); + libxml_use_internal_errors(false); + $json = json_encode($xml); return json_decode($json, true); diff --git a/tests/Parser/MediaInfoOutputParserTest.php b/tests/Parser/MediaInfoOutputParserTest.php index e18f4c2..7dd4a77 100644 --- a/tests/Parser/MediaInfoOutputParserTest.php +++ b/tests/Parser/MediaInfoOutputParserTest.php @@ -25,11 +25,17 @@ class MediaInfoOutputParserTest extends TestCase */ private $outputMediainfo1710Path; + /** + * @var string + */ + private $invalidEncodingOutputPath; + protected function setUp(): void { $this->outputPath = __DIR__.'/../fixtures/mediainfo-output.xml'; $this->outputMediainfo1710Path = __DIR__.'/../fixtures/mediainfo-17.10-output.xml'; $this->invalidOutputPath = __DIR__.'/../fixtures/mediainfo-output-invalid-types.xml'; + $this->invalidEncodingOutputPath = __DIR__.'/../fixtures/mediainfo-output-invalid-encoding.xml'; } public function testGetMediaInfoContainerBeforeCallParse(): void @@ -98,4 +104,23 @@ public function testThrowInvalidTrackType(): void // will throw exception here as default behavior $mediaInfoContainer = $mediaInfoOutputParser->getMediaInfoContainer(); } + + public function testIgnoreInvalidEncodingErrors() + { + $mediaInfoOutputParser = new MediaInfoOutputParser(); + $mediaInfoOutputParser->parse(file_get_contents($this->invalidEncodingOutputPath)); + // xml string in file contains bad encoded characters, on parsing simplexml should not + // throw an error + $mediaInfoContainer = $mediaInfoOutputParser->getMediaInfoContainer(true); + + $this->assertEquals('Mhor\MediaInfo\Type\General', get_class($mediaInfoContainer->getGeneral())); + + $this->assertEquals(1, count($mediaInfoContainer->getAudios())); + + $general = $mediaInfoContainer->getGeneral(); + + $this->assertTrue($general->has('copyright')); + $this->assertTrue(is_array($general->get('copyright'))); + $this->assertEquals('Invalid Char', $general->get('copyright2')); + } } diff --git a/tests/fixtures/mediainfo-output-invalid-encoding.xml b/tests/fixtures/mediainfo-output-invalid-encoding.xml new file mode 100644 index 0000000..b2255f1 --- /dev/null +++ b/tests/fixtures/mediainfo-output-invalid-encoding.xml @@ -0,0 +1,155 @@ + + + + +331 +1 +General +General +0 +1 +1 +VC-1 +VC-1 (WMV3) +VC-1 +WMA +WMA +WMA +/mnt/ramdisk/5/54c9f93b-8550-4100-8eeb-328841dc00d6/782247_ohrly-rh131aaso.wmv +/mnt/ramdisk/5/54c9f93b-8550-4100-8eeb-328841dc00d6 +782247_ohrly-rh131aaso.wmv +782247_ohrly-rh131aaso +wmv +Windows Media +Windows Media +asf dvr-ms wma wmv +Windows Media +video/x-ms-wmv +760169 +742 KiB +742 KiB +742 KiB +742 KiB +742.4 KiB +489056 +8 min 9 s +8 min 9 s 56 ms +8 min 9 s +00:08:09.056 +00:08:09;03 +00:08:09.056 (00:08:09;03) +12435 +12.4 kb/s +5136894 +5 137 kb/s +29.970 +29.970 FPS +14657 +1046 +759123 +Ron Harris +UTC 2012-05-14 00:53:44.000 +UTC 2019-12-17 17:20:55 +2019-12-17 18:20:55 + +Invalid ￾ Char +HD Videos + + +377 +1 +Video +Video +0 +0 +1 +1 +VC-1 +VC-1 +VC-1 +Main +video/vc1 +WMV3 +Windows Media Video 9 +WMV3 +http://www.microsoft.com/windows/windowsmedia/format/codecdownload.aspx +Windows Media Video 9 - 2-pass VBR +489056 +8 min 9 s +8 min 9 s 56 ms +8 min 9 s +00:08:09.056 +00:08:09;03 +00:08:09.056 (00:08:09;03) +5000000 +5 000 kb/s +1920 +1 920 pixels +1080 +1 080 pixels +1.000 +1.778 +16:9 +29.970 +29.970 (29970/1000) FPS +29970 +1000 +14657 +YUV +4:2:0 +4:2:0 +8 +8 bits +Progressive +Progressive +Lossy +Lossy +0.080 +305660000 +292 MiB +292 MiB +292 MiB +292 MiB +291.5 MiB + + +280 +1 +Audio +Audio +0 +1 +2 +2 +WMA +WMA +WMA +Version 2 +161 +Windows Media Audio +http://www.microsoft.com/windows/windowsmedia/format/codecdownload.aspx +Windows Media Audio 9 - 128 kbps, 44 kHz, stereo CBR +489056 +8 min 9 s +8 min 9 s 56 ms +8 min 9 s +00:08:09.056 +00:08:09.056 +128000 +128 kb/s +2 +2 channels +44100 +44.1 kHz +21567370 +16 +16 bits +7824896 +7.46 MiB +7 MiB +7.5 MiB +7.46 MiB +7.462 MiB + + +