diff --git a/src/SmsLength.php b/src/SmsLength.php index 93da5a1..89902d9 100644 --- a/src/SmsLength.php +++ b/src/SmsLength.php @@ -162,12 +162,17 @@ private function inspect(string $messageContent): void // Any character outside the 7-bit alphabet switches the entire encoding to UCS-2 $this->encoding = '7-bit'; $this->size = 0; + $mbLength = mb_strlen($messageContent, 'UTF-8'); for ($i = 0; $i < $mbLength; $i++) { $char = mb_substr($messageContent, $i, 1, 'UTF-8'); if (in_array($char, self::GSM0338_BASIC, true)) { $this->size++; } elseif (in_array($char, self::GSM0338_EXTENDED, true)) { + // In cases where a double counted char straddles two messages, add padding to push it to the next part + if (($this->size + 2) % self::MAXIMUM_CHARACTERS_7BIT_CONCATENATED === 1) { + $this->size++; + } $this->size += 2; } else { $this->encoding = 'ucs-2'; @@ -182,7 +187,14 @@ private function inspect(string $messageContent): void for ($i = 0; $i < $mbLength; $i++) { $char = mb_substr($messageContent, $i, 1, 'UTF-8'); $utf16Hex = bin2hex(mb_convert_encoding($char, 'UTF-16', 'UTF-8')); - $this->size += strlen($utf16Hex) / 4; + $charSize = strlen($utf16Hex) / 4; + + // In cases where a double counted char straddles two messages, add padding to push it to the next part + if ($charSize > 1 && ($this->size + $charSize) % self::MAXIMUM_CHARACTERS_UCS2_CONCATENATED === 1) { + $this->size++; + } + + $this->size += $charSize; } } @@ -196,7 +208,7 @@ private function inspect(string $messageContent): void $this->messageCount = 1; if ($this->size > $singleSize) { - $this->messageCount = (int)ceil($this->size / $concatSize); + $this->messageCount = (int) ceil($this->size / $concatSize); } } } diff --git a/tests/SmsLengthTest.php b/tests/SmsLengthTest.php index 011bf83..0766bfe 100644 --- a/tests/SmsLengthTest.php +++ b/tests/SmsLengthTest.php @@ -78,8 +78,8 @@ public function providerSize(): array 'long-gsm-exact' => [str_repeat('exact max', 153), '7-bit', 1377, 9, 1377], // long 7-bit extended - 'long-gsm-ex-1' => [str_repeat(self::GSM0338_EXTENDED, 40), '7-bit', 720, 5, 765], - 'long-gsm-ex-2' => [str_repeat(self::GSM0338_EXTENDED, 76), '7-bit', 1368, 9, 1377], + 'long-gsm-ex-1' => [str_repeat(self::GSM0338_EXTENDED, 40), '7-bit', 724, 5, 765], + 'long-gsm-ex-2' => [str_repeat(self::GSM0338_EXTENDED, 76), '7-bit', 1376, 9, 1377], // long UCS-2 'long-ucs-1' => [str_repeat('simple msg plus •', 20), 'ucs-2', 340, 6, 402], @@ -87,7 +87,22 @@ public function providerSize(): array 'long-ucs-exact' => [str_repeat('exact•max', 67), 'ucs-2', 603, 9, 603], // empty - 'empty' => ['', '7-bit', 0, 1, 160], + 'empty messages' => ['', '7-bit', 0, 1, 160], + + 'test length calculates correctly when a GSM extended char straddles two messages' => [ + 'The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown f[x jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the.', + '7-bit', + 307, + 3, + 459, + ], + 'test length calculates correctly when a unicode char straddles two messages' => [ + str_repeat('🌐', 67), + 'ucs-2', + 136, + 3, + 201, + ], ]; }