From 4789f36e0beb2aaa1649eb700d14a199259c9171 Mon Sep 17 00:00:00 2001
From: atymic
Date: Wed, 14 Jul 2021 12:50:41 +1000
Subject: [PATCH] feat: truncate to length + tests

---
 src/SmsLength.php       | 121 ++++++++++++++++++++++++++++++++++++++
 tests/SmsLengthTest.php |  71 +++++++++++++++++++++++
 2 files changed, 192 insertions(+)

diff --git a/src/SmsLength.php b/src/SmsLength.php
index 93da5a1..f15024d 100644
--- a/src/SmsLength.php
+++ b/src/SmsLength.php
@@ -82,11 +82,19 @@ class SmsLength
      */
     private $messageCount;
 
+    /**
+     * @var string
+     */
+    private $messageContent;
+
     /**
      * @param string $messageContent SMS message content (UTF-8)
+     *
+     * @throws InvalidArgumentException
      */
     public function __construct(string $messageContent)
     {
+        $this->messageContent = $messageContent;
         $this->inspect($messageContent);
     }
 
@@ -116,6 +124,14 @@ public function getMessageCount(): int
         return $this->messageCount;
     }
 
+    /**
+     * Get the message content this instance was created with
+     */
+    public function getMessageContent(): string
+    {
+        return $this->messageContent;
+    }
+
     /**
      * Get upper breakpoint for the current message count
      */
@@ -147,6 +163,111 @@ public function validate(): bool
         return true;
     }
 
+    /**
+     * Return an instance whose message fits within the given number of parts
+     *
+     * The message is cut between characters, never inside one: a character
+     * is only kept when all of its units still fit. When the message already
+     * fits, the current instance is returned unchanged.
+     *
+     * @param int $parts Maximum number of message parts (1 or more)
+     *
+     * @throws InvalidArgumentException If $parts is less than 1
+     */
+    public function truncate(int $parts): self
+    {
+        if ($parts < 1) {
+            throw new InvalidArgumentException('Part count must be at least 1');
+        }
+
+        if ($this->messageCount <= $parts) {
+            return $this;
+        }
+
+        if ($this->encoding === '7-bit') {
+            return new self($this->truncate7Bit($this->messageContent, $parts));
+        }
+
+        return new self($this->truncateUcs2($this->messageContent, $parts));
+    }
+
+    /**
+     * Cut a 7-bit message down to the capacity of the given number of parts
+     *
+     * @param string $message Message content (UTF-8)
+     * @param int $parts Number of message parts, 1 or more
+     */
+    private function truncate7Bit(string $message, int $parts): string
+    {
+        // Single and concatenated messages have different capacities
+        $limit = $parts === 1
+            ? self::MAXIMUM_CHARACTERS_7BIT_SINGLE
+            : $parts * self::MAXIMUM_CHARACTERS_7BIT_CONCATENATED;
+
+        $size = 0;
+        $newMessage = '';
+
+        $mbLength = mb_strlen($message, 'UTF-8');
+
+        for ($i = 0; $i < $mbLength; $i++) {
+            $char = mb_substr($message, $i, 1, 'UTF-8');
+
+            if (in_array($char, self::GSM0338_BASIC, true)) {
+                $charSize = 1;
+            } elseif (in_array($char, self::GSM0338_EXTENDED, true)) {
+                // Characters from the extended table take two units
+                $charSize = 2;
+            } else {
+                // Not in either 7-bit table, so it is left out of the result
+                continue;
+            }
+
+            if ($size + $charSize > $limit) {
+                return $newMessage;
+            }
+
+            $size += $charSize;
+            $newMessage .= $char;
+        }
+
+        return $newMessage;
+    }
+
+    /**
+     * Cut a UCS-2 message down to the capacity of the given number of parts
+     *
+     * @param string $message Message content (UTF-8)
+     * @param int $parts Number of message parts, 1 or more
+     */
+    private function truncateUcs2(string $message, int $parts): string
+    {
+        // Single and concatenated messages have different capacities
+        $limit = $parts === 1
+            ? self::MAXIMUM_CHARACTERS_UCS2_SINGLE
+            : $parts * self::MAXIMUM_CHARACTERS_UCS2_CONCATENATED;
+
+        $size = 0;
+        $newMessage = '';
+
+        $mbLength = mb_strlen($message, 'UTF-8');
+
+        for ($i = 0; $i < $mbLength; $i++) {
+            $char = mb_substr($message, $i, 1, 'UTF-8');
+
+            // Size is counted in 16-bit code units, i.e. 2 bytes of UTF-16 each
+            $charSize = intdiv(strlen(mb_convert_encoding($char, 'UTF-16', 'UTF-8')), 2);
+
+            if ($size + $charSize > $limit) {
+                return $newMessage;
+            }
+
+            $size += $charSize;
+            $newMessage .= $char;
+        }
+
+        return $newMessage;
+    }
+
     /**
      * Parse content to discover size characteristics
      */
diff --git a/tests/SmsLengthTest.php b/tests/SmsLengthTest.php
index 011bf83..1fc542a 100644
--- a/tests/SmsLengthTest.php
+++ b/tests/SmsLengthTest.php
@@ -133,4 +133,75 @@ public function providerTooLarge(): array
             'ucs-2' => [str_repeat("simple msg plus \xf0\x9f\x93\xb1", 950), 'ucs-2', 17100, 256, 17152],
         ];
     }
+
+    /**
+     * @dataProvider providerTruncate
+     */
+    public function testTruncate(string $message, int $parts, string $expected): void
+    {
+        $original = new SmsLength($message);
+
+        $new = $original->truncate($parts);
+
+        $this->assertSame($parts, $new->getMessageCount());
+        $this->assertSame($expected, $new->getMessageContent());
+    }
+
+    public function providerTruncate(): array
+    {
+        return [
+            'message under one part' => [
+                'message' => 'La La La',
+                'parts' => 1,
+                'expected' => 'La La La',
+            ],
+            'message over one part, gsm7' => [
+                'message' => str_repeat('abcd', 45),
+                'parts' => 1,
+                'expected' => str_repeat('abcd', 40),
+            ],
+            'message over two part, gsm7' => [
+                'message' => str_repeat('abcd', 100),
+                'parts' => 2,
+                'expected' => str_repeat('abcd', 76) . 'ab',
+            ],
+            'message over one part, gsm7 + ext' => [
+                'message' => str_repeat('abcd[', 30),
+                'parts' => 1,
+                'expected' => str_repeat('abcd[', 26) . 'abcd', // each rep counts as 6, so 26 reps + 4 chars leftover
+            ],
+            'message over one part, uc2 1 part char' => [
+                'message' => str_repeat('•', 100),
+                'parts' => 1,
+                'expected' => str_repeat('•', 70),
+            ],
+            'message over one part, uc2 3 byte' => [
+                'message' => str_repeat('⏩', 100),
+                'parts' => 1,
+                'expected' => str_repeat('⏩', 70),
+            ],
+            'message over one part, uc2 4 byte' => [
+                'message' => str_repeat('🌐', 100),
+                'parts' => 1,
+                'expected' => str_repeat('🌐', 35),
+            ],
+            'message over two parts, uc2 1 part char' => [
+                'message' => str_repeat('•', 200),
+                'parts' => 2,
+                'expected' => str_repeat('•', 134),
+            ],
+            'message over two parts, uc2 4 byte' => [
+                'message' => str_repeat('🌐', 200),
+                'parts' => 2,
+                'expected' => str_repeat('🌐', 67),
+            ],
+        ];
+    }
+
+    public function testTruncateRejectsPartCountBelowOne(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+
+        (new SmsLength('La La La'))->truncate(0);
+    }
 }