Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions src/SmsLength.php
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,17 @@ private function inspect(string $messageContent): void
// Any character outside the 7-bit alphabet switches the entire encoding to UCS-2
$this->encoding = '7-bit';
$this->size = 0;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to commit a change here.

$mbLength = mb_strlen($messageContent, 'UTF-8');
for ($i = 0; $i < $mbLength; $i++) {
$char = mb_substr($messageContent, $i, 1, 'UTF-8');
if (in_array($char, self::GSM0338_BASIC, true)) {
$this->size++;
} elseif (in_array($char, self::GSM0338_EXTENDED, true)) {
// In cases where a double counted char straddles two messages, add padding to push it to the next part
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it may be clearer to say why we need to count it as 3; something like:

"In cases where an extended char would straddle two messages, a padding control character is added first,
so the extended character is pushed entirely to the next part."

if (($this->size + 2) % self::MAXIMUM_CHARACTERS_7BIT_CONCATENATED === 1) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't verified this, but I'm concerned that if the extended character is encountered at pos 153 (so also using 154) in a message whose total length is shorter than 160, this will add unnecessary size.

E.g. if the number of characters is 159 and there is one extended char at pos 153, counting its size as 3 pushes the total size to 161, whereas it would actually be 160 and fit in a single message.

There should be a test for this.

$this->size++;
}
$this->size += 2;
} else {
$this->encoding = 'ucs-2';
Expand All @@ -182,7 +187,14 @@ private function inspect(string $messageContent): void
for ($i = 0; $i < $mbLength; $i++) {
$char = mb_substr($messageContent, $i, 1, 'UTF-8');
$utf16Hex = bin2hex(mb_convert_encoding($char, 'UTF-16', 'UTF-8'));
$this->size += strlen($utf16Hex) / 4;
$charSize = strlen($utf16Hex) / 4;

// In cases where a double counted char straddles two messages, add padding to push it to the next part
if ($charSize > 1 && ($this->size + $charSize) % self::MAXIMUM_CHARACTERS_UCS2_CONCATENATED === 1) {
$this->size++;
}

$this->size += $charSize;
}
}

Expand All @@ -196,7 +208,7 @@ private function inspect(string $messageContent): void

$this->messageCount = 1;
if ($this->size > $singleSize) {
$this->messageCount = (int)ceil($this->size / $concatSize);
$this->messageCount = (int) ceil($this->size / $concatSize);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to commit a change here.

}
}
}
21 changes: 18 additions & 3 deletions tests/SmsLengthTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,31 @@ public function providerSize(): array
'long-gsm-exact' => [str_repeat('exact max', 153), '7-bit', 1377, 9, 1377],

// long 7-bit extended
'long-gsm-ex-1' => [str_repeat(self::GSM0338_EXTENDED, 40), '7-bit', 720, 5, 765],
'long-gsm-ex-2' => [str_repeat(self::GSM0338_EXTENDED, 76), '7-bit', 1368, 9, 1377],
'long-gsm-ex-1' => [str_repeat(self::GSM0338_EXTENDED, 40), '7-bit', 724, 5, 765],
'long-gsm-ex-2' => [str_repeat(self::GSM0338_EXTENDED, 76), '7-bit', 1376, 9, 1377],

// long UCS-2
'long-ucs-1' => [str_repeat('simple msg plus •', 20), 'ucs-2', 340, 6, 402],
'long-ucs-2' => [str_repeat("simple msg plus \xf0\x9f\x93\xb1", 20), 'ucs-2', 360, 6, 402],
'long-ucs-exact' => [str_repeat('exact•max', 67), 'ucs-2', 603, 9, 603],

// empty
'empty' => ['', '7-bit', 0, 1, 160],
'empty messages' => ['', '7-bit', 0, 1, 160],

'test length calculates correctly when a GSM extended char straddles two messages' => [
'The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown f[x jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the.',
'7-bit',
307,
3,
459,
],
'test length calculates correctly when a unicode char straddles two messages' => [
str_repeat('🌐', 67),
'ucs-2',
136,
3,
201,
],
];
}

Expand Down