From 0cb869782215f319e2697be717d7f88bd81fdbf8 Mon Sep 17 00:00:00 2001
From: Neok <neokdev@gmail.com>
Date: Mon, 7 Mar 2022 08:55:14 +0100
Subject: [PATCH] fix PHP Deprecated

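Fix the PHP 8.1 deprecation "Implicit conversion from float to int loses
precision" by wrapping the `ord($c1) / 64` argument of chr() in floor()
(lines 201, 210, 219, 224 and 232 of the original file).

This patch also reformats the file (4-space indentation, short array
syntax) and adds property, parameter and return type declarations. The
union types (e.g. array|string) require PHP 8.0 or later.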
---
 src/ForceUTF8/Encoding.php | 409 +++++++++++++++++++------------------
 1 file changed, 215 insertions(+), 194 deletions(-)

diff --git a/src/ForceUTF8/Encoding.php b/src/ForceUTF8/Encoding.php
index 2031592..c8d17f1 100644
--- a/src/ForceUTF8/Encoding.php
+++ b/src/ForceUTF8/Encoding.php
@@ -39,13 +39,13 @@
 
 namespace ForceUTF8;
 
-class Encoding {
+class Encoding
+{
+    public const ICONV_TRANSLIT = 'TRANSLIT';
+    public const ICONV_IGNORE = 'IGNORE';
+    public const WITHOUT_ICONV = '';
 
-  const ICONV_TRANSLIT = "TRANSLIT";
-  const ICONV_IGNORE = "IGNORE";
-  const WITHOUT_ICONV = "";
-
-  protected static $win1252ToUtf8 = array(
+    protected static array $win1252ToUtf8 = [
         128 => "\xe2\x82\xac",
 
         130 => "\xe2\x80\x9a",
@@ -62,7 +62,6 @@ class Encoding {
 
         142 => "\xc5\xbd",
 
-
         145 => "\xe2\x80\x98",
         146 => "\xe2\x80\x99",
         147 => "\xe2\x80\x9c",
@@ -77,10 +76,10 @@ class Encoding {
         156 => "\xc5\x93",
 
         158 => "\xc5\xbe",
-        159 => "\xc5\xb8"
-  );
+        159 => "\xc5\xb8",
+    ];
 
-    protected static $brokenUtf8ToUtf8 = array(
+    protected static array $brokenUtf8ToUtf8 = [
         "\xc2\x80" => "\xe2\x82\xac",
 
         "\xc2\x82" => "\xe2\x80\x9a",
@@ -97,7 +96,6 @@ class Encoding {
 
         "\xc2\x8e" => "\xc5\xbd",
 
-
         "\xc2\x91" => "\xe2\x80\x98",
         "\xc2\x92" => "\xe2\x80\x99",
         "\xc2\x93" => "\xe2\x80\x9c",
@@ -112,26 +110,25 @@ class Encoding {
         "\xc2\x9c" => "\xc5\x93",
 
         "\xc2\x9e" => "\xc5\xbe",
-        "\xc2\x9f" => "\xc5\xb8"
-  );
+        "\xc2\x9f" => "\xc5\xb8",
+    ];
 
-  protected static $utf8ToWin1252 = array(
+    protected static array $utf8ToWin1252 = [
        "\xe2\x82\xac" => "\x80",
 
        "\xe2\x80\x9a" => "\x82",
-       "\xc6\x92"     => "\x83",
+       "\xc6\x92" => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
-       "\xcb\x86"     => "\x88",
+       "\xcb\x86" => "\x88",
        "\xe2\x80\xb0" => "\x89",
-       "\xc5\xa0"     => "\x8a",
+       "\xc5\xa0" => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
-       "\xc5\x92"     => "\x8c",
-
-       "\xc5\xbd"     => "\x8e",
+       "\xc5\x92" => "\x8c",
 
+       "\xc5\xbd" => "\x8e",
 
        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
@@ -140,212 +137,236 @@ class Encoding {
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
-       "\xcb\x9c"     => "\x98",
+       "\xcb\x9c" => "\x98",
        "\xe2\x84\xa2" => "\x99",
-       "\xc5\xa1"     => "\x9a",
+       "\xc5\xa1" => "\x9a",
        "\xe2\x80\xba" => "\x9b",
-       "\xc5\x93"     => "\x9c",
-
-       "\xc5\xbe"     => "\x9e",
-       "\xc5\xb8"     => "\x9f"
-    );
-
-  static function toUTF8($text){
-  /**
-   * Function \ForceUTF8\Encoding::toUTF8
-   *
-   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
-   *
-   * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
-   *
-   * It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
-   *
-   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
-   *    are followed by any of these:  ("group B")
-   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
-   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
-   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
-   * is also a valid unicode character, and will be left unchanged.
-   *
-   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
-   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
-   *
-   * @name toUTF8
-   * @param string $text  Any string.
-   * @return string  The same string, UTF8 encoded
-   *
-   */
-
-    if(is_array($text))
+       "\xc5\x93" => "\x9c",
+
+       "\xc5\xbe" => "\x9e",
+       "\xc5\xb8" => "\x9f",
+    ];
+
+    /**
+     * Convert text to UTF-8, leaving byte sequences that already look like UTF-8 untouched.
+     *
+     * Bytes that are not part of a UTF-8 sequence are assumed to be Windows-1252 / ISO 8859-1.
+     * Arrays are converted element by element (recursively). Any other non-string value
+     * (null, int, float, ...) is returned unchanged, so arrays holding mixed values can be
+     * passed in as-is.
+     *
+     * Known limitation: Latin-1 text that happens to form a well-formed UTF-8 sequence is kept:
+     *  1) a byte in 0xC0-0xDF followed by one byte in 0x80-0xBF, e.g. in
+     *     %ABREPRESENT%C9%BB («REPRESENTÉ») the "«" (%AB) is converted, but "É»" (%C9%BB)
+     *     is also a valid 2-byte sequence and is left unchanged;
+     *  2) a byte in 0xE0-0xEF followed by two bytes in 0x80-0xBF;
+     *  3) a byte in 0xF0-0xF7 followed by three bytes in 0x80-0xBF.
+     *
+     * @param mixed $text string, (nested) array of values, or any other value
+     *
+     * @return mixed the converted string or array; other input is returned as-is
+     */
+    public static function toUTF8(mixed $text): mixed
     {
-      foreach($text as $k => $v)
-      {
-        $text[$k] = self::toUTF8($v);
-      }
-      return $text;
-    }
+        if (is_array($text)) {
+            foreach ($text as $k => $v) {
+                $text[$k] = self::toUTF8($v);
+            }
 
-    if(!is_string($text)) {
-      return $text;
-    }
+            return $text;
+        }
+
+        if (!is_string($text)) {
+            // Not text (null, number, object, ...): hand it back untouched.
+            return $text;
+        }
 
-    $max = self::strlen($text);
-
-    $buf = "";
-    for($i = 0; $i < $max; $i++){
-        $c1 = $text[$i];
-        if($c1>="\xc0"){ //Should be converted to UTF8, if it's not UTF8 already
-          $c2 = $i+1 >= $max? "\x00" : $text[$i+1];
-          $c3 = $i+2 >= $max? "\x00" : $text[$i+2];
-          $c4 = $i+3 >= $max? "\x00" : $text[$i+3];
-            if($c1 >= "\xc0" & $c1 <= "\xdf"){ //looks like 2 bytes UTF8
-                if($c2 >= "\x80" && $c2 <= "\xbf"){ //yeah, almost sure it's UTF8 already
-                    $buf .= $c1 . $c2;
-                    $i++;
+        // Byte-wise scan. A lone Latin-1 byte 0xNN >= 0x80 is re-encoded as the two-byte UTF-8
+        // sequence 110000nn 10nnnnnn: lead byte (0xNN >> 6) | 0xC0, trail byte (0xNN & 0x3F) | 0x80.
+        // Bytes in 0x80-0xBF that have an entry in $win1252ToUtf8 use that entry instead.
+        $max = self::strlen($text);
+
+        $buf = '';
+        for ($i = 0; $i < $max; ++$i) {
+            $c1 = $text[$i];
+            if ($c1 >= "\xc0") { //Should be converted to UTF8, if it's not UTF8 already
+                $c2 = $i + 1 >= $max ? "\x00" : $text[$i + 1];
+                $c3 = $i + 2 >= $max ? "\x00" : $text[$i + 2];
+                $c4 = $i + 3 >= $max ? "\x00" : $text[$i + 3];
+                if ($c1 >= "\xc0" && $c1 <= "\xdf") { //looks like 2 bytes UTF8
+                if ($c2 >= "\x80" && $c2 <= "\xbf") { //yeah, almost sure it's UTF8 already
+                    $buf .= $c1.$c2;
+                    ++$i;
                 } else { //not valid UTF8.  Convert it.
-                    $cc1 = (chr(ord($c1) / 64) | "\xc0");
+                    $cc1 = chr(ord($c1) >> 6) | "\xc0"; // integer shift: no float is ever passed to chr()
                     $cc2 = ($c1 & "\x3f") | "\x80";
-                    $buf .= $cc1 . $cc2;
+                    $buf .= $cc1.$cc2;
                 }
-            } elseif($c1 >= "\xe0" & $c1 <= "\xef"){ //looks like 3 bytes UTF8
-                if($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf"){ //yeah, almost sure it's UTF8 already
-                    $buf .= $c1 . $c2 . $c3;
+                } elseif ($c1 >= "\xe0" && $c1 <= "\xef") { //looks like 3 bytes UTF8
+                if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf") { //yeah, almost sure it's UTF8 already
+                    $buf .= $c1.$c2.$c3;
                     $i = $i + 2;
                 } else { //not valid UTF8.  Convert it.
-                    $cc1 = (chr(ord($c1) / 64) | "\xc0");
+                    $cc1 = chr(ord($c1) >> 6) | "\xc0";
                     $cc2 = ($c1 & "\x3f") | "\x80";
-                    $buf .= $cc1 . $cc2;
+                    $buf .= $cc1.$cc2;
                 }
-            } elseif($c1 >= "\xf0" & $c1 <= "\xf7"){ //looks like 4 bytes UTF8
-                if($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf"){ //yeah, almost sure it's UTF8 already
-                    $buf .= $c1 . $c2 . $c3 . $c4;
+                } elseif ($c1 >= "\xf0" && $c1 <= "\xf7") { //looks like 4 bytes UTF8
+                if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf") { //yeah, almost sure it's UTF8 already
+                    $buf .= $c1.$c2.$c3.$c4;
                     $i = $i + 3;
                 } else { //not valid UTF8.  Convert it.
-                    $cc1 = (chr(ord($c1) / 64) | "\xc0");
+                    $cc1 = chr(ord($c1) >> 6) | "\xc0";
                     $cc2 = ($c1 & "\x3f") | "\x80";
-                    $buf .= $cc1 . $cc2;
+                    $buf .= $cc1.$cc2;
                 }
-            } else { //doesn't look like UTF8, but should be converted
-                    $cc1 = (chr(ord($c1) / 64) | "\xc0");
-                    $cc2 = (($c1 & "\x3f") | "\x80");
-                    $buf .= $cc1 . $cc2;
-            }
-        } elseif(($c1 & "\xc0") === "\x80"){ // needs conversion
-              if(isset(self::$win1252ToUtf8[ord($c1)])) { //found in Windows-1252 special cases
+                } else { //doesn't look like UTF8, but should be converted
+                    $cc1 = chr(ord($c1) >> 6) | "\xc0";
+                    $cc2 = ($c1 & "\x3f") | "\x80";
+                    $buf .= $cc1.$cc2;
+                }
+            } elseif (($c1 & "\xc0") === "\x80") { // needs conversion
+              if (isset(self::$win1252ToUtf8[ord($c1)])) { //found in Windows-1252 special cases
                   $buf .= self::$win1252ToUtf8[ord($c1)];
               } else {
-                $cc1 = (chr(ord($c1) / 64) | "\xc0");
-                $cc2 = (($c1 & "\x3f") | "\x80");
-                $buf .= $cc1 . $cc2;
+                  $cc1 = chr(ord($c1) >> 6) | "\xc0";
+                  $cc2 = ($c1 & "\x3f") | "\x80";
+                  $buf .= $cc1.$cc2;
               }
-        } else { // it doesn't need conversion
-            $buf .= $c1;
+            } else { // it doesn't need conversion
+                $buf .= $c1;
+            }
+        }
+
+        return $buf;
+    }
+
+    /**
+     * UTF-8 -> Windows-1252. Arrays are converted recursively; non-string values are returned unchanged.
+     *
+     * @param mixed       $text   string, (nested) array, or any other value
+     * @param string|null $option iconv mode forwarded to utf8_decode() (see the ICONV_* constants)
+     */
+    public static function toWin1252($text, ?string $option = self::WITHOUT_ICONV): mixed
+    {
+        if (is_string($text)) {
+            return static::utf8_decode($text, $option);
+        } elseif (is_array($text)) {
+            return array_map(static fn ($v) => self::toWin1252($v, $option), $text); // keys are preserved
+        } else {
+            return $text; // null, numbers, objects: handed back as-is, hence the mixed return type
         }
     }
-    return $buf;
-  }
-
-  static function toWin1252($text, $option = self::WITHOUT_ICONV) {
-    if(is_array($text)) {
-      foreach($text as $k => $v) {
-        $text[$k] = self::toWin1252($v, $option);
-      }
-      return $text;
-    } elseif(is_string($text)) {
-      return static::utf8_decode($text, $option);
-    } else {
-      return $text;
+
+    public static function toISO8859($text, ?string $option = self::WITHOUT_ICONV): mixed
+    {
+        return self::toWin1252($text, $option); // plain alias; mixed so whatever toWin1252() yields is passed on
+    }
+
+    public static function toLatin1($text, ?string $option = self::WITHOUT_ICONV): mixed
+    {
+        return self::toWin1252($text, $option); // plain alias; mixed so whatever toWin1252() yields is passed on
     }
-  }
-
-  static function toISO8859($text, $option = self::WITHOUT_ICONV) {
-    return self::toWin1252($text, $option);
-  }
-
-  static function toLatin1($text, $option = self::WITHOUT_ICONV) {
-    return self::toWin1252($text, $option);
-  }
-
-  static function fixUTF8($text, $option = self::WITHOUT_ICONV){
-    if(is_array($text)) {
-      foreach($text as $k => $v) {
-        $text[$k] = self::fixUTF8($v, $option);
-      }
-      return $text;
+
+    /**
+     * Repeatedly decode the text to Windows-1252 and re-encode it to UTF-8 until it stops changing
+     * (presumably to undo text that was UTF-8 encoded more than once).
+     * Arrays are processed recursively; values that are neither string nor array are returned unchanged.
+     *
+     * @param mixed       $text   string, (nested) array, or any other value
+     * @param string|null $option iconv mode forwarded to utf8_decode() (see the ICONV_* constants)
+     */
+    public static function fixUTF8($text, ?string $option = self::WITHOUT_ICONV): mixed
+    {
+        if (is_array($text)) {
+            return array_map(static fn ($v) => self::fixUTF8($v, $option), $text); // keys are preserved
+        }
+        if (!is_string($text)) {
+            return $text; // pass-through is why the return type is mixed
+        }
+        $last = '';
+        while ($last !== $text) { // strict: loose != would treat numeric strings like "1e3" and "1000" as equal
+            $last = $text;
+            $text = self::toUTF8(static::utf8_decode($text, $option));
+        }
+        return self::toUTF8(static::utf8_decode($text, $option));
     }
 
-    if(!is_string($text)) {
-      return $text;
+    public static function UTF8FixWin1252Chars($text): array|string
+    {
+        // Repairs UTF-8 text whose source was Windows-1252 but was converted as if it were ISO8859-1,
+        // i.e. the characters 80 to 9F were not mapped. See: http://en.wikipedia.org/wiki/Windows-1252
+        $broken = array_keys(self::$brokenUtf8ToUtf8);
+        $fixed = array_values(self::$brokenUtf8ToUtf8);
+        return str_replace($broken, $fixed, $text);
     }
 
-    $last = "";
-    while($last <> $text){
-      $last = $text;
-      $text = self::toUTF8(static::utf8_decode($text, $option));
+    /** Strip a leading UTF-8 byte-order mark (EF BB BF) from $str; null is treated as ''. */
+    public static function removeBOM(?string $str = ''): string
+    {
+        // Coerce null first: substr(null, ...) is deprecated since PHP 8.1 and returning null would
+        // violate the declared string return type.
+        $str ??= '';
+        return str_starts_with($str, "\xEF\xBB\xBF") ? substr($str, 3) : $str;
     }
-    $text = self::toUTF8(static::utf8_decode($text, $option));
-    return $text;
-  }
 
-  static function UTF8FixWin1252Chars($text){
-    // If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
-    // (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
-    // See: http://en.wikipedia.org/wiki/Windows-1252
+    /** Byte length of $text; uses mb_strlen(..., '8bit') when mbstring.func_overload has bit 2 set. */
+    protected static function strlen(string $text): bool|int
+    {
+        $overloaded = function_exists('mb_strlen') && ((int) ini_get('mbstring.func_overload')) & 2;
+        return $overloaded ? mb_strlen($text, '8bit') : strlen($text);
+    }
 
-    return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
-  }
+    /**
+     * Map a loosely written encoding label to 'ISO-8859-1' or 'UTF-8'.
+     * Case, punctuation and whitespace are ignored ("iso 8859-1", "Latin_1", "windows-1252", ...).
+     * Windows-1252 labels map to 'ISO-8859-1' as well; anything unrecognised falls back to 'UTF-8'.
+     */
+    public static function normalizeEncoding(string $encodingLabel): string
+    {
+        // Keep only letters and digits; whitespace is dropped too so that "ISO 8859-1" is recognised.
+        $encoding = (string) preg_replace('/[^A-Z0-9]/', '', strtoupper($encodingLabel));
+        $equivalences = [
+            'ISO88591' => 'ISO-8859-1',
+            'ISO8859' => 'ISO-8859-1',
+            'ISO' => 'ISO-8859-1',
+            'LATIN1' => 'ISO-8859-1',
+            'LATIN' => 'ISO-8859-1',
+            'UTF8' => 'UTF-8',
+            'UTF' => 'UTF-8',
+            'WIN1252' => 'ISO-8859-1',
+            'WINDOWS1252' => 'ISO-8859-1',
+        ];
 
-  static function removeBOM($str=""){
-    if(substr($str, 0,3) === pack("CCC",0xef,0xbb,0xbf)) {
-      $str=substr($str, 3);
+        return $equivalences[$encoding] ?? 'UTF-8';
     }
-    return $str;
-  }
-
-  protected static function strlen($text){
-    return (function_exists('mb_strlen') && ((int) ini_get('mbstring.func_overload')) & 2) ?
-           mb_strlen($text,'8bit') : strlen($text);
-  }
-
-  public static function normalizeEncoding($encodingLabel)
-  {
-    $encoding = strtoupper($encodingLabel);
-    $encoding = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
-    $equivalences = array(
-        'ISO88591' => 'ISO-8859-1',
-        'ISO8859'  => 'ISO-8859-1',
-        'ISO'      => 'ISO-8859-1',
-        'LATIN1'   => 'ISO-8859-1',
-        'LATIN'    => 'ISO-8859-1',
-        'UTF8'     => 'UTF-8',
-        'UTF'      => 'UTF-8',
-        'WIN1252'  => 'ISO-8859-1',
-        'WINDOWS1252' => 'ISO-8859-1'
-    );
-
-    if(empty($equivalences[$encoding])){
-      return 'UTF-8';
+
+    /**
+     * Convert $text to the encoding named by $encodingLabel (resolved via normalizeEncoding()).
+     *
+     * @param mixed $text string, (nested) array, or any other value; forwarded to toLatin1() or toUTF8()
+     */
+    public static function encode(string $encodingLabel, $text): mixed
+    {
+        $toLatin1 = 'ISO-8859-1' === self::normalizeEncoding($encodingLabel);
+        return $toLatin1 ? self::toLatin1($text) : self::toUTF8($text);
     }
 
-    return $equivalences[$encoding];
-  }
-
-  public static function encode($encodingLabel, $text)
-  {
-    $encodingLabel = self::normalizeEncoding($encodingLabel);
-    if($encodingLabel === 'ISO-8859-1') return self::toLatin1($text);
-    return self::toUTF8($text);
-  }
-
-  protected static function utf8_decode($text, $option = self::WITHOUT_ICONV)
-  {
-    if ($option == self::WITHOUT_ICONV || !function_exists('iconv')) {
-       $o = utf8_decode(
-         str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text))
-       );
-    } else {
-       $o = iconv("UTF-8", "Windows-1252" . ($option === self::ICONV_TRANSLIT ? '//TRANSLIT' : ($option === self::ICONV_IGNORE ? '//IGNORE' : '')), $text);
+    /**
+     * UTF-8 -> Windows-1252. When $option loosely equals WITHOUT_ICONV (null included) or iconv is missing,
+     * the $utf8ToWin1252 table plus PHP's utf8_decode() is used; otherwise iconv() does the conversion.
+     */
+    protected static function utf8_decode($text, ?string $option = self::WITHOUT_ICONV): bool|string
+    {
+        if (self::WITHOUT_ICONV == $option || !function_exists('iconv')) {
+            $utf8 = str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text));
+            return utf8_decode($utf8);
+        }
+        $suffix = match ($option) {
+            self::ICONV_TRANSLIT => '//TRANSLIT',
+            self::ICONV_IGNORE => '//IGNORE',
+            default => '',
+        };
+        return iconv('UTF-8', 'Windows-1252'.$suffix, $text);
     }
-    return $o;
-  }
 }