From b23f394f66fc5601b51af7f39109d8c19a62482e Mon Sep 17 00:00:00 2001 From: zaenalcoders Date: Thu, 4 Dec 2025 16:30:38 +0700 Subject: [PATCH] feat: add isCompressed() detection API with heuristics and tests Signed-off-by: zaenalcoders --- src/index.ts | 2 + .../__test__/__snapshots__/index.test.ts.snap | 7 ++ src/isCompressed/__test__/index.test.ts | 15 +++ .../__test__/isCompressed.test.ts | 40 ++++++++ src/isCompressed/index.ts | 99 +++++++++++++++++++ 5 files changed, 163 insertions(+) create mode 100644 src/isCompressed/__test__/__snapshots__/index.test.ts.snap create mode 100644 src/isCompressed/__test__/index.test.ts create mode 100644 src/isCompressed/__test__/isCompressed.test.ts create mode 100644 src/isCompressed/index.ts diff --git a/src/index.ts b/src/index.ts index 21e7876..90d85fc 100644 --- a/src/index.ts +++ b/src/index.ts @@ -9,6 +9,7 @@ import { _decompress } from "./_decompress"; import { compressToBase64, decompressFromBase64 } from "./base64"; import { compressToCustom, decompressFromCustom } from "./custom"; import { compressToEncodedURIComponent, decompressFromEncodedURIComponent } from "./encodedURIComponent"; +import { isCompressed } from "./isCompressed"; import { loadBinaryFile, saveBinaryFile } from "./node"; import { compress, decompress } from "./raw"; import { @@ -38,4 +39,5 @@ export default { decompressFromUTF16, loadBinaryFile, saveBinaryFile, + isCompressed }; diff --git a/src/isCompressed/__test__/__snapshots__/index.test.ts.snap b/src/isCompressed/__test__/__snapshots__/index.test.ts.snap new file mode 100644 index 0000000..d7e6a66 --- /dev/null +++ b/src/isCompressed/__test__/__snapshots__/index.test.ts.snap @@ -0,0 +1,7 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`isCompressed/index.ts > was the change deliberate? 
1`] = ` +{ + "isCompressed": [Function], +} +`; diff --git a/src/isCompressed/__test__/index.test.ts b/src/isCompressed/__test__/index.test.ts new file mode 100644 index 0000000..1be120b --- /dev/null +++ b/src/isCompressed/__test__/index.test.ts @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2013 Pieroxy + * + * SPDX-License-Identifier: MIT + */ + +import { describe, test } from "vitest"; + +import * as index from "../index"; + +describe("isCompressed/index.ts", () => { + test("was the change deliberate?", ({ expect }) => { + expect(index).toMatchSnapshot(); + }); +}); diff --git a/src/isCompressed/__test__/isCompressed.test.ts b/src/isCompressed/__test__/isCompressed.test.ts new file mode 100644 index 0000000..c521264 --- /dev/null +++ b/src/isCompressed/__test__/isCompressed.test.ts @@ -0,0 +1,40 @@ +import { describe, it, expect } from "vitest"; +import { isCompressed } from ".."; +import { compress } from "../../raw"; +import { compressToBase64 } from "../../base64"; +import { compressToEncodedURIComponent } from "../../encodedURIComponent"; +import { compressToUTF16 } from "../../UTF16"; +import { compressToUint8Array } from "../../Uint8Array"; + +describe("isCompressed()", () => { + const raw = "Hello World"; + + it("returns false for raw string", () => { + expect(isCompressed(raw)).toBe(false); + }); + + it("detects compress()", () => { + const c = compress(raw); + expect(isCompressed(c)).toBe(true); + }); + + it("detects compressToBase64()", () => { + const c = compressToBase64(raw); + expect(isCompressed(c)).toBe(true); + }); + + it("detects compressToEncodedURIComponent()", () => { + const c = compressToEncodedURIComponent(raw); + expect(isCompressed(c)).toBe(true); + }); + + it("detects compressToUTF16()", () => { + const c = compressToUTF16(raw); + expect(isCompressed(c)).toBe(true); + }); + + it("detects compressToUint8Array()", () => { + const c = compressToUint8Array(raw); + expect(isCompressed(c)).toBe(true); + }); +}); diff --git 
// File: src/isCompressed/index.ts

/** Matches any UTF-16 code unit above 0xFF (i.e. outside Latin-1). */
const WIDE_CODE_UNIT_PATTERN = /[^\u0000-\u00ff]/;

/**
 * Standard Base64 alphabet followed by an optional trailing run of `=`.
 * Up to three `=` are allowed because lz-string pads to a multiple of four
 * characters (e.g. the empty string compresses to `Q===`).
 */
const BASE64_PATTERN = /^[A-Za-z0-9+\/]+={0,3}$/;

/**
 * URI-safe alphabet used by `compressToEncodedURIComponent()`:
 * letters, digits, `+`, `-` and `$`.
 */
const URI_SAFE_PATTERN = /^[A-Za-z0-9+\-$]+$/;

/** Offset `compressToUTF16()` adds to every 15-bit value. */
const UTF16_OFFSET = 32;

/** Largest code unit `compressToUTF16()` can emit (offset + 15-bit maximum). */
const UTF16_MAX_CODE = UTF16_OFFSET + 0x7fff;

/**
 * Determine whether the string likely represents raw `compress()` output.
 * That output packs 16 bits into every character, so it commonly contains
 * code units above 255, which ordinary Latin-1 text never does.
 *
 * @param str - The input string to evaluate.
 * @returns `true` if at least one code unit is greater than 255.
 */
function looksLikeUTF16(str: string): boolean {
    return WIDE_CODE_UNIT_PATTERN.test(str);
}

/**
 * Detect whether the string is a plausible `compressToBase64()` result:
 * only Base64 alphabet characters, padding (`=`) solely at the end, and a
 * total length divisible by 4.
 *
 * @param str - The string to validate.
 * @returns `true` if the string is shaped like padded Base64.
 */
function looksLikeBase64(str: string): boolean {
    return str.length % 4 === 0 && BASE64_PATTERN.test(str);
}

/**
 * Detect whether the string consists solely of the URI-safe alphabet
 * emitted by `compressToEncodedURIComponent()`.
 *
 * Note that any plain alphanumeric word also satisfies this check; the
 * alphabet alone cannot distinguish compressed data from short identifiers.
 *
 * @param str - The string to inspect.
 * @returns `true` if every character belongs to the URI-safe alphabet.
 */
function looksLikeURIEncoded(str: string): boolean {
    return URI_SAFE_PATTERN.test(str);
}

/**
 * Identify strings shaped like `compressToUTF16()` output: one or more
 * code units in the range `[32, 32 + 0x7FFF]` followed by the trailing
 * space that the encoder appends.
 *
 * @param str - The string to check.
 * @returns `true` if the string matches that layout.
 */
function looksLikeUTF16Special(str: string): boolean {
    const lastIndex = str.length - 1;

    // At least one payload character plus the terminating space.
    if (lastIndex < 1 || str.charCodeAt(lastIndex) !== UTF16_OFFSET) {
        return false;
    }

    for (let i = 0; i < lastIndex; i++) {
        const code = str.charCodeAt(i);
        if (code < UTF16_OFFSET || code > UTF16_MAX_CODE) {
            return false;
        }
    }
    return true;
}

/**
 * Heuristically determine whether a value appears to be LZ-String
 * compressed data in any of the primary output formats:
 *
 * - `compress()`                      → contains code units above 255
 * - `compressToUTF16()`               → offset 15-bit code units + trailing space
 * - `compressToBase64()`              → padded Base64
 * - `compressToEncodedURIComponent()` → URI-safe alphabet
 * - `compressToUint8Array()`          → any `Uint8Array` instance
 *
 * No decompression is attempted, so this is a shape check only: it can
 * report `true` for ordinary text that happens to fit one of the formats
 * (for example a plain alphanumeric word), and a `Uint8Array` is accepted
 * on type alone without inspecting its bytes.
 *
 * @param input - The value to evaluate. May be a string or a Uint8Array.
 * @returns `true` if the input is shaped like LZ-String compressed output;
 *          `false` for non-matching strings and for every other type.
 */
export function isCompressed(input: unknown): boolean {
    if (input instanceof Uint8Array) {
        return true;
    }

    if (typeof input !== "string") {
        return false;
    }

    return (
        looksLikeUTF16(input) ||
        looksLikeUTF16Special(input) ||
        looksLikeBase64(input) ||
        looksLikeURIEncoded(input)
    );
}