From ab44a0d6dbe2eb386a768a83016a9d13d33fb442 Mon Sep 17 00:00:00 2001 From: bbopen Date: Wed, 21 Jan 2026 12:29:30 -0800 Subject: [PATCH 1/4] fix(codec): extract Arrow values as plain arrays for ndarray decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix two issues in ndarray Arrow decoding: 1. **1D arrays returning raw Arrow Table**: Previously, the code only extracted values from Arrow tables when shape.length > 1, causing 1D arrays to return the raw Table object instead of the values. 2. **Typed arrays in reshaped data**: Arrow's column.toArray() returns typed arrays (Int32Array, BigInt64Array, etc.). When slicing these during reshape, they remained as typed arrays instead of plain JS arrays, causing test failures. Solution: - Add `typedArrayToPlain()` helper to convert typed arrays to plain arrays, with BigInt → Number conversion for safe integer range - Add `extractArrowValues()` helper to extract values from Arrow tables - Update ndarray decoder to always extract values (not just multi-dim) - Convert typed arrays to plain arrays before reshaping This fixes the codec-suite CI failures where tests expected plain arrays but received Arrow Tables or BigInt64Arrays. Co-Authored-By: Claude Opus 4.5 --- src/utils/codec.ts | 110 ++++++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 30 deletions(-) diff --git a/src/utils/codec.ts b/src/utils/codec.ts index 8f03bce..bed5a2b 100644 --- a/src/utils/codec.ts +++ b/src/utils/codec.ts @@ -243,6 +243,63 @@ function isPromiseLike(value: unknown): value is PromiseLike { ); } +/** + * Convert a typed array (Int32Array, Float64Array, BigInt64Array, etc.) to a plain JS array. + * + * Why: Arrow's column.toArray() returns typed arrays, but we need plain arrays for + * JSON-compatible output and proper nested array reshaping. + * + * @param arr - Typed array or plain array + * @returns Plain JavaScript array with values converted (BigInt → Number where safe) + */ +function typedArrayToPlain(arr: unknown): unknown[] { + if (Array.isArray(arr)) { + return arr; + } + // Handle typed arrays (Int32Array, Float64Array, BigInt64Array, etc.) + if (ArrayBuffer.isView(arr) && 'length' in arr) { + const typedArr = arr as unknown as { length: number; [index: number]: unknown }; + const result: unknown[] = []; + for (let i = 0; i < typedArr.length; i++) { + const val = typedArr[i]; + // Convert BigInt to Number if within safe integer range + if (typeof val === 'bigint') { + if (val >= BigInt(Number.MIN_SAFE_INTEGER) && val <= BigInt(Number.MAX_SAFE_INTEGER)) { + result.push(Number(val)); + } else { + result.push(val); // Keep as BigInt if too large + } + } else { + result.push(val); + } + } + return result; + } + // Fallback: try to convert to array + return Array.from(arr as Iterable); +} + +/** + * Extract values from an Arrow table as a plain JavaScript array. + * + * Why: Arrow decoding returns Table objects, not raw arrays. We need to extract + * the column values and convert any typed arrays to plain arrays. + */ +function extractArrowValues(data: unknown): unknown[] | null { + if (Array.isArray(data)) { + return data; + } + // Arrow table - extract values from first column + const table = data as ArrowTable & { getChildAt?: (i: number) => { toArray?: () => unknown } }; + if (typeof table.getChildAt === 'function') { + const column = table.getChildAt(0); + if (column && typeof column.toArray === 'function') { + return typedArrayToPlain(column.toArray()); + } + } + return null; +} + /** * Reshape a flat array into a multi-dimensional nested array. * @@ -251,7 +308,7 @@ function isPromiseLike(value: unknown): value is PromiseLike { * binary efficiency while working with current arrow-js (which doesn't yet support * FixedShapeTensorArray). See: https://github.com/apache/arrow-js/issues/115 * - * @param flat - Flat array of values + * @param flat - Flat array of values (must be a plain array, not typed array) * @param shape - Target shape, e.g., [2, 3] for a 2x3 matrix * @returns Nested array with the specified shape */ @@ -371,37 +428,30 @@ function decodeEnvelopeCore( const bytes = fromBase64(b64); const decoded = decodeArrow(bytes); - // Reshape if multi-dimensional (Arrow only handles 1D, so we flatten on encode) - if (shape && shape.length > 1) { - if (isPromiseLike(decoded)) { - return decoded.then(data => { - if (Array.isArray(data)) { - return reshapeArray(data, shape); - } - // Arrow table - extract values and reshape - const table = data as ArrowTable & { getChildAt?: (i: number) => { toArray?: () => unknown[] } }; - if (typeof table.getChildAt === 'function') { - const column = table.getChildAt(0); - if (column && typeof column.toArray === 'function') { - return reshapeArray(column.toArray(), shape); - } - } - return data; - }); - } - if (Array.isArray(decoded)) { - return reshapeArray(decoded, shape); - } - // Arrow table - extract values and reshape - const table = decoded as ArrowTable & { getChildAt?: (i: number) => { toArray?: () => unknown[] } }; - if (typeof table.getChildAt === 'function') { - const column = table.getChildAt(0); - if (column && typeof column.toArray === 'function') { - return reshapeArray(column.toArray(), shape); + // Extract values from Arrow table and reshape if multi-dimensional + // Arrow only handles 1D arrays, so we flatten on encode and reshape here + if (isPromiseLike(decoded)) { + return decoded.then(data => { + const values = extractArrowValues(data); + if (!values) { + return data; // Fallback: return raw data if extraction fails } - } + // Reshape if multi-dimensional, otherwise return flat array + if (shape && shape.length > 1) { + return reshapeArray(values, shape); + } + return values; + }); + } + const values = extractArrowValues(decoded); + if (!values) { + return decoded; // Fallback: return raw data if extraction fails + } + // Reshape if multi-dimensional, otherwise return flat array + if (shape && shape.length > 1) { + return reshapeArray(values, shape); } - return decoded; + return values; } if (encoding === 'json') { if (!('data' in (value as object))) { From 6d0bb58f267e2ecdb2aea1b9d921fffa8b04d445 Mon Sep 17 00:00:00 2001 From: bbopen Date: Wed, 21 Jan 2026 12:33:22 -0800 Subject: [PATCH 2/4] ci: trigger codec-suite with area:codec label From 72fd3caede8de8291b48e231e2583a7052f831cb Mon Sep 17 00:00:00 2001 From: bbopen Date: Wed, 21 Jan 2026 12:37:47 -0800 Subject: [PATCH 3/4] fix: address CodeRabbit review feedback 1. Add iterable guard before Array.from fallback in typedArrayToPlain() to prevent throwing on null/undefined/non-iterables 2. Fix scalar handling: change condition from shape.length > 1 to shape.length !== 1 so that scalars (shape.length === 0) are properly extracted via reshapeArray returning flat[0] Co-Authored-By: Claude Opus 4.5 --- src/utils/codec.ts | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/utils/codec.ts b/src/utils/codec.ts index bed5a2b..22aae70 100644 --- a/src/utils/codec.ts +++ b/src/utils/codec.ts @@ -275,8 +275,12 @@ function typedArrayToPlain(arr: unknown): unknown[] { } return result; } - // Fallback: try to convert to array - return Array.from(arr as Iterable); + // Fallback: check if iterable before converting + if (arr != null && typeof arr === 'object' && Symbol.iterator in arr) { + return Array.from(arr as Iterable); + } + // Non-iterable: return empty array (shouldn't happen with valid Arrow data) + return []; } /** @@ -428,16 +432,18 @@ function decodeEnvelopeCore( const bytes = fromBase64(b64); const decoded = decodeArrow(bytes); - // Extract values from Arrow table and reshape if multi-dimensional + // Extract values from Arrow table and reshape if needed // Arrow only handles 1D arrays, so we flatten on encode and reshape here + // Reshape for: scalars (shape.length === 0) and multi-dim (shape.length > 1) + // Skip reshape for: 1D arrays (shape.length === 1) - return as-is if (isPromiseLike(decoded)) { return decoded.then(data => { const values = extractArrowValues(data); if (!values) { return data; // Fallback: return raw data if extraction fails } - // Reshape if multi-dimensional, otherwise return flat array - if (shape && shape.length > 1) { + // Reshape scalars and multi-dimensional arrays, but not 1D + if (shape && shape.length !== 1) { return reshapeArray(values, shape); } return values; @@ -447,8 +453,8 @@ function decodeEnvelopeCore( if (!values) { return decoded; // Fallback: return raw data if extraction fails } - // Reshape if multi-dimensional, otherwise return flat array - if (shape && shape.length > 1) { + // Reshape scalars and multi-dimensional arrays, but not 1D + if (shape && shape.length !== 1) { return reshapeArray(values, shape); } return values; From 2dd95a616f243520b6c359349d82e5f6277e3547 Mon Sep 17 00:00:00 2001 From: bbopen Date: Wed, 21 Jan 2026 12:40:04 -0800 Subject: [PATCH 4/4] fix: use strict equality check in iterable guard --- src/utils/codec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/codec.ts b/src/utils/codec.ts index 22aae70..60bddfd 100644 --- a/src/utils/codec.ts +++ b/src/utils/codec.ts @@ -276,7 +276,7 @@ function typedArrayToPlain(arr: unknown): unknown[] { return result; } // Fallback: check if iterable before converting - if (arr != null && typeof arr === 'object' && Symbol.iterator in arr) { + if (arr !== null && arr !== undefined && typeof arr === 'object' && Symbol.iterator in arr) { return Array.from(arr as Iterable); } // Non-iterable: return empty array (shouldn't happen with valid Arrow data)