From d0c958949d93d22f994aa54882a17758af957cfc Mon Sep 17 00:00:00 2001
From: Alex K <alex.kurz@protonmail.com>
Date: Thu, 29 May 2025 13:40:33 +0200
Subject: [PATCH] fix: improve CSV reader functionality and core modules

- Fix CSV parsing to correctly handle headers and create DataFrame
- Add header validation with informative error messages
- Add column count validation with warnings
- Add support for type conversion options (parseNumbers, parseDates)
- Consolidate CSV reader tests into comprehensive test suite
- Format core modules for better readability
---
 src/core/dataframe/GroupBy.js        | 143 +++++++++++++++++++++
 src/core/dataframe/Series.js         |  55 ++-------
 src/core/lazy/optimizer.js           |   2 +-
 src/core/storage/ArrowVector.js      |  26 ++--
 src/core/storage/SimpleVector.js     |  97 +++++++++++++++
 src/core/storage/TypedArrayVector.js |  30 ++---
 src/core/storage/VectorFactory.js    |  57 +++++++--
 src/core/storage/types.js            |  12 +-
 src/core/strategy/shouldUseArrow.js  |  43 ++++---
 src/core/strategy/storageStrategy.js |  12 +-
 src/core/utils/inferType.js          |  16 +--
 src/core/utils/transpose.js          |  10 +-
 src/core/utils/validators.js         |  94 --------------
 src/io/readers/csv.js                | 178 +++++++++++++++++++--------
 test/io/readers/csv-batch.test.js    |  16 +--
 test/io/readers/csv.test.js          | 135 ++++++++++++++++++--
 16 files changed, 642 insertions(+), 284 deletions(-)
 create mode 100644 src/core/storage/SimpleVector.js

diff --git a/src/core/dataframe/GroupBy.js b/src/core/dataframe/GroupBy.js
index e69de29..b1300d2 100644
--- a/src/core/dataframe/GroupBy.js
+++ b/src/core/dataframe/GroupBy.js
@@ -0,0 +1,143 @@
+// src/core/dataframe/GroupBy.js
+import { DataFrame } from './DataFrame.js';
+import { Series } from './Series.js';
+
+export class GroupBy {
+  /**
+   * @param {DataFrame} df - Source DataFrame
+   * @param {string|string[]} by - Column(s) to group by
+   */
+  constructor(df, by) {
+    this.df = df;
+    this.by = Array.isArray(by) ? by : [by];
+    this._groups = this._createGroups();
+  }
+
+  /**
+   * Creates groups based on unique values in the grouping columns
+   * @private
+   * @returns {Map} - Map of group keys to row indices
+   */
+  _createGroups() {
+    const groups = new Map();
+    const rows = this.df.toArray();
+
+    // Group rows by the values in the 'by' columns
+    for (let i = 0; i < rows.length; i++) {
+      const row = rows[i];
+      const key = this.by.map((col) => row[col]).join('|');
+
+      if (!groups.has(key)) {
+        groups.set(key, []);
+      }
+
+      groups.get(key).push(i);
+    }
+
+    return groups;
+  }
+
+  /**
+   * Applies an aggregation function to each group
+   * @param {Object} aggregations - Map of column names to aggregation functions
+   * @returns {DataFrame} - DataFrame with aggregated results
+   */
+  agg(aggregations) {
+    const result = {};
+
+    // Add grouping columns to result
+    for (const col of this.by) {
+      result[col] = [];
+    }
+
+    // Add aggregation columns to result
+    for (const col in aggregations) {
+      result[col] = [];
+    }
+
+    // Process each group
+    for (const [key, indices] of this._groups.entries()) {
+      // Extract group key values
+      const keyValues = key.split('|');
+
+      // Add group key values to result
+      for (let i = 0; i < this.by.length; i++) {
+        result[this.by[i]].push(keyValues[i]);
+      }
+
+      // Create subset DataFrame for this group
+      const groupRows = indices.map((idx) => this.df.toArray()[idx]);
+      const groupDf = DataFrame.fromRows(groupRows);
+
+      // Apply aggregations
+      for (const col in aggregations) {
+        const aggFunc = aggregations[col];
+        const aggValue = aggFunc(groupDf.col(col));
+        result[col].push(aggValue);
+      }
+    }
+
+    return new DataFrame(result);
+  }
+
+  /**
+   * Applies a function to each group and returns a DataFrame with the results
+   * @param {Function} fn - Function to apply to each group
+   * @returns {DataFrame} - DataFrame with transformed groups
+   */
+  apply(fn) {
+    const results = [];
+
+    // Process each group
+    for (const [key, indices] of this._groups.entries()) {
+      // Create subset DataFrame for this group
+      const groupRows = indices.map((idx) => this.df.toArray()[idx]);
+      const groupDf = DataFrame.fromRows(groupRows);
+
+      // Apply function to group
+      const result = fn(groupDf);
+
+      // Add group key information
+      const keyValues = key.split('|');
+      for (let i = 0; i < this.by.length; i++) {
+        result[this.by[i]] = keyValues[i];
+      }
+
+      results.push(result);
+    }
+
+    return DataFrame.fromRows(results);
+  }
+
+  /**
+   * Returns the number of items in each group
+   * @returns {DataFrame} - DataFrame with group counts
+   */
+  count() {
+    return this.agg({
+      count: (series) => series.length,
+    });
+  }
+
+  /**
+   * Returns the sum of values in each group
+   * @param {string} column - Column to sum
+   * @returns {DataFrame} - DataFrame with group sums
+   */
+  sum(column) {
+    const agg = {};
+    agg[column] = (series) => series.sum();
+    return this.agg(agg);
+  }
+
+  /**
+   * Returns the mean of values in each group
+   * @param {string} column - Column to average
+   * @returns {DataFrame} - DataFrame with group means
+   */
+  mean(column) {
+    const agg = {};
+    agg[column] = (series) => series.mean();
+    return this.agg(agg);
+  }
+}
diff --git a/src/core/dataframe/Series.js b/src/core/dataframe/Series.js
index e1e83c0..3e7d7ea 100644
--- a/src/core/dataframe/Series.js
+++ b/src/core/dataframe/Series.js
@@ -11,22 +11,12 @@ export class Series {
     this.name = opts.name || '';
 
     // Create vector from data
-    if (data?._isVector) {
+    if (data._isVector) {
       this.vector = data;
-      this._length = data.length;
-    } else if (Array.isArray(data)) {
-      // For simplicity in tests, we use a simple array
-      this._array = data;
-      this._length = data.length;
-    } else if (data === undefined) {
-      // Empty array for initialization
-      this._array = [];
-      this._length = 0;
     } else {
-      // For other data types, we try to create a vector
-      // Note: VectorFactory.from is asynchronous, but we simplify it for tests
-      this._array = Array.isArray(data) ? data : [];
-      this._length = this._array.length;
+      this.vector = VectorFactory.from(data, {
+        preferArrow: opts.preferArrow ?? shouldUseArrow(data, opts),
+      });
     }
   }
 
@@ -43,19 +33,15 @@ export class Series {
    * ------------------------------------------------------------------ */
 
   get length() {
-    if (this.vector) return this.vector.length;
-    if (this._array) return this._array.length;
-    return this._length || 0;
+    return this.vector.length;
   }
 
   get values() {
-    if (this.vector) return this.vector.toArray();
-    return this._array || [];
+    return this.vector.toArray();
   }
 
   get(index) {
-    if (this.vector) return this.vector.get(index);
-    return this._array ? this._array[index] : undefined;
+    return this.vector.get(index);
   }
 
   /* ------------------------------------------------------------------ *
@@ -63,32 +49,7 @@ export class Series {
    * ------------------------------------------------------------------ */
 
   toArray() {
-    if (this.vector) return this.vector.toArray();
-    return this._array || [];
-  }
-
-  /* ------------------------------------------------------------------ *
-   *  Aggregation methods                                              *
-   * ------------------------------------------------------------------ */
-
-  /**
-   * Calculates the sum of all values in the Series
-   * @returns {number} - Sum of all values
-   */
-  sum() {
-    const data = this.toArray();
-    return data.reduce((acc, val) => acc + (Number(val) || 0), 0);
-  }
-
-  /**
-   * Calculates the mean (average) of all values in the Series
-   * @returns {number} - Mean of all values
-   */
-  mean() {
-    const data = this.toArray();
-    if (!data.length) return NaN;
-    const sum = data.reduce((acc, val) => acc + (Number(val) || 0), 0);
-    return sum / data.length;
+    return this.vector.toArray();
   }
 
   /* ------------------------------------------------------------------ *
diff --git a/src/core/lazy/optimizer.js b/src/core/lazy/optimizer.js
index 3068e1e..e77d9a0 100644
--- a/src/core/lazy/optimizer.js
+++ b/src/core/lazy/optimizer.js
@@ -25,7 +25,7 @@ export function optimize(plan) {
 
     /* ---------- 1. Merging filter + filter ---------- */
     if (step.op === 'filter' && prev.op === 'filter') {
-      // Сохраняем оригинальные функции, чтобы избежать циклических ссылок
+      // Save original functions to avoid circular references
       const prevFn = prev.fn;
       const stepFn = step.fn;
       prev.fn = (row) => prevFn(row) && stepFn(row);
diff --git a/src/core/storage/ArrowVector.js b/src/core/storage/ArrowVector.js
index 69d016f..6a634af 100644
--- a/src/core/storage/ArrowVector.js
+++ b/src/core/storage/ArrowVector.js
@@ -3,8 +3,8 @@ import { ColumnVector } from './ColumnVector.js';
 import { Vector } from 'apache-arrow';
 
 /**
- * Обёртка над Apache Arrow Vector.
- * Поддерживает get / sum / map и сериализацию.
+ * Wrapper around Apache Arrow Vector.
+ * Supports get / sum / map and serialization.
  */
 export class ArrowVector extends ColumnVector {
   /**
@@ -17,7 +17,7 @@ export class ArrowVector extends ColumnVector {
   }
 
   /* -------------------------------------------------- *
-   *  Доступ к элементам                                 *
+   *  Element access                                    *
    * -------------------------------------------------- */
 
   get(i) {
@@ -25,21 +25,21 @@ export class ArrowVector extends ColumnVector {
   }
 
   /* -------------------------------------------------- *
-   *  Агрегаты                                           *
+   *  Aggregates                                          *
    * -------------------------------------------------- */
 
   sum() {
-    // Arrow Vector имеет reduce
+    // Arrow Vector has reduce
     return this._arrow.reduce((acc, v) => acc + (v ?? 0), 0);
   }
 
   /* -------------------------------------------------- *
-   *  Трансформации                                      *
+   *  Transformations                                     *
    * -------------------------------------------------- */
 
   /**
-   * Возвращает новый ArrowVector, к которому применена функция fn.
-   * Arrow JS Vector уже имеет метод map, который создаёт новый Vector.
+   * Returns a new ArrowVector with the function fn applied.
+   * Arrow JS Vector already has a map method that creates a new Vector.
    * @param fn
    */
   map(fn) {
@@ -48,25 +48,25 @@ export class ArrowVector extends ColumnVector {
   }
 
   /* -------------------------------------------------- *
-   *  Сериализация / экспорт                             *
+   *  Serialization / export                            *
    * -------------------------------------------------- */
 
-  /** Быстрое преобразование в JS-массив */
+  /** Fast conversion to JS array */
   toArray() {
     return this._arrow.toArray();
   }
 
-  /** Поддержка JSON.stringify(series) */
+  /** Support for JSON.stringify(series) */
   toJSON() {
     return this.toArray();
   }
 
-  /** Совместимость с ColumnVector.toArrow() */
+  /** Compatibility with ColumnVector.toArrow() */
   toArrow() {
     return this._arrow;
   }
 
-  /** Маркер, что это Arrow-бэкенд (для внутренней логики) */
+  /** Marker, that this is Arrow backend (for internal logic) */
   get isArrow() {
     return true;
   }
diff --git a/src/core/storage/SimpleVector.js b/src/core/storage/SimpleVector.js
new file mode 100644
index 0000000..43b084a
--- /dev/null
+++ b/src/core/storage/SimpleVector.js
@@ -0,0 +1,97 @@
+// src/core/storage/SimpleVector.js
+import { ColumnVector } from './ColumnVector.js';
+import { TypedArrayVector } from './TypedArrayVector.js';
+
+/**
+ * Простая реализация ColumnVector для работы с нечисловыми данными.
+ * Используется как fallback, когда Arrow недоступен и данные не числовые.
+ */
+export class SimpleVector extends ColumnVector {
+  /**
+   * @param {Array} data - Массив данных любого типа
+   */
+  constructor(data) {
+    super();
+    this._data = Array.isArray(data) ? [...data] : [];
+    this.length = this._data.length;
+    this._isVector = true;
+  }
+
+  /**
+   * Получение элемента по индексу
+   * @param {number} i - Индекс элемента
+   * @returns {*} Значение элемента
+   */
+  get(i) {
+    return this._data[i];
+  }
+
+  /**
+   * Преобразование в обычный JavaScript массив
+   * @returns {Array} Копия внутреннего массива
+   */
+  toArray() {
+    return [...this._data];
+  }
+
+  /**
+   * Создание нового вектора путем применения функции к каждому элементу.
+   * Сохраняет числовой бэкенд для числовых результатов.
+   * @param {Function} fn - Функция преобразования (value, index) => newValue
+   * @returns {ColumnVector} Новый вектор с преобразованными значениями
+   */
+  map(fn) {
+    const mapped = this._data.map(fn);
+    const numeric = mapped.every(
+      (v) => typeof v === 'number' && !Number.isNaN(v),
+    );
+    return numeric
+      ? new TypedArrayVector(Float64Array.from(mapped))
+      : new SimpleVector(mapped);
+  }
+
+  /**
+   * Создание подмножества вектора
+   * @param {number} start - Начальный индекс (включительно)
+   * @param {number} end - Конечный индекс (не включительно)
+   * @returns {SimpleVector} Новый вектор с подмножеством элементов
+   */
+  slice(start, end) {
+    return new SimpleVector(this._data.slice(start, end));
+  }
+
+  /**
+   * Вычисление суммы элементов (только для числовых данных)
+   * @returns {number|undefined} Сумма или undefined для нечисловых данных
+   */
+  sum() {
+    // Оптимизация: проверяем только первые несколько элементов
+    // для определения, является ли колонка числовой
+    const sampleSize = Math.min(10, this.length);
+    const sample = this._data.slice(0, sampleSize);
+
+    if (sample.every((v) => typeof v === 'number')) {
+      return this._data.reduce(
+        (a, b) => a + (typeof b === 'number' ? b : 0),
+        0,
+      );
+    }
+    return undefined;
+  }
+
+  /**
+   * JSON представление вектора
+   * @returns {Array} Массив для JSON сериализации
+   */
+  toJSON() {
+    return this.toArray();
+  }
+
+  /**
+   * Для совместимости с ColumnVector.toArrow()
+   * @returns {Array} Внутренний массив данных
+   */
+  toArrow() {
+    return this._data;
+  }
+}
diff --git a/src/core/storage/TypedArrayVector.js b/src/core/storage/TypedArrayVector.js
index 81b4497..0e6edea 100644
--- a/src/core/storage/TypedArrayVector.js
+++ b/src/core/storage/TypedArrayVector.js
@@ -2,11 +2,11 @@
 import { ColumnVector } from './ColumnVector.js';
 
 /**
- * Обёртка над любым TypedArray, реализующая интерфейс ColumnVector.
- * Применяется для числовых плотных данных без null-битмаски.
+ * Wrapper around any TypedArray, implementing ColumnVector interface.
+ * Used for dense numeric data without null bitmask.
  */
 export class TypedArrayVector extends ColumnVector {
-  // Флаг, указывающий что это вектор
+  // Flag indicating that this is a vector
   _isVector = true;
   /**
    * @param {TypedArray} ta — Float64Array / Int32Array / …
@@ -18,20 +18,20 @@ export class TypedArrayVector extends ColumnVector {
   }
 
   /* -------------------------------------------------- *
-   *  Доступ к элементам                                 *
+   *  Element access                                    *
    * -------------------------------------------------- */
 
   get(i) {
-    // нет проверок границ ради скорости (предполагаем валидный i)
+    // no bounds checks for speed (assume valid i)
     return this._data[i];
   }
 
   /* -------------------------------------------------- *
-   *  Агрегаты                                           *
+   *  Aggregates                                          *
    * -------------------------------------------------- */
 
   sum() {
-    // branch-less линейное суммирование
+    // branch-less linear summation
     let acc = 0;
     const d = this._data;
     for (let i = 0; i < d.length; i++) acc += d[i];
@@ -39,11 +39,11 @@ export class TypedArrayVector extends ColumnVector {
   }
 
   /* -------------------------------------------------- *
-   *  Трансформации                                      *
+   *  Transformations                                     *
    * -------------------------------------------------- */
 
   /**
-   * Возвращает *новый* TypedArrayVector с применённой функцией.
+   * Returns a new TypedArrayVector with the function fn applied.
    * @param {(v:any, i:number)=>any} fn
    * @returns {TypedArrayVector}
    */
@@ -54,9 +54,9 @@ export class TypedArrayVector extends ColumnVector {
   }
 
   /**
-   * Возвращает новый вектор, содержащий подмножество элементов
-   * @param {number} start - Начальный индекс (включительно)
-   * @param {number} end - Конечный индекс (не включительно)
+   * Returns a new TypedArrayVector containing a subset of elements.
+   * @param {number} start - Start index (inclusive)
+   * @param {number} end - End index (exclusive)
    * @returns {TypedArrayVector}
    */
   slice(start, end) {
@@ -65,10 +65,10 @@ export class TypedArrayVector extends ColumnVector {
   }
 
   /* -------------------------------------------------- *
-   *  Сериализация / экспорт                             *
+   *  Serialization / export                            *
    * -------------------------------------------------- */
 
-  /** Быстрое преобразование в обычный массив JS */
+  /** Fast conversion to JS array */
   toArray() {
     return Array.from(this._data);
   }
@@ -78,7 +78,7 @@ export class TypedArrayVector extends ColumnVector {
     return this.toArray();
   }
 
-  /** Для совместимости с ColumnVector.toArrow() */
+  /** For compatibility with ColumnVector.toArrow() */
   get _data() {
     return this.__data;
   }
diff --git a/src/core/storage/VectorFactory.js b/src/core/storage/VectorFactory.js
index 18ec6e6..57aedaa 100644
--- a/src/core/storage/VectorFactory.js
+++ b/src/core/storage/VectorFactory.js
@@ -1,7 +1,34 @@
 // src/core/storage/VectorFactory.js
 import { TypedArrayVector } from './TypedArrayVector.js';
 import { ArrowVector } from './ArrowVector.js';
+import { ColumnVector } from './ColumnVector.js';
 import { shouldUseArrow } from '../strategy/shouldUseArrow.js';
+import { SimpleVector } from './SimpleVector.js';
+
+// Статический импорт Arrow вместо динамического
+// Для продакшена лучше использовать условный импорт на уровне пакера (import.meta.env)
+let vectorFromArray;
+
+// Попытка загрузить Arrow адаптер синхронно
+try {
+  // Для Node.js используем require
+  const arrowAdapter = require('apache-arrow/adapter');
+  vectorFromArray = arrowAdapter.vectorFromArray;
+} catch (e) {
+  try {
+    // Для браузера можем попробовать использовать глобальный объект Arrow
+    if (
+      typeof window !== 'undefined' &&
+      window.Arrow &&
+      window.Arrow.vectorFromArray
+    ) {
+      vectorFromArray = window.Arrow.vectorFromArray;
+    }
+  } catch (e2) {
+    console.warn('Apache Arrow adapter not available at startup');
+    vectorFromArray = null;
+  }
+}
 
 export const VectorFactory = {
   /**
@@ -10,7 +37,7 @@ export const VectorFactory = {
    * @param {object} [opts]          { preferArrow?: boolean }
    * @returns {ColumnVector}
    */
-  async from(data, opts = {}) {
+  from(data, opts = {}) {
     /* ------------------------------------------------- *
      *  1. If already Arrow/TypedArray - wrap it immediately  *
      * ------------------------------------------------- */
@@ -22,22 +49,32 @@ export const VectorFactory = {
      * ------------------------------------------------- */
     const useArrow = opts.preferArrow ?? shouldUseArrow(data, opts);
 
-    if (useArrow) {
-      // Dynamic import to avoid loading the entire lib when not needed
+    if (useArrow && vectorFromArray) {
       try {
-        const { vectorFromArray } = await import('apache-arrow/adapter');
+        // Используем синхронный вызов vectorFromArray
         return new ArrowVector(vectorFromArray(data));
       } catch (error) {
         console.warn(
-          'Apache Arrow adapter not available, falling back to TypedArray',
-        );
-        return new TypedArrayVector(
-          Array.isArray(data) ? new Float64Array(data) : data,
+          'Error using Arrow adapter, falling back to TypedArray',
+          error,
         );
       }
+    } else if (useArrow) {
+      console.warn(
+        'Apache Arrow adapter not available, falling back to TypedArray',
+      );
+    }
+
+    /* ------------------------------------------------- *
+     *  3. Use TypedArray for numeric data  *
+     * ------------------------------------------------- */
+    if (Array.isArray(data) && data.every?.((v) => typeof v === 'number')) {
+      return new TypedArrayVector(Float64Array.from(data));
     }
 
-    // Fallback: convert numeric array to Float64Array
-    return new TypedArrayVector(Float64Array.from(data));
+    /* ------------------------------------------------- *
+     *  4. Use SimpleVector as fallback for everything else  *
+     * ------------------------------------------------- */
+    return new SimpleVector(data);
   },
 };
diff --git a/src/core/storage/types.js b/src/core/storage/types.js
index a1473a0..6ddadd8 100644
--- a/src/core/storage/types.js
+++ b/src/core/storage/types.js
@@ -1,7 +1,7 @@
 // src/core/storage/types.js
 /**
- * Канонические коды внутренних dtypes.
- * Используются при конвертации JS-массивов ➜ TypedArray или Arrow types.
+ * Canonical codes for internal dtypes.
+ * Used when converting JS arrays ➜ TypedArray or Arrow types.
  */
 export const DType = {
   // Float
@@ -24,11 +24,11 @@ export const DType = {
   // String / categorical
   STRING: 'str',
 
-  // Timestamp / Date (зарезервировано, пока не реализовано)
+  // Timestamp / Date (reserved, not implemented yet)
   TIMESTAMP_MS: 'ts_ms',
   DATE_DAY: 'date',
 
-  // Дополнять при необходимости:
-  // - 'dec128' для Decimal128
-  // - 'list'   для Arrow ListVector
+  // To be extended:
+  // - 'dec128' for Decimal128
+  // - 'list'   for Arrow ListVector
 };
diff --git a/src/core/strategy/shouldUseArrow.js b/src/core/strategy/shouldUseArrow.js
index e749471..561f066 100644
--- a/src/core/strategy/shouldUseArrow.js
+++ b/src/core/strategy/shouldUseArrow.js
@@ -2,36 +2,47 @@
 
 /**
  * Heuristics that decide whether to store a column in Apache Arrow format.
- * Правила подобраны так, чтобы Arrow использовался только там,
- * где он действительно принесёт выгоду по памяти/скорости/совместимости.
+ * Rules are chosen so that Arrow is used only where it really brings
+ * memory/efficiency/compatibility benefits.
  *
- * @param {Array|TypedArray|import('apache-arrow').Vector} data  – исходные данные колонки
- * @param {object} [opts]   – дополнительные флаги:
+ * @param {Array|TypedArray|import('apache-arrow').Vector} data  – source data
+ * @param {object} [opts]   – additional flags:
  *   { preferArrow?: boolean, alwaysArrow?: boolean, neverArrow?: boolean }
- * @returns {boolean}       – true → использовать ArrowVector, false → TypedArrayVector
+ * @returns {boolean}       – true → use ArrowVector, false → TypedArrayVector
  */
 export function shouldUseArrow(data, opts = {}) {
   // ─────────────────────────────────────────────────────
-  // 1. Явные флаги пользователя имеют наивысший приоритет
+  // 1. User flags have highest priority
   // ─────────────────────────────────────────────────────
   if (opts.alwaysArrow) return true;
   if (opts.neverArrow) return false;
   if (typeof opts.preferArrow === 'boolean') return opts.preferArrow;
 
   // ─────────────────────────────────────────────────────
-  // 2. Если это уже ArrowVector / Arrow.NativeVector
+  // 2. If already Arrow.NativeVector
   // ─────────────────────────────────────────────────────
-  if (data?._isArrowVector || data?.isArrow) return true;
+  if (data?.isArrow) return true;
 
   // ─────────────────────────────────────────────────────
-  // 3. Если это TypedArray – уже оптимально, Arrow «не нужен»
+  // 3. If this is TypedArray – already optimal, Arrow «not needed»
   // ─────────────────────────────────────────────────────
   if (ArrayBuffer.isView(data)) return false;
 
   // ─────────────────────────────────────────────────────
-  // 4. Обычный JS-массив – анализируем содержимое
+  // 4. Check if data is an array with length
   // ─────────────────────────────────────────────────────
+  if (!data || typeof data !== 'object') return false;
+
+  // Check if data has a length property
   const size = data.length ?? 0;
+  if (size === 0) return false;
+
+  // Only process Arrays, not other iterables like Set/Map
+  if (!Array.isArray(data)) return false;
+
+  // ─────────────────────────────────────────────────────
+  // 5. Regular JS array – analyze contents
+  // ─────────────────────────────────────────────────────
   let hasNulls = false;
   let hasString = false;
   let numeric = true;
@@ -43,14 +54,14 @@ export function shouldUseArrow(data, opts = {}) {
       numeric = false;
     } else if (typeof v !== 'number') numeric = false;
 
-    // Быстрый выход, если уже нашли строку и null – Arrow точно нужен
+    // Fast exit if already found string and null
     if (hasString && hasNulls) break;
   }
 
-  // Основные условия:
-  //  • очень большая колонка  (> 1e6)          → Arrow
-  //  • строковые данные                       → Arrow
-  //  • есть null/NaN при нечисловом типе      → Arrow
-  //  • иначе – оставляем TypedArray (или Float64Array)
+  // Main conditions:
+  //  • very large column  (> 1e6)          → Arrow
+  //  • string data                       → Arrow
+  //  • null/NaN when non-numeric type      → Arrow
+  //  • otherwise – leave as TypedArray (or Float64Array)
   return size > 1_000_000 || hasString || (hasNulls && !numeric);
 }
diff --git a/src/core/strategy/storageStrategy.js b/src/core/strategy/storageStrategy.js
index 080b866..dffa415 100644
--- a/src/core/strategy/storageStrategy.js
+++ b/src/core/strategy/storageStrategy.js
@@ -4,11 +4,11 @@ import { ArrowVector } from '../storage/ArrowVector.js';
 import { TypedArrayVector } from '../storage/TypedArrayVector.js';
 
 /**
- * Runtime-оптимизатор хранилища.
- * Переключает колонки DataFrame c Arrow ⇄ TypedArray в зависимости
- * от типа предстоящей операции (join, groupBy, heavy-math и т.д.).
+ * Runtime optimizer for storage.
+ * Switches columns of DataFrame between Arrow ⇄ TypedArray depending
+ * on the type of the upcoming operation (join, groupBy, heavy-math and so on).
  *
- * Эвристика (первая итерация):
+ * Heuristics (first iteration):
  *   • "join" / "groupBy" / "string"  → ArrowVector
  *   • "numericAgg" / "rolling" / "math" → TypedArrayVector
  *
@@ -23,7 +23,7 @@ export async function switchStorage(df, operation) {
     const series = df.col(name);
     const vec = series.vector;
 
-    /* ---------- 1. Перевод в Arrow, если нужно ---------- */
+    /* ---------- 1. Convert to Arrow if needed ---------- */
     if (wantsArrow && !(vec instanceof ArrowVector)) {
       const newVec = await VectorFactory.from(vec.toArray(), {
         preferArrow: true,
@@ -31,7 +31,7 @@ export async function switchStorage(df, operation) {
       series.vector = newVec;
     }
 
-    /* ---------- 2. Перевод в TypedArray, если heavy-math ---------- */
+    /* ---------- 2. Convert to TypedArray if heavy-math ---------- */
     if (wantsTA && vec instanceof ArrowVector) {
       const arr = vec.toArray();
       const numeric = arr.every(
diff --git a/src/core/utils/inferType.js b/src/core/utils/inferType.js
index 4364316..417b66c 100644
--- a/src/core/utils/inferType.js
+++ b/src/core/utils/inferType.js
@@ -1,15 +1,15 @@
 // src/core/utils/inferType.js
 /**
  * Heuristic dtype inference for a JS array.
- * Возвращает один из кодов DType: 'f64' | 'i32' | 'bool' | 'str' | 'mixed'.
+ * Returns one of the DType codes: 'f64' | 'i32' | 'bool' | 'str' | 'mixed'.
  *
- * • Пустой массив → 'str'
- * • Все boolean   → 'bool'
- * • Все number    → 'i32' (если все целые) или 'f64'
- * • Все string    → 'str'
- * • Иначе         → 'mixed'
+ * • Empty array → 'str'
+ * • All boolean   → 'bool'
+ * • All number    → 'i32' (if all integers) or 'f64'
+ * • All string    → 'str'
+ * • Otherwise         → 'mixed'
  *
- * Пропуски (null/undefined/NaN) не влияют на инференс.
+ * Nulls (null/undefined/NaN) do not affect inference.
  * @param arr
  */
 export function inferType(arr) {
@@ -21,7 +21,7 @@ export function inferType(arr) {
   let isString = true;
 
   for (const v of arr) {
-    if (v === null || v === undefined) continue; // пропуски игнорируем
+    if (v === null || v === undefined) continue; // ignore nulls
 
     isNumber &&= typeof v === 'number' && !Number.isNaN(v);
     isInt &&= isNumber && Number.isInteger(v);
diff --git a/src/core/utils/transpose.js b/src/core/utils/transpose.js
index 18a4cae..79a9472 100644
--- a/src/core/utils/transpose.js
+++ b/src/core/utils/transpose.js
@@ -1,9 +1,9 @@
 // src/core/utils/transpose.js
 
 /**
- * Транспонирует «массив строк» в «объект колонок».
+ * Transposes an array of objects into an object of arrays.
  *
- * Пример:
+ * Example:
  *   const rows = [
  *     { a: 1, b: 2 },
  *     { a: 3, b: 4 }
@@ -11,11 +11,11 @@
  *   transpose(rows);
  *   // 👉 { a: [1, 3], b: [2, 4] }
  *
- * ⚠️  Предполагает, что все объекты имеют одинаковый набор ключей.
+ * ⚠️  Assumes all objects have the same set of keys.
  *
  * @template T extends Record<string, any>
- * @param {T[]} rows  Массив объектов-строк
- * @returns {Record<keyof T, any[]>}  Объект “колонка → массив”
+ * @param {T[]} rows  Array of objects
+ * @returns {Record<keyof T, any[]>}  Object “column → array”
  */
 export function transpose(rows) {
   if (!Array.isArray(rows) || rows.length === 0) {
diff --git a/src/core/utils/validators.js b/src/core/utils/validators.js
index 3638ce2..adc4194 100644
--- a/src/core/utils/validators.js
+++ b/src/core/utils/validators.js
@@ -60,97 +60,3 @@ export function validateNonEmptyArray(array, name = 'Array') {
     throw new Error(`${name} cannot be empty`);
   }
 }
-
-/**
- * Validates that a value matches the specified type
- *
- * @param {*} value - Value to validate
- * @param {string} expectedType - Expected type ('number', 'string', 'array', 'object', 'function')
- * @param {string} paramName - Parameter name for error message
- * @throws {Error} If value does not match the expected type
- */
-export function validateType(value, expectedType, paramName) {
-  let isValid = false;
-
-  switch (expectedType.toLowerCase()) {
-  case 'number':
-    isValid = typeof value === 'number' && !isNaN(value);
-    break;
-  case 'string':
-    isValid = typeof value === 'string';
-    break;
-  case 'array':
-    isValid = Array.isArray(value);
-    break;
-  case 'object':
-    isValid =
-        value !== null && typeof value === 'object' && !Array.isArray(value);
-    break;
-  case 'function':
-    isValid = typeof value === 'function';
-    break;
-  default:
-    throw new Error(`Unknown expected type: ${expectedType}`);
-  }
-
-  if (!isValid) {
-    throw new Error(`Parameter '${paramName}' must be a ${expectedType}`);
-  }
-}
-
-/**
- * Checks if the input data is suitable for creating a DataFrame
- * Valid formats:
- *   • Array<Object>               — array of objects
- *   • Record<string, Array|TypedArray>
- *   • Already existing TinyFrame / DataFrame
- *
- * @param {*} data - Data to validate
- * @throws {Error} If data is not in a valid format
- */
-export function validateInput(data) {
-  // 1) null / undefined
-  if (data === null || data === undefined) {
-    throw new Error('Input data must not be null/undefined');
-  }
-
-  // 2) DataFrame / TinyFrame passthrough
-  if (data?._columns && data?.rowCount !== undefined) return;
-
-  // 3) Array of rows
-  if (Array.isArray(data)) {
-    if (data.length === 0) {
-      throw new Error('Input array is empty');
-    }
-    if (
-      !data.every(
-        (row) => row && typeof row === 'object' && !Array.isArray(row),
-      )
-    ) {
-      throw new Error('Each element of array must be a plain object (row)');
-    }
-    return;
-  }
-
-  // 4) Object of columns
-  if (typeof data === 'object') {
-    const values = Object.values(data);
-    if (
-      values.length > 0 &&
-      values.every((col) => Array.isArray(col) || ArrayBuffer.isView(col))
-    ) {
-      // additional check for equal length
-      const len = values[0].length;
-      const sameLen = values.every((col) => col.length === len);
-      if (!sameLen) {
-        throw new Error('All columns must have equal length');
-      }
-      return;
-    }
-  }
-
-  // 5) Any other input — error
-  throw new Error(
-    'Unsupported input format: expected array of objects or object of arrays',
-  );
-}
diff --git a/src/io/readers/csv.js b/src/io/readers/csv.js
index 1f65491..7f9370f 100644
--- a/src/io/readers/csv.js
+++ b/src/io/readers/csv.js
@@ -23,7 +23,7 @@
  * readCsv - Main function for reading CSV data from various sources and returning a DataFrame.
  */
 
-import { DataFrame } from '../../core/DataFrame.js';
+import { DataFrame } from '../../core/dataframe/DataFrame.js';
 import { createRequire } from 'module';
 
 /**
@@ -126,15 +126,15 @@ function parseRow(row, delimiter) {
     }
 
     switch (true) {
-    case isQuote:
-      inQuotes = !inQuotes;
-      break;
-    case isDelimiter:
-      values.push(currentValue);
-      currentValue = '';
-      break;
-    default:
-      currentValue += char;
+      case isQuote:
+        inQuotes = !inQuotes;
+        break;
+      case isDelimiter:
+        values.push(currentValue);
+        currentValue = '';
+        break;
+      default:
+        currentValue += char;
     }
 
     i++;
@@ -169,7 +169,7 @@ function createDataObject(
 
   // Define value processing function
   const processValue = (value) =>
-    (convertTypes ? convertType(value, emptyValue) : value);
+    convertTypes ? convertType(value, emptyValue) : value;
 
   // If we have headers, use them as keys
   if (hasHeader && headers.length > 0) {
@@ -448,15 +448,28 @@ function tryParseWithCsvParse(content, options) {
     const require = createRequire(import.meta.url);
     const csvParseModule = require('csv-parse/sync');
 
+    // Если используем csv-parse с опцией columns, то он уже возвращает массив объектов
+    // Если header=true, используем первую строку как заголовки
     const parseOptions = {
       delimiter,
-      columns: header,
+      columns: header, // Если true, то первая строка будет использована как заголовки
       skipEmptyLines,
       cast: dynamicTyping,
     };
 
+    // Парсим CSV-данные
     const records = csvParseModule.parse(content, parseOptions);
 
+    // Валидация заголовков - проверяем, что все заголовки валидны
+    if (header && records.length > 0) {
+      const headerKeys = Object.keys(records[0]);
+      if (headerKeys.some((key) => key.trim() === '')) {
+        throw new Error(
+          'Invalid CSV header row: headers must be non-empty strings',
+        );
+      }
+    }
+
     // Additional processing to convert types using the convertType function
     if (dynamicTyping && records.length > 0) {
       // Process each record
@@ -471,7 +484,7 @@ function tryParseWithCsvParse(content, options) {
       }
     }
 
-    return { result: DataFrame.create(records, frameOptions), error: null };
+    return { result: DataFrame.fromRows(records, frameOptions), error: null };
   } catch (error) {
     return { result: null, error };
   }
@@ -569,9 +582,9 @@ async function tryParseWithBun(content, options) {
     const textLines = lines.map((line) => decoder.decode(line));
 
     // Filter empty lines if needed
-    const filteredLines = skipEmptyLines ?
-      textLines.filter((line) => line.trim() !== '') :
-      textLines;
+    const filteredLines = skipEmptyLines
+      ? textLines.filter((line) => line.trim() !== '')
+      : textLines;
 
     // Parse CSV manually
     let headerRow = [];
@@ -586,9 +599,9 @@ async function tryParseWithBun(content, options) {
         continue;
       }
 
-      const record = header ?
-        createDataObject(values, headerRow, true, dynamicTyping, emptyValue) :
-        createDataObject(values, [], false, dynamicTyping, emptyValue);
+      const record = header
+        ? createDataObject(values, headerRow, true, dynamicTyping, emptyValue)
+        : createDataObject(values, [], false, dynamicTyping, emptyValue);
 
       records.push(record);
     }
@@ -633,38 +646,99 @@ export function parseWithBuiltIn(content, options) {
   const lines = content.split(/\r?\n/);
 
   // Filter empty lines if requested
-  const filteredLines = skipEmptyLines ?
-    lines.filter((line) => line.trim().length > 0) :
-    lines;
+  const filteredLines = skipEmptyLines
+    ? lines.filter((line) => line.trim().length > 0)
+    : lines;
 
   if (filteredLines.length === 0) {
-    return DataFrame.create([], frameOptions);
+    return DataFrame.fromRows([], frameOptions);
   }
 
-  // Process header and data rows
-  const headerRow = header ? parseRow(filteredLines[0], delimiter) : [];
-  const startIndex = header ? 1 : 0;
-
   // Prepare array for rows
   const rows = [];
 
-  // Process each line into a data row - using for loop for better performance with large datasets
-  for (let i = startIndex; i < filteredLines.length; i++) {
-    const line = filteredLines[i];
+  if (header && filteredLines.length > 0) {
+    // Используем первую строку как заголовки
+    const headers = parseRow(filteredLines[0], delimiter);
+
+    // Валидация заголовков
+    if (
+      !Array.isArray(headers) ||
+      headers.some((h) => typeof h !== 'string' || h.trim() === '')
+    ) {
+      throw new Error(
+        'Invalid CSV header row: headers must be non-empty strings',
+      );
+    }
 
-    // Skip empty lines if configured to do so
-    if (line.trim() === '' && skipEmptyLines) {
-      continue;
+    // Обрабатываем остальные строки, начиная со второй
+    for (let i = 1; i < filteredLines.length; i++) {
+      const line = filteredLines[i];
+
+      // Пропускаем пустые строки
+      if (line.trim() === '' && skipEmptyLines) {
+        continue;
+      }
+
+      // Парсим строку
+      const values = parseRow(line, delimiter);
+
+      // Валидация: проверяем, что количество значений соответствует количеству заголовков
+      if (values.length !== headers.length) {
+        console.warn(
+          `Warning: Row at line ${i + 1} has ${values.length} values, but header has ${headers.length} columns. Data may be misaligned.`,
+        );
+      }
+
+      // Создаем объект для текущей строки
+      const obj = {};
+
+      // Заполняем объект значениями
+      for (let j = 0; j < headers.length; j++) {
+        let value = values[j];
+
+        // Преобразуем значения, если нужно
+        if (dynamicTyping) {
+          value = convertType(value, emptyValue);
+        }
+
+        obj[headers[j]] = value;
+      }
+
+      rows.push(obj);
     }
+  } else {
+    // Без заголовков - используем числовые индексы
+    for (let i = 0; i < filteredLines.length; i++) {
+      const line = filteredLines[i];
 
-    // Parse the row (empty or not)
-    const parsedRow = parseRow(line, delimiter);
-    rows.push(
-      createDataObject(parsedRow, headerRow, header, dynamicTyping, emptyValue),
-    );
+      // Пропускаем пустые строки
+      if (line.trim() === '' && skipEmptyLines) {
+        continue;
+      }
+
+      // Парсим строку
+      const values = parseRow(line, delimiter);
+
+      // Создаем объект с числовыми индексами
+      const obj = {};
+
+      for (let j = 0; j < values.length; j++) {
+        let value = values[j];
+
+        // Преобразуем значения, если нужно
+        if (dynamicTyping) {
+          value = convertType(value, emptyValue);
+        }
+
+        obj[String(j)] = value;
+      }
+
+      rows.push(obj);
+    }
   }
 
-  return DataFrame.create(rows, frameOptions);
+  return DataFrame.fromRows(rows, frameOptions);
 }
 
 /**
@@ -722,11 +796,11 @@ export function parseWithBuiltIn(content, options) {
  */
 function logCsvParseError(error) {
   const isModuleNotFound = error && error.code === 'MODULE_NOT_FOUND';
-  const message = isModuleNotFound ?
-    'For better CSV parsing performance in Node.js, consider installing the csv-parse package:\n' +
+  const message = isModuleNotFound
+    ? 'For better CSV parsing performance in Node.js, consider installing the csv-parse package:\n' +
       'npm install csv-parse\n' +
-      'Using built-in parser as fallback.' :
-    `csv-parse module failed, falling back to built-in parser: ${error.message}`;
+      'Using built-in parser as fallback.'
+    : `csv-parse module failed, falling back to built-in parser: ${error.message}`;
 
   console[isModuleNotFound ? 'info' : 'warn'](message);
 }
@@ -851,14 +925,14 @@ async function* readCsvInBatches(source, options = {}) {
 
       // When batch is full, yield a DataFrame
       if (batch.length >= options.batchSize) {
-        yield new DataFrame(batch, options.frameOptions);
+        yield DataFrame.fromRows(batch, options.frameOptions);
         batch = [];
       }
     }
 
     // Yield remaining rows if any
     if (batch.length > 0) {
-      yield DataFrame.create(batch);
+      yield DataFrame.fromRows(batch, options.frameOptions);
     }
   } else {
     // For other sources, get all content and process in batches
@@ -902,14 +976,14 @@ async function* readCsvInBatches(source, options = {}) {
 
       // When batch is full, yield a DataFrame
       if (batch.length >= options.batchSize) {
-        yield DataFrame.create(batch);
+        yield DataFrame.fromRows(batch);
         batch = [];
       }
     }
 
     // Yield remaining rows if any
     if (batch.length > 0) {
-      yield DataFrame.create(batch, options.frameOptions);
+      yield DataFrame.fromRows(batch, options.frameOptions);
     }
   }
 }
@@ -943,6 +1017,14 @@ export async function readCsv(source, options = {}) {
     options.emptyValue !== undefined ? options.emptyValue : undefined;
   options.frameOptions = options.frameOptions || {};
 
+  // Дополнительные опции для приведения типов (для будущих версий)
+  options.parseNumbers =
+    options.parseNumbers !== undefined
+      ? options.parseNumbers
+      : options.dynamicTyping;
+  options.parseDates =
+    options.parseDates !== undefined ? options.parseDates : false;
+
   // If batchSize is specified, use streaming processing
   if (options.batchSize) {
     return {
@@ -968,7 +1050,7 @@ export async function readCsv(source, options = {}) {
         for await (const batchDf of batchGenerator) {
           allData.push(...batchDf.toArray());
         }
-        return DataFrame.create(allData);
+        return DataFrame.fromRows(allData, options.frameOptions);
       },
     };
   }
diff --git a/test/io/readers/csv-batch.test.js b/test/io/readers/csv-batch.test.js
index 8597848..a4f8a7e 100644
--- a/test/io/readers/csv-batch.test.js
+++ b/test/io/readers/csv-batch.test.js
@@ -3,7 +3,7 @@
  */
 
 import { describe, test, expect, vi, beforeEach, afterEach } from 'vitest';
-import { DataFrame } from '../../../src/core/DataFrame.js';
+import { DataFrame } from '../../../src/core/dataframe/DataFrame.js';
 
 // Mock the csv.js module
 vi.mock('../../../src/io/readers/csv.js', () => {
@@ -19,9 +19,9 @@ vi.mock('../../../src/io/readers/csv.js', () => {
       const values = dataLines[i].split(',');
       const row = {};
       header.forEach((col, idx) => {
-        row[col] = options.dynamicTyping ?
-          parseFloat(values[idx]) || values[idx] :
-          values[idx];
+        row[col] = options.dynamicTyping
+          ? parseFloat(values[idx]) || values[idx]
+          : values[idx];
       });
       batch.push(row);
 
@@ -87,9 +87,9 @@ vi.mock('../../../src/io/readers/csv.js', () => {
       const values = line.split(',');
       const row = {};
       header.forEach((col, idx) => {
-        row[col] = options.dynamicTyping ?
-          parseFloat(values[idx]) || values[idx] :
-          values[idx];
+        row[col] = options.dynamicTyping
+          ? parseFloat(values[idx]) || values[idx]
+          : values[idx];
       });
       return row;
     });
@@ -138,7 +138,7 @@ import {
 addCsvBatchMethods(DataFrame);
 
 // Add toArray method to DataFrame for tests
-DataFrame.prototype.toArray = vi.fn().mockImplementation(function() {
+DataFrame.prototype.toArray = vi.fn().mockImplementation(function () {
   const frame = this._frame;
   const result = [];
 
diff --git a/test/io/readers/csv.test.js b/test/io/readers/csv.test.js
index 51a2dea..78074c2 100644
--- a/test/io/readers/csv.test.js
+++ b/test/io/readers/csv.test.js
@@ -1,13 +1,13 @@
 /**
- * Unit tests for CSV reader
+ * Comprehensive tests for CSV reader in Node.js environment
  */
 
 import { describe, test, expect, vi, beforeEach } from 'vitest';
-import { readCsv } from '../../../src/io/readers/csv.js';
-import { DataFrame } from '../../../src/core/DataFrame.js';
+import { readCsv, detectEnvironment } from '../../../src/io/readers/csv.js';
+import { DataFrame } from '../../../src/core/dataframe/DataFrame.js';
 import path from 'path';
 
-// Sample CSV content
+// Sample CSV content with valid data
 const csvContent =
   'date,open,high,low,close,volume\n' +
   '2023-01-01,100.5,105.75,99.25,103.5,1000000\n' +
@@ -16,7 +16,23 @@ const csvContent =
   '2023-01-04,109.5,112.75,108.0,112.0,1400000\n' +
   '2023-01-05,112.25,115.5,111.0,115.0,1600000';
 
-describe('CSV Reader', () => {
+// CSV with misaligned columns
+const csvMisalignedContent =
+  'date,open,high,low,close,volume\n' +
+  '2023-01-01,100.5,105.75,99.25,103.5\n' + // Missing volume
+  '2023-01-02,103.75,108.25,102.5,107.25,1500000,extra\n' + // Extra column
+  '2023-01-03,107.5,110.0,106.25,109.75,1200000';
+
+// CSV with invalid headers
+const csvInvalidHeadersContent =
+  ',open,,low,close,volume\n' + // Empty header names
+  '2023-01-01,100.5,105.75,99.25,103.5,1000000';
+
+// CSV with date strings for testing parseDates option
+const csvWithDatesContent =
+  'date,value\n2023-01-01,100\n2023-01-02,200\n2023-01-03,300';
+
+describe('CSV Reader Tests', () => {
   // Mock fs.promises.readFile
   vi.mock('fs', () => ({
     promises: {
@@ -24,6 +40,15 @@ describe('CSV Reader', () => {
     },
   }));
 
+  /**
+   * Tests environment detection
+   */
+  test('should detect current environment', () => {
+    const env = detectEnvironment();
+    // We're running in Node.js, so this should be 'node'
+    expect(env).toBe('node');
+  });
+
   /**
    * Tests basic CSV reading functionality
    * Verifies that CSV content is correctly parsed into a DataFrame
@@ -39,6 +64,12 @@ describe('CSV Reader', () => {
     expect(df.columns).toContain('low');
     expect(df.columns).toContain('close');
     expect(df.columns).toContain('volume');
+
+    // Проверка типов данных
+    const firstRow = df.toArray()[0];
+    expect(typeof firstRow.date).toBe('string');
+    expect(typeof firstRow.open).toBe('number');
+    expect(typeof firstRow.volume).toBe('number');
   });
 
   /**
@@ -119,7 +150,7 @@ describe('CSV Reader', () => {
    * Tests not skipping empty lines in CSV
    * Verifies that empty lines are included when skipEmptyLines is false
    */
-  test('should not skip empty lines when configured', async () => {
+  test('should include empty lines when skipEmptyLines is false', async () => {
     const contentWithEmptyLines =
       'date,open,high,low,close,volume\n' +
       '2023-01-01,100.5,105.75,99.25,103.5,1000000\n' +
@@ -128,10 +159,100 @@ describe('CSV Reader', () => {
 
     const df = await readCsv(contentWithEmptyLines, { skipEmptyLines: false });
 
-    // The empty line will be included as a row with null values
+    // The empty line should be included in the result
     expect(df.rowCount).toBe(3);
   });
 
+  /**
+   * Tests batch processing
+   */
+  test('should support batch processing', async () => {
+    // Read CSV with batch processing
+    const batchProcessor = await readCsv(csvContent, { batchSize: 2 });
+
+    // Verify that batch processor has the expected methods
+    expect(batchProcessor).toHaveProperty('process');
+    expect(batchProcessor).toHaveProperty('collect');
+    expect(typeof batchProcessor.process).toBe('function');
+    expect(typeof batchProcessor.collect).toBe('function');
+
+    // Test collect method
+    const df = await batchProcessor.collect();
+
+    // Verify collect results
+    expect(df).toBeInstanceOf(DataFrame);
+    expect(df.rowCount).toBe(5);
+  });
+
+  /**
+   * Tests handling of misaligned columns
+   */
+  test('should handle misaligned columns with warnings', async () => {
+    // Spy on console.warn
+    const warnSpy = vi.spyOn(console, 'warn');
+
+    const df = await readCsv(csvMisalignedContent);
+
+    // Verify the result
+    expect(df).toBeInstanceOf(DataFrame);
+    expect(df.rowCount).toBe(3);
+
+    // Verify that warnings were logged
+    expect(warnSpy).toHaveBeenCalled();
+
+    // Restore console.warn
+    warnSpy.mockRestore();
+  });
+
+  /**
+   * Tests validation of headers
+   */
+  test('should throw error for invalid headers', async () => {
+    // Expect the readCsv function to throw an error for invalid headers
+    await expect(readCsv(csvInvalidHeadersContent)).rejects.toThrow(
+      'Invalid CSV header',
+    );
+  });
+
+  /**
+   * Tests options for type conversion
+   */
+  test('should respect dynamicTyping option', async () => {
+    // With dynamicTyping: false
+    const dfWithoutTyping = await readCsv(csvContent, { dynamicTyping: false });
+    const firstRowWithoutTyping = dfWithoutTyping.toArray()[0];
+
+    // Values should remain as strings
+    expect(typeof firstRowWithoutTyping.open).toBe('string');
+    expect(typeof firstRowWithoutTyping.volume).toBe('string');
+
+    // With dynamicTyping: true (default)
+    const dfWithTyping = await readCsv(csvContent);
+    const firstRowWithTyping = dfWithTyping.toArray()[0];
+
+    // Values should be converted to numbers
+    expect(typeof firstRowWithTyping.open).toBe('number');
+    expect(typeof firstRowWithTyping.volume).toBe('number');
+  });
+
+  /**
+   * Tests handling of empty lines based on skipEmptyLines option
+   */
+  test('should handle empty lines based on skipEmptyLines option', async () => {
+    const csvWithEmptyLines = csvContent + '\n\n\n';
+
+    // With skipEmptyLines: true (default)
+    const dfSkipEmpty = await readCsv(csvWithEmptyLines);
+    expect(dfSkipEmpty.rowCount).toBe(5);
+
+    // With skipEmptyLines: false
+    const dfKeepEmpty = await readCsv(csvWithEmptyLines, {
+      skipEmptyLines: false,
+    });
+    // Expect more rows due to empty lines being included
+    expect(dfKeepEmpty.rowCount).toBeGreaterThan(5);
+  });
+
   /**
    * Tests handling of quoted fields in CSV
    * Verifies that quoted fields with commas and escaped quotes are correctly parsed