diff --git a/docs/io-module.md b/docs/io-module.md
new file mode 100644
index 0000000..067a4e3
--- /dev/null
+++ b/docs/io-module.md
@@ -0,0 +1,360 @@
+# IO Module Documentation
+
+## Обзор
+
+IO модуль TinyFrameJS предоставляет инструменты для чтения, преобразования и записи данных из различных источников. Модуль включает в себя:
+
+- **Readers** - функции для чтения данных из различных источников (CSV, JSON, Excel и т.д.)
+- **Stream Readers** - функции для потоковой обработки больших файлов
+- **API Client** - клиент для работы с REST API с поддержкой кеширования, троттлинга и ротации ключей
+- **Schema Registry** - реестр схем для автоматического преобразования данных из различных API
+- **Transformers** - функции для преобразования данных между различными форматами
+- **Pipeline** - конвейер для последовательной обработки данных
+
+## Readers
+
+### Базовые ридеры
+
+```javascript
+import { readCsv, readJson, readExcel, readTsv, readSql } from 'tinyframejs/io';
+
+// Чтение CSV файла
+const df = await readCsv('data.csv');
+
+// Чтение JSON файла
+const df = await readJson('data.json');
+
+// Чтение Excel файла
+const df = await readExcel('data.xlsx', { sheet: 'Sheet1' });
+
+// Чтение TSV файла
+const df = await readTsv('data.tsv');
+
+// Чтение SQL запроса
+const df = await readSql('SELECT * FROM table', connection);
+```
+
+### Потоковые ридеры
+
+Для обработки больших файлов без загрузки их полностью в память:
+
+```javascript
+import { readCSVStream, readJSONLStream } from 'tinyframejs/io';
+
+// Потоковое чтение CSV файла
+await readCSVStream('large-data.csv', {
+ batchSize: 1000,
+ onBatch: async (batch) => {
+ // Обработка каждой партии данных
+ console.log(`Обработано ${batch.rowCount} строк`);
+
+ // Можно вернуть результат обработки
+ return batch.sum('value');
+ }
+});
+
+// Потоковое чтение JSONL файла
+await readJSONLStream('large-data.jsonl', {
+ batchSize: 500,
+ onBatch: async (batch) => {
+ // Обработка каждой партии данных
+ await processData(batch);
+ }
+});
+```
+
+## API Client
+
+API клиент предоставляет унифицированный интерфейс для работы с REST API, включая кеширование, троттлинг и ротацию ключей.
+
+```javascript
+import { ApiClient, createApiClient } from 'tinyframejs/io';
+
+// Создание клиента
+const client = createApiClient({
+ baseUrl: 'https://api.example.com',
+ defaultHeaders: {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json'
+ },
+ // Настройки аутентификации
+ auth: {
+ keys: [
+ { id: 'key1', key: 'api-key-1' },
+ { id: 'key2', key: 'api-key-2' }
+ ],
+ authType: 'bearer' // 'bearer', 'basic', 'header', 'query'
+ },
+ // Настройки кеширования
+ cache: {
+ ttl: 3600000, // 1 час
+ maxSize: 100 // максимальное количество элементов в кеше
+ },
+ // Настройки троттлинга
+ throttle: {
+ requestsPerSecond: 5,
+ requestsPerMinute: 100
+ },
+ // Настройки повторных попыток
+ retry: {
+ retries: 3,
+ retryDelay: 1000,
+ retryOn: [429, 503]
+ }
+});
+
+// Выполнение запросов
+const data = await client.fetchJson('/endpoint');
+
+// Выполнение запроса с преобразованием в DataFrame
+const df = await client.fetchDataFrame('/endpoint');
+
+// Выполнение запроса с применением схемы
+const data = await client.fetchJson('/endpoint', {}, 'binanceOHLCV');
+
+// Выполнение запроса с получением CSV данных
+const df = await client.fetchCsv('/endpoint.csv');
+```
+
+## Schema Registry
+
+Реестр схем позволяет автоматически преобразовывать данные из различных API к стандартному формату.
+
+```javascript
+import {
+ getSchema,
+ registerSchema,
+ applySchema,
+ binanceOHLCV,
+ alphaVantageDaily
+} from 'tinyframejs/io';
+
+// Получение схемы по имени
+const schema = getSchema('binanceOHLCV');
+
+// Регистрация новой схемы
+registerSchema('myApiSchema', {
+ timestamp: 'time',
+ value: {
+ path: 'data.value',
+ transform: (value) => parseFloat(value)
+ },
+ name: (obj) => `${obj.type}-${obj.id}`
+});
+
+// Применение схемы к данным
+const data = await client.fetchJson('/endpoint');
+const transformed = applySchema(data, 'myApiSchema');
+
+// Применение встроенной схемы
+const binanceData = await client.fetchJson('/binance/klines');
+const standardized = applySchema(binanceData, binanceOHLCV);
+```
+
+## Pipeline
+
+Конвейер позволяет создавать цепочки обработки данных для ETL процессов.
+
+```javascript
+import {
+ createPipeline,
+ filter,
+ map,
+ sort,
+ limit,
+ toDataFrame,
+ log
+} from 'tinyframejs/io';
+import { readCsv } from 'tinyframejs/io';
+
+// Создание конвейера
+const pipeline = createPipeline(
+ // Ридер
+ () => readCsv('data.csv'),
+ // Трансформеры
+ [
+ filter(row => row.value > 0),
+ map(row => ({ ...row, value: row.value * 2 })),
+ sort('timestamp'),
+ limit(1000),
+ log('Processed data:'),
+ toDataFrame()
+ ]
+);
+
+// Выполнение конвейера
+const result = await pipeline();
+```
+
+## Batch Processing
+
+Для обработки данных партиями:
+
+```javascript
+import { batchProcess } from 'tinyframejs/io';
+import { readCSVStream } from 'tinyframejs/io';
+
+// Обработка данных партиями
+const results = await batchProcess(
+ // Ридер
+ (options) => readCSVStream('large-data.csv', options),
+ // Обработчик партии
+ async (batch) => {
+ // Обработка партии данных
+ return batch.sum('value');
+ },
+ // Опции
+ {
+ batchSize: 1000,
+ onProgress: ({ processedCount, batchCount }) => {
+ console.log(`Processed ${processedCount} rows in ${batchCount} batches`);
+ }
+ }
+);
+
+// Результаты содержат массив результатов обработки каждой партии
+console.log(`Total sum: ${results.reduce((sum, val) => sum + val, 0)}`);
+```
+
+## Middleware Hooks
+
+Хуки (middleware) позволяют расширять функциональность API клиента.
+
+### Logger Hook
+
+```javascript
+import { createLoggerHook } from 'tinyframejs/io';
+
+const loggerHook = createLoggerHook({
+ logRequest: true,
+ logResponse: true,
+ logErrors: true,
+ logTiming: true,
+ logger: console.log
+});
+
+client.addHook(loggerHook);
+```
+
+### Cache Hook
+
+```javascript
+import { createCacheHook, MemoryCache } from 'tinyframejs/io';
+
+const cache = new MemoryCache({
+ ttl: 3600000, // 1 час
+ maxSize: 100
+});
+
+const cacheHook = createCacheHook({
+ cache,
+ ttl: 3600000,
+ keyGenerator: (request) => `${request.method}:${request.url}`,
+ shouldCache: (request) => request.method === 'GET'
+});
+
+client.addHook(cacheHook);
+```
+
+### Throttle Hook
+
+```javascript
+import { createThrottleHook } from 'tinyframejs/io';
+
+const throttleHook = createThrottleHook({
+ requestsPerSecond: 5,
+ requestsPerMinute: 100,
+ requestsPerHour: 1000,
+ groupByDomain: true,
+ onThrottle: (waitTime) => console.log(`Request throttled. Waiting ${waitTime}ms`)
+});
+
+client.addHook(throttleHook);
+```
+
+### Auth Hook
+
+```javascript
+import { createAuthHook, KeyRotator } from 'tinyframejs/io';
+
+const authHook = createAuthHook({
+ keys: [
+ { id: 'key1', key: 'api-key-1' },
+ { id: 'key2', key: 'api-key-2' }
+ ],
+ authType: 'bearer', // 'bearer', 'basic', 'header', 'query'
+ headerName: 'Authorization',
+ queryParam: 'api_key',
+ maxErrorsBeforeDisable: 3,
+ resetErrorsAfter: 3600000, // 1 час
+ rotationStrategy: 'round-robin' // 'round-robin', 'least-used', 'random'
+});
+
+client.addHook(authHook);
+```
+
+## Примеры использования
+
+### Загрузка и обработка данных о ценах криптовалют
+
+```javascript
+import { createApiClient, applySchema, binanceOHLCV } from 'tinyframejs/io';
+
+async function getBitcoinPrices() {
+ const client = createApiClient({
+ baseUrl: 'https://api.binance.com',
+ cache: { ttl: 300000 }, // 5 минут
+ throttle: { requestsPerMinute: 60 }
+ });
+
+ // Получение данных
+ const data = await client.fetchJson('/api/v3/klines?symbol=BTCUSDT&interval=1d&limit=30');
+
+ // Применение схемы
+ const standardized = applySchema(data, binanceOHLCV);
+
+ // Преобразование в DataFrame
+ return DataFrame.fromRows(standardized);
+}
+
+// Использование
+const btcPrices = await getBitcoinPrices();
+btcPrices.plot('line', { x: 'timestamp', y: 'close' });
+```
+
+### Потоковая обработка большого CSV файла
+
+```javascript
+import { readCSVStream, batchProcess } from 'tinyframejs/io';
+
+async function processLargeCSV(filePath) {
+ let total = 0;
+ let count = 0;
+
+ await batchProcess(
+ (options) => readCSVStream(filePath, options),
+ async (batch) => {
+ // Вычисление среднего значения для каждой партии
+ const batchSum = batch.sum('value');
+ const batchCount = batch.rowCount;
+
+ total += batchSum;
+ count += batchCount;
+
+ return { batchSum, batchCount };
+ },
+ {
+ batchSize: 10000,
+ onProgress: ({ processedCount }) => {
+ console.log(`Processed ${processedCount} rows`);
+ }
+ }
+ );
+
+ return total / count; // Среднее значение по всему файлу
+}
+
+// Использование
+const average = await processLargeCSV('very-large-file.csv');
+console.log(`Average value: ${average}`);
+```
diff --git a/src/display/web/html.js b/src/display/web/html.js
index 6b8655c..6abc021 100644
--- a/src/display/web/html.js
+++ b/src/display/web/html.js
@@ -82,9 +82,9 @@ export function toHTML(frame, options = {}) {
if (rowIdx === -1) {
// This is the ellipsis row
const remainingRows = rowCount - maxRows * 2;
- const colSpan = showIndex
- ? visibleColumns.length + 1
- : visibleColumns.length;
+ const colSpan = showIndex ?
+ visibleColumns.length + 1 :
+ visibleColumns.length;
rowsHtml += `
| ... ${remainingRows} more rows ... |
`;
skipNextRow = true;
} else if (!skipNextRow) {
@@ -324,9 +324,9 @@ function getThemeStyles(theme) {
// Theme-specific styles
switch (theme) {
- case 'dark':
- return (
- baseStyles +
+ case 'dark':
+ return (
+ baseStyles +
`
.tinyframe-table.theme-dark {
background-color: #222;
@@ -352,10 +352,10 @@ function getThemeStyles(theme) {
color: #e88c6c;
}
`
- );
- case 'minimal':
- return (
- baseStyles +
+ );
+ case 'minimal':
+ return (
+ baseStyles +
`
.tinyframe-table.theme-minimal {
border: none;
@@ -370,10 +370,10 @@ function getThemeStyles(theme) {
background-color: #f9f9f9;
}
`
- );
- default: // 'default' theme
- return (
- baseStyles +
+ );
+ default: // 'default' theme
+ return (
+ baseStyles +
`
.tinyframe-table.theme-default {
border: 1px solid #ddd;
@@ -395,6 +395,6 @@ function getThemeStyles(theme) {
color: #cc6600;
}
`
- );
+ );
}
}
diff --git a/src/display/web/jupyter.js b/src/display/web/jupyter.js
index 4354991..ce8c893 100644
--- a/src/display/web/jupyter.js
+++ b/src/display/web/jupyter.js
@@ -64,7 +64,7 @@ export function registerJupyterDisplay(DataFrame) {
// Add repr_html method to DataFrame for Jupyter display
// Using non-camelCase name because this is a Jupyter-specific convention
// eslint-disable-next-line camelcase
- DataFrame.prototype._repr_html_ = function () {
+ DataFrame.prototype._repr_html_ = function() {
// Import the toHTML function from html.js
const { toHTML } = require('./html.js');
@@ -81,7 +81,7 @@ export function registerJupyterDisplay(DataFrame) {
// Add repr_mimebundle method for more control over display
// Using non-camelCase name because this is a Jupyter-specific convention
// eslint-disable-next-line camelcase
- DataFrame.prototype._repr_mimebundle_ = function (include, exclude) {
+ DataFrame.prototype._repr_mimebundle_ = function(include, exclude) {
// Convert DataFrame to TinyFrame format
const frame = {
columns: this._columns,
diff --git a/src/index.js b/src/index.js
index 85673aa..6b721f2 100644
--- a/src/index.js
+++ b/src/index.js
@@ -7,12 +7,14 @@
// Export core components
export { DataFrame } from './core/dataframe/DataFrame.js';
+export { Series } from './core/dataframe/Series.js';
export { createFrame, cloneFrame } from './core/createFrame.js';
export * from './core/types.js';
export * from './core/utils/validators.js';
-// Initialize automatic extension of DataFrame methods
+// Initialize automatic extension of DataFrame and Series methods
import './methods/autoExtend.js';
+import './methods/index.js';
// Export IO functions
export * from './io/index.js';
diff --git a/src/io/hooks/auth.js b/src/io/hooks/auth.js
new file mode 100644
index 0000000..908bb89
--- /dev/null
+++ b/src/io/hooks/auth.js
@@ -0,0 +1,280 @@
+/**
+ * Authentication hook for API requests
+ * Provides authentication rotation and management for API keys
+ */
+
+/**
+ * Key rotation strategy implementation
+ */
+class KeyRotator {
+ /**
+ * Create a new key rotator
+ *
+ * @param {Object[]} keys - Array of API keys with their limits
+ * @param {Object} options - Rotator options
+ */
+ constructor(keys = [], options = {}) {
+ this.keys = keys.map((key) => ({
+ ...key,
+ usageCount: 0,
+ lastUsed: 0,
+ errors: 0,
+ disabled: false,
+ }));
+
+ this.options = {
+ maxErrorsBeforeDisable: options.maxErrorsBeforeDisable || 3,
+ resetErrorsAfter: options.resetErrorsAfter || 3600000, // 1 hour
+ rotationStrategy: options.rotationStrategy || 'round-robin', // 'round-robin', 'least-used', 'random'
+ ...options,
+ };
+
+ this.currentKeyIndex = 0;
+ }
+
+ /**
+ * Get the next available API key
+ *
+ * @returns {Object|null} - Next available API key or null if none available
+ */
+ getNextKey() {
+ if (this.keys.length === 0) {
+ return null;
+ }
+
+ // Filter out disabled keys
+ const availableKeys = this.keys.filter((key) => !key.disabled);
+
+ if (availableKeys.length === 0) {
+ return null;
+ }
+
+ let selectedKey;
+
+ switch (this.options.rotationStrategy) {
+ case 'round-robin':
+ // Move to the next key in the list
+ this.currentKeyIndex =
+ (this.currentKeyIndex + 1) % availableKeys.length;
+ selectedKey = availableKeys[this.currentKeyIndex];
+ break;
+
+ case 'least-used':
+ // Use the key with the least usage count
+ selectedKey = availableKeys.reduce(
+ (least, current) =>
+ (current.usageCount < least.usageCount ? current : least),
+ availableKeys[0],
+ );
+ break;
+
+ case 'random':
+ // Select a random key
+ selectedKey =
+ availableKeys[Math.floor(Math.random() * availableKeys.length)];
+ break;
+
+ default:
+ // Default to round-robin
+ this.currentKeyIndex =
+ (this.currentKeyIndex + 1) % availableKeys.length;
+ selectedKey = availableKeys[this.currentKeyIndex];
+ }
+
+ // Update key usage
+ selectedKey.usageCount++;
+ selectedKey.lastUsed = Date.now();
+
+ return selectedKey;
+ }
+
+ /**
+ * Record a successful request for a key
+ *
+ * @param {string} keyId - ID of the key
+ */
+ recordSuccess(keyId) {
+ const key = this.keys.find((k) => k.id === keyId);
+
+ if (key) {
+ // Reset errors after successful request
+ key.errors = 0;
+ }
+ }
+
+ /**
+ * Record an error for a key
+ *
+ * @param {string} keyId - ID of the key
+ * @param {Object} error - Error object
+ */
+ recordError(keyId, error) {
+ const key = this.keys.find((k) => k.id === keyId);
+
+ if (key) {
+ key.errors++;
+
+ // Disable key if too many errors
+ if (key.errors >= this.options.maxErrorsBeforeDisable) {
+ key.disabled = true;
+
+ // Schedule key re-enabling
+ setTimeout(() => {
+ key.disabled = false;
+ key.errors = 0;
+ }, this.options.resetErrorsAfter);
+ }
+ }
+ }
+
+ /**
+ * Add a new API key
+ *
+ * @param {Object} key - API key object
+ */
+ addKey(key) {
+ this.keys.push({
+ ...key,
+ usageCount: 0,
+ lastUsed: 0,
+ errors: 0,
+ disabled: false,
+ });
+ }
+
+ /**
+ * Remove an API key
+ *
+ * @param {string} keyId - ID of the key to remove
+ */
+ removeKey(keyId) {
+ this.keys = this.keys.filter((key) => key.id !== keyId);
+
+ // Reset current index if needed
+ if (this.currentKeyIndex >= this.keys.length) {
+ this.currentKeyIndex = 0;
+ }
+ }
+
+ /**
+ * Get all API keys
+ *
+ * @returns {Object[]} - Array of API keys
+ */
+ getAllKeys() {
+ return this.keys.map((key) => ({
+ ...key,
+ // Don't expose the actual key value
+ key: key.key ? '***' : undefined,
+ }));
+ }
+}
+
+/**
+ * Creates an authentication hook for API requests
+ *
+ * @param {Object} options - Authentication options
+ * @param {Object[]} [options.keys] - Array of API keys
+ * @param {string} [options.authType='bearer'] - Authentication type (bearer, basic, header, query)
+ * @param {string} [options.headerName='Authorization'] - Header name for authentication
+ * @param {string} [options.queryParam='api_key'] - Query parameter name for authentication
+ * @param {Function} [options.authFormatter] - Function to format authentication value
+ * @param {Function} [options.isAuthError] - Function to determine if an error is an authentication error
+ * @returns {Function} - Authentication hook function
+ */
+export function createAuthHook(options = {}) {
+ const {
+ keys = [],
+ authType = 'bearer',
+ headerName = 'Authorization',
+ queryParam = 'api_key',
+ authFormatter,
+ isAuthError = (error) => error.status === 401 || error.status === 403,
+ } = options;
+
+ // Create key rotator
+ const keyRotator = new KeyRotator(keys, options);
+
+ // Format authentication value based on type
+ const formatAuth = (key) => {
+ if (authFormatter) {
+ return authFormatter(key);
+ }
+
+ switch (authType.toLowerCase()) {
+ case 'bearer':
+ return `Bearer ${key}`;
+ case 'basic':
+ return `Basic ${key}`;
+ default:
+ return key;
+ }
+ };
+
+ return async (context, next) => {
+ const { request } = context;
+
+ // Get the next available key
+ const keyObj = keyRotator.getNextKey();
+
+ if (!keyObj) {
+ throw new Error('No API keys available');
+ }
+
+ const { id, key } = keyObj;
+
+ // Apply authentication based on type
+ switch (authType.toLowerCase()) {
+ case 'bearer':
+ case 'basic':
+ case 'header':
+ // Add authentication header
+ request.headers = {
+ ...request.headers,
+ [headerName]: formatAuth(key),
+ };
+ break;
+
+ case 'query':
+ // Add authentication query parameter
+ const url = new URL(request.url);
+ url.searchParams.set(queryParam, key);
+ request.url = url.toString();
+ break;
+ }
+
+ try {
+ // Execute the next middleware or the actual request
+ const response = await next(context);
+
+ // Record successful request
+ keyRotator.recordSuccess(id);
+
+ return response;
+ } catch (error) {
+ // Check if it's an authentication error
+ if (isAuthError(error)) {
+ // Record authentication error
+ keyRotator.recordError(id, error);
+ }
+
+ throw error;
+ }
+ };
+}
+
+/**
+ * Creates a key rotation manager
+ *
+ * @param {Object[]} keys - Array of API keys
+ * @param {Object} options - Rotation options
+ * @returns {KeyRotator} - Key rotator instance
+ */
+export function createKeyRotator(keys = [], options = {}) {
+ return new KeyRotator(keys, options);
+}
+
+/**
+ * Export the KeyRotator class for direct usage
+ */
+export { KeyRotator };
diff --git a/src/io/hooks/cache.js b/src/io/hooks/cache.js
new file mode 100644
index 0000000..c1bc300
--- /dev/null
+++ b/src/io/hooks/cache.js
@@ -0,0 +1,202 @@
+/**
+ * Cache hook for API requests
+ * Provides caching functionality to avoid redundant API calls
+ */
+
+/**
+ * Simple in-memory cache implementation
+ */
+class MemoryCache {
+ constructor(options = {}) {
+ this.cache = new Map();
+ this.ttl = options.ttl || 3600000; // Default TTL: 1 hour
+ this.maxSize = options.maxSize || 100; // Default max items: 100
+ }
+
+ /**
+ * Set a value in the cache
+ *
+ * @param {string} key - Cache key
+ * @param {*} value - Value to cache
+ * @param {number} [ttl] - Time to live in milliseconds
+ */
+ set(key, value, ttl = this.ttl) {
+ // Implement LRU eviction if cache is full
+ if (this.cache.size >= this.maxSize && !this.cache.has(key)) {
+ const oldestKey = this.cache.keys().next().value;
+ this.cache.delete(oldestKey);
+ }
+
+ this.cache.set(key, {
+ value,
+ expires: Date.now() + ttl,
+ });
+ }
+
+ /**
+ * Get a value from the cache
+ *
+ * @param {string} key - Cache key
+ * @returns {*|null} - Cached value or null if not found
+ */
+ get(key) {
+ const item = this.cache.get(key);
+
+ if (!item) {
+ return null;
+ }
+
+ // Check if the item has expired
+ if (item.expires < Date.now()) {
+ this.cache.delete(key);
+ return null;
+ }
+
+ // Move the item to the end of the Map to implement LRU
+ this.cache.delete(key);
+ this.cache.set(key, item);
+
+ return item.value;
+ }
+
+ /**
+ * Check if a key exists in the cache
+ *
+ * @param {string} key - Cache key
+ * @returns {boolean} - Whether the key exists
+ */
+ has(key) {
+ const item = this.cache.get(key);
+
+ if (!item) {
+ return false;
+ }
+
+ // Check if the item has expired
+ if (item.expires < Date.now()) {
+ this.cache.delete(key);
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * Delete a value from the cache
+ *
+ * @param {string} key - Cache key
+ */
+ delete(key) {
+ this.cache.delete(key);
+ }
+
+ /**
+ * Clear the cache
+ */
+ clear() {
+ this.cache.clear();
+ }
+}
+
+/**
+ * Creates a cache key from request details
+ *
+ * @param {Object} request - Request object
+ * @returns {string} - Cache key
+ */
+function createCacheKey(request) {
+ const { url, method = 'GET', headers = {}, body } = request;
+
+ // Create a string representation of the request
+ const parts = [method.toUpperCase(), url];
+
+ // Add headers that might affect the response
+ const cacheableHeaders = ['accept', 'content-type'];
+ const headerStr = cacheableHeaders
+ .filter((key) => headers[key])
+ .map((key) => `${key}:${headers[key]}`)
+ .join(',');
+
+ if (headerStr) {
+ parts.push(headerStr);
+ }
+
+ // Add body if present
+ if (body) {
+ parts.push(typeof body === 'string' ? body : JSON.stringify(body));
+ }
+
+ return parts.join('|');
+}
+
+/**
+ * Creates a cache hook for API requests
+ *
+ * @param {Object} options - Cache options
+ * @param {Object} [options.cache] - Cache implementation (must have get, set, has methods)
+ * @param {number} [options.ttl] - Time to live in milliseconds
+ * @param {Function} [options.keyGenerator] - Function to generate cache keys
+ * @param {Function} [options.shouldCache] - Function to determine if a request should be cached
+ * @returns {Function} - Cache hook function
+ */
+export function createCacheHook(options = {}) {
+ const {
+ cache = new MemoryCache(options),
+ ttl = 3600000, // Default TTL: 1 hour
+ keyGenerator = createCacheKey,
+ shouldCache = (request) =>
+ request.method === 'GET' || request.method === undefined,
+ } = options;
+
+ return async (context, next) => {
+ const { request } = context;
+
+ // Skip caching for non-GET requests by default
+ if (!shouldCache(request)) {
+ return next(context);
+ }
+
+ // Generate cache key
+ const cacheKey = keyGenerator(request);
+
+ // Check if response is in cache
+ if (cache.has(cacheKey)) {
+ const cachedResponse = cache.get(cacheKey);
+
+ // Add cache hit information
+ cachedResponse.headers = {
+ ...cachedResponse.headers,
+ 'x-cache': 'HIT',
+ };
+
+ return cachedResponse;
+ }
+
+ // Execute the next middleware or the actual request
+ const response = await next(context);
+
+ // Cache the response
+ if (response.ok) {
+ // Clone the response to cache it
+ const clonedResponse = {
+ ...response,
+ headers: { ...response.headers, 'x-cache': 'MISS' },
+ };
+
+ // Store in cache
+ cache.set(cacheKey, clonedResponse, ttl);
+ }
+
+ return response;
+ };
+}
+
+/**
+ * Default cache hook with standard configuration
+ */
+export const cacheHook = createCacheHook();
+
+/**
+ * Export the MemoryCache class for direct usage
+ */
+export { MemoryCache };
diff --git a/src/io/hooks/cache/fs.js b/src/io/hooks/cache/fs.js
new file mode 100644
index 0000000..2137e21
--- /dev/null
+++ b/src/io/hooks/cache/fs.js
@@ -0,0 +1,219 @@
+/**
+ * File system cache backend for API requests
+ * Provides persistent caching using the file system
+ */
+
+import { isNodeJs } from '../../utils/environment.js';
+
+/**
+ * File system cache implementation
+ */
+export class FileSystemCache {
+ /**
+ * Create a new file system cache
+ *
+ * @param {Object} options - Cache options
+ * @param {string} [options.directory='./cache'] - Cache directory
+ * @param {number} [options.ttl=3600000] - Default TTL in milliseconds (1 hour)
+ * @param {boolean} [options.createDir=true] - Whether to create the cache directory if it doesn't exist
+ */
+ constructor(options = {}) {
+ if (!isNodeJs()) {
+ throw new Error(
+ 'FileSystemCache is only available in Node.js environment',
+ );
+ }
+
+ this.directory = options.directory || './cache';
+ this.ttl = options.ttl || 3600000; // Default TTL: 1 hour
+ this.createDir = options.createDir !== false;
+
+ // Initialize cache directory
+ this._initDirectory();
+ }
+
+ /**
+ * Initialize cache directory
+ *
+ * @private
+ */
+ async _initDirectory() {
+ try {
+ const fs = await import('fs/promises');
+ const path = await import('path');
+
+ // Create directory if it doesn't exist
+ if (this.createDir) {
+ await fs.mkdir(this.directory, { recursive: true });
+ }
+
+ // Store references to fs and path modules
+ this.fs = fs;
+ this.path = path;
+ } catch (error) {
+ console.error('Failed to initialize cache directory:', error);
+ throw error;
+ }
+ }
+
+ /**
+ * Get a file path for a cache key
+ *
+ * @param {string} key - Cache key
+ * @returns {string} - File path
+ * @private
+ */
+ _getFilePath(key) {
+ // Create a safe filename from the key
+ const safeKey = Buffer.from(key).toString('base64').replace(/[/+=]/g, '_');
+ return this.path.join(this.directory, safeKey);
+ }
+
+ /**
+ * Set a value in the cache
+ *
+ * @param {string} key - Cache key
+ * @param {*} value - Value to cache
+ * @param {number} [ttl] - Time to live in milliseconds
+ * @returns {Promise}
+ */
+ async set(key, value, ttl = this.ttl) {
+ try {
+ // Wait for initialization to complete
+ if (!this.fs) {
+ await this._initDirectory();
+ }
+
+ const filePath = this._getFilePath(key);
+
+ // Create cache entry
+ const entry = {
+ value,
+ expires: Date.now() + ttl,
+ };
+
+ // Write to file
+ await this.fs.writeFile(filePath, JSON.stringify(entry), 'utf8');
+ } catch (error) {
+ console.error('Failed to set cache entry:', error);
+ }
+ }
+
+ /**
+ * Get a value from the cache
+ *
+ * @param {string} key - Cache key
+ * @returns {Promise<*|null>} - Cached value or null if not found
+ */
+ async get(key) {
+ try {
+ // Wait for initialization to complete
+ if (!this.fs) {
+ await this._initDirectory();
+ }
+
+ const filePath = this._getFilePath(key);
+
+ // Check if file exists
+ try {
+ await this.fs.access(filePath);
+ } catch (error) {
+ return null;
+ }
+
+ // Read file
+ const data = await this.fs.readFile(filePath, 'utf8');
+ const entry = JSON.parse(data);
+
+ // Check if entry has expired
+ if (entry.expires < Date.now()) {
+ // Remove expired entry
+ await this.delete(key);
+ return null;
+ }
+
+ return entry.value;
+ } catch (error) {
+ console.error('Failed to get cache entry:', error);
+ return null;
+ }
+ }
+
+ /**
+ * Check if a key exists in the cache
+ *
+ * @param {string} key - Cache key
+ * @returns {Promise} - Whether the key exists
+ */
+ async has(key) {
+ const value = await this.get(key);
+ return value !== null;
+ }
+
+ /**
+ * Delete a value from the cache
+ *
+ * @param {string} key - Cache key
+ * @returns {Promise} - Whether the key was deleted
+ */
+ async delete(key) {
+ try {
+ // Wait for initialization to complete
+ if (!this.fs) {
+ await this._initDirectory();
+ }
+
+ const filePath = this._getFilePath(key);
+
+ // Check if file exists
+ try {
+ await this.fs.access(filePath);
+ } catch (error) {
+ return false;
+ }
+
+ // Delete file
+ await this.fs.unlink(filePath);
+ return true;
+ } catch (error) {
+ console.error('Failed to delete cache entry:', error);
+ return false;
+ }
+ }
+
+ /**
+ * Clear the cache
+ *
+ * @returns {Promise}
+ */
+ async clear() {
+ try {
+ // Wait for initialization to complete
+ if (!this.fs) {
+ await this._initDirectory();
+ }
+
+ // Read directory
+ const files = await this.fs.readdir(this.directory);
+
+ // Delete all files
+ await Promise.all(
+ files.map((file) =>
+ this.fs.unlink(this.path.join(this.directory, file)),
+ ),
+ );
+ } catch (error) {
+ console.error('Failed to clear cache:', error);
+ }
+ }
+}
+
+/**
+ * Create a file system cache
+ *
+ * @param {Object} options - Cache options
+ * @returns {FileSystemCache} - File system cache instance
+ */
+export function createFileSystemCache(options = {}) {
+ return new FileSystemCache(options);
+}
diff --git a/src/io/hooks/cache/indexeddb.js b/src/io/hooks/cache/indexeddb.js
new file mode 100644
index 0000000..389e4dd
--- /dev/null
+++ b/src/io/hooks/cache/indexeddb.js
@@ -0,0 +1,245 @@
+/**
+ * IndexedDB cache backend for API requests
+ * Provides persistent caching using browser's IndexedDB
+ */
+
+import { isBrowser } from '../../utils/environment.js';
+
+/**
+ * IndexedDB cache implementation
+ */
+export class IndexedDBCache {
+ /**
+ * Create a new IndexedDB cache
+ *
+ * @param {Object} options - Cache options
+ * @param {string} [options.dbName='tinyframe-cache'] - Database name
+ * @param {string} [options.storeName='api-cache'] - Object store name
+ * @param {number} [options.ttl=3600000] - Default TTL in milliseconds (1 hour)
+ * @param {number} [options.version=1] - Database version
+ */
+ constructor(options = {}) {
+ if (!isBrowser()) {
+ throw new Error(
+ 'IndexedDBCache is only available in browser environment',
+ );
+ }
+
+ this.dbName = options.dbName || 'tinyframe-cache';
+ this.storeName = options.storeName || 'api-cache';
+ this.ttl = options.ttl || 3600000; // Default TTL: 1 hour
+ this.version = options.version || 1;
+
+ // Initialize database
+ this._dbPromise = this._initDatabase();
+ }
+
+ /**
+ * Initialize database
+ *
+ * @returns {Promise} - IndexedDB database
+ * @private
+ */
+ async _initDatabase() {
+ return new Promise((resolve, reject) => {
+ // Check if IndexedDB is available
+ if (!window.indexedDB) {
+ reject(new Error('IndexedDB is not supported in this browser'));
+ return;
+ }
+
+ // Open database
+ const request = window.indexedDB.open(this.dbName, this.version);
+
+ // Handle errors
+ request.onerror = (event) => {
+ reject(new Error(`Failed to open IndexedDB: ${event.target.error}`));
+ };
+
+ // Create object store if needed
+ request.onupgradeneeded = (event) => {
+ const db = event.target.result;
+
+ // Create object store if it doesn't exist
+ if (!db.objectStoreNames.contains(this.storeName)) {
+ db.createObjectStore(this.storeName, { keyPath: 'key' });
+ }
+ };
+
+ // Success handler
+ request.onsuccess = (event) => {
+ resolve(event.target.result);
+ };
+ });
+ }
+
+ /**
+ * Get a transaction and object store
+ *
+ * @param {string} mode - Transaction mode ('readonly' or 'readwrite')
+ * @returns {Promise} - IndexedDB object store
+ * @private
+ */
+ async _getStore(mode) {
+ const db = await this._dbPromise;
+ const transaction = db.transaction(this.storeName, mode);
+ return transaction.objectStore(this.storeName);
+ }
+
+ /**
+ * Set a value in the cache
+ *
+ * @param {string} key - Cache key
+ * @param {*} value - Value to cache
+ * @param {number} [ttl] - Time to live in milliseconds
+ * @returns {Promise}
+ */
+ async set(key, value, ttl = this.ttl) {
+ try {
+ const store = await this._getStore('readwrite');
+
+ // Create cache entry
+ const entry = {
+ key,
+ value,
+ expires: Date.now() + ttl,
+ };
+
+ // Store entry
+ return new Promise((resolve, reject) => {
+ const request = store.put(entry);
+
+ request.onerror = (event) => {
+ reject(new Error(`Failed to set cache entry: ${event.target.error}`));
+ };
+
+ request.onsuccess = () => {
+ resolve();
+ };
+ });
+ } catch (error) {
+ console.error('Failed to set cache entry:', error);
+ }
+ }
+
+ /**
+ * Get a value from the cache
+ *
+ * @param {string} key - Cache key
+ * @returns {Promise<*|null>} - Cached value or null if not found
+ */
+ async get(key) {
+ try {
+ const store = await this._getStore('readonly');
+
+ // Get entry
+ return new Promise((resolve, reject) => {
+ const request = store.get(key);
+
+ request.onerror = (event) => {
+ reject(new Error(`Failed to get cache entry: ${event.target.error}`));
+ };
+
+ request.onsuccess = (event) => {
+ const entry = event.target.result;
+
+ // Check if entry exists
+ if (!entry) {
+ resolve(null);
+ return;
+ }
+
+ // Check if entry has expired
+ if (entry.expires < Date.now()) {
+ // Remove expired entry
+ this.delete(key).catch(console.error);
+ resolve(null);
+ return;
+ }
+
+ resolve(entry.value);
+ };
+ });
+ } catch (error) {
+ console.error('Failed to get cache entry:', error);
+ return null;
+ }
+ }
+
+ /**
+ * Check if a key exists in the cache
+ *
+ * @param {string} key - Cache key
+ * @returns {Promise} - Whether the key exists
+ */
+ async has(key) {
+ const value = await this.get(key);
+ return value !== null;
+ }
+
+ /**
+ * Delete a value from the cache
+ *
+ * @param {string} key - Cache key
+ * @returns {Promise} - Whether the key was deleted
+ */
+ async delete(key) {
+ try {
+ const store = await this._getStore('readwrite');
+
+ // Delete entry
+ return new Promise((resolve, reject) => {
+ const request = store.delete(key);
+
+ request.onerror = (event) => {
+ reject(
+ new Error(`Failed to delete cache entry: ${event.target.error}`),
+ );
+ };
+
+ request.onsuccess = () => {
+ resolve(true);
+ };
+ });
+ } catch (error) {
+ console.error('Failed to delete cache entry:', error);
+ return false;
+ }
+ }
+
+ /**
+ * Clear the cache
+ *
+ * @returns {Promise}
+ */
+ async clear() {
+ try {
+ const store = await this._getStore('readwrite');
+
+ // Clear store
+ return new Promise((resolve, reject) => {
+ const request = store.clear();
+
+ request.onerror = (event) => {
+ reject(new Error(`Failed to clear cache: ${event.target.error}`));
+ };
+
+ request.onsuccess = () => {
+ resolve();
+ };
+ });
+ } catch (error) {
+ console.error('Failed to clear cache:', error);
+ }
+ }
+}
+
+/**
+ * Create an IndexedDB cache
+ *
+ * @param {Object} options - Cache options
+ * @returns {IndexedDBCache} - IndexedDB cache instance
+ */
+export function createIndexedDBCache(options = {}) {
+ return new IndexedDBCache(options);
+}
diff --git a/src/io/hooks/error.js b/src/io/hooks/error.js
new file mode 100644
index 0000000..1a93c99
--- /dev/null
+++ b/src/io/hooks/error.js
@@ -0,0 +1,161 @@
+/**
+ * Error handling hook for API requests
+ * Provides centralized error handling, retry with backoff, and alerting
+ */
+
+/**
+ * Default backoff strategy with exponential delay
+ *
+ * @param {number} attempt - Current attempt number (1-based)
+ * @param {number} maxDelay - Maximum delay in milliseconds
+ * @returns {number} - Delay in milliseconds
+ */
+function defaultBackoffStrategy(attempt, maxDelay = 30000) {
+ // Exponential backoff with jitter: 2^n * 100ms + random(50ms)
+ const delay = Math.min(
+ Math.pow(2, attempt) * 100 + Math.floor(Math.random() * 50),
+ maxDelay,
+ );
+
+ return delay;
+}
+
+/**
+ * Creates an error handling hook for API requests
+ *
+ * @param {Object} options - Error handling options
+ * @param {number} [options.maxRetries=3] - Maximum number of retry attempts
+ * @param {Function} [options.backoffStrategy] - Function to calculate retry delay
+ * @param {Function} [options.shouldRetry] - Function to determine if request should be retried
+ * @param {Function} [options.onError] - Function to call when an error occurs
+ * @param {Function} [options.onRetry] - Function to call before a retry attempt
+ * @param {Function} [options.onMaxRetriesExceeded] - Function to call when max retries are exceeded
+ * @returns {Function} - Error handling hook function
+ */
+export function createErrorHook(options = {}) {
+ const {
+ maxRetries = 3,
+ backoffStrategy = defaultBackoffStrategy,
+ shouldRetry = (error) => {
+ // Default retry on network errors and specific status codes
+ if (!error.status) return true; // Network error
+ return [408, 429, 500, 502, 503, 504].includes(error.status);
+ },
+ onError = (error, context) => {
+ console.error(`API Error: ${error.message || 'Unknown error'}`, {
+ url: context.request.url,
+ method: context.request.method,
+ status: error.status,
+ });
+ },
+ onRetry = (error, attempt, delay, context) => {
+ console.warn(
+ `Retrying request (${attempt}/${maxRetries}) after ${delay}ms`,
+ {
+ url: context.request.url,
+ method: context.request.method,
+ error: error.message || 'Unknown error',
+ },
+ );
+ },
+ onMaxRetriesExceeded = (error, context) => {
+ console.error(`Max retries (${maxRetries}) exceeded for request`, {
+ url: context.request.url,
+ method: context.request.method,
+ error: error.message || 'Unknown error',
+ });
+ },
+ } = options;
+
+ return async (context, next) => {
+ let attempts = 0;
+
+ while (true) {
+ try {
+ attempts++;
+ return await next(context);
+ } catch (error) {
+ // Call the error handler
+ onError(error, context);
+
+ // Check if we should retry
+ if (attempts <= maxRetries && shouldRetry(error)) {
+ // Calculate backoff delay
+ const delay = backoffStrategy(attempts, options.maxDelay);
+
+ // Call the retry handler
+ onRetry(error, attempts, delay, context);
+
+ // Wait for the backoff period
+ await new Promise((resolve) => setTimeout(resolve, delay));
+
+ // Continue to next attempt
+ continue;
+ }
+
+ // Max retries exceeded or shouldn't retry
+ if (attempts > 1) {
+ onMaxRetriesExceeded(error, context);
+ }
+
+ // Re-throw the error
+ throw error;
+ }
+ }
+ };
+}
+
+/**
+ * Creates an alerting hook for critical API errors
+ *
+ * @param {Object} options - Alerting options
+ * @param {Function} [options.isCriticalError] - Function to determine if an error is critical
+ * @param {Function} [options.alert] - Function to send alerts
+ * @returns {Function} - Alerting hook function
+ */
+export function createAlertHook(options = {}) {
+ const {
+ isCriticalError = (error) => {
+ // Default critical errors: 5xx errors or network errors
+ if (!error.status) return true; // Network error
+ return error.status >= 500;
+ },
+ alert = (error, context) => {
+ console.error('CRITICAL API ERROR', {
+ url: context.request.url,
+ method: context.request.method,
+ error: error.message || 'Unknown error',
+ status: error.status,
+ timestamp: new Date().toISOString(),
+ });
+
+ // Here you would typically send an alert to a monitoring system
+ // For example: sendSlackAlert(), sendEmailAlert(), etc.
+ },
+ } = options;
+
+ return async (context, next) => {
+ try {
+ return await next(context);
+ } catch (error) {
+ // Check if this is a critical error
+ if (isCriticalError(error)) {
+ // Send alert
+ alert(error, context);
+ }
+
+ // Re-throw the error
+ throw error;
+ }
+ };
+}
+
+/**
+ * Default error hook with standard configuration
+ */
+export const errorHook = createErrorHook();
+
+/**
+ * Default alert hook with standard configuration
+ */
+export const alertHook = createAlertHook();
diff --git a/src/io/hooks/index.js b/src/io/hooks/index.js
new file mode 100644
index 0000000..f713af9
--- /dev/null
+++ b/src/io/hooks/index.js
@@ -0,0 +1,9 @@
+/**
+ * API hooks (middleware) for extending API functionality
+ * Provides hooks for logging, caching, throttling, and authentication rotation
+ */
+
+export * from './logger.js';
+export * from './cache.js';
+export * from './throttle.js';
+export * from './auth.js';
diff --git a/src/io/hooks/logger.js b/src/io/hooks/logger.js
new file mode 100644
index 0000000..c716b14
--- /dev/null
+++ b/src/io/hooks/logger.js
@@ -0,0 +1,85 @@
+/**
+ * Logger hook for API requests
+ * Provides logging functionality for API requests and responses
+ */
+
+/**
+ * Creates a logger hook for API requests
+ *
+ * @param {Object} options - Logger options
+ * @param {boolean} [options.logRequest=true] - Whether to log request details
+ * @param {boolean} [options.logResponse=true] - Whether to log response details
+ * @param {boolean} [options.logErrors=true] - Whether to log errors
+ * @param {boolean} [options.logTiming=true] - Whether to log request timing
+ * @param {Function} [options.logger=console.log] - Logger function
+ * @returns {Function} - Logger hook function
+ */
+export function createLoggerHook(options = {}) {
+ const {
+ logRequest = true,
+ logResponse = true,
+ logErrors = true,
+ logTiming = true,
+ logger = console.log,
+ } = options;
+
+ return async (context, next) => {
+ const { url, method, headers, body } = context.request;
+
+ // Log request details
+ if (logRequest) {
+ logger(`API Request: ${method || 'GET'} ${url}`);
+
+ if (headers && Object.keys(headers).length > 0) {
+ logger('Headers:', { ...headers });
+ }
+
+ if (body) {
+ logger('Body:', body);
+ }
+ }
+
+ // Track timing
+ const startTime = logTiming ? Date.now() : null;
+
+ try {
+ // Execute the next middleware or the actual request
+ const result = await next(context);
+
+ // Log response details
+ if (logResponse) {
+ logger(`API Response: ${result.status} ${result.statusText}`);
+
+ // Log response headers
+ if (result.headers && Object.keys(result.headers).length > 0) {
+ logger('Response Headers:', { ...result.headers });
+ }
+
+ // Log timing
+ if (logTiming) {
+ const duration = Date.now() - startTime;
+ logger(`Request Duration: ${duration}ms`);
+ }
+ }
+
+ return result;
+ } catch (error) {
+ // Log errors
+ if (logErrors) {
+ logger(`API Error: ${error.message}`);
+
+ if (logTiming) {
+ const duration = Date.now() - startTime;
+ logger(`Failed Request Duration: ${duration}ms`);
+ }
+ }
+
+ throw error;
+ }
+ };
+}
+
+/**
+ * Default logger hook with standard configuration
+ */
+export const loggerHook = createLoggerHook();
diff --git a/src/io/hooks/throttle.js b/src/io/hooks/throttle.js
new file mode 100644
index 0000000..9e84eb5
--- /dev/null
+++ b/src/io/hooks/throttle.js
@@ -0,0 +1,205 @@
+/**
+ * Throttle hook for API requests
+ * Limits the rate of API requests to avoid rate limiting
+ */
+
+/**
+ * Simple rate limiter implementation
+ */
+class RateLimiter {
+ constructor(options = {}) {
+ this.requestsPerSecond = options.requestsPerSecond || 5;
+ this.requestsPerMinute = options.requestsPerMinute || 100;
+ this.requestsPerHour = options.requestsPerHour || 1000;
+
+ this.requestTimestamps = {
+ second: [],
+ minute: [],
+ hour: [],
+ };
+ }
+
+ /**
+ * Check if a request can be made
+ *
+ * @returns {boolean} - Whether the request can be made
+ */
+ canMakeRequest() {
+ const now = Date.now();
+
+ // Clean up old timestamps
+ this._cleanTimestamps(now);
+
+ // Check rate limits
+ if (this.requestTimestamps.second.length >= this.requestsPerSecond) {
+ return false;
+ }
+
+ if (this.requestTimestamps.minute.length >= this.requestsPerMinute) {
+ return false;
+ }
+
+ if (this.requestTimestamps.hour.length >= this.requestsPerHour) {
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * Record a request
+ */
+ recordRequest() {
+ const now = Date.now();
+
+ this.requestTimestamps.second.push(now);
+ this.requestTimestamps.minute.push(now);
+ this.requestTimestamps.hour.push(now);
+ }
+
+ /**
+ * Get the time to wait before making a request
+ *
+ * @returns {number} - Time to wait in milliseconds
+ */
+ getWaitTime() {
+ const now = Date.now();
+
+ // Clean up old timestamps
+ this._cleanTimestamps(now);
+
+ if (
+ this.requestTimestamps.second.length < this.requestsPerSecond &&
+ this.requestTimestamps.minute.length < this.requestsPerMinute &&
+ this.requestTimestamps.hour.length < this.requestsPerHour
+ ) {
+ return 0;
+ }
+
+ // Calculate wait time for each limit
+ const waitTimes = [];
+
+ if (this.requestTimestamps.second.length >= this.requestsPerSecond) {
+ const oldestTimestamp = this.requestTimestamps.second[0];
+ waitTimes.push(oldestTimestamp + 1000 - now);
+ }
+
+ if (this.requestTimestamps.minute.length >= this.requestsPerMinute) {
+ const oldestTimestamp = this.requestTimestamps.minute[0];
+ waitTimes.push(oldestTimestamp + 60000 - now);
+ }
+
+ if (this.requestTimestamps.hour.length >= this.requestsPerHour) {
+ const oldestTimestamp = this.requestTimestamps.hour[0];
+ waitTimes.push(oldestTimestamp + 3600000 - now);
+ }
+
+ // Return the maximum wait time
+ return Math.max(0, ...waitTimes);
+ }
+
+ /**
+ * Clean up old timestamps
+ *
+ * @param {number} now - Current timestamp
+ * @private
+ */
+ _cleanTimestamps(now) {
+ this.requestTimestamps.second = this.requestTimestamps.second.filter(
+ (timestamp) => now - timestamp < 1000,
+ );
+
+ this.requestTimestamps.minute = this.requestTimestamps.minute.filter(
+ (timestamp) => now - timestamp < 60000,
+ );
+
+ this.requestTimestamps.hour = this.requestTimestamps.hour.filter(
+ (timestamp) => now - timestamp < 3600000,
+ );
+ }
+}
+
+/**
+ * Creates a throttle hook for API requests
+ *
+ * @param {Object} options - Throttle options
+ * @param {number} [options.requestsPerSecond] - Maximum requests per second
+ * @param {number} [options.requestsPerMinute] - Maximum requests per minute
+ * @param {number} [options.requestsPerHour] - Maximum requests per hour
+ * @param {boolean} [options.groupByDomain=true] - Whether to group rate limits by domain
+ * @param {Function} [options.onThrottle] - Function to call when a request is throttled
+ * @returns {Function} - Throttle hook function
+ */
+export function createThrottleHook(options = {}) {
+ const {
+ requestsPerSecond,
+ requestsPerMinute,
+ requestsPerHour,
+ groupByDomain = true,
+ onThrottle = (waitTime) =>
+ console.log(`Request throttled. Waiting ${waitTime}ms`),
+ } = options;
+
+ // Create rate limiters
+ const rateLimiters = new Map();
+
+ // Get or create a rate limiter for a domain
+ const getRateLimiter = (domain) => {
+ if (!rateLimiters.has(domain)) {
+ rateLimiters.set(
+ domain,
+ new RateLimiter({
+ requestsPerSecond,
+ requestsPerMinute,
+ requestsPerHour,
+ }),
+ );
+ }
+
+ return rateLimiters.get(domain);
+ };
+
+ // Extract domain from URL
+ const getDomain = (url) => {
+ try {
+ return new URL(url).hostname;
+ } catch (error) {
+ return 'default';
+ }
+ };
+
+ return async (context, next) => {
+ const { url } = context.request;
+
+ // Get the appropriate rate limiter
+ const domain = groupByDomain ? getDomain(url) : 'default';
+ const rateLimiter = getRateLimiter(domain);
+
+ // Check if the request can be made
+ if (!rateLimiter.canMakeRequest()) {
+ const waitTime = rateLimiter.getWaitTime();
+
+ // Call the onThrottle callback
+ onThrottle(waitTime);
+
+ // Wait for the specified time
+ await new Promise((resolve) => setTimeout(resolve, waitTime));
+ }
+
+ // Record the request
+ rateLimiter.recordRequest();
+
+ // Execute the next middleware or the actual request
+ return next(context);
+ };
+}
+
+/**
+ * Default throttle hook with standard configuration
+ */
+export const throttleHook = createThrottleHook();
+
+/**
+ * Export the RateLimiter class for direct usage
+ */
+export { RateLimiter };
diff --git a/src/io/index.js b/src/io/index.js
index a069637..e888eca 100644
--- a/src/io/index.js
+++ b/src/io/index.js
@@ -6,4 +6,13 @@ export * from './readers/index.js';
// Export all transformers
export * from './transformers/index.js';
-// Note: Writers and Parsers will be added in future versions
+// Export API schema registry
+export * from './transformers/apiSchemas/index.js';
+
+// Export pipeline utilities
+export * from './pipe.js';
+
+// Export middleware hooks
+export * from './hooks/index.js';
+
+// Note: Writers will be added in future versions
diff --git a/src/io/parsers/dateParser.js b/src/io/parsers/dateParser.js
index 21f27a5..1c33b2a 100644
--- a/src/io/parsers/dateParser.js
+++ b/src/io/parsers/dateParser.js
@@ -1,65 +1,110 @@
/**
- * Модуль для парсинга дат из различных форматов
+ * Module for parsing dates in various formats
*/
/**
- * Преобразует строку с датой в объект Date
- * @param {string} dateString - Строка с датой
- * @param {Object} options - Опции парсинга
- * @param {string} options.format - Формат даты (например, 'YYYY-MM-DD')
- * @param {string} options.locale - Локаль для парсинга (например, 'ru-RU')
- * @returns {Date} - Объект Date
+ * Converts a date string to a Date object
+ * @param {string} dateString - Date string
+ * @param {Object} options - Parsing options
+ * @param {string} options.format - Date format (e.g., 'YYYY-MM-DD')
+ * @param {string} options.locale - Locale for parsing (e.g., 'ru-RU')
+ * @returns {Date} - Date object
*/
export function parseDate(dateString, options = {}) {
if (!dateString) {
return null;
}
- // Если передан объект Date, возвращаем его
+ // If the input is already a Date object, return it as is
if (dateString instanceof Date) {
return dateString;
}
- // Пробуем стандартный парсинг
+ // Try standard parsing
const date = new Date(dateString);
if (!isNaN(date.getTime())) {
return date;
}
- // Если стандартный парсинг не сработал, пробуем разные форматы
- // ISO формат: YYYY-MM-DD
+ // If standard parsing fails, try different formats
+ // ISO format: YYYY-MM-DD
const isoRegex = /^(\d{4})-(\d{2})-(\d{2})$/;
const isoMatch = dateString.match(isoRegex);
if (isoMatch) {
const [, year, month, day] = isoMatch;
- return new Date(parseInt(year), parseInt(month) - 1, parseInt(day));
+ const parsedYear = parseInt(year);
+ const parsedMonth = parseInt(month) - 1;
+ const parsedDay = parseInt(day);
+
+ // Создаем дату
+ const date = new Date(parsedYear, parsedMonth, parsedDay);
+
+ // Проверяем, что дата валидна (день и месяц не были скорректированы)
+ if (
+ date.getFullYear() === parsedYear &&
+ date.getMonth() === parsedMonth &&
+ date.getDate() === parsedDay
+ ) {
+ return date;
+ }
+ return null;
}
- // Формат DD.MM.YYYY
+ // Format DD.MM.YYYY
const dotRegex = /^(\d{2})\.(\d{2})\.(\d{4})$/;
const dotMatch = dateString.match(dotRegex);
if (dotMatch) {
const [, day, month, year] = dotMatch;
- return new Date(parseInt(year), parseInt(month) - 1, parseInt(day));
+ const parsedDay = parseInt(day);
+ const parsedMonth = parseInt(month) - 1;
+ const parsedYear = parseInt(year);
+
+ // Создаем дату
+ const date = new Date(parsedYear, parsedMonth, parsedDay);
+
+ // Проверяем, что дата валидна (день и месяц не были скорректированы)
+ if (
+ date.getFullYear() === parsedYear &&
+ date.getMonth() === parsedMonth &&
+ date.getDate() === parsedDay
+ ) {
+ return date;
+ }
+ return null;
}
- // Формат MM/DD/YYYY
+ // Format MM/DD/YYYY
const slashRegex = /^(\d{2})\/(\d{2})\/(\d{4})$/;
const slashMatch = dateString.match(slashRegex);
if (slashMatch) {
const [, month, day, year] = slashMatch;
- return new Date(parseInt(year), parseInt(month) - 1, parseInt(day));
+ const parsedMonth = parseInt(month) - 1;
+ const parsedDay = parseInt(day);
+ const parsedYear = parseInt(year);
+
+ // Создаем дату
+ const date = new Date(parsedYear, parsedMonth, parsedDay);
+
+ // Проверяем, что дата валидна (день и месяц не были скорректированы)
+ if (
+ date.getFullYear() === parsedYear &&
+ date.getMonth() === parsedMonth &&
+ date.getDate() === parsedDay
+ ) {
+ return date;
+ }
+ return null;
}
- // Если ничего не сработало, возвращаем null
+ // If nothing worked, return null
return null;
}
/**
- * Форматирует объект Date в строку в заданном формате
- * @param {Date} date - Объект Date
- * @param {string} format - Формат вывода (например, 'YYYY-MM-DD')
- * @returns {string} - Отформатированная строка с датой
+ * Formats a Date object into a string in the specified format
+ * @param {Date} date - Date object
+ * @param {string} format - Output format (e.g., 'YYYY-MM-DD')
+ * @returns {string} - Formatted date string
*/
export function formatDate(date, format = 'YYYY-MM-DD') {
if (!date || !(date instanceof Date) || isNaN(date.getTime())) {
diff --git a/src/io/parsers/index.js b/src/io/parsers/index.js
index 3a22367..d485b0c 100644
--- a/src/io/parsers/index.js
+++ b/src/io/parsers/index.js
@@ -1,20 +1,20 @@
/**
- * Экспорт парсеров для различных форматов данных
+ * Export parsers for various data formats
*/
import * as dateParser from './dateParser.js';
import * as numberParser from './numberParser.js';
-// Экспорт всех парсеров
+// Export all parsers
export { dateParser, numberParser };
-// Экспорт отдельных функций для удобства
+// Export individual functions for convenience
export const parseDate = dateParser.parseDate;
export const formatDate = dateParser.formatDate;
export const parseNumber = numberParser.parseNumber;
export const formatNumber = numberParser.formatNumber;
-// Экспорт по умолчанию
+// Export default
export default {
dateParser,
numberParser,
diff --git a/src/io/parsers/numberParser.js b/src/io/parsers/numberParser.js
index 84c010d..281f8c7 100644
--- a/src/io/parsers/numberParser.js
+++ b/src/io/parsers/numberParser.js
@@ -1,96 +1,155 @@
/**
- * Модуль для парсинга числовых значений из различных форматов
+ * Module for parsing numbers in various formats
*/
/**
- * Преобразует строку с числом в числовое значение
- * @param {string|number} value - Строка с числом или число
- * @param {Object} options - Опции парсинга
- * @param {string} options.decimalSeparator - Разделитель десятичной части (по умолчанию '.')
- * @param {string} options.thousandsSeparator - Разделитель тысяч (по умолчанию ',')
- * @param {boolean} options.parsePercent - Преобразовывать ли проценты в десятичные дроби (по умолчанию true)
- * @returns {number} - Числовое значение или NaN, если парсинг не удался
+ * Converts a string with a number to a numeric value
+ * @param {string|number} value - String with a number or number
+ * @param {Object} options - Parsing options
+ * @param {string} options.decimalSeparator - Decimal separator (default '.')
+ * @param {string} options.thousandsSeparator - Thousands separator (default ',')
+ * @param {boolean} options.parsePercent - Convert percentages to decimal fractions (default true)
+ * @returns {number} - Numeric value or NaN if parsing fails
*/
export function parseNumber(value, options = {}) {
- // Значения по умолчанию
+ // Default values
const decimalSeparator = options.decimalSeparator || '.';
const thousandsSeparator = options.thousandsSeparator || ',';
const parsePercent = options.parsePercent !== false;
- // Если value уже число, возвращаем его
+ // If value is already a number, return it
if (typeof value === 'number') {
- return value;
+ return value === 0 ? 0 : value; // Convert -0 to 0
}
- // Если value не строка или пустая строка, возвращаем NaN
+ // If value is not a string or an empty string, return NaN
if (typeof value !== 'string' || value.trim() === '') {
return NaN;
}
- // Обрабатываем проценты
+ // Handle percentages
let stringValue = value.trim();
let percentMultiplier = 1;
- if (parsePercent && stringValue.endsWith('%')) {
- stringValue = stringValue.slice(0, -1).trim();
- percentMultiplier = 0.01;
+ if (stringValue.endsWith('%')) {
+ if (parsePercent) {
+ stringValue = stringValue.slice(0, -1).trim();
+ percentMultiplier = 0.01;
+ } else {
+ // If parsePercent is false, just remove the % sign without applying multiplier
+ stringValue = stringValue.slice(0, -1).trim();
+ }
}
- // Удаляем разделители тысяч и заменяем десятичный разделитель на точку
- const normalizedValue = stringValue
- .replace(new RegExp(`\\${thousandsSeparator}`, 'g'), '')
- .replace(new RegExp(`\\${decimalSeparator}`, 'g'), '.');
+ // Basic validation before processing
+ // Check for multiple minus signs
+ const minusCount = (stringValue.match(/-/g) || []).length;
+ if (minusCount > 1 || (minusCount === 1 && !stringValue.startsWith('-'))) {
+ return NaN;
+ }
- // Преобразуем в число
- const number = parseFloat(normalizedValue);
+ // Check for multiple decimal separators
+ const decimalCount = (
+ stringValue.match(new RegExp(`\\${decimalSeparator}`, 'g')) || []
+ ).length;
+ if (decimalCount > 1) {
+ return NaN;
+ }
- // Применяем множитель для процентов
- return isNaN(number) ? NaN : number * percentMultiplier;
+ // Simple approach for parsing with custom decimal separator
+ try {
+ // Handle the sign separately
+ const isNegative = stringValue.startsWith('-');
+ if (isNegative) {
+ stringValue = stringValue.substring(1);
+ }
+
+ // Split by decimal separator
+ const parts = stringValue.split(decimalSeparator);
+
+ // If we have more than 2 parts after splitting by decimal separator, it's invalid
+ if (parts.length > 2) {
+ return NaN;
+ }
+
+ // Get integer and fractional parts
+ let integerPart = parts[0] || '0';
+ const fractionalPart = parts.length > 1 ? parts[1] : '';
+
+ // Remove thousands separators from integer part
+ if (thousandsSeparator) {
+ integerPart = integerPart.replace(
+ new RegExp(`\\${thousandsSeparator}`, 'g'),
+ '',
+ );
+ }
+
+ // Check if the parts contain only digits
+ if (!/^\d*$/.test(integerPart) || !/^\d*$/.test(fractionalPart)) {
+ return NaN;
+ }
+
+ // Combine parts into a proper number string
+ const numberStr = `${isNegative ? '-' : ''}${integerPart}${fractionalPart ? '.' + fractionalPart : ''}`;
+
+ // Parse the number
+ const number = parseFloat(numberStr);
+
+ // Handle -0 case
+ if (Object.is(number, -0)) {
+ return 0;
+ }
+
+ // Apply percentage multiplier
+ return isNaN(number) ? NaN : number * percentMultiplier;
+ } catch (e) {
+ return NaN;
+ }
}
/**
- * Форматирует число в строку с заданными параметрами
- * @param {number} value - Число для форматирования
- * @param {Object} options - Опции форматирования
- * @param {string} options.decimalSeparator - Разделитель десятичной части (по умолчанию '.')
- * @param {string} options.thousandsSeparator - Разделитель тысяч (по умолчанию ',')
- * @param {number} options.precision - Количество знаков после запятой (по умолчанию 2)
- * @param {boolean} options.showPercent - Показывать ли значение как процент (по умолчанию false)
- * @returns {string} - Отформатированное число в виде строки
+ * Formats a number into a string with the specified parameters
+ * @param {number} value - Number to format
+ * @param {Object} options - Formatting options
+ * @param {string} options.decimalSeparator - Decimal separator (default '.')
+ * @param {string} options.thousandsSeparator - Thousands separator (default ',')
+ * @param {number} options.precision - Number of decimal places (default 2)
+ * @param {boolean} options.showPercent - Show value as percentage (default false)
+ * @returns {string} - Formatted number as string
*/
export function formatNumber(value, options = {}) {
- // Значения по умолчанию
+ // Default values
const decimalSeparator = options.decimalSeparator || '.';
const thousandsSeparator = options.thousandsSeparator || ',';
const precision = options.precision !== undefined ? options.precision : 2;
const showPercent = options.showPercent || false;
- // Если value не число, возвращаем пустую строку
+ // If value is not a number, return an empty string
if (typeof value !== 'number' || isNaN(value)) {
return '';
}
- // Применяем множитель для процентов
+ // Apply percentage multiplier
const multipliedValue = showPercent ? value * 100 : value;
- // Форматируем число
+ // Format the number
const [integerPart, decimalPart] = multipliedValue
.toFixed(precision)
.split('.');
- // Добавляем разделители тысяч
+ // Add thousands separators
const formattedIntegerPart = integerPart.replace(
/\B(?=(\d{3})+(?!\d))/g,
thousandsSeparator,
);
- // Собираем результат
+ // Assemble the result
let result = formattedIntegerPart;
if (precision > 0) {
result += decimalSeparator + decimalPart;
}
- // Добавляем знак процента, если нужно
+ // Add percentage sign if needed
if (showPercent) {
result += '%';
}
diff --git a/src/io/pipe.js b/src/io/pipe.js
new file mode 100644
index 0000000..3b5d21f
--- /dev/null
+++ b/src/io/pipe.js
@@ -0,0 +1,308 @@
+/**
+ * Generic transformer pipeline for declarative ETL processes
+ * Allows composing readers, transformers, and writers into a single data pipeline
+ */
+
+import { DataFrame } from '../core/dataframe/DataFrame.js';
+import { filter as dfFilter } from '../methods/dataframe/filtering/filter.js';
+import { sort as dfSort } from '../methods/dataframe/transform/sort.js';
+import { apply as dfApply } from '../methods/dataframe/transform/apply.js';
+
+/**
+ * Creates a pipeline of functions that transform data
+ *
+ * @param {...Function} fns - Functions to compose
+ * @returns {Function} - Composed function
+ */
+export function compose(...fns) {
+ return fns.reduce(
+ (f, g) =>
+ async (...args) =>
+ g(await f(...args)),
+ );
+}
+
+/**
+ * Creates a data pipeline that reads, transforms, and optionally writes data
+ *
+ * @param {Function} reader - Function that reads data from a source
+ * @param {Function[]} transformers - Array of functions that transform data
+ * @param {Function} [writer] - Optional function that writes data to a destination
+ * @returns {Function} - Pipeline function that processes data
+ */
+export function createPipeline(reader, transformers = [], writer = null) {
+ return async (...args) => {
+ // Read data from source
+ let data = await reader(...args);
+
+ // Apply transformers
+ for (const transformer of transformers) {
+ data = await transformer(data);
+ }
+
+ // Write data if writer is provided
+ if (writer) {
+ await writer(data);
+ }
+
+ return data;
+ };
+}
+
+/**
+ * Creates a batch processing pipeline that processes data in chunks
+ *
+ * @param {Function} reader - Function that reads data from a source
+ * @param {Function} processor - Function that processes each batch
+ * @param {Object} options - Pipeline options
+ * @param {number} [options.batchSize=1000] - Size of each batch
+ * @param {Function} [options.onProgress] - Callback for progress updates
+ * @returns {Promise} - Array of processed results
+ */
+export async function batchProcess(reader, processor, options = {}) {
+ const { batchSize = 1000, onProgress = null } = options;
+
+ const results = [];
+ let processedCount = 0;
+
+ // Process data in batches
+ await reader({
+ batchSize,
+ onBatch: async (batch) => {
+ const result = await processor(batch);
+ if (result !== undefined) {
+ results.push(result);
+ }
+
+ processedCount += batch.rowCount;
+
+ if (onProgress) {
+ onProgress({
+ processedCount,
+ batchCount: results.length,
+ lastBatch: batch,
+ });
+ }
+ },
+ });
+
+ return results;
+}
+
+/**
+ * Creates a function that applies a schema to data
+ *
+ * @param {Object|string} schema - Schema mapping or schema name
+ * @returns {Function} - Function that applies the schema
+ */
+export function applySchema(schema) {
+ return async (data) => {
+ const { applySchema: applySchemaFn } = await import(
+ './transformers/apiSchemas/index.js'
+ );
+ return applySchemaFn(data, schema);
+ };
+}
+
+/**
+ * Creates a function that filters data based on a predicate
+ *
+ * @param {Function} predicate - Function that returns true for rows to keep
+ * @returns {Function} - Function that filters data
+ */
+export function filter(predicate) {
+ return (data) => {
+ if (data instanceof DataFrame) {
+ // Используем функцию dfFilter из модуля methods
+ return dfFilter(data, predicate);
+ }
+
+ if (Array.isArray(data)) {
+ return data.filter(predicate);
+ }
+
+ throw new Error('Data must be a DataFrame or an array');
+ };
+}
+
+/**
+ * Creates a function that maps data using a transform function
+ *
+ * @param {Function} transform - Function that transforms each row
+ * @returns {Function} - Function that maps data
+ */
+export function map(transform) {
+ return (data) => {
+ if (data instanceof DataFrame) {
+ // Преобразуем DataFrame в массив, применяем трансформацию и создаем новый DataFrame
+ const rows = data.toArray();
+ const transformed = rows.map(transform);
+ return DataFrame.fromRows(transformed);
+ }
+
+ if (Array.isArray(data)) {
+ return data.map(transform);
+ }
+
+ throw new Error('Data must be a DataFrame or an array');
+ };
+}
+
+/**
+ * Creates a function that sorts data based on a key or comparator
+ *
+ * @param {string|Function} keyOrComparator - Sort key or comparator function
+ * @param {boolean} [ascending=true] - Sort direction
+ * @returns {Function} - Function that sorts data
+ */
+export function sort(keyOrComparator, ascending = true) {
+ return (data) => {
+ if (data instanceof DataFrame) {
+ // Если ключ - функция, преобразуем в сортировку по столбцу
+ if (typeof keyOrComparator === 'function') {
+ // Для функции-компаратора используем преобразование в массив
+ const rows = data.toArray();
+ const sorted = [...rows].sort(keyOrComparator);
+ return DataFrame.fromRows(sorted);
+ } else {
+ // Для строкового ключа используем сортировку по столбцу
+ const rows = data.toArray();
+ const sorted = [...rows].sort((a, b) => {
+ const aVal = a[keyOrComparator];
+ const bVal = b[keyOrComparator];
+
+ if (aVal < bVal) return ascending ? -1 : 1;
+ if (aVal > bVal) return ascending ? 1 : -1;
+ return 0;
+ });
+ return DataFrame.fromRows(sorted);
+ }
+ }
+
+ if (Array.isArray(data)) {
+ const sorted = [...data];
+
+ if (typeof keyOrComparator === 'function') {
+ sorted.sort(keyOrComparator);
+ } else {
+ sorted.sort((a, b) => {
+ const aVal = a[keyOrComparator];
+ const bVal = b[keyOrComparator];
+
+ if (aVal < bVal) return ascending ? -1 : 1;
+ if (aVal > bVal) return ascending ? 1 : -1;
+ return 0;
+ });
+ }
+
+ return sorted;
+ }
+
+ throw new Error('Data must be a DataFrame or an array');
+ };
+}
+
+/**
+ * Creates a function that limits the number of rows
+ *
+ * @param {number} count - Maximum number of rows to keep
+ * @returns {Function} - Function that limits data
+ */
+export function limit(count) {
+ return (data) => {
+ if (data instanceof DataFrame) {
+ // Преобразуем DataFrame в массив, берем первые count элементов и создаем новый DataFrame
+ const rows = data.toArray().slice(0, count);
+ return DataFrame.fromRows(rows);
+ }
+
+ if (Array.isArray(data)) {
+ return data.slice(0, count);
+ }
+
+ throw new Error('Data must be a DataFrame or an array');
+ };
+}
+
+/**
+ * Creates a function that converts data to a DataFrame
+ *
+ * @param {Object} [options] - Conversion options
+ * @returns {Function} - Function that converts data to DataFrame
+ */
+export function toDataFrame(options = {}) {
+ return (data) => {
+ if (data instanceof DataFrame) {
+ return data;
+ }
+
+ if (Array.isArray(data)) {
+ return DataFrame.fromRows(data, options);
+ }
+
+ if (typeof data === 'object' && data !== null) {
+ // Check if it's a columns object
+ const firstValue = Object.values(data)[0];
+ if (Array.isArray(firstValue)) {
+ return DataFrame.fromColumns(data, options);
+ }
+
+ // Single row object
+ return DataFrame.fromRows([data], options);
+ }
+
+ throw new Error('Cannot convert data to DataFrame');
+ };
+}
+
+/**
+ * Creates a function that logs data for debugging
+ *
+ * @param {string} [message='Data:'] - Message to log before data
+ * @param {boolean} [detailed=false] - Whether to log detailed information
+ * @returns {Function} - Function that logs data
+ */
+export function log(message = 'Data:', detailed = false) {
+ return (data) => {
+ if (data instanceof DataFrame) {
+ console.log(message);
+ if (detailed) {
+ console.log(`Rows: ${data.rowCount}, Columns: ${data.columns.length}`);
+ console.log('Columns:', data.columns);
+ console.log('Sample:');
+ // Используем toArray для получения первых 5 строк
+ console.table(data.toArray().slice(0, 5));
+ } else {
+ console.table(data.toArray().slice(0, 5));
+ }
+ } else {
+ console.log(message, data);
+ }
+
+ return data;
+ };
+}
+
+/**
+ * Example of a complete ETL pipeline
+ *
+ * @example
+ * // Create a pipeline that reads CSV data, transforms it, and writes to a database
+ * const pipeline = createPipeline(
+ * // Reader
+ * () => readCSV('data.csv'),
+ * // Transformers
+ * [
+ * filter(row => row.value > 0),
+ * map(row => ({ ...row, value: row.value * 2 })),
+ * sort('timestamp'),
+ * limit(1000),
+ * log('Processed data:')
+ * ],
+ * // Writer
+ * (data) => writeToDatabase(data, 'table_name')
+ * );
+ *
+ * // Execute the pipeline
+ * await pipeline();
+ */
diff --git a/src/io/pipeConfigRunner.js b/src/io/pipeConfigRunner.js
new file mode 100644
index 0000000..97a5434
--- /dev/null
+++ b/src/io/pipeConfigRunner.js
@@ -0,0 +1,356 @@
+/**
+ * Configuration-driven pipeline runner
+ * Allows defining ETL pipelines using YAML/JSON configuration
+ */
+
+import {
+ createPipeline,
+ filter,
+ map,
+ sort,
+ limit,
+ toDataFrame,
+ log,
+} from './pipe.js';
+import { applySchema } from './transformers/apiSchemas/index.js';
+import { createValidator } from './transformers/validators/schemaValidator.js';
+
+/**
+ * Pipeline configuration schema
+ *
+ * @typedef {Object} PipelineConfig
+ * @property {Object} reader - Reader configuration
+ * @property {string} reader.type - Reader type (csv, json, api, etc.)
+ * @property {Object} reader.params - Reader parameters
+ * @property {Object[]} transformers - Array of transformer configurations
+ * @property {string} transformers[].type - Transformer type (filter, map, sort, etc.)
+ * @property {Object} transformers[].params - Transformer parameters
+ * @property {Object} [writer] - Writer configuration
+ * @property {string} writer.type - Writer type (csv, json, arrow, etc.)
+ * @property {Object} writer.params - Writer parameters
+ */
+
+/**
+ * Registry of available readers
+ */
+const readerRegistry = new Map();
+
+/**
+ * Registry of available transformers
+ */
+const transformerRegistry = new Map();
+
+/**
+ * Registry of available writers
+ */
+const writerRegistry = new Map();
+
+/**
+ * Register a reader
+ *
+ * @param {string} type - Reader type
+ * @param {Function} readerFn - Reader function
+ */
+export function registerReader(type, readerFn) {
+ readerRegistry.set(type, readerFn);
+}
+
+/**
+ * Register a transformer
+ *
+ * @param {string} type - Transformer type
+ * @param {Function} transformerFactory - Transformer factory function
+ */
+export function registerTransformer(type, transformerFactory) {
+ transformerRegistry.set(type, transformerFactory);
+}
+
+/**
+ * Register a writer
+ *
+ * @param {string} type - Writer type
+ * @param {Function} writerFn - Writer function
+ */
+export function registerWriter(type, writerFn) {
+ writerRegistry.set(type, writerFn);
+}
+
+/**
+ * Create a reader from configuration
+ *
+ * @param {Object} config - Reader configuration
+ * @returns {Function} - Reader function
+ */
+function createReaderFromConfig(config) {
+ const { type, params = {} } = config;
+
+ if (!readerRegistry.has(type)) {
+ throw new Error(`Unknown reader type: ${type}`);
+ }
+
+ const readerFn = readerRegistry.get(type);
+
+ return (...args) => {
+ // Merge args with params
+ const mergedParams = { ...params };
+
+ // If first arg is a string (path/url), use it as source
+ if (typeof args[0] === 'string') {
+ mergedParams.source = args[0];
+ } else if (typeof args[0] === 'object') {
+ Object.assign(mergedParams, args[0]);
+ }
+
+ return readerFn(mergedParams);
+ };
+}
+
+/**
+ * Create a transformer from configuration
+ *
+ * @param {Object} config - Transformer configuration
+ * @returns {Function} - Transformer function
+ */
+function createTransformerFromConfig(config) {
+ const { type, params = {} } = config;
+
+ // Handle built-in transformers
+ switch (type) {
+ case 'filter':
+ // Convert string expression to function
+ if (typeof params.predicate === 'string') {
+ // Simple expression parser for basic conditions
+ const expr = params.predicate;
+ return filter((row) => {
+ const fn = new Function('row', `return ${expr}`);
+ return fn(row);
+ });
+ }
+ return filter(params.predicate);
+
+ case 'map':
+ // Convert string expression to function
+ if (typeof params.transform === 'string') {
+ // Simple expression parser for basic transformations
+ const expr = params.transform;
+ return map((row) => {
+ const fn = new Function('row', `return ${expr}`);
+ return fn(row);
+ });
+ }
+ return map(params.transform);
+
+ case 'sort':
+ return sort(params.key, params.ascending);
+
+ case 'limit':
+ return limit(params.count);
+
+ case 'log':
+ return log(params.message, params.detailed);
+
+ case 'toDataFrame':
+ return toDataFrame(params);
+
+ case 'schema':
+ return (data) => applySchema(data, params.schema);
+
+ case 'validate':
+ return createValidator(params.schema, params.options);
+
+ default:
+ // Check custom transformer registry
+ if (!transformerRegistry.has(type)) {
+ throw new Error(`Unknown transformer type: ${type}`);
+ }
+
+ const transformerFactory = transformerRegistry.get(type);
+ return transformerFactory(params);
+ }
+}
+
+/**
+ * Create a writer from configuration
+ *
+ * @param {Object} config - Writer configuration
+ * @returns {Function} - Writer function
+ */
+function createWriterFromConfig(config) {
+ const { type, params = {} } = config;
+
+ if (!writerRegistry.has(type)) {
+ throw new Error(`Unknown writer type: ${type}`);
+ }
+
+ const writerFn = writerRegistry.get(type);
+
+ return (data) => writerFn(data, params);
+}
+
+/**
+ * Create a pipeline from configuration
+ *
+ * @param {PipelineConfig} config - Pipeline configuration
+ * @returns {Function} - Pipeline function
+ */
+export function createPipelineFromConfig(config) {
+ // Validate configuration
+ if (!config.reader) {
+ throw new Error('Pipeline configuration must include a reader');
+ }
+
+ // Create reader
+ const reader = createReaderFromConfig(config.reader);
+
+ // Create transformers
+ const transformers = (config.transformers || []).map(
+ createTransformerFromConfig,
+ );
+
+ // Create writer (optional)
+ const writer = config.writer ? createWriterFromConfig(config.writer) : null;
+
+ // Create pipeline
+ return createPipeline(reader, transformers, writer);
+}
+
+/**
+ * Run a pipeline from configuration
+ *
+ * @param {PipelineConfig|string} config - Pipeline configuration or path to config file
+ * @param {Object} [args] - Arguments to pass to the pipeline
+ * @returns {Promise} - Pipeline result
+ */
+export async function runPipeline(config, args = {}) {
+ // If config is a string, load it as a file
+ if (typeof config === 'string') {
+ const { isNodeJs } = await import('./utils/environment.js');
+
+ if (isNodeJs()) {
+ const fs = await import('fs/promises');
+ const path = await import('path');
+ const yaml = await import('js-yaml');
+
+ const configPath = config;
+ const ext = path.extname(configPath).toLowerCase();
+
+ const content = await fs.readFile(configPath, 'utf8');
+
+ if (ext === '.json') {
+ config = JSON.parse(content);
+ } else if (ext === '.yml' || ext === '.yaml') {
+ config = yaml.load(content);
+ } else {
+ throw new Error(`Unsupported config file extension: ${ext}`);
+ }
+ } else {
+ throw new Error(
+ 'Loading config from file is only supported in Node.js environment',
+ );
+ }
+ }
+
+ // Create and run pipeline
+ const pipeline = createPipelineFromConfig(config);
+ return pipeline(args);
+}
+
+// Register built-in readers
+import { readCsv } from './readers/csv.js';
+import { readJson } from './readers/json.js';
+import { readTsv } from './readers/tsv.js';
+import { readExcel } from './readers/excel.js';
+import { readCSVStream } from './readers/stream/csvStream.js';
+import { readJSONLStream } from './readers/stream/jsonStream.js';
+import { ApiClient } from './readers/api/client.js';
+
+registerReader('csv', ({ source, ...options }) => readCsv(source, options));
+registerReader('json', ({ source, ...options }) => readJson(source, options));
+registerReader('tsv', ({ source, ...options }) => readTsv(source, options));
+registerReader('excel', ({ source, ...options }) => readExcel(source, options));
+registerReader('csvStream', ({ source, ...options }) =>
+ readCSVStream(source, options),
+);
+registerReader('jsonlStream', ({ source, ...options }) =>
+ readJSONLStream(source, options),
+);
+registerReader(
+ 'api',
+ ({ url, method = 'GET', baseUrl, headers, ...options }) => {
+ const client = new ApiClient({ baseUrl, defaultHeaders: headers });
+ return method.toUpperCase() === 'GET'
+ ? client.fetchJson(url, options)
+ : client.request(url, { method, ...options }).then((res) => res.json());
+ },
+);
+
+// Register built-in writers
+import { writeArrow } from './writers/arrow.js';
+
+registerWriter('arrow', (data, { destination, ...options }) =>
+ writeArrow(data, destination, options),
+);
+registerWriter('console', (data) => {
+ console.log(data);
+ return data;
+});
+
+/**
+ * Example pipeline configuration:
+ *
+ * ```json
+ * {
+ * "reader": {
+ * "type": "csv",
+ * "params": {
+ * "source": "data.csv",
+ * "header": true
+ * }
+ * },
+ * "transformers": [
+ * {
+ * "type": "filter",
+ * "params": {
+ * "predicate": "row.value > 0"
+ * }
+ * },
+ * {
+ * "type": "map",
+ * "params": {
+ * "transform": "{ ...row, value: row.value * 2 }"
+ * }
+ * },
+ * {
+ * "type": "sort",
+ * "params": {
+ * "key": "timestamp",
+ * "ascending": true
+ * }
+ * },
+ * {
+ * "type": "limit",
+ * "params": {
+ * "count": 1000
+ * }
+ * },
+ * {
+ * "type": "log",
+ * "params": {
+ * "message": "Processed data:",
+ * "detailed": true
+ * }
+ * },
+ * {
+ * "type": "toDataFrame"
+ * }
+ * ],
+ * "writer": {
+ * "type": "arrow",
+ * "params": {
+ * "destination": "output.arrow",
+ * "compression": "zstd"
+ * }
+ * }
+ * }
+ * ```
+ */
diff --git a/src/io/readers/api/client.js b/src/io/readers/api/client.js
new file mode 100644
index 0000000..25eb163
--- /dev/null
+++ b/src/io/readers/api/client.js
@@ -0,0 +1,289 @@
+/**
+ * Enhanced API client with support for schemas, hooks, and transformers
+ * Provides a unified interface for making API requests with advanced features
+ */
+
+import { fetchWithRetry } from './common.js';
+import { DataFrame } from '../../../core/dataframe/DataFrame.js';
+import { applySchema } from '../../transformers/apiSchemas/index.js';
+import {
+ createLoggerHook,
+ createCacheHook,
+ createThrottleHook,
+ createAuthHook,
+} from '../../hooks/index.js';
+
+/**
+ * API Client class for making API requests with advanced features
+ */
+export class ApiClient {
+ /**
+ * Create a new API client
+ *
+ * @param {Object} options - Client options
+ * @param {string} [options.baseUrl] - Base URL for all requests
+ * @param {Object} [options.defaultHeaders] - Default headers for all requests
+ * @param {Object} [options.auth] - Authentication options
+ * @param {Object} [options.cache] - Cache options
+ * @param {Object} [options.throttle] - Throttle options
+ * @param {Object} [options.retry] - Retry options
+ * @param {Object[]} [options.hooks] - Additional middleware hooks
+ */
+ constructor(options = {}) {
+ this.baseUrl = options.baseUrl || '';
+ this.defaultHeaders = options.defaultHeaders || {};
+ this.hooks = [];
+
+ // Add default hooks
+ if (options.auth) {
+ this.hooks.push(createAuthHook(options.auth));
+ }
+
+ if (options.cache !== false) {
+ this.hooks.push(createCacheHook(options.cache || {}));
+ }
+
+ if (options.throttle !== false) {
+ this.hooks.push(createThrottleHook(options.throttle || {}));
+ }
+
+ // Add logger hook last to see the final request
+ if (options.logger !== false) {
+ this.hooks.push(createLoggerHook(options.logger || {}));
+ }
+
+ // Add additional hooks
+ if (Array.isArray(options.hooks)) {
+ this.hooks.push(...options.hooks);
+ }
+
+ this.retryOptions = options.retry || {};
+ }
+
+ /**
+ * Add a hook to the client
+ *
+ * @param {Function} hook - Hook function
+ * @returns {ApiClient} - This client instance for chaining
+ */
+ addHook(hook) {
+ this.hooks.push(hook);
+ return this;
+ }
+
+ /**
+ * Make an API request
+ *
+ * @param {string|Object} urlOrOptions - URL or request options
+ * @param {Object} [options] - Request options
+ * @returns {Promise} - Response object
+ */
+ async request(urlOrOptions, options = {}) {
+ // Handle different argument formats
+ const requestOptions =
+ typeof urlOrOptions === 'string'
+ ? { ...options, url: urlOrOptions }
+ : { ...urlOrOptions };
+
+ // Apply base URL if relative URL is provided
+ if (this.baseUrl && !requestOptions.url.startsWith('http')) {
+ requestOptions.url = `${this.baseUrl}${requestOptions.url}`;
+ }
+
+ // Apply default headers
+ requestOptions.headers = {
+ ...this.defaultHeaders,
+ ...requestOptions.headers,
+ };
+
+ // Create request context
+ const context = {
+ request: requestOptions,
+ client: this,
+ };
+
+ // Apply hooks in sequence
+ const executeRequest = async (ctx) =>
+ fetchWithRetry(ctx.request.url, {
+ method: ctx.request.method,
+ headers: ctx.request.headers,
+ body: ctx.request.body,
+ ...this.retryOptions,
+ });
+
+ // Chain hooks together
+ const chainedRequest = this.hooks.reduceRight(
+ (next, hook) => (ctx) => hook(ctx, next),
+ executeRequest,
+ );
+
+ // Execute the request with all hooks
+ return chainedRequest(context);
+ }
+
+ /**
+ * Make a GET request
+ *
+ * @param {string} url - URL to request
+ * @param {Object} [options] - Request options
+ * @returns {Promise} - Response object
+ */
+ async get(url, options = {}) {
+ return this.request(url, {
+ ...options,
+ method: 'GET',
+ });
+ }
+
+ /**
+ * Make a POST request
+ *
+ * @param {string} url - URL to request
+ * @param {Object|string} data - Data to send
+ * @param {Object} [options] - Request options
+ * @returns {Promise} - Response object
+ */
+ async post(url, data, options = {}) {
+ const isJson = typeof data === 'object';
+
+ return this.request(url, {
+ ...options,
+ method: 'POST',
+ headers: {
+ 'Content-Type': isJson
+ ? 'application/json'
+ : 'application/x-www-form-urlencoded',
+ ...options.headers,
+ },
+ body: isJson ? JSON.stringify(data) : data,
+ });
+ }
+
+ /**
+ * Make a PUT request
+ *
+ * @param {string} url - URL to request
+ * @param {Object|string} data - Data to send
+ * @param {Object} [options] - Request options
+ * @returns {Promise} - Response object
+ */
+ async put(url, data, options = {}) {
+ const isJson = typeof data === 'object';
+
+ return this.request(url, {
+ ...options,
+ method: 'PUT',
+ headers: {
+ 'Content-Type': isJson
+ ? 'application/json'
+ : 'application/x-www-form-urlencoded',
+ ...options.headers,
+ },
+ body: isJson ? JSON.stringify(data) : data,
+ });
+ }
+
+ /**
+ * Make a DELETE request
+ *
+ * @param {string} url - URL to request
+ * @param {Object} [options] - Request options
+ * @returns {Promise} - Response object
+ */
+ async delete(url, options = {}) {
+ return this.request(url, {
+ ...options,
+ method: 'DELETE',
+ });
+ }
+
+ /**
+ * Fetch JSON data from an API
+ *
+ * @param {string} url - URL to request
+ * @param {Object} [options] - Request options
+ * @param {string|Object} [schema] - Schema name or mapping for transformation
+ * @returns {Promise