Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions src/core/dataframe/GroupBy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// src/core/dataframe/GroupBy.js
import { DataFrame } from './DataFrame.js';
import { Series } from './Series.js';

export class GroupBy {
/**
* @param {DataFrame} df - Source DataFrame
* @param {string|string[]} by - Column(s) to group by
*/
constructor(df, by) {
this.df = df;
this.by = Array.isArray(by) ? by : [by];
this._groups = this._createGroups();
}

/**
* Creates groups based on unique values in the grouping columns
* @private
* @returns {Map} - Map of group keys to row indices
*/
_createGroups() {
const groups = new Map();
const rows = this.df.toArray();

// Group rows by the values in the 'by' columns
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
const key = this.by.map((col) => row[col]).join('|');

if (!groups.has(key)) {
groups.set(key, []);
}

groups.get(key).push(i);
}

return groups;
}

/**
* Applies an aggregation function to each group
* @param {Object} aggregations - Map of column names to aggregation functions
* @returns {DataFrame} - DataFrame with aggregated results
*/
agg(aggregations) {
const result = {};

// Add grouping columns to result
for (const col of this.by) {
result[col] = [];
}

// Add aggregation columns to result
for (const col in aggregations) {
result[col] = [];
}

// Process each group
for (const [key, indices] of this._groups.entries()) {
// Extract group key values
const keyValues = key.split('|');

// Add group key values to result
for (let i = 0; i < this.by.length; i++) {
result[this.by[i]].push(keyValues[i]);
}

// Create subset DataFrame for this group
const groupRows = indices.map((idx) => this.df.toArray()[idx]);
const groupDf = DataFrame.fromRows(groupRows);

// Apply aggregations
for (const col in aggregations) {
const aggFunc = aggregations[col];
const aggValue = aggFunc(groupDf.col(col));
result[col].push(aggValue);
}
}

return new DataFrame(result);
}

/**
* Applies a function to each group and returns a DataFrame with the results
* @param {Function} fn - Function to apply to each group
* @returns {DataFrame} - DataFrame with transformed groups
*/
apply(fn) {
const results = [];

// Process each group
for (const [key, indices] of this._groups.entries()) {
// Create subset DataFrame for this group
const groupRows = indices.map((idx) => this.df.toArray()[idx]);
const groupDf = DataFrame.fromRows(groupRows);

// Apply function to group
const result = fn(groupDf);

// Add group key information
const keyValues = key.split('|');
for (let i = 0; i < this.by.length; i++) {
result[this.by[i]] = keyValues[i];
}

results.push(result);
}

return DataFrame.fromRows(results);
}

/**
* Returns the number of items in each group
* @returns {DataFrame} - DataFrame with group counts
*/
count() {
return this.agg({
count: (series) => series.length,
});
}

/**
* Returns the sum of values in each group
* @param {string} column - Column to sum
* @returns {DataFrame} - DataFrame with group sums
*/
sum(column) {
const agg = {};
agg[column] = (series) => series.sum();
return this.agg(agg);
}

/**
* Returns the mean of values in each group
* @param {string} column - Column to average
* @returns {DataFrame} - DataFrame with group means
*/
mean(column) {
const agg = {};
agg[column] = (series) => series.mean();
return this.agg(agg);
}
}
55 changes: 8 additions & 47 deletions src/core/dataframe/Series.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,12 @@ export class Series {
this.name = opts.name || '';

// Create vector from data
if (data?._isVector) {
if (data._isVector) {
this.vector = data;
this._length = data.length;
} else if (Array.isArray(data)) {
// For simplicity in tests, we use a simple array
this._array = data;
this._length = data.length;
} else if (data === undefined) {
// Empty array for initialization
this._array = [];
this._length = 0;
} else {
// For other data types, we try to create a vector
// Note: VectorFactory.from is asynchronous, but we simplify it for tests
this._array = Array.isArray(data) ? data : [];
this._length = this._array.length;
this.vector = VectorFactory.from(data, {
preferArrow: opts.preferArrow ?? shouldUseArrow(data, opts),
});
}
}

Expand All @@ -43,52 +33,23 @@ export class Series {
* ------------------------------------------------------------------ */

get length() {
if (this.vector) return this.vector.length;
if (this._array) return this._array.length;
return this._length || 0;
return this.vector.length;
}

get values() {
if (this.vector) return this.vector.toArray();
return this._array || [];
return this.vector.toArray();
}

get(index) {
if (this.vector) return this.vector.get(index);
return this._array ? this._array[index] : undefined;
return this.vector.get(index);
}

/* ------------------------------------------------------------------ *
* Data export *
* ------------------------------------------------------------------ */

toArray() {
if (this.vector) return this.vector.toArray();
return this._array || [];
}

/* ------------------------------------------------------------------ *
* Aggregation methods *
* ------------------------------------------------------------------ */

/**
* Calculates the sum of all values in the Series
* @returns {number} - Sum of all values
*/
sum() {
const data = this.toArray();
return data.reduce((acc, val) => acc + (Number(val) || 0), 0);
}

/**
* Calculates the mean (average) of all values in the Series
* @returns {number} - Mean of all values
*/
mean() {
const data = this.toArray();
if (!data.length) return NaN;
const sum = data.reduce((acc, val) => acc + (Number(val) || 0), 0);
return sum / data.length;
return this.vector.toArray();
}

/* ------------------------------------------------------------------ *
Expand Down
2 changes: 1 addition & 1 deletion src/core/lazy/optimizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export function optimize(plan) {

/* ---------- 1. Merging filter + filter ---------- */
if (step.op === 'filter' && prev.op === 'filter') {
// Сохраняем оригинальные функции, чтобы избежать циклических ссылок
// Save original functions to avoid circular references
const prevFn = prev.fn;
const stepFn = step.fn;
prev.fn = (row) => prevFn(row) && stepFn(row);
Expand Down
26 changes: 13 additions & 13 deletions src/core/storage/ArrowVector.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import { ColumnVector } from './ColumnVector.js';
import { Vector } from 'apache-arrow';

/**
* Обёртка над Apache Arrow Vector.
* Поддерживает get / sum / map и сериализацию.
* Wrapper around Apache Arrow Vector.
* Supports get / sum / map and serialization.
*/
export class ArrowVector extends ColumnVector {
/**
Expand All @@ -17,29 +17,29 @@ export class ArrowVector extends ColumnVector {
}

/* -------------------------------------------------- *
* Доступ к элементам *
* Element access *
* -------------------------------------------------- */

get(i) {
return this._arrow.get(i);
}

/* -------------------------------------------------- *
* Агрегаты *
* Aggregates *
* -------------------------------------------------- */

sum() {
// Arrow Vector имеет reduce
// Arrow Vector has reduce
return this._arrow.reduce((acc, v) => acc + (v ?? 0), 0);
}

/* -------------------------------------------------- *
* Трансформации *
* Transformations *
* -------------------------------------------------- */

/**
* Возвращает новый ArrowVector, к которому применена функция fn.
* Arrow JS Vector уже имеет метод map, который создаёт новый Vector.
* Returns a new ArrowVector with the function fn applied.
* Arrow JS Vector already has a map method that creates a new Vector.
* @param fn
*/
map(fn) {
Expand All @@ -48,25 +48,25 @@ export class ArrowVector extends ColumnVector {
}

/* -------------------------------------------------- *
* Сериализация / экспорт *
* Serialization / export *
* -------------------------------------------------- */

/** Быстрое преобразование в JS-массив */
/** Fast conversion to JS array */
toArray() {
return this._arrow.toArray();
}

/** Поддержка JSON.stringify(series) */
/** Support for JSON.stringify(series) */
toJSON() {
return this.toArray();
}

/** Совместимость с ColumnVector.toArrow() */
/** Compatibility with ColumnVector.toArrow() */
toArrow() {
return this._arrow;
}

/** Маркер, что это Arrow-бэкенд (для внутренней логики) */
/** Marker, that this is Arrow backend (for internal logic) */
get isArrow() {
return true;
}
Expand Down
Loading
Loading