Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions Python/README_CUSTOM.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Custom FSA Implementation

This directory contains a custom implementation of the FSA (Feature Selection with Annealing) algorithm with enhanced multiclass support.

## Files

- **fsa_custom.py**: Main implementation of FSA_Multiclass with PyTorch
- **demo_custom.py**: Demo script showing usage for binary and multiclass classification
- **fsa.py**: Original FSA implementation from the repository
- **demo.ipynb**: Original demo notebook

## Custom Implementation Features

### FSA_Multiclass Class

The `FSA_Multiclass` class provides an improved implementation with the following features:

- **Multiclass Support**: Handles both binary and multiclass classification problems
- **GPU Acceleration**: Utilizes CUDA when available for faster computation
- **Gradient Clipping**: Prevents gradient explosion during training
- **Annealing Schedule**: Gradually reduces features during optimization
- **Flexible Interface**: Works with both NumPy arrays and PyTorch tensors

### Parameters

- `k` (int): Target number of features to select
- `mu` (float, default=100): Annealing parameter controlling feature reduction speed
- `s` (float, default=0.0001): L2 regularization parameter
- `Niter` (int, default=300): Number of optimization iterations
- `lr` (float, default=0.01): Learning rate for SGD optimizer

## Usage Example

```python
import torch
from fsa_custom import FSA_Multiclass, select_features_fsa

# Prepare data
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)

# Select top k features
fsa = FSA_Multiclass(k=20, mu=100, Niter=300)
fsa.fit(X_tensor, y_tensor, device, num_classes=5)

# Get selected feature indices
selected_features = fsa.idx.cpu().numpy()
print(f"Selected features: {selected_features}")

# Or use the high-level function
selected_feature_names = select_features_fsa(X_train, y_train, k=20, num_classes=5)
```

## Running the Demo

```bash
cd Python
python demo_custom.py
```

The demo will run two examples:
1. Binary classification with 100 features → 20 selected
2. Multiclass (5 classes) with 100 features → 25 selected

## Reference

Original FSA algorithm from:
> A. Barbu, Y. She, L. Ding, G. Gramajo. "Feature Selection with Annealing for Computer Vision and Big Data Learning." IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 39, no. 2, pp. 272–286, 2017.

## Requirements

- Python 3.7+
- PyTorch
- NumPy
- scikit-learn (for demo only)
145 changes: 145 additions & 0 deletions Python/demo_custom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""
Demo script for FSA_Multiclass feature selection
Shows usage examples for both binary and multiclass classification
"""

import torch
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import sys
sys.path.append('.')
from fsa_custom import FSA_Multiclass, select_features_fsa


def demo_binary_classification():
    """Run the FSA feature-selection demo on a synthetic binary problem.

    Builds a 500-sample, 100-feature two-class dataset, selects 20
    features with FSA_Multiclass, then trains a RandomForest on the
    selected subset and compares its test accuracy against a forest
    trained on all features. Prints results to stdout; returns None.
    """
    banner = "=" * 60
    print(banner)
    print("BINARY CLASSIFICATION DEMO")
    print(banner)

    # Synthetic two-class data: 10 informative and 20 redundant
    # features out of 100 total. Fixed seed keeps the run reproducible.
    features, labels = make_classification(
        n_samples=500,
        n_features=100,
        n_informative=10,
        n_redundant=20,
        n_classes=2,
        random_state=42
    )

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )

    print(f"Dataset: {X_train.shape[0]} samples, {X_train.shape[1]} features")

    # Prefer the GPU when one is available; FSA_Multiclass runs on either.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    k = 20  # number of features FSA should keep
    print(f"\nSelecting top {k} features with FSA...")

    # FSA expects float32 tensors on the target device.
    X_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)

    selector = FSA_Multiclass(k=k, mu=100, Niter=300)
    selector.fit(X_tensor, y_tensor, device, num_classes=2)

    # Indices of the surviving features, pulled back to the CPU.
    selected_features = selector.idx.cpu().numpy()
    print(f"Selected features: {selected_features}")

    # Fit a classifier restricted to the selected columns.
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train[:, selected_features], y_train)

    acc = accuracy_score(y_test, clf.predict(X_test[:, selected_features]))
    print(f"\nAccuracy with {k} selected features: {acc:.4f}")

    # Baseline: the same classifier on the full feature set.
    clf_all = RandomForestClassifier(n_estimators=100, random_state=42)
    clf_all.fit(X_train, y_train)
    acc_all = accuracy_score(y_test, clf_all.predict(X_test))

    print(f"Accuracy with all {X_train.shape[1]} features: {acc_all:.4f}")
    print()


def demo_multiclass_classification():
    """Run the FSA feature-selection demo on a synthetic 5-class problem.

    Builds a 500-sample, 100-feature five-class dataset, selects 25
    features with FSA_Multiclass, then trains a RandomForest on the
    selected subset and compares its test accuracy against a forest
    trained on all features. Prints results to stdout; returns None.
    """
    banner = "=" * 60
    print(banner)
    print("MULTICLASS CLASSIFICATION DEMO")
    print(banner)

    # Synthetic five-class data: 15 informative and 25 redundant
    # features out of 100 total. Fixed seed keeps the run reproducible.
    features, labels = make_classification(
        n_samples=500,
        n_features=100,
        n_informative=15,
        n_redundant=25,
        n_classes=5,  # 5 classes
        n_clusters_per_class=1,
        random_state=42
    )

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )

    print(f"Dataset: {X_train.shape[0]} samples, {X_train.shape[1]} features, 5 classes")

    # Prefer the GPU when one is available; FSA_Multiclass runs on either.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    k = 25  # number of features FSA should keep
    print(f"\nSelecting top {k} features with FSA...")

    # FSA expects float32 tensors on the target device.
    X_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)

    selector = FSA_Multiclass(k=k, mu=100, Niter=300)
    selector.fit(X_tensor, y_tensor, device, num_classes=5)

    # Indices of the surviving features, pulled back to the CPU.
    selected_features = selector.idx.cpu().numpy()
    print(f"Selected features: {selected_features}")

    # Fit a classifier restricted to the selected columns.
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train[:, selected_features], y_train)

    acc = accuracy_score(y_test, clf.predict(X_test[:, selected_features]))
    print(f"\nAccuracy with {k} selected features: {acc:.4f}")

    # Baseline: the same classifier on the full feature set.
    clf_all = RandomForestClassifier(n_estimators=100, random_state=42)
    clf_all.fit(X_train, y_train)
    acc_all = accuracy_score(y_test, clf_all.predict(X_test))

    print(f"Accuracy with all {X_train.shape[1]} features: {acc_all:.4f}")
    print()


if __name__ == "__main__":
demo_binary_classification()
demo_multiclass_classification()
print("=" * 60)
print("DEMO COMPLETED")
print("=" * 60)
Loading