Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions Solution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

def load_data():
    """Load the Iris dataset into a DataFrame and print a short preview.

    Side effects:
        Prints the first rows, the feature names, the target names and the
        shape of the assembled DataFrame.

    Returns:
        A ``(df, iris)`` tuple. ``df`` has one column per entry in
        ``iris.feature_names`` plus a ``species`` column holding
        ``iris.target``; ``iris`` is the object returned by ``load_iris()``.
    """
    iris = load_iris()

    # Build the feature table first, then append the label as the last column.
    features = pd.DataFrame(iris.data, columns=iris.feature_names)
    df = features.assign(species=iris.target)

    print("First 5 rows of data:")
    print(df.head())
    print("\nFeature names:", iris.feature_names)
    print("Target names:", iris.target_names)
    print("\nShape of data:", df.shape)

    return df, iris

def train_test_split_data(df, test_size=0.2, random_state=42, stratify=False):
    """Split the DataFrame into train and test features/labels.

    Args:
        df: DataFrame containing the feature columns and a ``species``
            label column.
        test_size: Fraction (or absolute number) of rows held out for
            testing; forwarded to ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible.
        stratify: When true, split so that each set keeps the label
            proportions of ``df["species"]``. Off by default to preserve the
            original behaviour.

    Side effects:
        Prints the shapes of the train and test feature sets.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.

    Raises:
        KeyError: If ``df`` has no ``species`` column.
    """
    X = df.drop(columns="species")
    y = df["species"]

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        # train_test_split expects the label array itself (or None).
        stratify=y if stratify else None,
    )

    print("\nTrain shape:", X_train.shape, "Test shape:", X_test.shape)
    return X_train, X_test, y_train, y_test

def train_logistic_regression(X_train, y_train, max_iter=200):
    """Fit a logistic-regression classifier on the training data.

    Args:
        X_train: Training features.
        y_train: Training labels.
        max_iter: Iteration cap passed to ``LogisticRegression``. Exposed as
            a parameter (default unchanged at 200) so callers can tune it the
            same way ``train_knn`` exposes ``n_neighbors``.

    Side effects:
        Prints a confirmation line once fitting finishes.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    log_reg = LogisticRegression(max_iter=max_iter)
    log_reg.fit(X_train, y_train)
    print("\nLogistic Regression training complete.")
    return log_reg

def evaluate_model(model, X_test, y_test, model_name="Model"):
    """Evaluate a fitted classifier on the test set and print a report.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True test labels. Any iterable of labels works (pandas
            Series, NumPy array, list); the preview no longer relies on
            ``.iloc``.
        model_name: Label used in the printed header.

    Side effects:
        Prints the accuracy, the classification report and up to five
        predicted/true label pairs.

    Returns:
        The accuracy score computed by ``accuracy_score``.
    """
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    print(f"\n=== {model_name} Evaluation ===")
    print("Accuracy:", acc)
    print("\nClassification report:")
    print(classification_report(y_test, y_pred))

    print("Sample predictions vs true:")
    # Capping the predictions at five makes zip stop after five pairs (or
    # fewer when the test set is smaller), walking y_test in positional order.
    for predicted, actual in zip(y_pred[:5], y_test):
        print(f"Predicted: {predicted}, True: {actual}")
    return acc

def train_knn(X_train, y_train, n_neighbors=5):
    """Fit a k-nearest-neighbours classifier on the training data.

    Args:
        X_train: Training features.
        y_train: Training labels.
        n_neighbors: Value of ``k`` passed to ``KNeighborsClassifier``.

    Side effects:
        Prints a confirmation line that includes ``k``.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)
    print(f"\nKNN (k={n_neighbors}) training complete.")
    return classifier

def main():
    """Run the whole exercise: load data, train both models, compare them.

    Trains and evaluates logistic regression first, then KNN with k=5, and
    finally prints a summary stating which model scored higher on the test
    split (or that they tied).
    """
    # The raw dataset object is returned by load_data() but not needed here.
    df, _iris = load_data()
    X_train, X_test, y_train, y_test = train_test_split_data(df)

    # Logistic Regression: train, then evaluate on the held-out split.
    log_reg_acc = evaluate_model(
        train_logistic_regression(X_train, y_train),
        X_test,
        y_test,
        "Logistic Regression",
    )

    # KNN: same procedure with five neighbours.
    knn_acc = evaluate_model(
        train_knn(X_train, y_train, n_neighbors=5),
        X_test,
        y_test,
        "KNN (k=5)",
    )

    print("\nSummary:")
    print(f"Logistic Regression accuracy: {log_reg_acc:.4f}")
    print(f"KNN (k=5) accuracy: {knn_acc:.4f}")

    # Pick the verdict first, then print once; "equal" is the fall-through
    # case when neither strict comparison holds.
    if knn_acc < log_reg_acc:
        verdict = "Logistic Regression performed better on this test split."
    elif knn_acc > log_reg_acc:
        verdict = "KNN performed better on this test split."
    else:
        verdict = "Both models performed equally on this test split."
    print(verdict)

# Entry point: run the pipeline only when this file is executed as a script,
# so importing the module does not trigger training or printing.
if __name__ == "__main__":
    main()
44 changes: 44 additions & 0 deletions excercise.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# ML Learning Project – Iris Classification (Exercise)

## 1. Overview

In this exercise, you will build a **Machine Learning model** to classify iris flowers into three species using the classic **Iris dataset**.

You will:
- Load and explore data
- Split into train/test
- Train a model
- Evaluate performance
- Try simple improvements

---

## 2. Objective

> Build a classifier that predicts the iris flower species based on petal and sepal measurements.

Target variable:
- `species` – stored as integer codes `0`, `1`, `2`, which correspond to Setosa, Versicolor, and Virginica respectively

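Features (all measured in centimetres; these are the column names scikit-learn provides):
- `sepal length (cm)`
- `sepal width (cm)`
- `petal length (cm)`
- `petal width (cm)`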

We will use the built-in Iris dataset from **scikit-learn** (no external CSV needed).

---

## 3. Requirements

### Tools / Libraries
- Python 3.x
- `scikit-learn`
- `pandas`
- `numpy`
- `matplotlib` (optional, for plots)

Install (if needed):

```