Skip to content

Kernel Crashes After Stage 2 #53

@Yeoonsu

Description

@Yeoonsu

Hello,

The kernel keeps crashing after stage 2. Could you please advise on how to resolve this issue?
I am using a laptop with 16 GB of RAM and an i7 CPU.

Thank you!

import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from datetime import timedelta
from openfe import OpenFE, transform

# Start dates of the evaluation (test) windows, one pd.Timestamp per window.
test_dates = [
    pd.to_datetime(day)
    for day in (
        '2023-01-05',
        '2023-02-08',
        '2023-03-07',
        '2023-04-11',
        '2023-05-07',
        '2023-06-08',
        '2023-07-09',
        '2023-08-08',
        '2023-09-06',
        '2024-01-10',
        '2024-02-11',
        '2024-03-08',
        '2024-04-06',
    )
]

# Walk-forward evaluation: for every start date in `test_dates`, train on all
# rows strictly before the window, engineer features with OpenFE, fit a
# LightGBM regressor, and record the RMSE on the 90-day test window.
# Relies on `df` (with 'tm' and 'demand' columns) and `var` (feature column
# selection) being defined earlier.

# Collects one record (window bounds + RMSE) per evaluated test window.
results = []

# LightGBM parameters are the same for every window, so build them once.
# The original dict also carried 'n_estimators': 200 while lgb.train() was
# called with num_boost_round=100. 'n_estimators' is an alias of
# 'num_iterations', so LightGBM warned and used 200, ignoring the argument.
# The alias is dropped here and the effective value is passed explicitly.
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 10,
    'verbose': 0
}
NUM_BOOST_ROUND = 200

for test_start_date in test_dates:
    # The test window covers 90 days, ending one second before day 91 starts.
    test_end_date = test_start_date + timedelta(days=90) - timedelta(seconds=1)

    # Train on everything up to one second before the window; test inside it.
    train = df[df['tm'] <= test_start_date - timedelta(seconds=1)]
    test = df[(df['tm'] >= test_start_date) & (df['tm'] <= test_end_date)]

    # Skip windows for which either split has no rows.
    if train.empty or test.empty:
        print(f"Skipping period {test_start_date} to {test_end_date} due to insufficient data.")
        continue

    # Features (X) and target (y).
    x_train = train[var]
    y_train = train['demand']
    x_test = test[var]
    y_test = test['demand']

    # Feature engineering with OpenFE: fit on the training split only, then
    # apply the generated features to both splits.
    # NOTE(review): OpenFE is re-fit from scratch for every window with
    # n_jobs=4; this is presumably the memory-heavy step - confirm.
    ofe = OpenFE()
    features = ofe.fit(data=x_train, label=y_train, n_jobs=4)
    x_train, x_test = transform(x_train, x_test, features, n_jobs=4)

    # Build the LightGBM training dataset.
    train_data = lgb.Dataset(x_train, label=y_train)

    # Train the model.
    model_lgb = lgb.train(params, train_data, num_boost_round=NUM_BOOST_ROUND)

    # Predict on the test window.
    preds_lgb = model_lgb.predict(x_test)

    # Root mean squared error on the test window.
    rmse = np.sqrt(mean_squared_error(y_test, preds_lgb))

    # Store the result for this window.
    results.append({
        'Test Start Date': test_start_date,
        'Test End Date': test_end_date,
        'RMSE': rmse
    })

    print(f"Period {test_start_date} to {test_end_date} - RMSE: {rmse}")

# Convert the per-window results into a DataFrame (displayed as the cell output).
results_df = pd.DataFrame(results)
results_df

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions