|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | +import matplotlib.pyplot as plt |
| 4 | +from sklearn.model_selection import train_test_split |
| 5 | +from sklearn.linear_model import Ridge |
| 6 | +from sklearn.metrics import mean_squared_error |
| 7 | + |
def generate_sin():
    """Generate a noisy sine dataset and split it into train and test sets.

    Draws 100 inputs uniformly from [0, 10), evaluates ``sin`` on them and
    adds Gaussian noise scaled by 1/10. The global NumPy RNG is seeded
    with 42 first, so repeated calls return the same data.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)`` from an 80/20 split.
        The ``X`` arrays are column vectors of shape ``(n, 1)``; the ``Y``
        arrays are flat vectors of shape ``(n,)``.
    """
    np.random.seed(42)
    X = np.random.rand(100, 1) * 10
    # Flatten sin(X) before adding the noise: adding a (100,) vector to the
    # (100, 1) column would broadcast to a (100, 100) matrix instead of
    # producing one noisy target per sample.
    Y = np.sin(X).ravel() + np.random.randn(100) / 10
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
    return X_train, Y_train, X_test, Y_test
| 14 | + |
def import_csv(path):
    """Load the height/weight CSV at *path* and split it 80/20.

    The ``Height`` column is used as the single input feature and the
    ``Weight`` column as the regression target.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``; the ``X`` arrays are
        reshaped to one column per row.
    """
    frame = pd.read_csv(path)
    heights = frame['Height'].values
    weights = frame['Weight'].values

    # Estimators are fed a 2-D feature matrix, hence the single-column reshape.
    features = heights.reshape(-1, 1)

    split = train_test_split(features, weights, test_size=0.2, random_state=0)
    feat_train, feat_test, target_train, target_test = split
    return feat_train, target_train, feat_test, target_test
| 21 | + |
def ridge_regression(X_train, Y_train, X_test, Y_test, alpha):
    """Fit a Ridge model on the training split and score both splits.

    Args:
        X_train: Training inputs.
        Y_train: Training targets.
        X_test: Held-out inputs.
        Y_test: Held-out targets.
        alpha: Regularization strength handed to ``Ridge``.

    Returns:
        Tuple ``(train_error, test_error)`` of ``mean_squared_error``
        values. No square root is taken here, so callers that want RMSE
        must apply it themselves.
    """
    regressor = Ridge(alpha=alpha)
    regressor.fit(X_train, Y_train)

    # Score the training split first, then the held-out split.
    splits = ((X_train, Y_train), (X_test, Y_test))
    train_error, test_error = (
        mean_squared_error(targets, regressor.predict(inputs))
        for inputs, targets in splits
    )
    return train_error, test_error
| 30 | + |
def plot_rmse_vs_alpha(alphas, train_rmse, test_rmse):
    """Plot train and test RMSE against the Ridge regularization strength.

    Args:
        alphas: Alpha values for the x-axis, drawn on a logarithmic scale.
        train_rmse: Training error for each entry of ``alphas``.
        test_rmse: Test error for each entry of ``alphas``.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(8, 6))
    plt.plot(alphas, train_rmse, label="Train", marker='o', linestyle='-')
    plt.plot(alphas, test_rmse, label="Test", marker='s', linestyle='-')
    # The x-axis holds alpha, not model complexity: a larger alpha means
    # stronger regularization, i.e. a simpler model, so label it as such.
    plt.xlabel("Regularization strength alpha (log scale)")
    plt.xscale('log')
    plt.ylabel("RMSE")
    plt.title("Regularization strength vs. E_train and E_test")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()
| 44 | + |
# Dataset selection: the height/weight CSV is active; swap in the commented
# line below to run the sweep on the synthetic noisy-sine data instead.
X_train, Y_train, X_test, Y_test = import_csv("Regularization/dataset/HeightWeight.csv")
#X_train, Y_train, X_test, Y_test = generate_sin()

# Regularization strengths to sweep: 1, 101, 201, ... (all below 100000).
alphas = np.arange(1, 100000, 100)

# One (train, test) error pair per alpha, in sweep order.
split_errors = [
    ridge_regression(X_train, Y_train, X_test, Y_test, alpha_)
    for alpha_ in alphas
]

# Take the square root of each returned error before plotting it as RMSE.
E_train = [np.sqrt(train_err) for train_err, _ in split_errors]
E_test = [np.sqrt(test_err) for _, test_err in split_errors]

plot_rmse_vs_alpha(alphas, E_train, E_test)
| 58 | + |
| 59 | + |
0 commit comments