Skip to content

Commit 2ecb851

Browse files
committed
Merge branch 'RidgeRMSEwithLamda'
2 parents 0d77aa1 + a33bd74 commit 2ecb851

2 files changed

Lines changed: 60 additions & 2 deletions

File tree

Regularization/scratching/Ridge_ass2.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def polyRidge(degree):
2424
y_train, y_test = y[train_index], y[test_index]
2525

2626
# Create and train the Ridge regression model
27-
ridge = Ridge(alpha=10000) # Adjust alpha for regularization strength
27+
ridge = Ridge(alpha=100000) # Adjust alpha for regularization strength
2828
ridge.fit(X_train, y_train)
2929

3030
# Make predictions on the test and training sets
@@ -50,7 +50,6 @@ def polyRidge(degree):
5050
# Print results and return errors
5151
return E_train, E_out
5252

53-
5453
def generate_sin():
5554
np.random.seed(42)
5655
X = np.random.rand(100, 1) * 10

Regularization/scratching/lamda.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import numpy as np
2+
import pandas as pd
3+
import matplotlib.pyplot as plt
4+
from sklearn.model_selection import train_test_split
5+
from sklearn.linear_model import Ridge
6+
from sklearn.metrics import mean_squared_error
7+
8+
def generate_sin():
    """Build a noisy sine-wave dataset and split it into train/test parts.

    Seeds NumPy's global random generator with 42, draws 100 inputs
    uniformly from [0, 10), and sets each target to ``sin(x)`` plus standard
    normal noise divided by 10. 20% of the samples are held out for testing.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``. The X arrays are
        2-D with a single column; the Y arrays are 1-D with one target
        per sample.
    """
    np.random.seed(42)
    X = np.random.rand(100, 1) * 10
    # Flatten sin(X) before adding the noise: X is (100, 1) while the noise
    # vector is (100,), so adding them directly broadcasts to a (100, 100)
    # matrix instead of producing one target per sample.
    Y = np.sin(X).ravel() + np.random.randn(100) / 10
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0
    )
    return X_train, Y_train, X_test, Y_test
14+
15+
def import_csv(path):
    """Load a CSV file and return a train/test split of Height vs. Weight.

    Args:
        path: Location of the CSV file; it must contain ``Height`` and
            ``Weight`` columns.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)`` where the X arrays hold
        the ``Height`` column as a single-column 2-D array and the Y arrays
        hold the ``Weight`` column. 20% of the rows go to the test part.
    """
    frame = pd.read_csv(path)
    # sklearn estimators expect a 2-D feature matrix, hence the reshape.
    heights = frame['Height'].values.reshape(-1, 1)
    weights = frame['Weight'].values
    split = train_test_split(heights, weights, test_size=0.2, random_state=0)
    # train_test_split yields (X_train, X_test, Y_train, Y_test); reorder so
    # that each feature array is followed by its matching target array.
    features_train, features_test, targets_train, targets_test = split
    return features_train, targets_train, features_test, targets_test
21+
22+
def ridge_regression(X_train, Y_train, X_test, Y_test, alpha):
    """Fit a Ridge model and score it on the training and test data.

    Args:
        X_train: Feature matrix used for fitting.
        Y_train: Targets used for fitting.
        X_test: Feature matrix of the held-out data.
        Y_test: Targets of the held-out data.
        alpha: Regularization strength passed to ``Ridge``.

    Returns:
        Tuple ``(train_error, test_error)``: the values produced by
        ``mean_squared_error`` with its default arguments. No square root
        is applied inside this function, so callers that want an RMSE must
        take it themselves.
    """
    fitted = Ridge(alpha=alpha).fit(X_train, Y_train)

    train_error = mean_squared_error(Y_train, fitted.predict(X_train))
    test_error = mean_squared_error(Y_test, fitted.predict(X_test))

    return train_error, test_error
30+
31+
def plot_rmse_vs_alpha(alphas, train_rmse, test_rmse):
    """Plot training and test error curves against the Ridge alpha values.

    Args:
        alphas: Sequence of regularization strengths (x-axis, log scale).
        train_rmse: Training error for each entry of ``alphas``.
        test_rmse: Test error for each entry of ``alphas``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(8, 6))
    plt.plot(alphas, train_rmse, label="Train", marker='o', linestyle='-')
    plt.plot(alphas, test_rmse, label="Test", marker='s', linestyle='-')
    # The x values are Ridge alphas. A larger alpha shrinks the coefficients
    # harder and yields a *simpler* model, so calling this axis "model
    # complexity" would describe it backwards.
    plt.xlabel("Regularization strength alpha (log scale)")
    plt.xscale('log')
    plt.ylabel("RMSE")
    plt.title("Regularization strength vs. E_train and E_test")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()
44+
45+
# Load the height/weight data. To experiment with the synthetic sine data
# instead, replace this call with generate_sin().
X_train, Y_train, X_test, Y_test = import_csv("Regularization/dataset/HeightWeight.csv")

# Sweep the regularization strength from 1 up to (but excluding) 100000 in
# steps of 100.
alphas = np.arange(1, 100000, 100)

# One (train, test) error pair per alpha, in the same order as `alphas`.
sweep = [
    ridge_regression(X_train, Y_train, X_test, Y_test, strength)
    for strength in alphas
]

# ridge_regression's values are square-rooted here before plotting as RMSE.
E_train = [np.sqrt(train_err) for train_err, _ in sweep]
E_test = [np.sqrt(test_err) for _, test_err in sweep]

plot_rmse_vs_alpha(alphas, E_train, E_test)
58+
59+

0 commit comments

Comments
 (0)