Skip to content

Commit a9c3f4e

Browse files
authored
Merge pull request #17 from SiriwatHuntra/main
Fix, last version (ver.2)
2 parents c93a6a8 + 2ecb851 commit a9c3f4e

2 files changed

Lines changed: 104 additions & 26 deletions

File tree

Regularization/scratching/Ridge_ass2.py

Lines changed: 45 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,54 @@
11
import numpy as np
22
import pandas as pd
33
import matplotlib.pyplot as plt
4-
from sklearn.model_selection import train_test_split
54
from sklearn.preprocessing import PolynomialFeatures
65
from sklearn.linear_model import Ridge
76
from sklearn.metrics import mean_squared_error
7+
from sklearn.model_selection import KFold
88

99
def polyRidge(degree, alpha=100000, n_splits=10):
    """Fit polynomial Ridge models of the given degree with K-fold CV.

    Reads the module-level data ``X`` and ``y``. For each fold a Ridge
    model is trained and the fold's train/test MSEs are collected; the
    fold statistics are then folded into two RMSE-scale summary numbers.

    Parameters
    ----------
    degree : int
        Polynomial degree used to expand ``X``.
    alpha : float, optional
        Ridge regularization strength (default 100000, the previously
        hard-coded value).
    n_splits : int, optional
        Number of cross-validation folds (default 10, as before).

    Returns
    -------
    tuple of float
        ``(E_train, E_out)`` — sqrt of the mean fold-training MSE, and
        that value plus an estimated variance term.
    """
    # Expand the raw inputs into polynomial features of the requested degree.
    poly = PolynomialFeatures(degree)
    X_poly = poly.fit_transform(X)

    # K-fold cross-validation over the expanded design matrix.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Per-fold mean-squared errors.
    mse_train_list = []
    mse_test_list = []

    for train_index, test_index in kf.split(X_poly):
        X_train, X_test = X_poly[train_index], X_poly[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Train a Ridge model on this fold's training portion.
        ridge = Ridge(alpha=alpha)
        ridge.fit(X_train, y_train)

        # Evaluate on both portions of the fold.
        y_pred_train = ridge.predict(X_train)
        y_pred_test = ridge.predict(X_test)

        mse_train_list.append(mean_squared_error(y_train, y_pred_train))
        mse_test_list.append(mean_squared_error(y_test, y_pred_test))

    # "Bias" proxy: RMSE of the average in-sample error across folds.
    E_train = np.sqrt(np.mean(mse_train_list))

    # "Variance" proxy: RMS deviation of the fold test MSEs from the mean
    # training MSE.  NOTE(review): this is a heuristic, not the textbook
    # bias/variance decomposition — confirm it matches the assignment spec.
    E_var = np.sqrt(np.mean(np.square(np.array(mse_test_list) - np.mean(mse_train_list))))

    # Estimated out-of-sample error: bias proxy plus variance proxy.
    E_out = E_train + E_var

    return E_train, E_out
3352

3453
def generate_sin():
3554
np.random.seed(42)
@@ -49,19 +68,19 @@ def import_csv(path):
4968
# Sweep polynomial degrees 1..10 and record the CV error estimates.
degrees = np.arange(1, 11)  # Adjust the range as needed

# Accumulators for the per-degree error estimates.
E_out_list = []
E_in_List = []

# polyRidge returns (E_train, E_out); the previous code unpacked them as
# `E_out, E_in = ...`, so the curve labeled "E_Out" actually plotted the
# training error (and vice versa).  Unpack in the correct order.
for deg in degrees:
    E_in, E_out = polyRidge(deg)
    E_out_list.append(E_out)
    E_in_List.append(E_in)

# Plot estimated in-sample vs. out-of-sample RMSE against model complexity.
plt.figure(figsize=(8, 6))
plt.plot(degrees, E_out_list, label="E_Out", marker='o', linestyle='-')
plt.plot(degrees, E_in_List, label="E_In", marker='s', linestyle='-')
plt.xlabel("Model Complexity (Degree)")
plt.ylabel("RMSE")
plt.title("Model Complexity vs. E_train and E_test")

Regularization/scratching/lamda.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import numpy as np
2+
import pandas as pd
3+
import matplotlib.pyplot as plt
4+
from sklearn.model_selection import train_test_split
5+
from sklearn.linear_model import Ridge
6+
from sklearn.metrics import mean_squared_error
7+
8+
def generate_sin():
    """Generate a noisy sine dataset and return a train/test split.

    Returns
    -------
    tuple
        ``(X_train, Y_train, X_test, Y_test)`` where X holds 100 points
        drawn uniformly from [0, 10) as a (n, 1) array and Y is the
        (n,) target ``sin(x)`` plus Gaussian noise (sd ≈ 0.1).
    """
    np.random.seed(42)
    X = np.random.rand(100, 1) * 10
    # BUG FIX: np.sin(X) has shape (100, 1) while np.random.randn(100) has
    # shape (100,); adding them broadcast to a (100, 100) matrix, so Y was
    # silently 2-D.  Flatten the sine term so Y is the intended (100,)
    # target vector.
    Y = np.sin(X).ravel() + np.random.randn(100) / 10
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
    return X_train, Y_train, X_test, Y_test
14+
15+
def import_csv(path):
16+
df = pd.read_csv(path)
17+
X = df['Height'].values.reshape(-1, 1) # Reshape for sklearn
18+
Y = df['Weight'].values
19+
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
20+
return X_train, Y_train, X_test, Y_test
21+
22+
def ridge_regression(X_train, Y_train, X_test, Y_test, alpha):
    """Fit Ridge(alpha) on the training split and score both splits.

    Returns
    -------
    tuple of float
        ``(train_mse, test_mse)`` — mean squared errors.  NOTE: these
        are MSEs, not RMSEs; the caller takes the square root.
    """
    model = Ridge(alpha=alpha)
    model.fit(X_train, Y_train)

    train_mse = mean_squared_error(Y_train, model.predict(X_train))
    test_mse = mean_squared_error(Y_test, model.predict(X_test))

    return train_mse, test_mse
30+
31+
def plot_rmse_vs_alpha(alphas, train_rmse, test_rmse):
    """Plot train/test RMSE as a function of the Ridge penalty alpha.

    Parameters
    ----------
    alphas : array-like
        Regularization strengths (x-axis, log scale).
    train_rmse, test_rmse : array-like
        RMSE values for the training and test splits, aligned with
        ``alphas``.
    """
    plt.figure(figsize=(8, 6))
    plt.plot(alphas, train_rmse, label="Train", marker='o', linestyle='-')
    plt.plot(alphas, test_rmse, label="Test", marker='s', linestyle='-')
    # BUG FIX: the x-axis is the regularization strength alpha, not "model
    # complexity" (a larger alpha means LESS effective complexity), so the
    # old labels were misleading.
    plt.xlabel("Regularization strength alpha (log scale)")
    plt.xscale('log')
    plt.ylabel("RMSE")
    plt.title("Ridge: RMSE vs. regularization strength")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()
44+
45+
X_train, Y_train, X_test, Y_test = import_csv("Regularization/dataset/HeightWeight.csv")
46+
#X_train, Y_train, X_test, Y_test = generate_sin()
47+
48+
alphas = np.arange(1, 100000, 100)
49+
E_train = []
50+
E_test = []
51+
52+
for alpha_ in alphas:
53+
train_rmse, test_rmse = ridge_regression(X_train, Y_train, X_test, Y_test, alpha_)
54+
E_train.append(np.sqrt(train_rmse))
55+
E_test.append(np.sqrt(test_rmse))
56+
57+
plot_rmse_vs_alpha(alphas, E_train, E_test)
58+
59+

0 commit comments

Comments
 (0)