11import numpy as np
22import pandas as pd
33import matplotlib .pyplot as plt
4- from sklearn .model_selection import train_test_split
54from sklearn .preprocessing import PolynomialFeatures
65from sklearn .linear_model import Ridge
76from sklearn .metrics import mean_squared_error
7+ from sklearn .model_selection import KFold
88
def polyRidge(degree, X_data=None, y_data=None, alpha=100000, n_splits=10):
    """Fit a polynomial Ridge regression and estimate errors via K-fold CV.

    Parameters
    ----------
    degree : int
        Degree of the polynomial feature expansion.
    X_data : array-like of shape (n_samples, n_features), optional
        Design matrix; defaults to the module-level ``X`` for backward
        compatibility with existing callers.
    y_data : array-like of shape (n_samples,), optional
        Target values; defaults to the module-level ``y``.
    alpha : float, optional
        Ridge regularization strength (previously hard-coded to 100000).
    n_splits : int, optional
        Number of cross-validation folds (previously hard-coded to 10).

    Returns
    -------
    tuple of float
        ``(E_train, E_out)``: the in-sample RMSE averaged over training
        folds, and the estimated out-of-sample error (``E_train`` plus a
        variance term).
    """
    # Fall back to the module-level dataset so the original zero-argument
    # call sites keep working unchanged.
    if X_data is None:
        X_data = X
    if y_data is None:
        y_data = y
    X_data = np.asarray(X_data)
    y_data = np.asarray(y_data)

    # Expand inputs into polynomial features of the requested degree.
    poly = PolynomialFeatures(degree)
    X_poly = poly.fit_transform(X_data)

    # K-fold cross-validation; fixed seed keeps the fold assignment (and
    # therefore the returned errors) reproducible across calls.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    mse_train_list = []
    mse_test_list = []

    for train_index, test_index in kf.split(X_poly):
        X_train, X_test = X_poly[train_index], X_poly[test_index]
        y_train, y_test = y_data[train_index], y_data[test_index]

        # Fit Ridge regression on the training fold only.
        ridge = Ridge(alpha=alpha)
        ridge.fit(X_train, y_train)

        # Record per-fold mean squared error on both splits.
        mse_train_list.append(mean_squared_error(y_train, ridge.predict(X_train)))
        mse_test_list.append(mean_squared_error(y_test, ridge.predict(X_test)))

    # In-sample error: RMSE of the average training MSE (used as a bias proxy).
    E_train = np.sqrt(np.mean(mse_train_list))

    # Variance proxy: RMS deviation of the test-fold MSEs from the mean
    # training MSE.  NOTE(review): this is a heuristic, not the formal
    # bias/variance decomposition — confirm it matches the assignment spec.
    E_var = np.sqrt(np.mean(np.square(np.array(mse_test_list) - np.mean(mse_train_list))))

    # Estimated out-of-sample error as bias-proxy plus variance-proxy.
    E_out = E_train + E_var

    return E_train, E_out
3352
3453def generate_sin ():
3554 np .random .seed (42 )
# (elided in this view: remainder of generate_sin, import_csv, and the X / y setup)
# Degrees (model complexities) to evaluate.
degrees = np.arange(1, 11)  # Adjust the range as needed

# Collected errors per degree.
E_in_list = []
E_out_list = []

# polyRidge returns (E_train, E_out): in-sample error FIRST, then the
# estimated out-of-sample error.  Unpack in that order — the previous
# `E_out, E_in = polyRidge(deg)` swapped the two, so the plotted "E_Out"
# curve was actually the training error and vice versa.
for deg in degrees:
    E_in, E_out = polyRidge(deg)
    E_in_list.append(E_in)
    E_out_list.append(E_out)

# Plot in-sample vs. estimated out-of-sample error against model complexity.
plt.figure(figsize=(8, 6))
plt.plot(degrees, E_in_list, label="E_in", marker='s', linestyle='-')
plt.plot(degrees, E_out_list, label="E_out", marker='o', linestyle='-')
plt.xlabel("Model Complexity (Degree)")
plt.ylabel("RMSE")
# Title updated to match what is actually plotted (E_in / E_out, not the
# old train/test split errors).
plt.title("Model Complexity vs. E_in and E_out")
plt.legend()  # labels are set above; without this call they never render
# (scrape artifact removed: "0 commit comments")