-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathexercise_1_mlp_buggy.py
More file actions
121 lines (108 loc) · 3.51 KB
/
exercise_1_mlp_buggy.py
File metadata and controls
121 lines (108 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#%% Learning point: understand basic PyTorch concepts and how to train a simple MLP model
"""
Task: find and fix the bugs in this code.
"""
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import classification_report
import numpy as np
torch.manual_seed(42)
#%%
class NeuralNetwork(nn.Module):
    """Simple MLP classifier for 28x28 MNIST images: 784 -> 512 -> 512 -> 10.

    The forward pass returns raw (unnormalized) logits so the model can be
    paired directly with ``nn.CrossEntropyLoss``, which applies log-softmax
    internally.
    """

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        # Collapse (N, 1, 28, 28) image batches into (N, 784) feature vectors
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )
        # BUG FIX: removed the trailing nn.LogSoftmax. The training loop uses
        # nn.CrossEntropyLoss, which expects raw logits and applies
        # log-softmax itself; applying LogSoftmax here as well would apply
        # the normalization twice and distort the loss gradients.

    def forward(self, x):
        # Flatten the image for the input layer
        x = self.flatten(x)
        # Apply the linear layers of the MLP with ReLU activations
        logits = self.linear_relu_stack(x)
        # Return raw class scores; argmax over dim 1 gives the predicted digit
        return logits
#%%
# Load the MNIST dataset.
# Important: rescale the images from [0, 1] to [-1, 1] so the input range is
# zero-centered, which matches the activation functions better.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)
# Training split (downloaded to ./data on first run)
train_data = datasets.MNIST(
    root="./data", train=True, download=True, transform=transform
)
# Held-out test split
test_data = datasets.MNIST(
    root="./data", train=False, download=True, transform=transform
)
#%%
# Prepare the dataloaders, shuffle the data, and set the batch size.
# Batches are used to update the model weights because we can't pass the
# entire dataset through the model at once.
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)
#%%
# Build the network and print its layer structure
model = NeuralNetwork()
print(model)
#%%
# Peek at one training example to sanity-check the data pipeline
sample_image, sample_label = train_data[0]
print("Image shape:", sample_image.shape, "Class:", sample_label)
#%%
print("Test model forward pass")
# A single (1, 28, 28) image flattens to (1, 784) and must map to 10 class scores
assert model(sample_image).shape == (1, 10), "Model output shape is incorrect"
#%%
# Training hyperparameters
learning_rate = 1e-3  # SGD step size
epochs = 25  # full passes over the training set
# NOTE(review): CrossEntropyLoss expects raw logits and applies log-softmax
# internally — make sure the model's forward pass does not also apply a
# (log-)softmax, otherwise the normalization happens twice.
loss_fn = nn.CrossEntropyLoss()
# loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
#%% Training
# Train for `epochs` passes over the training set using mini-batch SGD.
size = len(train_dataloader.dataset)  # total number of training examples (loop-invariant)
for epoch in tqdm(range(epochs)):
    for batch, (X, y) in enumerate(train_dataloader):
        # Forward pass: predicted class scores for this batch
        pred = model(X)
        # BUG FIX: the loss must compare predictions against the true labels
        # `y`; the original `loss_fn(pred, pred)` compared the predictions
        # with themselves, so the model never learned anything.
        loss = loss_fn(pred, y)
        # Clear gradients accumulated from the previous step
        optimizer.zero_grad()
        # Compute derivatives via backpropagation
        loss.backward()
        # Update the weights of the model using the optimizer
        optimizer.step()
        if batch % 100 == 0:
            current = batch * len(X)
            print(f"Epoch: {epoch+1}, Loss: {loss.item():.6f}, Progress: [{current}/{size}]")
#%%
# Evaluate the trained model on the held-out test set.
model.eval()  # switch to eval mode (affects dropout/batchnorm layers if present)
# Disable gradient computation for evaluation to save memory and computations
with torch.no_grad():
    all_preds = []
    all_labels = []
    for X, y in test_dataloader:
        preds = model(X)
        # argmax over the 10 class scores gives the predicted digit
        all_preds.extend(preds.argmax(1).numpy())
        all_labels.extend(y.numpy())
# BUG FIX: removed the unused `y_pred`/`y_true` lists — they were created
# but never filled or read; `all_preds`/`all_labels` hold the results.
#%%
# Convert lists to NumPy arrays for scikit-learn
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)
#%%
# Per-class precision/recall/F1 report over the 10 digit classes
report = classification_report(
    all_labels, all_preds, target_names=[str(i) for i in range(10)]
)
print("Classification Report:\n", report)