Coursera website: Deep Neural Networks with PyTorch
Course certificate
Week 1 - Tensor and Datasets
Learning Objectives
- Tensors 1D
- Two-Dimensional Tensors
- Data Set
- Differentiation in PyTorch
notebook
Tensors 1D
The basics
#initialize
import torch
a = torch.tensor([7, 4, 3, 2, 6])
a

#dtype, type()
a.dtype
a.type()

#convert with type
a = a.type(torch.FloatTensor)
a

#size, ndimension
a.size()
a.ndimension()

#convert to 2D
a_2D = a.view(-1, 1)

#from_numpy, to numpy
import numpy as np
numpy_array = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
torch_tensor = torch.from_numpy(numpy_array)
back_to_numpy = torch_tensor.numpy()

#from pandas
import pandas as pd
pandas_series = pd.Series([0.1, 2, 0.3, 10.1])
pandas_to_torch = torch.from_numpy(pandas_series.values)

#to list
this_tensor = torch.tensor([0, 1, 2, 3])
torch_to_list = this_tensor.tolist()

#item
new_tensor = torch.tensor([5, 2, 6, 1])
new_tensor[0].item()
#indexing and slicing
c = torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])  # example tensor (not defined in the original notes)
c[3:5] = torch.tensor([300.0, 4.0])
basic operations
#hadamard product
u = torch.tensor([1, 2])  # example vectors (not defined in the original notes)
v = torch.tensor([3, 2])
z = u*v

#dot product (scalar product)
result = torch.dot(u, v)
universal functions, mean, max, mathematical functions, plot with linspace
#mean
a.mean()

#max
b = torch.tensor([1.0, -2.0, 3.0, 5.0])  # example tensor (not defined in the original notes)
b.max()
#plot y=sin(x)
import matplotlib.pyplot as plt
%matplotlib inline

x = torch.linspace(0, 2 * np.pi, 100)
y = torch.sin(x)
plt.plot(x.numpy(), y.numpy())
Ungraded lab
Tensors 2D
Tensor creation in 2D
a = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
A = torch.tensor(a)

A.ndimension()
>> 2
A.shape
>> torch.Size([3, 3])
A.size()
>> torch.Size([3, 3])

#number of elements
A.numel()
>> 9

Indexing and slicing in 2D
A[0, 0:2]
>> tensor([11, 12])
A[1:3, 2]
>> tensor([23, 33])

Basic operations in 2D: hadamard product, matrix multiplication
X = torch.tensor([[1, 0], [0, 1]])
Y = torch.tensor([[2, 1], [1, 2]])

#hadamard product
Z = X*Y
Z
>> tensor([[2, 0],
           [0, 2]])

A = torch.tensor([[0, 1, 1], [1, 0, 1]])
B = torch.tensor([[1, 1], [1, 1], [-1, 1]])

#matrix multiplication
C = torch.mm(A, B)
C
>> tensor([[0, 2],
           [0, 2]])
Ungraded lab
Derivatives in Pytorch
Derivatives
using \(y(x)=x^2\)
x = torch.tensor(2., requires_grad=True)
y = x ** 2

#calculate derivative dy/dx
y.backward()
#evaluate at x : dy/dx(x)
x.grad
>> tensor(4.)

using \(z(x)=x^2+2x+1\)
x = torch.tensor(2., requires_grad=True)
z = x**2 + 2*x + 1
z.backward()
x.grad
>> tensor(6.)
Note: in my version of PyTorch (1.7.1), I cannot use integer (torch.int) dtypes with requires_grad:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-92-979d0f10c1e7> in <module>
----> 3 x = torch.tensor(2, requires_grad=True)
4 z = x**2 + 2*x + 1
5 z.backward()
RuntimeError: Only Tensors of floating point and complex dtype can require gradients
Partial derivatives
using \(f(u, v)=uv+u^2\), \(\frac{\partial f(u,v)}{\partial u} = v+2u\), \(\frac{\partial f(u,v)}{\partial v} = u\)
u = torch.tensor(1., requires_grad=True)
v = torch.tensor(2., requires_grad=True)

f = u*v + u**2

#calculate all partial derivatives df/du and df/dv
f.backward()
#evaluate partial derivative with respect to u at (u, v): df/du(u, v)
u.grad
>> tensor(4.)
#evaluate partial derivative with respect to v at (u, v): df/dv(u, v)
v.grad
>> tensor(1.)
Ungraded lab
1.2derivativesandGraphsinPytorch_v2.ipynb
With some explanation about .detach(), pointing to the torch.autograd documentation. That page links to a walkthrough-of-backprop video.
Will have to go back to .detach()
Simple Dataset
Build a Dataset Class and Object
from torch.utils.data import Dataset

class toy_set(Dataset):
    def __init__(self, length=100, transform=None):
        self.x = 2*torch.ones(length, 2)
        self.y = torch.ones(length, 1)
        self.len = length
        self.transform = transform
    def __getitem__(self, index):
        sample = self.x[index], self.y[index]
        if self.transform:
            sample = self.transform(sample)
        return sample
    def __len__(self):
        return self.len

dataset = toy_set()
len(dataset)
>> 100
dataset[0]
>> (tensor([2., 2.]), tensor([1.]))
Build a Dataset Transform (e.g. normalize or standardize)
class add_mult(object):
    def __init__(self, addx=1, muly=1):
        self.addx = addx
        self.muly = muly
    def __call__(self, sample):
        x = sample[0]
        y = sample[1]
        x = x + self.addx
        y = y * self.muly
        sample = x, y
        return sample

# automatically apply the transform
a_m = add_mult()
dataset_ = toy_set(transform=a_m)
dataset_[0]
>> (tensor([3., 3.]), tensor([1.]))
Compose Transforms
class mult(object):
    def __init__(self, mul=100):
        self.mul = mul
    def __call__(self, sample):
        x = sample[0]
        y = sample[1]
        x = x * self.mul
        y = y * self.mul
        sample = x, y
        return sample

from torchvision import transforms
data_transform = transforms.Compose([add_mult(), mult()])

# automatically apply the composed transform
dataset_tr = toy_set(transform=data_transform)
dataset_tr[0]
>> (tensor([300., 300.]), tensor([100.]))
Ungraded lab
Dataset
Dataset Class for Images
from PIL import Image
import pandas as pd
import os
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

class Dataset(Dataset):
    def __init__(self, csv_file, data_dir, transform=None):
        self.transform = transform
        self.data_dir = data_dir
        data_dir_csv_file = os.path.join(self.data_dir, csv_file)
        self.data_name = pd.read_csv(data_dir_csv_file)
        self.len = self.data_name.shape[0]
    def __len__(self):
        return self.len
    def __getitem__(self, idx):
        img_name = os.path.join(self.data_dir, self.data_name.iloc[idx, 1])
        image = Image.open(img_name)
        y = self.data_name.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, y

def show_data(data_sample, shape=(28, 28)):
    plt.imshow(data_sample[0].numpy().reshape(shape), cmap='gray')
    plt.title('y = ' + data_sample[1])

dataset = Dataset(csv_file=csv_file, data_dir=directory)
show_data(dataset[0])
Torch Vision Transforms
import torchvision.transforms as transforms

transforms.CenterCrop(20)
transforms.ToTensor()

croptensor_data_transform = transforms.Compose([transforms.CenterCrop(20), transforms.ToTensor()])
dataset = Dataset(csv_file=csv_file, data_dir=directory, transform=croptensor_data_transform)
dataset[0][0].shape
>> torch.Size([1, 20, 20])
Torch Vision Datasets
MNIST example
import torchvision.datasets as dsets
dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
Ungraded lab
Week 2 - Linear Regression
Learning Objectives
- Linear Regression Prediction
- Linear Regression Training
- Loss
- Gradient Descent
- Cost
- Linear Regression Training PyTorch
notebook
Linear Regression in 1D - Prediction
Simple linear regression - prediction
import torch

w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(-1.0, requires_grad=True)

def forward(x):
    y = w*x + b
    return y

x = torch.tensor([1.0])
yhat = forward(x)
yhat
>> tensor([1.], grad_fn=<AddBackward0>)

x = torch.tensor([[1.0], [2.0]])
forward(x)
>> tensor([[1.],
           [3.]], grad_fn=<AddBackward0>)
PyTorch - Class Linear
from torch.nn import Linear

torch.manual_seed(1)
model = Linear(in_features=1, out_features=1)
list(model.parameters())
>> [Parameter containing:
    tensor([[0.5153]], requires_grad=True),
    Parameter containing:
    tensor([-0.4414], requires_grad=True)]

x = torch.tensor([[1.0], [2.0]])
model(x)
>> tensor([[0.0739],
           [0.5891]], grad_fn=<AddmmBackward>)
PyTorch - Custom Modules
import torch.nn as nn

class LR(nn.Module):
    def __init__(self, in_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(in_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out

model = LR(1, 1)
list(model.parameters())
>> [Parameter containing:
    tensor([[-0.9414]], requires_grad=True),
    Parameter containing:
    tensor([0.5997], requires_grad=True)]

x = torch.tensor([[1.0], [2.0]])
model(x)
>> tensor([[-0.3417],
           [-1.2832]], grad_fn=<AddmmBackward>)
Model state_dict()
This returns a Python dictionary. We will use it more as our models get more complex. One use is to map each linear layer to its parameters; we can print out the keys and values.
model.state_dict()
>> OrderedDict([('linear.weight', tensor([[-0.9414]])),
               ('linear.bias', tensor([0.5997]))])
Ungraded lab
Linear Regression Training
The loss function presented is mean squared error:
\(l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-(wx_n+b))^2\)
Gradient Descent and cost
PyTorch Slope
import torch

w = torch.tensor(-10.0, requires_grad=True)
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = -3*X

import matplotlib.pyplot as plt
plt.plot(X.numpy(), f.numpy())
plt.show()

Y = f + 0.1*torch.randn(X.size())
plt.plot(X.numpy(), Y.numpy(), 'ro')
plt.show()

def forward(x):
    return w*x

def criterion(yhat, y):
    return torch.mean((yhat-y)**2)

lr = 0.1
for epoch in range(4):
    Yhat = forward(X)
    loss = criterion(Yhat, Y)
    loss.backward()
    w.data = w.data - lr*w.grad.data
    w.grad.data.zero_()
Ungraded lab
Linear Regression Training in PyTorch
Cost surface
# The class for plotting the diagrams
class plot_error_surfaces(object):
    # Constructor
    def __init__(self, w_range, b_range, X, Y, n_samples=30, go=True):
        W = np.linspace(-w_range, w_range, n_samples)
        B = np.linspace(-b_range, b_range, n_samples)
        w, b = np.meshgrid(W, B)
        Z = np.zeros((30, 30))
        count1 = 0
        self.y = Y.numpy()
        self.x = X.numpy()
        for w1, b1 in zip(w, b):
            count2 = 0
            for w2, b2 in zip(w1, b1):
                Z[count1, count2] = np.mean((self.y - w2 * self.x + b2) ** 2)
                count2 += 1
            count1 += 1
        self.Z = Z
        self.w = w
        self.b = b
        self.W = []
        self.B = []
        self.LOSS = []
        self.n = 0
        if go == True:
            plt.figure()
            plt.figure(figsize=(7.5, 5))
            plt.axes(projection='3d').plot_surface(self.w, self.b, self.Z, rstride=1, cstride=1, cmap='viridis', edgecolor='none')
            plt.title('Cost/Total Loss Surface')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.show()
            plt.figure()
            plt.title('Cost/Total Loss Surface Contour')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.contour(self.w, self.b, self.Z)
            plt.show()

    # Setter
    def set_para_loss(self, W, B, loss):
        self.n = self.n + 1
        self.W.append(W)
        self.B.append(B)
        self.LOSS.append(loss)

    # Plot diagram
    def final_plot(self):
        ax = plt.axes(projection='3d')
        ax.plot_wireframe(self.w, self.b, self.Z)
        ax.scatter(self.W, self.B, self.LOSS, c='r', marker='x', s=200, alpha=1)
        plt.figure()
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c='r', marker='x')
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()

    # Plot diagram
    def plot_ps(self):
        plt.subplot(121)
        plt.plot(self.x, self.y, 'ro', label="training points")
        plt.plot(self.x, self.W[-1] * self.x + self.B[-1], label="estimated line")
        plt.xlabel('x')
        plt.ylabel('y')
        plt.ylim((-10, 15))
        plt.title('Data Space Iteration: ' + str(self.n))
        plt.subplot(122)
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c='r', marker='x')
        plt.title('Total Loss Surface Contour Iteration' + str(self.n))
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()

get_surface = plot_error_surfaces(15, 15, X, Y, 30)
PyTorch (hard way)
def forward(x):
    y = w*x + b
    return y

def criterion(yhat, y):
    return torch.mean((yhat-y)**2)

w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-10.0, requires_grad=True)
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = 1*X - 1
Y = f + 0.1*torch.rand(X.size())

lr = 0.1
for epoch in range(15):
    Yhat = forward(X)
    loss = criterion(Yhat, Y)
    loss.backward()
    w.data = w.data - lr*w.grad.data
    w.grad.data.zero_()
    b.data = b.data - lr*b.grad.data
    b.grad.data.zero_()
Ungraded lab
Stochastic Gradient Descent and the Data Loader
Stochastic Gradient Descent in PyTorch
w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-10.0, requires_grad=True)
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = -3*X
Y = f + 0.1*torch.randn(X.size())

def forward(x):
    y = w*x + b
    return y

def criterion(yhat, y):
    return torch.mean((yhat-y)**2)

lr = 0.1
for epoch in range(4):
    for x, y in zip(X, Y):
        yhat = forward(x)
        loss = criterion(yhat, y)
        loss.backward()
        w.data = w.data - lr*w.grad.data
        w.grad.data.zero_()
        b.data = b.data - lr*b.grad.data
        b.grad.data.zero_()
Stochastic Gradient Descent DataLoader
dataset
from torch.utils.data import Dataset

class Data(Dataset):
    def __init__(self):
        self.x = torch.arange(-3, 3, 0.1).view(-1, 1)
        self.y = -3*self.x + 1  # use self.x (the original notes referenced the global X here)
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

dataset = Data()

dataloader
from torch.utils.data import DataLoader

dataset = Data()
trainloader = DataLoader(dataset=dataset, batch_size=1)
stochastic gradient descent
for x, y in trainloader:
    yhat = forward(x)
    loss = criterion(yhat, y)
    loss.backward()
    w.data = w.data - lr*w.grad.data
    b.data = b.data - lr*b.grad.data
    w.grad.data.zero_()
    b.grad.data.zero_()
Ungraded lab
Mini-Batch Gradient Descent
Iterations = \(\frac{\text{training size}}{\text{batch size}}\)
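For example, with 1,000 training samples and a batch size of 5, one epoch corresponds to \(1000/5 = 200\) iterations.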
Mini-Batch Gradient Descent in Pytorch
dataset = Data()
trainloader = DataLoader(dataset=dataset, batch_size=5)

lr = 0.1
LOSS = []
for epoch in range(4):
    for x, y in trainloader:
        yhat = forward(x)
        loss = criterion(yhat, y)
        loss.backward()
        w.data = w.data - lr*w.grad.data
        b.data = b.data - lr*b.grad.data
        w.grad.data.zero_()
        b.grad.data.zero_()
        LOSS.append(loss.item())
Optimization in PyTorch
from torch import nn, optim

criterion = nn.MSELoss()
trainloader = DataLoader(dataset=dataset, batch_size=1)
model = LR(1, 1)
optimizer = optim.SGD(model.parameters(), lr=0.01)

optimizer.state_dict()
>> {'state': {},
    'param_groups': [{'lr': 0.01,
      'momentum': 0,
      'dampening': 0,
      'weight_decay': 0,
      'nesterov': False,
      'params': [0, 1]}]}

for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab
Training, Validation and Test Split
standard explanation about Train, Validation, Test
Training, Validation and Test Split in PyTorch
Dataset to generate train_data and val_data
from torch.utils.data import Dataset, DataLoader

class Data(Dataset):
    def __init__(self, train=True):
        self.x = torch.arange(-3, 3, 0.1).view(-1, 1)
        self.f = -3*self.x + 1
        self.y = self.f + 0.1*torch.randn(self.x.size())
        self.len = self.x.shape[0]
        if train == True:
            self.y[0] = 0
            self.y[50:55] = 20
        else:
            pass
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

train_data = Data()
val_data = Data(train=False)
LR model
import torch.nn as nn

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out

criterion = nn.MSELoss()
trainloader = DataLoader(dataset=train_data, batch_size=1)
epochs = 10
learning_rates = [0.0001, 0.001, 0.01, 0.1, 1]
validation_error = torch.zeros(len(learning_rates))
test_error = torch.zeros(len(learning_rates))  # error measured on the training data, following the notebook's naming
MODELS = []

from torch import optim
from tqdm import tqdm

for i, learning_rate in tqdm(enumerate(learning_rates)):
    model = LR(1, 1)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        for x, y in trainloader:
            yhat = model(x)
            loss = criterion(yhat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    yhat = model(train_data.x)
    loss = criterion(yhat, train_data.y)
    test_error[i] = loss.item()

    yhat = model(val_data.x)
    loss = criterion(yhat, val_data.y)
    validation_error[i] = loss.item()
    MODELS.append(model)

import numpy as np
plt.semilogx(np.array(learning_rates), test_error.numpy(), label='training cost/total loss')
plt.semilogx(np.array(learning_rates), validation_error.numpy(), label='validation cost/total loss')
plt.ylabel('Cost/Total Loss')
plt.xlabel('learning rate')
plt.legend()
plt.show()
Week 3 - Multiple Input Output Linear Regression - Logistic Regression for Classification
Learning Objectives
- Multiple Linear Regression
- Multiple Linear Regression Training
- Linear Regression Multiple Outputs
- Linear Regression Multiple Outputs Training
notebook
Multiple Input Linear Regression Prediction
Class Linear
import torch
from torch.nn import Linear

torch.manual_seed(1)
model = Linear(in_features=2, out_features=1)
list(model.parameters())
>> [Parameter containing:
    tensor([[ 0.3643, -0.3121]], requires_grad=True),
    Parameter containing:
    tensor([-0.1371], requires_grad=True)]

model.state_dict()
>> OrderedDict([('weight', tensor([[ 0.3643, -0.3121]])),
               ('bias', tensor([-0.1371]))])

#predictions for multiple samples
X = torch.tensor([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
yhat = model(X)
yhat
>> tensor([[-0.0848],
           [-0.3969],
           [-0.7090]], grad_fn=<AddmmBackward>)
Custom Modules
import torch.nn as nn

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out
Ungraded lab
Multiple Input Linear Regression Training
Cost function and Gradient Descent for Multiple Linear Regression
Cost function
\[l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-(x_nw+b))^2\]
Gradient of loss function with respect to the weights
\[\nabla l(w,b) = \begin{bmatrix}\frac{\partial l(w,b)}{\partial w_1}\\ \vdots \\\frac{\partial l(w,b)}{\partial w_d}\end{bmatrix}\]
Gradient of loss function with respect to the bias
\[\frac{\partial l(w,b)}{\partial b}\]
Update of weights
\[w^{k+1} = w^k-\eta \nabla l(w^k,b^k)\]
\[\begin{bmatrix} w_1^{k+1}\\ \vdots\\ w_d^{k+1}\\\end{bmatrix}=\begin{bmatrix} w_1^{k}\\ \vdots\\ w_d^{k}\\\end{bmatrix}-\eta \begin{bmatrix}\frac{\partial l(w^k,b^k)}{\partial w_1}\\ \vdots \\\frac{\partial l(w^k,b^k)}{\partial w_d}\end{bmatrix}\]
and update of bias
\[b^{k+1}=b^k-\eta \frac{\partial l(w^k,b^k)}{\partial b}\]
Train the model in PyTorch
from torch import nn, optim
import torch

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out

from torch.utils.data import Dataset, DataLoader

class Data2D(Dataset):
    def __init__(self):
        self.x = torch.zeros(20, 2)
        self.x[:, 0] = torch.arange(-1, 1, 0.1)
        self.x[:, 1] = torch.arange(-1, 1, 0.1)
        self.w = torch.tensor([[1.0], [1.0]])
        self.b = 1
        self.f = torch.mm(self.x, self.w) + self.b
        self.y = self.f + 0.1*torch.randn((self.x.shape[0], 1))
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

data_set = Data2D()
criterion = nn.MSELoss()
trainloader = DataLoader(dataset=data_set, batch_size=2)
model = LR(input_size=2, output_size=1)
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab
Multiple Output Linear Regression
Linear regression with multiple outputs
Custom Modules
import torch.nn as nn
import torch

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out

torch.manual_seed(1)
model = LR(input_size=2, output_size=2)

list(model.parameters())
>> [Parameter containing:
    tensor([[ 0.3643, -0.3121],
            [-0.1371,  0.3319]], requires_grad=True),
    Parameter containing:
    tensor([-0.6657,  0.4241], requires_grad=True)]

#with 2 columns and 3 rows
X = torch.tensor([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
Yhat = model(X)
Yhat
>> tensor([[-0.6135,  0.6189],
           [-0.9256,  0.9508],
           [-1.2377,  1.2827]], grad_fn=<AddmmBackward>)
Ungraded lab
Multiple Output Linear Regression Training
Training in PyTorch
Training is the same; what changes is the Dataset:
from torch.utils.data import Dataset, DataLoader

class Data2D(Dataset):
    def __init__(self):
        self.x = torch.zeros(20, 2)
        self.x[:, 0] = torch.arange(-1, 1, 0.1)
        self.x[:, 1] = torch.arange(-1, 1, 0.1)
        self.w = torch.tensor([[1.0, -1.0], [1.0, -1.0]])
        self.b = torch.tensor([[1.0, -1.0]])
        self.f = torch.mm(self.x, self.w) + self.b
        self.y = self.f + 0.1*torch.randn((self.x.shape[0], 1))
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

and model instantiation
from torch import nn, optim

data_set = Data2D()
criterion = nn.MSELoss()
trainloader = DataLoader(dataset=data_set, batch_size=1)
model = LR(input_size=2, output_size=2)
optimizer = optim.SGD(model.parameters(), lr=0.001)

Training:
for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab
Linear Classifier and Logistic Regression
\[\sigma(z)=\frac{1}{1+e^{-z}}\]
sigmoid is used as the threshold function in logistic regression
Logistic Regression: Prediction
logistic function in PyTorch
as a function: torch.sigmoid
import torch
import matplotlib.pyplot as plt

z = torch.arange(-100, 100, 0.1).view(-1, 1)
yhat = torch.sigmoid(z)
plt.plot(z.numpy(), yhat.numpy())

as a class: nn.Sigmoid()
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

z = torch.arange(-100, 100, 0.1).view(-1, 1)
sig = nn.Sigmoid()
yhat = sig(z)
plt.plot(z.numpy(), yhat.numpy())
torch.nn.Sigmoid vs torch.sigmoid - PyTorch Forums
torch.nn.Sigmoid (note the capital "S") is a class. When you instantiate it, you get a function object, that is, an object that you can call like a function. In contrast, torch.sigmoid is a function.
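A quick check (not from the course) that the class and the function compute the same values:

import torch
import torch.nn as nn

z = torch.tensor([[-1.0], [0.0], [2.0]])
torch.sigmoid(z)       # the function
>> tensor([[0.2689],
           [0.5000],
           [0.8808]])
nn.Sigmoid()(z)        # an instantiated class, called like a function
>> tensor([[0.2689],
           [0.5000],
           [0.8808]])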
nn.Sequential
sequential_model = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid())

nn.Module
import torch.nn as nn

class logistic_regression(nn.Module):
    def __init__(self, in_size):
        super(logistic_regression, self).__init__()
        self.linear = nn.Linear(in_size, 1)
    def forward(self, x):
        z = torch.sigmoid(self.linear(x))
        return z

custom_model = logistic_regression(1)

Making a prediction
x = torch.tensor([[1.0], [2.0]])
custom_model(x)
>> tensor([[0.4129],
           [0.3936]], grad_fn=<SigmoidBackward>)

sequential_model(x)
>> tensor([[0.2848],
           [0.2115]], grad_fn=<SigmoidBackward>)

Multidimensional Logistic Regression
custom_2D_model = logistic_regression(2)
sequential_2D_model = nn.Sequential(nn.Linear(2, 1), nn.Sigmoid())

x = torch.tensor([[1.0, 2.0]])
yhat = sequential_2D_model(x)
yhat
>> tensor([[0.7587]], grad_fn=<SigmoidBackward>)
Ungraded lab
Bernoulli Distribution and Maximum Likelihood Estimation
To find the parameter values of the Bernoulli distribution, we do not maximize the likelihood function itself but the log of the likelihood function (the log-likelihood), which is given by
\[l(\theta) = \ln(p(Y|\theta))=\displaystyle\sum_{n=1}^{N}y_n \ln(\theta)+(1-y_n) \ln(1-\theta)\]
Note: We want to get
\[\hat\theta = argmax_\theta(P(Y|\theta))\]
where
\[P(Y|\theta) = \displaystyle\prod_{n=1}^{N}\theta^{y_n}(1-\theta)^{1-y_n}\]
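One step the notes skip: setting the derivative of the log-likelihood to zero gives the closed-form estimator,
\[\frac{dl(\theta)}{d\theta}=\displaystyle\sum_{n=1}^{N}\left(\frac{y_n}{\theta}-\frac{1-y_n}{1-\theta}\right)=0 \;\Rightarrow\; \hat\theta=\frac{1}{N}\displaystyle\sum_{n=1}^{N}y_n\]
so, for example, 7 successes in 10 Bernoulli trials give \(\hat\theta=0.7\).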
Logistic Regression Cross Entropy Loss
Loss function \(l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-\sigma(wx_n+b))^2\)
Cross entropy loss
\[l(\theta)=-\frac{1}{N}\displaystyle\sum_{n=1}^{N}y_n \ln(\sigma(wx_n+b))+(1-y_n)\ln(1-\sigma(wx_n+b))\]
def criterion(yhat, y):
    out = -1 * torch.mean(y * torch.log(yhat) + (1-y) * torch.log(1-yhat))
    return out
Logistic Regression in PyTorch
Create a model (using Sequential)
model = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid())

or create a custom one
import torch.nn as nn

class logistic_regression(nn.Module):
    def __init__(self, in_size):
        super(logistic_regression, self).__init__()
        self.linear = nn.Linear(in_size, 1)
    def forward(self, x):
        z = torch.sigmoid(self.linear(x))
        return z

Then define our loss function
def criterion(yhat, y):
    out = -1 * torch.mean(y * torch.log(yhat) + (1-y) * torch.log(1-yhat))
    return out

or simply use BCE (binary cross entropy)
criterion = nn.BCELoss()
Putting all pieces together:
#dataset
import torch
from torch.utils.data import Dataset

class Data(Dataset):
    def __init__(self):
        self.x = torch.arange(-1, 1, 0.1).view(-1, 1)
        self.y = torch.zeros(self.x.shape[0], 1)
        self.y[self.x[:, 0] > 0.2] = 1
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

dataset = Data()

# dataloader
from torch.utils.data import DataLoader
trainloader = DataLoader(dataset=dataset, batch_size=1)

# model
import torch.nn as nn
model = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid())

# optimizer
from torch import optim
optimizer = optim.SGD(model.parameters(), lr=0.01)

# loss
criterion = nn.BCELoss()

# training
for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab
5.2.2bad_inshilization_logistic_regression_with_mean_square_error_v2.ipynb
Week 4 - Softmax regression
Learning Objectives
- Using Lines to Classify Data
- Softmax Prediction in PyTorch
- Softmax Pytorch MNIST
notebook
Softmax Prediction
Softmax is a combination of logistic regression and argmax
Softmax function
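The softmax function itself (standard definition, not spelled out in these notes) turns the linear outputs \(z\) into class probabilities:
\[\text{softmax}(z)_i=\frac{e^{z_i}}{\sum_{j=1}^{K}e^{z_j}}\]
Taking the argmax of these probabilities (equivalently, of \(z\) directly, since softmax is monotonic) gives the predicted class.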
Custom module using nn.module
import torch.nn as nn

class Softmax(nn.Module):
    def __init__(self, in_size, out_size):
        super(Softmax, self).__init__()
        self.linear = nn.Linear(in_size, out_size)
    def forward(self, x):
        out = self.linear(x)
        return out

import torch
torch.manual_seed(1)
# 2-dimensional input samples and 3 output classes
model = Softmax(2, 3)

x = torch.tensor([[1.0, 2.0]])
z = model(x)
z
>> tensor([[-0.4053,  0.8864,  0.2807]], grad_fn=<AddmmBackward>)

_, yhat = z.max(1)
yhat
>> tensor([1])

and with multiple samples
X = torch.tensor([[1.0, 1.0], [1.0, 2.0], [1.0, -3.0]])
z = model(X)
z
>> tensor([[-0.0932,  0.5545, -0.1433],
           [-0.4053,  0.8864,  0.2807],
           [ 1.1552, -0.7730, -1.8396]], grad_fn=<AddmmBackward>)

_, yhat = z.max(1)
yhat
>> tensor([1, 1, 0])
Softmax PyTorch
Load Data
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
train_dataset[0] is a tuple with the image and the class:
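A quick check (shapes and the first label as in the standard torchvision MNIST training set; this output is not in the original notes):

image, label = train_dataset[0]
image.shape
>> torch.Size([1, 28, 28])
label
>> 5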
Create Model
import torch.nn as nn

class Softmax(nn.Module):
    def __init__(self, in_size, out_size):
        super(Softmax, self).__init__()
        self.linear = nn.Linear(in_size, out_size)
    def forward(self, x):
        out = self.linear(x)
        return out

input_dim = 28 * 28
output_dim = 10
model = Softmax(input_dim, output_dim)

criterion = nn.CrossEntropyLoss()

import torch.optim as optim
optimizer = optim.SGD(model.parameters(), lr=0.01)

n_epochs = 100
accuracy_list = []

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=5000)
Train Model
from tqdm import tqdm

for epoch in tqdm(range(n_epochs)):
    for x, y in train_loader:
        optimizer.zero_grad()
        z = model(x.view(-1, 28 * 28))
        loss = criterion(z, y)
        loss.backward()
        optimizer.step()
    correct = 0
    for x_test, y_test in validation_loader:
        z = model(x_test.view(-1, 28 * 28))
        _, yhat = torch.max(z.data, 1)
        correct = correct + (yhat == y_test).sum().item()
    accuracy = correct / len(validation_dataset)  # divide by the validation-set size
    accuracy_list.append(accuracy)
Ungraded lab
Ungraded lab
6.2lab_predicting _MNIST_using_Softmax_v2.ipynb
# The function to plot parameters
def PlotParameters(model):
    W = model.state_dict()['linear.weight'].data
    w_min = W.min().item()
    w_max = W.max().item()
    fig, axes = plt.subplots(2, 5)
    fig.subplots_adjust(hspace=0.01, wspace=0.1)
    for i, ax in enumerate(axes.flat):
        if i < 10:
            # Set the label for the sub-plot.
            ax.set_xlabel("class: {0}".format(i))
            # Plot the image.
            ax.imshow(W[i, :].view(28, 28), vmin=w_min, vmax=w_max, cmap='seismic')
            ax.set_xticks([])
            ax.set_yticks([])
    # Ensure the plot is shown correctly with multiple plots
    # in a single Notebook cell.
    plt.show()

# Plot the parameters
PlotParameters(model)
Week 4 - Shallow neural networks
Learning Objectives
- Simple Neural Networks
- More Hidden Neurons
- Neural Networks with Multiple Dimensional Input
- Multi-Class Neural Networks
- Backpropagation
- Activation Functions
notebook
Neural networks in One Dimension
using nn.Module
import torch
import torch.nn as nn
from torch import sigmoid

class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = sigmoid(self.linear1(x))
        x = sigmoid(self.linear2(x))
        return x

model = Net(1, 2, 1)
x = torch.tensor([0.0])
yhat = model(x)
yhat
>> tensor([0.5972], grad_fn=<SigmoidBackward>)

# multiple samples
x = torch.tensor([[0.0], [2.0], [3.0]])
yhat = model(x)
yhat
>> tensor([[0.5972],
           [0.5925],
           [0.5894]], grad_fn=<SigmoidBackward>)

# to get a discrete value we apply a threshold
yhat = yhat < 0.59
yhat
>> tensor([[False],
           [False],
           [ True]])

model.state_dict()
>> OrderedDict([('linear1.weight', tensor([[0.3820],
                                           [0.4019]])),
               ('linear1.bias', tensor([-0.7746, -0.3389])),
               ('linear2.weight', tensor([[-0.3466,  0.2201]])),
               ('linear2.bias', tensor([0.4115]))])

using nn.Sequential
model = nn.Sequential(nn.Linear(1, 2), nn.Sigmoid(), nn.Linear(2, 1), nn.Sigmoid())
train the model
we create the data
X = torch.arange(-20, 20, 1).view(-1, 1).type(torch.FloatTensor)
Y = torch.zeros(X.shape[0])
Y[(X[:, 0] > -4) & (X[:, 0] < 4)] = 1.0

we create a training function
from tqdm import tqdm

def train(Y, X, model, optimizer, criterion, epochs=1000):
    cost = []
    total = 0
    for epoch in tqdm(range(epochs)):
        total = 0
        for x, y in zip(X, Y):
            yhat = model(x)
            loss = criterion(yhat, y.view(-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += loss.item()
        cost.append(total)
    return cost

and the training process is now
#loss
criterion = nn.BCELoss()

#data
X = torch.arange(-20, 20, 1).view(-1, 1).type(torch.FloatTensor)
Y = torch.zeros(X.shape[0])
Y[(X[:, 0] > -4) & (X[:, 0] < 4)] = 1.0

#model
model = Net(1, 2, 1)

#optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

#train the model
cost = train(Y, X, model, optimizer, criterion, epochs=1000)
>> 100%|██████████| 1000/1000 [00:12<00:00, 76.96it/s]
Ungraded lab
I like how the intermediate representations of learning performance are displayed:
# The function for plotting the model
def PlotStuff(X, Y, model, epoch, leg=True):
    plt.plot(X.numpy(), model(X).detach().numpy(), label=('epoch ' + str(epoch)))
    plt.plot(X.numpy(), Y.numpy(), 'r')
    plt.xlabel('x')
    if leg == True:
        plt.legend()
    else:
        pass

activation values (called in the training loop); it reads model attributes (model.a1), which seems like bad practice.
plt.scatter(model.a1.detach().numpy()[:, 0], model.a1.detach().numpy()[:, 1], c=Y.numpy().reshape(-1))
plt.title('activations')
plt.show()
and final loss curve
Neural Networks with Multiple Dimensional Input
implementation
import torch
import torch.nn as nn
from torch import sigmoid
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import numpy as np
we create a dataset class
class XOR_Data(Dataset):
    def __init__(self, N_s=100):
        self.x = torch.zeros((N_s, 2))
        self.y = torch.zeros((N_s, 1))
        for i in range(N_s // 4):
            self.x[i, :] = torch.Tensor([0.0, 0.0])
            self.y[i, 0] = torch.Tensor([0.0])
            self.x[i + N_s // 4, :] = torch.Tensor([0.0, 1.0])
            self.y[i + N_s // 4, 0] = torch.Tensor([1.0])
            self.x[i + N_s // 2, :] = torch.Tensor([1.0, 0.0])
            self.y[i + N_s // 2, 0] = torch.Tensor([1.0])
            self.x[i + 3 * N_s // 4, :] = torch.Tensor([1.0, 1.0])
            self.y[i + 3 * N_s // 4, 0] = torch.Tensor([0.0])
        self.x = self.x + 0.01 * torch.randn((N_s, 2))
        self.len = N_s
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len
    # Plot the data
    def plot_stuff(self):
        plt.plot(self.x[self.y[:, 0] == 0, 0].numpy(), self.x[self.y[:, 0] == 0, 1].numpy(), 'o', label="y=0")
        plt.plot(self.x[self.y[:, 0] == 1, 0].numpy(), self.x[self.y[:, 0] == 1, 1].numpy(), 'ro', label="y=1")
        plt.legend()
We create a class for our model
class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = sigmoid(self.linear1(x))
        x = sigmoid(self.linear2(x))
        return x
We create a function to train our model
# Calculate the accuracy
def accuracy(model, data_set):
    return np.mean(data_set.y.view(-1).numpy() == (model(data_set.x)[:, 0] > 0.5).numpy())

def train(data_set, model, criterion, train_loader, optimizer, epochs=5):
    COST = []
    ACC = []
    for epoch in tqdm(range(epochs)):
        total = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
            #cumulative loss
            total += loss.item()
        ACC.append(accuracy(model, data_set))
        COST.append(total)
    return COST

the process for training is identical to logistic regression
criterion = nn.BCELoss()
data_set = XOR_Data()
train_loader = DataLoader(dataset=data_set, batch_size=1)
model = Net(2, 4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
train(data_set, model, criterion, train_loader, optimizer, epochs=500)
overfitting and underfitting
Solutions:
- use validation data to determine the optimum number of neurons (see the sketch after this list)
- get more data
- regularization: for example dropout
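A minimal sketch (not from the course) of the first point, assuming a separate validation set drawn from the same XOR_Data class and reusing the Net, accuracy and train definitions above:

# pick the hidden-layer size with the best validation accuracy (illustrative only)
validation_set = XOR_Data(N_s=40)
best_acc, best_H = 0.0, None
for H in [1, 2, 4, 8]:
    candidate = Net(2, H, 1)
    optimizer = torch.optim.SGD(candidate.parameters(), lr=0.01)
    train(data_set, candidate, criterion, train_loader, optimizer, epochs=500)
    acc = accuracy(candidate, validation_set)
    if acc > best_acc:
        best_acc, best_H = acc, H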
Ungraded lab
Multi-Class Neural Networks
using nn.Module
we don’t have sigmoid for the output, and D_out is our number of classes
class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = sigmoid(self.linear1(x))
        x = self.linear2(x)
        return x

using nn.Sequential
input_dim = 2
hidden_dim = 6
output_dim = 3
model = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.Sigmoid(),
    nn.Linear(hidden_dim, output_dim)
)
training
we create a validation and training dataset
import torchvision.datasets as dsets
import torchvision.transforms as transforms

train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

we create a validation and training loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=2000)

criterion = nn.CrossEntropyLoss()

we create the training function
from tqdm import tqdm

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}
    for epoch in tqdm(range(epochs)):
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        for x, y in validation_loader:
            #validation
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

We instantiate and train the model
input_dim = 28 * 28
hidden_dim = 100
output_dim = 10

model = Net(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # not in the original notes; the lab uses plain SGD
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=30)
To plot accuracy and loss
# Define a function to plot accuracy and loss
def plot_accuracy_loss(training_results):
    plt.subplot(2, 1, 1)
    plt.plot(training_results['training_loss'], 'r')
    plt.ylabel('loss')
    plt.title('training loss iterations')
    plt.subplot(2, 1, 2)
    plt.plot(training_results['validation_accuracy'])
    plt.ylabel('accuracy')
    plt.xlabel('epochs')
    plt.show()

plot_accuracy_loss(training_results)
To plot improperly classified items
count = 0
for x, y in validation_dataset:
    z = model(x.reshape(-1, 28 * 28))
    _, yhat = torch.max(z, 1)
    if yhat != y:
        show_data(x)
        count += 1
    if count >= 5:
        break
Ungraded lab
Backpropagation
Following the chain rule in the gradient calculation, the gradients can get closer and closer to 0 (the vanishing gradient problem), so the parameters of the early layers barely improve.
One way to deal with that is to change the activation function.
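A minimal sketch (not from the course) that makes the effect visible: chaining sigmoids multiplies derivatives that are each at most 0.25, while relu passes the gradient through unchanged for positive inputs.

import torch

x = torch.tensor(2.0, requires_grad=True)
y = torch.sigmoid(torch.sigmoid(torch.sigmoid(x)))
y.backward()
x.grad    # product of three small sigmoid derivatives
>> tensor(0.0048)

x = torch.tensor(2.0, requires_grad=True)
y = torch.relu(torch.relu(torch.relu(x)))
y.backward()
x.grad    # gradient passes through unchanged for positive x
>> tensor(1.)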
Activation functions
sigmoid, tanh, relu
sigmoid, tanh, relu in PyTorch
class Net_sigmoid(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_sigmoid, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = self.linear2(x)
        return x

class Net_tanh(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_tanh, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = torch.tanh(self.linear1(x))
        x = self.linear2(x)
        return x

class Net_relu(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_relu, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = self.linear2(x)
        return x

using nn.Sequential
model_tanh = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.Tanh(),
    nn.Linear(hidden_dim, output_dim)
)

model_relu = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.ReLU(),
    nn.Linear(hidden_dim, output_dim)
)
Ungraded lab
Ungraded lab
to monitor gpu usage: nvidia-smi -l 1
Week 5 - Deep neural networks
Learning Objectives
- building deep networks
- Dropout
- Neural Network initialization weights
- Gradient Descent with Momentum
notebook
Deep Neural Networks
Deep, following this course's definition, means a network with more than one hidden layer.
using nn.Module
import torch
import torch.nn as nn
from torch import sigmoid

class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
    def forward(self, x):
        x = sigmoid(self.linear1(x))
        x = sigmoid(self.linear2(x))
        x = self.linear3(x)
        return x

using nn.Sequential
input_dim = 2
hidden_dim1 = 6
hidden_dim2 = 4
output_dim = 3
model = nn.Sequential(
    nn.Linear(input_dim, hidden_dim1),
    nn.Sigmoid(),
    nn.Linear(hidden_dim1, hidden_dim2),
    nn.Sigmoid(),
    nn.Linear(hidden_dim2, output_dim)
)
training
there is no change compared to other networks
we create a validation and training dataset
import torchvision.datasets as dsets
import torchvision.transforms as transforms

train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

we create a validation and training loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=2000)

criterion = nn.CrossEntropyLoss()

we create the training function
from tqdm import tqdm

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}
    for epoch in tqdm(range(epochs)):
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        for x, y in validation_loader:
            #validation
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

We instantiate and train the model
input_dim = 28 * 28
hidden_dim1 = 50
hidden_dim2 = 50
output_dim = 10

model = Net(input_dim, hidden_dim1, hidden_dim2, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=30)
Ungraded lab - deep neural networks
Deep Neural Networks : nn.ModuleList()
jdc
this is a nice library that allows breaking the definition of a class across separate notebook cells
Installation is as simple as pip install jdc
and usage is
import jdc
and start a cell with %%add_to <your class name>
python implementation
import torch
import torch.nn as nn
from torch import sigmoid
import jdc

class Net(nn.Module):
    def __init__(self, Layers):
        super(Net, self).__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            self.hidden.append(nn.Linear(input_size, output_size))

Layers = [2, 3, 4, 3]
model = Net(Layers)

%%add_to Net
def forward(self, x):
    L = len(self.hidden)
    for (l, linear_transform) in zip(range(L), self.hidden):
        if l < L - 1:
            x = torch.relu(linear_transform(x))
        else:
            x = linear_transform(x)
    return x
Ungraded lab - nn.ModuleList()
8.1.2mulitclassspiralrulu_v2.ipynb
Dropout
using nn.Module
class Net(nn.Module):
    def __init__(self, in_size, n_hidden, out_size, p=0):
        super(Net, self).__init__()
        self.drop = nn.Dropout(p=p)
        self.linear1 = nn.Linear(in_size, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, out_size)
    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = self.drop(x)
        x = torch.relu(self.linear2(x))
        x = self.drop(x)
        x = self.linear3(x)
        return x

using nn.Sequential
model = nn.Sequential(
    nn.Linear(1, 10),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(10, 12),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(12, 1)
)
training
create data
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Create data class for creating dataset object
class Data(Dataset):
    # Constructor
    def __init__(self, N_SAMPLES=1000, noise_std=0.15, train=True):
        a = np.matrix([-1, 1, 2, 1, 1, -3, 1]).T
        self.x = np.matrix(np.random.rand(N_SAMPLES, 2))
        self.f = np.array(a[0] + (self.x) * a[1:3] + np.multiply(self.x[:, 0], self.x[:, 1]) * a[4] + np.multiply(self.x, self.x) * a[5:7]).flatten()
        self.a = a
        self.y = np.zeros(N_SAMPLES)
        self.y[self.f > 0] = 1
        self.y = torch.from_numpy(self.y).type(torch.LongTensor)
        self.x = torch.from_numpy(self.x).type(torch.FloatTensor)
        self.x = self.x + noise_std * torch.randn(self.x.size())
        self.f = torch.from_numpy(self.f)
        self.a = a
        self.len = N_SAMPLES  # not in the original notes, but needed by __len__
        if train == True:
            torch.manual_seed(1)
            self.x = self.x + noise_std * torch.randn(self.x.size())
            torch.manual_seed(0)

    # Getter
    def __getitem__(self, index):
        return self.x[index], self.y[index]

    # Get Length
    def __len__(self):
        return self.len

    # Plot the diagram
    def plot(self):
        X = data_set.x.numpy()
        y = data_set.y.numpy()
        h = .02
        x_min, x_max = X[:, 0].min(), X[:, 0].max()
        y_min, y_max = X[:, 1].min(), X[:, 1].max()
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        Z = data_set.multi_dim_poly(np.c_[xx.ravel(), yy.ravel()]).flatten()
        f = np.zeros(Z.shape)
        f[Z > 0] = 1
        f = f.reshape(xx.shape)

        plt.title('True decision boundary and sample points with noise')
        plt.plot(self.x[self.y == 0, 0].numpy(), self.x[self.y == 0, 1].numpy(), 'bo', label='y=0')
        plt.plot(self.x[self.y == 1, 0].numpy(), self.x[self.y == 1, 1].numpy(), 'ro', label='y=1')
        plt.contour(xx, yy, f, cmap=plt.cm.Paired)
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.legend()

    # Make a multidimensional polynomial function
    def multi_dim_poly(self, x):
        x = np.matrix(x)
        out = np.array(self.a[0] + (x) * self.a[1:3] + np.multiply(x[:, 0], x[:, 1]) * self.a[4] + np.multiply(x, x) * self.a[5:7])
        out = np.array(out)
        return out
instantiate the model
model_drop = Net(2, 300, 2, p=0.5)

The train method tells the model we are in the training phase, which enables dropout; later the eval method tells the model it is in the evaluation phase, which turns dropout off.
model_drop.train()

optimizer = torch.optim.Adam(model_drop.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
data_set = Data()
validation_set = Data(train=False)

# Initialize the LOSS dictionary to store the loss
LOSS = {}
LOSS['training data dropout'] = []
LOSS['validation data dropout'] = []
train the model
# Train the model
from tqdm import tqdm

epochs = 500

def train_model(epochs):
    for epoch in tqdm(range(epochs)):
        #all the samples are used for training
        yhat_drop = model_drop(data_set.x)
        loss_drop = criterion(yhat_drop, data_set.y)

        #store the loss for both the training and validation data
        LOSS['training data dropout'].append(loss_drop.item())
        model_drop.eval()
        LOSS['validation data dropout'].append(criterion(model_drop(validation_set.x), validation_set.y).item())
        model_drop.train()

        optimizer.zero_grad()
        loss_drop.backward()
        optimizer.step()

train_model(epochs)

# The function for calculating accuracy
def accuracy(model, data_set):
    _, yhat = torch.max(model(data_set.x), 1)
    return (yhat == data_set.y).numpy().mean()

# Print out the accuracy of the model with dropout
print("The accuracy of the model with dropout: ", accuracy(model_drop, validation_set))
>> The accuracy of the model with dropout:  0.866
Ungraded lab - dropout classification
Ungraded lab - dropout regression
Neural Network initialization weights
Different methods exist:
- uniform distribution for parameters: we make the lower bound of the distribution \(-1/\sqrt{L_{in}}\) and the upper bound \(+1/\sqrt{L_{in}}\) (a sketch of this scaled range is given after this list). See LeCun, Yann A., et al. "Efficient BackProp." Neural Networks: Tricks of the Trade. Springer, Berlin, Heidelberg, 2012. 9-48.
linear = nn.Linear(input_size, output_size)
linear.weight.data.uniform_(0, 1)
- Xavier method: Xavier initialization is another popular method, used in conjunction with the tanh activation. It takes into consideration the number of input neurons \(L_{in}\) as well as the number of neurons in the next layer \(L_{out}\). See Glorot, Xavier, and Yoshua Bengio. "Understanding the difficulty of training deep feedforward neural networks", 2010.
linear = nn.Linear(input_size, output_size)
torch.nn.init.xavier_uniform_(linear.weight)
- He method: for relu we use the He initialization method; after creating a linear object, we initialize the weights as follows. See He, Kaiming, et al. "Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification".
linear = nn.Linear(input_size, output_size)
torch.nn.init.kaiming_uniform_(linear.weight, nonlinearity='relu')
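A minimal sketch (not from the course labs) of the scaled uniform range described in the first bullet, assuming input_size and output_size are already defined:

import math

linear = nn.Linear(input_size, output_size)
bound = 1 / math.sqrt(linear.in_features)       # 1/sqrt(L_in)
linear.weight.data.uniform_(-bound, bound)      # range [-1/sqrt(L_in), +1/sqrt(L_in)]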
Ungraded lab - initialization
Ungraded lab - Xavier initialization
Ungraded lab - He initialization
Gradient Descent with Momentum
PyTorch implementation
In PyTorch, this is just defined at the optimizer level:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.4)
Ungraded lab - momentum with different polynomial
Ungraded lab - Neural Network momentum
Batch Normalization
The 𝛾, 𝛽 parameters are actually scale and shift parameters, which we're going to learn via training.
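For reference, the standard batch-normalization computation (not written out in these notes): each activation is normalized with the mini-batch mean \(\mu_B\) and variance \(\sigma_B^2\), then scaled and shifted by the learned \(\gamma\) and \(\beta\):
\[\hat{z}^{(i)}=\frac{z^{(i)}-\mu_B}{\sqrt{\sigma_B^2+\epsilon}}, \qquad \tilde{z}^{(i)}=\gamma\,\hat{z}^{(i)}+\beta\]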
using nn.Module
class Net_BatchNorm(nn.Module):
    def __init__(self, in_size, n_hidden1, n_hidden2, out_size):
        super(Net_BatchNorm, self).__init__()
        self.linear1 = nn.Linear(in_size, n_hidden1)
        self.linear2 = nn.Linear(n_hidden1, n_hidden2)
        self.linear3 = nn.Linear(n_hidden2, out_size)
        self.bn1 = nn.BatchNorm1d(n_hidden1)
        self.bn2 = nn.BatchNorm1d(n_hidden2)
    def forward(self, x):
        x = torch.sigmoid(self.bn1(self.linear1(x)))
        x = torch.sigmoid(self.bn2(self.linear2(x)))
        x = self.linear3(x)
        return x
Ungraded lab - Batch normalization
comparing training loss per iteration and validation accuracy with and without batch normalization.
Week 6 - Convolutional neural networks
Learning Objectives
- Convolution
- Activation Functions
- Max Pooling
- Convolution: Multiple Channels
- Convolutional Neural Network
- TORCH-VISION MODELS
notebook
Convolution
convolution explanation from stanford course CS231
convolution
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)

image = torch.zeros(1, 1, 5, 5)
image[0, 0, :, 2] = 1
image
>> tensor([[[[0., 0., 1., 0., 0.],
             [0., 0., 1., 0., 0.],
             [0., 0., 1., 0., 0.],
             [0., 0., 1., 0., 0.],
             [0., 0., 1., 0., 0.]]]])

z = conv(image)
z
>> tensor([[[[ 0.6065,  0.0728, -0.7915],
             [ 0.6065,  0.0728, -0.7915],
             [ 0.6065,  0.0728, -0.7915]]]], grad_fn=<ThnnConv2DBackward>)

conv.state_dict()
>> OrderedDict([('weight',
                 tensor([[[[ 0.1132, -0.0418,  0.3140],
                           [-0.2261, -0.1528, -0.3270],
                           [-0.2140, -0.1900,  0.2127]]]])),
                ('bias', tensor([0.0423]))])
stride
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=2)

zero padding
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=2, padding=1)

size of activation map
Feature size = ((Image size + 2 * Padding size − Kernel size) / Stride) + 1
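For example, for the 5×5 image and kernel size 3 used above (padding 0, stride 1): ((5 + 2·0 − 3) / 1) + 1 = 3, which matches the 3×3 activation map z returned by conv(image).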
Ungraded lab - What’s convolution
Activation Functions and Max Pooling
Activation function using nn.Module
import torch

image = torch.zeros(1, 1, 5, 5)
image[0, 0, :, 2] = 1
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
z = conv(image)
A = torch.relu(z)

Activation function using nn.Sequential
relu = nn.ReLU()
A = relu(z)

Max pooling
max = nn.MaxPool2d(2, stride=1)
max(image)

torch.max_pool2d(image, stride=1, kernel_size=2)
Ungraded lab - Activation Functions and Max Pooling
Multiple Input and Output Channels
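A minimal sketch (not from the course labs) of a convolution with several input and output channels, reusing the torch and nn imports above; each of the out_channels filters spans all in_channels:

conv_multi = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=3)
conv_multi.state_dict()['weight'].shape
>> torch.Size([3, 2, 3, 3])   # (out_channels, in_channels, kernel, kernel)

images = torch.zeros(1, 2, 5, 5)   # one sample with 2 channels
conv_multi(images).shape
>> torch.Size([1, 3, 3, 3])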
Ungraded lab - Activation Functions and Max Pooling
Convolutional Neural Network
using nn.Module
class CNN(nn.Module):
    def __init__(self, out_1=2, out_2=1):
        super(CNN, self).__init__()
        #first convolutional layer
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=out_1, kernel_size=2, padding=0)
        #max pooling
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=1)
        #second convolutional layer
        self.cnn2 = nn.Conv2d(in_channels=out_1, out_channels=out_2, kernel_size=2, stride=1, padding=0)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=1)
        #fully connected layer
        self.fc1 = nn.Linear(out_2*7*7, 2)
    def forward(self, x):
        #first convolutional layer
        x = self.cnn1(x)
        #activation function
        x = torch.relu(x)
        #max pooling
        x = self.maxpool1(x)
        #second convolutional layer
        x = self.cnn2(x)
        #activation function
        x = torch.relu(x)
        #max pooling
        x = self.maxpool2(x)
        #flatten output
        x = x.view(x.size(0), -1)
        #fully connected layer
        x = self.fc1(x)
        return x
training
n_epochs = 10
cost_list = []
accuracy_list = []
N_test = len(validation_dataset)
cost = 0
#n_epochs
for epoch in range(n_epochs):
    cost = 0
    for x, y in train_loader:
        #clear gradient
        optimizer.zero_grad()
        #make a prediction
        z = model(x)
        #calculate loss
        loss = criterion(z, y)
        #calculate gradients of parameters
        loss.backward()
        #update parameters
        optimizer.step()
        cost += loss.item()
    cost_list.append(cost)
    correct = 0
    #perform a prediction on the validation data
    for x_test, y_test in validation_loader:
        z = model(x_test)
        _, yhat = torch.max(z.data, 1)
        correct += (yhat == y_test).sum().item()
    accuracy = correct / N_test
    accuracy_list.append(accuracy)
Ungraded lab - Convolutional Neural Network Simple example
9.4.1ConvolutionalNeralNetworkSimple example.ipynb
def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
    #by Duane Nielsen
    from math import floor
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    h = floor(((h_w[0] + (2 * pad) - (dilation * (kernel_size[0] - 1)) - 1) / stride) + 1)
    w = floor(((h_w[1] + (2 * pad) - (dilation * (kernel_size[1] - 1)) - 1) / stride) + 1)
    return h, w

out = conv_output_shape((11, 11), kernel_size=2, stride=1, pad=0, dilation=1)
print(out)
out1 = conv_output_shape(out, kernel_size=2, stride=1, pad=0, dilation=1)
print(out1)
out2 = conv_output_shape(out1, kernel_size=2, stride=1, pad=0, dilation=1)
print(out2)
out3 = conv_output_shape(out2, kernel_size=2, stride=1, pad=0, dilation=1)
print(out3)
>> (10, 10)
   (9, 9)
   (8, 8)
   (7, 7)
Ungraded lab - Convolutional Neural Network MNIST
Ungraded lab - Convolutional Neural Networks with Batch Norm
GPU in PyTorch
torch.cuda.is_available()
>> True

device = torch.device('cuda:0')

torch.tensor([1, 2, 32, 4]).to(device)
>> tensor([ 1,  2, 32,  4], device='cuda:0')

model = CNN()
model.to(device)
Training on GPU
for epoch in range(num_epochs):
    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        predictions = model(features)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
TORCH-VISION MODELS
load resnet18 with pretrained parameters
import torch
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn

torch.manual_seed(0)

model = models.resnet18(pretrained=True)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

composed = transforms.Compose([transforms.Resize(224),
                               transforms.ToTensor(),
                               transforms.Normalize(mean, std)])

train_dataset = Dataset(transform=composed, train=True)
validation_dataset = Dataset(transform=composed)
freeze parameters and add a final layer to be trained
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(512, 7)

train_loader = DataLoader(dataset=train_dataset, batch_size=15)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=10)
provide only the parameters to be trained to the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam([parameters for parameters in model.parameters() if parameters.requires_grad], lr=0.003)

N_EPOCHS = 20
loss_list = []
accuracy_list = []
correct = 0
n_test = len(validation_dataset)
train the model, switching between model.train() and model.eval()
for epoch in range(N_EPOCHS):
    loss_sublist = []
    for x, y in train_loader:
        model.train()
        optimizer.zero_grad()
        z = model(x)
        loss = criterion(z, y)
        loss_sublist.append(loss.data.item())
        loss.backward()
        optimizer.step()
    loss_list.append(np.mean(loss_sublist))
    correct = 0
    for x_test, y_test in validation_loader:
        model.eval()
        z = model(x_test)
        _, yhat = torch.max(z.data, 1)
        correct += (yhat == y_test).sum().item()
    accuracy = correct / n_test
    accuracy_list.append(accuracy)
Week 7 - Fashion MNIST
Learning Objectives
- Apply all you have learned to train a Convolutional Neural Network