Coursera website: Deep Neural Networks with PyTorch
Course certificate
Week 1 - Tensor and Datasets
Learning Objectives
- Tensors 1D
- Two-Dimensional Tensors
- Data Set
- Differentiation in PyTorch
notebook
Tensors 1D
The basics
#initialize
import torch
a = torch.tensor([7, 4, 3, 2, 6])
a

#dtype, type()
a.dtype
a.type()

#convert with type
a = a.type(torch.FloatTensor)
a

#size, ndimension
a.size()
a.ndimension()

#convert to 2D
a_2D = a.view(-1, 1)

#from_numpy, to numpy
import numpy as np
numpy_array = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
torch_tensor = torch.from_numpy(numpy_array)
back_to_numpy = torch_tensor.numpy()

#from pandas
import pandas as pd
pandas_series = pd.Series([0.1, 2, 0.3, 10.1])
pandas_to_torch = torch.from_numpy(pandas_series.values)

#to list
this_tensor = torch.tensor([0, 1, 2, 3])
torch_to_list = this_tensor.tolist()

#item
new_tensor = torch.tensor([5, 2, 6, 1])
new_tensor[0].item()
#indexing and slicing
c = torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])  # example tensor (not defined in the original notes)
c[3:5] = torch.tensor([300.0, 4.0])
basic operations
#hadamard product
u = torch.tensor([1, 2])  # example vectors (not defined in the original notes)
v = torch.tensor([3, 2])
z = u*v

#dot product (scalar product)
result = torch.dot(u, v)
universal functions, mean, max, mathematical functions, plot with linspace
#mean
a.mean()

#max
b = torch.tensor([1.0, -2.0, 3.0, 5.0])  # example tensor (not defined in the original notes)
b.max()
#plot y=sin(x)
import matplotlib.pyplot as plt
%matplotlib inline

x = torch.linspace(0, 2 * np.pi, 100)
y = torch.sin(x)
plt.plot(x.numpy(), y.numpy())
Ungraded lab
Tensors 2D
Tensor creation in 2D
a = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
A = torch.tensor(a)

A.ndimension()
>> 2
A.shape
>> torch.Size([3, 3])
A.size()
>> torch.Size([3, 3])

#number of elements
A.numel()
>> 9

Indexing and slicing in 2D
A[0, 0:2]
>> tensor([11, 12])
A[1:3, 2]
>> tensor([23, 33])

Basic operations in 2D: hadamard product, matrix multiplication
X = torch.tensor([[1, 0], [0, 1]])
Y = torch.tensor([[2, 1], [1, 2]])

#hadamard product
Z = X*Y
Z
>> tensor([[2, 0],
           [0, 2]])

A = torch.tensor([[0, 1, 1], [1, 0, 1]])
B = torch.tensor([[1, 1], [1, 1], [-1, 1]])

#matrix multiplication
C = torch.mm(A, B)
C
>> tensor([[0, 2],
           [0, 2]])
Ungraded lab
Derivatives in Pytorch
Derivatives
using \(y(x)=x^2\)
x = torch.tensor(2., requires_grad=True)
y = x ** 2

#calculate derivative dy/dx
y.backward()
#evaluate at x : dy/dx(x)
x.grad
>> tensor(4.)

using \(z(x)=x^2+2x+1\)
x = torch.tensor(2., requires_grad=True)
z = x**2 + 2*x + 1
z.backward()
x.grad
>> tensor(6.)
Note: in my version of PyTorch (1.7.1), I cannot use integer (torch.int) dtypes with requires_grad:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-92-979d0f10c1e7> in <module>
----> 3 x = torch.tensor(2, requires_grad=True)
4 z = x**2 + 2*x + 1
5 z.backward()
RuntimeError: Only Tensors of floating point and complex dtype can require gradients
Partial derivatives
using \(f(u, v)=uv+u^2\), \(\frac{\partial f(u,v)}{\partial u} = v+2u\), \(\frac{\partial f(u,v)}{\partial v} = u\)
u = torch.tensor(1., requires_grad=True)
v = torch.tensor(2., requires_grad=True)

f = u*v + u**2

#calculate all partial derivatives df/du and df/dv
f.backward()
#evaluate partial derivative with respect to u at (u, v): df/du(u, v)
u.grad
>> tensor(4.)
#evaluate partial derivative with respect to v at (u, v): df/dv(u, v)
v.grad
>> tensor(1.)
Ungraded lab
1.2derivativesandGraphsinPytorch_v2.ipynb
With some explanation about .detach(), pointing to the torch.autograd documentation. That page links to a walkthrough-of-backprop video.
Will have to go back to .detach()
Simple Dataset
Build a Dataset Class and Object
from torch.utils.data import Dataset

class toy_set(Dataset):
    def __init__(self, length=100, transform=None):
        self.x = 2*torch.ones(length, 2)
        self.y = torch.ones(length, 1)
        self.len = length
        self.transform = transform
    def __getitem__(self, index):
        sample = self.x[index], self.y[index]
        if self.transform:
            sample = self.transform(sample)
        return sample
    def __len__(self):
        return self.len

dataset = toy_set()
len(dataset)
>> 100
dataset[0]
>> (tensor([2., 2.]), tensor([1.]))
Build a Dataset Transform (e.g. normalize or standardize)
class add_mult(object):
    def __init__(self, addx=1, muly=1):
        self.addx = addx
        self.muly = muly
    def __call__(self, sample):
        x = sample[0]
        y = sample[1]
        x = x + self.addx
        y = y * self.muly
        sample = x, y
        return sample

# automatically apply the transform
a_m = add_mult()
dataset_ = toy_set(transform=a_m)
dataset_[0]
>> (tensor([3., 3.]), tensor([1.]))
Compose Transforms
class mult(object):
    def __init__(self, mul=100):
        self.mul = mul
    def __call__(self, sample):
        x = sample[0]
        y = sample[1]
        x = x * self.mul
        y = y * self.mul
        sample = x, y
        return sample

from torchvision import transforms
data_transform = transforms.Compose([add_mult(), mult()])

# automatically apply the composed transform
dataset_tr = toy_set(transform=data_transform)
dataset_tr[0]
>> (tensor([300., 300.]), tensor([100.]))
Ungraded lab
Dataset
Dataset Class for Images
from PIL import Image
import pandas as pd
import os
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

class Dataset(Dataset):
    def __init__(self, csv_file, data_dir, transform=None):
        self.transform = transform
        self.data_dir = data_dir
        data_dir_csv_file = os.path.join(self.data_dir, csv_file)
        self.data_name = pd.read_csv(data_dir_csv_file)
        self.len = self.data_name.shape[0]
    def __len__(self):
        return self.len
    def __getitem__(self, idx):
        img_name = os.path.join(self.data_dir, self.data_name.iloc[idx, 1])
        image = Image.open(img_name)
        y = self.data_name.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, y

def show_data(data_sample, shape=(28, 28)):
    plt.imshow(data_sample[0].numpy().reshape(shape), cmap='gray')
    plt.title('y = ' + data_sample[1])

dataset = Dataset(csv_file=csv_file, data_dir=directory)
show_data(dataset[0])
Torch Vision Transforms
import torchvision.transforms as transforms

transforms.CenterCrop(20)
transforms.ToTensor()

croptensor_data_transform = transforms.Compose([transforms.CenterCrop(20), transforms.ToTensor()])
dataset = Dataset(csv_file=csv_file, data_dir=directory, transform=croptensor_data_transform)
dataset[0][0].shape
>> torch.Size([1, 20, 20])
Torch Vision Datasets
MNIST example
import torchvision.datasets as dsets
dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
Ungraded lab
Week 2 - Linear Regression
Learning Objectives
- Linear Regression Prediction
- Linear Regression Training
- Loss
- Gradient Descent
- Cost
- Linear Regression Training PyTorch
notebook
Linear Regression in 1D - Prediction
Simple linear regression - prediction
import torch

w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(-1.0, requires_grad=True)

def forward(x):
    y = w*x + b
    return y

x = torch.tensor([1.0])
yhat = forward(x)
yhat
>> tensor([1.], grad_fn=<AddBackward0>)

x = torch.tensor([[1.0], [2.0]])
forward(x)
>> tensor([[1.],
           [3.]], grad_fn=<AddBackward0>)
PyTorch - Class Linear
from torch.nn import Linear

torch.manual_seed(1)
model = Linear(in_features=1, out_features=1)
list(model.parameters())
>> [Parameter containing:
    tensor([[0.5153]], requires_grad=True),
    Parameter containing:
    tensor([-0.4414], requires_grad=True)]

x = torch.tensor([[1.0], [2.0]])
model(x)
>> tensor([[0.0739],
           [0.5891]], grad_fn=<AddmmBackward>)
PyTorch - Custom Modules
import torch.nn as nn

class LR(nn.Module):
    def __init__(self, in_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(in_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out

model = LR(1, 1)
list(model.parameters())
>> [Parameter containing:
    tensor([[-0.9414]], requires_grad=True),
    Parameter containing:
    tensor([0.5997], requires_grad=True)]

x = torch.tensor([[1.0], [2.0]])
model(x)
>> tensor([[-0.3417],
           [-1.2832]], grad_fn=<AddmmBackward>)
Model state_dict()
This returns a Python dictionary. We will use it more as our models get more complex. One use is to map each linear layer to its parameters; we can print out the keys and values.
model.state_dict()
>> OrderedDict([('linear.weight', tensor([[-0.9414]])),
               ('linear.bias', tensor([0.5997]))])
Ungraded lab
Linear Regression Training
The loss function presented is mean squared error:
\(l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-(wx_n+b))^2\)
Gradient Descent and cost
PyTorch Slope
import torch

w = torch.tensor(-10.0, requires_grad=True)
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = -3*X

import matplotlib.pyplot as plt
plt.plot(X.numpy(), f.numpy())
plt.show()

Y = f + 0.1*torch.randn(X.size())
plt.plot(X.numpy(), Y.numpy(), 'ro')
plt.show()

def forward(x):
    return w*x

def criterion(yhat, y):
    return torch.mean((yhat-y)**2)

lr = 0.1
for epoch in range(4):
    Yhat = forward(X)
    loss = criterion(Yhat, Y)
    loss.backward()
    w.data = w.data - lr*w.grad.data
    w.grad.data.zero_()
Ungraded lab
Linear Regression Training in PyTorch
Cost surface
# The class for plotting the diagrams
class plot_error_surfaces(object):
    # Constructor
    def __init__(self, w_range, b_range, X, Y, n_samples=30, go=True):
        W = np.linspace(-w_range, w_range, n_samples)
        B = np.linspace(-b_range, b_range, n_samples)
        w, b = np.meshgrid(W, B)
        Z = np.zeros((30, 30))
        count1 = 0
        self.y = Y.numpy()
        self.x = X.numpy()
        for w1, b1 in zip(w, b):
            count2 = 0
            for w2, b2 in zip(w1, b1):
                Z[count1, count2] = np.mean((self.y - w2 * self.x + b2) ** 2)
                count2 += 1
            count1 += 1
        self.Z = Z
        self.w = w
        self.b = b
        self.W = []
        self.B = []
        self.LOSS = []
        self.n = 0
        if go == True:
            plt.figure()
            plt.figure(figsize=(7.5, 5))
            plt.axes(projection='3d').plot_surface(self.w, self.b, self.Z, rstride=1, cstride=1, cmap='viridis', edgecolor='none')
            plt.title('Cost/Total Loss Surface')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.show()
            plt.figure()
            plt.title('Cost/Total Loss Surface Contour')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.contour(self.w, self.b, self.Z)
            plt.show()

    # Setter
    def set_para_loss(self, W, B, loss):
        self.n = self.n + 1
        self.W.append(W)
        self.B.append(B)
        self.LOSS.append(loss)

    # Plot diagram
    def final_plot(self):
        ax = plt.axes(projection='3d')
        ax.plot_wireframe(self.w, self.b, self.Z)
        ax.scatter(self.W, self.B, self.LOSS, c='r', marker='x', s=200, alpha=1)
        plt.figure()
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c='r', marker='x')
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()

    # Plot diagram
    def plot_ps(self):
        plt.subplot(121)
        plt.plot(self.x, self.y, 'ro', label="training points")
        plt.plot(self.x, self.W[-1] * self.x + self.B[-1], label="estimated line")
        plt.xlabel('x')
        plt.ylabel('y')
        plt.ylim((-10, 15))
        plt.title('Data Space Iteration: ' + str(self.n))
        plt.subplot(122)
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c='r', marker='x')
        plt.title('Total Loss Surface Contour Iteration' + str(self.n))
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()

get_surface = plot_error_surfaces(15, 15, X, Y, 30)
PyTorch (hard way)
def forward(x):
    y = w*x + b
    return y

def criterion(yhat, y):
    return torch.mean((yhat-y)**2)

w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-10.0, requires_grad=True)
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = 1*X - 1
Y = f + 0.1*torch.rand(X.size())

lr = 0.1
for epoch in range(15):
    Yhat = forward(X)
    loss = criterion(Yhat, Y)
    loss.backward()
    w.data = w.data - lr*w.grad.data
    w.grad.data.zero_()
    b.data = b.data - lr*b.grad.data
    b.grad.data.zero_()
Ungraded lab
Stochastic Gradient Descent and the Data Loader
Stochastic Gradient Descent in PyTorch
w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-10.0, requires_grad=True)
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = -3*X
Y = f + 0.1*torch.randn(X.size())

def forward(x):
    y = w*x + b
    return y

def criterion(yhat, y):
    return torch.mean((yhat-y)**2)

lr = 0.1
for epoch in range(4):
    for x, y in zip(X, Y):
        yhat = forward(x)
        loss = criterion(yhat, y)
        loss.backward()
        w.data = w.data - lr*w.grad.data
        w.grad.data.zero_()
        b.data = b.data - lr*b.grad.data
        b.grad.data.zero_()
Stochastic Gradient Descent DataLoader
dataset
from torch.utils.data import Dataset

class Data(Dataset):
    def __init__(self):
        self.x = torch.arange(-3, 3, 0.1).view(-1, 1)
        self.y = -3*self.x + 1  # use self.x (the original notes referenced the global X here)
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

dataset = Data()

dataloader
from torch.utils.data import DataLoader

dataset = Data()
trainloader = DataLoader(dataset=dataset, batch_size=1)
stochastic gradient descent
for x, y in trainloader:
    yhat = forward(x)
    loss = criterion(yhat, y)
    loss.backward()
    w.data = w.data - lr*w.grad.data
    b.data = b.data - lr*b.grad.data
    w.grad.data.zero_()
    b.grad.data.zero_()
Ungraded lab
Mini-Batch Gradient Descent
Iterations = \(\frac{\text{training size}}{\text{batch size}}\)
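For example, with 1,000 training samples and a batch size of 5, one epoch corresponds to \(1000/5 = 200\) iterations.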
Mini-Batch Gradient Descent in Pytorch
dataset = Data()
trainloader = DataLoader(dataset=dataset, batch_size=5)

lr = 0.1
LOSS = []
for epoch in range(4):
    for x, y in trainloader:
        yhat = forward(x)
        loss = criterion(yhat, y)
        loss.backward()
        w.data = w.data - lr*w.grad.data
        b.data = b.data - lr*b.grad.data
        w.grad.data.zero_()
        b.grad.data.zero_()
        LOSS.append(loss.item())
Optimization in PyTorch
from torch import nn, optim

criterion = nn.MSELoss()
trainloader = DataLoader(dataset=dataset, batch_size=1)
model = LR(1, 1)
optimizer = optim.SGD(model.parameters(), lr=0.01)

optimizer.state_dict()
>> {'state': {},
    'param_groups': [{'lr': 0.01,
      'momentum': 0,
      'dampening': 0,
      'weight_decay': 0,
      'nesterov': False,
      'params': [0, 1]}]}

for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab
Training, Validation and Test Split
standard explanation about Train, Validation, Test
Training, Validation and Test Split in PyTorch
Dataset to generate train_data and val_data
from torch.utils.data import Dataset, DataLoader

class Data(Dataset):
    def __init__(self, train=True):
        self.x = torch.arange(-3, 3, 0.1).view(-1, 1)
        self.f = -3*self.x + 1
        self.y = self.f + 0.1*torch.randn(self.x.size())
        self.len = self.x.shape[0]
        if train == True:
            self.y[0] = 0
            self.y[50:55] = 20
        else:
            pass
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

train_data = Data()
val_data = Data(train=False)
LR model
import torch.nn as nn

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out

criterion = nn.MSELoss()
trainloader = DataLoader(dataset=train_data, batch_size=1)
epochs = 10
learning_rates = [0.0001, 0.001, 0.01, 0.1, 1]
validation_error = torch.zeros(len(learning_rates))
test_error = torch.zeros(len(learning_rates))  # error measured on the training data, following the notebook's naming
MODELS = []

from torch import optim
from tqdm import tqdm

for i, learning_rate in tqdm(enumerate(learning_rates)):
    model = LR(1, 1)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        for x, y in trainloader:
            yhat = model(x)
            loss = criterion(yhat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    yhat = model(train_data.x)
    loss = criterion(yhat, train_data.y)
    test_error[i] = loss.item()

    yhat = model(val_data.x)
    loss = criterion(yhat, val_data.y)
    validation_error[i] = loss.item()
    MODELS.append(model)

import numpy as np
plt.semilogx(np.array(learning_rates), test_error.numpy(), label='training cost/total loss')
plt.semilogx(np.array(learning_rates), validation_error.numpy(), label='validation cost/total loss')
plt.ylabel('Cost/Total Loss')
plt.xlabel('learning rate')
plt.legend()
plt.show()
Week 3 - Multiple Input Output Linear Regression - Logistic Regression for Classification
Learning Objectives
- Multiple Linear Regression
- Multiple Linear Regression Training
- Linear Regression Multiple Outputs
- Linear Regression Multiple Outputs Training
notebook
Multiple Input Linear Regression Prediction
Class Linear
import torch
from torch.nn import Linear

torch.manual_seed(1)
model = Linear(in_features=2, out_features=1)
list(model.parameters())
>> [Parameter containing:
    tensor([[ 0.3643, -0.3121]], requires_grad=True),
    Parameter containing:
    tensor([-0.1371], requires_grad=True)]

model.state_dict()
>> OrderedDict([('weight', tensor([[ 0.3643, -0.3121]])),
               ('bias', tensor([-0.1371]))])

#predictions for multiple samples
X = torch.tensor([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
yhat = model(X)
yhat
>> tensor([[-0.0848],
           [-0.3969],
           [-0.7090]], grad_fn=<AddmmBackward>)
Custom Modules
import torch.nn as nn

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out
Ungraded lab
Multiple Input Linear Regression Training
Cost function and Gradient Descent for Multiple Linear Regression
Cost function
\[l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-(x_nw+b))^2\]
Gradient of loss function with respect to the weights
\[\nabla l(w,b) = \begin{bmatrix}\frac{\partial l(w,b)}{\partial w_1}\\ \vdots \\\frac{\partial l(w,b)}{\partial w_d}\end{bmatrix}\]
Gradient of loss function with respect to the bias
\[\frac{\partial l(w,b)}{\partial b}\]
Update of weights
\[w^{k+1} = w^k-\eta \nabla l(w^k,b^k)\]
\[\begin{bmatrix} w_1^{k+1}\\ \vdots\\ w_d^{k+1}\\\end{bmatrix}=\begin{bmatrix} w_1^{k}\\ \vdots\\ w_d^{k}\\\end{bmatrix}-\eta \begin{bmatrix}\frac{\partial l(w^k,b^k)}{\partial w_1}\\ \vdots \\\frac{\partial l(w^k,b^k)}{\partial w_d}\end{bmatrix}\]
and update of bias
\[b^{k+1}=b^k-\eta \frac{\partial l(w^k,b^k)}{\partial b}\]
Train the model in PyTorch
from torch import nn, optim
import torch

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out

from torch.utils.data import Dataset, DataLoader

class Data2D(Dataset):
    def __init__(self):
        self.x = torch.zeros(20, 2)
        self.x[:, 0] = torch.arange(-1, 1, 0.1)
        self.x[:, 1] = torch.arange(-1, 1, 0.1)
        self.w = torch.tensor([[1.0], [1.0]])
        self.b = 1
        self.f = torch.mm(self.x, self.w) + self.b
        self.y = self.f + 0.1*torch.randn((self.x.shape[0], 1))
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

data_set = Data2D()
criterion = nn.MSELoss()
trainloader = DataLoader(dataset=data_set, batch_size=2)
model = LR(input_size=2, output_size=1)
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab
Multiple Output Linear Regression
Linear regression with multiple outputs
Custom Modules
import torch.nn as nn
import torch

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out

torch.manual_seed(1)
model = LR(input_size=2, output_size=2)

list(model.parameters())
>> [Parameter containing:
    tensor([[ 0.3643, -0.3121],
            [-0.1371,  0.3319]], requires_grad=True),
    Parameter containing:
    tensor([-0.6657,  0.4241], requires_grad=True)]

#with 2 columns and 3 rows
X = torch.tensor([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
Yhat = model(X)
Yhat
>> tensor([[-0.6135,  0.6189],
           [-0.9256,  0.9508],
           [-1.2377,  1.2827]], grad_fn=<AddmmBackward>)
Ungraded lab
Multiple Output Linear Regression Training
Training in PyTorch
Training is the same; what changes is the Dataset:
from torch.utils.data import Dataset, DataLoader

class Data2D(Dataset):
    def __init__(self):
        self.x = torch.zeros(20, 2)
        self.x[:, 0] = torch.arange(-1, 1, 0.1)
        self.x[:, 1] = torch.arange(-1, 1, 0.1)
        self.w = torch.tensor([[1.0, -1.0], [1.0, -1.0]])
        self.b = torch.tensor([[1.0, -1.0]])
        self.f = torch.mm(self.x, self.w) + self.b
        self.y = self.f + 0.1*torch.randn((self.x.shape[0], 1))
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

and model instantiation
from torch import nn, optim

data_set = Data2D()
criterion = nn.MSELoss()
trainloader = DataLoader(dataset=data_set, batch_size=1)
model = LR(input_size=2, output_size=2)
optimizer = optim.SGD(model.parameters(), lr=0.001)

Training:
for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab
Linear Classifier and Logistic Regression
\[\sigma(z)=\frac{1}{1+e^{-z}}\]
sigmoid is used as the threshold function in logistic regression
Logistic Regression: Prediction
logistic function in PyTorch
as a function: torch.sigmoid
import torch
import matplotlib.pyplot as plt

z = torch.arange(-100, 100, 0.1).view(-1, 1)
yhat = torch.sigmoid(z)
plt.plot(z.numpy(), yhat.numpy())

as a class: nn.Sigmoid()
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

z = torch.arange(-100, 100, 0.1).view(-1, 1)
sig = nn.Sigmoid()
yhat = sig(z)
plt.plot(z.numpy(), yhat.numpy())
torch.nn.Sigmoid vs torch.sigmoid - PyTorch Forums
torch.nn.Sigmoid (note the capital "S") is a class. When you instantiate it, you get a function object, that is, an object that you can call like a function. In contrast, torch.sigmoid is a function.
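A quick check (not from the course) that the class and the function compute the same values:

import torch
import torch.nn as nn

z = torch.tensor([[-1.0], [0.0], [2.0]])
torch.sigmoid(z)       # the function
>> tensor([[0.2689],
           [0.5000],
           [0.8808]])
nn.Sigmoid()(z)        # an instantiated class, called like a function
>> tensor([[0.2689],
           [0.5000],
           [0.8808]])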
nn.Sequential
sequential_model = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid())

nn.Module
import torch.nn as nn

class logistic_regression(nn.Module):
    def __init__(self, in_size):
        super(logistic_regression, self).__init__()
        self.linear = nn.Linear(in_size, 1)
    def forward(self, x):
        z = torch.sigmoid(self.linear(x))
        return z

custom_model = logistic_regression(1)

Making a prediction
x = torch.tensor([[1.0], [2.0]])
custom_model(x)
>> tensor([[0.4129],
           [0.3936]], grad_fn=<SigmoidBackward>)

sequential_model(x)
>> tensor([[0.2848],
           [0.2115]], grad_fn=<SigmoidBackward>)

Multidimensional Logistic Regression
custom_2D_model = logistic_regression(2)
sequential_2D_model = nn.Sequential(nn.Linear(2, 1), nn.Sigmoid())

x = torch.tensor([[1.0, 2.0]])
yhat = sequential_2D_model(x)
yhat
>> tensor([[0.7587]], grad_fn=<SigmoidBackward>)
Ungraded lab
Bernoulli Distribution and Maximum Likelihood Estimation
To find the parameter values of the Bernoulli distribution, we do not maximize the likelihood function itself but the log of the likelihood function (the log-likelihood), which is given by
\[l(\theta) = \ln(p(Y|\theta))=\displaystyle\sum_{n=1}^{N}y_n \ln(\theta)+(1-y_n) \ln(1-\theta)\]
Note: We want to get
\[\hat\theta = argmax_\theta(P(Y|\theta))\]
where
\[P(Y|\theta) = \displaystyle\prod_{n=1}^{N}\theta^{y_n}(1-\theta)^{1-y_n}\]
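One step the notes skip: setting the derivative of the log-likelihood to zero gives the closed-form estimator,
\[\frac{dl(\theta)}{d\theta}=\displaystyle\sum_{n=1}^{N}\left(\frac{y_n}{\theta}-\frac{1-y_n}{1-\theta}\right)=0 \;\Rightarrow\; \hat\theta=\frac{1}{N}\displaystyle\sum_{n=1}^{N}y_n\]
so, for example, 7 successes in 10 Bernoulli trials give \(\hat\theta=0.7\).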
Logistic Regression Cross Entropy Loss
Loss function \(l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-\sigma(wx_n+b))^2\)
Cross entropy loss
\[l(\theta)=-\frac{1}{N}\displaystyle\sum_{n=1}^{N}y_n \ln(\sigma(wx_n+b))+(1-y_n)\ln(1-\sigma(wx_n+b))\]
def criterion(yhat, y):
    out = -1 * torch.mean(y * torch.log(yhat) + (1-y) * torch.log(1-yhat))
    return out
Logistic Regression in PyTorch
Create a model (using Sequential)
model = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid())

or create a custom one
import torch.nn as nn

class logistic_regression(nn.Module):
    def __init__(self, in_size):
        super(logistic_regression, self).__init__()
        self.linear = nn.Linear(in_size, 1)
    def forward(self, x):
        z = torch.sigmoid(self.linear(x))
        return z

Then define our loss function
def criterion(yhat, y):
    out = -1 * torch.mean(y * torch.log(yhat) + (1-y) * torch.log(1-yhat))
    return out

or simply use BCE (binary cross entropy)
criterion = nn.BCELoss()
Putting all pieces together:
#dataset
import torch
from torch.utils.data import Dataset

class Data(Dataset):
    def __init__(self):
        self.x = torch.arange(-1, 1, 0.1).view(-1, 1)
        self.y = torch.zeros(self.x.shape[0], 1)
        self.y[self.x[:, 0] > 0.2] = 1
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

dataset = Data()

# dataloader
from torch.utils.data import DataLoader
trainloader = DataLoader(dataset=dataset, batch_size=1)

# model
import torch.nn as nn
model = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid())

# optimizer
from torch import optim
optimizer = optim.SGD(model.parameters(), lr=0.01)

# loss
criterion = nn.BCELoss()

# training
for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab
5.2.2bad_inshilization_logistic_regression_with_mean_square_error_v2.ipynb
Week 4 - Softmax regression
Learning Objectives
- Using Lines to Classify Data
- Softmax Prediction in PyTorch
- Softmax Pytorch MNIST
notebook
Softmax Prediction
Softmax is a combination of logistic regression and argmax
Softmax function
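The softmax function itself (standard definition, not spelled out in these notes) turns the linear outputs \(z\) into class probabilities:
\[\text{softmax}(z)_i=\frac{e^{z_i}}{\sum_{j=1}^{K}e^{z_j}}\]
Taking the argmax of these probabilities (equivalently, of \(z\) directly, since softmax is monotonic) gives the predicted class.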
Custom module using nn.module
import torch.nn as nn

class Softmax(nn.Module):
    def __init__(self, in_size, out_size):
        super(Softmax, self).__init__()
        self.linear = nn.Linear(in_size, out_size)
    def forward(self, x):
        out = self.linear(x)
        return out

import torch
torch.manual_seed(1)
# 2-dimensional input samples and 3 output classes
model = Softmax(2, 3)

x = torch.tensor([[1.0, 2.0]])
z = model(x)
z
>> tensor([[-0.4053,  0.8864,  0.2807]], grad_fn=<AddmmBackward>)

_, yhat = z.max(1)
yhat
>> tensor([1])

and with multiple samples
X = torch.tensor([[1.0, 1.0], [1.0, 2.0], [1.0, -3.0]])
z = model(X)
z
>> tensor([[-0.0932,  0.5545, -0.1433],
           [-0.4053,  0.8864,  0.2807],
           [ 1.1552, -0.7730, -1.8396]], grad_fn=<AddmmBackward>)

_, yhat = z.max(1)
yhat
>> tensor([1, 1, 0])
Softmax PyTorch
Load Data
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
train_dataset[0] is a tuple with the image and the class:
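A quick check (shapes and the first label as in the standard torchvision MNIST training set; this output is not in the original notes):

image, label = train_dataset[0]
image.shape
>> torch.Size([1, 28, 28])
label
>> 5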
Create Model
import torch.nn as nn

class Softmax(nn.Module):
    def __init__(self, in_size, out_size):
        super(Softmax, self).__init__()
        self.linear = nn.Linear(in_size, out_size)
    def forward(self, x):
        out = self.linear(x)
        return out

input_dim = 28 * 28
output_dim = 10
model = Softmax(input_dim, output_dim)

criterion = nn.CrossEntropyLoss()

import torch.optim as optim
optimizer = optim.SGD(model.parameters(), lr=0.01)

n_epochs = 100
accuracy_list = []

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=5000)
Train Model
from tqdm import tqdm

for epoch in tqdm(range(n_epochs)):
    for x, y in train_loader:
        optimizer.zero_grad()
        z = model(x.view(-1, 28 * 28))
        loss = criterion(z, y)
        loss.backward()
        optimizer.step()
    correct = 0
    for x_test, y_test in validation_loader:
        z = model(x_test.view(-1, 28 * 28))
        _, yhat = torch.max(z.data, 1)
        correct = correct + (yhat == y_test).sum().item()
    accuracy = correct / len(validation_dataset)  # divide by the validation-set size
    accuracy_list.append(accuracy)
Ungraded lab
Ungraded lab
6.2lab_predicting _MNIST_using_Softmax_v2.ipynb
# The function to plot parameters
def PlotParameters(model):
    W = model.state_dict()['linear.weight'].data
    w_min = W.min().item()
    w_max = W.max().item()
    fig, axes = plt.subplots(2, 5)
    fig.subplots_adjust(hspace=0.01, wspace=0.1)
    for i, ax in enumerate(axes.flat):
        if i < 10:
            # Set the label for the sub-plot.
            ax.set_xlabel("class: {0}".format(i))
            # Plot the image.
            ax.imshow(W[i, :].view(28, 28), vmin=w_min, vmax=w_max, cmap='seismic')
            ax.set_xticks([])
            ax.set_yticks([])
    # Ensure the plot is shown correctly with multiple plots
    # in a single Notebook cell.
    plt.show()

# Plot the parameters
PlotParameters(model)
Week 4 - Shallow neural networks
Learning Objectives
- Simple Neural Networks
- More Hidden Neurons
- Neural Networks with Multiple Dimensional Input
- Multi-Class Neural Networks
- Backpropagation
- Activation Functions
notebook
Neural networks in One Dimension
using nn.Module
import torch
import torch.nn as nn
from torch import sigmoid

class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = sigmoid(self.linear1(x))
        x = sigmoid(self.linear2(x))
        return x

model = Net(1, 2, 1)
x = torch.tensor([0.0])
yhat = model(x)
yhat
>> tensor([0.5972], grad_fn=<SigmoidBackward>)

# multiple samples
x = torch.tensor([[0.0], [2.0], [3.0]])
yhat = model(x)
yhat
>> tensor([[0.5972],
           [0.5925],
           [0.5894]], grad_fn=<SigmoidBackward>)

# to get a discrete value we apply a threshold
yhat = yhat < 0.59
yhat
>> tensor([[False],
           [False],
           [ True]])

model.state_dict()
>> OrderedDict([('linear1.weight', tensor([[0.3820],
                                           [0.4019]])),
               ('linear1.bias', tensor([-0.7746, -0.3389])),
               ('linear2.weight', tensor([[-0.3466,  0.2201]])),
               ('linear2.bias', tensor([0.4115]))])

using nn.Sequential
model = nn.Sequential(nn.Linear(1, 2), nn.Sigmoid(), nn.Linear(2, 1), nn.Sigmoid())
train the model
we create the data
X = torch.arange(-20, 20, 1).view(-1, 1).type(torch.FloatTensor)
Y = torch.zeros(X.shape[0])
Y[(X[:, 0] > -4) & (X[:, 0] < 4)] = 1.0

we create a training function
from tqdm import tqdm

def train(Y, X, model, optimizer, criterion, epochs=1000):
    cost = []
    total = 0
    for epoch in tqdm(range(epochs)):
        total = 0
        for x, y in zip(X, Y):
            yhat = model(x)
            loss = criterion(yhat, y.view(-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += loss.item()
        cost.append(total)
    return cost

and the training process is now
#loss
criterion = nn.BCELoss()

#data
X = torch.arange(-20, 20, 1).view(-1, 1).type(torch.FloatTensor)
Y = torch.zeros(X.shape[0])
Y[(X[:, 0] > -4) & (X[:, 0] < 4)] = 1.0

#model
model = Net(1, 2, 1)

#optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

#train the model
cost = train(Y, X, model, optimizer, criterion, epochs=1000)
>> 100%|██████████| 1000/1000 [00:12<00:00, 76.96it/s]
Ungraded lab
I like how the intermediate representations of learning performance are displayed:
# The function for plotting the model
def PlotStuff(X, Y, model, epoch, leg=True):
    plt.plot(X.numpy(), model(X).detach().numpy(), label=('epoch ' + str(epoch)))
    plt.plot(X.numpy(), Y.numpy(), 'r')
    plt.xlabel('x')
    if leg == True:
        plt.legend()
    else:
        pass

activation values (called in the training loop); it reads model attributes (model.a1), which seems like bad practice.
plt.scatter(model.a1.detach().numpy()[:, 0], model.a1.detach().numpy()[:, 1], c=Y.numpy().reshape(-1))
plt.title('activations')
plt.show()
and final loss curve
Neural Networks with Multiple Dimensional Input
implementation
import torch
import torch.nn as nn
from torch import sigmoid
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import numpy as np
we create a dataset class
class XOR_Data(Dataset):
    def __init__(self, N_s=100):
        self.x = torch.zeros((N_s, 2))
        self.y = torch.zeros((N_s, 1))
        for i in range(N_s // 4):
            self.x[i, :] = torch.Tensor([0.0, 0.0])
            self.y[i, 0] = torch.Tensor([0.0])
            self.x[i + N_s // 4, :] = torch.Tensor([0.0, 1.0])
            self.y[i + N_s // 4, 0] = torch.Tensor([1.0])
            self.x[i + N_s // 2, :] = torch.Tensor([1.0, 0.0])
            self.y[i + N_s // 2, 0] = torch.Tensor([1.0])
            self.x[i + 3 * N_s // 4, :] = torch.Tensor([1.0, 1.0])
            self.y[i + 3 * N_s // 4, 0] = torch.Tensor([0.0])
        self.x = self.x + 0.01 * torch.randn((N_s, 2))
        self.len = N_s
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len
    # Plot the data
    def plot_stuff(self):
        plt.plot(self.x[self.y[:, 0] == 0, 0].numpy(), self.x[self.y[:, 0] == 0, 1].numpy(), 'o', label="y=0")
        plt.plot(self.x[self.y[:, 0] == 1, 0].numpy(), self.x[self.y[:, 0] == 1, 1].numpy(), 'ro', label="y=1")
        plt.legend()
We create a class for our model
class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = sigmoid(self.linear1(x))
        x = sigmoid(self.linear2(x))
        return x
We create a function to train our model
# Calculate the accuracy
def accuracy(model, data_set):
    return np.mean(data_set.y.view(-1).numpy() == (model(data_set.x)[:, 0] > 0.5).numpy())

def train(data_set, model, criterion, train_loader, optimizer, epochs=5):
    COST = []
    ACC = []
    for epoch in tqdm(range(epochs)):
        total = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
            #cumulative loss
            total += loss.item()
        ACC.append(accuracy(model, data_set))
        COST.append(total)
    return COST

the process for training is identical to logistic regression
criterion = nn.BCELoss()
data_set = XOR_Data()
train_loader = DataLoader(dataset=data_set, batch_size=1)
model = Net(2, 4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
train(data_set, model, criterion, train_loader, optimizer, epochs=500)
overfitting and underfitting
Solutions:
- use validation data to determine the optimum number of neurons (see the sketch after this list)
- get more data
- regularization: for example dropout
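A minimal sketch (not from the course) of the first point, assuming a separate validation set drawn from the same XOR_Data class and reusing the Net, accuracy and train definitions above:

# pick the hidden-layer size with the best validation accuracy (illustrative only)
validation_set = XOR_Data(N_s=40)
best_acc, best_H = 0.0, None
for H in [1, 2, 4, 8]:
    candidate = Net(2, H, 1)
    optimizer = torch.optim.SGD(candidate.parameters(), lr=0.01)
    train(data_set, candidate, criterion, train_loader, optimizer, epochs=500)
    acc = accuracy(candidate, validation_set)
    if acc > best_acc:
        best_acc, best_H = acc, H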
Ungraded lab
Multi-Class Neural Networks
using nn.Module
we don’t have sigmoid for the output, and D_out is our number of classes
class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = sigmoid(self.linear1(x))
        x = self.linear2(x)
        return x

using nn.Sequential
input_dim = 2
hidden_dim = 6
output_dim = 3
model = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.Sigmoid(),
    nn.Linear(hidden_dim, output_dim)
)
training
we create a validation and training dataset
import torchvision.datasets as dsets
import torchvision.transforms as transforms

train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

we create a validation and training loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=2000)

criterion = nn.CrossEntropyLoss()

we create the training function
from tqdm import tqdm

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}
    for epoch in tqdm(range(epochs)):
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        for x, y in validation_loader:
            #validation
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

We instantiate and train the model
input_dim = 28 * 28
hidden_dim = 100
output_dim = 10

model = Net(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # not in the original notes; the lab uses plain SGD
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=30)
To plot accuracy and loss
# Define a function to plot accuracy and loss
def plot_accuracy_loss(training_results):
    plt.subplot(2, 1, 1)
    plt.plot(training_results['training_loss'], 'r')
    plt.ylabel('loss')
    plt.title('training loss iterations')
    plt.subplot(2, 1, 2)
    plt.plot(training_results['validation_accuracy'])
    plt.ylabel('accuracy')
    plt.xlabel('epochs')
    plt.show()

plot_accuracy_loss(training_results)
To plot improperly classified items
count = 0
for x, y in validation_dataset:
    z = model(x.reshape(-1, 28 * 28))
    _, yhat = torch.max(z, 1)
    if yhat != y:
        show_data(x)
        count += 1
    if count >= 5:
        break
Ungraded lab
Backpropagation
Following the chain rule in the gradient calculation, the gradients can get closer and closer to 0 (the vanishing gradient problem), so the parameters of the early layers barely improve.
One way to deal with that is to change the activation function.
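A minimal sketch (not from the course) that makes the effect visible: chaining sigmoids multiplies derivatives that are each at most 0.25, while relu passes the gradient through unchanged for positive inputs.

import torch

x = torch.tensor(2.0, requires_grad=True)
y = torch.sigmoid(torch.sigmoid(torch.sigmoid(x)))
y.backward()
x.grad    # product of three small sigmoid derivatives
>> tensor(0.0048)

x = torch.tensor(2.0, requires_grad=True)
y = torch.relu(torch.relu(torch.relu(x)))
y.backward()
x.grad    # gradient passes through unchanged for positive x
>> tensor(1.)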
Activation functions
sigmoid, tanh, relu
sigmoid, tanh, relu in PyTorch
class Net_sigmoid(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_sigmoid, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = self.linear2(x)
        return x

class Net_tanh(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_tanh, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = torch.tanh(self.linear1(x))
        x = self.linear2(x)
        return x

class Net_relu(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_relu, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = self.linear2(x)
        return x

using nn.Sequential
model_tanh = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.Tanh(),
    nn.Linear(hidden_dim, output_dim)
)

model_relu = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.ReLU(),
    nn.Linear(hidden_dim, output_dim)
)
Ungraded lab
Ungraded lab
to monitor gpu usage: nvidia-smi -l 1
Week 5 - Deep neural networks
Learning Objectives
- building deep networks
- Dropout
- Neural Network initialization weights
- Gradient Descent with Momentum
notebook
Deep Neural Networks
Deep, following this course's definition, means a network with more than one hidden layer.
using nn.Module
import torch
import torch.nn as nn
from torch import sigmoid

class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
    def forward(self, x):
        x = sigmoid(self.linear1(x))
        x = sigmoid(self.linear2(x))
        x = self.linear3(x)
        return x

using nn.Sequential
input_dim = 2
hidden_dim1 = 6
hidden_dim2 = 4
output_dim = 3
model = nn.Sequential(
    nn.Linear(input_dim, hidden_dim1),
    nn.Sigmoid(),
    nn.Linear(hidden_dim1, hidden_dim2),
    nn.Sigmoid(),
    nn.Linear(hidden_dim2, output_dim)
)
training
there is no change compared to other networks
we create a validation and training dataset
import torchvision.datasets as dsets
import torchvision.transforms as transforms

train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

we create a validation and training loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=2000)

criterion = nn.CrossEntropyLoss()

we create the training function
from tqdm import tqdm

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}
    for epoch in tqdm(range(epochs)):
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        for x, y in validation_loader:
            #validation
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

We instantiate and train the model
input_dim = 28 * 28
hidden_dim1 = 50
hidden_dim2 = 50
output_dim = 10

model = Net(input_dim, hidden_dim1, hidden_dim2, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=30)
Ungraded lab - deep neural networks
Deep Neural Networks : nn.ModuleList()
jdc
this is a nice library that allows breaking the definition of a class across separate notebook cells
Installation is as simple as pip install jdc
and usage is
import jdc
and start a cell with %%add_to <your class name>
python implementation
import torch
import torch.nn as nn
from torch import sigmoid
import jdc

class Net(nn.Module):
    def __init__(self, Layers):
        super(Net, self).__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            self.hidden.append(nn.Linear(input_size, output_size))

Layers = [2, 3, 4, 3]
model = Net(Layers)

%%add_to Net
def forward(self, x):
    L = len(self.hidden)
    for (l, linear_transform) in zip(range(L), self.hidden):
        if l < L - 1:
            x = torch.relu(linear_transform(x))
        else:
            x = linear_transform(x)
    return x
Ungraded lab - nn.ModuleList()
8.1.2mulitclassspiralrulu_v2.ipynb
Dropout
using nn.Module
class Net(nn.Module):
    def __init__(self, in_size, n_hidden, out_size, p=0):
        super(Net, self).__init__()
        self.drop = nn.Dropout(p=p)
        self.linear1 = nn.Linear(in_size, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, out_size)
    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = self.drop(x)
        x = torch.relu(self.linear2(x))
        x = self.drop(x)
        x = self.linear3(x)
        return x

using nn.Sequential
model = nn.Sequential(
    nn.Linear(1, 10),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(10, 12),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(12, 1)
)
training
create data
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Create data class for creating dataset object
class Data(Dataset):
    # Constructor
    def __init__(self, N_SAMPLES=1000, noise_std=0.15, train=True):
        a = np.matrix([-1, 1, 2, 1, 1, -3, 1]).T
        self.x = np.matrix(np.random.rand(N_SAMPLES, 2))
        self.f = np.array(a[0] + (self.x) * a[1:3] + np.multiply(self.x[:, 0], self.x[:, 1]) * a[4] + np.multiply(self.x, self.x) * a[5:7]).flatten()
        self.a = a
        self.y = np.zeros(N_SAMPLES)
        self.y[self.f > 0] = 1
        self.y = torch.from_numpy(self.y).type(torch.LongTensor)
        self.x = torch.from_numpy(self.x).type(torch.FloatTensor)
        self.x = self.x + noise_std * torch.randn(self.x.size())
        self.f = torch.from_numpy(self.f)
        self.a = a
        self.len = N_SAMPLES  # not in the original notes, but needed by __len__
        if train == True:
            torch.manual_seed(1)
            self.x = self.x + noise_std * torch.randn(self.x.size())
            torch.manual_seed(0)

    # Getter
    def __getitem__(self, index):
        return self.x[index], self.y[index]

    # Get Length
    def __len__(self):
        return self.len

    # Plot the diagram
    def plot(self):
        X = data_set.x.numpy()
        y = data_set.y.numpy()
        h = .02
        x_min, x_max = X[:, 0].min(), X[:, 0].max()
        y_min, y_max = X[:, 1].min(), X[:, 1].max()
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        Z = data_set.multi_dim_poly(np.c_[xx.ravel(), yy.ravel()]).flatten()
        f = np.zeros(Z.shape)
        f[Z > 0] = 1
        f = f.reshape(xx.shape)

        plt.title('True decision boundary and sample points with noise')
        plt.plot(self.x[self.y == 0, 0].numpy(), self.x[self.y == 0, 1].numpy(), 'bo', label='y=0')
        plt.plot(self.x[self.y == 1, 0].numpy(), self.x[self.y == 1, 1].numpy(), 'ro', label='y=1')
        plt.contour(xx, yy, f, cmap=plt.cm.Paired)
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.legend()

    # Make a multidimensional polynomial function
    def multi_dim_poly(self, x):
        x = np.matrix(x)
        out = np.array(self.a[0] + (x) * self.a[1:3] + np.multiply(x[:, 0], x[:, 1]) * self.a[4] + np.multiply(x, x) * self.a[5:7])
        out = np.array(out)
        return out
instantiate the model
model_drop = Net(2, 300, 2, p=0.5)

The train method tells the model we are in the training phase, which enables dropout; later the eval method tells the model it is in the evaluation phase, which turns dropout off.
model_drop.train()

optimizer = torch.optim.Adam(model_drop.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
data_set = Data()
validation_set = Data(train=False)

# Initialize the LOSS dictionary to store the loss
LOSS = {}
LOSS['training data dropout'] = []
LOSS['validation data dropout'] = []
train the model
# Train the model
from tqdm import tqdm

epochs = 500

def train_model(epochs):
    for epoch in tqdm(range(epochs)):
        #all the samples are used for training
        yhat_drop = model_drop(data_set.x)
        loss_drop = criterion(yhat_drop, data_set.y)

        #store the loss for both the training and validation data
        LOSS['training data dropout'].append(loss_drop.item())
        model_drop.eval()
        LOSS['validation data dropout'].append(criterion(model_drop(validation_set.x), validation_set.y).item())
        model_drop.train()

        optimizer.zero_grad()
        loss_drop.backward()
        optimizer.step()

train_model(epochs)

# The function for calculating accuracy
def accuracy(model, data_set):
    _, yhat = torch.max(model(data_set.x), 1)
    return (yhat == data_set.y).numpy().mean()

# Print out the accuracy of the model with dropout
print("The accuracy of the model with dropout: ", accuracy(model_drop, validation_set))
>> The accuracy of the model with dropout:  0.866
Ungraded lab - dropout classification
Ungraded lab - dropout regression
Neural Network initialization weights
Different methods exist:
- uniform distribution for parameters: we make the lower bound of the distribution \(-1/\sqrt{L_{in}}\) and the upper bound \(+1/\sqrt{L_{in}}\) (a sketch of this scaled range is given after this list). See LeCun, Yann A., et al. "Efficient BackProp." Neural Networks: Tricks of the Trade. Springer, Berlin, Heidelberg, 2012. 9-48.
linear = nn.Linear(input_size, output_size)
linear.weight.data.uniform_(0, 1)
- Xavier method: Xavier initialization is another popular method, used in conjunction with the tanh activation. It takes into consideration the number of input neurons \(L_{in}\) as well as the number of neurons in the next layer \(L_{out}\). See Glorot, Xavier, and Yoshua Bengio. "Understanding the difficulty of training deep feedforward neural networks", 2010.
linear = nn.Linear(input_size, output_size)
torch.nn.init.xavier_uniform_(linear.weight)
- He method: for relu we use the He initialization method; after creating a linear object, we initialize the weights as follows. See He, Kaiming, et al. "Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification".
linear = nn.Linear(input_size, output_size)
torch.nn.init.kaiming_uniform_(linear.weight, nonlinearity='relu')
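A minimal sketch (not from the course labs) of the scaled uniform range described in the first bullet, assuming input_size and output_size are already defined:

import math

linear = nn.Linear(input_size, output_size)
bound = 1 / math.sqrt(linear.in_features)       # 1/sqrt(L_in)
linear.weight.data.uniform_(-bound, bound)      # range [-1/sqrt(L_in), +1/sqrt(L_in)]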
Ungraded lab - initialization
Ungraded lab - Xavier initialization
Ungraded lab - He initialization
Gradient Descent with Momentum
PyTorch implementation
In PyTorch, this is just defined at the optimizer level:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.4)
Ungraded lab - momentum with different polynomial
Ungraded lab - Neural Network momentum
Batch Normalization
The 𝛾, 𝛽 parameters are actually scale and shift parameters, which we're going to learn via training.
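For reference, the standard batch-normalization computation (not written out in these notes): each activation is normalized with the mini-batch mean \(\mu_B\) and variance \(\sigma_B^2\), then scaled and shifted by the learned \(\gamma\) and \(\beta\):
\[\hat{z}^{(i)}=\frac{z^{(i)}-\mu_B}{\sqrt{\sigma_B^2+\epsilon}}, \qquad \tilde{z}^{(i)}=\gamma\,\hat{z}^{(i)}+\beta\]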
using nn.Module
class Net_BatchNorm(nn.Module):
    def __init__(self, in_size, n_hidden1, n_hidden2, out_size):
        super(Net_BatchNorm, self).__init__()
        self.linear1 = nn.Linear(in_size, n_hidden1)
        self.linear2 = nn.Linear(n_hidden1, n_hidden2)
        self.linear3 = nn.Linear(n_hidden2, out_size)
        self.bn1 = nn.BatchNorm1d(n_hidden1)
        self.bn2 = nn.BatchNorm1d(n_hidden2)
    def forward(self, x):
        x = torch.sigmoid(self.bn1(self.linear1(x)))
        x = torch.sigmoid(self.bn2(self.linear2(x)))
        x = self.linear3(x)
        return x
Ungraded lab - Batch normalization
comparing training loss per iteration and validation accuracy with and without batch normalization.
Week 6 - Convolutional neural networks
Learning Objectives
- Convolution
- Activation Functions
- Max Pooling
- Convolution: Multiple Channels
- Convolutional Neural Network
- TORCH-VISION MODELS
notebook
Convolution
convolution explanation from stanford course CS231
convolution
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)

image = torch.zeros(1, 1, 5, 5)
image[0, 0, :, 2] = 1
image
>> tensor([[[[0., 0., 1., 0., 0.],
             [0., 0., 1., 0., 0.],
             [0., 0., 1., 0., 0.],
             [0., 0., 1., 0., 0.],
             [0., 0., 1., 0., 0.]]]])

z = conv(image)
z
>> tensor([[[[ 0.6065,  0.0728, -0.7915],
             [ 0.6065,  0.0728, -0.7915],
             [ 0.6065,  0.0728, -0.7915]]]], grad_fn=<ThnnConv2DBackward>)

conv.state_dict()
>> OrderedDict([('weight',
                 tensor([[[[ 0.1132, -0.0418,  0.3140],
                           [-0.2261, -0.1528, -0.3270],
                           [-0.2140, -0.1900,  0.2127]]]])),
                ('bias', tensor([0.0423]))])
stride
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=2)

zero padding
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=2, padding=1)

size of activation map
Feature size = ((Image size + 2 * Padding size − Kernel size) / Stride) + 1
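For example, for the 5×5 image and kernel size 3 used above (padding 0, stride 1): ((5 + 2·0 − 3) / 1) + 1 = 3, which matches the 3×3 activation map z returned by conv(image).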
Ungraded lab - What’s convolution
Activation Functions and Max Pooling
Activation function using nn.Module
import torch

image = torch.zeros(1, 1, 5, 5)
image[0, 0, :, 2] = 1
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
z = conv(image)
A = torch.relu(z)

Activation function using nn.Sequential
relu = nn.ReLU()
A = relu(z)

Max pooling
max = nn.MaxPool2d(2, stride=1)
max(image)

torch.max_pool2d(image, stride=1, kernel_size=2)
Ungraded lab - Activation Functions and Max Pooling
Multiple Input and Output Channels
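A minimal sketch (not from the course labs) of a convolution with several input and output channels, reusing the torch and nn imports above; each of the out_channels filters spans all in_channels:

conv_multi = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=3)
conv_multi.state_dict()['weight'].shape
>> torch.Size([3, 2, 3, 3])   # (out_channels, in_channels, kernel, kernel)

images = torch.zeros(1, 2, 5, 5)   # one sample with 2 channels
conv_multi(images).shape
>> torch.Size([1, 3, 3, 3])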
Ungraded lab - Activation Functions and Max Pooling
Convolutional Neural Network
using nn.Module
class CNN(nn.Module):
    def __init__(self, out_1=2, out_2=1):
        super(CNN, self).__init__()
        #first convolutional layer
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=out_1, kernel_size=2, padding=0)
        #max pooling
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=1)
        #second convolutional layer
        self.cnn2 = nn.Conv2d(in_channels=out_1, out_channels=out_2, kernel_size=2, stride=1, padding=0)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=1)
        #fully connected layer
        self.fc1 = nn.Linear(out_2*7*7, 2)
    def forward(self, x):
        #first convolutional layer
        x = self.cnn1(x)
        #activation function
        x = torch.relu(x)
        #max pooling
        x = self.maxpool1(x)
        #second convolutional layer
        x = self.cnn2(x)
        #activation function
        x = torch.relu(x)
        #max pooling
        x = self.maxpool2(x)
        #flatten output
        x = x.view(x.size(0), -1)
        #fully connected layer
        x = self.fc1(x)
        return x
training
n_epochs = 10
cost_list = []
accuracy_list = []
N_test = len(validation_dataset)
cost = 0
#n_epochs
for epoch in range(n_epochs):
    cost = 0
    for x, y in train_loader:
        #clear gradient
        optimizer.zero_grad()
        #make a prediction
        z = model(x)
        #calculate loss
        loss = criterion(z, y)
        #calculate gradients of parameters
        loss.backward()
        #update parameters
        optimizer.step()
        cost += loss.item()
    cost_list.append(cost)
    correct = 0
    #perform a prediction on the validation data
    for x_test, y_test in validation_loader:
        z = model(x_test)
        _, yhat = torch.max(z.data, 1)
        correct += (yhat == y_test).sum().item()
    accuracy = correct / N_test
    accuracy_list.append(accuracy)
Ungraded lab - Convolutional Neural Network Simple example
9.4.1ConvolutionalNeralNetworkSimple example.ipynb
def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
    #by Duane Nielsen
    from math import floor
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    h = floor(((h_w[0] + (2 * pad) - (dilation * (kernel_size[0] - 1)) - 1) / stride) + 1)
    w = floor(((h_w[1] + (2 * pad) - (dilation * (kernel_size[1] - 1)) - 1) / stride) + 1)
    return h, w

out = conv_output_shape((11, 11), kernel_size=2, stride=1, pad=0, dilation=1)
print(out)
out1 = conv_output_shape(out, kernel_size=2, stride=1, pad=0, dilation=1)
print(out1)
out2 = conv_output_shape(out1, kernel_size=2, stride=1, pad=0, dilation=1)
print(out2)
out3 = conv_output_shape(out2, kernel_size=2, stride=1, pad=0, dilation=1)
print(out3)
>> (10, 10)
   (9, 9)
   (8, 8)
   (7, 7)
Ungraded lab - Convolutional Neural Network MNIST
Ungraded lab - Convolutional Neural Networks with Batch Norm
GPU in PyTorch
torch.cuda.is_available()
>> True

device = torch.device('cuda:0')

torch.tensor([1, 2, 32, 4]).to(device)
>> tensor([ 1,  2, 32,  4], device='cuda:0')

model = CNN()
model.to(device)
Training on GPU
for epoch in range(num_epochs):
    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        predictions = model(features)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
TORCH-VISION MODELS
load resnet18 with pretrained parameters
import torch
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn

torch.manual_seed(0)

model = models.resnet18(pretrained=True)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

composed = transforms.Compose([transforms.Resize(224),
                               transforms.ToTensor(),
                               transforms.Normalize(mean, std)])

train_dataset = Dataset(transform=composed, train=True)
validation_dataset = Dataset(transform=composed)
freeze parameters and add a final layer to be trained
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(512, 7)

train_loader = DataLoader(dataset=train_dataset, batch_size=15)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=10)
provide only the parameters to be trained to the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam([parameters for parameters in model.parameters() if parameters.requires_grad], lr=0.003)

N_EPOCHS = 20
loss_list = []
accuracy_list = []
correct = 0
n_test = len(validation_dataset)
train the model, switching between model.train() and model.eval()
for epoch in range(N_EPOCHS):
    loss_sublist = []
    for x, y in train_loader:
        model.train()
        optimizer.zero_grad()
        z = model(x)
        loss = criterion(z, y)
        loss_sublist.append(loss.data.item())
        loss.backward()
        optimizer.step()
    loss_list.append(np.mean(loss_sublist))
    correct = 0
    for x_test, y_test in validation_loader:
        model.eval()
        z = model(x_test)
        _, yhat = torch.max(z.data, 1)
        correct += (yhat == y_test).sum().item()
    accuracy = correct / n_test
    accuracy_list.append(accuracy)
Week 7 - Fashion MNIST
Learning Objectives
- Apply all you have learned to train a Convolutional Neural Network