Intro ANN#

  • Review numpy and pytorch

  • Perceptron with numpy and pytorch

  • Training a basic network with pytorch

import numpy as np
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 3
      1 import numpy as np
      2 import matplotlib.pyplot as plt
----> 3 import torch
      4 from tqdm import tqdm

ModuleNotFoundError: No module named 'torch'

Derivatives and Gradients for Optimization#

Consider:

\[f(x) = x^2\]

The big idea is that we can use the tangent line at a point to approximate the function itself. Thus, we walk on the tangent line in the negative direction of the slope. If we take small enough steps and readjust each iteration we should find ourselves at the bottom of the graph.

\[x_n = x_{n-1} - \alpha f'(x_{n-1})\]

Start at the point \((1, 1)\) and note that the slope of the tangent line is \(f'(1) = 2 \cdot 1 = 2\).

def f(x):
  """Objective to minimize: the parabola f(x) = x^2 (scalar or numpy array)."""
  return x * x
# Plot f on [-2, 2] and mark the starting point (1, 1) in red.
# NOTE: `x` is module-level state that later cells reuse.
x = np.linspace(-2, 2, 100)
plt.plot(x, f(x))
plt.plot(1, 1, 'ro')
[<matplotlib.lines.Line2D at 0x7ce687367880>]
_images/ea46f88f4addd8df1130ea193559680a3affcfcead2e45d044b862651d190a0a.png
# One gradient-descent update by hand: start - step * slope,
# with learning rate 0.1 and slope f'(1) = 2.
1 - .1*(2*1)
0.8
#next step from x = 0.8, where the slope is f'(0.8) = 1.6
.8 - .1*(2*.8)
0.64

PROBLEM

Complete the loop below, updating the list xs with each iteration of gradient descent.

# Solution to the exercise: iterate x_n = x_{n-1} - alpha * f'(x_{n-1})
# with f'(x) = 2x and learning rate alpha = 0.1, recording each iterate.
# Each step multiplies the current point by 0.8, so the iterates decay
# geometrically toward the minimum at x = 0.
xs = [1]
for i in range(10):
  xs.append(xs[-1] - 0.1 * (2 * xs[-1]))
xs = np.array(xs)
# Overlay the gradient-descent iterates (red dots) on the curve to check
# visually that they converge to the minimum at x = 0.
plt.plot(x, f(x))
plt.plot(xs, f(xs), 'ro')
plt.title('Did we find the minimum?')
plt.grid();
_images/673acac393a19ca95aa1c377655f2b241bd90bdfe847f312814001e639b4b9a2.png

Introduction to Artificial Neural Networks#

For the examples in our class, we will use the pytorch library for modeling with neural networks. The important object here is the tensor object, similar to the numpy.array but with some extra gradient functionality.

Playground

import torch
import torch.nn as nn
import torch.optim as optim
# Tensor versions of the data; float32 is the default dtype torch layers expect.
xt = torch.tensor(x, dtype = torch.float32)
yt = torch.tensor(f(x), dtype = torch.float32)
# Mean-squared-error criterion used for the fit below.
loss_fn = nn.MSELoss()
def model(x, w, b):
  """Quadratic model: return w * x^2 + b (elementwise on tensors/arrays)."""
  squared = x ** 2
  return b + w * squared
# Scalar (0-dim) trainable parameters, both initialized to 1;
# requires_grad=True makes autograd track them.
w = torch.ones((), requires_grad=True)
b = torch.ones((), requires_grad=True)
model(xt, w, b)
tensor([5.0000, 4.8400, 4.6833, 4.5298, 4.3797, 4.2327, 4.0891, 3.9487, 3.8115,
        3.6777, 3.5471, 3.4198, 3.2957, 3.1749, 3.0573, 2.9431, 2.8321, 2.7243,
        2.6198, 2.5186, 2.4207, 2.3260, 2.2346, 2.1464, 2.0615, 1.9799, 1.9015,
        1.8264, 1.7546, 1.6861, 1.6208, 1.5587, 1.4999, 1.4444, 1.3922, 1.3432,
        1.2975, 1.2551, 1.2159, 1.1800, 1.1473, 1.1179, 1.0918, 1.0690, 1.0494,
        1.0331, 1.0200, 1.0102, 1.0037, 1.0004, 1.0004, 1.0037, 1.0102, 1.0200,
        1.0331, 1.0494, 1.0690, 1.0918, 1.1179, 1.1473, 1.1800, 1.2159, 1.2551,
        1.2975, 1.3432, 1.3922, 1.4444, 1.4999, 1.5587, 1.6208, 1.6861, 1.7546,
        1.8264, 1.9015, 1.9799, 2.0615, 2.1464, 2.2346, 2.3260, 2.4207, 2.5186,
        2.6198, 2.7243, 2.8321, 2.9431, 3.0573, 3.1749, 3.2957, 3.4198, 3.5471,
        3.6777, 3.8115, 3.9487, 4.0891, 4.2327, 4.3797, 4.5298, 4.6833, 4.8400,
        5.0000], grad_fn=<AddBackward0>)
# One forward pass and one backward pass by hand; backward() populates
# w.grad and b.grad with d(loss)/dw and d(loss)/db.
yhat = model(xt, w, b)
loss = loss_fn(yt, yhat)
loss.backward()
w.grad
tensor(2.7205)
b.grad  # gradient of the MSE loss with respect to the bias
tensor(2.)
# NOTE(review): w.grad and b.grad already hold gradients from the manual
# loss.backward() call above. Zero the gradients *before* backward so the
# first optimizer.step() does not act on stale, accumulated values
# (torch accumulates into .grad rather than overwriting it).
optimizer = optim.SGD([w, b], lr = 0.1)
for epoch in tqdm(range(20)):
  yhat = model(xt, w, b)
  loss = loss_fn(yt, yhat)
  optimizer.zero_grad()  # clear gradients from the previous iteration
  loss.backward()
  optimizer.step()
100%|██████████| 20/20 [00:00<00:00, 2071.62it/s]
w
tensor(0.9263, requires_grad=True)
b
tensor(0.1602, requires_grad=True)
def soln(x):
  """Evaluate the fitted curve w*x^2 + b, detaching the trained tensors to NumPy."""
  w_np = w.detach().numpy()
  b_np = b.detach().numpy()
  return w_np * x ** 2 + b_np
# Compare the target curve with the trained model (dashed red).
plt.plot(x, f(x))
plt.plot(x, soln(x), '--r')
[<matplotlib.lines.Line2D at 0x7ce6874dfd00>]
_images/90b9eb0ad323093f5cc161c7ca8dc75ddf6a7cf3a7b86864feb3d18741352174.png

Pytorch and Regression#

# Synthetic linear data: y = 3x + 4 + Gaussian noise (sd = 3).
np.random.seed(11)
# np.random.random_integers is deprecated (removed in modern NumPy).
# randint(1, 30 + 1) draws the identical values from the same seeded stream,
# as the original DeprecationWarning itself recommends.
x = np.random.randint(low = 1, high = 31, size = 15)
y = 3*x + 4 + np.random.normal(size = len(x), scale = 3)
<ipython-input-616-75502d753c78>:2: DeprecationWarning: This function is deprecated. Please call randint(1, 30 + 1) instead
  x = np.random.random_integers(low = 1, high = 30, size = 15)
plt.scatter(x, y)  # visualize the noisy linear relationship
<matplotlib.collections.PathCollection at 0x7ce685bd5150>
_images/6ca7e60eac77985752edc623d58ff78fcbd5eb8a7e943aecd8d1c92985331730.png
#rule 1 -- turn everything into a tensor (float32, the dtype torch layers expect)
xt = torch.tensor(x, dtype = torch.float32)
yt = torch.tensor(y, dtype = torch.float32)
xt
tensor([26., 17., 28., 18., 24., 14., 13.,  2.,  8., 19., 25., 14., 29., 17.,
        30.])
yt
tensor([80.3901, 55.9462, 89.2632, 54.8032, 73.3413, 44.5728, 45.0690, 11.6836,
        24.0834, 57.6416, 81.2105, 50.7239, 90.9068, 52.9497, 97.2869])
### define the model: a single linear layer y = w*x + b
# (parameters are randomly initialized; shown below)
model = nn.Linear(in_features=1, out_features=1)
list(model.parameters())
[Parameter containing:
 tensor([[-0.2880]], requires_grad=True),
 Parameter containing:
 tensor([-0.1145], requires_grad=True)]
# MSE criterion + plain SGD on the layer's parameters.
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01)
# nn.Linear expects a 2-D (batch, features) input, hence the reshape.
model(xt.reshape(-1, 1))
tensor([[-7.6035],
        [-5.0112],
        [-8.1796],
        [-5.2992],
        [-7.0275],
        [-4.1471],
        [-3.8590],
        [-0.6906],
        [-2.4188],
        [-5.5873],
        [-7.3155],
        [-4.1471],
        [-8.4676],
        [-5.0112],
        [-8.7557]], grad_fn=<AddmmBackward0>)
# Loss of the untrained model (large, since parameters are random).
yhat = model(xt.reshape(-1, 1))
loss_fn(yt.reshape(-1, 1), yhat )
tensor(5064.9062, grad_fn=<MseLossBackward0>)
# A single manual optimization step. With MSE in the thousands and lr=0.01,
# one step swings the weight a long way (visible in the parameters below).
loss = loss_fn(yhat, yt.reshape(-1, 1))
loss.backward()
optimizer.step()
list(model.parameters())
[Parameter containing:
 tensor([[28.8443]], requires_grad=True),
 Parameter containing:
 tensor([1.2100], requires_grad=True)]
# Fresh model + Adam; the full training loop pattern:
# forward -> loss -> zero_grad -> backward -> step, tracking loss per epoch.
model = nn.Linear(in_features=1, out_features=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.1)

losses = []
for epoch in tqdm(range(100)):
  yhat = model(xt.reshape(-1, 1))
  loss = loss_fn( yhat, yt.reshape(-1, 1))
  optimizer.zero_grad()   # clear accumulated gradients before backward
  loss.backward()
  optimizer.step()
  losses.append(loss.item())  # .item() detaches the scalar for plotting
100%|██████████| 100/100 [00:00<00:00, 1651.63it/s]
# Side-by-side: training-loss curve and the fitted line over the data.
fig, ax = plt.subplots(1, 2, figsize = (15, 5))
ax[0].plot(losses, '--r')
ax[0].set_title('Training Loss')
ax[0].grid();

ax[1].scatter(x, y)
# detach() drops the autograd graph so the predictions can go to numpy
ax[1].plot(x, model(xt.reshape(-1, 1)).detach().numpy(), '-r')
ax[1].set_title('Trained Model')
ax[1].grid();
_images/1da02e4c4abd51ba168addf2167f710e3bd384eaf5e47d68d6a7ca455703ebac.png
import pandas as pd

Binary Classification#

Same process but we add a Sigmoid to the end of the network to interpret the output as probabilities.

Loss will use BCELoss or Binary Cross Entropy – a measure associated with the quality of predictions in binary classification.

\[\text{BCE Loss} = -\frac{1}{n} \sum_{i = 1}^n \left[ y_i \log(p_i) + (1 - y_i)\log(1 - p_i) \right]\]
from sklearn.datasets import make_blobs
### make a basic classification dataset: two gaussian blobs in 2-D,
### 100 samples total, labels y in {0, 1}
X, y = make_blobs(centers = 2, center_box=(-3, 3), random_state = 22)
X.shape
(100, 2)
# Scatter the two features, colored by class label.
plt.scatter(X[:, 0], X[:, 1], c = y)
plt.grid()
plt.title('Binary Classification Data');
_images/99bc0bec019d85e38039be1439769d4f7979ad5b90b3413de85c6d15dc70e1fe.png
# Linear layer + Sigmoid so the output is a probability in (0, 1).
model = nn.Sequential(nn.Linear(in_features=2, out_features=1), nn.Sigmoid())
# WE NEED BINARY CLASSIFICATION LOSS
# NOTE: named `loss` here (the criterion object), unlike `loss_fn` above.
loss = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)
# Rebind X, y from numpy arrays to float32 tensors for torch.
X = torch.tensor(X, dtype = torch.float32)
y = torch.tensor(y, dtype = torch.float32)
losses = []
for epoch in tqdm(range(100)):
  #make some predictions (probabilities, shape (100, 1))
  yhat = model(X)
  #evaluate the predictions; unsqueeze(1) makes y (100, 1) to match yhat
  loss_val = loss(yhat, y.unsqueeze(1))
  #use those predictions to update the parameters
  optimizer.zero_grad() #clearing out any prior gradient info
  loss_val.backward() #computes derivatives/gradients of loss function
  optimizer.step() #steps towards minimum values
  #keep track of how we are doing
  losses.append(loss_val.item())
100%|██████████| 100/100 [00:00<00:00, 2096.67it/s]
yhat.dtype
torch.float32
#the model returns probabilities (sigmoid outputs in (0, 1)); first 10 shown
model(X)[:10]
tensor([[0.7954],
        [0.0077],
        [0.0995],
        [0.0091],
        [0.0183],
        [0.8861],
        [0.9653],
        [0.0081],
        [0.1589],
        [0.9119]], grad_fn=<SliceBackward0>)
# Threshold probabilities at 0.5 to get hard 0/1 class predictions.
torch.where(model(X) > 0.5, 1, 0).flatten()
tensor([1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
        0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
        1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
        1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
        1, 1, 0, 0])
y
tensor([1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1.,
        1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1., 0.,
        1., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0.,
        1., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0.,
        1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 0.,
        1., 1., 1., 1., 0., 0., 1., 1., 0., 0.])
# Element-wise agreement between thresholded predictions and true labels.
torch.where(model(X) > 0.5, 1, 0).flatten() == y
tensor([ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True, False,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True])
# Count of correct predictions (99 of the 100 samples).
(torch.where(model(X) > 0.5, 1, 0).flatten() == y).sum()
tensor(99)
X.shape
torch.Size([100, 2])
# Threshold the sigmoid probabilities at 0.5, not 0: sigmoid output is
# strictly positive, so `> 0` would label every single sample as class 1.
yhat = torch.where(model(X) > 0.5, 1, 0)
yhat.shape
torch.Size([100, 1])
# Element-wise comparison of the true labels against the predictions above.
y == yhat.flatten()
tensor([ True, False, False, False, False,  True,  True, False, False,  True,
        False, False,  True, False, False,  True,  True,  True,  True,  True,
         True, False,  True, False, False,  True, False, False, False,  True,
         True,  True, False, False,  True, False,  True,  True, False, False,
        False, False,  True,  True, False, False,  True,  True,  True,  True,
        False,  True, False, False,  True, False,  True,  True,  True, False,
         True, False, False,  True, False, False,  True, False,  True, False,
        False, False,  True, False, False,  True, False, False,  True,  True,
         True,  True,  True,  True,  True, False, False,  True, False, False,
         True,  True,  True,  True, False, False,  True,  True, False, False])

Image Classification#

from torchvision.datasets import FashionMNIST
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
# 28x28 grayscale clothing images, 10 classes; ToTensor() scales pixels to [0, 1].
train = FashionMNIST(root = './data', train = True, download = True, transform = ToTensor())
test = FashionMNIST(root = './data', train = False, download = True, transform=ToTensor())
# Mini-batches of 10; shuffle the training set each epoch, keep test order fixed.
trainloader = DataLoader(train, batch_size = 10, shuffle = True)
testloader = DataLoader(test, batch_size = 10, shuffle = False)
# train[0] is an (image, label) pair; squeeze(0) drops the channel dim for imshow.
plt.imshow(train[0][0].squeeze(0), cmap = 'gray')
<matplotlib.image.AxesImage at 0x7ce685789810>
_images/88678bdacd7b14a8341296df31458f32781cef8476500ceb3484b38bddfb6da2.png
# Use the GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Multinomial logistic regression: flatten each 28x28 image to a 784-vector,
# then one linear layer to 10 class logits (CrossEntropyLoss applies softmax).
model = nn.Sequential(nn.Flatten(),
                      nn.Linear(in_features=28*28, out_features=10))
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)
device
device(type='cuda')
# Mini-batch training: move each batch to the device, then the standard
# forward -> loss -> zero_grad -> backward -> step cycle.
for epoch in tqdm(range(10)):
  for x, y in trainloader:
    x = x.to(device)
    y = y.to(device)
    yhat = model(x)
    loss = loss_fn(yhat, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
100%|██████████| 10/10 [02:11<00:00, 13.19s/it]
# Predicted classes for the last training batch (argmax over the 10 logits).
model(x).argmax(dim = 1)
tensor([7, 5, 4, 4, 3, 4, 8, 3, 7, 4], device='cuda:0')
y
tensor([7, 5, 4, 4, 3, 4, 8, 3, 7, 4], device='cuda:0')
# Correct predictions in the last training batch (all 10 of 10 here).
(model(x).argmax(dim = 1) == y).sum()
tensor(10, device='cuda:0')
# Test-set accuracy. This is inference only, so run under torch.no_grad():
# the original built an autograd graph for every batch for no reason,
# wasting time and memory. (model.eval() is a no-op here — the network is
# just Flatten + Linear, with no dropout/batch-norm layers.)
correct = 0
total = 0
with torch.no_grad():
    for x, y in testloader:
        x = x.to(device)
        y = y.to(device)
        yhat = model(x)
        correct += (yhat.argmax(dim = 1) == y).sum()
        total += len(y)
print(f'Accuracy: {correct/total}')
Accuracy: 0.8136999607086182