In [1]:
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms
# (torchvision, numpy, and transforms are not needed in this example;
#  they are imported here for use in later sections.)
In [2]:
# Create tensors of shape (10, 3) and (10, 2)
x = torch.randn(10, 3)
y = torch.randn(10, 2)
In [3]:
# Build a fully connected layer
linear = nn.Linear(3, 2)
print('w: ', linear.weight)
print('b: ', linear.bias)
w:  Parameter containing:
tensor([[-0.4826,  0.1809, -0.5195],
        [ 0.0754,  0.3776,  0.2431]], requires_grad=True)
b:  Parameter containing:
tensor([ 0.4482, -0.2941], requires_grad=True)
In [4]:
# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)
In [5]:
# Forward pass
pred = linear(x)
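Under the hood, nn.Linear computes x @ W^T + b. As a quick sanity check (a minimal sketch, not part of the original notebook), the prediction can be reproduced by hand from the weight and bias printed above:

pred_manual = x.matmul(linear.weight.t()) + linear.bias
print(torch.allclose(pred, pred_manual))  # True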
In [6]:
# Compute loss
loss = criterion(pred, y)
print('loss: ', loss.item())
loss: 0.8285614252090454
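nn.MSELoss with its default reduction='mean' averages the squared error over all 10 x 2 = 20 elements, so the same value can be computed directly (a quick check using the tensors above):

manual_loss = ((pred - y) ** 2).mean()
print(manual_loss.item())  # matches loss.item() above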
In [7]:
# Backward pass
loss.backward()
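Note that .backward() accumulates gradients into the .grad attributes rather than overwriting them, so in a full training loop the gradients must be cleared before each backward pass. The usual pattern (shown as a sketch, not meant to be re-run on this already-consumed graph):

optimizer.zero_grad()  # reset accumulated gradients to zero
loss.backward()        # then compute fresh gradients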
In [8]:
# Print out the gradients
print('dL/dw: ', linear.weight.grad)
print('dL/db: ', linear.bias.grad)
dL/dw:  tensor([[-0.0552, -0.0734, -0.3314],
        [ 0.4246,  0.3199,  0.0844]])
dL/db:  tensor([-0.3375,  0.1542])
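These gradients can also be derived analytically: for L = mean((pred - y)^2) with pred = x W^T + b, we have dL/dW = (2/N) (pred - y)^T x and dL/db = (2/N) sum_i (pred_i - y_i), where N = 20 is the total element count. A sketch verifying this against autograd (not part of the original notebook):

with torch.no_grad():
    n = pred.numel()                          # 10 * 2 = 20 elements
    dW = 2.0 / n * (pred - y).t().matmul(x)   # shape (2, 3)
    db = 2.0 / n * (pred - y).sum(dim=0)      # shape (2,)
    print(torch.allclose(dW, linear.weight.grad))  # True
    print(torch.allclose(db, linear.bias.grad))    # True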
In [9]:
# 1-step gradient descent
optimizer.step()
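optimizer.step() applies the SGD update w <- w - lr * dL/dw to every registered parameter. An equivalent low-level version, shown only for illustration (running it in addition to optimizer.step() would apply the update twice):

with torch.no_grad():
    for param in linear.parameters():
        param -= 0.01 * param.grad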
In [10]:
# Print out the loss after 1-step gradient descent.
pred = linear(x)
loss = criterion(pred, y)
print('loss after 1 step optimization: ', loss.item())
loss after 1 step optimization: 0.8231405019760132
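Putting the pieces together, repeating the forward/backward/update cycle drives the loss down further. A minimal training-loop sketch reusing the objects defined above (the step count of 100 is arbitrary):

for step in range(100):
    optimizer.zero_grad()      # clear accumulated gradients
    pred = linear(x)           # forward pass
    loss = criterion(pred, y)  # compute loss
    loss.backward()            # backward pass
    optimizer.step()           # 1-step gradient descent
print('loss after 100 steps: ', loss.item())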