Skip to content


Merge pull request #76 from Ethan-Chen-plus/master
Browse files Browse the repository at this point in the history
update ch03
  • Loading branch information
KMnO4-zx authored Feb 3, 2024
2 parents 0f281a9 + 7f62429 commit ef0a5d5
Show file tree
Hide file tree
Showing 6 changed files with 550 additions and 32 deletions.
Empty file removed codes/.gitkeep
Empty file.
126 changes: 126 additions & 0 deletions codes/ch03/
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# ------------------------------
#用代码验证电阻为$30 \Omega$的电流与电压的计算公式
# ------------------------------

import torch
import random

import numpy as np
from d2l import torch as d2l
# Generate data: the target follows y = 30 * x (resistance of 30 ohms)
x = torch.randn(100, 1)
y = 30 * x

# Define the model: one linear unit; its weight is the estimated resistance
model = torch.nn.Linear(1, 1)

# Define the loss function and the optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Train the model with full-batch gradient descent
for epoch in range(500):
    # Forward pass
    y_pred = model(x)

    # Compute the loss
    loss = criterion(y_pred, y)

    # Backward pass: clear stale gradients (backward() accumulates into
    # .grad), back-propagate, then apply the parameter update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


# ------------------------------
# Verify the first and second derivatives of y = x ** 3 with autograd
# ------------------------------

x = torch.randn((2), requires_grad=True)
y = x ** 3
# First derivative. create_graph=True records the derivative computation
# itself so that it can be differentiated a second time.
dy = torch.autograd.grad(y, x, grad_outputs=torch.ones_like(x),
                         retain_graph=True, create_graph=True)

# Second derivative
dy2 = torch.autograd.grad(dy, x, grad_outputs=torch.ones_like(x))

# Compare against the analytic derivatives. allclose is used instead of ==
# because exact floating-point equality is fragile, and the result is printed
# so the check is visible when this runs as a script.
print('dy/dx == 3 * x**2:', torch.allclose(dy[0], 3 * x ** 2))
print('d2y/dx2 == 6 * x:', torch.allclose(dy2[0], 6 * x))

# ------------------------------
# 尝试使用不同的学习率,观察损失函数值下降的快慢。
# ------------------------------

# Generate the dataset
def synthetic_data(w, b, num_examples):  #@save
    """Generate a noisy linear dataset y = Xw + b + noise.

    Args:
        w: 1-D weight tensor; its length sets the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of rows to generate.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(num_examples, len(w))``
        with standard-normal entries and ``y`` has shape
        ``(num_examples, 1)``.
    """
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    # Additive Gaussian observation noise with standard deviation 0.01
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

# Ground-truth weights and bias used to synthesize the data
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# Draw 1000 examples from the linear model defined by true_w and true_b
features, labels = synthetic_data(true_w, true_b, 1000)

# Read the dataset in minibatches
def data_iter(batch_size, features, labels):
    """Yield ``(features, labels)`` minibatches in random order.

    Args:
        batch_size: Maximum number of examples per minibatch.
        features: Tensor indexed along its first dimension by example.
        labels: Tensor with the same first dimension as ``features``.

    Yields:
        Tuples ``(features[idx], labels[idx])``. Every example appears exactly
        once per pass; the last batch is smaller when ``len(features)`` is not
        a multiple of ``batch_size``.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    # Examples are read in random order, with no particular sequence
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        # Slicing past the end of the list is safe, so no explicit clamp
        batch_indices = torch.tensor(indices[i:i + batch_size])
        yield features[batch_indices], labels[batch_indices]

# Initialize parameters
# Weights: column vector drawn from a normal with mean 0 and std 0.01
w = torch.normal(0, 0.01, size=(2,1), requires_grad=True)
# Bias: starts at zero; both tensors track gradients for training
b = torch.zeros(1, requires_grad=True)

def linreg(X, w, b):  #@save
    """Linear regression model: return ``X @ w + b``.

    Args:
        X: Feature matrix.
        w: Weight tensor multiplied on the right of ``X``.
        b: Bias, broadcast over the result of the product.
    """
    return torch.matmul(X, w) + b

def squared_loss(y_hat, y):  #@save
    """Element-wise squared loss ``(y_hat - y) ** 2 / 2``.

    Args:
        y_hat: Predictions.
        y: Targets; reshaped to ``y_hat.shape`` before subtracting so that a
            shape mismatch cannot broadcast into a larger result.

    Returns:
        A tensor with the same shape as ``y_hat`` (not reduced to a scalar).
    """
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

def sgd(params, lr, batch_size):  #@save
    """Apply one minibatch stochastic-gradient-descent step in place.

    Args:
        params: Iterable of tensors whose ``.grad`` holds the gradient of the
            summed minibatch loss.
        lr: Learning rate.
        batch_size: Divisor that turns the summed gradient into a mean.

    Parameters without a gradient (``.grad is None``) are left untouched.
    """
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
            # backward() accumulates into .grad, so reset it here; otherwise
            # the next step would reuse this step's gradient as well.
            param.grad.zero_()

# Learning rates to compare
lrs = [0.5, 0.3, 0.1, 0.01]
num_epochs = 10
net = linreg
loss = squared_loss

batch_size = 10

# all_lrs[i][e] is the mean training loss after epoch e + 1 using lrs[i]
all_lrs = []
for lr in lrs:
    # Start every run from a fresh initialization; otherwise later learning
    # rates would begin from the already-trained parameters of earlier runs
    # and the loss curves would not be comparable.
    w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    train_lrs = []
    for epoch in range(num_epochs):
        for X, y in data_iter(batch_size, features, labels):
            l = loss(net(X, w, b), y)  # Minibatch loss for X and y
            # backward() accumulates into .grad, so clear stale gradients first
            for param in (w, b):
                if param.grad is not None:
                    param.grad.zero_()
            # l has shape (batch_size, 1) rather than being a scalar, so sum
            # its elements and compute the gradient with respect to [w, b]
            l.sum().backward()
            sgd([w, b], lr, batch_size)  # Update parameters using gradients
        with torch.no_grad():
            train_l = loss(net(features, w, b), labels)
            train_lrs.append(float(train_l.mean()))
    all_lrs.append(train_lrs)

epochs = np.arange(1, num_epochs + 1)
d2l.plot(epochs, all_lrs, xlabel='epoch num', ylabel='loss',
         legend=[f'learn rate {lr}' for lr in lrs],
         figsize=(6, 4))
53 changes: 53 additions & 0 deletions codes/ch03/
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# ------------------------------
# ------------------------------
import numpy as np
import torch
from d2l import torch as d2l
from torch import nn
from torch.utils import data

# Generate the dataset
# Ground-truth weights and bias passed to the d2l data generator
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# NOTE(review): presumably returns 1000 (feature, label) rows drawn from the
# linear model above — confirm against d2l.synthetic_data
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

# Read the dataset
def load_array(data_arrays, batch_size, is_train=True):  #@save
    """Wrap tensors in a PyTorch ``DataLoader``.

    Args:
        data_arrays: Tuple of tensors sharing the same first dimension.
        batch_size: Number of examples per minibatch.
        is_train: When true, the loader reshuffles the data every epoch.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` built from ``data_arrays``.
    """
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

batch_size = 10
data_iter = load_array((features, labels), batch_size)

# Single fully connected layer: 2 input features -> 1 output
net = nn.Sequential(nn.Linear(2, 1))

# Use the Huber loss function
loss = nn.HuberLoss()

trainer = torch.optim.SGD(net.parameters(), lr=0.03)

num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        # Clear stale gradients, back-propagate, then update the parameters
        trainer.zero_grad()
        l.backward()
        trainer.step()
    # Report the loss over the full dataset once per epoch
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')

# Compare the learned parameters of the linear layer with the ground truth
w = net[0].weight.data
print('w的估计误差:', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('b的估计误差:', true_b - b)

# ------------------------------
# How to access the gradients of the linear regression layer:
# ------------------------------

# Read both gradients from the first (and only) layer, then report them
w_grad, b_grad = net[0].weight.grad, net[0].bias.grad
print('w的梯度:', w_grad)
print('b的梯度:', b_grad)
13 changes: 13 additions & 0 deletions codes/ch03/
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# ------------------------------
# Custom softmax function
# ------------------------------

def softmax(X):
    """Row-wise softmax of a 2-D array.

    Args:
        X: Array-like of shape ``(n_rows, n_classes)`` holding the logits.

    Returns:
        A NumPy array of the same shape whose rows are non-negative and sum
        to 1.
    """
    X = np.asarray(X, dtype=float)
    # Subtracting each row's maximum leaves the result unchanged
    # mathematically but keeps exp() from overflowing on large logits.
    shifted = X - np.max(X, axis=1, keepdims=True)
    X_exp = np.exp(shifted)
    partition = X_exp.sum(1, keepdims=True)
    return X_exp / partition  # Broadcasting divides each row by its own sum

except Exception as e:

0 comments on commit ef0a5d5

Please sign in to comment.