1.导入依赖包
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
2.数据预处理
2.1读取数据集并查看前5条数据
# Load the HR dataset from CSV; the bare expression previews the first 5 rows.
data = pd.read_csv('HR.csv')
data.head()
2.2查看数据结构
# (rows, columns) of the raw dataset.
data.shape
2.3查看数据信息
可以看到一共有10个特征,其中有两个是字符串类型,需要转成独热编码
# Column dtypes and non-null counts; the object-typed columns (part, salary)
# need one-hot encoding before training.
data.info()
2.4查看一共有几个部门
# Distinct department values in the `part` column.
data.part.unique()
2.5查看一共有几种薪资等级
# Distinct salary levels in the `salary` column.
data.salary.unique()
2.6查看每个薪资等级各部门有多少人
# Headcount per (salary level, department) pair.
data.groupby(['salary', 'part']).size()
2.7对薪资等级进行独热编码
# Preview of the one-hot encoding of the salary column (not yet joined).
pd.get_dummies(data.salary)
2.8将薪资等级的独热编码加入数据集中,并将原来字符串特征进行删除
# Append the salary one-hot columns and drop the original string column.
data = data.join(pd.get_dummies(data.salary))
del data['salary']
data.head()
2.9将部门进行独热编码并删除原来的字符串特征
# Same treatment for the department column: one-hot encode, then drop it.
data = data.join(pd.get_dummies(data.part))
del data['part']
data
3.0查看各个标签出现的次数
# Class balance of the target label `left` (employee attrition flag).
data.left.value_counts()
3.1改变Y的维度
# Reshape the labels to a column vector (N, 1) to match the model output.
Y_data = data.left.values.reshape(-1, 1)
Y_data.shape
3.2将Y转成张量
# Labels as a float tensor — BCELoss requires float targets.
Y = torch.from_numpy(Y_data).type(torch.FloatTensor)
3.3将X转成张量
# All columns except the label form the feature matrix; convert to float tensor.
X_data = data[[c for c in data.columns if c != 'left']].values
X = torch.from_numpy(X_data).type(torch.FloatTensor)
3.4查看X和Y的维度
# Sanity check: X and Y should have the same number of rows.
X.size(), Y.size()
4.搭建模型
4.1方法一:
class Model_1(nn.Module):
    """Fully connected binary classifier: 20 -> 64 -> 64 -> 1.

    Activations are stored as sub-modules (method 1): ReLU between the
    hidden layers and a final Sigmoid so the output is a probability.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(20, 64)
        self.linear_2 = nn.Linear(64, 64)
        self.linear_3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, input):
        hidden = self.relu(self.linear_1(input))
        hidden = self.relu(self.linear_2(hidden))
        return self.sigmoid(self.linear_3(hidden))
# Instantiate the model; the bare expression prints its layer summary.
model_1 = Model_1()
model_1
4.2方法二:
class Model_2(nn.Module):
    """Same architecture as Model_1 (20 -> 64 -> 64 -> 1), but applies the
    activations functionally inside forward() instead of storing them as
    sub-modules (method 2).
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(20, 64)
        self.linear_2 = nn.Linear(64, 64)
        self.linear_3 = nn.Linear(64, 1)

    def forward(self, input):
        x = F.relu(self.linear_1(input))
        x = F.relu(self.linear_2(x))
        # torch.sigmoid replaces F.sigmoid, which is deprecated.
        y = torch.sigmoid(self.linear_3(x))
        return y
# Instantiate the functional-style model; the bare expression prints it.
model_2 = Model_2()
model_2
5.设置超参数
lr = 0.0001                          # learning rate
loss_fn = nn.BCELoss()               # binary cross-entropy (expects sigmoid outputs)
batch_size = 64                      # mini-batch size
iteration = len(data) // batch_size  # number of batches per epoch
epochs = 100                         # number of full passes over the data
6.获取模型
def get_model():
    """Build a fresh Model_2 paired with an Adam optimizer over its
    parameters, using the globally configured learning rate `lr`.

    Returns a (model, optimizer) tuple.
    """
    net = Model_2()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    return net, optimizer
# Fresh model + optimizer for the first training run.
model, optim = get_model()
7.训练模型
7.1方法一:用切分的方法切割数据集
# Method 1: slice the X / Y tensors manually into consecutive mini-batches.
for epoch in range(epochs):
    for i in range(iteration):
        start = i * batch_size
        end = start + batch_size
        x = X[start : end]
        y = Y[start : end]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()  # clear stale gradients before backprop
        loss.backward()
        optim.step()
    with torch.no_grad():
        # Full-dataset loss at the end of each epoch (no gradients needed).
        print('epoch: ', epoch, 'loss: ', loss_fn(model(X), Y).data.item())
7.2方法二:用TensorDataset的方法切割数据集
# Wrap X and Y in a TensorDataset: indexing it returns matched (x, y) pairs.
hr_dataset = TensorDataset(X, Y)
len(hr_dataset)
hr_dataset[66 : 68]
# Method 2: re-initialize the model and batch by slicing the TensorDataset.
model, optim = get_model()
for epoch in range(epochs):
    for i in range(iteration):
        # Slicing a TensorDataset yields an (x_batch, y_batch) tuple.
        x, y = hr_dataset[i * batch_size: i * batch_size + batch_size]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    with torch.no_grad():
        print('epoch: ', epoch, 'loss: ', loss_fn(model(X), Y).data.item())
7.3方法三:用DataLoader的方法切割数据集
# Method 3: let DataLoader handle batching; it also reshuffles every epoch.
hr_ds = TensorDataset(X, Y)
hr_dl = DataLoader(hr_ds, batch_size=batch_size, shuffle=True)
model, optim = get_model()
for epoch in range(epochs):
    for x, y in hr_dl:
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    with torch.no_grad():
        print('epoch: ', epoch, 'loss: ', loss_fn(model(X), Y).data.item())
7.4方法四:用train_test_split的方法切割数据集,并将其划分为训练集和测试集
# Method 4: hold out a test split (sklearn default is 25%) from the NumPy
# arrays, convert each piece to a tensor, then build per-split DataLoaders.
train_x, test_x, train_y, test_y = train_test_split(X_data, Y_data)
train_x = torch.from_numpy(train_x).type(torch.float32)
train_y = torch.from_numpy(train_y).type(torch.float32)
test_x = torch.from_numpy(test_x).type(torch.float32)
test_y = torch.from_numpy(test_y).type(torch.float32)
train_ds = TensorDataset(train_x, train_y)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_ds = TensorDataset(test_x, test_y)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True)
8.添加准确率
def acc(y_pred, y_true):
    """Binary accuracy: fraction of predictions on the correct side of 0.5.

    y_pred: tensor of probabilities in [0, 1] (sigmoid outputs);
    y_true: tensor of 0/1 labels with the same shape.
    Returns a scalar float tensor.
    """
    # Threshold probabilities into hard 0/1 predictions. The original had a
    # duplicated assignment (`y_pred = y_pred = ...`) and shadowed the
    # function name with a local `acc`; both are cleaned up here.
    predicted = (y_pred > 0.5).type(torch.int32)
    return (predicted == y_true).float().mean()
# Final run: train on the train split only, report loss and accuracy on
# both splits after every epoch.
model, optim = get_model()
for epoch in range(epochs):
    for x, y in train_dl:
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    with torch.no_grad():
        # Epoch-end evaluation on the full train and test tensors.
        epoch_acc = acc(model(train_x), train_y)
        epoch_loss = loss_fn(model(train_x), train_y).data
        epoch_test_acc = acc(model(test_x), test_y)
        epoch_test_loss = loss_fn(model(test_x), test_y).data
        print('epoch: ', epoch,
              'loss: ', round(epoch_loss.item(), 3),
              'accuracy: ', round(epoch_acc.item(), 3),
              'test_loss: ', round(epoch_test_loss.item(), 3),
              'test_accuracy: ', round(epoch_test_acc.item(), 3))
转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 2621041184@qq.com