Multilayer Perceptron

1. Import dependencies

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

2. Data preprocessing

2.1 Read the dataset and view the first 5 rows

data = pd.read_csv('HR.csv')
data.head()

2.2 Check the shape of the dataset

data.shape

2.3 Check the dataset info

The dataset has 10 columns in total; two of them (part and salary) are strings and need to be converted with one-hot encoding.

data.info()
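
As a quick illustration of what one-hot encoding does (a toy sketch on made-up values, not the HR data):

# Toy sketch: pd.get_dummies expands a string column into one indicator column per category
toy = pd.Series(['low', 'medium', 'high', 'low'])
pd.get_dummies(toy)   # columns: high, low, medium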

2.4 Check how many departments there are

data.part.unique()

2.5 Check how many salary levels there are

data.salary.unique()

2.6 Count how many people are in each department at each salary level

data.groupby(['salary', 'part']).size()

2.7 One-hot encode the salary level

pd.get_dummies(data.salary)

2.8 Join the one-hot encoded salary columns to the dataset and drop the original string feature

data = data.join(pd.get_dummies(data.salary))
del data['salary']
data.head()

2.9 One-hot encode the department and drop the original string feature

data = data.join(pd.get_dummies(data.part))
del data['part']
data

3.0 Check how many times each label value appears

data.left.value_counts()

3.1 Reshape Y

Y_data = data.left.values.reshape(-1, 1)
Y_data.shape

3.2 Convert Y to a tensor

Y = torch.from_numpy(Y_data).type(torch.FloatTensor)

3.3 Convert X to a tensor

X_data = data[[c for c in data.columns if c != 'left']].values.astype(np.float32)   # cast to float32 so the array is numeric even if the dummy columns are bool
X = torch.from_numpy(X_data).type(torch.FloatTensor)

3.4 Check the shapes of X and Y

X.size(), Y.size()

4. Build the model

4.1 Method 1:

class Model_1(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(20, 64)
        self.linear_2 = nn.Linear(64, 64)
        self.linear_3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, input):
        x = self.linear_1(input)
        x = self.relu(x)
        x = self.linear_2(x)
        x = self.relu(x)
        x = self.linear_3(x)
        y = self.sigmoid(x)
        return y

model_1 = Model_1()
model_1

4.2 Method 2:

class Model_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(20, 64)
        self.linear_2 = nn.Linear(64, 64)
        self.linear_3 = nn.Linear(64, 1)

    def forward(self, input):
        x = F.relu(self.linear_1(input))
        x = F.relu(self.linear_2(x))
        y = torch.sigmoid(self.linear_3(x))
        return y

model_2 = Model_2()
model_2
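
For comparison, the same architecture could also be written with nn.Sequential; this is just a sketch and is not used in the rest of the post:

# Equivalent network as an nn.Sequential container (for illustration only)
model_seq = nn.Sequential(
    nn.Linear(20, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 1),
    nn.Sigmoid()
)
model_seq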

5. Set the hyperparameters

lr = 0.0001                              # learning rate
loss_fn = nn.BCELoss()                   # loss function
batch_size = 64                          # batch size per training step
iteration = len(data) // batch_size      # number of batches per epoch
epochs = 100                             # number of passes over the entire dataset
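
nn.BCELoss computes the binary cross-entropy -[y*log(p) + (1-y)*log(1-p)], averaged over the batch by default; a tiny check on made-up tensors (not part of the training code):

# Toy check of what BCELoss computes (made-up probabilities and labels)
p = torch.tensor([0.9, 0.2, 0.6])    # predicted probabilities
t = torch.tensor([1.0, 0.0, 1.0])    # true labels
print(loss_fn(p, t))                                             # BCELoss result
print(-(t * torch.log(p) + (1 - t) * torch.log(1 - p)).mean())   # manual formula, same value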

6. Get the model

def get_model():
    model = Model_2()
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    return model, opt

model, optim = get_model()

7. Train the model

7.1 Method 1: split the dataset by manual slicing

for epoch in range(epochs):
    for i in range(iteration):
        start = i * batch_size
        end = start + batch_size
        x = X[start : end]
        y = Y[start : end]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    with torch.no_grad():
        print('epoch: ', epoch, 'loss: ', loss_fn(model(X), Y).item())
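
Note that this manual slicing walks the rows in the same fixed order every epoch; if shuffling is wanted without a DataLoader, one option is to permute the indices first (a sketch, assuming the X and Y tensors from above):

# Optional sketch: shuffle the row order before manual slicing
perm = torch.randperm(len(X))
X_shuffled, Y_shuffled = X[perm], Y[perm]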

7.2 Method 2: split the dataset with TensorDataset

hr_dataset = TensorDataset(X, Y)
len(hr_dataset)
hr_dataset[66 : 68]
model, optim = get_model()
for epoch in range(epochs):
    for i in range(iteration):
        x, y = hr_dataset[i * batch_size: i * batch_size + batch_size]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    with torch.no_grad():
        print('epoch: ', epoch, 'loss: ', loss_fn(model(X), Y).item())

7.3 Method 3: batch the dataset with DataLoader

hr_ds = TensorDataset(X, Y)
hr_dl = DataLoader(hr_ds, batch_size=batch_size, shuffle=True)
model, optim = get_model()
for epoch in range(epochs):
    for x, y in hr_dl:
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    with torch.no_grad():
        print('epoch: ', epoch, 'loss: ', loss_fn(model(X), Y).item())

7.4 Method 4: use train_test_split to divide the data into training and test sets

train_x, test_x, train_y, test_y = train_test_split(X_data, Y_data)
train_x = torch.from_numpy(train_x).type(torch.float32)
train_y = torch.from_numpy(train_y).type(torch.float32)
test_x = torch.from_numpy(test_x).type(torch.float32)
test_y = torch.from_numpy(test_y).type(torch.float32)
train_ds = TensorDataset(train_x, train_y)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_ds = TensorDataset(test_x, test_y)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True)

8. Add an accuracy metric

def acc(y_pred, y_true):
    y_pred = (y_pred > 0.5).type(torch.float32)
    acc = (y_pred == y_true).float().mean()
    return acc
model, optim = get_model()
for epoch in range(epochs):
    for x, y in train_dl:
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    with torch.no_grad():
        epoch_acc = acc(model(train_x), train_y)
        epoch_loss = loss_fn(model(train_x), train_y)
        epoch_test_acc = acc(model(test_x), test_y)
        epoch_test_loss = loss_fn(model(test_x), test_y)
        print('epoch: ', epoch, 
              'loss: ', round(epoch_loss.item(), 3), 
              'accuracy: ', round(epoch_acc.item(), 3), 
              'test_loss: ', round(epoch_test_loss.item(), 3), 
              'test_accuracy: ', round(epoch_test_acc.item(), 3))
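
Once trained, the model can be used for prediction by thresholding its sigmoid output at 0.5 (a minimal sketch reusing test_x from above):

# Predict on the held-out set: probabilities -> 0/1 labels
with torch.no_grad():
    probs = model(test_x)
    preds = (probs > 0.5).int()
preds[:5]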
