




大作业1:手写数字识别系统。
(1)使用MNIST手写数字数据集,进行手写数字识别(参考课本P186,例6.2)。


显卡:NVIDIA显卡,CUDA 11.7。
IDE:PyCharm Community集成开发环境,Jupyter Notebook。



4.1 分析、设计思路与流程图



4.2 主要算法代码


import numpy as np  # numpy数组库
import math  # 数学运算库
import matplotlib.pyplot as plt  # 画图库
import torch  # torch基础库
import torchvision.datasets as dataset  # 公开数据集的下载和管理
import torchvision.transforms as transforms  # 公开数据集的预处理库,格式转换
import torchvision.utils as utils
import torch.utils.data as data_utils  # 对数据集进行分批加载的工具集
from torch.utils import data
from d2l import torch as d2l
from torch import nn
# AlexNet-style CNN adapted to MNIST: one input channel, ten output classes.
# The layer sizes assume 224x224 inputs (the flattened feature map is
# 256 * 5 * 5 = 6400 values).
net = nn.Sequential(
    # A large 11x11 window captures objects; stride 4 reduces the output
    # height and width. The number of output channels is far larger than
    # in LeNet.
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    # Smaller convolution window; padding 2 keeps the input and output
    # height/width equal while the channel count grows.
    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    # Three consecutive convolutions with small windows. The channel count
    # grows further except in the last one, and no pooling is applied
    # after the first two.
    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Flatten(),
    # The fully connected layers are several times wider than LeNet's;
    # dropout is used to reduce overfitting.
    nn.Linear(6400, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    # Output layer: 10 classes for MNIST instead of the paper's 1000.
    nn.Linear(4096, 10))
# Sanity check: feed one random 1x1x224x224 input through the network layer
# by layer and print the output shape produced by each layer.
X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)
def load_data_mnist(batch_size, resize=None, root="../data", num_workers=4):
    """Build the MNIST training and test data loaders.

    The dataset is downloaded into ``root`` when it is not already there.

    Args:
        batch_size: Number of samples per batch for both loaders.
        resize: Optional target size passed to ``transforms.Resize``; when
            falsy the images are left at their native size.
        root: Directory in which the dataset files are stored.
        num_workers: Number of worker processes used by each loader. Use 0
            to load in the main process (needed on Windows when the calling
            script has no ``if __name__ == '__main__':`` guard).

    Returns:
        A ``(train_loader, test_loader)`` tuple; only the training loader
        shuffles its samples.
    """
    steps = [transforms.ToTensor()]
    if resize:
        # Resize has to run on the PIL image, i.e. before ToTensor.
        steps.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(steps)
    mnist_train = dataset.MNIST(
        root=root, train=True, transform=trans, download=True)
    mnist_test = dataset.MNIST(
        root=root, train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=num_workers),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=num_workers))
batch_size = 128
train_iter, test_iter = load_data_mnist(batch_size=batch_size, resize=224)

# Sanity check: inspect a single batch. One batch is enough to confirm the
# tensor shapes; iterating over the whole training set just to print them
# costs a full pass over the resized images.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X, y = next(iter(train_iter))
X, y = X.to(device), y.to(device)
print("X:", X.shape)
print("y:", y.shape)
def evaluate_accuracy_gpu(net, data_iter, device=None):  #@save
    """Compute the accuracy of a model on a dataset.

    Args:
        net: The model to evaluate. When it is an ``nn.Module`` it is
            switched to evaluation mode (and left in that mode).
        data_iter: Iterable yielding ``(X, y)`` batches; ``X`` may be a
            tensor or a list of tensors.
        device: Device the batches are moved to. When ``None`` and ``net``
            is an ``nn.Module``, the device of the model's first parameter
            is used.

    Returns:
        Number of correct predictions divided by the number of labels seen.
    """
    if isinstance(net, nn.Module):
        net.eval()  # evaluation mode
        if device is None:
            device = next(iter(net.parameters())).device
    # Accumulates: number of correct predictions, number of predictions.
    metric = d2l.Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                # Needed for BERT fine-tuning, where X is a list of tensors.
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y = y.to(device)
            metric.add(d2l.accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
    """Train a model with minibatch SGD on ``device`` and plot the progress.

    Linear and Conv2d weights are re-initialised with Xavier-uniform values,
    the model is moved to ``device`` and optimised with cross-entropy loss.
    Test accuracy is measured after every epoch; the final loss, accuracies
    and throughput are printed at the end.

    Args:
        net: The ``nn.Module`` to train (modified in place).
        train_iter: Loader yielding ``(X, y)`` training batches; must
            support ``len()``.
        test_iter: Loader yielding ``(X, y)`` test batches.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate for SGD.
        device: Device on which training runs.
    """
    def init_weights(m):
        # Exact type match on purpose: subclasses are left untouched.
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc'])
    timer, num_batches = d2l.Timer(), len(train_iter)
    # Plot roughly five points per epoch. Clamp to 1 so that loaders with
    # fewer than five batches do not trigger a modulo-by-zero.
    plot_every = max(1, num_batches // 5)
    for epoch in range(num_epochs):
        # Accumulates: sum of training loss, number of correct training
        # predictions, number of samples.
        metric = d2l.Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % plot_every == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc, None))
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc))
    # NOTE(review): the original line breaks were lost, so whether show() ran
    # once per epoch or once after training cannot be recovered; it is called
    # once here. Also confirm that the installed d2l Animator provides
    # show() at all.
    animator.show()
    print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')


# Training hyper-parameters: learning rate and number of epochs.
lr, num_epochs = 0.01, 10
# Train on the GPU when one is available, then persist the result.
train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
# The whole module object is pickled (not just a state_dict) because the
# recognition GUI restores it with a bare torch.load('cnn.pt').
torch.save(net, "cnn.pt")


from PIL import Image, ImageDraw, ImageFont
from PyQt5.QtWidgets import (QMainWindow, QMenuBar, QToolBar, QTextEdit, QAction, QApplication,qApp, QMessageBox, QFileDialog, QLabel, QHBoxLayout, QGroupBox,QComboBox, QGridLayout, QLineEdit, QSlider, QPushButton)
from PyQt5.QtGui import *
from PyQt5.QtGui import QPalette, QImage, QPixmap, QBrush
from PyQt5.QtCore import *
import sys
import cv2 as cv
import cv2
import numpy as np
import time
from pylab import *
import os
from torchvision import transforms
from PIL import Image
import torch
import numpy as np


class Window(QMainWindow):
    """Main window of the MNIST recognition GUI.

    Shows an image area and a text label. The "文件" -> "打开" menu entry lets
    the user pick an image file, which is then classified by the module-level
    ``net`` model; the predicted digit is written to the label.
    """

    path = ' '  # result of the last file dialog
    change_path = "change.png"  # path of the re-encoded copy of the image
    IMG1 = ' '
    IMG2 = 'null'

    def __init__(self):
        super(Window, self).__init__()
        # Build the user interface.
        self.createMenu()  # menu bar in the top-left corner
        self.cwd = os.getcwd()  # current working directory
        self.image_show()
        self.label1 = QLabel(self)
        self.initUI()

    def createMenu(self):
        """Create the menu bar and route its actions to menu1_process."""
        menubar = self.menuBar()
        menu1 = menubar.addMenu("文件")
        menu1.addAction("打开")
        menu1.triggered[QAction].connect(self.menu1_process)

    def image_show(self):
        """Create the large, bordered label that displays the image."""
        self.lbl = QLabel(self)
        self.lbl.setPixmap(QPixmap('source.png'))
        self.lbl.setAlignment(Qt.AlignCenter)  # centre the image
        self.lbl.setGeometry(35, 35, 800, 700)
        self.lbl.setStyleSheet("border: 2px solid black")

    def initUI(self):
        """Set window geometry, title and background, then show the window."""
        self.setGeometry(50, 50, 900, 800)
        self.setWindowTitle('mnist识别系统')
        palette = QPalette()
        palette.setColor(self.backgroundRole(), QColor(255, 255, 255))
        self.setPalette(palette)
        self.label1.setText("TextLabel")
        self.label1.move(100, 730)
        self.show()

    def menu1_process(self, q):
        """Let the user choose an image and display the recognised digit.

        Args:
            q: The triggered menu action (unused).
        """
        self.path = QFileDialog.getOpenFileName(
            self, '打开文件', self.cwd,
            "All Files (*);;(*.bmp);;(*.tif);;(*.png);;(*.jpg)")
        selected = self.path[0]
        if not selected:
            # The dialog was cancelled: nothing to recognise.
            return
        self.image = cv.imread(selected)
        if self.image is None:
            # imread signals an unreadable file by returning None; passing
            # that on to imwrite would raise.
            self.label1.setText("无法读取图像")
            return
        self.lbl.setPixmap(QPixmap(selected))
        cv2.imwrite(self.change_path, self.image)

        self.label1.setText("识别中")
        # Let Qt repaint the label before the blocking forward pass.
        QApplication.processEvents()

        # Preprocess: greyscale, 224x224, float tensor with a batch axis.
        transforms1 = transforms.Compose([transforms.ToTensor()])
        img = Image.open(self.change_path)
        img = img.convert("L")
        img = img.resize((224, 224))
        tensor = transforms1(img)
        print(tensor.shape)
        tensor = tensor.type(torch.FloatTensor)
        # Follow the model's own device so the input and the weights can
        # never end up on different devices.
        device = next(net.parameters()).device
        tensor = tensor.to(device)
        tensor = tensor.reshape((1, 1, 224, 224))
        print(tensor.shape)

        # Inference only: disable dropout and skip autograd bookkeeping.
        net.eval()
        with torch.no_grad():
            y = net(tensor)
        print(y)
        print(torch.argmax(y))
        self.label1.setText(str(int(torch.argmax(y))))
if __name__ == '__main__':
    # The checkpoint was saved from a CUDA model; map_location lets it load
    # on CPU-only machines as well.
    # NOTE: 'cnn.pt' is a fully pickled module, so only load files you
    # trust. On PyTorch >= 2.6 this call additionally needs
    # weights_only=False.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = torch.load('cnn.pt', map_location=device)
    net.eval()  # inference only: make sure dropout is disabled
    app = QApplication(sys.argv)
    ex = Window()
    ex.show()
    sys.exit(app.exec_())

4.3 改进与拓展

1. 重新配置了环境,安装了GPU版本的PyTorch。与CPU版本相比,GPU能够显著加快卷积运算。同时,较大的显存允许使用更大的batch size,使每次迭代的梯度估计更平滑,有利于训练的稳定性(需要注意,这与“梯度消失”并无直接关系,后者主要取决于网络深度、激活函数和参数初始化方式)。在CPU版本中,若batch size过大则可能出现内存不足(爆内存)等问题。



D:\ProgramData\Anaconda3\python.exe F:/mnist/mlhw3.py
Conv2d output shape:     torch.Size([1, 96, 54, 54])
ReLU output shape:   torch.Size([1, 96, 54, 54])
MaxPool2d output shape:  torch.Size([1, 96, 26, 26])
Conv2d output shape:     torch.Size([1, 256, 26, 26])
ReLU output shape:   torch.Size([1, 256, 26, 26])
MaxPool2d output shape:  torch.Size([1, 256, 12, 12])
Conv2d output shape:     torch.Size([1, 384, 12, 12])
ReLU output shape:   torch.Size([1, 384, 12, 12])
Conv2d output shape:     torch.Size([1, 384, 12, 12])
ReLU output shape:   torch.Size([1, 384, 12, 12])
Conv2d output shape:     torch.Size([1, 256, 12, 12])
ReLU output shape:   torch.Size([1, 256, 12, 12])
MaxPool2d output shape:  torch.Size([1, 256, 5, 5])
Flatten output shape:    torch.Size([1, 6400])
Linear output shape:     torch.Size([1, 4096])
ReLU output shape:   torch.Size([1, 4096])
Dropout output shape:    torch.Size([1, 4096])
Linear output shape:     torch.Size([1, 4096])
ReLU output shape:   torch.Size([1, 4096])
Dropout output shape:    torch.Size([1, 4096])
Linear output shape:     torch.Size([1, 10])
training on cuda:0
<Figure size 350x250 with 1 Axes>
<Figure size 350x250 with 1 Axes>
<Figure size 350x250 with 1 Axes>
<Figure size 350x250 with 1 Axes>
<Figure size 350x250 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
<Figure size 1920x951 with 1 Axes>
loss 0.039, train acc 0.988, test acc 0.991
624.4 examples/sec on cuda:0





torch.Size([1, 224, 224])
torch.Size([1, 1, 224, 224])
tensor([[16.5969, -2.6974,  1.0315, -4.3109, -2.4112, -2.2494,  2.9837, -2.5753,-1.0474,  0.4374]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor(0, device='cuda:0')



torch.Size([1, 224, 224])
torch.Size([1, 1, 224, 224])
tensor([[-3.4247, -0.9304, -0.6991, -1.0964,  0.0769,  2.2364, 11.2306, -6.2827,6.0278, -7.4283]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor(6, device='cuda:0')





[1]周志华. 机器学习[M]. 2016年1月第1版. 北京:清华大学出版社, 2016.
[2]赵卫东, 董亮. 机器学习[M]. 2018年8月第1版. 北京:人民邮电出版社, 2018.
[3]李航. 统计学习方法[M]. 2019年5月第2版. 北京:清华大学出版社, 2019.
[4]阿斯顿·张(Aston Zhang), 李沐(Mu Li), [美]扎卡里·C.立顿(Zachary C.Lipton), 等. 动手学深度学习[M]. 2019年6月第1版. 北京:人民邮电出版社, 2019.
[5][美]Ian Goodfellow [加]Yoshua Bengio [加]Aaron Courville. 深度学习[M]. 2017年8月第1版. 北京:人民邮电出版社, 2017.

【机器学习/人工智能】 大作业:手写数字识别系统相关推荐

  1. 课程设计(毕业设计)—基于机器学习KNN算法手写数字识别系统—计算机专业课程设计(毕业设计)

    机器学习KNN算法手写数字识别系统 下载本文手写数字识别系统完整的代码和课设报告的链接(或者可以联系博主koukou(壹壹23七2五六98),获取源码和报告):https://download.csd ...

  2. Python TensorFlow框架 实现手写数字识别系统

    手写数字识别算法的设计与实现 本文使用python基于TensorFlow设计手写数字识别算法,并编程实现GUI界面,构建手写数字识别系统.这是本人的本科毕业论文课题,当然,这个也是机器学习的基本问题 ...

  3. Python(TensorFlow框架)实现手写数字识别系统

    手写数字识别算法的设计与实现 本文使用python基于TensorFlow设计手写数字识别算法,并编程实现GUI界面,构建手写数字识别系统.这是本人的本科毕业论文课题,当然,这个也是机器学习的基本问题 ...

  4. k-近邻算法实现手写数字识别系统

    k-近邻算法实现手写数字识别系统 一.实验介绍 1.1 实验内容 本实验将会从电影题材分类的例子入手,详细讲述k-近邻算法的原理.在这之后,我们将会使用该算法实现手写数字识别系统. 1.2 课程来源 ...

  5. 【毕业设计_课程设计】手写数字识别系统的设计实现(源码+论文)

    文章目录 0 项目说明 1 系统概述 1.1 系统实现环境 2 研究方法 2.1 图像预处理阶段 2.2 特征提取阶段 2.3 数字识别阶段 3 研究结论 4 论文概览 5 项目工程 0 项目说明 手 ...

  6. 完整代码及解析!!手写数字识别系统(手写数字测试识别 + pytoch实现 + 完整代码及解析)

    基于深度学习的手写数字识别系统 一.实验目的 1.任选实验环境及深度学习框架,实现手写数字识别系统: 2.掌握所采用的深度学习框架构建方式. 二.实验理论基础 1.MNIST数据集 MNI ...

  7. 手写数字识别系统学习(1)

    手写数字识别系统学习(1) 从这一章起,我们将通过手写数字识别这一非常经典的机器学习项目接着来学习神经网络 一.数据数据和测试数据 ​ 我们在上一章提到了权重和偏置的概念,这是一个相当繁琐的数据集,我 ...

  8. 使用K-邻近算法实现手写数字识别系统

    k-近邻法简介 k近邻法(k-nearest neighbor, k-NN)是1967年由Cover T和Hart P提出的一种基本分类与回归方法.它的工作原理是:存在一个样本数据集合,也称作为训练样 ...

  9. 手写数字识别系统 基于python

    环境基于Python3.6和Tensorflow框架 实现手写数字识别系统 本文使用python基于TensorFlow设计手写数字识别算法,并编程实现GUI界面,构建手写数字识别系统.文中首先对如何 ...


  1. day1-字符串拼接、表达式、break和continue
  2. 微软发布多项Azure Kubernetes服务更新,增加GPU支持
  3. 前端知识点回顾之重点篇——JavaScript异步机制
  4. python 报错 bs4 FeatureNotFound: Couldn‘t find a tree builder with the features you requested: lxml
  5. POJ 1118 求平面上最多x点共线
  6. win2003 + sqlserver2K sp4,客户端无法连接
  7. python+webdriver(三)
  8. 小D课堂 - 零基础入门SpringBoot2.X到实战_第三节SpringBoot热部署devtool和配置文件自动注入实战_16、注解配置文件自动映射到属性和实体类实战...
  9. androidru使用adb启动activity和monkeyrunner启动activity
  10. 由QCustomplot引发drawPolyline和drawLine的区别
  11. win10安装Microsoft Office 2016(64位)提示已安装Microsoft Office 2016(32位)
  12. Spring 6.0 堪称最强!新特性,惊爆了!
  13. socket技术详解(看清socket编程)
  14. 创建CSDN虚拟大学
  15. 计算机语言栏在哪里,缺少计算机语言栏的解决方案是什么
  16. QT实现文本编辑器(简易版)
  17. 地下城与勇士游戏设计元素及成功原因分析
  18. “揭开意识的奥秘:认知相对论”及作者李玉鑑简介 (公号发“李玉鑑AI”下载PDF资料,欢迎转发、赞赏支持科普)
  19. @WebFilter和@Component一起使用导致urlPatterns不起作用
  20. matlab技术应用,030232015-MATLAB技术应用-赵延东


  1. python 等号需要空格_等号两边需要空格吗?
  2. Linux系统编程 25 动态库和静态库 理论对比
  3. 疫情患者活动轨迹查询-小程序
  4. myelicpes2019初次使用设置_PES2019技巧(键盘操作,个人经验)
  5. 一文讲懂SQL分组子句GROUP BY
  6. matlab. set坐标轴,matlab绘图坐标轴设置
  7. java标准i o重定向_Java重定向标准输入输出流I/O
  8. java重定向cookie_在java中,JSP重定向,转发,Cookie,session
  9. webStorm Vue3 页面模板
  10. 分析并实现 —— 猜数字小游戏(利用C语言实现)