您的位置：首页 > 脚本大全 > > 正文

python验证码处理教程（python简单验证码识别的实现方法）

更多时间：2021-10-07 00:55:48 类别：脚本大全浏览量：250

python验证码处理教程

python简单验证码识别的实现方法

利用SVM（支持向量机）进行4位数字验证码识别

主要思路和步骤如下：

一，素材收集

检查环境是否包含有相应的库：

1.在cmd中，通过 pip list命令查看安装的库

2.再使用 pip install requests 命令安装requests库

3.再次使用pip list 命令

python验证码处理教程（python简单验证码识别的实现方法）

4.利用python获取验证码资源

编写代码：_downloadpic.py

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

#!/usr/bin/env python3

#利用python从站点下载验证码图片

import requests

## 1.在 http://www.xxx.com

# 获取验证码url

def downloads_pic(strpath, strname):
    """Download one captcha image and save it as ``<strpath><strname>.jpg``.

    Args:
        strpath: Destination directory. It is concatenated directly with
            the file name, so it must end with a path separator.
        strname: File name without the extension.
    """
    # Address the captcha is requested from.
    url = 'http://www.xxx.com'

    # stream=True defers downloading the body, so it can be copied to disk
    # chunk by chunk instead of being held in memory as a whole.
    rreq = requests.get(url, stream=True)

    # Leaving the with-block flushes and closes the file; no explicit
    # flush()/close() call is needed.
    with open(strpath + strname + '.jpg', 'wb') as fppic:
        # Copy the response body 1024 bytes at a time.
        for bychunk in rreq.iter_content(chunk_size=1024):
            if bychunk:
                fppic.write(bychunk)


if __name__ == '__main__':
    # Fetch ten sample captchas, saved as 001.jpg ... 010.jpg.
    for i in range(1, 10 + 1):
        strfilename = "%03d" % i
        downloads_pic('d:/1/', strfilename)

二，素材处理

1.二值化处理，增加对比度，锐化，增加亮度，滤镜，转为黑白

2.去除噪点

3.切割图片

python验证码处理教程（python简单验证码识别的实现方法）

编写代码：_picdealwith.py

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

#!/usr/bin/env python3

import os

import os.path

from pil import image, imageenhance, imagefilter

import random

#二值化处理

#strimgpath 图片路径

def binaryzationimg(strimgpath):
    """Open an image, enhance it and convert it to a 1-bit image.

    Args:
        strimgpath: Path of the image file to open.

    Returns:
        The image after contrast, sharpness and brightness enhancement,
        grayscale conversion with the DETAIL filter, and a final
        conversion to mode '1'.
    """
    # Pillow's module and class names are case-sensitive; importing them
    # here keeps the function self-contained.
    from PIL import Image, ImageEnhance, ImageFilter

    imgoriimg = Image.open(strimgpath)

    # Enhancement passes, applied in this order: (enhancer class, factor).
    for clsenhancer, ffactor in (
        (ImageEnhance.Contrast, 2.55),
        (ImageEnhance.Sharpness, 2.0),
        (ImageEnhance.Brightness, 2.0),
    ):
        imgoriimg = clsenhancer(imgoriimg).enhance(ffactor)

    # Grayscale first, then the detail filter, then reduce to black/white.
    imggryimg = imgoriimg.convert('L').filter(ImageFilter.DETAIL)
    return imggryimg.convert('1')

#去除噪点

def clearnoise(imgbinimg):
    """Whiten isolated black pixels of a binarized image, in place.

    A pixel counts as noise when its value is 0 (black) and all eight
    surrounding pixels are 255 (white). Pixels on the outer border are
    never examined because they do not have eight neighbours.

    Args:
        imgbinimg: Image object offering ``size``, ``getpixel`` and
            ``putpixel``.

    Returns:
        The same image object that was passed in.
    """
    # Offsets of the eight pixels surrounding (x, y).
    neighbours = [
        (dx, dy)
        for dx in (-1, 0, 1)
        for dy in (-1, 0, 1)
        if (dx, dy) != (0, 0)
    ]
    nwidth, nheight = imgbinimg.size[0], imgbinimg.size[1]

    for x in range(1, nwidth - 1):
        for y in range(1, nheight - 1):
            if imgbinimg.getpixel((x, y)) != 0:
                continue
            if all(
                imgbinimg.getpixel((x + dx, y + dy)) == 255
                for dx, dy in neighbours
            ):
                # The coordinate is passed as a tuple, as for getpixel.
                imgbinimg.putpixel((x, y), 255)
    return imgbinimg

#切割图片

def getcropimgs(imgclrimg, ncount=4, nleft=6, ntop=3, nwidth=13, nheight=15):
    """Cut an image into equally sized, horizontally adjacent pieces.

    The defaults reproduce the original fixed layout: four pieces of
    13x15 pixels, the first one starting at x=6, y=3.

    Args:
        imgclrimg: Image object offering ``crop((left, upper, right, lower))``.
        ncount: Number of pieces to cut.
        nleft: X coordinate of the left edge of the first piece.
        ntop: Y coordinate of the upper edge of every piece.
        nwidth: Width of each piece; also the horizontal step between pieces.
        nheight: Height of each piece.

    Returns:
        A list with the ``ncount`` cropped images, ordered left to right.
    """
    return [
        imgclrimg.crop((
            nleft + i * nwidth,
            ntop,
            nleft + (i + 1) * nwidth,
            ntop + nheight,
        ))
        for i in range(ncount)
    ]

#调用部分

def main():
    """Binarize, denoise and cut every image found under the step-1 folder.

    Each piece is saved into the step-2 folder under a name made of a
    running counter and a random four-digit number.
    """
    g_count = 0
    strstep1dir = 'd:/1/step1/'
    strstep2dir = 'd:/1/step2/'

    # Saving fails when the target folder is missing, so create it first.
    os.makedirs(strstep2dir, exist_ok=True)

    for parentpath, _dirnames, filenames in os.walk(strstep1dir):
        for strfilename in filenames:
            # Full path of the source image.
            strfullpath = os.path.join(parentpath, strfilename)
            imgbinimg = binaryzationimg(strfullpath)
            imgclrimg = clearnoise(imgbinimg)
            for img in getcropimgs(imgclrimg):
                strimgname = "%04d%04d.jpg" % (g_count, random.randint(0, 9999))
                img.save(os.path.join(strstep2dir, strimgname))
                g_count += 1
    print("ok！")


if __name__ == '__main__':
    main()

三，手工分类

将第二步切割好的图片进行分类，体力活

python验证码处理教程（python简单验证码识别的实现方法）

四，利用svm向量机建立模型

1.安装svm库

下载libsvm库，并解压

将库中的windows目录的路径添加到path环境变量中

将libsvm下的python文件夹中的svm.py和svmutil.py文件拷贝到python安装目录下的lib文件夹中

from svmutil import *

2.生成模型文件

2.1.将分好类的图片信息进行提取，生成特征值

2.2.输出向量数据

python验证码处理教程（python简单验证码识别的实现方法）

2.3.根据数据输出svm模型文件

python验证码处理教程（python简单验证码识别的实现方法）

编写代码：_svmdemo.py

100

101

102

103

104

105

106

107

108

109

#!/usr/bin/env python3

#svm,验证码识别

import os

import sys

import random

import os.path

from pil import image, imageenhance, imagefilter

from svmutil import *

##记录像素点的值，描述特征，采用遍历每个像素点统计黑色点的数量

def getfeature(imgcropimg, nimgheight, nimgwidth):
    """Describe an image by its per-row and per-column black-pixel counts.

    Args:
        imgcropimg: Image object offering ``getpixel((x, y))``.
        nimgheight: Number of rows to scan.
        nimgwidth: Number of columns to scan.

    Returns:
        A list of ``nimgheight + nimgwidth`` integers: first the number of
        pixels equal to 0 in each row (top to bottom), then the number in
        each column (left to right).
    """
    # NOTE(review): only pixels exactly equal to 0 are counted; presumably
    # the input is a clean black/white image - confirm for lossy formats.
    # Each pixel is read once; both count lists derive from this grid.
    isblack = [
        [imgcropimg.getpixel((x, y)) == 0 for x in range(nimgwidth)]
        for y in range(nimgheight)
    ]
    rowcounts = [sum(row) for row in isblack]
    colcounts = [sum(row[x] for row in isblack) for x in range(nimgwidth)]
    return rowcounts + colcounts

##输出向量数据

def outputvectordata(strid, strmaterialdir, stroutpath):
    """Append one feature line per image under a folder to a vector file.

    Every file found while walking ``strmaterialdir`` is opened as an
    image and written as ``<strid> 1:<v1> 2:<v2> ... `` followed by a
    newline, where the values come from ``getfeature(image, 15, 13)``.

    Args:
        strid: Label written at the start of every line.
        strmaterialdir: Folder (walked recursively) holding the images.
        stroutpath: File the lines are appended to.
    """
    # Pillow's names are case-sensitive; importing here keeps the function
    # self-contained.
    from PIL import Image

    # The output file is opened once and closed by the with-block.
    with open(stroutpath, 'a') as fpfea:
        for parentpath, _dirnames, filenames in os.walk(strmaterialdir):
            for strfilename in filenames:
                # Full path of the image file.
                strfullpath = os.path.join(parentpath, strfilename)
                # Close the image as soon as its features are extracted.
                with Image.open(strfullpath) as imgoriimg:
                    featurelist = getfeature(imgoriimg, 15, 13)
                # Feature indices start at 1; every pair keeps its
                # trailing space, as in the original output.
                strfeature = strid + ' ' + ''.join(
                    '%d:%d ' % (nindex, nvalue)
                    for nindex, nvalue in enumerate(featurelist, 1)
                )
                fpfea.write(strfeature + '\n')

#训练svm模型

def trainsvmmodel(strproblempath, strmodelpath):
    """Train an SVM model from a vector file and save it to disk.

    Args:
        strproblempath: Path of the vector file passed to
            ``svm_read_problem``.
        strmodelpath: Path the trained model is saved to.
    """
    y, x = svm_read_problem(strproblempath)
    model = svm_train(y, x)
    svm_save_model(strmodelpath, model)

#svm模型测试

def svmmodeltest(strproblempath, strmodelpath):
    """Run a saved SVM model against a vector file.

    Args:
        strproblempath: Path of the vector file passed to
            ``svm_read_problem``.
        strmodelpath: Path of the model file to load.

    Returns:
        The first element of the ``svm_predict`` result (the recognition
        result).
    """
    testy, testx = svm_read_problem(strproblempath)
    model = svm_load_model(strmodelpath)
    # Only the first of the three results is used.
    plabel, _pacc, _pval = svm_predict(testy, testx, model)
    return plabel

##输出测试向量数据

def outputtestvectordata(strid, strdir, stroutpath):
    """Append one feature line per test image under a folder to a file.

    Every file found while walking ``strdir`` is opened as an image and
    written as ``<strid> 1:<v1> 2:<v2> ... `` followed by a newline, where
    the values come from ``getfeature(image, 15, 13)``.

    Args:
        strid: Label written at the start of every line.
        strdir: Folder (walked recursively) holding the test images.
        stroutpath: File the lines are appended to.
    """
    # Pillow's names are case-sensitive; importing here keeps the function
    # self-contained.
    from PIL import Image

    # The output file is opened once and closed by the with-block.
    with open(stroutpath, 'a') as fpfea:
        # The loop variables use their own names so the strdir parameter
        # is not overwritten while walking.
        for parentpath, _dirnames, filenames in os.walk(strdir):
            # Sorted so the line order does not depend on the order in
            # which the operating system lists the files.
            for strfilename in sorted(filenames):
                # Full path of the image file.
                strfullpath = os.path.join(parentpath, strfilename)
                # Close the image as soon as its features are extracted.
                with Image.open(strfullpath) as imgoriimg:
                    featurelist = getfeature(imgoriimg, 15, 13)
                # Feature indices start at 1; every pair keeps its
                # trailing space, as in the original output.
                strfeature = strid + ' ' + ''.join(
                    '%d:%d ' % (nindex, nvalue)
                    for nindex, nvalue in enumerate(featurelist, 1)
                )
                fpfea.write(strfeature + '\n')

def main():
    """Write the feature vectors of the classes 0-9 into one vector file."""
    # Step 1: the samples of class <n> are read from d:/1/step3/<n>, and
    # every class is appended to the same vector file.
    for i in range(0, 10):
        strid = '%d' % i
        outputvectordata(strid, 'd:/1/step3/' + strid, 'd:/1/step4/vector.txt')

标签：Python 验证码识别

上一篇：idea 远程部署docker（IDEA使用Docker插件菜鸟教程）

下一篇：JavaScript css3实现简单视频弹幕功能（JavaScript css3实现简单视频弹幕功能）

您可能感兴趣

python验证码处理教程（python简单验证码识别的实现方法）

python验证码处理教程

热门推荐

排行榜