PIL库基本介绍
PIL(Python Imaging Library)已经是 Python 平台事实上的图像处理标准库。PIL 功能非常强大,但 API 却非常简单易用。
# Minimal PIL demo: open an image, display it, and look at it as a matrix.
# The snippet uses both Image and np, so bring them into scope here.
import numpy as np
from PIL import Image

# Load the picture; the context manager releases the file handle afterwards.
with Image.open("../data/getcode.do.jpg") as image:
    # Hand the picture to the system's image viewer.
    image.show()
    # Convert the picture into a NumPy array while the file is still open.
    data = np.array(image)

# Print the pixel values together with the array's shape.
print(data, data.shape)
完成登录验证码识别操作
# -*- coding: utf-8 -*-
import scrapy
from scrapy import cmdline
from urllib import request
from PIL import Image
class DoubanSpiderSpider(scrapy.Spider):
    """Log in to renren.com, asking the operator to type the captcha by hand.

    Flow: fetch the login page, look for the captcha image, download and
    display it, read the code from stdin, then POST the login form.
    """

    # The spider name is what `scrapy crawl <name>` refers to.
    name = 'douban_spider'
    # Every request this spider issues goes to renren.com. Requests to hosts
    # outside ``allowed_domains`` are dropped by Scrapy's offsite filtering,
    # so the login POST would never be sent with an unrelated domain here.
    allowed_domains = ['renren.com']
    # Entry URL.
    start_urls = ['http://www.renren.com/PLogin.do']

    def parse(self, response):
        """Build the login form and submit it.

        Args:
            response: the response for the start URL.

        Yields:
            A ``scrapy.FormRequest`` posting the credentials (plus the
            captcha code under ``icode`` when a captcha image is present);
            its response is handled by ``after_login``.
        """
        # NOTE(review): credentials are hard-coded for the demo; load them
        # from settings or the environment in real use.
        formdata = {
            'email': '970138074@qq.com',
            'password': 'pythonspider',
        }
        # ``xpath`` returns a selector list; ``get()`` extracts the first
        # matching attribute value as a string, or None if there is none.
        img_src = response.xpath("//*[@id='verifyPic_login']/@src").get()
        print(img_src)
        if img_src:
            # Resolve a relative ``src`` against the page URL before
            # downloading the image.
            img_url = response.urljoin(img_src)
            formdata['icode'] = self.parse_image(img_url)
        yield scrapy.FormRequest(
            url='http://www.renren.com/PLogin.do',
            formdata=formdata,
            callback=self.after_login,
        )

    def parse_image(self, image_url):
        """Download the captcha, show it, and return what the operator types.

        Args:
            image_url: absolute URL of the captcha image.

        Returns:
            The text entered on stdin.
        """
        # NOTE(review): the image is fetched with urllib, outside Scrapy's
        # cookie handling. If the server ties the captcha to the session
        # cookie, this code will not match the login request -- confirm.
        request.urlretrieve(image_url, 'check_code.png')
        with Image.open('check_code.png') as image:
            image.show()
        return input("请输入验证码")

    def after_login(self, response):
        """Print the URL the login request ended up at."""
        print('---->', response.url)
if __name__ == "__main__":
    # Hand Scrapy's command-line entry point the argv for
    # `scrapy crawl douban_spider`.
    # Variants noted by the author: add `-s LOG_FILE=all.log` to log to a
    # file, or `-o douban.csv` to export items, right after `crawl`.
    crawl_argv = ["scrapy", "crawl", "douban_spider"]
    cmdline.execute(crawl_argv)
版本二
import scrapy
from urllib import request
from PIL import Image
from lxml import etree
class RenrenSpiderSpider(scrapy.Spider):
    """Log in to renren.com with a manually entered captcha and save the result.

    Unlike scraping the captcha ``<img>`` from the page, this version
    requests the captcha endpoint directly.
    """

    name = 'renren_spider'
    allowed_domains = ['renren.com']
    start_urls = ['http://www.renren.com/SysHome.do']

    def parse(self, response):
        """Ask for the captcha code and submit the login form.

        Args:
            response: the response for the start URL (not inspected here).

        Returns:
            A ``scrapy.FormRequest`` posting the credentials and the captcha
            code; its response is handled by ``after_login``.
        """
        # Imported locally so this class does not depend on a file-level import.
        import random

        # NOTE(review): credentials are hard-coded for the demo; load them
        # from settings or the environment in real use.
        login_form = {
            'email': '1147040@qq.com',
            'password': 'scrapy_demo1',
        }
        # The ``rnd`` parameter was the literal JavaScript text
        # "Math.random()"; send an actual random number instead.
        img_url = (
            'http://icode.renren.com/getcode.do?t=web_login&rnd='
            + str(random.random())
        )
        print(img_url)
        login_form['icode'] = self.__parse_image(img_url)
        return scrapy.FormRequest(
            url='http://www.renren.com/PLogin.do',
            formdata=login_form,
            callback=self.after_login,
        )

    def __parse_image(self, img_url):
        """Download the captcha, show it, and return what the operator types.

        Args:
            img_url: URL of the captcha image.

        Returns:
            The text entered on stdin.
        """
        # NOTE(review): the image is fetched with urllib, outside Scrapy's
        # cookie handling. If the server ties the captcha to the session
        # cookie, this code will not match the login request -- confirm.
        request.urlretrieve(img_url, "check_code.png")
        with Image.open('check_code.png') as image:
            image.show()
        return input('请输入验证码')

    def after_login(self, reponse):
        """Write the body of the post-login page to ``renren.html``.

        Args:
            reponse: the response to the login request.
        """
        # Write-only access is all that is needed; the file is UTF-8 encoded.
        with open('renren.html', 'w', encoding='utf-8') as f:
            f.write(reponse.text)
————————————————
版权声明:本文为CSDN博主「lsqzedu」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/lsqzedu/article/details/99707709
