PIL库基本介绍
PIL(Python Imaging Library)已经是 Python 平台事实上的图像处理标准库。PIL 功能非常强大,但 API 却非常简单易用。
import numpy as np
from PIL import Image

# Load the image from disk and display it.
data = Image.open("../data/getcode.do.jpg")
data.show()

# Convert the image into a NumPy array and print its contents and shape.
data = np.array(data)
print(data, data.shape)
完成登录验证码识别操作
# -*- coding: utf-8 -*-
"""Scrapy spider that submits the renren.com login form.

If the login page exposes a captcha image, the image is downloaded and
displayed, and the user is asked to type the code on the console.
"""
from urllib import request

import scrapy
from PIL import Image
from scrapy import cmdline


class DoubanSpiderSpider(scrapy.Spider):
    """Log in to renren.com, prompting for the captcha when one is shown."""

    name = 'douban_spider'
    # Must cover the login host; otherwise the FormRequest issued from
    # parse() is dropped by Scrapy's offsite filtering.
    allowed_domains = ['renren.com']
    # Start URL
    start_urls = ['http://www.renren.com/PLogin.do']

    def parse(self, response):
        """Build the login form and yield the login request.

        Args:
            response: the response for the start URL.

        Yields:
            A ``scrapy.FormRequest`` posting the login form; its response is
            handled by ``after_login``.
        """
        # NOTE(review): credentials are hard-coded in the source; consider
        # loading them from settings or the environment.
        formdata = {
            'email': '970138074@qq.com',
            'password': 'pythonspider',
        }
        # Extract the captcha URL as a string (xpath() itself returns a
        # SelectorList, which urlretrieve cannot fetch). None if absent.
        img_url = response.xpath("//*[@id='verifyPic_login']/@src").get()
        print(img_url)
        if img_url:
            # Resolve a possibly relative src against the page URL, then
            # ask the user for the code shown in the image.
            check_code = self.parse_image(response.urljoin(img_url))
            formdata['icode'] = check_code
        yield scrapy.FormRequest(
            url='http://www.renren.com/PLogin.do',
            formdata=formdata,
            callback=self.after_login,
        )

    def parse_image(self, image_url):
        """Download the captcha, display it and return the code typed by the user.

        Args:
            image_url: absolute URL of the captcha image.

        Returns:
            The string the user entered at the prompt.
        """
        # The image is saved to 'check_code.png' in the working directory.
        request.urlretrieve(image_url, 'check_code.png')
        image = Image.open('check_code.png')
        image.show()
        check_code = input("请输入验证码")
        return check_code

    def after_login(self, response):
        """Print the URL of the response received after the login request."""
        print('---->', response.url)


if __name__ == "__main__":
    # Alternative invocations:
    #   scrapy crawl -s LOG_FILE=all.log douban_spider   (log to a file)
    #   scrapy crawl -o douban.csv douban_spider         (export items to CSV)
    cmdline.execute(["scrapy", "crawl", "douban_spider"])
版本二
"""Scrapy spider (version two) that logs in to renren.com with a typed captcha."""
import random
from urllib import request

import scrapy
from lxml import etree
from PIL import Image


class RenrenSpiderSpider(scrapy.Spider):
    """Fetch a captcha, ask the user for its code and submit the login form."""

    name = 'renren_spider'
    allowed_domains = ['renren.com']
    start_urls = ['http://www.renren.com/SysHome.do']

    def parse(self, response):
        """Prompt for the captcha and return the login request.

        Args:
            response: the response for the start URL (not inspected here).

        Returns:
            A ``scrapy.FormRequest`` posting the login form; its response is
            handled by ``after_login``.
        """
        # NOTE(review): credentials are hard-coded in the source; consider
        # loading them from settings or the environment.
        loginForm = {
            'email': '1147040@qq.com',
            'password': 'scrapy_demo1',
        }
        # The captcha URL is built directly instead of being read from the
        # page. The original passed the literal JavaScript text
        # "Math.random()" as the rnd parameter; send an actual random value.
        img_url = 'http://icode.renren.com/getcode.do?t=web_login&rnd={}'.format(
            random.random()
        )
        print(img_url)
        loginForm['icode'] = self.__parse_image(img_url)
        return scrapy.FormRequest(
            url='http://www.renren.com/PLogin.do',
            formdata=loginForm,
            callback=self.after_login,
        )

    def __parse_image(self, img_url):
        """Download the captcha, display it and return the code typed by the user.

        Args:
            img_url: URL of the captcha image.

        Returns:
            The string the user entered at the prompt.
        """
        # The image is saved to 'check_code.png' in the working directory.
        request.urlretrieve(img_url, "check_code.png")
        image = Image.open('check_code.png')
        image.show()
        check_code = input('请输入验证码')
        return check_code

    def after_login(self, reponse):
        """Write the body of the post-login response to ``renren.html``.

        Args:
            reponse: the response to the login request (parameter name kept
                as in the original for compatibility).
        """
        with open('renren.html', 'w', encoding='utf-8') as f:
            f.write(reponse.text)
————————————————
版权声明:本文为CSDN博主「lsqzedu」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/lsqzedu/article/details/99707709