{"id":176,"date":"2020-07-24T08:15:57","date_gmt":"2020-07-24T00:15:57","guid":{"rendered":"http:\/\/www.gaoxigang.com\/?p=176"},"modified":"2020-07-24T08:15:57","modified_gmt":"2020-07-24T00:15:57","slug":"scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-%e8%8e%b7%e5%8f%96ajax%e6%95%b0%e6%8d%ae","status":"publish","type":"post","link":"https:\/\/www.gaoxigang.com\/index.php\/2020\/07\/24\/scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-%e8%8e%b7%e5%8f%96ajax%e6%95%b0%e6%8d%ae\/","title":{"rendered":"Scrapy \u722c\u866b\u6846\u67b6-\u83b7\u53d6ajax\u6570\u636e"},"content":{"rendered":"\n<h2 class=\"wp-block-heading\">ChromeDriver\u4ecb\u7ecd<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\"><p>chromeDriver \u662f google \u4e3a\u7f51\u7ad9\u5f00\u53d1\u4eba\u5458\u63d0\u4f9b\u7684\u81ea\u52a8\u5316\u6d4b\u8bd5\u63a5\u53e3,WebDriver\u662f\u4e00\u4e2a\u5f00\u6e90\u5de5\u5177\uff0c\u7528\u4e8e\u5728\u8bb8\u591a\u6d4f\u89c8\u5668\u4e0a\u81ea\u52a8\u6d4b\u8bd5webapps\u3002\u5b83\u63d0\u4f9b\u4e86\u5bfc\u822a\u5230\u7f51\u9875\uff0c\u7528\u6237\u8f93\u5165\uff0cJavaScript\u6267\u884c\u7b49\u529f\u80fd<\/p><\/blockquote>\n\n\n\n<p class=\"wp-block-paragraph\">ChromeDriver\u7684\u5b89\u88c5\u4e00\u5b9a\u8981\u4e0eChrome\u7684\u7248\u672c\u4e00\u81f4\uff0c\u4e0d\u7136\u5c31\u4e0d\u8d77\u4f5c\u7528,\u67e5\u770bChrome\u7684\u7248\u672c\u5219\u5728\u6d4f\u89c8\u5668\u5730\u5740\u680f\u8f93\u5165\uff1achrome:\/\/version\/<br>chromedriver\u4e0b\u8f7d\u5730\u5740: http:\/\/chromedriver.storage.googleapis.com\/index.html<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u91cd\u6784\u4e0b\u8f7d\u5668\u589e\u52a0ajax\u529f\u80fd<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\"><p>\u9996\u5148\u8981\u5728settings.py\u4e2d\u5f00\u542f\u4e0b\u8f7d\u5668\u4e2d\u95f4\u4ef6\uff1aJianshuDownloaderMiddleware<\/p><\/blockquote>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" 
data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">DOWNLOADER_MIDDLEWARES = {\n   'jianshu.middlewares.JianshuDownloaderMiddleware': 543,\n}\n<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u91cd\u5199process_request\u65b9\u6cd5\uff08middlewares.py\u6587\u4ef6\uff09<\/h2>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># Define here the models for your spider middleware\nimport time\nfrom selenium import webdriver\nfrom scrapy.http.response.html import HtmlResponse\n\nclass JianshuDownloaderMiddleware:\n\n    def __init__(self):\n        #\u52a0\u8f7d\u6d4b\u8bd5\u6d4f\u89c8\u5668\n        self.driver = webdriver.Chrome(executable_path=r\"f:\\chromedriver.exe\")\n    # request:\u5219scrapy\u6846\u67b6\u4f1a\u53bb\u670d\u52a1\u5668\u52a0\u8f7d\u8d44\u6e90\n    # response:\u5219\u8df3\u8fc7\u8d44\u6e90\u4e0b\u8f7d\u76f4\u63a5\u4ea4\u7ed9\u89e3\u6790\u5668\u65b9\u6cd5\n    def process_request(self, request, spider):\n        #\u6a21\u62df\u4eba\u7c7b\u8bbf\u95ee\u9875\u9762\u7684\u884c\u4e3a\uff0c\u5e76\u4e14\u5355\u51fb\u6536\u5165\u7684\u4e13\u9898\u6309\u94ae\n        self.driver.get(request.url)\n        #\u4e3a\u4e86\u9632\u6b62\u9875\u9762\u52a0\u8f7d\u8fc7\u6162\uff0c\u7b49\u5f851\u79d2\n        time.sleep(1)\n        try:\n            while True:\n                show_more = self.driver.find_element_by_class_name('show-more')\n                show_more.click()\n                print('-'*100)\n                time.sleep(0.5)\n        except:\n            print('\u522b\u70b9\u51fb\u4e86\uff0c\u5df2\u7ecf\u6ca1\u6709\u4e86')\n        source = self.driver.page_source\n        
#\u521b\u5efa\u4e00\u4e2aresponse\u5bf9\u8c61\uff0c\u628a\u9875\u9762\u4fe1\u606f\u90fd\u5c01\u88c5\u5230response\u5bf9\u8c61\u4e2d\n        response = HtmlResponse(url=self.driver.current_url,body=source,request=request,encoding='utf-8')\n        return response\n\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>ChromeDriver\u4ecb\u7ecd chromeDriver \u662f google \u4e3a\u7f51\u7ad9\u5f00\u53d1\u4eba\u5458\u63d0\u4f9b\u7684\u81ea\u52a8\u5316\u6d4b\u8bd5\u63a5\u53e3, [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-176","post","type-post","status-publish","format-standard","hentry","category-biji"],"_links":{"self":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/176","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/comments?post=176"}],"version-history":[{"count":0,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/176\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/media?parent=176"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/categories?post=176"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/tags?post=176"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}