{"id":171,"date":"2020-07-23T08:37:42","date_gmt":"2020-07-23T00:37:42","guid":{"rendered":"http:\/\/www.gaoxigang.com\/?p=171"},"modified":"2020-07-23T08:37:42","modified_gmt":"2020-07-23T00:37:42","slug":"scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-%e7%ae%80%e4%b9%a6%e4%b8%9a%e5%8a%a1%e5%88%86%e6%9e%90","status":"publish","type":"post","link":"https:\/\/www.gaoxigang.com\/index.php\/2020\/07\/23\/scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-%e7%ae%80%e4%b9%a6%e4%b8%9a%e5%8a%a1%e5%88%86%e6%9e%90\/","title":{"rendered":"Scrapy \u722c\u866b\u6846\u67b6-\u7b80\u4e66\u4e1a\u52a1\u5206\u6790"},"content":{"rendered":"\n<h2 class=\"wp-block-heading\">\u521b\u5efa\u7b80\u4e66\u722c\u866b\u9879\u76ee<\/h2>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">C:\\Users\\Administrator\\Desktop>scrapy startproject jianshu\nNew Scrapy project 'jianshu', using template directory 'd:\\anaconda3\\lib\\site-packages\\scrapy\\templates\\project', created in:\n    C:\\Users\\Administrator\\Desktop\\jianshu\n\nYou can start your first spider with:\n    cd jianshu\n    scrapy genspider example example.com\n<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u521b\u5efacrawl\u89e3\u6790\u5668<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\"><p>\u4e4b\u524d\u521b\u5efa\u7684spider\u89e3\u6790\u5668\u91c7\u7528\u90fd\u662fbasic\u6a21\u677f\uff0c\u8fd9\u6b21\u722c\u866b\u662f\u8981\u4e0b\u8f7d\u7b80\u4e66\u6587\u7ae0\uff0c\u9700\u8981\u652f\u6301\u6b63\u5219\u8868\u8fbe\u5f0f\u5339\u914d\uff0c\u56e0\u6b64\u5efa\u8bae\u91c7\u7528crawl\u6a21\u677f\u6765\u521b\u5efaspider\u89e3\u6790\u5668<\/p><\/blockquote>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">C:\\Users\\Administrator\\Desktop>cd jianshu\n\nC:\\Users\\Administrator\\Desktop\\jianshu>scrapy genspider -t crawl jianshu_spider jianshu.com\nCreated spider 'jianshu_spider' using template 'crawl' in module:\n  jianshu.spiders.jianshu_spider\n<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u914d\u7f6e\u7b80\u4e66\u4e0b\u8f7d\u683c\u5f0f<\/h2>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># -*- coding: utf-8 -*-\nimport scrapy\nfrom scrapy.linkextractors import LinkExtractor\nfrom scrapy.spiders import CrawlSpider, Rule\n\nclass JianshuSpiderSpider(CrawlSpider):\n    name = 'jianshu_spider'\n    allowed_domains = ['jianshu.com']\n    start_urls = ['https:\/\/www.jianshu.com\/']\n\n    # \u53ef\u4ee5\u6307\u5b9a\u722c\u866b\u6293\u53d6\u7684\u89c4\u5219\uff0c\u652f\u6301\u6b63\u5219\u8868\u8fbe\u5f0f\n    # https:\/\/www.jianshu.com\/p\/df7cad4eb8d8\n    # https:\/\/www.jianshu.com\/p\/07b0456cbadb?*****\n    # https:\/\/www.jianshu.com\/p\/.*\n    rules = (\n        Rule(LinkExtractor(allow=r'https:\/\/www.jianshu.com\/p\/[0-9a-z]{12}.*'), callback='parse_item', follow=True),\n    )\n    # name = title = url = collection = scrapy.Field()\n    def parse_item(self, response):\n        print(response.text)\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u521b\u5efa\u7b80\u4e66\u722c\u866b\u9879\u76ee \u521b\u5efacrawl\u89e3\u6790\u5668 \u4e4b\u524d\u521b\u5efa\u7684spider\u89e3\u6790\u5668\u91c7\u7528\u90fd\u662fbasic\u6a21\u677f\uff0c\u8fd9\u6b21\u722c\u866b\u662f\u8981\u4e0b\u8f7d\u7b80 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-171","post","type-post","status-publish","format-standard","hentry","category-biji"],"_links":{"self":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/171","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/comments?post=171"}],"version-history":[{"count":0,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/171\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/media?parent=171"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/categories?post=171"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/tags?post=171"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}