{"id":144,"date":"2020-07-15T08:37:42","date_gmt":"2020-07-15T00:37:42","guid":{"rendered":"http:\/\/www.gaoxigang.com\/?p=144"},"modified":"2020-07-15T08:37:42","modified_gmt":"2020-07-15T00:37:42","slug":"scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-yield%e4%b8%8e%e8%87%aa%e5%8a%a8%e7%bf%bb%e9%a1%b5","status":"publish","type":"post","link":"https:\/\/www.gaoxigang.com\/index.php\/2020\/07\/15\/scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-yield%e4%b8%8e%e8%87%aa%e5%8a%a8%e7%bf%bb%e9%a1%b5\/","title":{"rendered":"Scrapy \u722c\u866b\u6846\u67b6-yield\u4e0e\u81ea\u52a8\u7ffb\u9875"},"content":{"rendered":"\n<h2 class=\"wp-block-heading\">yield\u4f18\u70b9<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\"><p>yield \u7684\u597d\u5904\u662f\u663e\u800c\u6613\u89c1\u7684\uff0c\u628a\u4e00\u4e2a\u51fd\u6570\u6539\u5199\u4e3a\u4e00\u4e2a generator \u5c31\u83b7\u5f97\u4e86\u8fed\u4ee3\u80fd\u529b\uff0c\u6bd4\u8d77\u7528\u7c7b\u7684\u5b9e\u4f8b\u4fdd\u5b58\u72b6\u6001\u6765\u8ba1\u7b97\u4e0b\u4e00\u4e2a next() \u7684\u503c\uff0c\u4e0d\u4ec5\u4ee3\u7801\u7b80\u6d01\uff0c\u800c\u4e14\u6267\u884c\u6d41\u7a0b\u5f02\u5e38\u6e05\u6670<\/p><\/blockquote>\n\n\n\n<p class=\"wp-block-paragraph\">\u800c\u5728Scrapy\u722c\u866b\u6846\u67b6\u4e2d\uff0cyield\u6709\u5929\u7136\u7684\u4f7f\u7528\u573a\u666f\uff0c\u56e0\u4e3a\u6211\u4eec\u5e76\u4e0d\u77e5\u9053\u722c\u866b\u6bcf\u6b21\u83b7\u53d6\u6570\u636e\u7684\u5927\u5c0f\uff0c\u5982\u679c\u6bcf\u6b21\u90fd\u4e00\u8d77\u8fd4\u56de\u5219\u6570\u636e\u91cf\u4f1a\u975e\u5e38\u5927\uff0c\u6b64\u65f6\u5982\u679c\u91c7\u7528yield\u6765\u4f18\u5316\uff0c\u5219\u4ee3\u7801\u4f1a\u975e\u5e38\u7b80\u6d01\u4e14\u9ad8\u6548<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">yield\u4f18\u5316\u8fd4\u56de\u6570\u636e<\/h2>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># -*- coding: utf-8 -*-\nimport scrapy\nfrom lxml import etree\nfrom douban.items import DoubanItem\n\nclass DoubanSpiderSpider(scrapy.Spider):\n    name = 'douban_spider'\n    # \u5141\u8bb8\u4e0b\u8f7d\u7684\u57df\u540d\n    allowed_domains = ['movie.douban.com']\n    # \u914d\u7f6e\u4e0b\u8f7d\u7684\u9996\u5730\u5740\n    start_urls = ['http:\/\/movie.douban.com\/top250']\n    # \u4e0b\u8f7d\u5b8c\u6bd5\u4e4b\u540e\u7684\u89e3\u6790\u65b9\u6cd5 (parse\u5728\u6e90\u7801\u4e2d\u652f\u6301yield)\n    def parse(self, response):\n        # print(response.text)\n        html = etree.HTML(response.text)\n        # \u9996\u5148\u901a\u8fc7xpath\u83b7\u53d6ol\n        li_list = html.xpath(\"\/\/ol[@class='grid_view']\/li\")\n        for li in li_list:\n            item = DoubanItem()\n            # em = title = img = comment\n            item['em'] = li.xpath(\".\/\/em\/text()\")[0]\n            item['title'] = li.xpath(\".\/\/span[@class='title']\/text()\")[0]\n            item['img'] = li.xpath(\".\/\/img\/@src\")[0]\n            item['comment'] = li.xpath(\".\/\/div[@class='star']\/span\/text()\")[-1]\n            # yield\u8fd4\u56de\u5f53\u524d\u7535\u5f71\u7684\u6570\u636e\n            yield item\n        try:\n            # \u83b7\u53d6\u540e\u9875\u8d85\u94fe\u63a5\u7684\u503c \uff08xpath\u8fd4\u56de\u7684\u662flist\uff09\n            next_page = html.xpath(\"\/\/span[@class='next']\/a\/@href\")[0]\n            # \u624b\u52a8\u53d1\u9001\u8bf7\u6c42,\u8ba9\u722c\u866b\u53bb\u89e3\u6790\u4e0b\u4e00\u9875\u7684\u6570\u636e (ajax)\n            yield scrapy.Request(url = 'http:\/\/movie.douban.com\/top250' + next_page,callback=self.parse)\n        except:\n            print('\u4e0b\u8f7d\u5b8c\u6bd5.......')\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>yield\u4f18\u70b9 yield \u7684\u597d\u5904\u662f\u663e\u800c\u6613\u89c1\u7684\uff0c\u628a\u4e00\u4e2a\u51fd\u6570\u6539\u5199\u4e3a\u4e00\u4e2a generator \u5c31\u83b7\u5f97\u4e86\u8fed\u4ee3\u80fd\u529b\uff0c\u6bd4 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-144","post","type-post","status-publish","format-standard","hentry","category-biji"],"_links":{"self":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/144","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/comments?post=144"}],"version-history":[{"count":0,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/144\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/media?parent=144"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/categories?post=144"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/tags?post=144"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}