{"id":138,"date":"2020-07-15T08:03:47","date_gmt":"2020-07-15T00:03:47","guid":{"rendered":"http:\/\/www.gaoxigang.com\/?p=138"},"modified":"2020-07-15T08:03:47","modified_gmt":"2020-07-15T00:03:47","slug":"scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-item%e6%a8%a1%e5%9e%8b%e5%b0%81%e8%a3%85%e6%95%b0%e6%8d%ae","status":"publish","type":"post","link":"https:\/\/www.gaoxigang.com\/index.php\/2020\/07\/15\/scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-item%e6%a8%a1%e5%9e%8b%e5%b0%81%e8%a3%85%e6%95%b0%e6%8d%ae\/","title":{"rendered":"Scrapy \u722c\u866b\u6846\u67b6-Item\u6a21\u578b\u5c01\u88c5\u6570\u636e"},"content":{"rendered":"\n<h2 class=\"wp-block-heading\">\u521b\u5efaItem\u6a21\u578b\u5c42<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\"><p>\u524d\u9762\u6211\u4eec\u5df2\u7ecf\u53ef\u4ee5\u628a\u7b2c\u4e00\u9875\u7684\u6570\u636e\u83b7\u53d6\uff0c\u4f46\u662f\u4ec5\u4ec5\u662f\u5728\u63a7\u5236\u53f0\u6253\u5370\u3002\u5728Scrapy\u8fd8\u6709\u4e00\u4e2aItem\u7684\u6a21\u5757\uff0c\u6b64\u7c7b\u5c31\u662f\u6a21\u578b\u5c42\uff0c\u4e3b\u8981\u5b8c\u6210\u5bf9\u4ef7\u503c\u6570\u636e\u7684\u5c01\u88c5\uff0c\u7136\u540e\u518d\u5199\u5165\u5230\u6570\u636e\u5e93\u4e2d<\/p><\/blockquote>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import scrapy\n# \u6b64\u7c7b\u5c31\u662f\u6a21\u578b\u5c42\uff0c\u4e3b\u8981\u5b8c\u6210\u5bf9\u4ef7\u503c\u6570\u636e\u7684\u5c01\u88c5\uff0c\u7136\u540e\u518d\u5199\u5165\u5230\u6570\u636e\u5e93\u4e2d\nclass DoubanItem(scrapy.Item):\n    # define the fields for your item here like:\n    em = title = img = comment = scrapy.Field()<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u5c01\u88c5\u5df2\u722c\u53d6\u6570\u636e<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow 
wp-block-quote-is-layout-flow\"><p>\u628a\u722c\u53d6\u7684\u6570\u636e\u5b58\u50a8\u5230DoubanItem\u5bf9\u8c61\u4e2d\uff0c\u7136\u540e\u628a\u5b83\u518d\u4ea4\u7ed9item_list\uff0c\u6700\u540e\u8fd4\u56deitem_list<\/p><\/blockquote>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import scrapy\nfrom lxml import etree\nfrom douban.items import DoubanItem\n\nclass DoubanSpiderSpider(scrapy.Spider):\n    name = 'douban_spider'\n    # \u5141\u8bb8\u4e0b\u8f7d\u7684\u57df\u540d\n    allowed_domains = ['movie.douban.com']\n    # \u914d\u7f6e\u4e0b\u8f7d\u7684\u9996\u5730\u5740\n    start_urls = ['http:\/\/movie.douban.com\/top250']\n    # \u4e0b\u8f7d\u5b8c\u6bd5\u4e4b\u540e\u7684\u89e3\u6790\u65b9\u6cd5 (parse\u5728\u6e90\u7801\u4e2d\u652f\u6301yield)\n    def parse(self, response):\n        # print(response.text)\n        html = etree.HTML(response.text)\n        # \u9996\u5148\u901a\u8fc7xpath\u83b7\u53d6ol\n        li_list = html.xpath(\"\/\/ol[@class='grid_view']\/li\")\n        item_list = []\n        for li in li_list:\n            item = DoubanItem()\n            # em = title = img = comment\n            item['em'] = li.xpath(\".\/\/em\/text()\")[0]\n            item['title'] = li.xpath(\".\/\/span[@class='title']\/text()\")[0]\n            item['img'] = li.xpath(\".\/\/img\/@src\")[0]\n            item['comment'] = li.xpath(\".\/\/div[@class='star']\/span\/text()\")[-1]\n            item_list.append(item)\n        return item_list\n<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">yield\u7684\u8bed\u6cd5\u4ecb\u7ecd<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\"><p>\u4e00\u4e2a\u5e26\u6709 yield \u7684\u51fd\u6570\u5c31\u662f\u4e00\u4e2a generator\uff0c\u5b83\u548c\u666e\u901a\u51fd\u6570\u4e0d\u540c\uff0c\u751f\u6210\u4e00\u4e2a 
generator \u770b\u8d77\u6765\u50cf\u51fd\u6570\u8c03\u7528\uff0c\u4f46\u4e0d\u4f1a\u6267\u884c\u4efb\u4f55\u51fd\u6570\u4ee3\u7801\uff0c\u76f4\u5230\u5bf9\u5176\u8c03\u7528 next()\uff08\u5728 for \u5faa\u73af\u4e2d\u4f1a\u81ea\u52a8\u8c03\u7528 next()\uff09\u624d\u5f00\u59cb\u6267\u884c\u3002\u867d\u7136\u6267\u884c\u6d41\u7a0b\u4ecd\u6309\u51fd\u6570\u7684\u6d41\u7a0b\u6267\u884c\uff0c\u4f46\u6bcf\u6267\u884c\u5230\u4e00\u4e2a yield \u8bed\u53e5\u5c31\u4f1a\u4e2d\u65ad\uff0c\u5e76\u8fd4\u56de\u4e00\u4e2a\u8fed\u4ee3\u503c\uff0c\u4e0b\u6b21\u6267\u884c\u65f6\u4ece yield \u7684\u4e0b\u4e00\u4e2a\u8bed\u53e5\u7ee7\u7eed\u6267\u884c\u3002\u770b\u8d77\u6765\u5c31\u597d\u50cf\u4e00\u4e2a\u51fd\u6570\u5728\u6b63\u5e38\u6267\u884c\u7684\u8fc7\u7a0b\u4e2d\u88ab yield \u4e2d\u65ad\u4e86\u6570\u6b21\uff0c\u6bcf\u6b21\u4e2d\u65ad\u90fd\u4f1a\u901a\u8fc7 yield \u8fd4\u56de\u5f53\u524d\u7684\u8fed\u4ee3\u503c<\/p><\/blockquote>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">def fn():\n    for i in range(3):\n        return i  # return \u5219\u8bf4\u660e\u6b64\u6b21\u8c03\u7528\u7ed3\u675f\n#\nprint(type(fn))\nprint(fn())\nprint(fn())\n\ndef fn():\n    for i in range(3):\n        yield i  # yield \u8fd4\u56de\u7684\u662f\u4e00\u4e2a\u8fed\u4ee3\u5668,next()\u65b9\u6cd5\u6765\u6267\u884c\n    yield 'hehe'\n\nprint(type(fn))\ng = fn()\n# \u76f4\u63a5\u8fd4\u56deyield\u540e\u9762\u7684\u503c\nprint(next(g))\n# \u4e0b\u4e00\u6b21next\u4f1a\u7ee7\u7eed\u6267\u884cyield\u540e\u9762\u7684\u8bed\u53e5\nprint(next(g))\nprint(next(g))\nprint(next(g))\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u521b\u5efaItem\u6a21\u578b\u5c42 
\u524d\u9762\u6211\u4eec\u5df2\u7ecf\u53ef\u4ee5\u628a\u7b2c\u4e00\u9875\u7684\u6570\u636e\u83b7\u53d6\uff0c\u4f46\u662f\u4ec5\u4ec5\u662f\u5728\u63a7\u5236\u53f0\u6253\u5370\u3002\u5728Scrapy\u8fd8\u6709\u4e00\u4e2aItem [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-138","post","type-post","status-publish","format-standard","hentry","category-biji"],"_links":{"self":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/138","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/comments?post=138"}],"version-history":[{"count":0,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/138\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/media?parent=138"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/categories?post=138"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/tags?post=138"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}