{"id":135,"date":"2020-07-10T07:49:34","date_gmt":"2020-07-09T23:49:34","guid":{"rendered":"http:\/\/www.gaoxigang.com\/?p=135"},"modified":"2020-07-10T07:49:34","modified_gmt":"2020-07-09T23:49:34","slug":"scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-xpath%e8%8e%b7%e5%8f%96%e6%9c%89%e4%bb%b7%e5%80%bc%e7%9a%84%e6%95%b0%e6%8d%ae","status":"publish","type":"post","link":"https:\/\/www.gaoxigang.com\/index.php\/2020\/07\/10\/scrapy-%e7%88%ac%e8%99%ab%e6%a1%86%e6%9e%b6-xpath%e8%8e%b7%e5%8f%96%e6%9c%89%e4%bb%b7%e5%80%bc%e7%9a%84%e6%95%b0%e6%8d%ae\/","title":{"rendered":"Scrapy \u722c\u866b\u6846\u67b6-xpath\u83b7\u53d6\u6709\u4ef7\u503c\u7684\u6570\u636e"},"content":{"rendered":"<h1>\u8c46\u74e3\u7535\u5f71\u9875\u5206\u6790<\/h1>\n<blockquote><p>\u5df2\u7ecf\u5411\u60a8\u8bb2\u89e3\u4e86\u5982\u4f55\u5728 XML \u6587\u6863\u4e2d\u67e5\u627e\u4fe1\u606f\uff0c\u5177\u4f53\u6765\u8bf4\u5df2\u7ecf\u5b66\u4f1a\u4e86\u5982\u4f55\u83b7\u53d6\u5143\u7d20\u3001\u5185\u5bb9\u3001\u5c5e\u6027\uff0c\u5e76\u4e14\u8fd8\u77e5\u9053\u5982\u4f55\u901a\u8fc7\u6807\u7b7e\u7684\u5c5e\u6027\u6765\u8fdb\u884c\u7b5b\u9009\u4e0e\u8fc7\u6ee4\u3002\u672c\u7ae0\u8282\u6765\u8bb2\u89e3\u5982\u4f55\u901a\u8fc7xpath\u83b7\u53d6\u8c46\u74e3\u7684\u6570\u636e<\/p><\/blockquote>\n<h1>\u7b2c\u4e00\u9875\u4e0b\u8f7d\u5b9e\u73b0<\/h1>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># -*- coding: utf-8 -*-\nimport scrapy\nfrom lxml import etree\nfrom douban.items import DoubanItem\n\nclass DoubanSpiderSpider(scrapy.Spider):\n    name = 'douban_spider'\n    # \u5141\u8bb8\u4e0b\u8f7d\u7684\u57df\u540d\n    allowed_domains = ['movie.douban.com']\n    # \u914d\u7f6e\u4e0b\u8f7d\u7684\u9996\u5730\u5740\n    start_urls = ['http:\/\/movie.douban.com\/top250']\n    def parse(self, response):\n        # print(response.text)\n        html = etree.HTML(response.text)\n        # \u9996\u5148\u901a\u8fc7xpath\u83b7\u53d6ol\n        li_list = html.xpath(\"\/\/ol[@class='grid_view']\/li\")\n        for li in 
li_list:\n            item = DoubanItem()\n            # em = title = img = comment\n            print(li.xpath(\".\/\/em\/text()\")[0])\n            print(li.xpath(\".\/\/span[@class='title']\/text()\")[0])\n            print(li.xpath(\".\/\/img\/@src\")[0])\n            print(li.xpath(\".\/\/div[@class='star']\/span\/text()\")[-1])\n<\/pre>\n<h1>\u524d5\u90e8\u7535\u5f71\u6570\u636e\u5982\u4e0b<\/h1>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">1\n\u8096\u7533\u514b\u7684\u6551\u8d4e\nhttps:\/\/img3.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p480747492.jpg\n1551310\u4eba\u8bc4\u4ef7\n2\n\u9738\u738b\u522b\u59ec\nhttps:\/\/img3.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2561716440.jpg\n1146654\u4eba\u8bc4\u4ef7\n3\n\u8fd9\u4e2a\u6740\u624b\u4e0d\u592a\u51b7\nhttps:\/\/img3.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p511118051.jpg\n1399607\u4eba\u8bc4\u4ef7\n4\n\u963f\u7518\u6b63\u4f20\nhttps:\/\/img3.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2559011361.jpg\n1209414\u4eba\u8bc4\u4ef7\n5\n\u7f8e\u4e3d\u4eba\u751f\nhttps:\/\/img3.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p510861873.jpg\n708487\u4eba\u8bc4\u4ef7\n<\/pre>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u8c46\u74e3\u7535\u5f71\u9875\u5206\u6790 \u5df2\u7ecf\u5411\u60a8\u8bb2\u89e3\u4e86\u5982\u4f55\u5728 XML \u6587\u6863\u4e2d\u67e5\u627e\u4fe1\u606f\uff0c\u5177\u4f53\u6765\u8bf4\u5df2\u7ecf\u5b66\u4f1a\u4e86\u5982\u4f55\u83b7\u53d6\u5143\u7d20\u3001\u5185\u5bb9\u3001\u5c5e\u6027\uff0c\u5e76\u4e14 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-135","post","type-post","status-publish","format-standard","hentry","category-biji"],"_links":{"self":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/135","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/comments?post=135"}],"version-history":[{"count":0,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/posts\/135\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/media?parent=135"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/categories?post=135"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.gaoxigang.com\/index.php\/wp-json\/wp\/v2\/tags?post=135"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}