{"id":206593,"date":"2020-12-13T09:20:17","date_gmt":"2020-12-13T01:20:17","guid":{"rendered":"https:\/\/lrxjmw.cn\/?p=206593"},"modified":"2020-12-07T10:20:52","modified_gmt":"2020-12-07T02:20:52","slug":"spider-pseudo-elements","status":"publish","type":"post","link":"https:\/\/lrxjmw.cn\/spider-pseudo-elements.html","title":{"rendered":"\u5982\u4f55\u8ba9\u722c\u866b\u6b63\u786e\u63d0\u53d6\u4f2a\u5143\u7d20"},"content":{"rendered":"
\u5bfc\u8bfb<\/td>\n | \u6211\u4eec\u6765\u770b\u4e00\u4e2a\u7f51\u9875\uff0c\u5927\u5bb6\u60f3\u60f3\u4f7f\u7528 XPath \u600e\u4e48\u6293\u53d6\u3002\u53ef\u4ee5\u770b\u5230\uff0c\u5728\u6e90\u4ee3\u7801\u91cc\u9762\u6ca1\u6709\u8bf7\u6293\u53d6\u6211!\u8fd9\u6bb5\u6587\u5b57\u3002\u96be\u9053\u8fd9\u4e2a\u7f51\u9875\u662f\u5f02\u6b65\u52a0\u8f7d?\u6211\u4eec\u73b0\u5728\u6765\u770b\u4e00\u4e0b\u7f51\u9875\u7684\u8bf7\u6c42\u3002<\/strong><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n <\/p>\n \u6211\u4eec\u6765\u770b\u4e00\u4e2a\u7f51\u9875\uff0c\u5927\u5bb6\u60f3\u60f3\u4f7f\u7528 XPath \u600e\u4e48\u6293\u53d6\u3002<\/p>\n <\/p>\n \u53ef\u4ee5\u770b\u5230\uff0c\u5728\u6e90\u4ee3\u7801\u91cc\u9762\u6ca1\u6709\u8bf7\u6293\u53d6\u6211!\u8fd9\u6bb5\u6587\u5b57\u3002\u96be\u9053\u8fd9\u4e2a\u7f51\u9875\u662f\u5f02\u6b65\u52a0\u8f7d?\u6211\u4eec\u73b0\u5728\u6765\u770b\u4e00\u4e0b\u7f51\u9875\u7684\u8bf7\u6c42\uff1a<\/p>\n <\/p>\n \u7f51\u9875\u4e5f\u6ca1\u6709\u53d1\u8d77\u4efb\u4f55\u7684Ajax \u8bf7\u6c42\u3002\u90a3\u4e48\uff0c\u8fd9\u6bb5\u6587\u5b57\u662f\u4ece\u54ea\u91cc\u6765\u7684?<\/p>\n \u6211\u4eec\u6765\u770b\u4e00\u4e0b\u8fd9\u4e2a\u7f51\u9875\u5bf9\u5e94\u7684 HTML\uff1a<\/p>\n <\/p>\n \u6574\u4e2a HTML \u91cc\u9762\uff0c\u751a\u81f3\u8fde JavaScript \u90fd\u6ca1\u6709\u3002\u90a3\u4e48\u8fd9\u6bb5\u6587\u5b57\u662f\u54ea\u91cc\u6765\u7684\u5462?<\/p>\n \u6709\u70b9\u7ecf\u9a8c\u7684\u540c\u5b66\uff0c\u53ef\u80fd\u4f1a\u60f3\u5230\u770b\u4e00\u4e0b\u8fd9\u4e2aexample.css\u6587\u4ef6\uff0c\u5176\u5185\u5bb9\u5982\u4e0b\uff1a<\/p>\n <\/p>\n \u6ca1\u9519\uff0c\u6587\u5b57\u786e\u5b9e\u5728\u8fd9\u91cc\u9762\u3002\u5176\u4e2d::after\uff0c\u6211\u4eec\u79f0\u4e4b\u4e3a\u4f2a\u5143\u7d20(Pseudo-element)[1]\u3002<\/p>\n 
\u5bf9\u4e8e\u4f2a\u5143\u7d20\u91cc\u9762\u7684\u6587\u5b57\uff0c\u5e94\u8be5\u5982\u4f55\u63d0\u53d6\u5462?\u5f53\u7136\uff0c\u4f60\u53ef\u4ee5\u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u6765\u63d0\u53d6\u3002\u4e0d\u8fc7\u6211\u4eec\u4eca\u5929\u4e0d\u51c6\u5907\u8bb2\u8fd9\u4e2a\u3002<\/p>\n XPath \u6ca1\u6709\u529e\u6cd5\u63d0\u53d6\u4f2a\u5143\u7d20\uff0c\u56e0\u4e3a XPath \u53ea\u80fd\u63d0\u53d6 DOM \u6811\u4e2d\u7684\u5185\u5bb9\uff0c\u4f46\u662f\u4f2a\u5143\u7d20\u662f\u4e0d\u5c5e\u4e8e DOM \u6811\u7684\uff0c\u56e0\u6b64\u65e0\u6cd5\u63d0\u53d6\u3002\u8981\u63d0\u53d6\u4f2a\u5143\u7d20\uff0c\u9700\u8981\u4f7f\u7528 CSS \u9009\u62e9\u5668\u3002<\/p>\n \u7531\u4e8e\u7f51\u9875\u7684 HTML \u4e0e CSS \u662f\u5206\u5f00\u7684\uff0c\u5982\u679c\u6211\u4eec\u4f7f\u7528 requests \u6216\u8005 Scrapy\uff0c\u53ea\u80fd\u5355\u72ec\u62ff\u5230 HTML \u548c CSS\u3002\u5355\u72ec\u62ff\u5230 HTML \u6ca1\u6709\u4efb\u4f55\u4f5c\u7528\uff0c\u56e0\u4e3a\u6570\u636e\u6839\u672c\u4e0d\u5728\u91cc\u9762\u3002\u5355\u72ec\u62ff\u5230 CSS\uff0c\u867d\u7136\u6709\u6570\u636e\uff0c\u4f46\u5982\u679c\u4e0d\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u7684\u8bdd\uff0c\u91cc\u9762\u7684\u6570\u636e\u62ff\u4e0d\u51fa\u6765\u3002\u6240\u4ee5 BeautifulSoup4 \u7684 CSS \u9009\u62e9\u5668\u4e5f\u6ca1\u6709\u4ec0\u4e48\u4f5c\u7528\u3002\u6240\u4ee5\u6211\u4eec\u9700\u8981\u628a CSS \u548c HTML \u653e\u5230\u4e00\u8d77\u6765\u6e32\u67d3\uff0c\u7136\u540e\u518d\u4f7f\u7528 JavaScript \u7684 CSS \u9009\u62e9\u5668\u627e\u5230\u9700\u8981\u63d0\u53d6\u7684\u5185\u5bb9\u3002<\/p>\n \u9996\u5148\u6211\u4eec\u6765\u770b\u4e00\u4e0b\uff0c\u4e3a\u4e86\u63d0\u53d6\u8fd9\u4e2a\u4f2a\u5143\u7d20\u7684\u503c\uff0c\u6211\u4eec\u9700\u8981\u4e0b\u9762\u8fd9\u6bb5 JavaScript \u4ee3\u7801\uff1a<\/p>\n window.getComputedStyle(document.querySelector('.fake_element'),':after').getPropertyValue('content')<\/pre>\n |