{"id":206593,"date":"2020-12-13T09:20:17","date_gmt":"2020-12-13T01:20:17","guid":{"rendered":"https:\/\/lrxjmw.cn\/?p=206593"},"modified":"2020-12-07T10:20:52","modified_gmt":"2020-12-07T02:20:52","slug":"spider-pseudo-elements","status":"publish","type":"post","link":"https:\/\/lrxjmw.cn\/spider-pseudo-elements.html","title":{"rendered":"\u5982\u4f55\u8ba9\u722c\u866b\u6b63\u786e\u63d0\u53d6\u4f2a\u5143\u7d20"},"content":{"rendered":"\n\n\n
\u5bfc\u8bfb<\/td>\n\u6211\u4eec\u6765\u770b\u4e00\u4e2a\u7f51\u9875\uff0c\u5927\u5bb6\u60f3\u60f3\u4f7f\u7528 XPath \u600e\u4e48\u6293\u53d6\u3002\u53ef\u4ee5\u770b\u5230\uff0c\u5728\u6e90\u4ee3\u7801\u91cc\u9762\u6ca1\u6709\u8bf7\u6293\u53d6\u6211!\u8fd9\u6bb5\u6587\u5b57\u3002\u96be\u9053\u8fd9\u4e2a\u7f51\u9875\u662f\u5f02\u6b65\u52a0\u8f7d?\u6211\u4eec\u73b0\u5728\u6765\u770b\u4e00\u4e0b\u7f51\u9875\u7684\u8bf7\u6c42\u3002<\/strong><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n

\"\"<\/p>\n

\u6211\u4eec\u6765\u770b\u4e00\u4e2a\u7f51\u9875\uff0c\u5927\u5bb6\u60f3\u60f3\u4f7f\u7528 XPath \u600e\u4e48\u6293\u53d6\u3002<\/p>\n

\"\"<\/p>\n

\u53ef\u4ee5\u770b\u5230\uff0c\u5728\u6e90\u4ee3\u7801\u91cc\u9762\u6ca1\u6709\u8bf7\u6293\u53d6\u6211!\u8fd9\u6bb5\u6587\u5b57\u3002\u96be\u9053\u8fd9\u4e2a\u7f51\u9875\u662f\u5f02\u6b65\u52a0\u8f7d?\u6211\u4eec\u73b0\u5728\u6765\u770b\u4e00\u4e0b\u7f51\u9875\u7684\u8bf7\u6c42\uff1a<\/p>\n

\"\"<\/p>\n

\u7f51\u9875\u4e5f\u6ca1\u6709\u53d1\u8d77\u4efb\u4f55\u7684Ajax \u8bf7\u6c42\u3002\u90a3\u4e48\uff0c\u8fd9\u6bb5\u6587\u5b57\u662f\u4ece\u54ea\u91cc\u6765\u7684?<\/p>\n

\u6211\u4eec\u6765\u770b\u4e00\u4e0b\u8fd9\u4e2a\u7f51\u9875\u5bf9\u5e94\u7684 HTML\uff1a<\/p>\n

\"\"<\/p>\n

\u6574\u4e2a HTML \u91cc\u9762\uff0c\u751a\u81f3\u8fde JavaScript \u90fd\u6ca1\u6709\u3002\u90a3\u4e48\u8fd9\u6bb5\u6587\u5b57\u662f\u54ea\u91cc\u6765\u7684\u5462?<\/p>\n

\u6709\u70b9\u7ecf\u9a8c\u7684\u540c\u5b66\uff0c\u53ef\u80fd\u4f1a\u60f3\u5230\u770b\u4e00\u4e0b\u8fd9\u4e2aexample.css\u6587\u4ef6\uff0c\u5176\u5185\u5bb9\u5982\u4e0b\uff1a<\/p>\n

\"\"<\/p>\n

\u6ca1\u9519\uff0c\u6587\u5b57\u786e\u5b9e\u5728\u8fd9\u91cc\u9762\u3002\u5176\u4e2d::after\uff0c\u6211\u4eec\u79f0\u4e4b\u4e3a\u4f2a\u5143\u7d20(Pseudo-element)[1]\u3002<\/p>\n

\u5bf9\u4e8e\u4f2a\u5143\u7d20\u91cc\u9762\u7684\u6587\u5b57\uff0c\u5e94\u8be5\u5982\u4f55\u63d0\u53d6\u5462?\u5f53\u7136\uff0c\u4f60\u53ef\u4ee5\u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u6765\u63d0\u53d6\u3002\u4e0d\u8fc7\u6211\u4eec\u4eca\u5929\u4e0d\u51c6\u5907\u8bb2\u8fd9\u4e2a\u3002<\/p>\n

XPath \u6ca1\u6709\u529e\u6cd5\u63d0\u53d6\u4f2a\u5143\u7d20\uff0c\u56e0\u4e3a XPath \u53ea\u80fd\u63d0\u53d6 DOM \u6811\u4e2d\u7684\u5185\u5bb9\uff0c\u4f46\u662f\u4f2a\u5143\u7d20\u662f\u4e0d\u5c5e\u4e8e DOM \u6811\u7684\uff0c\u56e0\u6b64\u65e0\u6cd5\u63d0\u53d6\u3002\u8981\u63d0\u53d6\u4f2a\u5143\u7d20\uff0c\u9700\u8981\u4f7f\u7528 CSS \u9009\u62e9\u5668\u3002<\/p>\n

\u7531\u4e8e\u7f51\u9875\u7684 HTML \u4e0e CSS \u662f\u5206\u5f00\u7684\u3002\u5982\u679c\u6211\u4eec\u4f7f\u7528 requests \u6216\u8005 Scrapy\uff0c\u53ea\u80fd\u5355\u72ec\u62ff\u5230 HTML \u548c CSS\u3002\u5355\u72ec\u62ff\u5230 HTML \u6ca1\u6709\u4efb\u4f55\u4f5c\u7528\uff0c\u56e0\u4e3a\u6570\u636e\u6839\u672c\u4e0d\u5728\u91cc\u9762\u3002\u5355\u72ec\u62ff\u5230 CSS\uff0c\u867d\u7136\u6709\u6570\u636e\uff0c\u4f46\u5982\u679c\u4e0d\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u7684\u8bdd\uff0c\u91cc\u9762\u7684\u6570\u636e\u62ff\u4e0d\u51fa\u6765\u3002\u6240\u4ee5 BeautifulSoup4\u7684 CSS \u9009\u62e9\u5668\u4e5f\u6ca1\u6709\u4ec0\u4e48\u4f5c\u7528\u3002\u6240\u4ee5\u6211\u4eec\u9700\u8981\u628a CSS \u548c HTML \u653e\u5230\u4e00\u8d77\u6765\u6e32\u67d3\uff0c\u7136\u540e\u518d\u4f7f\u7528JavaScript \u7684 CSS \u9009\u62e9\u5668\u627e\u5230\u9700\u8981\u63d0\u53d6\u7684\u5185\u5bb9\u3002<\/p>\n

\u9996\u5148\u6211\u4eec\u6765\u770b\u4e00\u4e0b\uff0c\u4e3a\u4e86\u63d0\u53d6\u8fd9\u4e2a\u4f2a\u5143\u7d20\u7684\u503c\uff0c\u6211\u4eec\u9700\u8981\u4e0b\u9762\u8fd9\u6bb5Js \u4ee3\u7801\uff1a<\/p>\n

window.getComputedStyle(document.querySelector('.fake_element'),':after').getPropertyValue('content')<\/pre>\n

\u5176\u4e2d\uff0cdocument.querySelector\u7684\u53c2\u6570.fake_element\u5c31\u8868\u793a\u503c\u4e3afake_element\u7684 class \u5c5e\u6027\u3002window.getComputedStyle\u7684\u7b2c\u4e8c\u4e2a\u53c2\u6570\u5c31\u662f\u4f2a\u5143\u7d20:after\u3002\u8fd0\u884c\u6548\u679c\u5982\u4e0b\u56fe\u6240\u793a\uff1a<\/p>\n

\"\"<\/p>\n

\u4e3a\u4e86\u80fd\u591f\u8fd0\u884c\u8fd9\u6bb5 JavaScript\uff0c\u6211\u4eec\u9700\u8981\u4f7f\u7528\u6a21\u62df\u6d4f\u89c8\u5668\uff0c\u65e0\u8bba\u662f Selenium \u8fd8\u662f Puppeteer \u90fd\u53ef\u4ee5\u3002\u8fd9\u91cc\u4ee5 Selenium \u4e3a\u4f8b\u3002<\/p>\n

\u5728 Selenium \u8981\u6267\u884c Js\uff0c\u9700\u8981\u4f7f\u7528driver.execute_script()\u65b9\u6cd5\uff0c\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n

\"\"<\/p>\n

\u63d0\u53d6\u51fa\u6765\u7684\u5185\u5bb9\u6700\u5916\u5c42\u4f1a\u5305\u4e0a\u4e00\u5bf9\u53cc\u5f15\u53f7\uff0c\u62ff\u5230\u4ee5\u540e\u79fb\u9664\u5916\u4fa7\u7684\u53cc\u5f15\u53f7\uff0c\u5c31\u662f\u6211\u4eec\u5728\u7f51\u9875\u4e0a\u770b\u5230\u7684\u5185\u5bb9\u4e86\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"

\u6211\u4eec\u6765\u770b\u4e00\u4e2a\u7f51\u9875\uff0c\u5927\u5bb6\u60f3\u60f3\u4f7f\u7528 XPath \u600e\u4e48\u6293\u53d6\u3002 \u53ef\u4ee5\u770b\u5230\uff0c\u5728\u6e90\u4ee3\u7801\u91cc\u9762\u6ca1\u6709\u8bf7\u6293\u53d6\u6211!\u8fd9\u6bb5\u6587\u5b57\u3002\u96be\u9053\u8fd9\u4e2a […]<\/p>\n","protected":false},"author":317,"featured_media":169135,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[55],"tags":[],"class_list":["post-206593","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-thread"],"acf":[],"_links":{"self":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/206593","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/users\/317"}],"replies":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/comments?post=206593"}],"version-history":[{"count":2,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/206593\/revisions"}],"predecessor-version":[{"id":206602,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/206593\/revisions\/206602"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/media\/169135"}],"wp:attachment":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/media?parent=206593"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/categories?post=206593"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/tags?post=206593"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}