{"id":245277,"date":"2022-07-01T08:02:18","date_gmt":"2022-07-01T00:02:18","guid":{"rendered":"https:\/\/lrxjmw.cn\/?p=245277"},"modified":"2022-06-23T18:05:09","modified_gmt":"2022-06-23T10:05:09","slug":"etl-summary","status":"publish","type":"post","link":"https:\/\/lrxjmw.cn\/etl-summary.html","title":{"rendered":"\u4e94\u79cd\u6d41\u5f0fETL\u6a21\u5f0f\u603b\u7ed3"},"content":{"rendered":"\n\n\n
\u5bfc\u8bfb<\/td>\n\u5728\u5b9e\u9645\u4f7f\u7528\u4e2d\uff0cETL \u4e2d\u7684\u201cT\u201d\u4ee3\u8868\u7531\u539f\u59cb\u64cd\u4f5c\u7ec4\u88c5\u800c\u6210\u7684\u5404\u79cd\u6a21\u5f0f\u3002\u5728\u672c\u6587\u4e2d\uff0c\u6211\u4eec\u5c06\u63a2\u7d22\u8fd9\u4e9b\u64cd\u4f5c\u5e76\u67e5\u770b\u5982\u4f55\u5c06\u5b83\u4eec\u5b9e\u73b0\u4e3a SQL \u8bed\u53e5\u7684\u793a\u4f8b\u3002<\/strong><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n

\u200b1970 \u5e74\u4ee3\u7684\u8bb8\u591a\u8ba1\u7b97\u6982\u5ff5\u5df2\u7ecf\u8fc7\u65f6\uff0c\u4f46ETL (Extract-Transform-Load)\u53ca\u5176\u6700\u8fd1\u7684 anagram shuffle ELT\u5e76\u975e\u5982\u6b64\uff0c\u5b83\u5728\u76ee\u7684\u5730\u4e0e\u98de\u884c\u4e2d\u64cd\u7eb5\u6570\u636e\u3002ETL \u548c ELT \u4f20\u7edf\u4e0a\u662f\u8ba1\u5212\u7684\u6279\u5904\u7406\u64cd\u4f5c\uff0c\u4f46\u968f\u7740\u5bf9\u59cb\u7ec8\u5728\u7ebf\u3001\u59cb\u7ec8\u6700\u65b0\u7684\u6570\u636e\u670d\u52a1\u7684\u9700\u6c42\u6210\u4e3a\u5e38\u6001\uff0c\u5728\u6570\u636e\u6d41\u4e0a\u64cd\u4f5c\u7684\u5b9e\u65f6 ELT \u662f\u8bb8\u591a\u7ec4\u7ec7\u7684\u76ee\u6807\u2014\u2014\u5982\u679c\u4e0d\u662f\u73b0\u5b9e\u7684\u8bdd\u3002<\/p>\n

\u5728\u5b9e\u9645\u4f7f\u7528\u4e2d\uff0cETL \u4e2d\u7684\u201cT\u201d\u4ee3\u8868\u7531\u539f\u59cb\u64cd\u4f5c\u7ec4\u88c5\u800c\u6210\u7684\u5404\u79cd\u6a21\u5f0f\u3002\u5728\u672c\u6587\u4e2d\uff0c\u6211\u4eec\u5c06\u63a2\u7d22\u8fd9\u4e9b\u64cd\u4f5c\u5e76\u67e5\u770b\u5982\u4f55\u5c06\u5b83\u4eec\u5b9e\u73b0\u4e3a SQL \u8bed\u53e5\u7684\u793a\u4f8b\u3002<\/p>\n

\u4f7f\u7528 SQL \u8bed\u53e5\u8fdb\u884c\u8f6c\u6362\uff1f<\/strong><\/div>\n

\u662f\u7684\uff01SQL \u5c06\u58f0\u660e\u6027\u8bed\u8a00\u7684\u5f3a\u5927\u548c\u7b80\u6d01\u6027\u4e0e\u4efb\u4f55\u4f7f\u7528\u4ee3\u7801\u6216\u6570\u636e\u7684\u4eba\u7684\u666e\u904d\u6280\u80fd\u76f8\u7ed3\u5408\u3002\u4e0e\u60a8\u53ef\u80fd\u7528\u4f5c\u66ff\u4ee3\u7684\u51e0\u4e4e\u4efb\u4f55\u7f16\u7a0b\u8bed\u8a00\u4e0d\u540c\uff0cSQL \u7684\u666e\u53ca\u8981\u5f52\u529f\u4e8e\u5c06\u8fd1 50 \u5e74\u7684\u5bff\u547d\u2014\u2014\u8ba1\u7b97\u884c\u4e1a\u4e2d\u7684\u51e0\u4e4e\u6bcf\u4e2a\u4eba\u90fd\u66fe\u5728\u67d0\u4e2a\u65f6\u5019\u4f7f\u7528\u8fc7\u5b83\u3002SQL \u7684\u5f3a\u5927\u529f\u80fd\u548c\u666e\u904d\u6027\u610f\u5473\u7740\u5b83\u65e0\u5904\u4e0d\u5728\uff0c\u751a\u81f3\u5728\u6784\u5efa\u6700\u65b0\u5f00\u53d1\u4eba\u5458\u6280\u672f\u548c\u670d\u52a1\u7684\u516c\u53f8\u4e2d\u4e5f\u662f\u5982\u6b64\u3002\u5f53\u901a\u8fc7\u51fd\u6570\u589e\u5f3a\u65f6\uff0cSQL \u53d8\u5f97\u66f4\u52a0\u5f3a\u5927\u3002<\/p>\n

\u7ba1\u9053\u6a21\u5f0f<\/strong><\/div>\n
1\uff1a\u8fc7\u6ee4\u5668<\/strong><\/span><\/div>\n

\"\"<\/p>\n

\u8fc7\u6ee4\u5668\u4ece\u6d41\u4e2d\u5220\u9664\u4e0d\u9700\u8981\u7684\u8bb0\u5f55\uff0c\u5220\u9664\u4e0e SQL where\u5b50\u53e5\u4e2d\u7684\u201c\u89c4\u5219\u201d\u4e0d\u5339\u914d\u7684\u8bb0\u5f55\u3002\u8fc7\u6ee4\u5668\u901a\u5e38\u7528\u4e8e\u6291\u5236\u654f\u611f\u8bb0\u5f55\u4ee5\u786e\u4fdd\u5408\u89c4\u6027\uff0c\u6216\u51cf\u5c11\u76ee\u6807\u7cfb\u7edf\u4e0a\u7684\u5904\u7406\u8d1f\u8f7d\u6216\u5b58\u50a8\u9700\u6c42\u3002<\/p>\n

 Filter only records pertaining to the application\r\ninsert into application_events\r\nselect * from http_eventswhere hostname = 'app.decodable.co'\r\nFilter only records that modify the inventory\r\ninsert into inventory_updates\r\nselect * from http_eventswhere hostname = 'api.mycompany.com' and\r\npath like '\/v1\/inventory%' and\r\nmethod in ( 'POST', 'PUT', 'DELETE', 'PATCH' )<\/pre>\n
2\uff1a\u8def\u7ebf<\/strong><\/span><\/div>\n

\"\"<\/p>\n

Route \u6a21\u5f0f\u4ece\u4e00\u4e2a\u6216\u591a\u4e2a\u8f93\u5165\u6d41\u521b\u5efa\u591a\u4e2a\u8f93\u51fa\u6d41\uff0c\u6839\u636e\u4e00\u7ec4\u89c4\u5219\u5c06\u8bb0\u5f55\u5b9a\u5411\u5230\u6b63\u786e\u7684\u76ee\u7684\u5730\u3002\u6b64\u6a21\u5f0f\u5b9e\u9645\u4e0a\u7531\u591a\u4e2a\u8fc7\u6ee4\u5668\u7ec4\u6210\uff0c\u5b83\u4eec\u90fd\u53ef\u4ee5\u67e5\u770b\u6bcf\u4e2a\u8f93\u5165\u8bb0\u5f55\uff0c\u4f46\u6bcf\u4e2a\u8fc7\u6ee4\u5668\u4ec5\u4f20\u8f93\u4e0e\u8be5\u7279\u5b9a\u76ee\u7684\u5730\u7684\u89c4\u5219\u5339\u914d\u7684\u90a3\u4e9b\u8bb0\u5f55\u3002<\/p>\n

 Route security-related HTTP events\r\ninsert into security_events\r\nselect * from http_eventswhere path like '\/login%' or\r\npath like '\/billing\/cc%'\r\nRoute app-related HTTP events\r\ninsert into application_events\r\nselect * from http_eventswhere hostname = 'app.decodable.co'\r\nRoute requests to Customer Success if it looks like the user needs help\r\ninsert into cs_alerts\r\nselect * from http_events\r\nwhere response_code between 500 and 599 or -- any server failure\r\n( path = '\/signup' and response_code != 200 ) or -- failed to sign up for any reason<\/pre>\n
3\uff1a\u53d8\u6362<\/strong><\/span><\/div>\n

\"\"<\/p>\n

\u8f6c\u6362\u7ba1\u9053\u901a\u8fc7\u4fee\u6539\u8f93\u5165\u8bb0\u5f55\u6765\u521b\u5efa\u8f93\u51fa\u8bb0\u5f55\u3002\u901a\u5e38\u8fd9\u5c06\u5bfc\u81f4 1:1 \u4f20\u8f93\uff0c\u4f46\u5728\u67d0\u4e9b\u60c5\u51b5\u4e0b\uff0c\u8f93\u51fa\u6765\u81ea\u591a\u4e2a\u8f93\u5165\u8bb0\u5f55\uff0c\u56e0\u6b64\u53ef\u80fd\u5b58\u5728 1:many \u5173\u7cfb\u3002\u5728\u8fd9\u91cc\uff0c\u6211\u4eec\u5c06\u8c03\u7528\u4e09\u4e2a\u4e13\u95e8\u7684\u8f6c\u6362\uff1a<\/p>\n

\u53d8\u6362\uff1a\u63d0\u53d6<\/p>\n

\"\"<\/p>\n

\u89e3\u6790\u8f93\u5165\u8bb0\u5f55\uff0c\u4ece\u8f93\u5165\u8bb0\u5f55\u4e2d\u63d0\u53d6\u6570\u636e\u5e76\u5c06\u5176\u7528\u4f5c\u4e30\u5bcc\u6d3e\u751f\u8f93\u51fa\u8bb0\u5f55\u7684\u57fa\u7840\u3002<\/p>\n

 Parse timestamp and action\r\ninsert into user_events\r\nselect\r\nto_date(fields['ts'], 'YYYY-MM-DD''T''HH:MI:SS') as ts,\r\nfields['user_id']    as user_id,\r\nfields['path']       as path,  case fields['method']    when 'GET'         then 'read'\r\nwhen 'POST', 'PUT' then 'modify'\r\nwhen 'DELETE'      then 'delete'\r\nend as actionfrom (  select\r\ngrok(\r\nbody,      '\\[${ISO8661_DATETIME:ts} ${DATA:method} \"${PATH:path}\" uid:${DATA:user_id}'\r\nas fields  from http_event\r\n<\/pre>\n

\u53d8\u6362\uff1a\u5f52\u4e00\u5316<\/p>\n

\"\"<\/p>\n

\u4f20\u5165\u7684\u6570\u636e\u8bb0\u5f55\u901a\u5e38\u9700\u8981\u9488\u5bf9\u6a21\u5f0f\u8fdb\u884c\u89c4\u8303\u5316\uff0c\u4ee5\u4fbf\u76ee\u6807\u7cfb\u7edf\u5904\u7406\u5b83\u4eec\u3002\u7f3a\u5c11\u7684\u5b57\u6bb5\u53ef\u80fd\u9700\u8981\u586b\u5145\u9ed8\u8ba4\u503c\uff0c\u53ef\u80fd\u9700\u8981\u5220\u9664\u53ef\u9009\u5b57\u6bb5\uff0c\u5e76\u5f3a\u5236\u6267\u884c\u6570\u636e\u7c7b\u578b\u3002<\/p>\n

 Cleanse incoming data for downstream processes\r\ninsert into sensor_readings\r\nselect\r\ncast(ifnull(sensor_id, '0') as bigint) as sensor_id,  lower(trim(name))                      as name,  cast(`value` as bigint)                as reading\r\nfrom raw_sensor_readings<\/pre>\n

\u8f6c\u6362\uff1a\u533f\u540d\u5316<\/p>\n

\"\"<\/p>\n

\u5728\u76ee\u6807\u7cfb\u7edf\u4e0d\u9700\u8981\u4fe1\u606f\u6765\u5b8c\u6210\u5904\u7406\u7684\u60c5\u51b5\u4e0b\uff0c\u533f\u540d\u7ba1\u9053\u53ea\u662f\u51fa\u4e8e\u5408\u89c4\u3001\u76d1\u7ba1\u6216\u9690\u79c1\u539f\u56e0\u800c\u6d88\u9664\u4e86\u654f\u611f\u5b57\u6bb5\u3002<\/p>\n

Anonymize SSNs and zip codes\r\ninsert into user_events_masked\r\nselect\r\nuser_id,\r\nusername,  overlay(ssn placing '*' from 1 for 12) as ssn,  substring(zip_code from 1 for 2)       as zip_code_1,\r\naction\r\nfrom user_events<\/pre>\n
4\uff1a\u805a\u5408<\/strong><\/span><\/div>\n

\u56fe\u7247\u805a\u5408\u7ba1\u9053\u901a\u5e38\u4f7f\u7528 SQL \u7a97\u53e3\u51fd\u6570\u5c06\u4f20\u5165\u8bb0\u5f55\u5206\u7ec4\u5230\u5b58\u50a8\u6876\u4e2d\uff08\u901a\u5e38\u57fa\u4e8e\u65f6\u95f4\uff09\uff0c\u5728\u8fd9\u4e9b\u5b58\u50a8\u6876\u4e0a\u6267\u884c\u805a\u5408\u64cd\u4f5c\u3002Count\u3001Min\u3001Max\u3001Avg\u3001Sum \u662f\u5178\u578b\u7684\u8fd0\u7b97\u7b26\uff0c\u4f46\u8fd8\u6709\u5f88\u591a\u3002<\/p>\n

 Count the number of events by path and status every 10 seconds.\r\ninsert into site_activity\r\nselect\r\nwindow_start,\r\nwindow_end,\r\npath,\r\nstatus,  count(1) as `count`\r\nfrom table(\r\ntumble(    table http_events,    descriptor(_time),\r\ninterval '10' seconds\r\n  )\r\n)group by window_start, window_end, path, status<\/pre>\n
5\uff1a\u89e6\u53d1<\/strong><\/span><\/div>\n

\"\"<\/p>\n

\u6211\u4eec\u7684\u6700\u7ec8\u6a21\u5f0f\u662f\u89e6\u53d1\u5668\u3002\u4e0e\u51e0\u4e4e\u6240\u6709\u5176\u4ed6\u6a21\u5f0f\u4e0d\u540c\uff0c\u89e6\u53d1\u5668\u8f93\u51fa\u8bb0\u5f55\u53ef\u80fd\u4e0e\u8f93\u5165\u8bb0\u5f55\u7684\u6a21\u5f0f\u51e0\u4e4e\u6ca1\u6709\u91cd\u53e0\uff0c\u56e0\u4e3a\u5b83\u8868\u660e\u5df2\u5728\u4e00\u4e2a\u6216\u591a\u4e2a\u8f93\u5165\u8bb0\u5f55\u4e0a\u68c0\u6d4b\u5230\u4e00\u7ec4\u6761\u4ef6\uff0c\u5e76\u4f5c\u4e3a\u7ed3\u679c\u8f93\u51fa\u8b66\u62a5\u3002\u8f93\u51fa\u6a21\u5f0f\u53ef\u4ee5\u8868\u793a\u68c0\u6d4b\u5230\u7684\u6761\u4ef6\u3001\u8981\u91c7\u53d6\u7684\u884c\u52a8\u6216\u4e24\u8005\u517c\u800c\u6709\u4e4b\u3002<\/p>\n

Build hourly usage data for a Stripe integration on the output stream\r\ninsert into stripe_product_usage\r\nselect\r\nwindow_start as _time,\r\ncustomer_id,  'abcd1234' as price_id  sum(bytes_sent) \/ 1024 \/ 1024 as mb_sentfrom table(\r\ntumble(    table document_downloads,    descriptor(_time),\r\ninterval '1' hour  )\r\n)group by window_start, customer_idhaving mb_sent > 1024<\/pre>\n","protected":false},"excerpt":{"rendered":"

\u200b1970 \u5e74\u4ee3\u7684\u8bb8\u591a\u8ba1\u7b97\u6982\u5ff5\u5df2\u7ecf\u8fc7\u65f6\uff0c\u4f46ETL (Extract-Transform-Load)\u53ca\u5176\u6700\u8fd1\u7684 […]<\/p>\n","protected":false},"author":1920,"featured_media":240847,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[55],"tags":[],"class_list":["post-245277","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-thread"],"acf":[],"_links":{"self":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/245277","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/users\/1920"}],"replies":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/comments?post=245277"}],"version-history":[{"count":5,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/245277\/revisions"}],"predecessor-version":[{"id":246022,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/245277\/revisions\/246022"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/media\/240847"}],"wp:attachment":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/media?parent=245277"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/categories?post=245277"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/tags?post=245277"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}