-- Split one raw CSV line into its first ten fields using only regexp_extract
-- (Java regular-expression syntax, as used by Hive-style engines).
--
-- Pattern, for field number N:
--     ^(?:(".*?"|[^,]*)(?:,|$)){N}
--   * ^               pins the match to the start of the line.
--   * (".*?"|[^,]*)   one field: a double-quoted run (which may contain
--                     commas) or a run of non-comma characters, possibly empty.
--   * (?:,|$)         the field must be followed by a separator or end of line.
--   * {N}             repeats field+separator N times; a repeated capture
--                     group keeps the text of its last iteration, so group 1
--                     is the N-th field.
--
-- Why the separator is matched AFTER the field and is not optional: with the
-- former "(,?field){N}" shape, a line whose first field is empty (",b,c") lost
-- its leading comma to the first iteration, so c01 came back as "b" and every
-- later column was shifted by one.
--
-- Quoted fields are returned with their surrounding quotes, unchanged.
-- NOTE(review): this is still a regex approximation of CSV; quoted fields that
-- contain the two-character sequence "", (escaped quote followed by a comma)
-- will be split early. A real CSV reader (e.g. a CSV SerDe) is the robust
-- option if the input can contain such values.
with raw_sample as (
    select 'field1,field2,fiend3,123,456,"http://some.domain/abc/Player.aspx?playerID=111&BrowseIds=2221,423062611,423870887,424044345,...,",THIS_IS_MY,en,20 294 998 1001,end' as raw_line
)
select
    regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){01}', 1) as c01
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){02}', 1) as c02
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){03}', 1) as c03
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){04}', 1) as c04
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){05}', 1) as c05
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){06}', 1) as c06
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){07}', 1) as c07
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){08}', 1) as c08
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){09}', 1) as c09
    ,regexp_extract(raw_line, '^(?:(".*?"|[^,]*)(?:,|$)){10}', 1) as c10
from raw_sample
;
+--------+--------+--------+-----+-----+-----------------------------------------------------------------------------------------------------+------------+-----+-----------------+-----+
| c01 | c02 | c03 | c04 | c05 | c06 | c07 | c08 | c09 | c10 |
+--------+--------+--------+-----+-----+-----------------------------------------------------------------------------------------------------+------------+-----+-----------------+-----+
| field1 | field2 | fiend3 | 123 | 456 | "http://some.domain/abc/Player.aspx?playerID=111&BrowseIds=2221,423062611,423870887,424044345,...," | THIS_IS_MY | en | 20 294 998 1001 | end |
+--------+--------+--------+-----+-----+-----------------------------------------------------------------------------------------------------+------------+-----+-----------------+-----+
你說的「手動解析」是什麼意思?爲什麼不使用 CSV SerDe? –
我的意思是手動解析,並在 Hive 中執行。我需要計算幾個 CSV 文件並將它們合併到一個表中,另外還有其他計算,所以我需要用查詢來完成這項工作。 :/ 也許我需要用 Spark 來做這件事…… –
那麼,爲什麼不使用CSV SerDe? –