如果我是你,我的首要考慮的是性能我會用表函數。 mathguys解決方案可以很好地工作,但如果我們使用流水線函數,它將更具性能。
首先,我們創建我們的功能所必需的類型。
drop type type_test_table;
drop type type_test_row;
CREATE TYPE type_test_row AS OBJECT (
code varchar2(2000),
descr VARCHAR2(50)
)
/
CREATE TYPE type_test_table IS TABLE OF type_test_row
/
然後我們創建函數:
create or replace function test_pipe_func return type_test_table pipelined as
cursor c_data_in is
select '12361'||level||'_BBMS_GTECHL'||level||'|12362'||level||'_BBMS_PRIM'||level||'|12363'||level||'_BBMS_SEC'||level||'|12364'||level||'_BBU_SEC'||level as str from dual
connect by level <= 1000000;
v_element varchar2(300);
v_code varchar2(100);
v_descr varchar2(200);
p_deb number;
p_fin number;
begin
for l_data_in in c_data_in loop
p_deb := 0;
p_fin := 1;
while p_fin > 0 loop
p_fin := case when p_deb = 0 then instr(l_data_in.str,'|',1, 1) else instr(l_data_in.str,'|',p_deb-1, 2) end;
p_deb := case when p_deb = 0 then 1 else instr(l_data_in.str,'|',p_deb-1, 1)+1 end;
v_element := case when p_fin = 0 then substr(l_data_in.str, p_deb) else substr(l_data_in.str, p_deb, p_fin - p_deb) end;
p_deb := p_fin +1;
v_code := substr(v_element, 1 , instr(v_element, '_' , 1,1)-1);
v_descr := substr(v_element, instr(v_element, '_' , 1,1)+1);
pipe row(type_test_row(v_code, v_descr));
end loop;
end loop;
end test_pipe_func;
/
我改變了測試用例一點點能夠在必要時產生儘可能多的線我的測試。我使用了流水線功能來限制進程內存在大數據集情況下的使用,並且能夠在select中使用它。如果您的用例不同(我不知道可能使用輸入插入表),另一個選項可以是使用批量收集和整合。
create or replace procedure test_bulk_collect_proc as
cursor c_data_in is
select '12361'||level||'_BBMS_GTECHL'||level||'|12362'||level||'_BBMS_PRIM'||level||'|12363'||level||'_BBMS_SEC'||level as str from dual
connect by level <= 1000000;
type type_table_data_in is table of c_data_in%rowtype;
table_data_in type_table_data_in;
v_element varchar2(300);
v_code varchar2(100);
v_descr varchar2(200);
p_deb number;
p_fin number;
v_str varchar2(4000);
v_t_insr type_test_table;
limit_in number := 100000;
i number;
begin
OPEN c_data_in;
LOOP
FETCH c_data_in BULK COLLECT INTO table_data_in LIMIT limit_in;
v_t_insr := type_test_table();
i := 1;
for indx IN 1 .. table_data_in.COUNT LOOP
v_str := table_data_in(indx).str;
p_deb := 0;
p_fin := 1;
while p_fin > 0 loop
p_fin := case when p_deb = 0 then instr(v_str,'|',1, 1) else instr(v_str,'|',p_deb-1, 2) end;
p_deb := case when p_deb = 0 then 1 else instr(v_str,'|',p_deb-1, 1)+1 end;
v_element := case when p_fin = 0 then substr(v_str, p_deb) else substr(v_str, p_deb, p_fin - p_deb) end;
p_deb := p_fin +1;
v_code := substr(v_element, 1 , instr(v_element, '_' , 1,1)-1);
v_descr := substr(v_element, instr(v_element, '_' , 1,1)+1);
v_t_insr.extend;
v_t_insr(i) := type_test_row(v_code, v_descr);
i:= i+1;
end loop;
END LOOP;
forall t in v_t_insr.first..v_t_insr.last
insert into test_bbu(CODE, DESCR) values (v_t_insr(t).code, v_t_insr(t).descr);
EXIT WHEN table_data_in.COUNT < limit_in;
END LOOP;
End;
/
我測試了我的數據庫上的所有三種方法。爲了測試mathguy的sql以及我使用CTAS的流水線函數,並且對於批量收集,我只需執行該過程。
create table test_bbu as
with input_data (input_str) as (
select '12361'||level||'_BBMS_GTECHL'||level||'|12362'||level||'_BBMS_PRIM'||level||'|12363'||level||'_BBMS_SEC'||level from dual
connect by level <= 1000000
),
t (str) as (
select '|' || input_str || '|' from input_data
),
r (lvl, code, descr, str, p1_from, p2_from, p1_to, p2_to) as (
select 0, null, null, str, 1, 1, instr(str, '_', 1, 1), instr(str, '|', 1, 2)
from t
union all
select lvl+1, substr(str, p2_from + 1, p1_to - p2_from - 1),
substr(str, p1_to + 1, p2_to - p1_to - 1),
str, p1_to, p2_to, instr(str, '_', p2_to + 1, 1),
instr(str, '|', p2_to + 1, 1)
from r
where p1_to != 0
)
select code, descr
from r
where lvl != 0;
create table test_bbu2 as
select * from table(test_pipe_func);
execute test_bulk_collect_proc;
我用500K和1M行測試了三種方法。這是我的結果,但我敦促你在做出決定之前在你的環境中進行測試。
500K 1M
----------------------------------------
SQL 36s 1m:15s
Pipelined 11s 23s
Bulk Collect 8s 17s
的[這](http://stackoverflow.com/questions/14328621/splitting-string-into-multiple-rows-in-oracle) – Aleksej
不是一個真正的副本可能的複製 - 拆分一個豎線分隔字符串確實是重複的,但在這個問題中,每個管道分隔的字符串有兩個令牌,依次由第一個下劃線(可能會有更多下劃線)分隔。 – mathguy