我目前正在將一些數據庫從excel格式遷移到PostgreSQL。其中一些記錄在一行中,由分隔符分隔。我想寫由選定的隔膜通過以下方式分離所選列轉換表的功能:PGSQL函數在特定列的分隔符上拆分表格
create table janek.temp (a integer, b text);
insert into janek.temp values (1, 'cat');
insert into janek.temp values (2, 'dog;hound');
此表中包含2行,我想有一個功能,當我執行:
select * from janek.split_table ('janek', 'temp', 'b', ';')
我得到3行返回:
我與strugge1;'cat'
2;'dog'
2;'hound'
目前的問題是 - 不知道如何通過模式名和表名到RETURNS setof $1.$2 AS
- 首次執行的查詢不起作用,原因我不明白,錯誤粘貼在下面
我希望我的邏輯對你們很清楚。我提供了示例數據和期望的結果。我對PGSQL功能並不熟悉,但我希望我編寫的代碼易於操作。
我希望這個函數能被重用,對我的數據遷移似乎很有幫助。
這裏是我的功能代碼:
create or replace function janek.split_table (table_schema text, table_name text, column_name text, separator text)
RETURNS setof $1.$2 AS
-- don't know how to pass table_schema.table_name as variable - need help here, this line doesn't work, but after manually entering schema and table name it works
$BODY$
DECLARE
-- string variable containing sql queries to execute
execute_query text;
-- string variable where column list of target table will be assigned, except for one column - column_name
column_list text;
-- variable where maximal count of separator will be assigned
separator_count integer;
BEGIN
-- assigning variable column_list
-- first error here, that I don't understand - when I execute this select statement manually it returns one column containing one string:
-- ERROR: query "SELECT 'array_to_string(array(select column_name::text
-- from information_schema.columns
-- where table_name = '''||table_name||'''
-- and table_schema = '''||table_schema||'''
-- and column_name <> '''||column_name||'''
-- ), ', ');'" returned 2 columns
-- CONTEXT: PL/pgSQL function "split_table" line 15 at assignment
execute_query := 'SELECT array_to_string(array(select column_name::text
from information_schema.columns
where table_name = '''||table_name||'''
and table_schema = '''||table_schema||'''
and column name <> '''||column_name||'''
), ', ');';
execute execute_query into column_list;
-- assigning variable separator_count - code of function janek.cointinstring below, it returns number of occurences of separator in string
execute_query := 'select max(janek.countinstring('''||column_name||''', '''||separator||''')) from '||table_schema||'.'||table_name||'''';
execute execute_query into separator_count;
-- redefining table we're transforming - splitting column_name to an array on delimiter + deleting spaces. If you guys know a way to remove spaces only 'touching' the separator it would be better:
execute_query := 'with t as (
select '||column_list||',
case when position('''||separator||''' in '||table_name||') > 0 then
regexp_split_to_array(replace('||column_name||', '' '', ''''), '''||separator||''')
elsif length('||table_name||') > 0 then array['||column_name||']
else null
as arr
from '||table_schema||'.'||table_name||')';
-- main loop, iterates on table in order to transform column 'arr' to separate strings instead of array using union all:
for i in 1..separator_count loop
execute_query := execute_query||'
select
'||column_list||', arr['||separator||'] as '||column_name||'
from t
where arr['||separator||'] is not null
union all';
end loop;
-- removing last union all
execute_query := substr(execute_query, 1, length(execute_query) - 9);
-- executing the main query
RETURN QUERY EXECUTE execute_query;
END $BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
ALTER FUNCTION janek.split_table (table_schema text, table_name text, column_name text, separator text)
OWNER TO jsiekierski;
GRANT EXECUTE ON FUNCTION janek.split_table (table_schema text, table_name text, column_name text, separator text) TO jsiekierski;
GRANT EXECUTE ON FUNCTION janek.split_table (table_schema text, table_name text, column_name text, separator text) TO wsd_users;
-- janek.countinstring function code:
CREATE OR REPLACE FUNCTION janek.countinstring(text, text)
RETURNS integer AS
$BODY$
SELECT(Length($1) - Length(REPLACE($1, $2, '')))/Length($2) ;
$BODY$
LANGUAGE sql IMMUTABLE
COST 100;
ALTER FUNCTION janek.countinstring(text, text)
OWNER TO jsiekierski;
感謝。像往常一樣重新創造車輪。 – fetta
難道我們都是? :-) –