2009-10-01 191 views
3

我是Postgres和SQL的新手。我創建了以下腳本,從最近一行中的點到投影點繪製一條線。它適用於小數據集5到10個點,行數相同;但是,使用2000行的60分進行查詢時,查詢大約需要12個小時。它是基於以下粘貼,以及從http://www.bostongis.com/downloads/pgis_nn.txtSlow Postgres查詢

編輯近鄰功能上pgis_fn_nn文檔可在http://www.bostongis.com/PrinterFriendly.aspx?content_name=postgis_nearest_neighbor_generic

緩慢的部分pgis_fn_nn(...)

    實施
  1. 我做錯了什麼?
  2. 有沒有什麼建議可以讓這個更快?
  3. 有沒有一種方法可以改善這兩種腳本?
  4. 如果我想將兩個查詢合併爲一個,你會推薦什麼?

my_script.sql

-- this sql script creates a line table that connects points from a point table 
-- to the projected points from the nearest line to the point of oritin 

-- delete duplicate tables if they exist 
DROP TABLE exploded_roads; 
DROP TABLE projected_points; 
DROP TABLE lines_from_centroids_to_roads; 

-- create temporary exploaded lines table 
CREATE TABLE exploded_roads (
the_geom geometry, 
edge_id serial 
); 

-- insert the linestring that are not multistring 
INSERT INTO exploded_roads 
SELECT the_geom 
FROM "StreetCenterLines" 
WHERE st_geometrytype(the_geom) = 'ST_LineString'; 

-- insert the linestrings that need to be converted from multi string 
INSERT INTO exploded_roads 
SELECT the_geom 
FROM (
    SELECT ST_GeometryN(
the_geom, 
generate_series(1, ST_NumGeometries(the_geom))) 
    AS the_geom 
    FROM "StreetCenterLines" 
) 
AS foo; 

-- create projected points table with ids matching centroid table 
CREATE TABLE projected_points (
the_geom geometry, 
pid serial, 
dauid int 
); 

-- Populate Table 
-- code based on Paul Ramsey's site and Boston GIS' NN code 
INSERT INTO projected_points(the_geom, dauid) 
SELECT DISTINCT ON ("DAUID"::int) 
( 
    ST_Line_Interpolate_Point(
    (
    SELECT the_geom 
    FROM exploded_roads 
    WHERE edge_id IN 
    (
     SELECT nn_gid 
     FROM pgis_fn_nn(centroids.the_geom, 30000000, 1,10, 'exploded_roads', 'true', 'edge_id', 'the_geom') 
    ) 
    ), 
    ST_Line_Locate_Point(
    exploded_roads.the_geom, 
    centroids.the_geom 
    ) 
) 
), 
(centroids."DAUID"::int) 

FROM exploded_roads, fred_city_o6_da_centroids centroids; 


-- Create Line tables 
CREATE TABLE lines_from_centroids_to_roads (
the_geom geometry, 
edge_id SERIAL 
); 


-- Populate Line Table 
INSERT INTO lines_from_centroids_to_roads(
SELECT 
ST_MakeLine(centroids.the_geom, projected_points.the_geom) 
FROM projected_points, fred_city_o6_da_centroids centroids 
WHERE projected_points.dauid = centroids.id 
); 

pgis_fn_nnhttp://www.bostongis.com/downloads/pgis_nn.txt

---LAST UPDATED 8/2/2007 -- 
CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry, maxe double precision, maxslice double precision) 
    RETURNS integer AS 
$BODY$ 
BEGIN 
    FOR i IN 0..maxslice LOOP 
     IF expand(a,maxe*i/maxslice) && b THEN 
      RETURN i; 
     END IF; 
    END LOOP; 
    RETURN 99999999; 
END; 
$BODY$ 
LANGUAGE 'plpgsql' IMMUTABLE; 

CREATE TYPE pgis_nn AS 
    (nn_gid integer, nn_dist numeric(16,5)); 

CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset varchar(150), swhere varchar(5000), sgid2field varchar(100), sgeom2field varchar(100)) 
    RETURNS SETOF pgis_nn AS 
$BODY$ 
DECLARE 
    strsql text; 
    rec pgis_nn; 
    ncollected integer; 
    it integer; 
--NOTE: it: the iteration we are currently at 
--start at the bounding box of the object (expand 0) and move up until it has collected more objects than we need or it = maxslices whichever event happens first 
BEGIN 
    ncollected := 0; it := 0; 
    WHILE ncollected < numnn AND it <= maxslices LOOP 
     strsql := 'SELECT currentit.' || sgid2field || ', distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM ' || lookupset || ' as currentit, (SELECT geometry(''' || CAST(geom1 As text) || ''') As geom) As ref WHERE ' || swhere || ' AND distance(ref.geom, currentit.' || sgeom2field || ') <= ' || CAST(distguess As varchar(200)) || ' AND expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100)) || ') && currentit.' || sgeom2field || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', ' || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200)) || ') = ' || CAST(it As varchar(100)) || ' ORDER BY distance(ref.geom, currentit.' || sgeom2field || ') LIMIT ' || 
     CAST((numnn - ncollected) As varchar(200)); 
     --RAISE NOTICE 'sql: %', strsql; 
     FOR rec in EXECUTE (strsql) LOOP 
      IF ncollected < numnn THEN 
       ncollected := ncollected + 1; 
       RETURN NEXT rec; 
      ELSE 
       EXIT; 
      END IF; 
     END LOOP; 
     it := it + 1; 
    END LOOP; 
END 
$BODY$ 
LANGUAGE 'plpgsql' STABLE; 

CREATE OR REPLACE FUNCTION pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset varchar(150), swhere varchar(5000), sgid2field varchar(100), sgeom2field varchar(100)) 
    RETURNS SETOF pgis_nn AS 
$BODY$ 
    SELECT * FROM _pgis_fn_nn($1,$2, $3, $4, $5, $6, $7, $8); 
$BODY$ 
    LANGUAGE 'sql' STABLE; 
+0

什麼是使用在客戶端的存儲過程的原因邊碼? – 2009-10-01 14:50:41

回答

4

我使用 「最近的」 功能做pgRouting OpenStreetMap的數據。起初,我偶然發現了你提到的fn_nn函數,但是訪問irc.freenode.net上的#postgis irc頻道幫助我解決了問題。事實證明,postgis有一些夢幻般的線性函數,當它們結合時,可以回答你需要的一切!

你可以找到更多的線性函數:http://postgis.refractions.net/documentation/manual-1.3/ch06.html#id2578698但這裏是我是如何實現它

select 
    line_interpolate_point(ways.the_geom, 
    line_locate_point(ways.the_geom, pnt))),')','')) as anchor_point, 
    -- returns the anchor point 
    line_locate_point(ways.the_geom, pnt) as anchor_percentage, 
    -- returns the percentage on the line where the anchor will 
    -- touch (number between 0 and 1) 
    CASE 
    WHEN line_locate_point(ways.the_geom, pnt) < 0.5 THEN ways.source 
    WHEN line_locate_point(ways.the_geom, pnt) > 0.5 THEN ways.target 
    END as node, 
    -- returns the nearest end node id 
    length_spheroid(st_line_substring(ways.the_geom,0, 
    line_locate_point(ways.the_geom, pnt)), 
    'SPHEROID[\"WGS 84\",6378137,298.257223563]') as length, 
    distance_spheroid(pnt, line_interpolate_point(ways.the_geom, 
    line_locate_point(ways.the_geom, pnt)), 
    'SPHEROID[\"WGS 84\",6378137,298.257223563]') as dist 
      from ways, planet_osm_line, 
      ST_GeomFromText('POINT(1.245 51.234)', 4326) as pnt 
    where ways.gid = planet_osm_line.osm_id 
      order by dist asc limit 1;"; 

希望這是任何使用你

+0

感謝您的回答;然而,代碼似乎返回了一條線上的最近點,如果你有2000條線,那麼它是如何返回到最近點的? – dassouki 2009-10-02 14:15:19

+1

函數line_locate_point(table.column,real_point)將搜索整個數據集,在我的情況下是58000行元素。子句「order by dist asc limit 1」會給我一個距離最短的單線元素的結果。得到它? – milovanderlinden 2009-10-02 21:17:48