2017-03-07 61 views
0

標題:PL/SQL 中的 HTML 表格解析器

我需要創建一個 HTML 表格解析器,它要以正確的順序逐一讀取表格的單元格。

我目前寫出的代碼如下:

-- Sample input: a single HTML table with two rows of two cells each.
-- The text is handed to XMLTYPE below, so it has to be well-formed XML.
html := '<body> 
      <table border="1"> 
      <tr> 
       <td><b>A1</b></td> 
       <td><i>B1</i></td> 
      </tr> 
      <tr> 
       <td><b>A2</b></td> 
       <td><i>B2</i></td> 
      </tr> 
      </table> 
     </body>'; 

-- Outer loop: one iteration per <tr>. rn is the ROWNUM of the row and td is
-- the XMLType selected by './td', i.e. all <td> children of that row together.
FOR r IN (SELECT rownum rn, td FROM xmltable('*/table/tr' passing xmltype(html) 
                 columns td xmltype path './td')) 
LOOP 
    -- Inner loop: the row pattern is '.', so the whole td value is treated as
    -- one item instead of one item per cell. This is the reported problem:
    -- the cells of a row are printed concatenated (e.g. "A1B1").
    FOR c IN (SELECT cell FROM xmltable('.' passing r.td 
                columns cell VARCHAR(200) path '.')) 
    LOOP 
    dbms_output.put_line('Row ' || r.rn || ': ' || c.cell); 

    END LOOP; 
END LOOP; 

現在的結果是:

Row 1: A1B1 
Row 2: A2B2 

我需要的是:

Row 1: A1 
Row 1: B1 
Row 2: A2 
Row 2: B2 

我怎樣才能做到這一點?感謝您的回覆。

回答

1

假設存放 HTML 的欄位(或變量)是 CLOB 類型,可以按如下方式實現:

-- Prints every cell of an HTML table, one line per cell, as "Row <n>: <text>".
-- The HTML is parsed with XMLTYPE, so it must be well-formed XML.
declare 
html clob:= '<body> 
      <table border="1"> 
      <tr> 
       <td><b>A1</b></td> 
       <td><i>B1</i></td> 
      </tr> 
      <tr> 
       <td><b>A2</b></td> 
       <td><i>B2</i></td> 
      </tr> 
      </table> 
     </body>'; 
begin 
-- Outer query: one row per <tr>.
--   rn : position of the <tr> in the document, generated by XMLTABLE itself
--        (FOR ORDINALITY) rather than by ROWNUM.
--   td : all <td> children of the row, returned together as one XMLType.
FOR r IN (SELECT rn, td
          FROM xmltable('*/table/tr' passing xmltype(html)
                        columns rn FOR ORDINALITY,
                                td xmltype path './td'))
LOOP
    -- Inner query: the row pattern './td' splits the row's cells into one
    -- row per <td>; path '.' yields the text content of the cell with any
    -- inline markup (<b>, <i>, ...) stripped.
    FOR c IN (SELECT cell
              FROM xmltable('./td' passing r.td
                            columns cell VARCHAR2(200) path '.'))
    LOOP
        dbms_output.put_line('Row ' || r.rn || ': ' || c.cell);
    END LOOP;
END LOOP;

end; 
1

這樣應該可以解決問題 :)

-- Prints every cell of an HTML table, one line per cell, as "Row <n>: <text>".
-- The HTML is parsed with XMLTYPE, so it must be well-formed XML.
DECLARE 
     -- CLOB instead of a fixed VARCHAR2(1000): larger documents no longer
     -- fail on assignment with ORA-06502.
     html CLOB := '<body>   
     <table border="1">    
      <tr>    
       <td><b>A1</b></td>    
       <td><i>B1</i></td>    
      </tr>    
      <tr>     
       <td><b>A2</b></td>    
       <td><i>B2</i></td>    
      </tr>   
     </table>   
     </body>'; 
BEGIN 
    -- Outer query: one row per <tr>.
    --   rn : position of the <tr> in the document (FOR ORDINALITY).
    --   td : the whole <tr> element (path '.'), passed on to the inner query.
    FOR r IN 
    (SELECT rn, 
            td 
     FROM xmltable('*/table/tr' passing xmltype(html)
                   columns rn FOR ORDINALITY,
                           td xmltype path '.')
    ) 
    LOOP 
        -- Inner query: r.td holds a <tr>, so '*/td' selects its direct <td>
        -- children, one row per cell; path '.' yields the cell's text content.
        FOR c IN 
        (SELECT cell 
         FROM xmltable('*/td' passing r.td
                       columns cell VARCHAR2(200) path '.')
        ) 
        LOOP 
            dbms_output.put_line('Row ' || r.rn || ': ' || c.cell); 
        END LOOP; 
    END LOOP; 
END; 

輸出:

Row 1: A1 
Row 1: B1 
Row 2: A2 
Row 2: B2 
1

一個查詢就夠了。

-- Lists every cell of an HTML table in a single query:
--   SEQNO : position of the cell's <tr> in the document
--   TEXT  : text content of the cell
-- The HTML is parsed with XMLTYPE, so it must be well-formed XML.
SELECT
    tr_rows.seqno,
    cells.text
FROM xmltable('//*/table/tr' passing xmltype('<body> 
      <table border="1"> 
      <tr> 
       <td><b>A1</b></td> 
       <td><i>B1</i></td> 
      </tr> 
      <tr> 
       <td><b>A2</b></td> 
       <td><i>B2</i></td> 
      </tr> 
      </table> 
     </body>') 
       columns td xmltype path '.',
               "SEQNO" FOR ORDINALITY) tr_rows
-- Each <tr> is expanded into its cells. path '.' takes the string value of
-- the <td>, so a plain-text cell (<td>A1</td>) and a cell with several inline
-- elements both work; './*/text()' returned NULL for the former and raised a
-- singleton-sequence error for the latter.
CROSS JOIN xmltable('//td' passing tr_rows.td
       columns cell_no FOR ORDINALITY,
               text varchar2(100) path '.') cells
-- Make the result order explicit: document order of rows, then of cells.
ORDER BY
    tr_rows.seqno,
    cells.cell_no;