SQL用戶定義的函數去掉HTML標籤並替換HTML實體

我正在嘗試編寫一個 UDF（實際上是把我在網路上找到的一些代碼整合成單一函數），用來完成標題所描述的工作：SQL 用戶定義的函數去掉 HTML 標籤並替換 HTML 實體。

下面的代碼：

-- Strips HTML tags from @txt, decodes a fixed list of HTML entities, and
-- returns the trimmed result as a single-row result set.
--
-- HTML entity names are case-sensitive (&ecirc; is not &Ecirc;), but REPLACE
-- compares using the collation of its input. Under a case-insensitive default
-- collation the lowercase replacement also consumes the uppercase entity
-- (AG&Ecirc;NCIA became AGêNCIA). The accented-letter replacements below
-- therefore apply an explicit case-sensitive collation to the input, which
-- works without changing the database collation.
-- NOTE(review): SQL_Latin1_General_CP1_CS_AS uses code page 1252, the same
-- code page the CHAR(192)..CHAR(220) values below already assume - confirm it
-- matches the database default collation's code page.
declare @txt varchar(max), @start int, @end int, @len int

set @txt = '<p class=&#34;answer&#34;>Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impressa e enviada fisicamente pela AG&Ecirc;NCIA</strong>, contendo confere com oringinal por funcion&aacute;rio CAIXA.</p>'

-- Locate the first tag: from the first '<' to the next '>' after it.
set @start = charindex('<',@txt)
set @end = charindex('>',@txt,@start)
set @len = (@end - @start) + 1

-- Remove one tag per iteration; stops when no '<' is left or a '<' has no
-- closing '>' after it (charindex returns 0, so @len is not positive).
while @start > 0 and @end > 0 and @len > 0
begin
    set @txt = stuff(@txt,@start,@len,'')
    set @start = charindex('<',@txt)
    set @end = charindex('>',@txt,@start)
    set @len = (@end - @start) + 1
end

-- Punctuation and symbol entities (no upper/lower-case variants in this list).
SET @txt = REPLACE(@txt,'&nbsp;',' ') --space
SET @txt = REPLACE(@txt,'&ldquo;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&rdquo;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&lsquo;',CHAR(39)) --'
SET @txt = REPLACE(@txt,'&rsquo;',CHAR(39)) --'
SET @txt = REPLACE(@txt,'&ndash;',CHAR(150)) -- –
SET @txt = REPLACE(@txt,'&mdash;',CHAR(151)) -- —
SET @txt = REPLACE(@txt,'&ordm;',CHAR(186)) -- º
SET @txt = REPLACE(@txt,'&ordf;',CHAR(170)) -- ª
SET @txt = REPLACE(@txt,'&sect;',CHAR(167)) -- §
--------------------------------------------------------------
-- Numeric character references.
SET @txt = REPLACE(@txt,'&#34;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&#39;',CHAR(39)) --'
--------------------------------------------------------------

-- Lowercase accented letters: case-sensitive match so '&Ecirc;' etc. are
-- left for the uppercase section below.
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&agrave;','à') --à
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&aacute;','á') --á
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&atilde;','ã') --ã
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&acirc;','â') --â
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&auml;','ä') --ä
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&eacute;','é') --é
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&ecirc;','ê') --ê
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&iacute;','í') --í
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&oacute;','ó') --ó
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&otilde;','õ') --õ
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&oslash;','ø') --ø
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&uacute;','ú') --ú
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&uuml;','ü') --ü
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&ccedil;','ç') --ç
--------------------------------------------------------------
-- Uppercase accented letters: same case-sensitive match.
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Agrave;',CHAR(192)) --À
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Aacute;',CHAR(193)) --Á
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Atilde;',CHAR(195)) --Ã
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Acirc;',CHAR(194)) --Â
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Auml;',CHAR(196)) --Ä
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Eacute;',CHAR(201)) --É
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Ecirc;',CHAR(202)) --Ê
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Iacute;',CHAR(205)) --Í
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Oacute;',CHAR(211)) --Ó
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Otilde;',CHAR(213)) --Õ
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Oslash;',CHAR(216)) --Ø
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Uacute;',CHAR(218)) --Ú
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Uuml;',CHAR(220)) --Ü
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS,'&Ccedil;',CHAR(199)) --Ç

-- Return the cleaned text without leading/trailing spaces.
select LTRIM(RTRIM(@txt))

它能剝離 HTML 標籤，但只能正確轉換小寫的 HTML 實體；當單詞中出現大寫實體時（例如 AG&Ecirc;NCIA（通訊社）中的 &Ecirc;），轉換就不正確，輸出的是 AGêNCIA 而不是 AGÊNCIA。

有什麼辦法能讓它正常工作嗎？

編輯：PS：我不能更改數據庫的排序規則（collation）（如 @dzomba 所建議的那樣）。

來源

2015-07-10 FMarcel

試試這個http://stackoverflow.com/questions/457701/best-way-to-strip-html-tags-from-a-string-in-sql-server – Sushil

這個建議很好；如果你想這樣做，就需要把數據庫的排序規則設置為區分大小寫。

更新！這只是一種變通方法，但我認爲它能解決這個問題。我手邊沒有 SQL Server 可以測試，但我幾乎可以肯定它能正常運作。

-- Strips HTML tags from @txt, then decodes a fixed list of HTML entities
-- case-sensitively by routing the text through a table variable whose column
-- has a case-sensitive collation. REPLACE compares using the collation of its
-- input, so '&ecirc;' no longer matches '&Ecirc;'. The trimmed result is
-- returned as a single-row result set.
declare @txt varchar(max)
declare @start int
declare @end int
declare @len int

set @txt = '<p class=&#34;answer&#34;>Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impressa e enviada fisicamente pela AG&Ecirc;NCIA</strong>, contendo confere com oringinal por funcion&aacute;rio CAIXA.</p>'

-- Locate the first tag: from the first '<' to the next '>' after it.
set @start = charindex('<',@txt)
set @end = charindex('>',@txt,@start)
set @len = (@end - @start) + 1

-- Remove one tag per iteration; stops when no '<' is left or a '<' has no
-- closing '>' after it (charindex returns 0, so @len is not positive).
while @start > 0 and @end > 0 and @len > 0
begin
    set @txt = stuff(@txt,@start,@len,'')
    set @start = charindex('<',@txt)
    set @end = charindex('>',@txt,@start)
    set @len = (@end - @start) + 1
end

-- A table variable must be declared with the TABLE keyword.
DECLARE @table TABLE (txtColumn varchar(max) COLLATE SQL_Latin1_General_CP1_CS_AS) --make the column case sensitive

INSERT INTO @table (txtColumn)
SELECT @txt

-- @table holds exactly the one row inserted above, so the UPDATE statements
-- below intentionally have no WHERE clause.
UPDATE @table set txtColumn = REPLACE(txtColumn,'&nbsp;',' ') --space
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ldquo;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&rdquo;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&lsquo;',CHAR(39)) --'
UPDATE @table set txtColumn = REPLACE(txtColumn,'&rsquo;',CHAR(39)) --'
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ndash;',CHAR(150)) -- –
UPDATE @table set txtColumn = REPLACE(txtColumn,'&mdash;',CHAR(151)) -- —
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ordm;',CHAR(186)) -- º
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ordf;',CHAR(170)) -- ª
UPDATE @table set txtColumn = REPLACE(txtColumn,'&sect;',CHAR(167)) -- §
-------------------------------------------------------------
-- Numeric character references.
UPDATE @table set txtColumn = REPLACE(txtColumn,'&#34;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&#39;',CHAR(39)) --'
--------------------------------------------------------------

-- Lowercase accented letters.
UPDATE @table set txtColumn = REPLACE(txtColumn,'&agrave;','à') --à
UPDATE @table set txtColumn = REPLACE(txtColumn,'&aacute;','á') --á
UPDATE @table set txtColumn = REPLACE(txtColumn,'&atilde;','ã') --ã
UPDATE @table set txtColumn = REPLACE(txtColumn,'&acirc;','â') --â
UPDATE @table set txtColumn = REPLACE(txtColumn,'&auml;','ä') --ä
UPDATE @table set txtColumn = REPLACE(txtColumn,'&eacute;','é') --é
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ecirc;','ê') --ê
UPDATE @table set txtColumn = REPLACE(txtColumn,'&iacute;','í') --í
UPDATE @table set txtColumn = REPLACE(txtColumn,'&oacute;','ó') --ó
UPDATE @table set txtColumn = REPLACE(txtColumn,'&otilde;','õ') --õ
UPDATE @table set txtColumn = REPLACE(txtColumn,'&oslash;','ø') --ø
UPDATE @table set txtColumn = REPLACE(txtColumn,'&uacute;','ú') --ú
UPDATE @table set txtColumn = REPLACE(txtColumn,'&uuml;','ü') --ü
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ccedil;','ç') --ç
-------------------------------------------------------------
-- Uppercase accented letters.
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Agrave;',CHAR(192)) --À
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Aacute;',CHAR(193)) --Á
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Atilde;',CHAR(195)) --Ã
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Acirc;',CHAR(194)) --Â
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Auml;',CHAR(196)) --Ä
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Eacute;',CHAR(201)) --É
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Ecirc;',CHAR(202)) --Ê
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Iacute;',CHAR(205)) --Í
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Oacute;',CHAR(211)) --Ó
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Otilde;',CHAR(213)) --Õ
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Oslash;',CHAR(216)) --Ø
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Uacute;',CHAR(218)) --Ú
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Uuml;',CHAR(220)) --Ü
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Ccedil;',CHAR(199)) --Ç

-- Return the cleaned text without leading/trailing spaces.
SELECT LTRIM(RTRIM(txtColumn)) FROM @table

-- No cleanup needed: a table variable cannot be dropped with DROP TABLE; it
-- goes out of scope automatically at the end of the batch.

來源

2015-07-10 23:02:22 danvasiloiu

問題是，我不能這樣做……不過還是謝謝你，我會根據你的意見編輯我的問題…… – FMarcel

好吧，我會更新我的答案。希望它能幫助你 – danvasiloiu

SQL用戶定義的函數去掉HTML標籤並替換HTML實體

回答

相關問題