您可以使用如下的SQL函數:
-- =============================================================================
-- dbo.StripOutHTML
--
-- Purpose:
--   Turns an HTML fragment into plain text:
--     1. decodes the entities &amp; &lt; &gt; &nbsp;
--     2. converts <P>, </P>, <br>, <br/> and <br /> into CR+LF
--     3. removes every remaining '<' ... '>' span (optionally keeping a few tags)
--     4. trims leading tab/CR/LF/space characters and trailing spaces
--
-- Parameters:
--   @HTMLText            The text to clean. NULL in gives NULL out.
--   @stripDisallowedOnly 1 = keep tags that start with <B, </B, <U, </U, <I or
--                        </I; any other value (including NULL) strips all tags.
--
-- Returns:
--   The cleaned text as VARCHAR(max).
--
-- Notes:
--   * Tag matching is a plain text scan, not an HTML parser. A literal '<' that
--     is later followed by a '>' is treated as a tag and removed with everything
--     in between.
--   * The "allowed" test compares only the first 2 or 3 characters of a tag, so
--     with @stripDisallowedOnly = 1 tags such as <BODY> or <IMG> are kept too.
--   * Entities are decoded BEFORE tags are stripped, so encoded markup such as
--     &lt;div&gt; becomes a real tag and is stripped as well.
--   * Whether matching is case-sensitive follows the collation of the input.
-- =============================================================================
ALTER FUNCTION [dbo].[StripOutHTML]
(
    @HTMLText VARCHAR(max),
    @stripDisallowedOnly BIT
)
RETURNS VARCHAR(max)
AS
BEGIN
    DECLARE @Start INT
    DECLARE @End INT
    DECLARE @Length INT
    DECLARE @NewLine CHAR(2)
    DECLARE @trimchars VARCHAR(10)
    DECLARE @FirstKept INT

    SET @NewLine = CHAR(13) + CHAR(10)

    -- Decode &amp; first and repeat until none is left, because the ampersand
    -- may be encoded more than once (e.g. '&amp;amp;' or '&amp;lt;').
    -- Each pass shortens the string, so the loop always terminates.
    WHILE CHARINDEX('&amp;', @HTMLText) > 0
    BEGIN
        SET @HTMLText = REPLACE(@HTMLText, '&amp;', '&')
    END

    -- Decode the remaining entities. One pass is enough: none of the
    -- replacement characters can form a new entity.
    SET @HTMLText = REPLACE(@HTMLText, '&lt;', '<')
    SET @HTMLText = REPLACE(@HTMLText, '&gt;', '>')
    SET @HTMLText = REPLACE(@HTMLText, '&nbsp;', ' ')

    -- Paragraph and line-break tags become a real CR+LF pair.
    -- Only the attribute-less spellings listed here are recognised.
    SET @HTMLText = REPLACE(@HTMLText, '<P>', @NewLine)
    SET @HTMLText = REPLACE(@HTMLText, '</P>', @NewLine)
    SET @HTMLText = REPLACE(@HTMLText, '<br>', @NewLine)
    SET @HTMLText = REPLACE(@HTMLText, '<br/>', @NewLine)
    SET @HTMLText = REPLACE(@HTMLText, '<br />', @NewLine)

    -- Remove anything between '<' and the next '>'.
    SET @Start = CHARINDEX('<', @HTMLText)
    SET @End = CHARINDEX('>', @HTMLText, @Start)
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        IF @stripDisallowedOnly = 1
        BEGIN
            IF UPPER(SUBSTRING(@HTMLText, @Start, 2)) NOT IN ('<B', '<U', '<I')
               AND UPPER(SUBSTRING(@HTMLText, @Start, 3)) NOT IN ('</B', '</U', '</I')
            BEGIN
                SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
            END
            ELSE
            BEGIN
                -- Allowed tag: keep it. A zero length makes the next search
                -- start at this tag's '>' instead of before the tag.
                SET @Length = 0
            END
        END
        ELSE
        BEGIN
            SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
        END

        -- After a removal (@End - @Length) is one position before the removed
        -- tag; after a kept tag it is the position of that tag's '>'.
        SET @Start = CHARINDEX('<', @HTMLText, @End - @Length)
        SET @End = CHARINDEX('>', @HTMLText, @Start)
        SET @Length = (@End - @Start) + 1
    END

    -- Remove leading tab / line feed / carriage return / space characters.
    SET @trimchars = CHAR(9) + CHAR(10) + CHAR(13) + CHAR(32)
    IF @HTMLText LIKE '[' + @trimchars + ']%'
    BEGIN
        SET @FirstKept = PATINDEX('%[^' + @trimchars + ']%', @HTMLText)
        IF @FirstKept = 0
        BEGIN
            -- Nothing but whitespace is left.
            SET @HTMLText = ''
        END
        ELSE
        BEGIN
            SET @HTMLText = SUBSTRING(@HTMLText, @FirstKept, LEN(@HTMLText))
        END
    END

    RETURN LTRIM(RTRIM(@HTMLText))
END
如果您想保留類似 em 的標籤,需要修改下面這段迴圈中的判斷條件,例如:(Upper(Substring(@HTMLText, @Start, 2)) <> '<B')
注意:此SQL函數會把 <BR> 和 <P> 標籤替換為換行;如果您不需要這個行為,可以很容易地刪除那些行。
希望這可以幫助您,或為您指出正確的方向。需要修改的迴圈如下:
-- Tag-stripping loop (excerpt). Expects @Start / @End / @Length to already
-- describe the first '<' ... '>' span found in @HTMLText.
-- Each pass either deletes the current span or, when @stripDisallowedOnly = 1
-- and the span starts with <B, </B, <U, </U, <I or </I, leaves it in place,
-- then moves on to the next span.
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
    IF @stripDisallowedOnly = 1
    BEGIN
        IF UPPER(SUBSTRING(@HTMLText, @Start, 2)) NOT IN ('<B', '<U', '<I')
           AND UPPER(SUBSTRING(@HTMLText, @Start, 3)) NOT IN ('</B', '</U', '</I')
        BEGIN
            -- Not on the allow list: delete the whole tag.
            SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
        END
        ELSE
        BEGIN
            -- Allowed tag: keep it; a zero length makes the next search
            -- start at this tag's '>'.
            SET @Length = 0
        END
    END
    ELSE
    BEGIN
        -- Strip-everything mode.
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
    END

    -- Locate the next '<' ... '>' span.
    SET @Start = CHARINDEX('<', @HTMLText, @End - @Length)
    SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText, @Start))
    SET @Length = (@End - @Start) + 1
END
這將工作與形成不良的HTML。我需要刪除adsf的屬性沒有引用,沒有HREF – tsdexter 2012-04-19 20:08:02
Sql Server不適合這樣的事情,但我敢肯定你可以在你的輸入html字符串上應用** HTML Tidy **,然後提交它進行處理 – 2012-04-19 20:12:29
因爲這是用新數據替換舊的不正確數據的工作的一部分,而且舊數據非常有規律(即:html標籤全部大寫,而我想保留的標籤是小寫),所以改用PATINDEX而不是CHARINDEX可以更輕鬆地做到這一點 - 請參閱我的答案。 – tsdexter 2012-04-19 20:26:02