我需要從T-SQL中的SELECT語句中過濾出(移除)擴展的ASCII字符。如何從T-SQL中的字符串中刪除擴展的ASCII字符?
我正在使用存儲過程來這樣做。
預期輸入:
ËËËËeeeeËËËË
預期輸出:
eeee
我找到的所有解決方案都是針對MySQL的。
我使用:
Microsoft SQL Server Management Studio 11.0.2100.60
Microsoft .NET Framework 4.0.30319.17929
我需要從T-SQL中的SELECT語句中過濾出(移除)擴展的ASCII字符。如何從T-SQL中的字符串中刪除擴展的ASCII字符?
我正在使用存儲過程來這樣做。
預期輸入:
ËËËËeeeeËËËË
預期輸出:
eeee
我找到的所有解決方案都是針對MySQL的。
我使用:
Microsoft SQL Server Management Studio 11.0.2100.60
Microsoft .NET Framework 4.0.30319.17929
OK,試試這個。看起來他們有同樣的問題。無論如何,您需要根據您的要求對其進行修改。
-- RemoveNonASCII: returns @nstring with every character that is not printable
-- 7-bit ASCII (code points 32-126) removed.
--   @nstring : text to clean (at most 255 characters).
--   returns  : varchar(255) holding only the characters that were kept.
-- Notes:
--   * ASCII is a 7-bit set (0-127). Code points 128-255 are the "extended"
--     range (e.g. 'Ë' = 203) and are removed together with everything above 255.
--   * 0-31 and 127 are control characters and are removed as well.
--   * LEN() does not count trailing spaces, so trailing spaces are not copied.
--   * For NULL input the loop condition is never true and '' is returned.
CREATE FUNCTION RemoveNonASCII
(
    @nstring nvarchar(255)
)
RETURNS varchar(255)
AS
BEGIN
    DECLARE @Result varchar(255)
    DECLARE @nchar nvarchar(1)
    DECLARE @position int

    SET @Result = ''
    SET @position = 1

    WHILE @position <= LEN(@nstring)
    BEGIN
        SET @nchar = SUBSTRING(@nstring, @position, 1)

        -- Keep printable ASCII only; the previous upper bound of 255 let the
        -- extended characters (128-255) through unchanged.
        IF UNICODE(@nchar) BETWEEN 32 AND 126
            SET @Result = @Result + @nchar

        SET @position = @position + 1
    END

    RETURN @Result
END
GO
只是對上面的代碼做了修正(原先的代碼會把點號「.」去掉)
-- dbo.RemoveNonASCII (e-mail variant): transliterates accented Latin-1 letters
-- to their plain letter, drops every character that is not in the e-mail
-- whitelist, and lower-cases the result.
--   @nstring : text to clean (at most 255 characters).
--   returns  : nvarchar(255) containing only  - . 0-9 @ a-z _  (lower case).
-- Notes:
--   * LEN() does not count trailing spaces; spaces are not in the whitelist
--     anyway, so they never reach the result.
--   * For NULL input the loop condition is never true and '' is returned.
CREATE FUNCTION [dbo].[RemoveNonASCII]
(
    @nstring nvarchar(255)
)
RETURNS nvarchar(255)
AS
BEGIN
    DECLARE @Result nvarchar(255)
    DECLARE @nchar nvarchar(1)
    DECLARE @code int
    DECLARE @position int

    SET @Result = ''
    SET @position = 1

    WHILE @position <= LEN(@nstring)
    BEGIN
        SET @nchar = SUBSTRING(@nstring, @position, 1)
        SET @code = UNICODE(@nchar)

        -- Transliterate accented letters. The ranges are disjoint, so a single
        -- CASE is equivalent to testing them one after another.
        SET @nchar = CASE
            -- A/a with accents (incl. Æ/æ); 224 ('à') was previously missing
            WHEN @code BETWEEN 192 AND 198 OR @code BETWEEN 224 AND 230 THEN N'a'
            -- E/e with accents
            WHEN @code BETWEEN 200 AND 203 OR @code BETWEEN 232 AND 235 THEN N'e'
            -- I/i with accents
            WHEN @code BETWEEN 204 AND 207 OR @code BETWEEN 236 AND 239 THEN N'i'
            -- O/o with accents (and 240, 'ð')
            WHEN @code BETWEEN 210 AND 214 OR @code BETWEEN 242 AND 246 OR @code = 240 THEN N'o'
            -- U/u with accents
            WHEN @code BETWEEN 217 AND 220 OR @code BETWEEN 249 AND 252 THEN N'u'
            -- Ç / ç
            WHEN @code = 199 OR @code = 231 THEN N'c'
            -- Ñ / ñ
            WHEN @code = 209 OR @code = 241 THEN N'n'
            ELSE @nchar
        END
        SET @code = UNICODE(@nchar)

        -- Whitelist: '-' '.' (45-46), digits (48-57), '@' and A-Z (64-90),
        -- '_' (95), a-z (97-122). Everything else is dropped.
        IF @code BETWEEN 45 AND 46
           OR @code BETWEEN 48 AND 57
           OR @code BETWEEN 64 AND 90
           OR @code = 95
           OR @code BETWEEN 97 AND 122
            SET @Result = @Result + @nchar

        SET @position = @position + 1
    END

    SET @Result = LOWER(@Result) -- e-mails in lower case
    RETURN @Result
END
感謝您分享您的代碼。
我需要類似的東西,不僅用於清理電子郵件地址,還用於通用目的,在通過集成模塊到達SAP ERP之前過濾用戶網站輸入。
由於是在巴西運行,它必須支持當地語言中帶重音符號的字符……
下面是最終得到的代碼。
也許它可以幫助別人,就像它對我做的那樣。
-- Drop the scalar function dbo.fnRemoveNonASCII if it already exists, so that
-- the script can be re-run. N'FN' restricts the lookup to scalar functions.
IF OBJECT_ID(N'[dbo].[fnRemoveNonASCII]', N'FN') IS NOT NULL
    DROP FUNCTION dbo.fnRemoveNonASCII
GO
-- dbo.fnRemoveNonASCII: replaces every character that is not in the list of
-- exportable characters with a single space.
--   @nstring : text to filter.
--   returns  : nvarchar(MAX) of the same character count as LEN(@nstring);
--              allowed characters are copied, all others (including TAB)
--              become ' '.
-- Allowed code points: LF (10), CR (13), printable ASCII (32-126), NBSP (160)
-- and the accented Latin-1 letters enumerated in the condition below.
-- Notes:
--   * LEN() does not count trailing spaces, so trailing spaces are not copied.
--   * For NULL input the loop condition is never true and '' is returned.
CREATE FUNCTION [dbo].[fnRemoveNonASCII]
(
    @nstring nvarchar(MAX)
)
RETURNS nvarchar(MAX)
AS
BEGIN
    DECLARE @nchar nvarchar(1)      -- individual char in string
    -- Code point of @nchar. Must be int: with the former nvarchar(3) any code
    -- point >= 1000 (e.g. the euro sign, Greek, Cyrillic, CJK) raised an
    -- arithmetic overflow error on assignment.
    DECLARE @nUnicode int
    DECLARE @position int           -- subscript to control loop in the string
    DECLARE @Result nvarchar(MAX)   -- return value

    SET @Result = ''
    SET @position = 1

    WHILE @position <= LEN(@nstring)
    BEGIN
        SET @nchar = SUBSTRING(@nstring, @position, 1)
        SET @nUnicode = UNICODE(@nchar)

        IF @nUnicode = 10                       -- line feed
           OR @nUnicode = 13                    -- carriage return
           OR @nUnicode BETWEEN 32 AND 126      -- printable ASCII
           OR @nUnicode = 160                   -- non-breaking space
           OR @nUnicode BETWEEN 192 AND 207
           OR @nUnicode BETWEEN 210 AND 213
           OR @nUnicode BETWEEN 217 AND 219
           OR @nUnicode BETWEEN 224 AND 227
           OR @nUnicode BETWEEN 231 AND 234
           OR @nUnicode = 236
           OR @nUnicode = 237
           OR @nUnicode BETWEEN 242 AND 245
           OR @nUnicode = 247
           OR @nUnicode = 249
           OR @nUnicode = 250
            SET @Result = @Result + @nchar
        ELSE
            -- TAB (9) and every other character are replaced by one space.
            SET @Result = @Result + ' '

        SET @position = @position + 1
    END

    RETURN @Result
END
/*
---------------------------------------------------------------------------------------------------------------
-- Tabela dos caracteres Unicode/ASCII exportáveis
	 | 	 | %9 = TAB
| 
 | %a = 0A Line Feed
| 
 | %d = 0D Carriage Return
  |   | %20 = <space>
! | ! | %21 = !
" | " | %22 = "
# | # | %23 = #
$ | $ | %24 = $
% | % | %25 = %
& | & | %26 = &
' | ' | %27 = '
( | ( | %28 = (
) | ) | %29 = )
* | * | %2a = *
+ | + | %2b = +
, | , | %2c = ,
- | - | %2d = -
. | . | %2e = .
/ | / | %2f = /
0 | 0 | %30 = 0
1 | 1 | %31 = 1
2 | 2 | %32 = 2
3 | 3 | %33 = 3
4 | 4 | %34 = 4
5 | 5 | %35 = 5
6 | 6 | %36 = 6
7 | 7 | %37 = 7
8 | 8 | %38 = 8
9 | 9 | %39 = 9
: | : | %3a = :
; | ; | %3b = ;
< | < | %3c = <
= | = | %3d = =
> | > | %3e = >
? | ? | %3f = ?
@ | @ | %40 = @
A | A | %41 = A
B | B | %42 = B
C | C | %43 = C
D | D | %44 = D
E | E | %45 = E
F | F | %46 = F
G | G | %47 = G
H | H | %48 = H
I | I | %49 = I
J | J | %4a = J
K | K | %4b = K
L | L | %4c = L
M | M | %4d = M
N | N | %4e = N
O | O | %4f = O
P | P | %50 = P
Q | Q | %51 = Q
R | R | %52 = R
S | S | %53 = S
T | T | %54 = T
U | U | %55 = U
V | V | %56 = V
W | W | %57 = W
X | X | %58 = X
Y | Y | %59 = Y
Z | Z | %5a = Z
[ | [ | %5b = [
\ | \ | %5c = \
] | ] | %5d = ]
^ | ^ | %5e = ^
_ | _ | %5f = _
` | ` | %60 = `
a | a | %61 = a
b | b | %62 = b
c | c | %63 = c
d | d | %64 = d
e | e | %65 = e
f | f | %66 = f
g | g | %67 = g
h | h | %68 = h
i | i | %69 = i
j | j | %6a = j
k | k | %6b = k
l | l | %6c = l
m | m | %6d = m
n | n | %6e = n
o | o | %6f = o
p | p | %70 = p
q | q | %71 = q
r | r | %72 = r
s | s | %73 = s
t | t | %74 = t
u | u | %75 = u
v | v | %76 = v
w | w | %77 = w
x | x | %78 = x
y | y | %79 = y
z | z | %7a = z
{ | { | %7b = {
| | | | %7c = |
} | } | %7d = }
~ | ~ | %7e = ~
  |   | %a0 = <nbsp>
À | À | %c0 = À
Á | Á | %c1 = Á
 |  | %c2 = Â
à | à | %c3 = Ã
Ä | Ä | %c4 = Ä
Å | Å | %c5 = Å
Æ | Æ | %c6 = Æ
Ç | Ç | %c7 = Ç
È | È | %c8 = È
É | É | %c9 = É
Ê | Ê | %ca = Ê
Ë | Ë | %cb = Ë
Ì | Ì | %cc = Ì
Í | Í | %cd = Í
Î | Î | %ce = Î
Ï | Ï | %cf = Ï
Ò | Ò | %d2 = Ò
Ó | Ó | %d3 = Ó
Ô | Ô | %d4 = Ô
Õ | Õ | %d5 = Õ
Ù | Ù | %d9 = Ù
Ú | Ú | %da = Ú
Û | Û | %db = Û
à | à | %e0 = à
á | á | %e1 = á
â | â | %e2 = â
ã | ã | %e3 = ã
ç | ç | %e7 = ç
è | è | %e8 = è
é | é | %e9 = é
ê | ê | %ea = ê
ì | ì | %ec = ì
í | í | %ed = í
ò | ò | %f2 = ò
ó | ó | %f3 = ó
ô | ô | %f4 = ô
õ | õ | %f5 = õ
÷ | ÷ | %f7 = ÷
ù | ù | %f9 = ù
ú | ú | %fa = ú
*/
GO
被接受的答案使用了循環,而這是應該避免的……
我的解決方案是完全inlineable,可以很容易地創建一個UDF(或者甚至更好:內聯TVF)來自於此。
這個想法:創建一組運行數字(在這裏它受限於sys.objects中對象的數量,但有很多示例如何創建一個動態數字計數)。在第二個CTE中,字符串被分割爲單個字符。最後的選擇帶有清潔後的字符串。
-- Demo: strip all non-ASCII characters from a string column without a loop.
-- Sample data; N'' literals keep the test strings independent of the
-- database code page.
DECLARE @tbl TABLE(ID INT IDENTITY, EvilString NVARCHAR(100));
INSERT INTO @tbl(EvilString) VALUES(N'ËËËËeeeeËËËË'),(N'ËaËËbËeeeeËËËcË');

-- RunningNumbers: 1..n, where n is the row count of sys.objects. Strings longer
-- than n are cut off; use a larger number source for long strings.
WITH RunningNumbers AS
(
    SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS Nmbr
    FROM sys.objects
)
-- SingleChars: one row per character position of every input string.
,SingleChars AS
(
    SELECT tbl.ID
          ,rn.Nmbr
          ,SUBSTRING(tbl.EvilString, rn.Nmbr, 1) AS Chr
    FROM @tbl AS tbl
    -- A range predicate instead of TOP(n) without ORDER BY, which does not
    -- guarantee that the numbers 1..n are the ones returned.
    INNER JOIN RunningNumbers AS rn
        ON rn.Nmbr <= LEN(tbl.EvilString)
)
SELECT tbl.ID
      ,tbl.EvilString
      ,(
          SELECT '' + sc.Chr
          FROM SingleChars AS sc
          WHERE sc.ID = tbl.ID
            -- UNICODE() rather than ASCII(): ASCII() first converts to the
            -- database code page, where unmappable characters become '?' (63)
            -- and would slip through a "< 128" test.
            -- Control characters other than TAB/LF/CR are excluded because they
            -- cannot be represented in XML (NOTE(review): expected to raise an
            -- error under the TYPE directive - confirm on the target version).
            AND (UNICODE(sc.Chr) BETWEEN 32 AND 127 OR UNICODE(sc.Chr) IN (9, 10, 13))
          ORDER BY sc.Nmbr
          -- TYPE + .value() returns the plain text; without it '<', '>' and '&'
          -- come back entity-encoded (&lt; &gt; &amp;).
          FOR XML PATH(''), TYPE
       ).value('.', 'NVARCHAR(MAX)') AS GoodString
FROM @tbl AS tbl
結果
1 ËËËËeeeeËËËË eeee
2 ËaËËbËeeeeËËËcË abeeeec
這是我的另一個答案,其中使用這種方法把所有特殊字符替換爲安全字符,從而得到純拉丁字符
你的答案似乎無法解決我在這裏提出的問題:https://stackoverflow.com/questions/44804879/how-to-find-invalid-char-in-a-sql-table – Mike 2017-07-05 17:25:11
阿米特·科漢的解決方案可行,但對我來說很慢。對於大字符串數據,逐個字符迭代並不是最優的。我寫了下面這個函數,它不算緊湊,但速度很快。
-- dbo.F_StripLowAscii: removes the low ASCII control characters 0-8, 11-12 and
-- 14-31 from @Name. TAB (9), LF (10) and CR (13) are kept.
--   @Name   : text to clean.
--   returns : nvarchar(max) without those control characters; NULL for NULL input.
-- The loops run once per distinct offending character (once per occurrence for
-- CHAR(0)), not once per character of the input.
CREATE FUNCTION dbo.F_StripLowAscii
(
    @Name nvarchar(max)
)
RETURNS nvarchar(max) AS
BEGIN
    IF @Name IS NULL
        RETURN @Name

    DECLARE @BlankRange VARCHAR(15)
    DECLARE @FoundAt INTEGER

    -- ASCII CHAR #0 needs a special treatment: it is removed by position
    -- instead of with REPLACE. STUFF deletes exactly one character and, unlike
    -- LEFT + SUBSTRING(..., LEN(@Name)), does not depend on LEN(), which
    -- ignores trailing spaces.
    SET @BlankRange = '%[' + CHAR(0) + ']%'
    SET @FoundAt = PATINDEX(@BlankRange, @Name COLLATE SQL_Latin1_General_CP850_Bin)
    WHILE @FoundAt > 0
    BEGIN
        SET @Name = STUFF(@Name, @FoundAt, 1, '')
        SET @FoundAt = PATINDEX(@BlankRange, @Name COLLATE SQL_Latin1_General_CP850_Bin)
    END

    -- Remaining control characters: 1-8, 11-12 and 14-31 in one bracket class.
    SET @BlankRange = '%[' + CHAR(1) + '-' + CHAR(8)
                           + CHAR(11) + '-' + CHAR(12)
                           + CHAR(14) + '-' + CHAR(31) + ']%'
    SET @FoundAt = PATINDEX(@BlankRange, @Name COLLATE SQL_Latin1_General_CP850_Bin)
    WHILE @FoundAt > 0
    BEGIN
        -- Remove every occurrence of the character just found. REPLACE uses the
        -- same binary collation as PATINDEX so the located character is matched
        -- exactly; under a linguistic collation a control character might not
        -- match, which would leave the loop running forever.
        SET @Name = REPLACE(@Name COLLATE SQL_Latin1_General_CP850_Bin, SUBSTRING(@Name, @FoundAt, 1), '')
        SET @FoundAt = PATINDEX(@BlankRange, @Name COLLATE SQL_Latin1_General_CP850_Bin)
    END

    RETURN @Name
END
GO
這可能有助於http://stackoverflow.com/questions/983291/purpose-of-x20-x7e-in-regular-expressions – 2013-03-06 22:47:00
取決於你是什麼數據庫版本。 – 2013-03-06 22:49:31
我不確定你爲什麼不在表現層中就避免這種情況(我相信你有充分的理由),但使用正則表達式,你可以在入口處直接調用 `RegEx.Replace(sVar, "[^A-Za-z0-9\s][\x00-\xFF]", "")`。參考:http://www.stylusstudio.com/SSDN/default.asp?action=9&read=1968&fid=23 – 2013-03-06 23:25:43