2017-10-12 36 views
0

(標題:解碼 URL 中的非英文字母,JavaScript)

我的 HTML 頁面收到的 URL 參數中含有非英文字符,例如:http://www.example.com/player.html?name=%e4%e1%ef

我需要把它解碼爲 name=הבן

我試過:

unescape(url) - get wrong characters: name=äáï (maybe from another charset) 
//and 
decodeURIComponent(url) - get Malformed URI exception 
// even found some solutions like 
decodeURIComponent((url+'').replace(/\+/g, '%20')) - exception too 

有任何解決方案嗎?如何使用正確的字符集進行解碼?

謝謝

我原本希望有一個簡單的方法,但在被建議更改網站之後,我還是用了一個粗糙但可行的方式來處理。我的解決方案:

/**
 * Decode the percent-escapes of single-byte Hebrew text into Unicode.
 *
 * Only two kinds of escapes are translated:
 *   - %E0 .. %FA  -> the Hebrew letters U+05D0 (alef) .. U+05EA (tav)
 *   - %20         -> a space
 * Every other part of the input, including any other %XX escape, is
 * returned untouched.
 *
 * Hex digits in percent-escapes are case-insensitive, so both "%e4" and
 * "%E4" are accepted.
 *
 * @param {string} str - Text containing percent-escaped Hebrew bytes.
 * @returns {string} The text with the supported escapes replaced.
 */
function decode(str) {
    // The byte range E0..FA maps onto U+05D0..U+05EA in the same order,
    // so a letter is found by offsetting from the first one.
    var FIRST_HEBREW_BYTE = 0xe0;
    var FIRST_HEBREW_CODE_POINT = 0x05d0;

    return str.replace(/%(20|e[0-9a-f]|f[0-9a])/gi, function (escape, hexDigits) {
        var byteValue = parseInt(hexDigits, 16);
        if (byteValue === 0x20) {
            return " ";
        }
        return String.fromCharCode(FIRST_HEBREW_CODE_POINT + (byteValue - FIRST_HEBREW_BYTE));
    });
}
+1

是什麼工具產生了這個 URI?它應該是 'name=%D7%94%D7%91%D7%9F'(使用 UTF-8) – Bergi

+0

如果你想解碼成那樣,那麼一開始的 URL 就應該是 'name=%D7%94%D7%91%D7%9F'。 – Quentin

+0

你確定它是編碼形式嗎?正確的字符串編碼是:%D7%94%D7%91%D7%9F – Optional

回答

2

原始URL編碼不正確:

// Console session transcript: each bare string literal below appears to be the
// value the console echoed for the statement just before it.

// A URL whose query value is written directly with Hebrew characters.
heb = "http://www.example.com/player.html?name=הבן" 
"http://www.example.com/player.html?name=הבן" 

// encodeURI produces the percent-encoded form shown on the next line.
enc = encodeURI(heb) 
"http://www.example.com/player.html?name=%D7%94%D7%91%D7%9F" 

// decodeURI restores the original text from that encoded form.
decodeURI(enc) 
"http://www.example.com/player.html?name=הבן" 
+0

似乎它的編碼是正確的...只是不適用於UTF8(iso-8859-8-i) – Ben

1

免責聲明:我沒有對這段代碼做過任何詳細的測試。

話雖如此,它能做到你想要的事,而且非常簡單。使用 unicode.org 提供的轉換表,執行以下步驟:

  1. 對字符串執行 unescape
  2. 把字符串拆分成由單個字符組成的數組
  3. 把每個字符轉換成它的字符編碼(十六進制的字節值)
  4. 依照 ISO-8859-8 轉換表,把每個數組元素替換成對應的 Unicode 字符
  5. 把數組重新合併成一個字符串

// Full example URL carrying the percent-encoded query value; it is not read
// again in the visible code.
// NOTE(review): the ';' just before the closing quote is part of the string
// literal and looks unintended - confirm.
var url = "http://www.example.com/player.html?name=%e4%e1%ef;"; 
 
// Percent-encoded sample value that is passed to utf8_to_iso_8859_8 further down.
var string = '%e4%e1%ef'; 
 

 
/**
 * Lookup table from a single-byte value to a Unicode code point.
 *
 * Both sides are stored as hex strings: keys look like '0xE4' (two upper-case
 * hex digits) and values look like '0x05D4' (four upper-case hex digits).
 * Bytes that have no entry (0xA1, 0xBF-0xDE, 0xFB, 0xFC, 0xFF) are simply
 * absent from the table.
 *
 * The table is generated from its ranges rather than spelled out entry by
 * entry; entries are inserted in ascending byte order.
 */
var charEncodings = (function () {
    var table = {};

    // Format a number as '0x' followed by at least `width` upper-case hex digits.
    function toHexLabel(value, width) {
        var digits = value.toString(16).toUpperCase();
        while (digits.length < width) {
            digits = '0' + digits;
        }
        return '0x' + digits;
    }

    function assign(byteValue, codePoint) {
        table[toHexLabel(byteValue, 2)] = toHexLabel(codePoint, 4);
    }

    var byteValue;

    // 0x00-0x7F (ASCII), 0x80-0x9F (<control>) and 0xA0 (NO-BREAK SPACE)
    // map to the code point with the same numeric value.
    for (byteValue = 0x00; byteValue <= 0xA0; byteValue++) {
        assign(byteValue, byteValue);
    }

    // 0xA2-0xBE: symbols that keep their value, except for two positions:
    // 0xAA is MULTIPLICATION SIGN and 0xBA is DIVISION SIGN.
    for (byteValue = 0xA2; byteValue <= 0xBE; byteValue++) {
        if (byteValue === 0xAA) {
            assign(byteValue, 0x00D7);
        } else if (byteValue === 0xBA) {
            assign(byteValue, 0x00F7);
        } else {
            assign(byteValue, byteValue);
        }
    }

    // 0xDF: DOUBLE LOW LINE
    assign(0xDF, 0x2017);

    // 0xE0-0xFA: HEBREW LETTER ALEF .. HEBREW LETTER TAV (U+05D0 .. U+05EA)
    for (byteValue = 0xE0; byteValue <= 0xFA; byteValue++) {
        assign(byteValue, 0x05D0 + (byteValue - 0xE0));
    }

    // 0xFD: LEFT-TO-RIGHT MARK, 0xFE: RIGHT-TO-LEFT MARK
    assign(0xFD, 0x200E);
    assign(0xFE, 0x200F);

    return table;
}());
 

 
// Run the conversion on the sample value held in `string`.
utf8_to_iso_8859_8(string); 
 
/**
 * Decode a percent-escaped single-byte string into Unicode text using the
 * `charEncodings` lookup table defined elsewhere in this file.
 *
 * Despite the function's name, the direction of the conversion is
 * "byte value -> Unicode character": each %XX escape is turned into a byte,
 * the byte is looked up under the key '0xXX', and the hex string stored there
 * is used as the resulting character's code.
 *
 * Characters that have no entry in the table are passed through unchanged.
 *
 * The result is written to the console and also returned.
 *
 * @param {string} s - Percent-escaped input, e.g. '%e4%e1%ef'.
 * @returns {string} The decoded text.
 */
function utf8_to_iso_8859_8(s) {

    // unescape turns every %XX into the character whose code equals XX.
    var rawCharacters = unescape(s).split('');

    var decodedCharacters = rawCharacters.map(function (character) {

        var hexDigits = character.charCodeAt(0).toString(16).toUpperCase();

        // Table keys always carry two hex digits, so codes below 0x10
        // need a leading zero ('0x0A', not '0xA').
        if (hexDigits.length < 2) {
            hexDigits = '0' + hexDigits;
        }

        var tableKey = '0x' + hexDigits;

        // Leave characters without a table entry as they are instead of
        // letting String.fromCharCode(undefined) turn them into NUL.
        if (!Object.prototype.hasOwnProperty.call(charEncodings, tableKey)) {
            return character;
        }

        // Table values are hex strings such as '0x05D4'; convert explicitly.
        return String.fromCharCode(Number(charEncodings[tableKey]));

    });

    // join('') keeps commas that are part of the text; the previous
    // toString()-then-strip-commas approach removed them.
    var decodedText = decodedCharacters.join('');

    console.log(decodedText);

    return decodedText;

}

+0

我認爲你應該將'charEncodings'存儲爲數字,而不是十六進制字符串 – Bergi

+0

@Bergi任何特定的原因?我的想法是,由於方法中的所有內容都以字符串形式進行操作(尤其是因爲對象鍵以串聯字符串形式訪問),因此字符串最容易。而且,無可否認,我還沒有很多這種類型的轉換,所以任何最佳實踐的想法,讚賞。 – Ben

+0

「*特別是因爲對象鍵被作爲連接字符串訪問*」 - 嗯,是的,問題正出在這裏。刪除這個字符串連接並使用'String.fromCharCode(charEncodings(s [index]))'(假設's'是一個數字數組,也就是從百分比編碼的字符串解析出來的字節) – Bergi