2014-02-27 26 views
0

我想獲取一個 HTML 文件並解析該頁面,但返回的 HTML 中含有被編碼的字符(HTML 實體),我無法解決這個問題。(原標題:在 JavaScript 中使用 HttpClient 時取得的是編碼後的字符)

// Options for the HTTP client: response/error handlers plus a request timeout.
var requestOptions = {
    // Invoked when the response data is available; the handler's `this`
    // is handed to the caller-supplied callback unchanged.
    onload : function(e) {
        callback(this);
    },
    // Invoked when the request fails.
    onerror : function(e) {
        error();
    },
    // in milliseconds
    timeout : 10000
};

var client = Ti.Network.createHTTPClient(requestOptions);

// Issue a GET request for the target link.
client.open("GET", _link);
client.send();


Ka&shy;dın&shy;la&shy;rın or&shy;tak // A sample output: &shy;, &amp; etc.

我該如何解決這個問題?

回答

0

請使用下面這個取自某個 jsperf 測試的函數 html_entity_decode。以您的字符串為例:

html_entity_decode("Ka&shy;dın&shy;la&shy;rın or&shy;tak"); 
"Ka­dın­la­rın or­tak" 

下面是實際的函數,它會先建立一份涵蓋所有實體代碼的完整(exhaustive)對照表,然後逐一替換:

/**
 * Replace HTML entities in a string with the characters they stand for.
 *
 * The entity list comes from
 * get_html_translation_table('HTML_ENTITIES', quote_style); in addition the
 * zero-padded numeric apostrophe "&#039;" is always decoded.
 *
 * @param {*} string Value to decode; it is converted with toString() first.
 * @param {string|number} [quote_style] Forwarded unchanged to
 *     get_html_translation_table to select which quote entities are decoded.
 * @returns {string|boolean} The decoded string, or false if the translation
 *     table lookup returns false.
 */
function html_entity_decode(string, quote_style) {
    var hash_map = {},
     symbol = '',
     tmp_str = '',
     entity = '';
    tmp_str = string.toString();

    if (false === (hash_map = get_html_translation_table('HTML_ENTITIES', quote_style))) {
        return false;
    }

    // Decode the padded apostrophe before "&amp;" is touched, so that an
    // escaped ampersand followed by "#039;" is not decoded a second time.
    tmp_str = tmp_str.split('&#039;').join("'");

    // fix &amp; problem
    // http://phpjs.org/functions/get_html_translation_table:416#comment_97660
    // Deleting and re-adding the key moves '&' to the end of the iteration
    // order, so "&amp;" is decoded last and "&amp;lt;" yields "&lt;", not "<".
    delete(hash_map['&']);
    hash_map['&'] = '&amp;';

    for (symbol in hash_map) {
        if (Object.prototype.hasOwnProperty.call(hash_map, symbol)) {
            entity = hash_map[symbol];
            tmp_str = tmp_str.split(entity).join(symbol);
        }
    }

    return tmp_str;
}

/**
 * Build a map from literal characters to their HTML entity strings.
 *
 * @param {string|number} [table] 'HTML_SPECIALCHARS' (or 0) for only
 *     & " ' < >, or 'HTML_ENTITIES' (or 1) to also include the named
 *     entities for character codes 160-255. Strings are case-insensitive.
 *     Defaults to 'HTML_SPECIALCHARS' when omitted.
 * @param {string|number} [quote_style] 'ENT_NOQUOTES' (or 0): no quote
 *     entities; 'ENT_COMPAT' (or 2): double quote only; 'ENT_QUOTES' (or 3):
 *     double and single quote. Defaults to 'ENT_COMPAT' when omitted.
 * @returns {Object} Map of character -> entity string, e.g. {'<': '&lt;'}.
 *     Keys are inserted in ascending character-code order.
 * @throws {Error} If the resolved table name is not one of the two
 *     supported tables.
 */
function get_html_translation_table(table, quote_style) {
    var entities = {},
     hash_map = {},
     decimal,
     i;
    var constMappingTable = {},
     constMappingQuoteStyle = {};
    var useTable = {},
     useQuoteStyle = {};

    // Entity names (without the surrounding "&" and ";") for character
    // codes 160..255, in code order: index 0 is code 160.
    var firstNamedCode = 160;
    var namedEntities = [
        'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
        'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
        'deg', 'plusmn', 'sup2', 'sup3', 'acute', 'micro', 'para', 'middot',
        'cedil', 'sup1', 'ordm', 'raquo', 'frac14', 'frac12', 'frac34', 'iquest',
        'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
        'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
        'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
        'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',
        'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
        'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
        'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
        'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml'
    ];

    // Translate arguments: numeric constants are mapped to their names,
    // strings are upper-cased, missing values fall back to the defaults.
    constMappingTable[0] = 'HTML_SPECIALCHARS';
    constMappingTable[1] = 'HTML_ENTITIES';
    constMappingQuoteStyle[0] = 'ENT_NOQUOTES';
    constMappingQuoteStyle[2] = 'ENT_COMPAT';
    constMappingQuoteStyle[3] = 'ENT_QUOTES';

    useTable = !isNaN(table) ? constMappingTable[table] : table ? table.toUpperCase() : 'HTML_SPECIALCHARS';
    useQuoteStyle = !isNaN(quote_style) ? constMappingQuoteStyle[quote_style] : quote_style ? quote_style.toUpperCase() : 'ENT_COMPAT';

    if (useTable !== 'HTML_SPECIALCHARS' && useTable !== 'HTML_ENTITIES') {
        throw new Error("Table: " + useTable + ' not supported');
    }

    entities['38'] = '&amp;';
    if (useTable === 'HTML_ENTITIES') {
        for (i = 0; i < namedEntities.length; i++) {
            entities[String(firstNamedCode + i)] = '&' + namedEntities[i] + ';';
        }
    }

    if (useQuoteStyle !== 'ENT_NOQUOTES') {
        entities['34'] = '&quot;';
    }
    if (useQuoteStyle === 'ENT_QUOTES') {
        entities['39'] = '&#39;';
    }
    entities['60'] = '&lt;';
    entities['62'] = '&gt;';

    // ascii decimals to real symbols
    // (integer-like keys are enumerated in ascending numeric order, which
    // fixes the insertion order of hash_map)
    for (decimal in entities) {
        if (entities.hasOwnProperty(decimal)) {
            hash_map[String.fromCharCode(Number(decimal))] = entities[decimal];
        }
    }

    return hash_map;
}
+0

這解決了部分問題。但我在輸出中仍然看到「&rsquo; &rdquo;」等字符。我該如何解決這個問題? –

+0

將它們添加到實體表 –

+0

謝謝。我發現這個網站的所有html實體http://htmlentities.net/html/entities/ –