2017-07-04 73 views
2

在上一個問題(需要鏈接)中,我嘗試從 Twitter 文字中刪除 hashtags 和評論。我的字符串是這樣的文字(正則表達式刪除等):

@lien_ayy92 % Real Avail▶#Jakarta #Bekasi Excl/Incl Expo▶6-7 Juli #Cirebon Wajib DP Cek BIO https://local.com/ 

我想嘗試刪除不重要的圖標和文本。

以下是我之前(previous)的代碼:

/**
 * Remove noise from a tweet-like string by deleting every match of a single
 * alternation regex. Whitespace is never removed, so deleted tokens leave
 * their surrounding spaces behind.
 *
 * The punctuation class originally read `[$&+,:;=?@#|'<>.^*()%!-/]`; the
 * `=?@` part had been mangled into an "[email protected]" artifact, which
 * closed the class early and made the literal a syntax error. It is restored
 * here.
 *
 * @param {string} data - Raw text to clean.
 * @returns {string} The text with all matched fragments removed.
 */
let clean = function (data) {
  // Alternatives, tried left to right at each position:
  //   1. http/https/ftp URLs, up to the next whitespace ([\n\S] also lets the
  //      match run across a newline).
  //   2. @mentions and #hashtags that do not directly follow a word character.
  //   3. Word characters immediately followed by a trailing @ or #.
  //   4. Runs of two or more symbols that are not adjacent to word characters.
  //   5. Stand-alone ASCII words of one to three letters.
  //   6. Runs of digits.
  //   7. Single punctuation characters; `!-\/` is a range covering
  //      ! " # $ % & ' ( ) * + , - . /
  //   8. Surrogate pairs \ud83d\ude00-\ud83d\ude4f (U+1F600..U+1F64F).
  const noisePattern = /(?:https?|ftp):\/\/[\n\S]+|\B[@#]\w+\b|\b\w+[@#]\B|\B[^\w\s]{2,}\B|\b[a-zA-Z]{1,3}\b|[0-9]+|[$&+,:;=?@#|'<>.^*()%!-\/]|\ud83d[\ude00-\ude4f]/g;

  return data.replace(noisePattern, '');
};
 
/**
 * Turn raw text into a list of lower-cased tokens with stop words removed.
 *
 * The text is passed through `clean`, trimmed, lower-cased and split on runs
 * of whitespace (so tabs and newlines separate tokens as well as spaces).
 * Empty tokens and tokens found in the stop-word list are dropped.
 *
 * @param {string} docs - Raw text to tokenize.
 * @returns {string[]} Remaining tokens, in their original order.
 */
let stopwords = function (docs) {
  // Stop-word list; a Set keeps the membership check O(1) if the list grows.
  const stopWordSet = new Set(['about']);

  const tokens = clean(docs)
    .trim()
    .toLowerCase()
    .split(/\s+/);

  // An empty token only appears when the cleaned text itself is empty.
  return tokens.filter((token) => token !== '' && !stopWordSet.has(token));
};
 

 
// Demo: run the sample tweet from the question through stopwords() and print the tokens.
const questionTokens = stopwords('@lien_ayy92 % Real Avail▶#Jakarta #Bekasi Excl/Incl Expo▶6-7 Juli #Cirebon Wajib DP Cek BIO https://local.com about data');
console.log(questionTokens);

我希望代碼的輸出結果是這樣的:

["real","juli","data"]; 
+0

聽起來你應該在字典中查找單詞……而不是使用正則表達式。 –

+0

請參閱https://jsfiddle.net/ktpam6zu/ –

回答

1

您可以使用 Array.prototype.filter 刪除所有含有特殊字符的項目。

/**
 * Remove noise from a tweet-like string by deleting every match of a single
 * alternation regex. Whitespace is never removed, so deleted tokens leave
 * their surrounding spaces behind.
 *
 * The punctuation class originally read `[$&+,:;=?@#|'<>.^*()%!-/]`; the
 * `=?@` part had been mangled into an "[email protected]" artifact, which
 * closed the class early and made the literal a syntax error. It is restored
 * here.
 *
 * @param {string} data - Raw text to clean.
 * @returns {string} The text with all matched fragments removed.
 */
let clean = function (data) {
  // Alternatives, tried left to right at each position:
  //   1. http/https/ftp URLs, up to the next whitespace ([\n\S] also lets the
  //      match run across a newline).
  //   2. @mentions and #hashtags that do not directly follow a word character.
  //   3. Word characters immediately followed by a trailing @ or #.
  //   4. Runs of two or more symbols that are not adjacent to word characters.
  //   5. Stand-alone ASCII words of one to three letters.
  //   6. Runs of digits.
  //   7. Single punctuation characters; `!-\/` is a range covering
  //      ! " # $ % & ' ( ) * + , - . /
  //   8. Surrogate pairs \ud83d\ude00-\ud83d\ude4f (U+1F600..U+1F64F).
  const noisePattern = /(?:https?|ftp):\/\/[\n\S]+|\B[@#]\w+\b|\b\w+[@#]\B|\B[^\w\s]{2,}\B|\b[a-zA-Z]{1,3}\b|[0-9]+|[$&+,:;=?@#|'<>.^*()%!-\/]|\ud83d[\ude00-\ude4f]/g;

  return data.replace(noisePattern, '');
};
 
/**
 * Turn raw text into a list of lower-cased tokens, dropping stop words and
 * any token that contains a character outside an allowed set.
 *
 * The text is passed through `clean`, trimmed, lower-cased and split on runs
 * of whitespace. Empty tokens and stop words are removed first; the remaining
 * tokens are kept only if they consist entirely of ASCII letters and the
 * punctuation listed in the allow-list (so a token such as "avail▶" or one
 * containing digits is rejected).
 *
 * The allow-list class originally read `[a-zA-Z$&+,:;=?@#|'<>.^*()%!-/]`; the
 * `=?@` part had been mangled into an "[email protected]" artifact that made
 * the regex literal a syntax error. It is restored here.
 *
 * @param {string} docs - Raw text to tokenize.
 * @returns {string[]} Remaining tokens, in their original order.
 */
let stopwords = function (docs) {
  // Stop-word list; a Set keeps the membership check O(1) if the list grows.
  const stopWordSet = new Set(['about']);

  // Matches a token made up only of allowed characters. `!-\/` is a range
  // covering ! " # $ % & ' ( ) * + , - . /
  const allowedCharsOnly = /^[a-zA-Z$&+,:;=?@#|'<>.^*()%!-\/]*$/;

  const tokens = clean(docs)
    .trim()
    .toLowerCase()
    .split(/\s+/);

  return tokens
    // An empty token only appears when the cleaned text itself is empty.
    .filter((token) => token !== '' && !stopWordSet.has(token))
    // Reject tokens that still carry icons or other unlisted characters.
    .filter((token) => allowedCharsOnly.test(token));
};
 

 
// Demo: run the sample tweet through the filtered stopwords() and print the tokens.
const answerTokens = stopwords('@lien_ayy92 % Real Avail▶#Jakarta #Bekasi Excl/Incl Expo▶6-7 Juli #Cirebon Wajib DP Cek BIO https://local.com about data');
console.log(answerTokens);