2017-04-16 59 views
2

我目前正在學習 Node.js、JavaScript 等等,之前主要寫 C++。(問題標題:JavaScript 文本數組解析)

我需要解析的陣列如:

====================================================================================================== 
No. Name     Cask    Current   Latest   Auto-Update State 
====================================================================================================== 
1/38 5KPlayer    5kplayer   latest   latest 
2/38 Adobe Photoshop CC  adobe-photoshop-cc 16    16 
3/38 Alfred     alfred    3.3.1_806  3.3.2_818  Y   ignored 
4/38 AppCleaner    appcleaner   3.4    3.4    Y   ignored 
5/38 Github Atom   atom    1.15.0   1.15.0   Y   ignored 
6/38 BetterZipQL   betterzipql   latest   latest 
7/38 Boom     boom    1.6,1490693621 1.6,1490693621 
8/38 CheatSheet    cheatsheet   1.2.7   1.2.7 
9/38 Cyberduck    cyberduck   5.4.0.23761  5.4.0.23761 
10/38 Dropbox    dropbox    21.4.25   latest   Y   ignored 

這是 Mac 上已安裝應用程式的列表,每個應用程式佔一行。

如果該應用程式已過期('current' != 'latest'),我會保留該行並對其做後續處理。

我想出了一個不太優雅、但可以運作的解決方案:

/**
 * Collects the rows of a whitespace-aligned table whose 4th and 5th columns
 * (index 3 and 4, "Current" and "Latest" in the sample table) differ.
 *
 * The first three lines (separator, header, separator) are skipped. Every
 * remaining line is split on runs of two or more spaces; lines whose two
 * version columns are equal (including blank lines, where both are
 * undefined) are ignored.
 *
 * @param {string} array - The whole table as one newline-separated string
 *     (despite the parameter name, this is text, not an Array).
 * @returns {string[][]} One array of column strings per outdated row.
 */
function parseBrewCUArray(array) {
    const HEADER_LINE_COUNT = 3;
    const toUpdate = [];
    const lines = array.split('\n').slice(HEADER_LINE_COUNT);

    for (const line of lines) {
        // Declared locally: the previous version assigned to an undeclared
        // name, which leaks a global in sloppy mode and throws in strict mode.
        const columns = line.split(/[ ]{2,}/);
        if (columns[3] !== columns[4]) {
            toUpdate.push(columns);
            // Kept from the original so existing console output is unchanged.
            console.log(columns);
        }
    }

    return toUpdate;
}

但是,一定有更好的解決方案!有人可以幫忙優化一下,讓這段代碼更優雅嗎?

回答

1

你的代碼可以簡化如下:

/**
 * Returns the table text reduced to its first three lines (the header) plus
 * every later line whose columns at index 3 and 4 are not strictly equal.
 * Columns are separated by runs of two or more spaces.
 *
 * @param {string} str - Newline-separated table text.
 * @returns {string} The surviving lines, joined with "\n".
 */
function parseBrewCUArray(str) {
    const headerLineCount = 3;
    const allRows = str.split('\n');
    const keptRows = [];

    for (let position = 0; position < allRows.length; position++) {
        const line = allRows[position];

        // Header lines are always kept, whatever they contain.
        if (position < headerLineCount) {
            keptRows.push(line);
            continue;
        }

        const cells = line.split(/ {2,}/);
        if (cells[3] !== cells[4]) {
            keptRows.push(line);
        }
    }

    return keptRows.join("\n");
}
 

 
// Sample input: the table text from the question, one application per line.
var s = `====================================================================================================== 
 
No. Name     Cask    Current   Latest   Auto-Update State 
 
====================================================================================================== 
 
1/38 5KPlayer    5kplayer   latest   latest 
 
2/38 Adobe Photoshop CC  adobe-photoshop-cc 16    16 
 
3/38 Alfred     alfred    3.3.1_806  3.3.2_818  Y   ignored 
 
4/38 AppCleaner    appcleaner   3.4    3.4    Y   ignored 
 
5/38 Github Atom   atom    1.15.0   1.15.0   Y   ignored 
 
6/38 BetterZipQL   betterzipql   latest   latest 
 
7/38 Boom     boom    1.6,1490693621 1.6,1490693621 
 
8/38 CheatSheet    cheatsheet   1.2.7   1.2.7 
 
9/38 Cyberduck    cyberduck   5.4.0.23761  5.4.0.23761 
 
10/38 Dropbox    dropbox    21.4.25   latest   Y   ignored`; 
 

 
// Run the filter on the sample text and print the string it returns.
console.log(parseBrewCUArray(s));

但通常我寧願先把字符串解析成某種可用的數據結構,然後再基於它繼續處理:

// first the utilities: 
 

 
/**
 * Casts a value to a string, mapping null and undefined to the empty string
 * instead of "null" / "undefined".
 *
 * @param {*} value - Anything.
 * @returns {string} "" for null/undefined, otherwise String(value).
 */
function string(value) {
    if (value == null) {
        return "";
    }
    return String(value);
}

/**
 * Builds a reusable replacement function.
 *
 * @param {RegExp|string} pattern - What to search for.
 * @param {string|Function} [replacement=""] - What to substitute.
 * @returns {function(*): string} A function that stringifies its argument
 *     (via string()) and applies the replacement to it.
 */
function replace(pattern, replacement = "") {
    return function (value) {
        const text = string(value);
        return text.replace(pattern, replacement);
    };
}

/**
 * Prefixes every character that has a special meaning in a regular
 * expression with a backslash, so the text can be matched literally.
 */
const escapeForRegex = replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
 

 
/**
 * Collects every match of `needle` in `haystack` as an array of
 * RegExp#exec() result objects, hiding the exec() loop boilerplate.
 *
 * A RegExp needle is used as given; any other needle is stringified,
 * regex-escaped and searched for globally as literal text. The scan always
 * starts at the beginning of the text, and a RegExp passed by the caller is
 * handed back with lastIndex reset to 0.
 *
 * @param {*} haystack - Text to search; null/undefined count as "".
 * @param {RegExp|*} needle - Pattern, or literal text to look for.
 * @returns {RegExpExecArray[]} The matches in order of occurrence.
 */
function matchAll(haystack, needle) {
    const str = string(haystack);
    const regex = needle instanceof RegExp
        ? needle
        : new RegExp(escapeForRegex(needle), "g");
    const results = [];
    let lastMatchIndex = NaN;
    let match;

    // A reused /g regex may carry a stale lastIndex from an earlier exec()
    // or test(); without this reset the leading matches would be skipped.
    regex.lastIndex = 0;

    // Stop when exec() reports the same index twice in a row: this happens
    // with a non-global regex or a zero-length match and would otherwise
    // loop forever.
    while ((match = regex.exec(str)) && lastMatchIndex !== match.index) {
        results.push(match);
        lastMatchIndex = match.index;
    }

    // Leave the caller's regex in a clean state even after an early stop.
    regex.lastIndex = 0;

    return results;
}
 

 

 
/**
 * Parses a text table whose columns are defined by alignment rather than by
 * a separator character.
 *
 * - Every line that does not contain at least one letter or digit is dropped
 *   (this removes "=====" rulers and blank lines).
 * - The first remaining line is the header: each title's start offset marks
 *   the start of its column, and the next title's offset marks its end. The
 *   last column runs to the end of the line.
 * - Trailing dots, colons and whitespace are stripped from the titles.
 * - Tabs are not handled, because there is no single standard tab width.
 *
 * NOTE(review): the row parser is compiled with the Function constructor.
 * Titles are embedded through JSON.stringify and offsets are numbers, but
 * this is still dynamic code generation: it is blocked by strict
 * Content-Security-Policy settings and deserves care if the header text is
 * untrusted.
 *
 * @param {*} str - The table text; null/undefined count as "".
 * @param {boolean} [allowSingleSpacesInTitle=false] - When true, a single
 *     space inside a header title does not start a new column.
 * @returns {Object[]} One object per data row, keyed by column title, with
 *     trimmed string values. Empty when the text has no usable line.
 */
function parseTableByAlignment(str, allowSingleSpacesInTitle=false){
    const rows = string(str).match(/[^\r\n]*[a-zA-Z0-9][^\r\n]*/g);

    // String#match returns null (not an empty array) when nothing matches;
    // without this guard rows.shift() would throw on empty input.
    if (rows === null) {
        return [];
    }

    const removeTrailingDots = replace(/[\.:\s]+$/, "");
    const titlePattern = allowSingleSpacesInTitle ? /\S+(?: \S+)*/g : /\S+/g;

    // Walk the titles right-to-left so each column knows where the next one
    // starts; `end` stays undefined for the last column, which makes
    // substring() read to the end of the row.
    let end;
    const objectLiteralBody = matchAll(rows.shift(), titlePattern)
        .reduceRight((acc, match) => {
            const property = JSON.stringify(removeTrailingDots(match[0])) + ": row.substring(" + match.index + ", " + end + ").trim()";
            end = match.index;
            return " " + property + ",\n" + acc;
        }, "}");

    const parseRow = new Function("row", "return {\n" + objectLiteralBody);

    return rows.map(parseRow);
}
 

 
// Sample table text; parseTableByAlignment reads the column names and their
// offsets from the first line that contains a letter or digit.
var s = ` 
 
====================================================================================================== 
 
No. Name     Cask    Current   Latest   Auto-Update State 
 
====================================================================================================== 
 
1/38 5KPlayer    5kplayer   latest   latest 
 
2/38 Adobe Photoshop CC  adobe-photoshop-cc 16    16 
 
3/38 Alfred     alfred    3.3.1_806  3.3.2_818  Y   ignored 
 
4/38 AppCleaner    appcleaner   3.4    3.4    Y   ignored 
 
5/38 Github Atom   atom    1.15.0   1.15.0   Y   ignored 
 
6/38 BetterZipQL   betterzipql   latest   latest 
 
7/38 Boom     boom    1.6,1490693621 1.6,1490693621 
 
8/38 CheatSheet    cheatsheet   1.2.7   1.2.7 
 
9/38 Cyberduck    cyberduck   5.4.0.23761  5.4.0.23761 
 
10/38 Dropbox    dropbox    21.4.25   latest   Y   ignored 
 
`; 
 

 
// Parse the sample into one object per row, keyed by column name.
var data = parseTableByAlignment(s); 
 
// Print only the rows whose Current and Latest values differ.
console.log(data.filter(item => item.Current !== item.Latest));

使用 Function 構造函數把字符串當作代碼來執行……好吧,大多數人對這種做法會持否定態度。所以我添加了 parseTableByAlignment() 的第二個實現,它不使用 Function 構造函數。結果仍然相同:

/**
 * Parses a text table whose columns are defined by alignment rather than by
 * a separator character, without generating code at runtime.
 *
 * - Every line that does not contain at least one letter or digit is dropped
 *   (this removes "=====" rulers and blank lines).
 * - The first remaining line is the header: each title's start offset marks
 *   the start of its column, and the next title's offset marks its end. The
 *   last column runs to the end of the line.
 * - Trailing dots, colons and whitespace are stripped from the titles.
 *
 * @param {*} str - The table text; null/undefined count as "".
 * @param {boolean} [allowSingleSpacesInTitle=false] - When true, a single
 *     space inside a header title does not start a new column.
 * @returns {Object[]} One object per data row, keyed by column title, with
 *     trimmed string values. Empty when the text has no usable line.
 */
function parseTableByAlignment(str, allowSingleSpacesInTitle=false){
    const rows = string(str).match(/[^\r\n]*[a-zA-Z0-9][^\r\n]*/g);

    // String#match returns null (not an empty array) when nothing matches;
    // without this guard rows.shift() would throw on empty input.
    if (rows === null) {
        return [];
    }

    const removeTrailingDots = replace(/[\.:\s]+$/, "");
    const titlePattern = allowSingleSpacesInTitle ? /\S+(?: \S+)*/g : /\S+/g;
    const titles = matchAll(rows.shift(), titlePattern);

    // A column ends where the next title starts; the last column has no end
    // (undefined), so substring() reads to the end of the row.
    const columns = titles.map((match, position) => ({
        name: removeTrailingDots(match[0]),
        start: match.index,
        end: position + 1 < titles.length ? titles[position + 1].index : undefined,
    }));

    return rows.map(row => columns.reduce((obj, column) => {
        obj[column.name] = row.substring(column.start, column.end).trim();
        return obj;
    }, {}));
}
+0

非常感謝這個答案,超出了我的預期。我非常喜歡JSON化的方法。 – deadbird