我之前問過類似的問題,並以爲已經得到了正確的答案,但後來發現我捕獲到了一些不該捕獲的字符串。
Javascript 正則表達式:忽略第一個捕獲組
我想解析一個大的文本文件,並用正則表達式拉出某些元素。我爲我的網站使用Node,所以我在Javascript中執行此操作。
在下面的示例中,我試圖匹配 10 個帶有逗號和小數點的數字串。在第一個例子中,我匹配到了正確的模式,但同時捕獲了兩個多餘的字符串(我只想要以「4 0000 ....」開頭的行末尾的數字)。
https://regex101.com/r/nO8nM1/8
在這個例子中,我匹配到了合適的字符串,但無法忽略第一個捕獲組,所以其他字符和空格也被包含在內。
https://regex101.com/r/uB6hE4/1
正則表達式:
/(\d+,\d+.\d+)(?=")|(\d+,\d+,\d+.\d+)(?=")/gm
樣本數據:
23205 - Grants Current-County Operatin 4,425,327.00"
" 4 0000047387 Central Equatoria State 1003-1478 Sta Hosp Oper Oct 85,784.00"
" 4 0000047442 EASTERN EQUATORIA ST 1003-1479 Sta Hosp Oper Oct 93,137.00"
" 4 0000047485 JONGLEI STATE 1003-1519 Sta Hosp Oper Oct 144,608.00"
" 4 0000047501 Lakes State 1003-1482 Sta Hosp Oper Oct 93,137.00"
" 4 0000047528 Unity State 1003-1484 Sta Hosp Oper Oct 75,980.00"
" 4 0000047532 Northern Bahr-el State 1003-1483 Sta Hosp Oper Oct 58,824.00"
" 4 0000047615 Western E State 1003-1488 Sta Hosp Oper Oct 93,137.00"
" 4 0000047638 Warap State 1003-1486 Sta Hosp Oper Oct 51,471.00"
" 4 0000047680 Upper Nile State 1003-1485 Capitation 102,941.00"
" 4 0000047703 Western BG State 1003-1487 Sta Hosp Oper Oct 34,314.00"
----------------------
" Total For Period 4 833,333.00"
----------------------------------------------------------------------------------------------------------------------------
Fiscal Year 2015/16 Republic Of South Sudan Date 2015/11/20
Period 5 Time 12:58:40
FreeBalance Financial Management System Page 7
----------------------------------------------------------------------------------------------------------------------------
Vendor Analysis Report
1091 Health (MOH)
Prd Voucher # Vendor Name Description Amount
--- ---------------- ------------------------------ ----------------------------- ----------------------
----------------------
"
(\d+,\d+,\d+.\d+)(?=")
正則表達式2:
/(?:\s\w{3}\s+|Capitation\s+)(\d+,\d+.\d+)(?=")|(?:\s\w{3}\s+|Capitation\s+)(\d+,\d+,\d+.\d+)(?=")/gm
在我的代碼中,如果這些值存在,我會把它們推入一個對象數組。我嘗試只推入與我需要的內容相關的匹配組,但它只會推入匹配結果中的索引項。
我花了很多時間嘗試 `?:`、`?=` 和 `?!` 的幾種不同組合,想在第二個鏈接的例子中忽略第一個捕獲組,但都無濟於事。我覺得解決方案應該相當簡單,但我就是無法完全做到。我哪裡做錯了?
我的代碼:
/**
 * Change handler for a file input: reads the selected text report and parses
 * it into a structured object, which is logged to the console.
 *
 * The logged object has the shape:
 *   {
 *     uploadDate: Date|null,   // from the first "Date ..." occurrence
 *     period:     number|null, // from the first "Period ..." occurrence
 *     section: [{
 *       transferCode: string,  // first 4-5 digit run of the sub-section
 *       details: [{ voucherNumber, vendor, description, total }]
 *     }]
 *   }
 * Keys whose pattern found nothing are removed rather than left as null.
 *
 * @param {Event} event - change event whose target exposes `files`.
 * @returns {undefined} The result is only logged, never returned.
 */
var openFile = function(event) {
  var input = event.target;
  // Nothing selected: bail out instead of handing `undefined` to readAsText.
  if (!input.files || input.files.length === 0) {
    return;
  }
  var reader = new FileReader();

  reader.onload = function() {
    var text = String(reader.result || "");

    // Regex literals live inside the handler so every load gets fresh
    // objects; global regexes carry `lastIndex` state between exec() calls.
    var subSectionRegex = / Total([\s\S]*?)Total|^\s+\d{4,5}([\s\S]*?)Total F/gm;
    var transferCodeRegex = /[0-9]{4,5}/;
    var voucherNumberRegex = /([0-9]{7,10}[\S])(?=\s+)/g;
    var vendorRegex = /(?!\d{10})(\S+\s\S+(\s\S+)?)(?=\s+100)|(?!\d{10})(\S+(\s\S+)?)(?=\s+100)/gm;
    // NOTE(review): this pattern is vendorRegex plus one extra alternative, so
    // it appears to return the vendor name rather than the description column
    // -- confirm against the real report layout.
    var descriptionRegex = /(?!\d{10})(\S+\s\S+(\s\S+)?)(?=\s+100)|(?!\d{10})(\S+(\s\S+)?)(?=\s+100)|(?!\d{10})(\S+\s(\s\S+)?)(?=\s+100)/g;
    var amountRegex = /(?:\s\w{3}\s+|Capitation\s+)(\d+,\d+.\d+)(?=")|(?:\s\w{3}\s+|Capitation\s+)(\d+,\d+,\d+.\d+)(?=")/gm;
    var oneLineAmountRegex = /(\d+,\d+,\d+.\d+)|\d+,\d+.\d+/g;
    var oneLineDescRegex = /- (\D+)|- \d+(\D+)/gm;

    /**
     * Runs a global regex over `str` and returns, for every match, the first
     * capture group that participated (or the whole match when none did).
     *
     * String.prototype.match with the `g` flag returns only whole matches, so
     * text consumed by a non-capturing prefix such as (?:\s\w{3}\s+) would
     * leak into the result; exec() exposes the groups.
     *
     * @param {RegExp} regex - must carry the `g` flag.
     * @param {string} str
     * @returns {string[]|null} captured values, or null when nothing matched
     *   (same "null on no match" contract as String.prototype.match).
     */
    function collectCaptures(regex, str) {
      var found = [];
      var hit;
      regex.lastIndex = 0;
      while ((hit = regex.exec(str)) !== null) {
        var value = hit[0];
        for (var g = 1; g < hit.length; g++) {
          if (hit[g] !== undefined) {
            value = hit[g];
            break;
          }
        }
        found.push(value);
        // A zero-length match would never advance; step over it manually.
        if (hit[0] === "") {
          regex.lastIndex++;
        }
      }
      return found.length > 0 ? found : null;
    }

    /** Returns the report date as a Date, or null when no "Date ..." text exists. */
    function extractDate() {
      var hit = text.match(/Date (.*)/);
      return hit ? new Date(hit[1].trim()) : null;
    }

    /** Returns the leading integer after the first "Period ", or null. */
    function extractPeriod() {
      var hit = text.match(/Period (.*)/);
      if (!hit) {
        return null;
      }
      var value = parseInt(hit[1].trim().split(" ")[0], 10);
      return isNaN(value) ? null : value;
    }

    /** Builds one transfer record per sub-section of the report. */
    function extractDetails(subSections) {
      return subSections.map(function(block) {
        var codeHit = block.match(transferCodeRegex);
        // Store the plain string: a raw match array carries index/input/groups
        // properties that pollute the output.
        var transferCode = codeHit ? codeHit[0] : null;
        var vouchers = collectCaptures(voucherNumberRegex, block);
        var vendors = collectCaptures(vendorRegex, block);
        var descriptions = collectCaptures(descriptionRegex, block);
        var amounts = collectCaptures(amountRegex, block);

        if (transferCode !== null && vouchers && vendors && descriptions && amounts) {
          return {
            "transferCode": transferCode,
            "details": [{
              "voucherNumber": vouchers,
              "vendor": vendors,
              "description": descriptions,
              "total": amounts
            }]
          };
        }
        // Fallback for sub-sections that lack the multi-line voucher layout.
        return {
          "transferCode": transferCode,
          "details": [{
            "voucherNumber": vouchers,
            "description": collectCaptures(oneLineDescRegex, block),
            "total": collectCaptures(oneLineAmountRegex, block)
          }]
        };
      });
    }

    /** Recursively strips null/undefined entries from arrays and objects, in place. */
    function removeNulls(node) {
      if (Array.isArray(node)) {
        // Walk backwards so splice() does not shift unvisited elements.
        for (var idx = node.length - 1; idx >= 0; idx--) {
          if (node[idx] === null || node[idx] === undefined) {
            node.splice(idx, 1);
          } else if (typeof node[idx] === "object") {
            removeNulls(node[idx]);
          }
        }
        return;
      }
      Object.keys(node).forEach(function(key) {
        if (node[key] === null || node[key] === undefined) {
          delete node[key];
        } else if (typeof node[key] === "object") {
          removeNulls(node[key]);
        }
      });
    }

    /** Converts amount strings to numbers and trims descriptions, in place. */
    function cleanData(transfers) {
      transfers.forEach(function(transfer) {
        transfer.details.forEach(function(detail) {
          if (detail.total) {
            detail.total = detail.total.map(function(raw) {
              return parseFloat(raw.replace(/,/g, ""));
            });
          }
          if (detail.description) {
            detail.description = detail.description.map(function(raw) {
              return String(raw).trim();
            });
          }
        });
      });
    }

    // match() yields null when the report has no sub-section at all.
    var subSection = (text.match(subSectionRegex) || []).filter(Boolean);

    // Extract first, then strip nulls, then normalise values.
    var transferArray = extractDetails(subSection);
    removeNulls(transferArray);
    cleanData(transferArray);

    var allData = {
      uploadDate: extractDate(),
      period: extractPeriod(),
      section: transferArray
    };

    console.log(JSON.stringify(transferArray, null, 2));
    console.log(allData);
  };

  reader.readAsText(input.files[0]);
};
從你寫的內容來看,很明顯你想在 JS 正則表達式引擎中使用後行斷言(lookbehind),但 **JS 不支持 lookbehind**。恕我直言,[`(?:\s\w{3}\s+|Capitation\s+)(\d+(?:,\d{3})*\.\d+)"`](https://regex101.com/r/uH9uI5/1) 是這裏最適合使用的正則表達式。問題只出在你的代碼上……而我在問題中看不到代碼。 –
啊,對不起。我認爲它會顯示在鏈接中。我現在將在編輯中添加代碼。 –