2013-02-28 35 views
2

我有一個由 BibTeX 導出的文本文件。該文本文件包含許多符合以下模式的條目。(標題:針對 BibTeX 的 PHP preg_match_all)

@article{ls_leimeister, 
    added-at = {2013-01-18T11:14:11.000+0100}, 
    author = {Wegener, R. and Leimeister, J. M.}, 
    biburl = {http://www.bibsonomy.org/bibtex/27bb26b4b4858439f81aa0ec777944ac5/ls_leimeister}, 
    journal = {International Journal of Technology Enhanced Learning (to appear)}, 
    keywords = {Challenges Communities: Factors Learning Success VirtualCommunity and itegpub pub_jml pub_rwe}, 
    note = {JML_390}, 
    title = {Virtual Learning Communities: Success Factors and Challenges}, 
    year = 2013 
} 

我想使用 PHP,並考慮用 preg_match_all 來解析它。

但下面的嘗試沒有讓我取得任何進展:

// Asker's non-working attempt: without the /m flag '^' only matches at the very
// start of the subject, so it can never succeed after a literal '@' has already
// been consumed; '.' also does not cross newlines without the /s flag.
preg_match_all('/@^.*}$/', file_get_contents($file_path),$results); 

我想先從簡單的開始,但它根本不起作用。我對 PHP 正則表達式還比較陌生。

完美的最終輸出將是:

Array 
    (
     [0] => Array 
      (
       ['type'] => article 
       ['unique_name'] => ls_leimeister 
       ['added-at'] => 2013-01-18T11:14:11.000+0100 
       ['author'] => Wegener, R. and Leimeister, J. M. 
       ['biburl'] => http://www.bibsonomy.org/bibtex/27bb26b4b4858439f81aa0ec777944ac5/ls_leimeister 
       ['journal'] => International Journal of Technology Enhanced Learning (to appear) 
       ['keywords'] => Challenges Communities: Factors Learning Success VirtualCommunity and itegpub pub_jml pub_rwe 
       ['note'] => JML_390 
       ['title'] => Virtual Learning Communities: Success Factors and Challenges 
       ['year'] => 2013 
      ) 

     [1] => Array 
      (
       [...] => … 
      ) 

    ) 
+0

@renanbr [renanbr](https://stackoverflow.com/users/5249251/renanbr)推薦:renanbr/bibtex-parser https://github.com/renanbr/bibtex-parser(我認爲是他自己的發明)。 – mickmackusa 2017-12-12 03:15:08

+0

我已經看到的所有BibTex文檔都將年份值包裹在大括號中。這是發佈時的錯字嗎? – mickmackusa 2017-12-12 13:54:05

回答

2

試試這個:這裏我只提取了 type 和 unique_name;參照這個做法,你可以獲取其餘所有字段。

// Sample BibTeX entry used to demonstrate the extraction (kept verbatim).
$str = <<<'BIBTEX'
@article{ls_leimeister, 
    added-at = {2013-01-18T11:14:11.000+0100}, 
    author = {Wegener, R. and Leimeister, J. M.}, 
    biburl = {http://www.bibsonomy.org/bibtex/27bb26b4b4858439f81aa0ec777944ac5/ls_leimeister}, 
    journal = {International Journal of Technology Enhanced Learning (to appear)}, 
    keywords = {Challenges Communities: Factors Learning Success VirtualCommunity and itegpub pub_jml pub_rwe}, 
    note = {JML_390}, 
    title = {Virtual Learning Communities: Success Factors and Challenges}, 
    year = 2013 
}
BIBTEX;

// The two named groups expose the entry type and its unique name; the third,
// unnamed group holds whatever follows the comma on the header line.
preg_match_all(
    '/@(?P<type>\w+){(?P<unique_name>\w+),(.*)/',
    $str,
    $matches
);

// Print each captured name followed by an HTML line break.
foreach (['type', 'unique_name'] as $group) {
    echo $matches[$group][0], "<br>";
}

// Dump the complete match array after opening a <pre> block for readability.
echo "<pre>", print_r($matches, true);

輸出數組格式與您的輸出數組格式稍有不同,但您可以將此格式更改爲您的格式。

+0

謝謝,這個可行,但其他行比較困難。行數是可變的,而且有些行帶有「{...},」,有些則沒有。 – Spurious 2013-02-28 12:25:03

+0

是的,我知道這很難,但你試着去做。 – 2013-02-28 12:30:00

+0

preg_match_all('/@(\w+){(.+),\s+(\S+)\s+=\s+{(.*)},(.*)/', $file_content, $results); 這樣也能取得第一行。如何讓正則表達式檢索具有相同格式的任意數量的行?還是說我需要先匹配出每個條目,然後再對各個匹配結果分別做一次 preg_match? – Spurious 2013-02-28 12:41:51

0

圖樣:/^@([^{]+)\{([^,]+),\s*$|^\s*([^\n@=]+) = \{(.*?)}/ms(Demo)

這種模式有兩個替代方案;每個包含兩個捕獲組。

  • type 和 unique_name 被捕獲並存儲在元素 [1] 和 [2] 中
  • 所有其他鍵值對存儲在元素 [3] 和 [4] 中

這種分離的數組存儲方式,使得在構建所期望的輸出數組結構時能夠可靠地進行處理。

輸入:

// Sample BibTeX input for the demo: four entries (BOOK, BOOK, ARTICLE, INBOOK).
// Every field value is wrapped in braces, and the ARTICLE entry's `subtitle`
// value spans two lines.
$bibtex='@BOOK{ko, 
    title = {Wissenschaftlich schreiben leicht gemacht}, 
    publisher = {Haupt}, 
    year = {2011}, 
    author = {Kornmeier, M.}, 
    number = {3154}, 
    series = {UTB}, 
    address = {Bern}, 
    edition = {4}, 
    subtitle = {für Bachelor, Master und Dissertation} 
} 

@BOOK{nial, 
    title = {Wissenschaftliche Arbeiten schreiben mit Word 2010}, 
    publisher = {Addison Wesley}, 
    year = {2011}, 
    author = {Nicol, N. and Albrecht, R.}, 
    address = {München}, 
    edition = {7} 
} 

@ARTICLE{shome, 
    author = {Scholz, S. and Menzl, S.}, 
    title = {Alle Wege führen nach Rom}, 
    journal = {Medizin Produkte Journal}, 
    year = {2011}, 
    volume = {18}, 
    pages = {243-254}, 
    subtitle = {ein Vergleich der regulatorischen Anforderungen und Medizinprodukte 
    in Europa und den USA}, 
    issue = {4} 
} 

@INBOOK{shu, 
    author = {Schulz, C.}, 
    title = {Corporate Finance für den Mittelstand}, 
    booktitle = {Praxishandbuch Firmenkundengeschäft}, 
    year = {2010}, 
    editor = {Hilse, J. and Netzel, W and Simmert, D.B.}, 
    booksubtitle = {Geschäftsfelder Risikomanagement Marketing}, 
    publisher = {Gabler}, 
    pages = {97-107}, 
    location = {Wiesbaden} 
}'; 

方法:(Demo)

/**
 * Parse BibTeX source text into a list of entries.
 *
 * Each entry is an associative array that starts with the keys 'type' and
 * 'unique_name' (taken from the "@TYPE{name," header line), followed by one
 * key per "key = {value}" field in document order.
 *
 * Limitations: only brace-wrapped values are recognised; a value may span
 * several lines but ends at the first closing brace. Field lines that appear
 * before any header line are ignored.
 *
 * @param string $bibtex Raw BibTeX text.
 * @return array List of entries; empty when nothing matches.
 */
function parse_bibtex_entries($bibtex){
    // Alternative 1 fills groups 1 and 2: the "@TYPE{unique_name," header line.
    // Alternative 2 fills groups 3 and 4: a "key = {value}" field line.
    // The key class excludes newline, '@' and '=' so a key can never run
    // across lines or swallow a header.
    $pattern='/^@([^{]+)\{([^,]+),\s*$|^\s*([^\n@=]+) = \{(.*?)}/ms';

    $entries=[];
    if(!preg_match_all($pattern,$bibtex,$matches,PREG_SET_ORDER)){
        return $entries; // no match (or regex error): nothing to collect
    }

    $current=null; // entry currently being assembled
    foreach($matches as $match){
        // With PREG_SET_ORDER trailing unmatched groups are omitted, so a
        // header match has no index 3 while a field match always has one.
        if(!isset($match[3])){
            if($current!==null){
                $entries[]=$current; // previous entry is complete
            }
            $current=['type'=>$match[1],'unique_name'=>$match[2]];
        }elseif($current!==null){
            $current[$match[3]]=$match[4];
        }
    }
    if($current!==null){
        $entries[]=$current; // store the final entry
    }
    return $entries;
}

$result=parse_bibtex_entries($bibtex);
var_export($result);

輸出:

array (
    0 => 
    array (
    'type' => 'BOOK', 
    'unique_name' => 'ko', 
    'title' => 'Wissenschaftlich schreiben leicht gemacht', 
    'publisher' => 'Haupt', 
    'year' => '2011', 
    'author' => 'Kornmeier, M.', 
    'number' => '3154', 
    'series' => 'UTB', 
    'address' => 'Bern', 
    'edition' => '4', 
    'subtitle' => 'für Bachelor, Master und Dissertation', 
), 
    1 => 
    array (
    'type' => 'BOOK', 
    'unique_name' => 'nial', 
    'title' => 'Wissenschaftliche Arbeiten schreiben mit Word 2010', 
    'publisher' => 'Addison Wesley', 
    'year' => '2011', 
    'author' => 'Nicol, N. and Albrecht, R.', 
    'address' => 'München', 
    'edition' => '7', 
), 
    2 => 
    array (
    'type' => 'ARTICLE', 
    'unique_name' => 'shome', 
    'author' => 'Scholz, S. and Menzl, S.', 
    'title' => 'Alle Wege führen nach Rom', 
    'journal' => 'Medizin Produkte Journal', 
    'year' => '2011', 
    'volume' => '18', 
    'pages' => '243-254', 
    'subtitle' => 'ein Vergleich der regulatorischen Anforderungen und Medizinprodukte 
    in Europa und den USA', 
    'issue' => '4', 
), 
    3 => 
    array (
    'type' => 'INBOOK', 
    'unique_name' => 'shu', 
    'author' => 'Schulz, C.', 
    'title' => 'Corporate Finance für den Mittelstand', 
    'booktitle' => 'Praxishandbuch Firmenkundengeschäft', 
    'year' => '2010', 
    'editor' => 'Hilse, J. and Netzel, W and Simmert, D.B.', 
    'booksubtitle' => 'Geschäftsfelder Risikomanagement Marketing', 
    'publisher' => 'Gabler', 
    'pages' => '97-107', 
    'location' => 'Wiesbaden', 
), 
) 

這裏是我提取新的示例輸入字符串的網站(the site)。