2014-03-26 27 views



signers <- data.frame(
    first = 
     c("Benjamin","Thomas","Robert","George","Thomas","Jared","James","John","James","George","George","James","Edmund","George") , 
    last = 


# Sample paragraph that is searched for signer names. Some names appear as
# plain "first last" (e.g. "Benjamin Franklin"); others carry a middle name
# or initial between first and last name (e.g. "Edmund Jennings Randolph").
text <- 
"A lot of people attended the Constitutional Convention in Philadephia, including Alexander Hamilton, Benjamin Franklin and John Adams. 
Not everyone who attended the convention ended up signing the Constitution, including George Wythe, John F. Mercer and Edmund Jennings Randolph who abstained." 


在 Benjamin Franklin 和 George Wythe 的情況下，完整姓名直接出現在文本中。在 Edmund Randolph 的情況下，他的名字和姓氏之間隔著一個單詞（連同前後空格共 10 個字符）。


 first  last  inparagraph 
1 Benjamin Franklin  1 
2 Thomas Mifflin 
3 Robert  Morris 
4 George  Clymer 
5 Thomas Fitzsimons 
6  Jared Ingersoll 
7  James  Wilson 
8  John  Blair 
9  James Madison 
10 George Washington 
11 George  Mason 
12 James McClurg 
13 Edmund Randolph  1 
14 George  Wythe  1 


# For every first name, collect the indices of the `text` elements that
# contain it (integer(0) when the first name does not occur at all).
# Only first names are looked up here; last names are not checked.
namesfinds <- lapply(
  signers$first,
  function(first_name) grep(pattern = first_name, x = text)
)




# Build one regular expression per signer. Capture groups:
#   \\1  everything before the name
#   \\2  the full name: first name, up to three middle words or initials
#        (letters and dots only), then the last name
#   \\3  the last middle word matched inside \\2 (if any)
#   \\4  everything after the name
# The bounded repetition is written `{0,3}` because the `{,3}` form is not
# one of the repetition quantifiers documented in ?regex.
patterns <- paste0(
  "(.*)(", signers$first, "(\\s+[[:alpha:].]+){0,3}\\s+", signers$last, ")(.*)"
)

# Flag each signer whose name occurs in `text`: "1" for a match, "" otherwise.
# vapply() always yields a plain, unnamed logical vector (sapply() would name
# every element after its pattern and change type on empty input); any()
# collapses the result to one value per signer even if `text` has several
# elements.
signers$inparagraph <- ifelse(
  vapply(
    patterns,
    function(p) any(grepl(p, text)),
    logical(1),
    USE.NAMES = FALSE
  ),
  "1",
  ""
)


 first  last inparagraph 
1 Benjamin Franklin   1 
2 Thomas Mifflin    
3 Robert  Morris    
4 George  Clymer    
5 Thomas Fitzsimons    
6  Jared Ingersoll    
7  James  Wilson    
8  John  Blair   1 
9  James Madison    
10 George Washington    
11 George  Mason    
12 James McClurg    
13 Edmund Randolph   1 
14 George  Wythe   1 


# Extract the full name as it appears in `text` (capture group 2 of each
# pattern) for every signer that was found. The patterns are filtered to the
# matching ones first, so the substitution is only run where it can succeed,
# and vapply() guarantees an unnamed character vector as the result.
vapply(
  patterns[vapply(patterns, grepl, logical(1), text, USE.NAMES = FALSE)],
  function(p) gsub(p, "\\2", text),
  character(1),
  USE.NAMES = FALSE
)
# Example output:
# [1] "Benjamin Franklin"  "John W. F. Blair"   "Edmund Jennings Randolph" 
# [4] "George Wythe"  


# Sample paragraph whose last sentence additionally names "John W. F. Blair",
# i.e. a name with two middle initials between first and last name.
text <- 
    "A lot of people attended the Constitutional Convention in Philadephia, including Alexander Hamilton, Benjamin Franklin and John Adams. 
Not everyone who attended the convention ended up signing the Constitution, including George Wythe, John F. Mercer and Edmund Jennings Randolph who abstained and John W. F. Blair ate cake" 

我知道它已經兩年了 - 但我非常感謝這個答案! – MatthewR


@MatthewR,沒有問題,我很欣賞這種讚賞;) – BrodieG



> # One pattern per signer: first name, any text, then last name. The ".*" is
> # unbounded, so the two names may be arbitrarily far apart in `text`.
> pst <- paste0(signers$first, ".*", signers$last)
> # Test every pattern directly; any() gives exactly one TRUE/FALSE per signer
> # even when `text` has several elements, so no names have to be rebuilt
> # from the pattern strings afterwards.
> hit <- vapply(pst, function(p) any(grepl(p, text)), logical(1))
> # "1" marks signers found in the text, "" the others.
> signers$inparagraph <- ifelse(unname(hit), "1", "")
> signers 
##  first  last inparagraph 
## 1 Benjamin Franklin   1 
## 2 Thomas Mifflin   
## 3 Robert  Morris   
## 4 George  Clymer   
## 5 Thomas Fitzsimons   
## 6  Jared Ingersoll   
## 7  James  Wilson   
## 8  John  Blair   
## 9  James Madison   
## 10 George Washington   
## 11 George  Mason   
## 12 James McClurg   
## 13 Edmund Randolph   1 
## 14 George  Wythe   1