2013-03-01 40 views
1

好吧,從我已經發布的問題數量來看,我常常覺得自己是有史以來使用ANTLR的人當中最笨的一個,但我還是再次來這裏請求幫助。ANTLR:策略在空白字符處理上出錯

我最終試圖重寫一個現有的策略來簡化它,結果「簡化」後的版本卻在本應發送到HIDDEN通道的空白字符上出錯(skip()也不起作用)。這可能只是詞法分析器(Lexer)令牌的順序不對,但我一直搞不定(可能是我對如何指定順序理解得不夠)。

總之,以下是完整的(經過些許脫敏處理的)策略:

grammar ValidatingPolicy; 

options { 
    language = Java; 
    backtrack = true; 
} 

// package and imports for the parser 
@parser::header { 
package org.jason.manager.impl; 

import org.jason.manager.RecognitionRuntimeException; 
import org.slf4j.Logger; 
import org.slf4j.LoggerFactory; 
} 

// package and imports for the lexer 
@lexer::header { 
package org.jason.manager.impl; 

import org.slf4j.Logger; 
import org.slf4j.LoggerFactory; 
} 

// member functions and fields for the parser 
// Members injected into the generated parser class. The three overrides below
// disable ANTLR's built-in error recovery so that the first syntax error
// surfaces as an exception instead of being recovered from.
@parser::members { 

// Logger used by the @init actions of the parser rules.
private static final Logger log = LoggerFactory.getLogger(ValidatingPolicyParser.class); 
// Do not attempt single-token recovery: throw a MismatchedTokenException
// for the expected token type instead.
@Override 
protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow) throws RecognitionException { 
    throw new MismatchedTokenException(ttype, input); 
} 

    // Do not attempt set-based recovery: rethrow the original exception.
    @Override 
public Object recoverFromMismatchedSet(IntStream input, RecognitionException e, BitSet follow) throws RecognitionException { 
    throw e; 
} 

// Never builds a message string: always throws a RecognitionRuntimeException
// wrapping the original RecognitionException.
@Override 
public String getErrorMessage(RecognitionException e, String[] tokenNames) { 
    // wrap in a runtime exception to escape ANTLR's dungeon 
    throw new RecognitionRuntimeException(e); 
} 
} 

// member functions and fields for the lexer 
@lexer::members { 
    private static final Logger log = LoggerFactory.getLogger(ValidatingPolicyLexer.class); 
} 

// validate a group of SHOW constructs 
showGroup 
    : show+ EOF 
    ; 

// validate a construct WITHOUT show (MINQ, MOS, etc) 
noShow 
    : simpleIfStatement+ EOF 
    ; 

// validate a SHOW construct (COMP or ELIG validation) 
show 
    : SHOW STRING FOR simpleIfStatement+ 
    ; 

// handle an if statement 
simpleIfStatement 
    // basic if statement 
    : IF chainedOperation THEN operationGroup (ELSE operationGroup)? ENDIF 
    // if statement with recursive if statement in THEN or ELSE block 
    | IF chainedOperation THEN simpleIfStatement (ELSE simpleIfStatement)? ENDIF 
    | operationGroup 
    ; 

// aggregate multiple operations. When evaluated, there is an implicit AND 
// when there are multiple groups 
operationGroup 
    : chainedOperation+ 
    ; 

// chain an operation together optionally with AND/OR 
chainedOperation 
    @init { 
    log.info("Entered chainedOperation"); 
    } 
    : operation (AND operation | OR operation)* 
    ; 

// aggregate into a single rule that can be referenced up the chain 
operation 
    @init { 
    log.info("Entered operation"); 
    } 
    // legal operation 
    : (booleanLogical | stringLogical | integerLogical | dateLogical | datePeriodLogical) 
    ; 

// LOGICAL OPERATIONS 
// Logical operators do not have a pass through, but may have limits 
// on which particular operators can be used 

// compare DATE/DATE_FIELD to DATE/DATE_FIELD 
dateLogical 
    @init { 
    log.info("Entered dateLogical"); 
    } 
    : dateOp (EQ|NE|LT|LE|GT|GE) dateOp 
    ; 

// compare DATE_PERIOD/DATE_PERIOD_CONSTANT/DATE_PERIOD_FIELD 
datePeriodLogical 
    @init { 
    log.info("Entered datePeriodLogical"); 
    } 
    : datePeriodOp (EQ|NE|LT|LE|GT|GE) datePeriodOp 
    ; 

// compare INTEGER_FIELD/INTEGER 
integerLogical 
    @init { 
    log.info("Entered integerLogical"); 
    } 
    : integerOp (EQ|NE|LT|LE|GT|GE) integerOp 
    ; 

// compare BOOLEAN_FIELD/BOOLEAN_CONSTANT 
booleanLogical 
    : booleanOp (EQ|NE) booleanOp 
    ; 

// compare STRING_FIELD/STRING 
stringLogical 
    : stringOp (EQ|NE|LT|LE|GT|GE) stringOp 
    { 
    System.out.println("stringLogical: matched rule 1"); 
    } 
    ; 

// Date operand: a date field, date literal or date constant, optionally
// followed by +/- terms.
// NOTE(review): the ' ' literal in the second alternative makes ANTLR create
// an implicit, unnamed lexer token for a single space. That token is then
// emitted for spaces everywhere in the input, and the parser fails on them
// outside this rule. Removing the literal lets whitespace reach WS again.
// NOTE(review): '|' binds more loosely than sequencing, so the trailing
// (...)* group appears to attach only to the last DATE_CONSTANT, and the
// (PLUS|MINUS) prefix only to the first DATE_FIELD inside the group --
// confirm the intended grouping and add parentheses.
dateOp 
    @init { 
    log.info("Entered dateOp"); 
    } 
    // pass through if no math op needs to be performed 
    : DATE_FIELD|DATE|DATE_CONSTANT 
    // match a legal math op 
    | DATE_FIELD|DATE|DATE_CONSTANT ((PLUS|MINUS) DATE_FIELD|DATE|DATE_CONSTANT|DATE_PERIOD_FIELD|DATE_PERIOD_CONSTANT (' ' DATE_PERIOD_CONSTANT)*)* 
    ; 

// Date-period operand: a period field or period constant, optionally followed
// by +/- terms.
// NOTE(review): because '|' binds more loosely than sequencing, (PLUS|MINUS)
// appears to prefix only the first DATE_FIELD inside the group, and the '+'
// applies only to DATE_PERIOD_CONSTANT -- confirm the intended grouping.
datePeriodOp 
    // pass through if no math op needs to be performed 
    : DATE_PERIOD_FIELD|DATE_PERIOD_CONSTANT 
    // match a legal math op 
    | DATE_PERIOD_FIELD ((PLUS|MINUS) DATE_FIELD|DATE|DATE_CONSTANT|DATE_PERIOD_FIELD|DATE_PERIOD_CONSTANT+)* 
    ; 

// Integer operand: an integer field or integer literal, optionally followed
// by +/- terms.
// NOTE(review): (PLUS|MINUS INTEGER_FIELD|INTEGER)* appears to parse as
// PLUS | (MINUS INTEGER_FIELD) | INTEGER, not as "(PLUS|MINUS) followed by an
// operand" -- confirm and parenthesize as ((PLUS|MINUS) (INTEGER_FIELD|INTEGER))*.
integerOp 
    @init { 
    log.info("Entered integerOp"); 
    } 
    // pass through if no math op needs to be performed 
    : INTEGER_FIELD | INTEGER 
    // match a legal math op 
    | INTEGER_FIELD (PLUS|MINUS INTEGER_FIELD|INTEGER)* 
    ; 

// booleanOp, stringOp, and waiverOp don't do anything since + and - ops are not 
// supported for them 
booleanOp 
    : BOOLEAN_FIELD | BOOLEAN_CONSTANT 
    ; 

stringOp 
    : STRING_FIELD | STRING 
    ; 

// these items are not directly referenced by parser rules, so they 
// can be fragments 

fragment DIGIT: ('0'..'9'); 
fragment DATE: ; 
fragment DATE_PERIOD_CONSTANT: DIGIT+ ' '+ (YEAR | MONTH | WEEK | DAY); 
YEAR: ('YEAR'|'YEARS'); 
MONTH: ('MONTH'|'MONTHS'); 
WEEK: ('WEEK'|'WEEKS'); 
DAY: ('DAY'|'DAYS'); 

DATE_FIELD:('DOB'|'TEST_DATE'); 
DATE_PERIOD_FIELD:('EMPLOYMENT_PERIOD'); 
BOOLEAN_FIELD:('CERTIFIED'); 
INTEGER_FIELD:('AGE'|'OPTION'); 
STRING_FIELD:('STATE'|'UF_USERID'|'USER_LEVEL'); 

// various tokens can't be fragments since they are directly referenced by parser rules 
COMMENT_START: ';'; 
BOOLEAN_CONSTANT: ('TRUE'|'FALSE'|'"Y"'|'"N"'); 
DATE_CONSTANT:('TODAY'|'YESTERDAY'|'TOMMOROW'); 
SHOW: 'SHOW'; 
FOR: 'FOR'; 
IF: 'IF'; 
THEN: 'THEN'; 
ELSE: 'ELSE'; 
ENDIF: 'ENDIF'; 
AND: 'AND'; 
OR: 'OR'; 
EQ: '='; 
NE: '<>'; 
LT: '<'; 
LE: '<='; 
GT: '>'; 
GE: '>='; 
NOT: 'NOT'; 
HAS: 'HAS'; 
PLUS: '+'; 
MINUS: '-'; 

// Commented ifs seem to take more than one line, even if comments are 
// only supposed to be a single line 
COMMENTED_IF: COMMENT_START WS* IF (options {greedy=false;} : .)* ENDIF '\r\n' 
{ 
    log.info("Lexer: matched COMMENTED IF" + getText()); 
    $channel=HIDDEN; 
    //skip(); 
}; 

// Handle an empty comment such as "; " 
EMPTY_COMMENT: COMMENT_START WS* '\r\n' 
{ 
    log.info("Lexer: matched EMPTY_COMMENT: " + getText()); 
    $channel=HIDDEN; 
}; 

// Handle a single-line comment. Policies often end with a comment, so be ready for it 
SINGLE_COMMENT: COMMENT_START ~('\r'|'\n')* (('\r\n')+| EOF) 
{ 
    log.info("Lexer: matched SINGLE_COMMENT: " + getText()); 
    $channel=HIDDEN; 
}; 

// Lexer rule that produces three token types from digit-led input:
//  - DATE_PERIOD_CONSTANT: when the syntactic predicate sees a period such as
//    "4 WEEKS"; further space-separated periods are folded into the same token.
//  - DATE: when the digit run matches one of the 8-digit or 6-digit patterns below.
//  - INTEGER: any other digit run (the type is left unchanged).
// The type is switched manually through $type because DATE and
// DATE_PERIOD_CONSTANT are declared as fragments.
INTEGER 
    // Bart Kiers on SO helped me with this one, basically handle a date period such as 
    // 4 WEEKS, 1 YEAR 6 MONTHS 2 WEEKS 8 DAYS, etc 
: (DATE_PERIOD_CONSTANT)=> DATE_PERIOD_CONSTANT ((' '+ DATE_PERIOD_CONSTANT)=> ' '+ DATE_PERIOD_CONSTANT)* 
    { 
     // manually switch the type from INTEGER to DATE_PERIOD_CONSTANT 
    $type=DATE_PERIOD_CONSTANT; 
    log.info("Matched DATE_PERIOD_CONSTANT: " + getText()); 
    } 
| DIGIT+ 
    { 
     // match a 6-digit or 8-digit date format (20120101 or 201201) 
    if ($text.matches("(19|20|21)[0-9]{2}[0-1]\\d{3}") || $text.matches("(19|20|21)\\d{2}(0[1-9]|1[0-2])")) { 
     log.info("Matched DATE pattern: " + getText()); 
     $type = DATE; 
    } else { 
     log.info("Matched INTEGER: " + getText()); 
    } 
    } 
; 

STRING 
    : '"' ID (' ' ID)* '"' 
    ; 

ID: ('A'..'Z'|'a'..'z'|DIGIT|','|'!'|'?'|':')+; 

// Whitespace: runs of spaces, or a single CR, LF or tab. The token is placed
// on the HIDDEN channel rather than skipped.
// NOTE(review): a ' ' literal used in a parser rule creates an implicit lexer
// token that takes spaces away from this rule -- see dateOp.
WS: (' '+|'\r'|'\n'|'\t') 
{ 
    //skip(); 
    $channel=HIDDEN; 
}; 

「SHOW」結構應該是這個樣子:

SHOW "DOES NOT MEET AGE REQUIREMENTS FOR EMPLOYMENT" FOR 
    AGE < 18 

SHOW "TOO YOUNG FOR CERTIFICATION IN KY" FOR 
    IF STATE="KY" THEN AGE > 21 ENDIF 

當我刪除空白字符(例如字符串周圍或運算符周圍的空白)時,它可以正常工作。

此外,如果有人在語法中看到任何其他愚蠢的寫法,我很樂意聽取意見。

傑森

回答

1

你的詞法分析器正在用一條隱含的、未命名的詞法規則匹配空格。這條詞法規則是在解析器規則dateOp中引入的:

dateOp 
    //... 
    // pass through if no math op needs to be performed 
    : DATE_FIELD|DATE|DATE_CONSTANT 
    // match a legal math op 
    | DATE_FIELD|DATE|DATE_CONSTANT 
    ((PLUS|MINUS) DATE_FIELD|DATE|DATE_CONSTANT|DATE_PERIOD_FIELD|DATE_PERIOD_CONSTANT 
     (' ' DATE_PERIOD_CONSTANT)* //<--- ' ' becomes a new lexer rule 
    )* 
    ; 

它的行爲如同普通的詞法規則,因此對於以下輸入:

SHOW "DOES NOT MEET AGE REQUIREMENTS FOR EMPLOYMENT" FOR 
    AGE < 18 

詞法分析器產生這些令牌:

[SHOW : SHOW] [' ' : ] [STRING : "DOES NOT MEET AGE REQUIREMENTS FOR EMPLOYMENT"] 
[' ' : ] [FOR : FOR] [INTEGER_FIELD : AGE] [' ' : ] [LT : <] [' ' : ] 
[INTEGER : 18] 

請注意其中的' '令牌。這些就是隱含詞法規則在起作用。解析器在dateOp規則之外並不期待這些令牌,因此解析失敗。

從解析器規則dateOp中移除' '之後,上面的輸入產生下列令牌,符合預期:

[SHOW : SHOW] [STRING : "DOES NOT MEET AGE REQUIREMENTS FOR EMPLOYMENT"] 
[FOR : FOR] [INTEGER_FIELD : AGE] [LT : <] 
[INTEGER : 18] 

我不知道從dateOp中移除' '對你的語法來說是否可以接受。如果需要明確測試空格,請考慮重寫語法,把空白測試移到詞法分析器中。或者,解析器可以向前查看下一個令牌是否是隱藏的WS令牌。不過,作爲起步,我建議先盡可能把dateOp清理乾淨,再看看結果如何。