事實並非如此簡單,但希望這對其他人有所幫助。最大的難題是從preg_replace的回調函數返回非字符串。
感謝所有試圖幫忙的人!
/**
 * Minimal template parser: turns a string containing <?tag?> ... <?/tag?>
 * markers into a nested array of text fragments, exposed through $ret.
 *
 * Recognised syntax:
 *   <?tag?> ... <?/tag?>   region with an explicit closing tag
 *   <?tag?> ... <?/?>      region with a short closing tag
 *   <?$tag?>               shorthand for the empty region <?tag?><?/tag?>
 *
 * Result shape: $ret is a list in which plain strings are literal text and
 * nested arrays are the contents of a tagged region (same shape, recursively).
 * Tag names themselves are not stored in the result.
 *
 * Limitations that follow from the patterns used below:
 *   - a tag name must be at least two characters long;
 *   - a short closing tag <?/?> is paired with the last opening tag found in
 *     the text since the previous short closing tag, even when that opening
 *     tag already has an explicit closing tag.
 */
class Parser
{
    /** Nested fragment structure produced by the last loadTemplateFromString() call. */
    public $ret = array();

    /** Reset to an empty array on every load; not otherwise used by this class. */
    public $parsed_template = array();

    /** Reset to an empty array on every load; not otherwise used by this class. */
    public $tags = array();

    /**
     * Parse a template string and store the resulting structure in $this->ret.
     *
     * All parser state (including $ret) is cleared first, so the method can be
     * called repeatedly on the same instance. An empty template (null, false,
     * '' or an empty array) leaves $ret as an empty array. The string '0' is a
     * valid one-character template.
     *
     * @param string $str Template source; may span multiple lines.
     * @return void
     */
    public function loadTemplateFromString($str)
    {
        $this->parsed_template = $this->tags = array();
        // Start from a clean result so consecutive loads do not accumulate.
        $this->ret = array();

        // Strict comparison on purpose: a loose !$str would also reject '0'.
        if (in_array($str, array(null, false, '', array()), true)) {
            return;
        }

        /* First expand self-closing tags <?$tag?> -> <?tag?><?/tag?> */
        $str = preg_replace('/<\?\$([\w]+)\?>/', '<?\1?><?/\1?>', $str);

        /* Next fix short ending tag <?tag?> <?/?> -> <?tag?> <?/tag?> */
        // The outer pattern cuts the text into chunks that each end with a
        // short close; inside a chunk the greedy ".*" locates the last opening
        // tag and its name is written into the closing tag. The "s" modifier
        // lets "." cross line breaks so multi-line templates work.
        $x = preg_replace_callback('/.*?<\?\/\?>/s', function ($x) {
            return preg_replace(
                '/(.*<\?([^\/][\w]+)\?>)(.*?)(<\?\/?\?>)/s',
                '\1\3<?/\2?>',
                $x[0]
            );
        }, $str);

        /* Finally recursively build tag structure */
        $this->recursiveReplace($x);
    }

    /**
     * Build the nested structure for one nesting level.
     *
     * The method has two roles. Called with a string it is the entry point for
     * the whole template. Called with an array it is acting as the
     * preg_replace_callback() callback, where $x holds the match groups:
     * $x[1] is the text before the opening tag and $x[4] is the text between
     * the opening and closing tags.
     *
     * Because a regex callback can only return a string, the structure is
     * accumulated in $this->ret as a side effect and every match is replaced
     * with '' so that only trailing text remains in the replaced string.
     *
     * @param string|array $x Template text, or regex match groups when invoked as a callback.
     * @return string|null '' when invoked as a callback, nothing for the top-level call.
     */
    public function recursiveReplace($x)
    {
        if (is_array($x)) {
            // Called recursively: park the enclosing level and collect this
            // region's fragments into a fresh list.
            $enclosing = $this->ret;
            $this->ret = array();
        } else {
            // Top-level call: wrap the text so it can be read like group 4.
            $x = array(4 => $x);
            $enclosing = null;
        }

        // Each region found in the current text re-enters this method; what is
        // left afterwards is the text following the last region.
        $trailing = preg_replace_callback(
            '/(.*?)(<\?([^\/$][\w]+)\?>)(.*?)(<\?\/(\3)?\?>)(.*?)/s',
            array($this, 'recursiveReplace'),
            $x[4]
        );
        $this->ret[] = $trailing;

        if ($enclosing === null) {
            return;
        }

        // Restore the enclosing level, then append the text that preceded the
        // region followed by the region's own fragment list.
        $region = $this->ret;
        $this->ret = $enclosing;
        $this->ret[] = $x[1];
        $this->ret[] = $region;

        return '';
    }
}
// Demo: parse a sample template that exercises every supported tag form
// (explicit close, <?$tag?> shorthand, short close and nesting), then dump
// the structure the parser collected in $ret.
$template = 'bla <?name?> name <?/name?> bla bla <?$surname?> bla '
    . '<?middle?> mm <?/?> blah <?outer?> you <?inner?> are <?/?> inside <?/outer?>'
    . ' bobobo';

$p = new Parser();
$p->loadTemplateFromString($template);

var_dump($p->ret);
輸出如下:
array
0 => string 'bla ' (length=4)
1 =>
array
0 => string ' name ' (length=6)
2 => string ' bla bla ' (length=9)
3 =>
array
0 => string '' (length=0)
4 => string ' bla ' (length=5)
5 =>
array
0 => string ' mm ' (length=4)
6 => string ' blah ' (length=6)
7 =>
array
0 => string ' you ' (length=5)
1 =>
array
0 => string ' are ' (length=5)
2 => string ' inside ' (length=8)
8 => string ' bobobo' (length=7)
你所描述的語言不是正則語言(regular language)。這意味着正則表達式不是一個合適的解決方案 – webbiedave 2011-05-09 21:43:40
添加了不能嵌套同名標籤的規則。 – romaninsh 2011-05-09 21:47:18
@webbiedave:*理論上*的正則表達式確實不合適,但現在大多數所謂的正則表達式實現也允許您匹配非正則語言。 – jwodder 2011-05-09 21:48:24