利用PHP实现词法分析器与自定义语言

2022-09-03 16:32:12 63 0
魁首哥

目录

  • 一、分析需求
  • 二、实现需求
  • 三、如何使用
  • 四、使用场景

之前项目有一个需求,业务人员使用中文编写一些自定义公式,然后需要我们后台执行将结果返回到界面上,于是就基于有限状态机写了这个词法分析器,比较简单,希望能够抛砖引玉。

利用PHP实现词法分析器与自定义语言

一、分析需求

输入中文公式,返回结果,比如:

现有薪资=10000;
个税起点=3000;
当前年份=2021;
如果(当前年份=2022){
个税起点=5000;
}
返回(现有薪资-个税起点)*0.2;

二、实现需求

最初的想法是使用字符串替换的方式,将中文关键字替换成 php 的关键字,然后调用 eval 执行,这样确实也是可以的,但是总觉得不是很美丽,并且不能实现动态解析。就想着自己实现一个简单的词法分析,然后结合 ast 将词法转换成 php 代码执行,岂不快哉。当前版本没有用到抽象语法树来生成代码,全部使用字符串拼接。

<?php

/**
 * Class Lexer
 *
 * @package Sett\OaLang
 *
 * Lexer for a small formula language whose identifiers and keywords may be
 * written in Chinese. It walks the input one UTF-8 character at a time,
 * produces a flat token list (parseInput) and can turn that list into PHP
 * source by plain string concatenation (convert).
 *
 * Every token is an array with the keys "type", "char" and "pos".
 * "pos" is the cursor offset recorded when the token was emitted: variables
 * report the offset just past the word, every other token reports the offset
 * of its last character.
 *
 * NOTE(review): the output of convert() is meant to be executed as PHP.
 * Running generated code built from end-user input is inherently risky;
 * string literals are escaped here, but the result should still be executed
 * in a restricted context.
 */
class Lexer
{
    // Maps a source keyword to the PHP keyword it is translated to.
    public $keywordList = [];
    // Single-character operators and punctuation recognised by the lexer.
    public $operatorList = [
        "+", "-", "*", "/", "=", ">", "<", "!", "(", ")", "{", "}", ",", ";"
    ];
    // Source text being tokenised.
    private $input;
    // Character under the cursor, or the end marker once the input is exhausted.
    private $currChar;
    // Character (not byte) offset of the cursor.
    private $currCharPos = 0;
    // End-of-input marker; it is longer than one character, so it can never
    // collide with a character read from the input.
    private $eof = "eof";
    // Encoding used for every multibyte string operation.
    private $currEncode = "UTF-8";

    // Token types.
    public const VAR = "variable";
    public const STR = "string";
    public const KW = "keyword";
    public const OPR = "operator";
    // Numeric literal; despite the name it also covers decimals such as 0.2.
    public const INT = "integer";
    // Separator inserted after every keyword.
    public const NIL = "null";

    /**
     * Lexer constructor.
     *
     * @param string $input source text to tokenise
     */
    public function __construct(string $input)
    {
        $this->input = $input;
        $this->currChar = mb_substr($this->input, $this->currCharPos, 1, $this->currEncode);
    }

    /**
     * Registers the keyword map used by the lexer and by convert().
     *
     * @param array $keywordList source keyword => PHP keyword
     */
    public function setKeywordList($keywordList)
    {
        $this->keywordList = $keywordList;
    }

    /**
     * Tokenises the remaining input.
     *
     * A separator token (type NIL, a single space) is appended after every
     * keyword so that the generated PHP never fuses a keyword with the token
     * that follows it ("return 5" instead of "return5").
     *
     * @return array list of tokens, without the end-of-input token
     * @throws \Exception when the input is empty or a string literal is not closed
     */
    public function parseInput()
    {
        if ($this->input === "") {
            throw new \Exception("code cannot be empty");
        }
        $tokens = [];
        do {
            $token = $this->nextToken();
            if ($token["type"] !== "eof") {
                $tokens[] = $token;
            }
            if ($token["type"] === static::KW) {
                $tokens[] = $this->makeToken(static::NIL, " ");
            }
        } while ($token["type"] !== "eof");
        return $tokens;
    }

    /**
     * Reads the next token at the cursor.
     *
     * @return array token; its type is "eof" once the input is exhausted
     * @throws \Exception when a string literal is not closed
     */
    public function nextToken()
    {
        $this->skipBlankChar();
        if ($this->currChar === "") {
            $this->currChar = $this->eof;
        }
        if ($this->currChar === $this->eof) {
            return $this->currToken('eof', $this->currChar);
        }
        // A word starting with a Chinese character consists of Chinese characters only.
        if ($this->isCnLetter()) {
            return $this->wordToken($this->matchUntilNextCharIsNotCn());
        }
        if ($this->isOperator()) {
            return $this->currToken(static::OPR, $this->currChar);
        }
        if ($this->isNumber()) {
            // The cursor already sits behind the literal; report its last character.
            return $this->makeToken(static::INT, $this->matchNumber(), $this->currCharPos - 1);
        }
        if ($this->isStr()) {
            // matchCompleteStr() stops on the closing quote, currToken() steps over it.
            return $this->currToken(static::STR, $this->matchCompleteStr());
        }
        // A word starting with a Latin letter may mix Latin and Chinese characters.
        if ($this->isVar()) {
            return $this->wordToken($this->matchVar());
        }
        // Best effort: any other character is passed on as a one-character variable.
        return $this->currToken(static::VAR, $this->currChar);
    }

    /**
     * Builds the token for a complete word: a keyword when the word is
     * registered in the keyword list, a variable otherwise. The cursor is
     * left on the character following the word in both cases.
     *
     * @param string $word
     * @return array
     */
    private function wordToken(string $word): array
    {
        if ($this->isKeyword($word)) {
            return $this->makeToken(static::KW, $word, $this->currCharPos - 1);
        }
        // Everything that is not a keyword is treated as a variable.
        return $this->makeToken(static::VAR, $word);
    }

    /**
     * Consumes a run of Latin/Chinese letters.
     *
     * @param string $input prefix to prepend to the matched word
     * @return string
     */
    private function matchVar(string $input = ""): string
    {
        $word = $input;
        while ($this->isVar()) {
            $word .= $this->currChar;
            $this->nextChar();
        }
        return $word;
    }

    /**
     * Whether the current character may be part of a variable name.
     *
     * @return bool
     */
    private function isVar(): bool
    {
        return $this->isCnLetter() || $this->isEnLetter();
    }

    /**
     * Skips line feeds, carriage returns, spaces and tabs.
     */
    private function skipBlankChar(): void
    {
        while (in_array($this->currChar, ["\n", "\r", " ", "\t"], true)) {
            $this->nextChar();
        }
    }

    /**
     * Builds a token at the cursor and then advances by one character.
     *
     * @param string $type
     * @param string $word
     * @return array
     */
    private function currToken(string $type, $word): array
    {
        $token = $this->makeToken($type, $word);
        $this->nextChar();
        return $token;
    }

    /**
     * @param string   $type
     * @param string   $char token text
     * @param int|null $pos  offset to record; defaults to the cursor offset
     * @return array
     */
    private function makeToken(string $type, string $char, ?int $pos = null): array
    {
        return ["type" => $type, "char" => $char, "pos" => $pos ?? $this->currCharPos];
    }

    /**
     * Whether the current character is a Latin letter (a-z or A-Z).
     * The multi-character end marker never matches.
     *
     * @return bool
     */
    private function isEnLetter(): bool
    {
        return preg_match("/^[A-Za-z]$/", $this->currChar) === 1;
    }

    /**
     * Whether the current character is a Chinese character (U+4E00..U+9FA5).
     *
     * @return bool
     */
    private function isCnLetter(): bool
    {
        return preg_match("/^[\x{4e00}-\x{9fa5}]+$/u", $this->currChar) === 1;
    }

    /**
     * Whether the current character is a digit.
     *
     * @return bool
     */
    private function isNumber(): bool
    {
        return is_numeric($this->currChar);
    }

    /**
     * Consumes a numeric literal: digits, optionally followed by a dot and
     * more digits. A dot counts only when a digit follows it.
     *
     * @return string
     */
    private function matchNumber(): string
    {
        $literal = $this->matchDigits();
        if ($this->currChar === "." && is_numeric($this->peekChar())) {
            $literal .= ".";
            $this->nextChar();
            $literal .= $this->matchDigits();
        }
        return $literal;
    }

    /**
     * Consumes a run of digits.
     *
     * @return string
     */
    private function matchDigits(): string
    {
        $digits = "";
        while ($this->isNumber()) {
            $digits .= $this->currChar;
            $this->nextChar();
        }
        return $digits;
    }

    /**
     * Returns the character after the cursor without moving the cursor;
     * an empty string at the end of the input.
     *
     * @return string
     */
    private function peekChar(): string
    {
        return mb_substr($this->input, $this->currCharPos + 1, 1, $this->currEncode);
    }

    /**
     * Whether a string literal starts at the cursor.
     *
     * @return bool
     */
    private function isStr(): bool
    {
        return $this->currChar === "\"";
    }

    /**
     * Consumes a double-quoted string literal and returns its content.
     * The cursor is left on the closing quote. Returns an empty string
     * without moving when the cursor is not on an opening quote.
     *
     * @return string
     * @throws \Exception when the input ends before the closing quote
     */
    private function matchCompleteStr(): string
    {
        $content = "";
        if ($this->currChar !== "\"") {
            return $content;
        }
        $this->nextChar();
        while ($this->currChar !== "\"") {
            if ($this->currChar === $this->eof) {
                throw new \Exception("unterminated string literal");
            }
            $content .= $this->currChar;
            $this->nextChar();
        }
        return $content;
    }

    /**
     * Whether the current character is a registered operator.
     *
     * @return bool
     */
    private function isOperator(): bool
    {
        return in_array($this->currChar, $this->operatorList, true);
    }

    /**
     * Consumes a run of Chinese characters.
     *
     * @return string
     */
    private function matchUntilNextCharIsNotCn(): string
    {
        $word = "";
        while ($this->isCnLetter()) {
            $word .= $this->currChar;
            $this->nextChar();
        }
        return $word;
    }

    /**
     * Advances the cursor by one character; the current character becomes
     * the end marker once the input is exhausted.
     */
    private function nextChar(): void
    {
        $this->currCharPos += 1;
        $this->currChar = mb_substr($this->input, $this->currCharPos, 1, $this->currEncode);
        if ($this->currChar === "") {
            $this->currChar = $this->eof;
        }
    }

    /**
     * Whether the word is registered with a non-empty translation.
     *
     * @param string $input
     * @return bool
     */
    private function isKeyword(string $input): bool
    {
        return ($this->keywordList[$input] ?? "") != "";
    }

    /**
     * Translates a token list into PHP source.
     *
     * "+" is the concatenation operator of the source language and becomes
     * " . "; it stays an arithmetic "+" only when both neighbours are numeric
     * literals. The lexer has no type information, so "+" between variables
     * is always a concatenation.
     *
     * @param array $tokens tokens as returned by parseInput()
     * @return string
     */
    public function convert(array $tokens)
    {
        $tokens = array_values($tokens);
        $code = "";
        foreach ($this->lexerIterator($tokens) as $index => $token) {
            switch ($token["type"]) {
                case static::KW:
                    // Fall back to the source text if the keyword map changed after lexing.
                    $code .= $this->keywordList[$token["char"]] ?? $token["char"];
                    break;
                case static::VAR:
                    $code .= "$" . $token["char"];
                    break;
                case static::OPR:
                    $code .= $this->isNumericAddition($tokens, $index)
                        ? $token["char"]
                        : $this->replace($token["char"]);
                    break;
                case static::STR:
                    // Escape "\" and "$" so PHP treats the content as literal text
                    // instead of interpolating it.
                    $code .= "\"" . strtr($token["char"], ["\\" => "\\\\", "$" => "\\$"]) . "\"";
                    break;
                default:
                    // Numeric literals, separators and anything else are copied verbatim.
                    $code .= $token["char"];
            }
        }
        return $code;
    }

    /**
     * Whether the token at $index is a "+" standing between two numeric literals.
     *
     * @param array $tokens list-indexed tokens
     * @param int   $index
     * @return bool
     */
    private function isNumericAddition(array $tokens, int $index): bool
    {
        if ($tokens[$index]["char"] !== "+") {
            return false;
        }
        $left = $tokens[$index - 1]["type"] ?? null;
        $right = $tokens[$index + 1]["type"] ?? null;
        return $left === static::INT && $right === static::INT;
    }

    /**
     * Maps a source operator to PHP. "+" becomes the concatenation operator,
     * padded with spaces so it cannot fuse with an adjacent number into a
     * float literal ("1.2", "$x.1").
     *
     * @param string $char
     * @return string
     */
    private function replace(string $char): string
    {
        return $char === "+" ? " . " : $char;
    }

    /**
     * Yields the tokens one by one, keeping their keys.
     *
     * @param array $tokens
     * @return \Generator
     */
    private function lexerIterator(array $tokens)
    {
        yield from $tokens;
    }
}

三、如何使用

require __DIR__ . "/vendor/autoload.php";

// A sample program written in the custom language.
$code = <<<EOF
姓名="腕豪";
问候="你好啊";
地址=(1+2)*3;
如果(地址>3){
    地址=1;
}否则{
    地址="艾欧尼亚";
}
说话=("我"+"爱")+"你";
返回 姓名+年龄;
EOF;

$lexer = new Lexer($code);

// Define your own keywords: source keyword => PHP keyword.
$kwMap = [
    "如果" => "if",
    "否则" => "else",
    "返回" => "return",
    "否则如果" => "elseif",
];
$lexer->setKeywordList($kwMap);

// Token list produced by the lexer.
$tokens = $lexer->parseInput();

// Turn the tokens into PHP source. This is plain string concatenation; going
// through an AST (for example with php-parser) and printing that is an alternative.
var_dump($lexer->convert($tokens));

生成词

[{
"type":"variable",
"char":"姓名",
"pos":2
},{
"type":"operator",
"char":"=",
"pos":2
},{
"type":"string",
"char":"腕豪",
"pos":7
},{
"type":"operator",
"char":";",
"pos":8
},{
"type":"variable",
"char":"问候",
"pos":13
},{
"type":"operator",
"char":"=",
"pos":13
},{
"type":"string",
"char":"你好啊",
"pos":17
},{
"type":"operator",
"char":";",
"pos":18
},{
"type":"variable",
"char":"地址",
"pos":23
},{
"type":"operator",
"char":"=",
"pos":23
},{
"type":"operator",
"char":"(",
"pos":24
},{
"type":"integer",
"char":"1",
"pos":25
},{
"type":"operator",
"char":"+",
"pos":26
},{
"type":"integer",
"char":"2",
"pos":27
},{
"type":"operator",
"char":")",
"pos":28
},{
"type":"operator",
"char":"*",
"pos":30
},{
"type":"integer",
"char":"3",
"pos":32
},{
"type":"operator",
"char":";",
"pos":33
},{
"type":"keyword",
"char":"如果",
"pos":37
},{
"type":"null",
"char":"",
"pos":38
},{
"type":"operator",
"char":"(",
"pos":38
},{
"type":"variable",
"char":"地址",
"pos":41
},{
"type":"operator",
"char":">",
"pos":42
},{
"type":"integer",
"char":"3",
"pos":44
},{
"type":"operator",
"char":")",
"pos":45
},{
"type":"operator",
"char":"{",
"pos":46
},{
"type":"variable",
"char":"地址",
"pos":55
},{
"type":"operator",
"char":"=",
"pos":55
},{
"type":"integer",
"char":"1",
"pos":56
},{
"type":"operator",
"char":";",
"pos":57
},{
"type":"operator",
"char":"}",
"pos":60
},{
"type":"keyword",
"char":"否则",
"pos":62
},{
"type":"null",
"char":"",
"pos":63
},{
"type":"operator",
"char":"{",
"pos":63
},{
"type":"variable",
"char":"地址",
"pos":72
},{
"type":"operator",
"char":"=",
"pos":72
},{
"type":"string",
"char":"艾欧尼亚",
"pos":78
},{
"type":"operator",
"char":";",
"pos":79
},{
"type":"operator",
"char":"}",
"pos":82
},{
"type":"variable",
"char":"说话",
"pos":87
},{
"type":"operator",
"char":"=",
"pos":88
},{
"type":"operator",
"char":"(",
"pos":90
},{
"type":"string",
"char":"我",
"pos":93
},{
"type":"operator",
"char":"+",
"pos":94
},{
"type":"string",
"char":"爱",
"pos":97
},{
"type":"operator",
"char":")",
"pos":98
},{
"type":"operator",
"char":"+",
"pos":99
},{
"type":"string",
"char":"你",
"pos":102
},{
"type":"operator",
"char":";",
"pos":103
},{
"type":"keyword",
"char":"返回",
"pos":107
},{
"type":"null",
"char":"",
"pos":108
},{
"type":"variable",
"char":"姓名",
"pos":111
},{
"type":"operator",
"char":"+",
"pos":111
},{
"type":"variable",
"char":"年龄",
"pos":114
},{
"type":"operator",
"char":";",
"pos":114
}]

输出:

$姓名="腕豪";$问候="你好啊";$地址=(1.2)*3;if($地址>3){$地址=1;}else{$地址="艾欧尼亚";}$说话=("我"."爱")."你";return$姓名.$年龄;

能执行吗?当然能。还存在一些小 bug,不想改了。

四、使用场景

什么,居然有人说没什么用?oa 系统总有用到的时候。

到此这篇关于利用PHP实现词法分析器与自定义语言的文章就介绍到这了。更多相关PHP词法分析器内容,请搜索恰卡编程网以前的文章,或继续浏览下面的相关文章。希望大家以后多多支持我们!

收藏
分享
海报
0 条评论
63
上一篇:一文搞懂PHP中的DI依赖注入 下一篇:利用PHPExcel导出excel文件的方法详解

本站已关闭游客评论,请登录或者注册后再评论吧~

忘记密码?

图形验证码