目录
- 一、分析需求
- 二、实现需求
- 三、如何使用
- 四、使用场景
之前项目有一个需求,业务人员使用中文编写一些自定义公式,然后需要我们后台执行将结果返回到界面上,于是就基于有限状态机写了这个词法分析器,比较简单,希望能够抛砖引玉。
一、分析需求
输入中文公式,返回结果,比如:
现有薪资=10000;
个税起点=3000;
当前年份=2021;
如果(当前年份=2022){
个税起点=5000;
}
返回(现有薪资-个税起点)*0.2;
二、实现需求
最初的想法是使用字符串替换的方式,将中文关键字替换成 php 的关键字,然后调用 eval 执行,这样确实也是可以的,但是总觉得不是很美丽,并且不能实现动态解析。就想着自己实现一个简单的词法分析,然后结合 ast 将词法转换成 php 代码执行,岂不快哉。当前版本没有用到抽象语法树来生成代码,全部使用字符串拼接。
<?php
/**
 * Class Lexer
 *
 * @package Sett\OaLang
 *
 * A small hand-written lexer for a formula language whose identifiers and
 * keywords may be written in Chinese. parseInput() turns the source text into
 * a flat token list and convert() concatenates those tokens into PHP source.
 *
 * NOTE(review): convert() only translates tokens; it does not sandbox the
 * result. Treat the generated PHP as untrusted before evaluating it.
 */
class Lexer
{
    /** @var array Map of custom keyword => PHP keyword, e.g. ["如果" => "if"]. */
    public $keywordList = [];

    /** @var array Single-character operators and punctuation recognised by the lexer. */
    public $operatorList = [
        "+", "-", "*", "/", "=", ">", "<", "!", "(", ")", "{", "}", ",", ";",
    ];

    /** @var string Source text being tokenised. */
    private $input;

    /** @var string Character under the cursor, or the $eof sentinel once input is exhausted. */
    private $currChar;

    /** @var int Zero-based character (not byte) offset of the cursor. */
    private $currCharPos = 0;

    /** @var string End-of-input sentinel; three characters long, so it can never equal a real single character. */
    private $eof = "eof";

    /** @var string Encoding used for every multibyte operation. */
    private $currEncode = "UTF-8";

    // Token types.
    public const VAR = "variable";
    public const STR = "string";
    public const KW = "keyword";
    public const OPR = "operator";
    public const INT = "integer";
    public const NIL = "null";

    /**
     * Lexer constructor.
     *
     * @param string $input Source text to tokenise.
     */
    public function __construct(string $input)
    {
        $this->input = $input;
        $this->currChar = $this->charAt($this->currCharPos);
    }

    /**
     * Registers the keyword map used by both the lexer and convert().
     *
     * @param array $keywordList Map of custom keyword => PHP keyword.
     */
    public function setKeywordList($keywordList)
    {
        $this->keywordList = $keywordList;
    }

    /**
     * Tokenises the whole input.
     *
     * Every keyword token is followed by an empty NIL token, and the final
     * "eof" token is not included in the result.
     *
     * @return array List of tokens shaped as ["type" => ..., "char" => ..., "pos" => ...].
     * @throws Exception When the input is empty or a string literal is never closed.
     */
    public function parseInput(): array
    {
        if ($this->input === "") {
            throw new Exception("code cannot be empty");
        }

        $tokens = [];
        do {
            $token = $this->nextToken();
            if ($token["type"] !== "eof") {
                $tokens[] = $token;
            }
            if ($token["type"] === self::KW) {
                $tokens[] = $this->makeToken(self::NIL, "");
            }
        } while ($token["type"] !== "eof");

        return $tokens;
    }

    /**
     * Reads the next token starting at the cursor.
     *
     * @return array Token shaped as ["type" => ..., "char" => ..., "pos" => ...].
     * @throws Exception When a string literal is never closed.
     */
    public function nextToken(): array
    {
        $this->skipBlankChar();
        if ($this->currChar === "") {
            $this->currChar = $this->eof;
        }

        if ($this->currChar === $this->eof) {
            return $this->currToken('eof', $this->currChar);
        }

        // A run of Chinese characters is tried first so that a keyword
        // directly followed by a Latin identifier still splits in two.
        if ($this->isCnLetter()) {
            return $this->makeWordToken($this->matchUntilNextCharIsNotCn());
        }

        if ($this->isOperator()) {
            return $this->currToken(static::OPR, $this->currChar);
        }

        if ($this->isNumber()) {
            return $this->matchNumber();
        }

        if ($this->isStr()) {
            $str = $this->matchCompleteStr();
            // The cursor now sits on the closing quote; currToken() steps past it.
            return $this->currToken(static::STR, $str);
        }

        if ($this->isVar()) {
            return $this->makeWordToken($this->matchVar());
        }

        // Any other character is passed through as a one-character variable token.
        return $this->currToken(static::VAR, $this->currChar);
    }

    /**
     * Builds the token for an identifier-like word: a keyword token when the
     * word is registered in $keywordList, otherwise a variable token.
     *
     * The cursor is already on the first character after the word and is left
     * there, so nothing following a keyword is skipped. A keyword's "pos" is
     * the offset of its last character; a variable's "pos" is the offset just
     * past the word.
     *
     * @param string $word
     * @return array
     */
    private function makeWordToken(string $word): array
    {
        if ($this->isKeyword($word)) {
            $token = $this->makeToken(static::KW, $word);
            $token["pos"] = $this->currCharPos - 1;

            return $token;
        }

        // Everything that is not a keyword is treated as a variable.
        return $this->makeToken(static::VAR, $word);
    }

    /**
     * Consumes a numeric literal: one or more digits, optionally followed by
     * a dot and more digits. The dot is only consumed when a digit follows.
     *
     * @return array Token of type INT whose "char" is the whole literal and
     *               whose "pos" is the offset of its first digit.
     */
    private function matchNumber(): array
    {
        $start = $this->currCharPos;
        $literal = $this->matchDigits();

        if ($this->currChar === "." && is_numeric($this->charAt($this->currCharPos + 1))) {
            $literal .= ".";
            $this->nextChar();
            $literal .= $this->matchDigits();
        }

        $token = $this->makeToken(static::INT, $literal);
        $token["pos"] = $start;

        return $token;
    }

    /**
     * Consumes consecutive digits at the cursor.
     *
     * @return string
     */
    private function matchDigits(): string
    {
        $digits = "";
        while ($this->isNumber()) {
            $digits .= $this->currChar;
            $this->nextChar();
        }

        return $digits;
    }

    /**
     * Consumes consecutive identifier characters (Chinese or ASCII letters).
     *
     * @param string $input Prefix to prepend to the matched word.
     * @return string
     */
    private function matchVar(string $input = ""): string
    {
        $word = $input;
        while ($this->isVar()) {
            $word .= $this->currChar;
            $this->nextChar();
        }

        return $word;
    }

    /**
     * Whether the current character can be part of a plain identifier.
     *
     * @return bool
     */
    private function isVar(): bool
    {
        return $this->isCnLetter() || $this->isEnLetter();
    }

    /**
     * Skips spaces, tabs, line feeds and carriage returns.
     */
    private function skipBlankChar(): void
    {
        while (in_array($this->currChar, [" ", "\t", "\n", "\r"], true)) {
            $this->nextChar();
        }
    }

    /**
     * Builds a token at the current position, then advances one character.
     *
     * @param string $type
     * @param string $word
     * @return array
     */
    private function currToken(string $type, $word): array
    {
        $token = $this->makeToken($type, $word);
        $this->nextChar();

        return $token;
    }

    /**
     * Builds a token stamped with the current cursor offset.
     *
     * @param string $type
     * @param string $char
     * @return array
     */
    private function makeToken(string $type, string $char): array
    {
        return ["type" => $type, "char" => $char, "pos" => $this->currCharPos];
    }

    /**
     * Whether the current character is an ASCII letter (a-z or A-Z).
     *
     * @return bool
     */
    private function isEnLetter(): bool
    {
        // The pattern matches exactly one character, so "" and the "eof" sentinel both fail.
        return preg_match('/^[A-Za-z]$/', $this->currChar) === 1;
    }

    /**
     * Whether the current character lies in U+4E00..U+9FA5.
     *
     * @return bool
     */
    private function isCnLetter(): bool
    {
        return preg_match("/^[\x{4e00}-\x{9fa5}]+$/u", $this->currChar) === 1;
    }

    /**
     * Whether the current character is a digit.
     *
     * @return bool
     */
    private function isNumber(): bool
    {
        return is_numeric($this->currChar);
    }

    /**
     * Whether the cursor is on the opening quote of a string literal.
     *
     * @return bool
     */
    private function isStr(): bool
    {
        return $this->currChar === "\"";
    }

    /**
     * Consumes a double-quoted string literal and returns its contents.
     *
     * On return the cursor is on the closing quote. When the cursor is not on
     * an opening quote, nothing is consumed and "" is returned.
     *
     * @return string
     * @throws Exception When the input ends before the closing quote.
     */
    private function matchCompleteStr(): string
    {
        $str = "";
        if ($this->currChar !== "\"") {
            return $str;
        }

        $this->nextChar();
        while ($this->currChar !== "\"") {
            // Without this check the sentinel would be appended forever.
            if ($this->currChar === $this->eof) {
                throw new Exception("unterminated string literal");
            }
            $str .= $this->currChar;
            $this->nextChar();
        }

        return $str;
    }

    /**
     * Whether the current character is one of $operatorList.
     *
     * @return bool
     */
    private function isOperator(): bool
    {
        return in_array($this->currChar, $this->operatorList, true);
    }

    /**
     * Consumes consecutive Chinese characters.
     *
     * @return string
     */
    private function matchUntilNextCharIsNotCn(): string
    {
        $word = "";
        while ($this->isCnLetter()) {
            $word .= $this->currChar;
            $this->nextChar();
        }

        return $word;
    }

    /**
     * Advances the cursor by one character; past the end of the input the
     * current character becomes the $eof sentinel.
     */
    private function nextChar(): void
    {
        $this->currCharPos += 1;
        $this->currChar = $this->charAt($this->currCharPos);
        if ($this->currChar === "") {
            $this->currChar = $this->eof;
        }
    }

    /**
     * Returns the character at the given offset, or "" when out of range.
     *
     * @param int $pos Zero-based character offset.
     * @return string
     */
    private function charAt(int $pos): string
    {
        return (string) mb_substr($this->input, $pos, 1, $this->currEncode);
    }

    /**
     * Whether the word is registered in $keywordList with a non-empty mapping.
     *
     * @param string $input
     * @return bool
     */
    private function isKeyword(string $input): bool
    {
        return ($this->keywordList[$input] ?? "") != "";
    }

    /**
     * Concatenates tokens into PHP source code.
     *
     * String contents are emitted inside double quotes with "\" and "$"
     * escaped, so PHP treats them as literal text instead of interpolating.
     *
     * NOTE(review): every "+" operator is emitted as ".", so numeric addition
     * such as (1+2)*3 still converts to (1.2)*3. Fixing that needs operand
     * type information that this token-by-token pass does not have.
     *
     * @param array $tokens Tokens as returned by parseInput().
     * @return string
     */
    public function convert(array $tokens)
    {
        $code = "";
        foreach ($this->lexerIterator($tokens) as $token) {
            switch ($token["type"]) {
                case static::KW:
                    $code .= $this->keywordList[$token["char"]];
                    break;
                case static::VAR:
                    $code .= '$' . $token["char"];
                    break;
                case static::OPR:
                    $code .= $this->replace($token["char"]);
                    break;
                case static::STR:
                    // Backslash first, so the escape added for "$" is not doubled.
                    $code .= '"' . strtr($token["char"], ['\\' => '\\\\', '$' => '\\$']) . '"';
                    break;
                case static::INT:
                default:
                    $code .= $token["char"];
            }
        }

        return $code;
    }

    /**
     * Maps the custom language's "+" to PHP's "." operator.
     *
     * @param string $char
     * @return string
     */
    private function replace(string $char)
    {
        return str_replace("+", ".", $char);
    }

    /**
     * Yields the tokens one at a time.
     *
     * @param array $tokens
     * @return \Generator
     */
    private function lexerIterator(array $tokens)
    {
        foreach ($tokens as $token) {
            yield $token;
        }
    }
}
三、如何使用
require __DIR__ . "/vendor/autoload.php";

// A snippet written in the custom language. The space after 返回 is required:
// without it the lexer reads 返回姓名 as a single variable name.
$code = <<<EOF
姓名="腕豪";
问候="你好啊";
地址=(1+2)*3;
如果(地址>3){
    地址=1;
}否则{
    地址="艾欧尼亚";
}
说话=("我"+"爱")+"你";
返回 姓名+年龄;
EOF;

$lexer = new Lexer($code);

// Register your own keywords: custom keyword => PHP keyword.
$kwMap = [
    "如果" => "if",
    "否则" => "else",
    "返回" => "return",
    "否则如果" => "elseif",
];
$lexer->setKeywordList($kwMap);

// The generated tokens.
$tokens = $lexer->parseInput();

// Convert the tokens to PHP. Here they are simply concatenated; you could
// also build an AST with a PHP parser library and print PHP from that.
var_dump($lexer->convert($tokens));
生成词
[{ "type":"variable", "char":"姓名", "pos":2 },{ "type":"operator", "char":"=", "pos":2 },{ "type":"string", "char":"腕豪", "pos":7 },{ "type":"operator", "char":";", "pos":8 },{ "type":"variable", "char":"问候", "pos":13 },{ "type":"operator", "char":"=", "pos":13 },{ "type":"string", "char":"你好啊", "pos":17 },{ "type":"operator", "char":";", "pos":18 },{ "type":"variable", "char":"地址", "pos":23 },{ "type":"operator", "char":"=", "pos":23 },{ "type":"operator", "char":"(", "pos":24 },{ "type":"integer", "char":"1", "pos":25 },{ "type":"operator", "char":"+", "pos":26 },{ "type":"integer", "char":"2", "pos":27 },{ "type":"operator", "char":")", "pos":28 },{ "type":"operator", "char":"*", "pos":30 },{ "type":"integer", "char":"3", "pos":32 },{ "type":"operator", "char":";", "pos":33 },{ "type":"keyword", "char":"如果", "pos":37 },{ "type":"null", "char":"", "pos":38 },{ "type":"operator", "char":"(", "pos":38 },{ "type":"variable", "char":"地址", "pos":41 },{ "type":"operator", "char":">", "pos":42 },{ "type":"integer", "char":"3", "pos":44 },{ "type":"operator", "char":")", "pos":45 },{ "type":"operator", "char":"{", "pos":46 },{ "type":"variable", "char":"地址", "pos":55 },{ "type":"operator", "char":"=", "pos":55 },{ "type":"integer", "char":"1", "pos":56 },{ "type":"operator", "char":";", "pos":57 },{ "type":"operator", "char":"}", "pos":60 },{ "type":"keyword", "char":"否则", "pos":62 },{ "type":"null", "char":"", "pos":63 },{ "type":"operator", "char":"{", "pos":63 },{ "type":"variable", "char":"地址", "pos":72 },{ "type":"operator", "char":"=", "pos":72 },{ "type":"string", "char":"艾欧尼亚", "pos":78 },{ "type":"operator", "char":";", "pos":79 },{ "type":"operator", "char":"}", "pos":82 },{ "type":"variable", "char":"说话", "pos":87 },{ "type":"operator", "char":"=", "pos":88 },{ "type":"operator", "char":"(", "pos":90 },{ "type":"string", "char":"我", "pos":93 },{ "type":"operator", "char":"+", "pos":94 },{ "type":"string", "char":"爱", "pos":97 },{ "type":"operator", "char":")", 
"pos":98 },{ "type":"operator", "char":"+", "pos":99 },{ "type":"string", "char":"你", "pos":102 },{ "type":"operator", "char":";", "pos":103 },{ "type":"keyword", "char":"返回", "pos":107 },{ "type":"null", "char":"", "pos":108 },{ "type":"variable", "char":"姓名", "pos":111 },{ "type":"operator", "char":"+", "pos":111 },{ "type":"variable", "char":"年龄", "pos":114 },{ "type":"operator", "char":";", "pos":114 }]
输出:
$姓名="腕豪";$问候="你好啊";$地址=(1.2)*3;if($地址>3){$地址=1;}else{$地址="艾欧尼亚";}$说话=("我"."爱")."你";return$姓名.$年龄;
能执行吗?当然能。还存在一些小 bug,不想改了。
四、使用场景
什么,居然有人说没什么用?oa 系统总有用到的时候。
到此这篇关于利用PHP实现词法分析器与自定义语言的文章就介绍到这了,更多相关PHP词法分析器内容请搜索恰卡编程网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持我们!
海报
0 条评论
63
相关文章
本站已关闭游客评论,请登录或者注册后再评论吧~