900字范文,内容丰富有趣,生活中的好帮手!
900字范文 > Atitit attilax擅长项目解析与大数据采集提取 词法分析 电话号码提取 p

Atitit attilax擅长项目解析与大数据采集提取 词法分析 电话号码提取 p

时间:2024-05-20 11:26:12

相关推荐

Atitit attilax擅长项目解析与大数据采集提取   词法分析  电话号码提取   p

Atitit attilax擅长项目解析与大数据采集提取 词法分析 电话号码提取

package vcfvcardprj;

import com.alibaba.fastjson.JSON;
import com.attilax.fsm.TokenEndEx;
import com.attilax.parser.Token;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

publicclassmblFetch{

publicstaticvoidmain(String[]args){

//TODOAuto-generated method stub

Strings="周何琪__学校郑州大学__联系方式15538130516__身高体重162cm,47k";

s="天津广播影视职业学院 韩震宇 15641656234 161cm,44kg";

List<Token>process=newmblFetch().getTokens(s);

System.out.println(JSON.toJSONString(process,true));

Stringcp=getMblCp(process);

System.out.println(cp);

}

privatestaticStringgetMblCp(List<Token>process){

for(Tokentoken:process){

if(newmblFetch().isnum(token.value))

returntoken.value;

}

return"";

}

privatechar[]process(Strings){

//TODOAuto-generated method stub

returnnull;

}

intcharIndex;

charcur_char;

char[]code_char_arr;

privateStringcurStat="start";

privateList<Token>tokens_tmp;

privateStringcurTokenTxt="";

@SuppressWarnings("unchecked")

publicList<Token>getTokens(StringcodeStr){

List<Token>li=Lists.newArrayList();

code_char_arr=codeStr.toCharArray();

while(true){

Objecttk;

try{

tk=nextTokens();

}catch(TokenEndExe){

break;

}

if(tkinstanceofToken)

li.add((Token)tk);

elseif(tkinstanceofList)

li.addAll((Collection<?extendsToken>)tk);

else

thrownewRuntimeException("token type err,curchar:"+cur_char+",colidx:"+charIndex);

}

returnli;

}

/**

*

*@returntoken or list<token>

*@throwsTokenEndEx

*/

publicObjectnextTokens()throwsTokenEndEx{

// code_char_arr = code.toCharArray();

charIndex++;

if(charIndex>code_char_arr.length-1)

thrownewTokenEndEx(newString(code_char_arr));

cur_char=code_char_arr[charIndex];

// cur_char=cur_char;

// if (this.curTokenTxt.equals("1598"))

// System.out.println("dbg");

// if(this.gColumn==30)

// System.out.println("dbg");

// get next char,,then changestat

//judecur char and curstat...then if or notchagestat

if(ishanzi(cur_char))

returnhanziEvt();

elseif(isnum(cur_char))

returnnumEvt();

else

returnsplitorCharEvt();

// break;

}

privateObjectnumEvt()throwsTokenEndEx{

if(this.curStat.equals("start")){

this.curStat="numStat";

returngaziStat();

}

if(this.curStat.equals("numStat")){

returngaziStat();

}

if(this.curStat.equals("hanziStat")){

this.curStat="numStat";

returnretNumtoken();

}

if(this.curStat.equals("splitorStat")){

this.curStat="numStat";

returnretSplitorToken();

}

returnnull;

}

privateObjecthanziEvt()throwsTokenEndEx{

if(this.curStat.equals("start")){

this.curStat="hanziStat";

returngaziStat();

}

if(this.curStat.equals("hanziStat")){

returngaziStat();

}

// if ishanzi&& cur isnumstat

if(this.curStat.equals("numStat")){

this.curStat="hanziStat";

returnretNumtoken();

}

if(this.curStat.equals("splitorStat")){

this.curStat="hanziStat";

returnretSplitorToken();

}

this.curStat="hanziStat";

returnnull;

}

privateObjectsplitorCharEvt()throwsTokenEndEx{

if(this.curStat.equals("start")){

this.curStat="splitorStat";

returngaziStat();

}

if(this.curStat.equals("hanziStat")){

this.curStat="splitorStat";

returnretHeziToken();

}

if(this.curStat.equals("numStat")){

this.curStat="splitorStat";

returnretNumtoken();

}

//gazi

this.curStat="splitorStat";

returngaziStat();

}

privateObjectretHeziToken(){

Tokentk=newToken();

tk.Text=curTokenTxt.toString();

tk.Type="hezi";

tk.value=curTokenTxt.toString();

curTokenTxt=String.valueOf(cur_char);

returntk;

}

privateObjectretNumtoken(){

Tokentk=newToken();

tk.Text=curTokenTxt.toString();

tk.Type="num";

tk.value=curTokenTxt.toString();

curTokenTxt="";

curTokenTxt=String.valueOf(cur_char);

returntk;

}

privateObjectretSplitorToken(){

Tokentk=newToken();

tk.Text=curTokenTxt.toString();

tk.Type="splitor";

tk.value=curTokenTxt.toString();

curTokenTxt=""; curTokenTxt=String.valueOf(cur_char);

returntk;

}

privateObjectgaziStat()throwsTokenEndEx{

curTokenTxt=curTokenTxt+String.valueOf(cur_char);

returnnextTokens();

}

privatebooleanishanzi(charcur_char2){

returnisChinese(String.valueOf(cur_char2));

}

privatebooleanisnum(charcur_char2){

Stringstr=String.valueOf(cur_char2);

returnisnum(str);

}

privatebooleanisnum(Stringstr){

for(inti=str.length();--i>=0;){

if(!Character.isDigit(str.charAt(i))){

returnfalse;

}

}

returntrue;

}

publicstaticbooleanisChinese(Stringstr){

StringregEx="[\u4e00-\u9fa5]";

Patternpat=Pattern.compile(regEx);

Matchermatcher=pat.matcher(str);

booleanflg=false;

if(matcher.find())

flg=true;

returnflg;

}

}

Atitit attilax擅长项目解析与大数据采集提取 词法分析 电话号码提取 package vcfvcardprj; import java.util.Collection; imp

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。