文本替换添加对应的日志

This commit is contained in:
tiger_zhou 2024-03-11 17:57:12 +08:00
parent c085f5f778
commit d20fe5fd27

View File

@ -14,222 +14,224 @@ import java.util.regex.Pattern;
@Component @Component
public class SimulationVoiceHandler { public class SimulationVoiceHandler {
static List<Replacement> replacementList; static List<Replacement> replacementList;
static Map<String, String> regexReplaceMap = new HashMap<>(); static Map<String, String> regexReplaceMap = new HashMap<>();
// static { // static {
// regexReplaceMap.put("", "1"); // regexReplaceMap.put("", "1");
// } // }
static List<Replacement> numReplacementList; static List<Replacement> numReplacementList;
static { static {
replacementList = new ArrayList<>(); replacementList = new ArrayList<>();
numReplacementList = new ArrayList<>(); numReplacementList = new ArrayList<>();
//------------- 数字 -------------- //------------- 数字 --------------
numReplacementList.add(new Replacement("(零|洞|栋)", "0")); numReplacementList.add(new Replacement("(零|洞|栋)", "0"));
numReplacementList.add(new Replacement("(一|幺|e|E)", "1")); numReplacementList.add(new Replacement("(一|幺|e|E)", "1"));
numReplacementList.add(new Replacement("(二|两|r)", "2")); numReplacementList.add(new Replacement("(二|两|r)", "2"));
numReplacementList.add(new Replacement("(三)", "3")); numReplacementList.add(new Replacement("(三)", "3"));
numReplacementList.add(new Replacement("(四)", "4")); numReplacementList.add(new Replacement("(四)", "4"));
numReplacementList.add(new Replacement("(五)", "5")); numReplacementList.add(new Replacement("(五)", "5"));
numReplacementList.add(new Replacement("(六)", "6")); numReplacementList.add(new Replacement("(六)", "6"));
numReplacementList.add(new Replacement("(七|拐)", "7")); numReplacementList.add(new Replacement("(七|拐)", "7"));
numReplacementList.add(new Replacement("(八)", "8")); numReplacementList.add(new Replacement("(八)", "8"));
numReplacementList.add(new Replacement("(九)", "9")); numReplacementList.add(new Replacement("(九)", "9"));
numReplacementList.add(new Replacement("(|。)", "")); numReplacementList.add(new Replacement("(|。)", ""));
numReplacementList.add(new Replacement("( )", "")); numReplacementList.add(new Replacement("( )", ""));
numReplacementList.add(new Replacement("(低|地|第|敌|底|递)", "d")); numReplacementList.add(new Replacement("(低|地|第|敌|底|递)", "d"));
//------------- 专业词汇 -------------- //------------- 专业词汇 --------------
replacementList.add(new Replacement("(行吊|情调|神雕|性交|心跳|香蕉|星标|平调|新调)", "行调")); replacementList.add(new Replacement("(行吊|情调|神雕|性交|心跳|香蕉|星标|平调|新调)", "行调"));
replacementList.add(new Replacement("(茼蒿)", "通号")); replacementList.add(new Replacement("(茼蒿)", "通号"));
replacementList.add(new Replacement("", "")); replacementList.add(new Replacement("", ""));
replacementList.add(new Replacement("自动折法", "自动折返")); replacementList.add(new Replacement("自动折法", "自动折返"));
replacementList.add(new Replacement("(倒插|刀叉|倒茶|倒叉|到插|倒闸|到察|到厂|道碴|到差|到查)", "道岔")); replacementList.add(new Replacement("(倒插|刀叉|倒茶|倒叉|到插|倒闸|到察|到厂|道碴|到差|到查)", "道岔"));
replacementList.add(new Replacement("(师表|手表|时表)", "失表")); replacementList.add(new Replacement("(师表|手表|时表)", "失表"));
replacementList.add(new Replacement("攻作组", "工作组")); replacementList.add(new Replacement("攻作组", "工作组"));
replacementList.add(new Replacement("军事表", "均失表")); replacementList.add(new Replacement("军事表", "均失表"));
replacementList.add(new Replacement("(阴道插|阴道畅)", "因道岔")); replacementList.add(new Replacement("(阴道插|阴道畅)", "因道岔"));
replacementList.add(new Replacement("(费用车|背影车)", "备用车")); replacementList.add(new Replacement("(费用车|背影车)", "备用车"));
replacementList.add(new Replacement("(具贝家车条件|具被介绍条件)", "具备接车条件")); replacementList.add(new Replacement("(具贝家车条件|具被介绍条件)", "具备接车条件"));
replacementList.add(new Replacement("(烟雾)", "延误")); replacementList.add(new Replacement("(烟雾)", "延误"));
replacementList.add(new Replacement("(客栈)", "各站")); replacementList.add(new Replacement("(客栈)", "各站"));
replacementList.add(new Replacement("(正线队长发送)", "正线队长复诵")); replacementList.add(new Replacement("(正线队长发送)", "正线队长复诵"));
replacementList.add(new Replacement("(电影)", "点名")); replacementList.add(new Replacement("(电影)", "点名"));
replacementList.add(new Replacement("(代理|代练|待定|待命)", "待令")); replacementList.add(new Replacement("(代理|代练|待定|待命)", "待令"));
replacementList.add(new Replacement("(伤心|绍兴|上省)", "上行")); replacementList.add(new Replacement("(伤心|绍兴|上省)", "上行"));
replacementList.add(new Replacement("(调制)", "调至")); replacementList.add(new Replacement("(调制)", "调至"));
replacementList.add(new Replacement("(时代丽|时代绿)", "首待令")); replacementList.add(new Replacement("(时代丽|时代绿)", "首待令"));
replacementList.add(new Replacement("(绰号)", "做好")); replacementList.add(new Replacement("(绰号)", "做好"));
replacementList.add(new Replacement("(清江|青椒|京腔)", "请讲")); replacementList.add(new Replacement("(清江|青椒|京腔)", "请讲"));
replacementList.add(new Replacement("(转换慧)", "转换轨")); replacementList.add(new Replacement("(转换慧)", "转换轨"));
replacementList.add(new Replacement("(芝法玲时期|至发令时期|此法令时期)", "自发令时起")); replacementList.add(new Replacement("(芝法玲时期|至发令时期|此法令时期)", "自发令时起"));
replacementList.add(new Replacement("(上有延误)", "稍有延误")); replacementList.add(new Replacement("(上有延误)", "稍有延误"));
replacementList.add(new Replacement("(歌词)", "各次")); replacementList.add(new Replacement("(歌词)", "各次"));
replacementList.add(new Replacement("(程序你)", "准许你")); replacementList.add(new Replacement("(程序你)", "准许你"));
replacementList.add(new Replacement("(车法)", "折返")); replacementList.add(new Replacement("(车法)", "折返"));
replacementList.add(new Replacement("(拉萍|拉婷)", "拉停")); replacementList.add(new Replacement("(拉萍|拉婷)", "拉停"));
replacementList.add(new Replacement("(站街)", "站级")); replacementList.add(new Replacement("(站街)", "站级"));
replacementList.add(new Replacement("(羡慕)", "线路")); replacementList.add(new Replacement("(羡慕)", "线路"));
replacementList.add(new Replacement("(功率标|功率表|公里表|公历表|公斤标|公立标)", "公里标")); replacementList.add(new Replacement("(功率标|功率表|公里表|公历表|公斤标|公立标)", "公里标"));
replacementList.add(new Replacement("(赚钱折法|赚钱折返)", "站前折返")); replacementList.add(new Replacement("(赚钱折法|赚钱折返)", "站前折返"));
replacementList.add(new Replacement("(权限各次)", "全线各次")); replacementList.add(new Replacement("(权限各次)", "全线各次"));
replacementList.add(new Replacement("(庭院)", "停运")); replacementList.add(new Replacement("(庭院)", "停运"));
replacementList.add(new Replacement("(请客|青稞|青客|轻客|轻课|青涩|听课|停课|听,可|听可|顷刻|倾刻|千克|乘车|七个|切克)", "清客")); replacementList.add(new Replacement("(请客|青稞|青客|轻客|轻课|青涩|听课|停课|听,可|听可|顷刻|倾刻|千克|乘车|七个|切克)", "清客"));
replacementList.add(new Replacement("(会断|会短|回断)", "回段")); replacementList.add(new Replacement("(会断|会短|回断)", "回段"));
replacementList.add(new Replacement("(清朝|请朝|请钞)", "请抄")); replacementList.add(new Replacement("(清朝|请朝|请钞)", "请抄"));
replacementList.add(new Replacement("(请超链|请超龄)", "请抄令")); replacementList.add(new Replacement("(请超链|请超龄)", "请抄令"));
replacementList.add(new Replacement("(首领处所|手令处所|手链处所)", "受令处所")); replacementList.add(new Replacement("(首领处所|手令处所|手链处所)", "受令处所"));
replacementList.add(new Replacement("安全保护", "安全防护")); replacementList.add(new Replacement("安全保护", "安全防护"));
replacementList.add(new Replacement("(手摇质|说杨智|受邀至)", "手摇至")); replacementList.add(new Replacement("(手摇质|说杨智|受邀至)", "手摇至"));
replacementList.add(new Replacement("(反胃|软位)", "反位")); replacementList.add(new Replacement("(反胃|软位)", "反位"));
replacementList.add(new Replacement("所逼", "锁闭")); replacementList.add(new Replacement("所逼", "锁闭"));
replacementList.add(new Replacement("位直", "位置")); replacementList.add(new Replacement("位直", "位置"));
replacementList.add(new Replacement("丞将|成强|成交|沉降", "乘降")); replacementList.add(new Replacement("丞将|成强|成交|沉降", "乘降"));
replacementList.add(new Replacement("售电弓|售电工|收电弓|数电弓", "受电弓")); replacementList.add(new Replacement("售电弓|售电工|收电弓|数电弓", "受电弓"));
replacementList.add(new Replacement("故障回复", "故障恢复")); replacementList.add(new Replacement("故障回复", "故障恢复"));
replacementList.add(new Replacement("(出入断线|出路段线)", "出入段线")); replacementList.add(new Replacement("(出入断线|出路段线)", "出入段线"));
replacementList.add(new Replacement("(控子权|控质权|放置权|矿质权|空置权)", "控制权")); replacementList.add(new Replacement("(控子权|控质权|放置权|矿质权|空置权)", "控制权"));
replacementList.add(new Replacement("(手机号|首信号|首信好|首信后|首信哈|手型号)", "手信号")); replacementList.add(new Replacement("(手机号|首信号|首信好|首信后|首信哈|手型号)", "手信号"));
replacementList.add(new Replacement("(鼓掌|部长)", "故障")); replacementList.add(new Replacement("(鼓掌|部长)", "故障"));
replacementList.add(new Replacement("(向凤|下方)", "下放")); replacementList.add(new Replacement("(向凤|下方)", "下放"));
replacementList.add(new Replacement("(占线|占先|展现|战线)", "站线")); replacementList.add(new Replacement("(占线|占先|展现|战线)", "站线"));
replacementList.add(new Replacement("(定为)", "定位")); replacementList.add(new Replacement("(定为)", "定位"));
replacementList.add(new Replacement("(者搬迁|折返性)", "折返线")); replacementList.add(new Replacement("(者搬迁|折返性)", "折返线"));
replacementList.add(new Replacement("(进入)", "进路")); replacementList.add(new Replacement("(进入)", "进路"));
replacementList.add(new Replacement("(污点|5点)", "无电")); replacementList.add(new Replacement("(污点|5点)", "无电"));
replacementList.add(new Replacement("(出轻)", "出清")); replacementList.add(new Replacement("(出轻)", "出清"));
replacementList.add(new Replacement("(抚慰)", "复位")); replacementList.add(new Replacement("(抚慰)", "复位"));
replacementList.add(new Replacement("(实业)", "实验")); replacementList.add(new Replacement("(实业)", "实验"));
replacementList.add(new Replacement("(控线)", "空闲")); replacementList.add(new Replacement("(控线)", "空闲"));
//------------- 设备相关 -------------- //------------- 设备相关 --------------
replacementList.add(new Replacement("(宇花祭|玉花祭|雨化寨|氯化钙|绿化带|优化钙)", "鱼化寨")); replacementList.add(new Replacement("(宇花祭|玉花祭|雨化寨|氯化钙|绿化带|优化钙)", "鱼化寨"));
replacementList.add(new Replacement("(保税局)", "保税区")); replacementList.add(new Replacement("(保税局)", "保税区"));
replacementList.add(new Replacement("(高桥息)", "高桥西")); replacementList.add(new Replacement("(高桥息)", "高桥西"));
replacementList.add(new Replacement("(车站10)", "车站十")); replacementList.add(new Replacement("(车站10)", "车站十"));
replacementList.add(new Replacement("(车站11)", "车站十一")); replacementList.add(new Replacement("(车站11)", "车站十一"));
replacementList.add(new Replacement("(车站12)", "车站十二")); replacementList.add(new Replacement("(车站12)", "车站十二"));
replacementList.add(new Replacement("(车站13)", "车站十三")); replacementList.add(new Replacement("(车站13)", "车站十三"));
replacementList.add(new Replacement("(车站14)", "车站十四")); replacementList.add(new Replacement("(车站14)", "车站十四"));
replacementList.add(new Replacement("(车站15)", "车站十五")); replacementList.add(new Replacement("(车站15)", "车站十五"));
replacementList.add(new Replacement("(车站16)", "车站十六")); replacementList.add(new Replacement("(车站16)", "车站十六"));
replacementList.add(new Replacement("(车站17)", "车站十七")); replacementList.add(new Replacement("(车站17)", "车站十七"));
replacementList.add(new Replacement("(车站18)", "车站十八")); replacementList.add(new Replacement("(车站18)", "车站十八"));
replacementList.add(new Replacement("(车站19)", "车站十九")); replacementList.add(new Replacement("(车站19)", "车站十九"));
replacementList.add(new Replacement("(车站20)", "车站二十")); replacementList.add(new Replacement("(车站20)", "车站二十"));
replacementList.add(new Replacement("(车站21)", "车站二十一")); replacementList.add(new Replacement("(车站21)", "车站二十一"));
replacementList.add(new Replacement("(车站22)", "车站二十二")); replacementList.add(new Replacement("(车站22)", "车站二十二"));
replacementList.add(new Replacement("(车站23)", "车站二十三")); replacementList.add(new Replacement("(车站23)", "车站二十三"));
replacementList.add(new Replacement("(车站24)", "车站二十四")); replacementList.add(new Replacement("(车站24)", "车站二十四"));
replacementList.add(new Replacement("(车站25)", "车站二十五")); replacementList.add(new Replacement("(车站25)", "车站二十五"));
replacementList.add(new Replacement("(车站26)", "车站二十六")); replacementList.add(new Replacement("(车站26)", "车站二十六"));
replacementList.add(new Replacement("(车站27)", "车站二十七")); replacementList.add(new Replacement("(车站27)", "车站二十七"));
replacementList.add(new Replacement("(车站28)", "车站二十八")); replacementList.add(new Replacement("(车站28)", "车站二十八"));
replacementList.add(new Replacement("(车站29)", "车站二十九")); replacementList.add(new Replacement("(车站29)", "车站二十九"));
replacementList.add(new Replacement("(车站1)", "车站一")); replacementList.add(new Replacement("(车站1)", "车站一"));
replacementList.add(new Replacement("(车站2)", "车站二")); replacementList.add(new Replacement("(车站2)", "车站二"));
replacementList.add(new Replacement("(车站3)", "车站三")); replacementList.add(new Replacement("(车站3)", "车站三"));
replacementList.add(new Replacement("(车站4)", "车站四")); replacementList.add(new Replacement("(车站4)", "车站四"));
replacementList.add(new Replacement("(车站5)", "车站五")); replacementList.add(new Replacement("(车站5)", "车站五"));
replacementList.add(new Replacement("(车站6)", "车站六")); replacementList.add(new Replacement("(车站6)", "车站六"));
replacementList.add(new Replacement("(车站7)", "车站七")); replacementList.add(new Replacement("(车站7)", "车站七"));
replacementList.add(new Replacement("(车站8)", "车站八")); replacementList.add(new Replacement("(车站8)", "车站八"));
replacementList.add(new Replacement("(车站9)", "车站九")); replacementList.add(new Replacement("(车站9)", "车站九"));
replacementList.add(new Replacement("(5爱3|我爱干|5爱站|5爱车|我爱他|吾爱插|我爱张|不爱张|我爱着|父爱站|我爱家|52战" + replacementList.add(new Replacement("(5爱3|我爱干|5爱站|5爱车|我爱他|吾爱插|我爱张|不爱张|我爱着|父爱站|我爱家|52战" +
"|於爱站|於2站|5Y站|Y站|外站|屋外站|不2战|不爱站|外传|5爱弹|污爱战|无爱站|吾隘站|无爱展|无爱占|无爱战|屋爱站)", "邬隘站")); "|於爱站|於2站|5Y站|Y站|外站|屋外站|不2战|不爱站|外传|5爱弹|污爱战|无爱站|吾隘站|无爱展|无爱占|无爱战|屋爱站)", "邬隘站"));
replacementList.add(new Replacement("(平安大道)", "民安大道")); replacementList.add(new Replacement("(平安大道)", "民安大道"));
replacementList.add(new Replacement("(忻州路)", "新洲路")); replacementList.add(new Replacement("(忻州路)", "新洲路"));
replacementList.add(new Replacement("(c接口)", "新街口")); replacementList.add(new Replacement("(c接口)", "新街口"));
}
/**
* 匹配替换对象
*/
public static class Replacement {
String match;
String replace;
public Replacement(String match, String replace) {
this.match = match;
this.replace = replace;
} }
/** String matchAndReplace(String content) {
* 匹配替换对象 if (content != null) {
*/ return content.replaceAll(this.match, this.replace);
public static class Replacement { }
String match; return null;
String replace;
public Replacement(String match, String replace) {
this.match = match;
this.replace = replace;
}
String matchAndReplace(String content) {
if (content != null) {
return content.replaceAll(this.match, this.replace);
}
return null;
}
} }
}
public String numHandle(String content){ public String numHandle(String content) {
for (Replacement replacement : numReplacementList) { for (Replacement replacement : numReplacementList) {
content = replacement.matchAndReplace(content); content = replacement.matchAndReplace(content);
}
return regexReplace(content);
}
/**
* 对语音识别内容进行仿真用词处理
*
* @param voiceContent
* @return
*/
public String handle(String voiceContent) {
for (Replacement replacement : replacementList) {
voiceContent = replacement.matchAndReplace(voiceContent);
}
return regexReplace(voiceContent);
} }
return regexReplace(content);
}
private String regexReplace(String s) { /**
log.info("正则替换"); * 对语音识别内容进行仿真用词处理
//编号替换 *
Pattern pattern = Pattern.compile("([一二三四五六七八九十]+)(号)"); * @param voiceContent
Matcher matcher = pattern.matcher(s); * @return
StringBuffer result = new StringBuffer(); */
while (matcher.find()) { public String handle(String voiceContent) {
StringBuilder sb = new StringBuilder(); for (Replacement replacement : replacementList) {
String[] split = matcher.group(1).split(""); voiceContent = replacement.matchAndReplace(voiceContent);
for (String str : split) {
switch (str) {
case "":
sb.append("1");
break;
case "":
sb.append("2");
break;
case "":
sb.append("3");
break;
case "":
sb.append("4");
break;
case "":
sb.append("5");
break;
case "":
sb.append("6");
break;
case "":
sb.append("7");
break;
case "":
sb.append("8");
break;
case "":
sb.append("9");
break;
case "":
sb.append("10");
break;
}
}
sb.append("$2");
matcher.appendReplacement(result, sb.toString());
}
matcher.appendTail(result);
return result.toString();
} }
return regexReplace(voiceContent);
}
/** Matches a run of Chinese numerals (一..十) immediately followed by 号; compiled once and reused. */
private static final Pattern NUMERAL_BEFORE_HAO = Pattern.compile("([一二三四五六七八九十]+)(号)");

/**
 * Rewrites every run of Chinese numerals that directly precedes 号 into Arabic digits,
 * e.g. 三号 becomes 3号 and 十一号 becomes 11号. Text outside such matches is copied unchanged.
 *
 * @param s text to process; may be null
 * @return the rewritten text, or null when {@code s} is null
 */
private String regexReplace(String s) {
    log.info("正则替换 字符[{}]", s);
    // Replacement.matchAndReplace passes null through, so null can reach this method.
    if (s == null) {
        return null;
    }
    Matcher matcher = NUMERAL_BEFORE_HAO.matcher(s);
    StringBuilder result = new StringBuilder(s.length());
    int copiedUpTo = 0;
    while (matcher.find()) {
        // Copy the untouched text before the match, then the converted numerals and the suffix.
        result.append(s, copiedUpTo, matcher.start())
                .append(chineseNumeralsToArabic(matcher.group(1)))
                .append(matcher.group(2));
        copiedUpTo = matcher.end();
    }
    result.append(s, copiedUpTo, s.length());
    return result.toString();
}

/**
 * Converts a run made only of the characters 一..九 and 十 into Arabic digits.
 *
 * <p>A run containing exactly one 十 with at most one digit on each side is read as a
 * positional number (十 = 10, 十一 = 11, 二十 = 20, 二十一 = 21). Any other run is
 * converted character by character, with 十 written as "10".
 *
 * @param numerals non-empty run of Chinese numerals as captured by the pattern
 * @return the Arabic-digit rendering of the run
 */
private static String chineseNumeralsToArabic(String numerals) {
    // The index of a character in this string plus one is its numeric value.
    final String digits = "一二三四五六七八九";
    int tenAt = numerals.indexOf('十');
    boolean singleTen = tenAt >= 0 && tenAt == numerals.lastIndexOf('十');
    if (singleTen && tenAt <= 1 && numerals.length() - tenAt <= 2) {
        int tens = tenAt == 0 ? 1 : digits.indexOf(numerals.charAt(0)) + 1;
        int units = tenAt == numerals.length() - 1 ? 0 : digits.indexOf(numerals.charAt(tenAt + 1)) + 1;
        return String.valueOf(tens * 10 + units);
    }
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < numerals.length(); i++) {
        char c = numerals.charAt(i);
        if (c == '十') {
            sb.append("10");
        } else {
            sb.append(digits.indexOf(c) + 1);
        }
    }
    return sb.toString();
}
} }