Java 向语言工具建议列表添加单词
我使用LanguageTool在我的应用程序中执行一些拼写检查和拼写更正功能 LanguageTool描述了如何从拼写检查中排除单词(使用正在使用的拼写检查规则的Java 向语言工具建议列表添加单词,java,spell-checking,languagetool,Java,Spell Checking,Languagetool,我使用LanguageTool在我的应用程序中执行一些拼写检查和拼写更正功能 LanguageTool描述了如何从拼写检查中排除单词(使用正在使用的拼写检查规则的addIgnoreTokens(…)方法调用) 如何在拼写检查中添加一些单词(例如,来自特定词典)?也就是说,LanguageTool可以修复拼写错误的单词并从我的特定词典中推荐单词吗?不幸的是,我认为API不支持这一点。在没有API的情况下,您可以将单词添加到spelling.txt,以使它们被接受并用作建议。使用API,您可能需要扩
addIgnoreTokens(…)
方法调用)
如何在拼写检查中添加一些单词(例如,来自特定词典)?也就是说,LanguageTool可以修复拼写错误的单词并从我的特定词典中推荐单词吗?不幸的是,我认为API不支持这一点。在没有API的情况下,您可以将单词添加到
spelling.txt
,以使它们被接受并用作建议。使用API,您可能需要扩展MorfologikSpellerRule并进行更改。(披露:我是LanguageTool的维护者)我有类似的要求,那就是将一些自定义词作为“建议词”加载到词典中,而不仅仅是“忽略词”。最后,我扩展了MorfologikSpellerRule来实现这一点:
- 创建类MorfologikSpellerRuleEx从MorfologikSpellerRule扩展而来,重写方法“match()”,并编写我自己的“initSpellerEx()”来创建拼写器
- 然后,对于语言工具,创建此自定义拼写器规则以替换现有规则
public class MorfologikSpellerRuleEx extends MorfologikSpellerRule {
private String spellingFilePath = null;
private boolean ignoreTaggedWords = false;
/**
 * Creates the extended speller rule.
 *
 * @param messages localization bundle passed through to the base rule
 * @param language language whose dictionaries this rule checks against
 * @throws IOException if the base rule cannot load its resources
 */
public MorfologikSpellerRuleEx(ResourceBundle messages, Language language) throws IOException {
super(messages, language);
}
/**
 * Resource path of the binary Morfologik dictionary used by this rule.
 * Hard-wired to the bundled US-English dictionary; resolved through
 * JLanguageTool's data broker during lazy initialization.
 */
@Override
public String getFileName() {
    final String binaryDictPath = "/en/hunspell/en_US.dict";
    return binaryDictPath;
}
/**
 * Unique rule ID, distinct from the stock Morfologik rule's ID so both
 * rules can be registered side by side without clashing.
 */
@Override
public String getId() {
    final String ruleId = "MORFOLOGIK_SPELLING_RULE_EX";
    return ruleId;
}
/**
 * Tells this rule to skip any token that already carries a POS reading,
 * i.e. words the tagger recognizes are treated as correctly spelled
 * (see canBeIgnored()).
 */
@Override
public void setIgnoreTaggedWords() {
    this.ignoreTaggedWords = true;
}
/**
 * @return path of the user-supplied plain-text dictionary file, or
 *         {@code null} if none was configured
 */
public String getSpellingFilePath() {
    return this.spellingFilePath;
}
/**
 * Configures a plain-text dictionary file whose words should be accepted
 * and offered as suggestions. The file is loaded lazily on the first
 * call to match().
 *
 * @param spellingFilePath path of the word-list file (one word per line
 *        is assumed by the Morfologik speller — TODO confirm format)
 */
public void setSpellingFilePath(String spellingFilePath) {
    this.spellingFilePath = spellingFilePath;
}
/**
 * Initializes the three Morfologik spellers (max edit distances 1-3),
 * optionally merging the words from the user-supplied plain-text
 * dictionary ({@link #setSpellingFilePath(String)}) so they are both
 * accepted and offered as suggestions.
 *
 * @param binaryDict resource path of the binary Morfologik dictionary
 * @throws IOException if a speller cannot be built from the dictionaries
 */
private void initSpellerEx(String binaryDict) throws IOException {
    // Locate the bundled plain-text spelling file (e.g. spelling.txt).
    String plainTextDict = null;
    if (JLanguageTool.getDataBroker().resourceExists(getSpellingFileName())) {
        plainTextDict = getSpellingFileName();
    }
    if (plainTextDict == null) {
        // Keep the original failure mode: the bundled spelling file is mandatory.
        throw new RuntimeException("Could not find ignore spell file in path: " + getSpellingFileName());
    }
    boolean customDictLoaded = false;
    if (this.spellingFilePath != null) {
        // BUG FIX: the previous code passed ONE BufferedReader to all three
        // speller constructors; the first constructor drained the stream, so
        // spellers 2 and 3 never saw the custom words. Each speller now gets
        // its own freshly opened reader. try-with-resources also closes the
        // readers even when a constructor throws (the old code leaked br in
        // that case, since close() was not in a finally block).
        try (BufferedReader br1 = new BufferedReader(new FileReader(this.spellingFilePath));
             BufferedReader br2 = new BufferedReader(new FileReader(this.spellingFilePath));
             BufferedReader br3 = new BufferedReader(new FileReader(this.spellingFilePath))) {
            speller1 = new MorfologikMultiSpeller(binaryDict, br1, plainTextDict, 1);
            speller2 = new MorfologikMultiSpeller(binaryDict, br2, plainTextDict, 2);
            speller3 = new MorfologikMultiSpeller(binaryDict, br3, plainTextDict, 3);
            customDictLoaded = true;
        } catch (IOException e) {
            // Custom dictionary missing or unreadable: fall back to the bundled
            // dictionaries only, mirroring the original silent-fallback behavior
            // (but scoped to IOException instead of swallowing everything).
            customDictLoaded = false;
        }
    }
    if (!customDictLoaded) {
        speller1 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 1);
        speller2 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 2);
        speller3 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 3);
    }
    setConvertsCase(speller1.convertsCase());
}
/**
 * Decides whether the speller should skip this token entirely.
 * A token is skipped when it is the sentence-start marker, immunized,
 * explicitly hidden from the speller, a URL or e-mail address, a
 * POS-tagged word (when ignoreTaggedWords is enabled), or matched by
 * the base rule's ignore logic.
 */
private boolean canBeIgnored(AnalyzedTokenReadings[] tokens, int idx, AnalyzedTokenReadings token)
        throws IOException {
    // Structural or explicitly shielded tokens.
    if (token.isSentenceStart() || token.isImmunized() || token.isIgnoredBySpeller()) {
        return true;
    }
    // URLs and e-mail addresses are never spell-checked.
    String surface = token.getToken();
    if (isUrl(surface) || isEMail(surface)) {
        return true;
    }
    // Optionally trust the tagger: words with a POS reading count as correct.
    if (ignoreTaggedWords && token.isTagged()) {
        return true;
    }
    // Finally, delegate to the base rule's ignore logic.
    return ignoreToken(tokens, idx);
}
/**
 * Spell-checks one analyzed sentence and returns the matches
 * (misspelled words plus their suggestions).
 *
 * Mirrors the base implementation except that lazy initialization goes
 * through initSpellerEx(), which can merge the custom dictionary set via
 * setSpellingFilePath() into the suggestion engine.
 *
 * @param sentence the sentence to check
 * @return array of rule matches (empty when nothing is misspelled)
 * @throws IOException if the spellers cannot be initialized
 */
@Override
public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
List<RuleMatch> ruleMatches = new ArrayList<>();
AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace();
// lazy init
if (speller1 == null) {
String binaryDict = null;
if (JLanguageTool.getDataBroker().resourceExists(getFileName())) {
binaryDict = getFileName();
}
if (binaryDict != null) {
initSpellerEx(binaryDict); //here's the change
} else {
// should not happen, as we only configure this rule (or rather its subclasses)
// when we have the resources:
return toRuleMatchArray(ruleMatches);
}
}
int idx = -1;
for (AnalyzedTokenReadings token : tokens) {
idx++;
// Skip sentence starts, immunized/ignored tokens, URLs, e-mails and
// (optionally) POS-tagged words -- see canBeIgnored().
if (canBeIgnored(tokens, idx, token)) {
continue;
}
// if we use token.getToken() we'll get ignored characters inside and speller
// will choke
String word = token.getAnalyzedToken(0).getToken();
if (tokenizingPattern() == null) {
// No sub-tokenization configured: check the whole token at once.
ruleMatches.addAll(getRuleMatches(word, token.getStartPos(), sentence));
} else {
// Split the token on the rule's tokenizing pattern and check each piece,
// keeping character offsets relative to the sentence start.
int index = 0;
Matcher m = tokenizingPattern().matcher(word);
while (m.find()) {
String match = word.subSequence(index, m.start()).toString();
ruleMatches.addAll(getRuleMatches(match, token.getStartPos() + index, sentence));
index = m.end();
}
if (index == 0) { // tokenizing char not found
ruleMatches.addAll(getRuleMatches(word, token.getStartPos(), sentence));
} else {
// Check the tail segment after the last separator match.
ruleMatches.addAll(getRuleMatches(word.subSequence(index, word.length()).toString(),
token.getStartPos() + index, sentence));
}
}
}
return toRuleMatchArray(ruleMatches);
}
公共类MorfologikSpillerRulex扩展了MorfologikSpillerRule{
私有字符串拼写FilePath=null;
私有布尔ignoreTaggedWords=false;
公共morfologikspellerrulex(ResourceBundle消息、语言)引发IOException{
超级(信息、语言);
}
@凌驾
公共字符串getFileName(){
返回“/en/hunspell/en_US.dict”;
}
@凌驾
公共字符串getId(){
返回“MORFOLOGIK\u拼写规则\u EX”;
}
@凌驾
public void setIgnoreTaggedWords(){
ignoreTaggedWords=真;
}
公共字符串getSpellingFilePath(){
返回拼写文件路径;
}
public void setSpellingFilePath(字符串spellingFilePath){
this.spellingFilePath=spellingFilePath;
}
私有void initSpellerEx(字符串binaryDict)引发IOException{
字符串plainTextDict=null;
如果(JLanguageTool.getDataBroker().resourceExists(getSpellingFileName())){
plainTextDict=getSpellingFileName();
}
if(明文dict!=null){
BufferedReader br=null;
if(this.spellingFilePath!=null){
试一试{
br=新的BufferedReader(新文件读取器(this.spellingFilePath));
}
捕获(例外e){
br=null;
}
}
如果(br!=null){
speller1=新的MorfologyMultiSpeller(二进制dict,br,明文dict,1);
speller2=新的MorfologyMultiSpeller(二进制dict,br,明文dict,2);
speller3=新的MorfologyMultiSpeller(二进制dict,br,明文dict,3);
br.close();
}
否则{
speller1=新的MorfologyMultiSpeller(二进制dict,明文dict,1);
speller2=新的MorfologyMultiSpeller(二进制dict,明文dict,2);
speller3=新的MorfologyMultiSpeller(二进制dict,明文dict,3);
}
setConvertsCase(拼写器1.convertsCase());
}否则{
抛出新的RuntimeException(“在路径:+getSpellingFileName()中找不到忽略拼写文件”);
}
}
私有布尔值可以忽略(AnalyzedTokenReadings[]标记,int idx,AnalyzedTokenReadings标记)
抛出IOException{
返回token.isSentenceStart()| | | token.isImmunized()| | | token.isIgnoredBySpeller()| | isUrl(token.getToken())
||isEMail(token.getToken())| |(ignoreTaggedWords&&token.istaged())| | ignoreToken(tokens,idx);
}
@凌驾
公共规则匹配[]匹配(AnalyzedEntence语句)引发IOException{
List ruleMatches=new ArrayList();
AnalyzedTokenRedings[]tokens=getSentenceWithImmunization(句子)。GetTokenWithWithHiteSpace();
//惰性初始化
if(拼写器1==null){
字符串binaryDict=null;
如果(JLanguageTool.getDataBroker().resourceExists(getFileName())){
binaryDict=getFileName();
}
if(二进制dict!=null){
initSpellerEx(binaryDict);//这是更改
}否则{
//不应该发生,因为我们只配置此规则(或其子类)
//当我们有资源时:
返回圆环匹配数组(ruleMatches);
}
}
intidx=-1;
for(AnalyzedTokenReadings令牌:令牌){
idx++;
if(可识别(令牌、idx、令牌)){
继续;
}
//如果我们使用token.getToken()我们将在和拼写器中获得被忽略的字符
//会窒息
String word=token.getAnalyzedToken(0.getToken();
if(tokenizingPattern()==null){
addAll(getRuleMatches(word,token.getStartPos(),句子));
}否则{
int指数=0;
Matcher m=tokenizingPattern().Matcher(word);
while(m.find()){
字符串匹配=word.subSequence(index,m.start()).toString();
addAll(getRuleMatches(match,token.getStartPos()+索引,句子));
索引=m.end();
}
如果(索引==0){//未找到标记化字符
addAll(getRuleMatches(word,token.getStartPos(),句子));
}否则{
ruleMatches.addAll(getRuleMatches(word.subSequence(index,word.length()).toString(),
getStartPos()+索引,句子));
}
}
}
返回圆环匹配数组(ruleMatches);
}
}非常感谢!据我所知,如果我将单词添加到spelling.txt,那么它将导致仅在初始化时将这些单词添加到建议列表?若我在正在运行的应用程序中添加单词,我需要扩展MorfologikSpellerRule(并更改代码)?是的,就是这样。我创建了一个类并扩展了MorfologikSpellerRule,但什么时候
public class MorfologikSpellerRuleEx extends MorfologikSpellerRule {
private String spellingFilePath = null;
private boolean ignoreTaggedWords = false;
/**
 * Creates the extended speller rule.
 *
 * @param messages localization bundle passed through to the base rule
 * @param language language whose dictionaries this rule checks against
 * @throws IOException if the base rule cannot load its resources
 */
public MorfologikSpellerRuleEx(ResourceBundle messages, Language language) throws IOException {
super(messages, language);
}
/**
 * Resource path of the binary Morfologik dictionary used by this rule.
 * Hard-wired to the bundled US-English dictionary; resolved through
 * JLanguageTool's data broker during lazy initialization.
 */
@Override
public String getFileName() {
    final String binaryDictPath = "/en/hunspell/en_US.dict";
    return binaryDictPath;
}
/**
 * Unique rule ID, distinct from the stock Morfologik rule's ID so both
 * rules can be registered side by side without clashing.
 */
@Override
public String getId() {
    final String ruleId = "MORFOLOGIK_SPELLING_RULE_EX";
    return ruleId;
}
/**
 * Tells this rule to skip any token that already carries a POS reading,
 * i.e. words the tagger recognizes are treated as correctly spelled
 * (see canBeIgnored()).
 */
@Override
public void setIgnoreTaggedWords() {
    this.ignoreTaggedWords = true;
}
/**
 * @return path of the user-supplied plain-text dictionary file, or
 *         {@code null} if none was configured
 */
public String getSpellingFilePath() {
    return this.spellingFilePath;
}
/**
 * Configures a plain-text dictionary file whose words should be accepted
 * and offered as suggestions. The file is loaded lazily on the first
 * call to match().
 *
 * @param spellingFilePath path of the word-list file (one word per line
 *        is assumed by the Morfologik speller — TODO confirm format)
 */
public void setSpellingFilePath(String spellingFilePath) {
    this.spellingFilePath = spellingFilePath;
}
/**
 * Initializes the three Morfologik spellers (max edit distances 1-3),
 * optionally merging the words from the user-supplied plain-text
 * dictionary ({@link #setSpellingFilePath(String)}) so they are both
 * accepted and offered as suggestions.
 *
 * @param binaryDict resource path of the binary Morfologik dictionary
 * @throws IOException if a speller cannot be built from the dictionaries
 */
private void initSpellerEx(String binaryDict) throws IOException {
    // Locate the bundled plain-text spelling file (e.g. spelling.txt).
    String plainTextDict = null;
    if (JLanguageTool.getDataBroker().resourceExists(getSpellingFileName())) {
        plainTextDict = getSpellingFileName();
    }
    if (plainTextDict == null) {
        // Keep the original failure mode: the bundled spelling file is mandatory.
        throw new RuntimeException("Could not find ignore spell file in path: " + getSpellingFileName());
    }
    boolean customDictLoaded = false;
    if (this.spellingFilePath != null) {
        // BUG FIX: the previous code passed ONE BufferedReader to all three
        // speller constructors; the first constructor drained the stream, so
        // spellers 2 and 3 never saw the custom words. Each speller now gets
        // its own freshly opened reader. try-with-resources also closes the
        // readers even when a constructor throws (the old code leaked br in
        // that case, since close() was not in a finally block).
        try (BufferedReader br1 = new BufferedReader(new FileReader(this.spellingFilePath));
             BufferedReader br2 = new BufferedReader(new FileReader(this.spellingFilePath));
             BufferedReader br3 = new BufferedReader(new FileReader(this.spellingFilePath))) {
            speller1 = new MorfologikMultiSpeller(binaryDict, br1, plainTextDict, 1);
            speller2 = new MorfologikMultiSpeller(binaryDict, br2, plainTextDict, 2);
            speller3 = new MorfologikMultiSpeller(binaryDict, br3, plainTextDict, 3);
            customDictLoaded = true;
        } catch (IOException e) {
            // Custom dictionary missing or unreadable: fall back to the bundled
            // dictionaries only, mirroring the original silent-fallback behavior
            // (but scoped to IOException instead of swallowing everything).
            customDictLoaded = false;
        }
    }
    if (!customDictLoaded) {
        speller1 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 1);
        speller2 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 2);
        speller3 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 3);
    }
    setConvertsCase(speller1.convertsCase());
}
/**
 * Decides whether the speller should skip this token entirely.
 * A token is skipped when it is the sentence-start marker, immunized,
 * explicitly hidden from the speller, a URL or e-mail address, a
 * POS-tagged word (when ignoreTaggedWords is enabled), or matched by
 * the base rule's ignore logic.
 */
private boolean canBeIgnored(AnalyzedTokenReadings[] tokens, int idx, AnalyzedTokenReadings token)
        throws IOException {
    // Structural or explicitly shielded tokens.
    if (token.isSentenceStart() || token.isImmunized() || token.isIgnoredBySpeller()) {
        return true;
    }
    // URLs and e-mail addresses are never spell-checked.
    String surface = token.getToken();
    if (isUrl(surface) || isEMail(surface)) {
        return true;
    }
    // Optionally trust the tagger: words with a POS reading count as correct.
    if (ignoreTaggedWords && token.isTagged()) {
        return true;
    }
    // Finally, delegate to the base rule's ignore logic.
    return ignoreToken(tokens, idx);
}
/**
 * Spell-checks one analyzed sentence and returns the matches
 * (misspelled words plus their suggestions).
 *
 * Mirrors the base implementation except that lazy initialization goes
 * through initSpellerEx(), which can merge the custom dictionary set via
 * setSpellingFilePath() into the suggestion engine.
 *
 * @param sentence the sentence to check
 * @return array of rule matches (empty when nothing is misspelled)
 * @throws IOException if the spellers cannot be initialized
 */
@Override
public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
List<RuleMatch> ruleMatches = new ArrayList<>();
AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace();
// lazy init
if (speller1 == null) {
String binaryDict = null;
if (JLanguageTool.getDataBroker().resourceExists(getFileName())) {
binaryDict = getFileName();
}
if (binaryDict != null) {
initSpellerEx(binaryDict); //here's the change
} else {
// should not happen, as we only configure this rule (or rather its subclasses)
// when we have the resources:
return toRuleMatchArray(ruleMatches);
}
}
int idx = -1;
for (AnalyzedTokenReadings token : tokens) {
idx++;
// Skip sentence starts, immunized/ignored tokens, URLs, e-mails and
// (optionally) POS-tagged words -- see canBeIgnored().
if (canBeIgnored(tokens, idx, token)) {
continue;
}
// if we use token.getToken() we'll get ignored characters inside and speller
// will choke
String word = token.getAnalyzedToken(0).getToken();
if (tokenizingPattern() == null) {
// No sub-tokenization configured: check the whole token at once.
ruleMatches.addAll(getRuleMatches(word, token.getStartPos(), sentence));
} else {
// Split the token on the rule's tokenizing pattern and check each piece,
// keeping character offsets relative to the sentence start.
int index = 0;
Matcher m = tokenizingPattern().matcher(word);
while (m.find()) {
String match = word.subSequence(index, m.start()).toString();
ruleMatches.addAll(getRuleMatches(match, token.getStartPos() + index, sentence));
index = m.end();
}
if (index == 0) { // tokenizing char not found
ruleMatches.addAll(getRuleMatches(word, token.getStartPos(), sentence));
} else {
// Check the tail segment after the last separator match.
ruleMatches.addAll(getRuleMatches(word.subSequence(index, word.length()).toString(),
token.getStartPos() + index, sentence));
}
}
}
return toRuleMatchArray(ruleMatches);
}