Speech recognition Sphinx4 OutOfMemoryError带有自定义配置

Speech recognition Sphinx4 OutOfMemoryError带有自定义配置,speech-recognition,speech-to-text,cmusphinx,sphinx4,Speech Recognition,Speech To Text,Cmusphinx,Sphinx4,我想创建一个从.wav文件识别语音的程序。我尝试了下面的代码,但它引发了一个异常 主线程java.lang.OutOfMemoryError中出现异常:超出GC开销限制 即使我的eclipse.ini具有以下属性: --launcher.XXMaxPermSize 2048M --launcher.XXMaxPermSize 2048m -Xms2048m -Xmx2048m 如何修复此异常 Java代码: import java.net.MalformedURLException; i

我想创建一个从.wav文件识别语音的程序。我尝试了下面的代码,但它引发了一个异常

主线程java.lang.OutOfMemoryError中出现异常:超出GC开销限制

即使我的eclipse.ini具有以下属性:

--launcher.XXMaxPermSize 2048M 
--launcher.XXMaxPermSize 2048m 
-Xms2048m 
-Xmx2048m
如何修复此异常

Java代码:

import java.net.MalformedURLException;
import java.net.URL;

import edu.cmu.sphinx.frontend.util.AudioFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;

public class TestRecognizer {

    public static void main(String[] args) {
        ConfigurationManager cm;

        if (args.length > 0) {
            cm = new ConfigurationManager(args[0]);
        } else {
            cm = new ConfigurationManager("english_use_LexTreeLinguist.xml");
        }

        URL audioURL = null;
        try {
            audioURL = new URL("file:./10001-90210-01803.wav");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        if(audioURL == null)
            throw new IllegalArgumentException("Given audio file doesn't exist.");

        // allocate the recognizer
        System.out.println("Loading recognizer");
        Recognizer recognizer = (Recognizer) cm.lookup("recognizer");
        recognizer.allocate();
        System.out.println("Loading audio");
        AudioFileDataSource dataSource = (AudioFileDataSource) cm.lookup("audioFileDataSource");
        dataSource.setAudioFile(audioURL, null);

        // loop the recognition until the programm exits.
        Result result;
        System.out.println("recognizing");`enter code here`
        while ((result = recognizer.recognize())!= null) {
            String resultText = result.getBestResultNoFiller();
            System.out.println(resultText);
        }
    }

}
XML文件配置:

<config>
    <!-- ******************************************************** -->
    <!-- frequently tuned properties                              -->
    <!-- ******************************************************** -->

    <property name="absoluteBeamWidth"           value="-1"/>
    <property name="relativeBeamWidth"           value="1E-80"/>
    <property name="wordInsertionProbability"    value=".1"/>
    <property name="languageWeight"              value="8"/>
    <property name="silenceInsertionProbability" value="1"/>
    <property name="fillerInsertionProbability" value="1E-10"/>
    <property name="logLevel"                    value="WARNING"/>
    <property name="recognizer" value="recognizer"/>
    <property name="linguist"   value="lexTreeLinguist"/>
    <property name="frontend"   value="mfcFrontEnd"/>

    <!-- ******************************************************** -->
    <!-- The Recognizer configuration               -->
    <!-- ******************************************************** -->

    <component name="recognizer" 
               type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The Decoder configuration                              -->
    <!-- ******************************************************** -->

    <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="searchManager"/>
    </component>

    <component name="searchManager" 
        type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="${linguist}"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListFactory" value="activeList"/>
    </component>

    <component name="activeList" 
             type="edu.cmu.sphinx.decoder.search.SortingActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <component name="trivialPruner" 
                type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <component name="threadedScorer" 
                type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
        <property name="isCpuRelative" value="true"/>
        <property name="numThreads" value="0"/>
        <property name="minScoreablesPerThread" value="10"/>
        <property name="scoreablesKeepFeature" value="true"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The linguist  configuration                              -->
    <!-- ******************************************************** -->

    <component name="lexTreeLinguist" 
                type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
        <property name="logMath" value="logMath"/>
        <property name="acousticModel" value="wsj"/>
        <property name="languageModel" value="trigramModel"/>
        <property name="dictionary" value="englishDict"/>
        <property name="addFillerWords" value="false"/>
        <property name="fillerInsertionProbability" value="${fillerInsertionProbability}"/>
        <property name="generateUnitStates" value="false"/>
        <property name="wantUnigramSmear" value="true"/>
        <property name="unigramSmearWeight" value="1"/>
        <property name="wordInsertionProbability" 
                value="${wordInsertionProbability}"/>
        <property name="silenceInsertionProbability" 
                value="${silenceInsertionProbability}"/>
        <property name="languageWeight" value="${languageWeight}"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Language Model configuration                         -->
    <!-- ******************************************************** -->
    <component name="trigramModel" 
            type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
        <property name="unigramWeight" value=".5"/>
        <property name="maxDepth" value="3"/>
        <property name="logMath" value="logMath"/>
        <property name="dictionary" value="englishDict"/>
        <property name="location"
            value="resource:/edu/cmu/sphinx/models/language/en-us.lm.dmp"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Dictionary configuration                            -->
    <!-- ******************************************************** -->
    <component name="englishDict" 
        type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath"
                  value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/dict/cmudict.0.6d"/>
        <property name="fillerPath" 
              value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/noisedict"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="wordReplacement" value="&lt;sil&gt;"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The acoustic model configuration                         -->
    <!-- ******************************************************** -->

    <component name="wsj"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="location" value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The unit manager configuration                           -->
    <!-- ******************************************************** -->

    <component name="unitManager" 
        type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

    <!-- ******************************************************** -->
    <!-- The frontend configuration                               -->
    <!-- ******************************************************** -->

    <component name="mfcFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <!--item>streamDataSource </item-->
            <item>audioFileDataSource </item>
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>batchCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <component name="streamDataSource" 
                type="edu.cmu.sphinx.frontend.util.StreamDataSource">
        <property name="sampleRate" value="16000"/>
    <property name="bitsPerSample" value="16"/>
    <property name="bigEndianData" value="false"/>
    <property name="signedData" value="true"/>
    </component>

    <component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>

    <component name="preemphasizer"
        type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower" 
               type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

    <component name="fft" 
            type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

    <component name="melFilterBank" 
          type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>

    <component name="dct" 
            type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="batchCMN" 
               type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>

    <component name="featureExtraction"
        type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <!-- ******************************************************* -->
    <!--  Miscellaneous components                               -->
    <!-- ******************************************************* -->

    <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>

</config>

您的配置完全错误,光束太宽,前端配置不正确

如果要进行修改或只使用不带xml文件的高级API,则需要使用default configuration default.config.xml。为了获得最佳解码精度,您需要使用下载中提供的en-us-8khz声学模型

如果您想转录8khz音频,还需要调用recognizer.setSampleRate8000