Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/c/60.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C 使用libsox对wav文件进行下采样_C_Ubuntu_Libsox - Fatal编程技术网

C 使用libsox对wav文件进行下采样

C 使用libsox对wav文件进行下采样,c,ubuntu,libsox,C,Ubuntu,Libsox,我正在尝试使用Sox库C程序()将16KHZ 16位有符号PCM编码的波形文件转换为8KHz 8位mu编码的波形文件。从PCM到mu的转换工作正常。但当我应用下采样效果时,输出文件的持续时间仅为I/p文件的一半(见下文)。我使用了帖子中提到的技巧,但对我没有帮助 当我执行下面的代码时,我看到一个警告 wav: Premature EOF on .wav input file 输出: Input File : 'text2speech_0.wav' Channels : 1

我正在尝试用一个基于 SoX 库(libsox)的 C 程序,将 16 kHz、16 位有符号 PCM 编码的 WAV 文件转换为 8 kHz、8 位 μ-law 编码的 WAV 文件。从 PCM 到 μ-law 的转换工作正常。但当我应用下采样效果时,输出文件的时长只有输入文件的一半(见下文)。我尝试了相关帖子中提到的技巧,但没有帮助。

当我执行下面的代码时,我看到一个警告

wav: Premature EOF on .wav input file
输出:

Input File     : 'text2speech_0.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
**Duration       : 00:00:06.24 = 99777 samples ~ 467.705 CDDA sectors**
File Size      : 200k
Bit Rate       : 256k
Sample Encoding: 16-bit Signed Integer PCM


Input File     : 'out_8k.wav'
Channels       : 1
Sample Rate    : 8000
Precision      : 14-bit
**Duration       : 00:00:03.12 = 24945 samples ~ 233.859 CDDA sectors**
File Size      : 49.9k
Bit Rate       : 128k
Sample Encoding: 8bit u-law
/*
 * Reads the audio file named by argv[1], pushes it through a libsox effects
 * chain (input -> optional "rate" -> optional "channels" -> output) and
 * writes the result to the file named by argv[2].
 *
 * Usage: <program> <input-file> <output-file>   (argc must be exactly 3)
 * Returns 0 after the chain has run and both files are closed.
 *
 * NOTE(review): every call that does real work (sox_init, sox_open_read,
 * sox_open_write, sox_effect_options, sox_add_effect) sits inside assert().
 * When compiled with NDEBUG the assert arguments are not evaluated, so those
 * calls would not run at all.
 */
int main(int argc, char * argv[])
{
    static sox_format_t * in, * out; /* input and output files */
    sox_effects_chain_t * chain;
    sox_effect_t * e;
    char * args[10];
    assert(argc == 3);
    assert(sox_init() == SOX_SUCCESS);
    assert(in = sox_open_read(argv[1], NULL, NULL, NULL));

    /* The output is opened with the *input's* signal description; the
     * remaining arguments are all NULL. out->signal.rate is only changed
     * further down, after the file is already open. */
    assert(out = sox_open_write(argv[2], &in->signal, NULL, NULL, NULL, NULL));

    chain = sox_create_effects_chain(&in->encoding, &out->encoding);

    /* First effect in the chain: "input", given the input file handle as its
     * single option (passed through the char* argument array). */
    e = sox_create_effect(sox_find_effect("input"));
    args[0] = (char *)in, assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
    assert(sox_add_effect(chain, e, &in->signal, &in->signal) == SOX_SUCCESS);
    /* NOTE(review): e is freed right after being added; presumably the chain
     * keeps its own copy of the effect -- confirm against the libsox docs. */
    free(e);

    /* Hard-coded rates, overriding whatever the opened files reported.
     * NOTE(review): the discussion accompanying this code states that the
     * in->signal.rate line was added by mistake and can be removed. */
    out->signal.rate = 8000;
    in->signal.rate = 16000;

   /* With the two assignments above this condition is always true. */
   if (in->signal.rate != out->signal.rate) {
        e = sox_create_effect(sox_find_effect("rate"));
        /* NOTE(review): the "rate" effect is given "16000" as its option
         * while out->signal.rate was just set to 8000 -- looks inconsistent;
         * verify the intended target rate. */
        args[0] = "16000", assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
        assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
        free(e);
    }

    /* Add a "channels" effect (no options) only when the channel counts of
     * input and output differ. */
    if (in->signal.channels != out->signal.channels) {
        e = sox_create_effect(sox_find_effect("channels"));
        assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
        assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
        free(e);
    }

    /* Last effect in the chain: "output", given the output file handle. */
    e = sox_create_effect(sox_find_effect("output"));
    args[0] = (char *)out, assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
    assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
    free(e);

    /* Run the chain; the return value is not checked. */
    sox_flow_effects(chain, NULL, NULL);

    /* Tear down: chain first, then the output and input files, then libsox. */
    sox_delete_effects_chain(chain);
    sox_close(out);
    sox_close(in);
    sox_quit();

    return 0;
}
gcc -g -o example3 example3.c `pkg-config --cflags --libs sox`
./example3 text2speech_0.wav out_8k.wav
代码:

Input File     : 'text2speech_0.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
**Duration       : 00:00:06.24 = 99777 samples ~ 467.705 CDDA sectors**
File Size      : 200k
Bit Rate       : 256k
Sample Encoding: 16-bit Signed Integer PCM


Input File     : 'out_8k.wav'
Channels       : 1
Sample Rate    : 8000
Precision      : 14-bit
**Duration       : 00:00:03.12 = 24945 samples ~ 233.859 CDDA sectors**
File Size      : 49.9k
Bit Rate       : 128k
Sample Encoding: 8bit u-law
/*
 * Reads the audio file named by argv[1], pushes it through a libsox effects
 * chain (input -> optional "rate" -> optional "channels" -> output) and
 * writes the result to the file named by argv[2].
 *
 * Usage: <program> <input-file> <output-file>   (argc must be exactly 3)
 * Returns 0 after the chain has run and both files are closed.
 *
 * NOTE(review): every call that does real work (sox_init, sox_open_read,
 * sox_open_write, sox_effect_options, sox_add_effect) sits inside assert().
 * When compiled with NDEBUG the assert arguments are not evaluated, so those
 * calls would not run at all.
 */
int main(int argc, char * argv[])
{
    static sox_format_t * in, * out; /* input and output files */
    sox_effects_chain_t * chain;
    sox_effect_t * e;
    char * args[10];
    assert(argc == 3);
    assert(sox_init() == SOX_SUCCESS);
    assert(in = sox_open_read(argv[1], NULL, NULL, NULL));

    /* The output is opened with the *input's* signal description; the
     * remaining arguments are all NULL. out->signal.rate is only changed
     * further down, after the file is already open. */
    assert(out = sox_open_write(argv[2], &in->signal, NULL, NULL, NULL, NULL));

    chain = sox_create_effects_chain(&in->encoding, &out->encoding);

    /* First effect in the chain: "input", given the input file handle as its
     * single option (passed through the char* argument array). */
    e = sox_create_effect(sox_find_effect("input"));
    args[0] = (char *)in, assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
    assert(sox_add_effect(chain, e, &in->signal, &in->signal) == SOX_SUCCESS);
    /* NOTE(review): e is freed right after being added; presumably the chain
     * keeps its own copy of the effect -- confirm against the libsox docs. */
    free(e);

    /* Hard-coded rates, overriding whatever the opened files reported.
     * NOTE(review): the discussion accompanying this code states that the
     * in->signal.rate line was added by mistake and can be removed. */
    out->signal.rate = 8000;
    in->signal.rate = 16000;

   /* With the two assignments above this condition is always true. */
   if (in->signal.rate != out->signal.rate) {
        e = sox_create_effect(sox_find_effect("rate"));
        /* NOTE(review): the "rate" effect is given "16000" as its option
         * while out->signal.rate was just set to 8000 -- looks inconsistent;
         * verify the intended target rate. */
        args[0] = "16000", assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
        assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
        free(e);
    }

    /* Add a "channels" effect (no options) only when the channel counts of
     * input and output differ. */
    if (in->signal.channels != out->signal.channels) {
        e = sox_create_effect(sox_find_effect("channels"));
        assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
        assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
        free(e);
    }

    /* Last effect in the chain: "output", given the output file handle. */
    e = sox_create_effect(sox_find_effect("output"));
    args[0] = (char *)out, assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
    assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
    free(e);

    /* Run the chain; the return value is not checked. */
    sox_flow_effects(chain, NULL, NULL);

    /* Tear down: chain first, then the output and input files, then libsox. */
    sox_delete_effects_chain(chain);
    sox_close(out);
    sox_close(in);
    sox_quit();

    return 0;
}
gcc -g -o example3 example3.c `pkg-config --cflags --libs sox`
./example3 text2speech_0.wav out_8k.wav
编译和执行:

Input File     : 'text2speech_0.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
**Duration       : 00:00:06.24 = 99777 samples ~ 467.705 CDDA sectors**
File Size      : 200k
Bit Rate       : 256k
Sample Encoding: 16-bit Signed Integer PCM


Input File     : 'out_8k.wav'
Channels       : 1
Sample Rate    : 8000
Precision      : 14-bit
**Duration       : 00:00:03.12 = 24945 samples ~ 233.859 CDDA sectors**
File Size      : 49.9k
Bit Rate       : 128k
Sample Encoding: 8bit u-law
/*
 * Reads the audio file named by argv[1], pushes it through a libsox effects
 * chain (input -> optional "rate" -> optional "channels" -> output) and
 * writes the result to the file named by argv[2].
 *
 * Usage: <program> <input-file> <output-file>   (argc must be exactly 3)
 * Returns 0 after the chain has run and both files are closed.
 *
 * NOTE(review): every call that does real work (sox_init, sox_open_read,
 * sox_open_write, sox_effect_options, sox_add_effect) sits inside assert().
 * When compiled with NDEBUG the assert arguments are not evaluated, so those
 * calls would not run at all.
 */
int main(int argc, char * argv[])
{
    static sox_format_t * in, * out; /* input and output files */
    sox_effects_chain_t * chain;
    sox_effect_t * e;
    char * args[10];
    assert(argc == 3);
    assert(sox_init() == SOX_SUCCESS);
    assert(in = sox_open_read(argv[1], NULL, NULL, NULL));

    /* The output is opened with the *input's* signal description; the
     * remaining arguments are all NULL. out->signal.rate is only changed
     * further down, after the file is already open. */
    assert(out = sox_open_write(argv[2], &in->signal, NULL, NULL, NULL, NULL));

    chain = sox_create_effects_chain(&in->encoding, &out->encoding);

    /* First effect in the chain: "input", given the input file handle as its
     * single option (passed through the char* argument array). */
    e = sox_create_effect(sox_find_effect("input"));
    args[0] = (char *)in, assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
    assert(sox_add_effect(chain, e, &in->signal, &in->signal) == SOX_SUCCESS);
    /* NOTE(review): e is freed right after being added; presumably the chain
     * keeps its own copy of the effect -- confirm against the libsox docs. */
    free(e);

    /* Hard-coded rates, overriding whatever the opened files reported.
     * NOTE(review): the discussion accompanying this code states that the
     * in->signal.rate line was added by mistake and can be removed. */
    out->signal.rate = 8000;
    in->signal.rate = 16000;

   /* With the two assignments above this condition is always true. */
   if (in->signal.rate != out->signal.rate) {
        e = sox_create_effect(sox_find_effect("rate"));
        /* NOTE(review): the "rate" effect is given "16000" as its option
         * while out->signal.rate was just set to 8000 -- looks inconsistent;
         * verify the intended target rate. */
        args[0] = "16000", assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
        assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
        free(e);
    }

    /* Add a "channels" effect (no options) only when the channel counts of
     * input and output differ. */
    if (in->signal.channels != out->signal.channels) {
        e = sox_create_effect(sox_find_effect("channels"));
        assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
        assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
        free(e);
    }

    /* Last effect in the chain: "output", given the output file handle. */
    e = sox_create_effect(sox_find_effect("output"));
    args[0] = (char *)out, assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
    assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
    free(e);

    /* Run the chain; the return value is not checked. */
    sox_flow_effects(chain, NULL, NULL);

    /* Tear down: chain first, then the output and input files, then libsox. */
    sox_delete_effects_chain(chain);
    sox_close(out);
    sox_close(in);
    sox_quit();

    return 0;
}
gcc -g -o example3 example3.c `pkg-config --cflags --libs sox`
./example3 text2speech_0.wav out_8k.wav

下面是可正常工作的代码的 diff。我认为主要的错误在于:必须在以写入方式打开输出文件之前,先设置好“out”结构。

+#include <sox.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+ int main(int argc, char * argv[])
+ {
+     static sox_format_t * in, * out; /* input and output files */
@@ -7,9 +13,15 @@
 assert(argc == 3);
 assert(sox_init() == SOX_SUCCESS);
 assert(in = sox_open_read(argv[1], NULL, NULL, NULL));
 +    out= (sox_format_t *) malloc(sizeof (sox_format_t));
 +    memcpy(out, in, sizeof (sox_format_t));
 +    out->encoding.encoding = SOX_ENCODING_ULAW;
 +    out->encoding.bits_per_sample=8;
 +    out->signal.rate = 8000;
 +    out->signal.precision = 8;
 +    out->signal.length = SOX_UNSPEC;
 +    assert(out = sox_open_write(argv[2], &out->signal, &out->encoding, NULL, NULL, NULL));

 -    assert(out = sox_open_write(argv[2], &in->signal, NULL, NULL, NULL, NULL));
 -
 chain = sox_create_effects_chain(&in->encoding, &out->encoding);

 e = sox_create_effect(sox_find_effect("input"));
 @@ -17,16 +29,14 @@
 assert(sox_add_effect(chain, e, &in->signal, &in->signal) == SOX_SUCCESS);
 free(e);

 -    out->signal.rate = 8000;
 -    in->signal.rate = 16000;
 if (in->signal.rate != out->signal.rate) {
     e = sox_create_effect(sox_find_effect("rate"));
 -        args[0] = "16000", assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
 +        assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
 +         e->handler.flags |= SOX_EFF_LENGTH;
 +  
     assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
     free(e);
+#include <sox.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+ int main(int argc, char * argv[])
+ {
+     static sox_format_t * in, * out; /* input and output files */
@@ -7,9 +13,15 @@
 assert(argc == 3);
 assert(sox_init() == SOX_SUCCESS);
 assert(in = sox_open_read(argv[1], NULL, NULL, NULL));
 +    out= (sox_format_t *) malloc(sizeof (sox_format_t));
 +    memcpy(out, in, sizeof (sox_format_t));
 +    out->encoding.encoding = SOX_ENCODING_ULAW;
 +    out->encoding.bits_per_sample=8;
 +    out->signal.rate = 8000;
 +    out->signal.precision = 8;
 +    out->signal.length = SOX_UNSPEC;
 +    assert(out = sox_open_write(argv[2], &out->signal, &out->encoding, NULL, NULL, NULL));
 -    assert(out = sox_open_write(argv[2], &in->signal, NULL, NULL, NULL, NULL));
 -
 chain = sox_create_effects_chain(&in->encoding, &out->encoding);
 e = sox_create_effect(sox_find_effect("input"));
 @@ -17,16 +29,14 @@
 assert(sox_add_effect(chain, e, &in->signal, &in->signal) == SOX_SUCCESS);
 free(e);
 -    out->signal.rate = 8000;
 -    in->signal.rate = 16000;
 if (in->signal.rate != out->signal.rate) {
     e = sox_create_effect(sox_find_effect("rate"));
 -        args[0] = "16000", assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
 +        assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
 +         e->handler.flags |= SOX_EFF_LENGTH;
 +  
     assert(sox_add_effect(chain, e, &in->signal, &out->signal) == SOX_SUCCESS);
     free(e);

我不熟悉 SoX 库,所以可能会问一些愚蠢的问题:为什么输出中的 'out_8k.wav' 也被标为“Input File”(输入文件)?为什么是“Precision: 16-bit”(精度:16 位),而你说的是“转换为 8 kHz、8 位 μ-law 编码的 WAV”?为什么要用 in->signal.rate = 16000; 手动指定输入文件的采样率?——我们传入 out_8k.wav 是为了把数据写入其中。精度指的是转换/下采样所用的精度,与编码无关(实际值是 14 位,帖子中已相应更新)。输入采样率其实不必指定;我尝试过不同的方法,误加了这一行,可以删除。——我没有用过这个库,但从命令行来看,-b bits、你的 out->signal.rate 和 -e encoding 的组合表明,下采样可能受 PCM 存储方式(有符号、无符号或浮点)的影响。我无法从你的代码中判断这是否相关,但这是一个值得检查的方向。——我尝试将 16 kHz 无符号 PCM 的 WAV 文件转换为 8 kHz μ-law,结果是一样的:输出文件中只得到一半的输入数据。