C++ 正在尝试使用';创建wav文件;“捕捉一条小溪”;从Win32 WASAPI C++;
试图解释getBuffer函数返回的(BYTE*pData)音频数据时遇到问题。我正在尝试将从麦克风捕获的音频写入wav文件,我自己正在构建该文件,目的是更好地理解音频设备、音频数据和音频格式 以下是所有代码,其中大多数代码是从Windows文档中使用的,或者是从Windows文档中引用的:。试着让事情尽可能简单没有什么特别的。此代码捕获几秒钟的麦克风音频,您可以听到失真的和真正的静态y。失真是由于我将pData内容写入文件的方式造成的吗 Main.cpp 注意-请忽略到处的“cout”,仅用于调试C++ 正在尝试使用';创建wav文件;“捕捉一条小溪”;从Win32 WASAPI C++;,c++,audio,wav,wasapi,C++,Audio,Wav,Wasapi,试图解释getBuffer函数返回的(BYTE*pData)音频数据时遇到问题。我正在尝试将从麦克风捕获的音频写入wav文件,我自己正在构建该文件,目的是更好地理解音频设备、音频数据和音频格式 以下是所有代码,其中大多数代码是从Windows文档中使用的,或者是从Windows文档中引用的:。试着让事情尽可能简单没有什么特别的。此代码捕获几秒钟的麦克风音频,您可以听到失真的和真正的静态y。失真是由于我将pData内容写入文件的方式造成的吗 Main.cpp 注意-请忽略到处的“cout”,仅用于
#pragma once
#include "MyAudioSink.h"
#include <windows.h>
// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { goto Exit; }
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
HRESULT RecordAudioStream(MyAudioSink * pMySink);
int main() {
HRESULT hr;
hr = CoInitialize(nullptr);
//declare MyAudioSink object
MyAudioSink pMySink;
hr = RecordAudioStream(&pMySink);
cout << "done";
}
//-----------------------------------------------------------
// Record an audio stream from the default audio capture
// device. The RecordAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data from the
// capture device. The main loop runs every 1/2 second.
//-----------------------------------------------------------
HRESULT RecordAudioStream(MyAudioSink* pMySink)
{
HRESULT hr;
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
REFERENCE_TIME hnsActualDuration;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient* pAudioClient = NULL;
IAudioCaptureClient* pCaptureClient = NULL;
WAVEFORMATEX* pwfx = NULL;
UINT32 packetLength = 0;
BOOL bDone = FALSE;
BYTE* pData;
DWORD flags;
cout << "starting...";
hr = CoCreateInstance(
CLSID_MMDeviceEnumerator, NULL,
CLSCTX_ALL, IID_IMMDeviceEnumerator,
(void**)&pEnumerator);
cout << "test1" ;
EXIT_ON_ERROR(hr)
hr = pEnumerator->GetDefaultAudioEndpoint(
eCapture, eConsole, &pDevice);
cout << "test2" ;
EXIT_ON_ERROR(hr)
hr = pDevice->Activate(
IID_IAudioClient, CLSCTX_ALL,
NULL, (void**)&pAudioClient);
cout << "test3" ;
EXIT_ON_ERROR(hr)
hr = pAudioClient->GetMixFormat(&pwfx);
cout << "test4" ;
EXIT_ON_ERROR(hr)
hr = pAudioClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
0,
hnsRequestedDuration,
0,
pwfx,
NULL);
cout << "test5" ;
EXIT_ON_ERROR(hr)
// Get the size of the allocated buffer.
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
cout << "test6" ;
EXIT_ON_ERROR(hr)
hr = pAudioClient->GetService(
IID_IAudioCaptureClient,
(void**)&pCaptureClient);
cout << "test7" ;
EXIT_ON_ERROR(hr)
// Calculate the actual duration of the allocated buffer.
hnsActualDuration = (double)REFTIMES_PER_SEC *
bufferFrameCount / pwfx->nSamplesPerSec;
// Notify the audio sink which format to use.
hr = pMySink->SetFormat(pwfx);
cout << "test8" ;
EXIT_ON_ERROR(hr)
//initialize the wav file with the specifications set by SetFormat
hr = pMySink->_Initialize_File();
cout << "test9" ;
EXIT_ON_ERROR(hr)
hr = pAudioClient->Start(); // Start recording.
cout << "test10" ;
EXIT_ON_ERROR(hr)
cout << "about to run while...";
// Each loop fills about half of the shared buffer.
while (bDone == FALSE)
{
// Sleep for half the buffer duration.
Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
hr = pCaptureClient->GetNextPacketSize(&packetLength);
EXIT_ON_ERROR(hr)
while (packetLength != 0)
{
// Get the available data in the shared buffer.
hr = pCaptureClient->GetBuffer(
&pData,
&numFramesAvailable,
&flags, NULL, NULL);
EXIT_ON_ERROR(hr)
if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
{
cout << "silent";
pData = NULL; // Tell CopyData to write silence.
}
// Copy the available capture data to the audio sink.
hr = pMySink->CopyData(
pData, numFramesAvailable, &bDone);
EXIT_ON_ERROR(hr)
hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
EXIT_ON_ERROR(hr)
hr = pCaptureClient->GetNextPacketSize(&packetLength);
EXIT_ON_ERROR(hr)
}
}
hr = pMySink->_File_WrapUp();
EXIT_ON_ERROR(hr)
hr = pAudioClient->Stop(); // Stop recording.
EXIT_ON_ERROR(hr)
Exit:
CoTaskMemFree(pwfx);
SAFE_RELEASE(pEnumerator)
SAFE_RELEASE(pDevice)
SAFE_RELEASE(pAudioClient)
SAFE_RELEASE(pCaptureClient)
return hr;
}
#pragma一次
#包括“MyAudioSink.h”
#包括
//每秒和每毫秒的参考时间单位
#定义每秒参考次数10000000
#定义每毫秒10000次的参考次数
#在错误上定义退出错误(hres)\
如果(失败(hres)){goto Exit;}
#定义安全释放(朋克)\
如果((朋克)!=NULL)\
{(朋克)->Release();(朋克)=NULL;}
常量CLSID CLSID\u MMDeviceEnumerator=\u uuidof(MMDeviceEnumerator);
常量IID IID_IMMDeviceEnumerator=uu uuidof(IMMDeviceEnumerator);
常量IID IID_IAudioClient=uu uuidof(IAudioClient);
const IID IID_IAudioCaptureClient=uu uuidof(IAudioCaptureClient);
HRESULT RecordAudioStream(MyAudioSink*pMySink);
int main(){
HRESULT-hr;
hr=共同初始化(nullptr);
//声明MyAudioSink对象
MyAudioSink-pMySink;
hr=录制音频流(&pMySink);
cout ReleaseBuffer(numFramesAvailable);
出现错误时退出(hr)
hr=pCaptureClient->GetNextPacketSize(&packetLength);
出现错误时退出(hr)
}
}
hr=pMySink->_File_WrapUp();
出现错误时退出(hr)
hr=pAudioClient->Stop();//停止录音。
出现错误时退出(hr)
出口:
CoTaskMemFree(pwfx);
安全释放装置(pEnumerator)
安全释放(pDevice)
安全释放(客户端)
安全发布(pCaptureClient)
返回人力资源;
}
MyAudioSink.cpp
注**-这就是问题所在。您可能会注意到,名为“write_word”的UDF是使用所有音频格式参数初始化wav文件的工具,但是,我不知道如何使用此函数来写入pData内容,因此尝试使用ostream write函数,这是迄今为止产生的最佳结果(听到我的声音)但它听起来极为静态和扭曲
#pragma once
#include "MyAudioSink.h"
#include <string.h>
namespace little_endian_io
{
template <typename Word>
std::ostream& write_word(std::ostream& outs, Word value, unsigned size = sizeof(Word))
{
for (; size; --size, value >>= 8)
outs.put(static_cast <char> (value & 0xFF));
return outs;
}
}
using namespace little_endian_io;
HRESULT MyAudioSink::_Initialize_File() {
cout << "initializing file";
// prepare our wav file
mainFile.open("example.wav", ios::out | ios::binary);
// Write the file headers and sound format
mainFile << "RIFF----WAVEfmt "; // (chunk size to be filled in later)
write_word(mainFile, 16, 4); // no extension data
write_word(mainFile, 1, 2); // PCM - integer samples
write_word(mainFile, nChannels, 2); // two channels (stereo file)
write_word(mainFile, nSamplesPerSec, 4); // samples per second (Hz)
write_word(mainFile, nAvgBytesPerSec, 4); // (Sample Rate * BitsPerSample * Channels) / 8
write_word(mainFile, nBlockAlign, 2); // data block size (size of two integer samples, one for each channel, in bytes)
write_word(mainFile, wBitsPerSample, 2); // number of bits per sample (use a multiple of 8)
// Write the data chunk header
data_chunk_pos = mainFile.tellp();
mainFile << "data----"; // (chunk size to be filled in later)..
//start by setting our complete variable to False, main func will turn to true
bComplete = false;
//testing
test = 0;
return S_OK;
}
HRESULT MyAudioSink::SetFormat(WAVEFORMATEX* pwfx) {
//Update our format variables
wFormatTag = pwfx->wFormatTag;
nChannels = pwfx->nChannels;
nSamplesPerSec = pwfx->nSamplesPerSec;
nAvgBytesPerSec = pwfx->nAvgBytesPerSec;
nBlockAlign = pwfx->nBlockAlign;
wBitsPerSample = pwfx->wBitsPerSample;
cbSize = pwfx->cbSize;
return S_OK;
}
HRESULT MyAudioSink::CopyData(BYTE* pData, UINT32 numFramesAvailable, BOOL* bDone) {
//TODO
//forgot how to do this part, figure it out
for (int i = 0; i < numFramesAvailable; i++) {
mainFile.write((const char*) pData+(i* nBlockAlign), nBlockAlign);
}
//test
test++;
if (test >= nBlockAlign * 120) bComplete = true;
//check if our main function is done to finish capture
if (bComplete) *bDone = true;
return S_OK;
}
HRESULT MyAudioSink::_File_WrapUp() {
// (We'll need the final file size to fix the chunk sizes above)
file_length = mainFile.tellp();
// Fix the data chunk header to contain the data size
mainFile.seekp(data_chunk_pos + 4);
write_word(mainFile, file_length - data_chunk_pos + 8);
// Fix the file header to contain the proper RIFF chunk size, which is (file size - 8) bytes
mainFile.seekp(0 + 4);
write_word(mainFile, file_length - 8, 4);
mainFile.close();
cout << "finalized file";
return S_OK;
}
#pragma一次
#包括“MyAudioSink.h”
#包括
命名空间little_endian_io
{
模板
std::ostream&write_字(std::ostream&out,字值,无符号大小=sizeof(字))
{
对于(;size;--size,值>>=8)
输出(静态_转换(值和0xFF));
退出;
}
}
使用名称空间little_endian_io;
HRESULT MyAudioSink::_初始化_文件(){
不能取样;
nAvgBytesPerSec=pwfx->nAvgBytesPerSec;
nBlockAlign=pwfx->nBlockAlign;
wBitsPerSample=pwfx->wBitsPerSample;
cbSize=pwfx->cbSize;
返回S_OK;
}
HRESULT MyAudioSink::CopyData(字节*pData,UINT32 numFramesAvailable,BOOL*bDone){
//待办事项
//忘了怎么做这部分,想想办法吧
for(int i=0;i=nblockallign*120)bComplete=true;
//检查我们的主要功能是否已完成以完成捕获
如果(b完成)*b一=真;
返回S_OK;
}
HRESULT MyAudioSink::_File_WrapUp(){
//(我们需要最终的文件大小来修复上面的块大小)
file_length=mainFile.tellp();
//修复数据块标题以包含数据大小
mainFile.seekp(数据块位置+4);
写入字(主文件,文件长度-数据块位置+8);
//修复文件头以包含适当的RIFF块大小,即(文件大小-8)字节
mainFile.seekp(0+4);
写入字(主文件,文件长度-8,4);
mainFile.close();
cout我怀疑的问题是,您的程序只处理PCM格式,而不处理可扩展格式。最终的头将不同
添加此代码以确认:
pAudioClient->GetMixFormat(&pwfx);
开关(pwfx->wFormatTag)
{
案例波形\u格式\u PCM:
cout doooood!非常感谢!我已经用C写了所有需要的东西,并且遇到了失真问题。我感觉我真的必须搜索黑暗的网络才能找到这个答案。100%它被捕获为KSDATAFORMAT\u SUBTYPE\u IEEE\u FLOAT
。现在我从写1
改为写3
到我的文件,并且工作正常当我看到十六进制值时,我觉得它们看起来有点像浮点数,但我不确定。
#pragma once
//
#include <audioclient.h>
#include <Mmdeviceapi.h>
#include <fstream>
#include <iostream>
#include <cmath>
using namespace std;
class MyAudioSink
{
private:
size_t data_chunk_pos;
size_t file_length;
ofstream mainFile;
//sample format
WORD wFormatTag;
WORD nChannels;
DWORD nSamplesPerSec;
DWORD nAvgBytesPerSec;
WORD nBlockAlign;
WORD wBitsPerSample;
WORD cbSize;
int test;
public:
bool bComplete;
HRESULT _Initialize_File();
HRESULT SetFormat(WAVEFORMATEX* pwfx);
HRESULT CopyData(BYTE* pData, UINT32 numFramesAvailable, BOOL* bDone);
HRESULT _File_WrapUp();
};
pAudioClient->GetMixFormat(&pwfx);
switch(pwfx->wFormatTag)
{
case WAVE_FORMAT_PCM:
cout << "WAVE_FORMAT_PCM";
break;
case WAVE_FORMAT_IEEE_FLOAT:
cout << "WAVE_FORMAT_IEEE_FLOAT";
break;
case WAVE_FORMAT_EXTENSIBLE:
cout << "WAVE_FORMAT_EXTENSIBLE";
WAVEFORMATEXTENSIBLE *pWaveFormatExtensible = reinterpret_cast<WAVEFORMATEXTENSIBLE *>(pwfx);
if(pWaveFormatExtensible->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
{
cout << "KSDATAFORMAT_SUBTYPE_PCM";
}
else if(pWaveFormatExtensible->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
{
cout << "KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
}
break;
}