Audio 使用UWP监控现场音频并检测枪声/拍击声
我正在开发一个新的UWP应用程序,它需要监控声音,并在每次出现突发的冲击声(例如枪声或拍手声)时触发一个事件。
标签:audio, uwp, audio-streaming, uwp-xaml
- 它需要启用默认音频输入设备并监听实时音频
- 可以设置音频灵敏度,用于区分环境噪音与枪声/拍击声
- 当出现类似拍击/枪声的高频声音时(理想情况下,声音频率落在所配置频率的±40范围内即视为枪声/拍击声),应触发该事件
- 不需要保存音频
我尝试的实现如下。声音监控页面:
/// <summary>
/// Page that subscribes to the capture-side audio state of the default
/// communications microphone.
/// </summary>
/// <remarks>
/// <see cref="AudioStateMonitor"/> reports only the system-imposed sound level
/// (Full / Low / Muted). It does not report the loudness or frequency of the
/// recorded signal.
/// </remarks>
public sealed partial class MyPage : Page
{
    // Held in a field so the monitor and its event subscription outlive Page_Loaded.
    private AudioStateMonitor gameChatAudioStateMonitor;

    /// <summary>
    /// Creates a capture monitor for the default communications device and
    /// subscribes to its sound-level changes.
    /// </summary>
    private async void Page_Loaded(object sender, RoutedEventArgs e)
    {
        string deviceId = Windows.Media.Devices.MediaDevice.GetDefaultAudioCaptureId(Windows.Media.Devices.AudioDeviceRole.Communications);
        gameChatAudioStateMonitor = AudioStateMonitor.CreateForCaptureMonitoringWithCategoryAndDeviceId(MediaCategory.GameChat, deviceId);
        gameChatAudioStateMonitor.SoundLevelChanged += GameChatSoundLevelChanged;
        //other logic
    }

    /// <summary>
    /// Handles a change of the monitored sound level.
    /// </summary>
    /// <param name="sender">Monitor whose <c>SoundLevel</c> changed.</param>
    /// <param name="args">Unused event payload.</param>
    private void GameChatSoundLevelChanged(AudioStateMonitor sender, object args)
    {
        switch (sender.SoundLevel)
        {
            // Full and Muted are the two expected capture states; both are reported the same way.
            case SoundLevel.Full:
            case SoundLevel.Muted:
                LevelChangeEvent();
                break;
            case SoundLevel.Low:
                // Audio capture should never be "ducked", only muted or full volume.
                Debug.WriteLine("Unexpected audio state change.");
                break;
        }
    }
}
声级变化:
/// <summary>
/// Page that subscribes to the capture-side audio state of the default
/// communications microphone.
/// </summary>
/// <remarks>
/// <see cref="AudioStateMonitor"/> reports only the system-imposed sound level
/// (Full / Low / Muted). It does not report the loudness or frequency of the
/// recorded signal.
/// </remarks>
public sealed partial class MyPage : Page
{
    // Held in a field so the monitor and its event subscription outlive Page_Loaded.
    private AudioStateMonitor gameChatAudioStateMonitor;

    /// <summary>
    /// Creates a capture monitor for the default communications device and
    /// subscribes to its sound-level changes.
    /// </summary>
    private async void Page_Loaded(object sender, RoutedEventArgs e)
    {
        string deviceId = Windows.Media.Devices.MediaDevice.GetDefaultAudioCaptureId(Windows.Media.Devices.AudioDeviceRole.Communications);
        gameChatAudioStateMonitor = AudioStateMonitor.CreateForCaptureMonitoringWithCategoryAndDeviceId(MediaCategory.GameChat, deviceId);
        gameChatAudioStateMonitor.SoundLevelChanged += GameChatSoundLevelChanged;
        //other logic
    }

    /// <summary>
    /// Handles a change of the monitored sound level.
    /// </summary>
    /// <param name="sender">Monitor whose <c>SoundLevel</c> changed.</param>
    /// <param name="args">Unused event payload.</param>
    private void GameChatSoundLevelChanged(AudioStateMonitor sender, object args)
    {
        switch (sender.SoundLevel)
        {
            // Full and Muted are the two expected capture states; both are reported the same way.
            case SoundLevel.Full:
            case SoundLevel.Muted:
                LevelChangeEvent();
                break;
            case SoundLevel.Low:
                // Audio capture should never be "ducked", only muted or full volume.
                Debug.WriteLine("Unexpected audio state change.");
                break;
        }
    }
}
环境:windows 10(v1809)IDE:VS 2017
不确定这是否是正确的方法。这不会启用音频,也不会触发级别更改事件
我在WinForms&NAudio教程中看到了其他选项。可能通过采样频率,我可以检查事件。。。没有关于使用NAudio和UWP绘制图形和识别频率的教程
更新:
根据@Rob Caplan-MSFT的建议,以下是我的最终结果 IMemoryBufferByteAccess.cs
// COM interop declaration for IMemoryBufferByteAccess.
// It gives byte-level access to the memory behind a buffer reference, which the
// page code needs in order to write generated audio samples into an AudioFrame.
// The interface is identified to COM by the Guid attribute below.
[ComImport]
[Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
unsafe interface IMemoryBufferByteAccess
{
/// <summary>Returns a raw pointer to the buffer and its capacity.</summary>
/// <param name="buffer">Receives a pointer to the first byte of the buffer.</param>
/// <param name="capacity">Receives the buffer capacity (presumably in bytes; the caller names it capacityInBytes).</param>
void GetBuffer(out byte* buffer, out uint capacity);
}
/// <summary>
/// Page that builds an <see cref="AudioGraph"/> feeding a synthesized 1 kHz sine
/// tone from a frame input node into a device output node. A button starts and
/// stops the tone.
/// </summary>
public sealed partial class GunFireMonitorPage : Page
{
    private MainPage _rootPage;
    public static GunFireMonitorPage Current;
    private AudioGraph _graph;
    private AudioDeviceOutputNode _deviceOutputNode;
    private AudioFrameInputNode _frameInputNode;

    /// <summary>
    /// Phase of the sine generator in radians. It is carried over between calls
    /// to <see cref="GenerateAudioData"/> so consecutive frames continue the same wave.
    /// </summary>
    public double Theta;

    // The constructor must carry the class name; it was previously declared as "DrivePage",
    // which does not compile inside GunFireMonitorPage.
    public GunFireMonitorPage()
    {
        InitializeComponent();
        Current = this;
    }

    /// <summary>Caches the root page and builds the audio graph when the page is shown.</summary>
    protected override async void OnNavigatedTo(NavigationEventArgs e)
    {
        _rootPage = MainPage.Current;
        await CreateAudioGraph();
    }

    /// <summary>Detaches the sample callback and releases the audio graph when leaving the page.</summary>
    protected override void OnNavigatedFrom(NavigationEventArgs e)
    {
        // Unsubscribe first so no further sample requests reach this page once the graph is gone.
        if (_frameInputNode != null)
        {
            _frameInputNode.QuantumStarted -= node_QuantumStarted;
        }

        _graph?.Dispose();
    }

    private void Page_Loaded(object sender, RoutedEventArgs e)
    {
    }

    /// <summary>
    /// Creates an audio frame holding <paramref name="samples"/> mono float samples
    /// of a 1 kHz sine wave at amplitude 0.3.
    /// </summary>
    /// <param name="samples">Number of samples to synthesize.</param>
    /// <returns>A frame whose buffer contains the generated samples.</returns>
    private unsafe AudioFrame GenerateAudioData(uint samples)
    {
        // Buffer size is (number of samples) * (size of each sample).
        // Mono audio is generated; for multi-channel, multiply by the number of channels.
        uint bufferSize = samples * sizeof(float);
        AudioFrame audioFrame = new AudioFrame(bufferSize);

        using (AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Write))
        using (IMemoryBufferReference reference = buffer.CreateReference())
        {
            // Get the raw buffer from the AudioFrame.
            // ReSharper disable once SuspiciousTypeConversion.Global
            // ReSharper disable once UnusedVariable
            ((IMemoryBufferByteAccess) reference).GetBuffer(out var dataInBytes, out var capacityInBytes);

            // The generated data is float, so view the bytes as floats.
            var dataInFloat = (float*)dataInBytes;

            float freq = 1000; // generate a 1 kHz tone
            float amplitude = 0.3f;
            int sampleRate = (int)_graph.EncodingProperties.SampleRate;
            double sampleIncrement = (freq * (Math.PI * 2)) / sampleRate;

            // Fill the buffer with the sine wave, advancing the shared phase per sample.
            for (uint i = 0; i < samples; i++)
            {
                double sinValue = amplitude * Math.Sin(Theta);
                dataInFloat[i] = (float)sinValue;
                Theta += sampleIncrement;
            }
        }

        return audioFrame;
    }

    /// <summary>
    /// Supplies the frame input node with exactly the number of samples it asks for.
    /// </summary>
    private void node_QuantumStarted(AudioFrameInputNode sender, FrameInputNodeQuantumStartedEventArgs args)
    {
        // GenerateAudioData can provide PCM audio data by directly synthesizing it or reading from a file.
        // The node runs at the same rate as the rest of the graph.
        // For minimum latency, provide only the required samples; extra samples add latency.
        uint numSamplesNeeded = (uint)args.RequiredSamples;
        if (numSamplesNeeded != 0)
        {
            AudioFrame audioData = GenerateAudioData(numSamplesNeeded);
            _frameInputNode.AddFrame(audioData);
        }
    }

    /// <summary>Toggles tone generation and updates the button caption and pipe colour.</summary>
    private void Button_Click(object sender, RoutedEventArgs e)
    {
        // The node stays null when graph or output-node creation failed; nothing to toggle then.
        if (_frameInputNode == null)
        {
            return;
        }

        if (generateButton.Content != null && generateButton.Content.Equals("Generate Audio"))
        {
            _frameInputNode.Start();
            generateButton.Content = "Stop";
            audioPipe.Fill = new SolidColorBrush(Colors.Blue);
        }
        else if (generateButton.Content != null && generateButton.Content.Equals("Stop"))
        {
            _frameInputNode.Stop();
            generateButton.Content = "Generate Audio";
            audioPipe.Fill = new SolidColorBrush(Color.FromArgb(255, 49, 49, 49));
        }
    }

    /// <summary>
    /// Creates the graph, its device output node and a mono frame input node,
    /// wires them together and starts the graph with the input node stopped.
    /// Failures are reported through the root page and leave the page without a usable graph.
    /// </summary>
    private async Task CreateAudioGraph()
    {
        // Create an AudioGraph with default settings
        AudioGraphSettings settings = new AudioGraphSettings(AudioRenderCategory.Media);
        CreateAudioGraphResult result = await AudioGraph.CreateAsync(settings);
        if (result.Status != AudioGraphCreationStatus.Success)
        {
            // Cannot create graph
            _rootPage.NotifyUser($"AudioGraph Creation Error because {result.Status.ToString()}", NotifyType.ErrorMessage);
            return;
        }

        _graph = result.Graph;

        // Create a device output node
        CreateAudioDeviceOutputNodeResult deviceOutputNodeResult = await _graph.CreateDeviceOutputNodeAsync();
        if (deviceOutputNodeResult.Status != AudioDeviceNodeCreationStatus.Success)
        {
            // Cannot create device output node
            _rootPage.NotifyUser(
                $"Audio Device Output unavailable because {deviceOutputNodeResult.Status.ToString()}", NotifyType.ErrorMessage);
            speakerContainer.Background = new SolidColorBrush(Colors.Red);

            // Stop here: continuing would report success and connect a missing output node.
            return;
        }

        _deviceOutputNode = deviceOutputNodeResult.DeviceOutputNode;
        _rootPage.NotifyUser("Device Output Node successfully created", NotifyType.StatusMessage);
        speakerContainer.Background = new SolidColorBrush(Colors.Green);

        // Create the FrameInputNode at the same format as the graph, except explicitly set mono.
        AudioEncodingProperties nodeEncodingProperties = _graph.EncodingProperties;
        nodeEncodingProperties.ChannelCount = 1;
        _frameInputNode = _graph.CreateFrameInputNode(nodeEncodingProperties);
        _frameInputNode.AddOutgoingConnection(_deviceOutputNode);
        frameContainer.Background = new SolidColorBrush(Colors.Green);

        // Initialize the Frame Input Node in the stopped state
        _frameInputNode.Stop();

        // This event is raised whenever the node needs data, so samples are generated on demand.
        _frameInputNode.QuantumStarted += node_QuantumStarted;

        // Start the graph since only the frame input node is started/stopped afterwards
        _graph.Start();
    }
}
GunFireMonitorPage.xaml.cs
// COM interop declaration for IMemoryBufferByteAccess.
// It gives byte-level access to the memory behind a buffer reference, which the
// page code needs in order to write generated audio samples into an AudioFrame.
// The interface is identified to COM by the Guid attribute below.
[ComImport]
[Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
unsafe interface IMemoryBufferByteAccess
{
/// <summary>Returns a raw pointer to the buffer and its capacity.</summary>
/// <param name="buffer">Receives a pointer to the first byte of the buffer.</param>
/// <param name="capacity">Receives the buffer capacity (presumably in bytes; the caller names it capacityInBytes).</param>
void GetBuffer(out byte* buffer, out uint capacity);
}
/// <summary>
/// Page that builds an <see cref="AudioGraph"/> feeding a synthesized 1 kHz sine
/// tone from a frame input node into a device output node. A button starts and
/// stops the tone.
/// </summary>
public sealed partial class GunFireMonitorPage : Page
{
    private MainPage _rootPage;
    public static GunFireMonitorPage Current;
    private AudioGraph _graph;
    private AudioDeviceOutputNode _deviceOutputNode;
    private AudioFrameInputNode _frameInputNode;

    /// <summary>
    /// Phase of the sine generator in radians. It is carried over between calls
    /// to <see cref="GenerateAudioData"/> so consecutive frames continue the same wave.
    /// </summary>
    public double Theta;

    // The constructor must carry the class name; it was previously declared as "DrivePage",
    // which does not compile inside GunFireMonitorPage.
    public GunFireMonitorPage()
    {
        InitializeComponent();
        Current = this;
    }

    /// <summary>Caches the root page and builds the audio graph when the page is shown.</summary>
    protected override async void OnNavigatedTo(NavigationEventArgs e)
    {
        _rootPage = MainPage.Current;
        await CreateAudioGraph();
    }

    /// <summary>Detaches the sample callback and releases the audio graph when leaving the page.</summary>
    protected override void OnNavigatedFrom(NavigationEventArgs e)
    {
        // Unsubscribe first so no further sample requests reach this page once the graph is gone.
        if (_frameInputNode != null)
        {
            _frameInputNode.QuantumStarted -= node_QuantumStarted;
        }

        _graph?.Dispose();
    }

    private void Page_Loaded(object sender, RoutedEventArgs e)
    {
    }

    /// <summary>
    /// Creates an audio frame holding <paramref name="samples"/> mono float samples
    /// of a 1 kHz sine wave at amplitude 0.3.
    /// </summary>
    /// <param name="samples">Number of samples to synthesize.</param>
    /// <returns>A frame whose buffer contains the generated samples.</returns>
    private unsafe AudioFrame GenerateAudioData(uint samples)
    {
        // Buffer size is (number of samples) * (size of each sample).
        // Mono audio is generated; for multi-channel, multiply by the number of channels.
        uint bufferSize = samples * sizeof(float);
        AudioFrame audioFrame = new AudioFrame(bufferSize);

        using (AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Write))
        using (IMemoryBufferReference reference = buffer.CreateReference())
        {
            // Get the raw buffer from the AudioFrame.
            // ReSharper disable once SuspiciousTypeConversion.Global
            // ReSharper disable once UnusedVariable
            ((IMemoryBufferByteAccess) reference).GetBuffer(out var dataInBytes, out var capacityInBytes);

            // The generated data is float, so view the bytes as floats.
            var dataInFloat = (float*)dataInBytes;

            float freq = 1000; // generate a 1 kHz tone
            float amplitude = 0.3f;
            int sampleRate = (int)_graph.EncodingProperties.SampleRate;
            double sampleIncrement = (freq * (Math.PI * 2)) / sampleRate;

            // Fill the buffer with the sine wave, advancing the shared phase per sample.
            for (uint i = 0; i < samples; i++)
            {
                double sinValue = amplitude * Math.Sin(Theta);
                dataInFloat[i] = (float)sinValue;
                Theta += sampleIncrement;
            }
        }

        return audioFrame;
    }

    /// <summary>
    /// Supplies the frame input node with exactly the number of samples it asks for.
    /// </summary>
    private void node_QuantumStarted(AudioFrameInputNode sender, FrameInputNodeQuantumStartedEventArgs args)
    {
        // GenerateAudioData can provide PCM audio data by directly synthesizing it or reading from a file.
        // The node runs at the same rate as the rest of the graph.
        // For minimum latency, provide only the required samples; extra samples add latency.
        uint numSamplesNeeded = (uint)args.RequiredSamples;
        if (numSamplesNeeded != 0)
        {
            AudioFrame audioData = GenerateAudioData(numSamplesNeeded);
            _frameInputNode.AddFrame(audioData);
        }
    }

    /// <summary>Toggles tone generation and updates the button caption and pipe colour.</summary>
    private void Button_Click(object sender, RoutedEventArgs e)
    {
        // The node stays null when graph or output-node creation failed; nothing to toggle then.
        if (_frameInputNode == null)
        {
            return;
        }

        if (generateButton.Content != null && generateButton.Content.Equals("Generate Audio"))
        {
            _frameInputNode.Start();
            generateButton.Content = "Stop";
            audioPipe.Fill = new SolidColorBrush(Colors.Blue);
        }
        else if (generateButton.Content != null && generateButton.Content.Equals("Stop"))
        {
            _frameInputNode.Stop();
            generateButton.Content = "Generate Audio";
            audioPipe.Fill = new SolidColorBrush(Color.FromArgb(255, 49, 49, 49));
        }
    }

    /// <summary>
    /// Creates the graph, its device output node and a mono frame input node,
    /// wires them together and starts the graph with the input node stopped.
    /// Failures are reported through the root page and leave the page without a usable graph.
    /// </summary>
    private async Task CreateAudioGraph()
    {
        // Create an AudioGraph with default settings
        AudioGraphSettings settings = new AudioGraphSettings(AudioRenderCategory.Media);
        CreateAudioGraphResult result = await AudioGraph.CreateAsync(settings);
        if (result.Status != AudioGraphCreationStatus.Success)
        {
            // Cannot create graph
            _rootPage.NotifyUser($"AudioGraph Creation Error because {result.Status.ToString()}", NotifyType.ErrorMessage);
            return;
        }

        _graph = result.Graph;

        // Create a device output node
        CreateAudioDeviceOutputNodeResult deviceOutputNodeResult = await _graph.CreateDeviceOutputNodeAsync();
        if (deviceOutputNodeResult.Status != AudioDeviceNodeCreationStatus.Success)
        {
            // Cannot create device output node
            _rootPage.NotifyUser(
                $"Audio Device Output unavailable because {deviceOutputNodeResult.Status.ToString()}", NotifyType.ErrorMessage);
            speakerContainer.Background = new SolidColorBrush(Colors.Red);

            // Stop here: continuing would report success and connect a missing output node.
            return;
        }

        _deviceOutputNode = deviceOutputNodeResult.DeviceOutputNode;
        _rootPage.NotifyUser("Device Output Node successfully created", NotifyType.StatusMessage);
        speakerContainer.Background = new SolidColorBrush(Colors.Green);

        // Create the FrameInputNode at the same format as the graph, except explicitly set mono.
        AudioEncodingProperties nodeEncodingProperties = _graph.EncodingProperties;
        nodeEncodingProperties.ChannelCount = 1;
        _frameInputNode = _graph.CreateFrameInputNode(nodeEncodingProperties);
        _frameInputNode.AddOutgoingConnection(_deviceOutputNode);
        frameContainer.Background = new SolidColorBrush(Colors.Green);

        // Initialize the Frame Input Node in the stopped state
        _frameInputNode.Stop();

        // This event is raised whenever the node needs data, so samples are generated on demand.
        _frameInputNode.QuantumStarted += node_QuantumStarted;

        // Start the graph since only the frame input node is started/stopped afterwards
        _graph.Start();
    }
}
现在的挑战是:当声波频率高于阈值时,如何触发事件?在这种情况下,我想统计射击次数,并记录每次射击的时间戳和强度
示例音
这是我的示例音频,正如你在其中所能看到的,每当大锤敲击一次(间隔一秒或更短),我就想触发一个事件。
回答“这是正确的方法吗”这个问题:不,AudioStateMonitor 对解决这个问题没有帮助。AudioStateMonitor 只会告诉您系统是否正在压低(duck)您的声音,以免干扰其他内容。例如,系统可以把音乐静音,好让电话铃声优先播放。SoundLevelChanged 不会告诉您所录制声音的音量或频率,而这正是检测拍手声所需要的。正确的方法是使用 AudioGraph(或 WASAPI,但不能从 C# 使用)将原始音频捕获到 AudioFrameOutputNode 中以处理信号,然后通过 FFT 检测处于目标频率和音量范围内的声音。官方示例演示了如何使用 AudioGraph,但没有具体说明 AudioFrameOutputNode。每次拍击的频率范围为2200Hz至2800Hz。
识别枪声看起来要复杂得多,因为不同的枪有非常不同的声音特征。快速搜索只找到了几篇这方面的研究论文,而没有现成的简单算法。我想你需要借助某种机器学习来对这些声音进行分类。之前有一个帖子讨论过使用ML区分枪声和非枪声。
您可以通过计算一帧中所有PCM数据的平均振幅来得到该帧的分贝值。我认为您需要创建一个处理输入的 AudioGraph,代码大致如下:
// NOTE(review): the original declaration was `private static event LoudNoise<double>;`,
// which has no delegate type and does not compile. EventHandler<double> is assumed here;
// confirm against the code that raises and subscribes to this event.
private static event EventHandler<double> LoudNoise;

// Reset to 0 by CreateInputDeviceNode; presumably advanced by the graph's QuantumStarted handler.
private static int quantum = 0;

// Capture graph and its two nodes, assigned by CreateInputDeviceNode.
static AudioGraph ingraph;
private static AudioDeviceInputNode deviceInputNode;
private static AudioFrameOutputNode frameOutputNode;
/// <summary>
/// Builds and starts a capture graph that routes a capture device through a
/// device input node into a frame output node, and hooks the graph's
/// QuantumStarted event.
/// </summary>
/// <param name="deviceId">
/// Id of the capture device to use. When null or empty, the system default
/// capture device is used (previously this argument was ignored entirely).
/// </param>
/// <returns>
/// <c>true</c> when the graph was created and started; <c>false</c> when the graph,
/// the capture device, or the device input node could not be obtained.
/// </returns>
public static async Task<bool> CreateInputDeviceNode(string deviceId)
{
    Console.WriteLine("Creating AudioGraphs");

    // Graph format: 48 kHz, stereo, 32-bit float (48000 * 2 * 32 = 3072000 bits per second).
    AudioGraphSettings graphsettings = new AudioGraphSettings(AudioRenderCategory.Media);
    graphsettings.EncodingProperties = new AudioEncodingProperties();
    graphsettings.EncodingProperties.Subtype = "Float";
    graphsettings.EncodingProperties.SampleRate = 48000;
    graphsettings.EncodingProperties.ChannelCount = 2;
    graphsettings.EncodingProperties.BitsPerSample = 32;
    graphsettings.EncodingProperties.Bitrate = 3072000;

    CreateAudioGraphResult graphresult = await AudioGraph.CreateAsync(graphsettings);
    if (graphresult.Status != AudioGraphCreationStatus.Success)
    {
        // Cannot create graph
        return false;
    }

    AudioGraph graph = graphresult.Graph;
    bool started = false;
    try
    {
        // Honour the requested device; fall back to the default capture device.
        string captureId = string.IsNullOrEmpty(deviceId)
            ? Windows.Media.Devices.MediaDevice.GetDefaultAudioCaptureId(Windows.Media.Devices.AudioDeviceRole.Default)
            : deviceId;
        if (string.IsNullOrEmpty(captureId))
        {
            return false;
        }

        DeviceInformation selectedDevice = await DeviceInformation.CreateFromIdAsync(captureId);

        // The input node keeps the 48 kHz / stereo / 32-bit PCM format used before.
        // The former per-node AudioGraphSettings (quantum size options) was never used
        // to create a graph, so only its encoding properties had any effect.
        AudioEncodingProperties inputEncoding = AudioEncodingProperties.CreatePcm(48000, 2, 32);
        CreateAudioDeviceInputNodeResult result =
            await graph.CreateDeviceInputNodeAsync(MediaCategory.Media, inputEncoding, selectedDevice);
        if (result.Status != AudioDeviceNodeCreationStatus.Success)
        {
            // Cannot create device input node
            return false;
        }

        AudioFrameOutputNode outputNode = graph.CreateFrameOutputNode(graph.EncodingProperties);
        result.DeviceInputNode.AddOutgoingConnection(outputNode);

        // Publish to the static fields only once every piece exists, so a failed
        // attempt never leaves them pointing at a half-built graph.
        ingraph = graph;
        deviceInputNode = result.DeviceInputNode;
        frameOutputNode = outputNode;
        quantum = 0;
        graph.QuantumStarted += Graph_QuantumStarted;

        outputNode.Start();
        graph.Start();
        started = true;
        return true;
    }
    finally
    {
        if (!started)
        {
            // Release the graph on every failure path (including exceptions) instead of leaking it.
            graph.QuantumStarted -= Graph_QuantumStarted;
            graph.Dispose();
        }
    }
}
私有静态事件噪声;
私有静态int-quantum=0;
静态听力图;
专用静态音频设备输入节点设备输入节点;
私有静态音频frameOutputNode frameOutputNode;
公共静态异步任务CreateInputDeviceNode(字符串设备ID)
{
Console.WriteLine(“创建听力图”);
//使用默认设置创建听力图
AudioGraphSettings graphsettings=新的AudioGraphSettings(AudioRenderCategory.Media);
graphsettings.EncodingProperties=新的AudioEncodingProperties();
graphsettings.EncodingProperties.Subtype=“Float”;
graphsettings.EncodingProperties.SampleRate=48000;
graphsettings.EncodingProperties.ChannelCount=2;
graphsettings.EncodingProperties.BitsPerSample=32;
graphsettings.EncodingProperties.Bitrate=3072000;
//settings.DesiredSamplesPerQuantum=960;
//settings.QuantumSizeSelectionMode=QuantumSizeSelectionMode.ClosestToDesired;
CreateAudioGraphResult graphresult=等待AudioGraph.CreateAsync(graphsettings);
if(graphresult.Status!=AudioGraphCreationStatus.Success)
{
//无法创建图形
返回false;
}
ingraph=graphresult.Graph;AudioGraphSettings节点设置=新的AudioGraphSettings(AudioRenderCategory.GameChat);
nodesettings.EncodingProperties=AudioEncodingProperties.CreatePcm(48000,2,32);
nodesettings.DesiredSamplesPerQuantum=960;
nodesettings.QuantumSizeSelectionMode=QuantumSizeSelectionMode.ClosestToDesired;
frameOutputNode=ingraph.CreateFrameOutputNode(ingraph.EncodingProperties);
量子=0;
ingraph.QuantumStarted+=图形\u QuantumStarted;
所选设备的设备信息;
字符串设备=Windows.Media.Devices.MediaDevice.GetDefaultAudioCaptureId(Windows.Media.Devices.AudioDeviceRole.Default);
如果(!string.IsNullOrEmpty(设备))
{
selectedDevice=等待设备信息。CreateFromIdAsync(设备);
}否则
{
返回false;
}
CreateAudioDeviceInputNodeResult结果=
等待ingraph.CreateDeviceInputNodeAsync(MediaCategory.Media、nodesettings.EncodingProperties、selectedDevice);
if(result.Status!=AudioDeviceDecreationStatus.Success)
{
//无法创建设备输出节点
返回false;
}
deviceInputNode=result.deviceInputNode;
deviceInputNode.AddOutgoingConnection(frameOutputNode);
frameOutputNode.Start();
ingraph.Start();
返回true;
}
private static void Graph\u QuantumStarted(音频图发送器,对象参数)
{
如果(++量程%2==0)
{
AudioFrame=frameOutputNode.GetFrame();
浮动[]数据流入;
使用(AudioBuffer=frame.LockBuffer(AudioBufferAccessMode.Write))
使用(IMemoryBufferReference reference=buffer.CreateReference())
{
//从音频帧获取缓冲区
((IMemoryBufferByteAccess)引用).GetBuffer(out字节*dataInBytes,ou