Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/tensorflow/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 通过现场话筒的pyaudio检测抽头_Python_Microphone_Pyaudio - Fatal编程技术网

Python 使用pyaudio检测现场麦克风的敲击声

Python 通过现场话筒的pyaudio检测抽头,python,microphone,pyaudio,Python,Microphone,Pyaudio,我如何使用pyaudio来检测来自现场麦克风的突然敲击噪音?我这样做的一种方法: 一次读取一块样本, 比如说0.05秒 计算 块的均方根振幅(平方 平方的平均根 (个别样本) 如果块的均方根振幅大于阈值,则为“噪声块”,否则为“安静块” 突然的敲击声将是一个安静的街区,接着是少量嘈杂的街区,接着是一个安静的街区 如果你从来没有得到一个安静的街区,你的门槛太低了 如果您从未遇到过嘈杂的块,则阈值太高 我的应用程序在无人值守的情况下记录“有趣的”噪音,所以只要有噪音块,它就会记录。如果有15秒的

我如何使用pyaudio来检测来自现场麦克风的突然敲击噪音?

我这样做的一种方法:

  • 一次读取一块样本, 比如说0.05秒
  • 计算该块的均方根(RMS)振幅(即各个样本平方的平均值的平方根)
  • 如果块的均方根振幅大于阈值,则为“噪声块”,否则为“安静块”
  • 突然的敲击声表现为一个安静块,接着是少量噪声块,然后又是一个安静块
  • 如果你从未得到过安静块,说明阈值太低了
  • 如果您从未遇到过嘈杂的块,则阈值太高
我的应用程序在无人值守的情况下记录“有趣的”噪音,所以只要有噪音块,它就会记录。如果有15秒的噪音周期(“捂住耳朵”),它会将阈值乘以1.1;如果有15分钟的安静周期(“更努力地倾听”),它会将阈值乘以0.9。您的应用程序将有不同的需求

另外,我注意到我的代码中有一些关于观察到的RMS值的注释。在Macbook Pro上的内置话筒上,音频数据范围为+/-1.0,输入音量设置为最大,一些数据点:

  • 0.003-0.006(-50分贝到-44分贝)我家里一台吵闹的中央暖气风扇
  • 0.010-0.40(-40dB到-8dB)在同一台笔记本电脑上打字
  • 0.10(-20dB)在1'距离处轻轻敲击手指
  • 0.60(-4.4dB)在1'处大声敲击手指
更新:这里有一个示例让您开始学习

#!/usr/bin/python

# open a microphone in pyAudio and listen for taps

import pyaudio
import struct
import math

# --- Detection tuning -------------------------------------------------------
# Starting RMS level (on the normalized scale) above which a block is "noisy".
INITIAL_TAP_THRESHOLD = 0.010

# --- Audio capture settings -------------------------------------------------
# Sample format requested from PyAudio: signed 16-bit integers.
FORMAT = pyaudio.paInt16
# Multiplier that scales a signed 16-bit sample into roughly +/-1.0.
SHORT_NORMALIZE = 1.0 / 32768.0
CHANNELS = 2
# Sample rate passed to the stream, in samples per second.
RATE = 44100
# Length of one analysis block, in seconds, and the same length in frames.
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)

# --- Block-count limits (all derived from the block length) -----------------
# More consecutive noisy blocks than this (15 s worth): raise the threshold.
OVERSENSITIVE = 15.0 / INPUT_BLOCK_TIME
# More consecutive quiet blocks than this (120 s worth): lower the threshold.
UNDERSENSITIVE = 120.0 / INPUT_BLOCK_TIME
# A noise burst longer than this many blocks (0.15 s worth) is not a 'tap'.
MAX_TAP_BLOCKS = 0.15 / INPUT_BLOCK_TIME

def get_rms( block ):
    """Return the RMS amplitude of a block of raw 16-bit audio samples.

    RMS amplitude is the square root of the mean of the squared sample
    values.  Every sample is scaled by SHORT_NORMALIZE before squaring.

    Args:
        block: raw bytes as read from the audio stream; every two bytes
            are unpacked as one signed short in native byte order.

    Returns:
        The RMS amplitude as a float, or 0.0 when the block does not
        contain at least one complete sample.
    """
    # Two bytes per sample.  Floor division keeps the count an integer on
    # Python 3 and ignores a dangling odd byte instead of letting
    # struct.unpack() fail on it.
    count = len(block) // 2
    if count == 0:
        # Nothing to average: report silence rather than dividing by zero.
        return 0.0

    shorts = struct.unpack( "%dh" % count, block[:count * 2] )

    # Accumulate the squares of the normalized samples.
    sum_squares = 0.0
    for sample in shorts:
        n = sample * SHORT_NORMALIZE
        sum_squares += n*n

    return math.sqrt( sum_squares / count )

class TapTester(object):
    """Listen to a microphone through PyAudio and report short taps.

    Each call to listen() reads one block, classifies it as noisy or quiet
    by comparing its RMS amplitude with an adaptive threshold, and calls
    tapDetected() when a short run of noisy blocks is followed by a quiet
    block.  Call stop() when finished to release the audio resources.
    """

    def __init__(self):
        """Create the PyAudio instance, open the input stream, reset counters."""
        self.pa = pyaudio.PyAudio()
        try:
            self.stream = self.open_mic_stream()
        except Exception:
            # Do not leak the PyAudio instance when the stream cannot open.
            self.pa.terminate()
            raise
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so the very first quiet block cannot
        # satisfy the tap condition in listen().
        self.noisycount = MAX_TAP_BLOCKS+1
        self.quietcount = 0
        self.errorcount = 0

    def stop(self):
        """Close the input stream and terminate the PyAudio instance."""
        self.stream.close()
        # Closing the stream alone leaves the PyAudio instance alive.
        self.pa.terminate()

    def find_input_device(self):
        """Return the index of the first device that looks like an input.

        Every device name is printed.  A device matches when its lower-cased
        name contains "mic" or "input".

        Returns:
            The matching device index, or None when no name matches.
        """
        for i in range( self.pa.get_device_count() ):
            devinfo = self.pa.get_device_info_by_index(i)
            print( "Device %d: %s"%(i,devinfo["name"]) )

            name = devinfo["name"].lower()
            if any( keyword in name for keyword in ("mic","input") ):
                print( "Found an input: device %d - %s"%(i,devinfo["name"]) )
                return i

        print( "No preferred input found; using default input device." )
        return None

    def open_mic_stream( self ):
        """Open and return an input stream on the device find_input_device() picks."""
        device_index = self.find_input_device()

        stream = self.pa.open(   format = FORMAT,
                                 channels = CHANNELS,
                                 rate = RATE,
                                 input = True,
                                 input_device_index = device_index,
                                 frames_per_buffer = INPUT_FRAMES_PER_BLOCK)

        return stream

    def tapDetected(self):
        """Report a detected tap (prints "Tap!")."""
        print("Tap!")

    def listen(self):
        """Read one block from the stream and update the tap detector.

        A read error is counted and printed, and the block is skipped.
        Otherwise the block's RMS amplitude is compared with the current
        threshold to update the noisy/quiet run counters, report a tap,
        and adapt the threshold.
        """
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            self.errorcount += 1
            print( "(%d) Error recording: %s"%(self.errorcount,e) )
            # NOTE(review): the noisy run is restarted at 1 after an error;
            # confirm this is the intended recovery behaviour.
            self.noisycount = 1
            return

        amplitude = get_rms( block )
        if amplitude > self.tap_threshold:
            # Noisy block: extend the noisy run.
            self.quietcount = 0
            self.noisycount += 1
            if self.noisycount > OVERSENSITIVE:
                # Too much continuous noise: turn down the sensitivity.
                # Applied on every further noisy block of the run.
                self.tap_threshold *= 1.1
        else:
            # Quiet block: a noisy run that just ended and was short
            # enough counts as a tap.
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1
            if self.quietcount > UNDERSENSITIVE:
                # Too much continuous quiet: turn up the sensitivity.
                # Applied on every further quiet block of the run.
                self.tap_threshold *= 0.9

if __name__ == "__main__":
    # Listen for 1000 blocks, then release the audio stream even when
    # listening is interrupted (e.g. by Ctrl-C or an unexpected error).
    tt = TapTester()
    try:
        for _ in range(1000):
            tt.listen()
    finally:
        tt.stop()
#/usr/bin/python
#在pyAudio中打开麦克风并聆听敲击声
导入pyaudio
导入结构
输入数学
初始_-TAP_阈值=0.010
格式=pyaudio.paInt16
SHORT_NORMALIZE=(1.0/32768.0)
通道=2
费率=44100
输入\块\时间=0.05
每个块的输入帧=int(速率*输入块时间)
#如果我们在一行中获得如此多的噪声块,请增加阈值
过敏=15.0/输入\块\时间
#如果我们连续获得这么多安静的块,请降低阈值
欠灵敏=120.0/输入\块\时间
#如果噪音超过这么多街区,那就不是“敲击声”
最大点击块=0.15/输入块时间
def get_rms(块):
#均方根振幅定义为
#振幅平方随时间的平均值。
#所以我们需要把这个字节串转换成
#一个由16位样本组成的字符串。。。
#我们每个人少一个
#字符串中有两个字符。
计数=长度(块)/2
format=“%dh”%(计数)
shorts=结构解包(格式、块)
#在块上迭代。
平方和=0.0
对于短裤样品:
#样本为+/-32768中的有符号短路。
#将其标准化为1.0
n=样本*短\标准化
平方和+=n*n
返回数学.sqrt(平方和/计数)
类别测试仪(对象):
定义初始化(自):
self.pa=pyaudio.pyaudio()
self.stream=self.open\u mic\u stream()
self.tap\u threshold=初始\u tap\u threshold
self.noisycount=最大抽头块数+1
self.quietcount=0
self.errorcount=0
def停止(自):
self.stream.close()
def查找输入设备(自身):
设备索引=无
对于范围内的i(self.pa.get_device_count()):
devinfo=self.pa.get\u设备\u信息\u索引(i)
打印(“设备%d:%s%”(i,设备信息[“名称]))
对于[“麦克风”、“输入”]中的关键字:
devinfo[“name”]中的if关键字。lower()
打印(“找到输入:设备%d-%s”%(i,设备信息[“名称”]))
设备索引=i
返回设备索引
如果设备索引==无:
打印(“未找到首选输入;使用默认输入设备。”)
返回设备索引
def open_mic_流(自):
设备索引=self.find\u输入设备()
stream=self.pa.open(format=format,
频道=频道,
比率=比率,
输入=真,
输入设备索引=设备索引,
每个缓冲区的帧数=每个块的输入帧数)
回流
检测到def TAP(自身):
打印(“点击!”)
def监听(self):
尝试:
block=self.stream.read(每个块输入帧)
除IOE错误外:
#该死。
self.errorcount+=1
打印((%d)错误记录:%s“%(self.errorcount,e))
self.noisycount=1
返回
振幅=获取均方根值(块)
如果振幅>自拍阈值:
#噪声块
self.quietcount=0
自噪声计数+=1
如果self.noisycount>过敏感:
#把灵敏度调低
self.tap_阈值*=1.1
其他:
#安静的街区。

如果1

上述代码的简化版本:

import pyaudio
import struct
import math

# Starting RMS level (on the normalized scale) above which a block is "noisy".
INITIAL_TAP_THRESHOLD = 0.010

# Audio capture settings handed to PyAudio.
FORMAT = pyaudio.paInt16               # signed 16-bit samples
SHORT_NORMALIZE = 1.0 / 32768.0        # scales a 16-bit sample to roughly +/-1.0
CHANNELS = 2
RATE = 44100                           # samples per second

# One analysis block: its length in seconds and in frames.
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)

# If we get more than this many noisy blocks in a row, increase the threshold.
OVERSENSITIVE = 15.0 / INPUT_BLOCK_TIME

# If we get more than this many quiet blocks in a row, decrease the threshold.
UNDERSENSITIVE = 120.0 / INPUT_BLOCK_TIME

# If the noise lasted longer than this many blocks, it is not a 'tap'.
MAX_TAP_BLOCKS = 0.15 / INPUT_BLOCK_TIME

def get_rms(block):
    """Return the RMS amplitude of a block of raw 16-bit audio samples.

    RMS amplitude is the square root of the mean of the squared sample
    values.  Every sample is scaled by SHORT_NORMALIZE before squaring.

    Args:
        block: raw bytes as read from the audio stream; every two bytes
            are unpacked as one signed short in native byte order.

    Returns:
        The RMS amplitude as a float, or 0.0 when the block does not
        contain at least one complete sample.
    """
    # One short for each two bytes.  Floor division gives an integer count
    # on both Python 2 and 3 and drops a dangling odd byte.
    count = len(block) // 2
    if count == 0:
        # An empty block would otherwise divide by zero below.
        return 0.0

    shorts = struct.unpack("%dh" % count, block[:count * 2])

    # Sum of the squared, normalized samples.
    sum_squares = 0.0
    for sample in shorts:
        n = sample * SHORT_NORMALIZE
        sum_squares += n*n

    return math.sqrt(sum_squares / count)

# Open the default input device as a PyAudio stream.
pa = pyaudio.PyAudio()

stream = pa.open(format = FORMAT,
         channels = CHANNELS,
         rate = RATE,
         input = True,
         frames_per_buffer = INPUT_FRAMES_PER_BLOCK)

# State of the noise detector.
tap_threshold = INITIAL_TAP_THRESHOLD
# Start above MAX_TAP_BLOCKS so the first quiet block is not reported as a tap.
noisycount = MAX_TAP_BLOCKS+1
quietcount = 0
errorcount = 0

try:
    for _ in range(1000):
        try:
            block = stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            errorcount += 1
            print( "(%d) Error recording: %s"%(errorcount,e) )
            noisycount = 1
            # No fresh data was read: skip analysis instead of reusing a
            # stale (or, on the first pass, undefined) block.
            continue

        amplitude = get_rms(block)
        if amplitude > tap_threshold:
            # Noisy block: extend the noisy run.
            quietcount = 0
            noisycount += 1
            if noisycount > OVERSENSITIVE:
                tap_threshold *= 1.1 # turn down the sensitivity
        else:
            # Quiet block: a short noisy run that just ended is a tap.
            if 1 <= noisycount <= MAX_TAP_BLOCKS:
                print('tap!')
            noisycount = 0
            quietcount += 1
            if quietcount > UNDERSENSITIVE:
                tap_threshold *= 0.9 # turn up the sensitivity
finally:
    # Release the audio resources even when the loop is interrupted.
    stream.close()
    pa.terminate()
导入pyaudio
导入结构
输入数学
初始_-TAP_阈值=0.010
格式=pyaudio.paInt16
SHORT_NORMALIZE=(1.0/32768.0)
通道=2
费率=44100
输入\块\时间=0.05
每个块的输入帧=int(速率*输入块时间)
过敏=15.0/输入\块\时间
UNDERSENSITIVE=120.0/INPUT_BLOCK_TIME#如果我们连续获得这么多安静块,请降低阈值
MAX_TAP_BLOCKS=0.15/INPUT_BLOCK_TIME#如果噪声比这么多块长,则不是“TAP”
def get_rms(块):
#均方根振幅定义为
#振幅平方随时间的平均值。
#所以我们需要把这个字节串转换成
#一个由16位样本组成的字符串。。。
#我们每个人少一个
#字符串中有两个字符。
计数=长度(块)/2
format=“%dh”%(计数)
短路=结构拆包(f