Python PyAudio-将stream.read转换为int以获得振幅
标签:python, python-2.7, stream, pyaudio
我试图录制音频,同时打印录制信号的振幅。所以我把 stream.read 返回的所有数据都保存了下来。但是当我试图打印它们时,得到的是字节字符串,而不是整数。我想知道如何转换这些字节以获得振幅。这是我的代码:
import pyaudio
import wave
# Recording parameters: 16-bit mono audio at 44.1 kHz, read in
# 1024-frame chunks for 5 seconds.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)

print("* recording")

# Number of CHUNK-sized reads needed to cover RECORD_SECONDS.
chunk_count = int(RATE / CHUNK * RECORD_SECONDS)

# Each entry is one raw byte string: 2 bytes (16 bits) per sample,
# one sample per frame because the stream is mono.
frames = []
for _ in range(chunk_count):
    frames.append(stream.read(CHUNK))

print("* done recording")

# Release the audio device before printing.
stream.stop_stream()
stream.close()
p.terminate()

# Prints the undecoded byte strings -- this is what produces the
# unreadable output shown below.
for data in frames:
    print(data)
这就是我得到的:
������������������������������������������������������������������
���������
���������������
%�� ��(��)����,����.����%����#��
�� �� �����������������������
PyAudio正在以字符串中的字节形式为您提供二进制编码的音频帧。有关如何打印帧的可读表示形式,请参阅此问题的答案:
您当然可以从以下代码中获得启发:
#!/usr/bin/python
# open a microphone in pyAudio and listen for taps
import pyaudio
import struct
import math
# starting value of TapTester.tap_threshold: a block whose RMS amplitude
# (as returned by get_rms) exceeds it is treated as noisy
INITIAL_TAP_THRESHOLD = 0.010
# sample format requested from PyAudio (16-bit integers)
FORMAT = pyaudio.paInt16
# factor used by get_rms to scale a signed 16-bit sample to about +/- 1.0
SHORT_NORMALIZE = (1.0/32768.0)
# number of channels requested when the stream is opened
CHANNELS = 2
# sample rate in frames per second
RATE = 44100
# duration of one analysed block, in seconds
INPUT_BLOCK_TIME = 0.05
# number of frames read from the stream for each block
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)
# if we get this many noisy blocks in a row, increase the threshold
# (the number of blocks that fit in 15 seconds)
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME
# if we get this many quiet blocks in a row, decrease the threshold
# (the number of blocks that fit in 120 seconds)
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME
# if the noise was longer than this many blocks, it's not a 'tap'
# (the number of blocks that fit in 0.15 seconds)
MAX_TAP_BLOCKS = 0.15/INPUT_BLOCK_TIME
def get_rms( block ):
    """Return the RMS amplitude of a block of 16-bit audio samples.

    RMS amplitude is the square root of the mean, over the block, of the
    squared sample values.  Each sample is scaled by SHORT_NORMALIZE
    first, so the result is on a 0.0 - 1.0 scale.

    Args:
        block: byte string of signed 16-bit samples in native byte order
            (two bytes per sample), e.g. the value returned by
            ``stream.read()`` on a paInt16 stream.  With more than one
            channel the interleaved samples are all averaged together.

    Returns:
        The RMS amplitude as a float; 0.0 for an empty block.

    Raises:
        struct.error: if the block length is not a multiple of two bytes.
    """
    # An empty read carries no signal; returning early also avoids a
    # division by zero below.
    if not block:
        return 0.0

    # One short for every two bytes.  Floor division keeps the count an
    # integer on both Python 2 and Python 3.
    count = len(block) // 2
    shorts = struct.unpack( "%dh" % count, block )

    # Each sample is a signed short in +/- 32768; normalize to 1.0 and
    # accumulate the squares.
    normalized = ( sample * SHORT_NORMALIZE for sample in shorts )
    sum_squares = sum( n * n for n in normalized )

    return math.sqrt( sum_squares / count )
class TapTester(object):
    """Listen on a microphone stream and report short noise bursts ("taps").

    Each call to listen() reads one block, compares its RMS amplitude
    with an adaptive threshold, and calls tapDetected() when a short run
    of noisy blocks is followed by a quiet one.
    """

    def __init__(self):
        self.pa = pyaudio.PyAudio()
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so the very first quiet block is
        # not reported as a tap.
        self.noisycount = MAX_TAP_BLOCKS+1
        self.quietcount = 0
        self.errorcount = 0

    def stop(self):
        """Stop and close the stream, then release the PyAudio instance."""
        self.stream.stop_stream()
        self.stream.close()
        self.pa.terminate()

    def find_input_device(self):
        """Return the index of the first device whose name looks like an input.

        Every device is printed as it is examined.  A device matches when
        its lower-cased name contains "mic" or "input".

        Returns:
            The matching device index, or None when nothing matches (the
            caller then passes None on to PyAudio.open).
        """
        for i in range( self.pa.get_device_count() ):
            devinfo = self.pa.get_device_info_by_index(i)
            print( "Device %d: %s"%(i,devinfo["name"]) )

            name = devinfo["name"].lower()
            if any( keyword in name for keyword in ("mic", "input") ):
                print( "Found an input: device %d - %s"% (i,devinfo["name"]) )
                return i

        print( "No preferred input found; using default input device." )
        return None

    def open_mic_stream( self ):
        """Open and return an input stream on the device chosen by find_input_device()."""
        device_index = self.find_input_device()

        stream = self.pa.open( format = FORMAT,
                               channels = CHANNELS,
                               rate = RATE,
                               input = True,
                               input_device_index = device_index,
                               frames_per_buffer = INPUT_FRAMES_PER_BLOCK)

        return stream

    def tapDetected(self):
        """Report a detected tap."""
        # Parenthesised so the statement is valid on Python 2 and 3.
        print( "Tap!" )

    def listen(self):
        """Read one block from the stream and update the tap-detection state."""
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            # The read failed: count the error and skip this block.
            self.errorcount += 1
            print( "(%d) Error recording: %s"%(self.errorcount,e) )
            # NOTE(review): with noisycount at 1, a quiet block right
            # after an error is reported as a tap -- confirm intended.
            self.noisycount = 1
            return

        amplitude = get_rms( block )
        if amplitude > self.tap_threshold:
            # noisy block
            self.quietcount = 0
            self.noisycount += 1
            if self.noisycount > OVERSENSITIVE:
                # turn down the sensitivity
                self.tap_threshold *= 1.1
        else:
            # quiet block: a preceding run of 1..MAX_TAP_BLOCKS noisy
            # blocks counts as a tap.
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1
            if self.quietcount > UNDERSENSITIVE:
                # turn up the sensitivity
                self.tap_threshold *= 0.9
if __name__ == "__main__":
    tt = TapTester()
    try:
        # 1000 blocks of INPUT_BLOCK_TIME (0.05 s) each: about 50 seconds.
        for i in range(1000):
            tt.listen()
    finally:
        # Release the audio stream even if listening is interrupted.
        tt.stop()
!/usr/bin/python
#在pyAudio中打开麦克风并聆听敲击声
导入pyaudio
导入结构
输入数学
初始_-TAP_阈值=0.010
格式=pyaudio.paInt16
SHORT_NORMALIZE=(1.0/32768.0)
通道=2
费率=44100
输入\块\时间=0.05
每个块的输入帧=int(速率*输入块时间)
#如果我们在一行中获得如此多的噪声块,请增加阈值
过敏=15.0/输入\块\时间
#如果我们连续获得这么多安静的块,请降低阈值
欠灵敏=120.0/输入\块\时间
#如果噪音超过这么多街区,那就不是“敲击声”
最大点击块=0.15/输入块时间
def get_rms(块):
#均方根振幅定义为
#振幅平方随时间的平均值。
#所以我们需要把这个字节串转换成
#一个由16位样本组成的字符串。。。
#我们每个人少一个
#字符串中有两个字符。
计数=长度(块)/2
format=“%dh”%(计数)
shorts=结构解包(格式、块)
#在块上迭代。
平方和=0.0
对于短裤样品:
#样本为+/-32768中的有符号短路。
#将其标准化为1.0
n=样本*短\标准化
平方和+=n*n
返回数学.sqrt(平方和/计数)
类别测试仪(对象):
定义初始化(自):
self.pa=pyaudio.pyaudio()
self.stream=self.open\u mic\u stream()
self.tap\u threshold=初始\u tap\u threshold
self.noisycount=最大抽头块数+1
self.quietcount=0
self.errorcount=0
def停止(自):
self.stream.close()
def查找输入设备(自身):
设备索引=无
对于范围内的i(self.pa.get_device_count()):
devinfo=self.pa.get\u设备\u信息\u索引(i)
打印(“设备%d:%s%”(i,设备信息[“名称]))
对于[“麦克风”、“输入”]中的关键字:
devinfo[“name”]中的if关键字。lower()
打印(“找到输入:设备%d-%s”%(i,设备信息[“名称”]))
设备索引=i
返回设备索引
如果设备索引==无:
打印(“未找到首选输入;使用默认输入设备。”)
返回设备索引
def open_mic_流(自):
设备索引=self.find\u输入设备()
stream=self.pa.open(format=format,
频道=频道,
比率=比率,
输入=真,
输入设备索引=设备索引,
每个缓冲区的帧数=每个块的输入帧数)
回流
检测到def TAP(自身):
打印“点击!”
def监听(self):
尝试:
block=self.stream.read(每个块输入帧)
除IOError外,e:
#该死。
self.errorcount+=1
打印((%d)错误记录:%s“%(self.errorcount,e))
self.noisycount=1
返回
振幅=获取均方根值(块)
如果振幅>自拍阈值:
#噪声块
self.quietcount=0
自噪声计数+=1
如果self.noisycount>过敏感:
#把灵敏度调低
self.tap_阈值*=1.1
其他:
#安静的街区。
我认为你可以这样做:
import struct

data = stream.read(CHUNK)
# Iterating over the raw byte string yields single bytes (1-character
# strings on Python 2), not samples.  Decode it into signed 16-bit
# integers first.
# NOTE(review): assumes the stream was opened with pyaudio.paInt16, as
# in the question's code -- use a different format letter otherwise.
for each in struct.unpack("%dh" % (len(data) // 2), data):
    print(each)
我猜这个问题很老了,我一直在寻找其他答案,但在我的项目中,我使用了类似的东西
#Lets assume the constants are defined somewhere
import struct
import pyaudio
import numpy as np
# NOTE(review): `self` and `self.CHUNK` come from an enclosing class that
# is not shown here; this fragment presumably lives inside one of its methods.
# Open an input-only, mono, 44.1 kHz, 16-bit stream with 1024 frames per
# buffer (no input_device_index is given).
self.input = pyaudio.PyAudio().open(
format=pyaudio.paInt16,
channels=1,
rate=44100,
input=True,
output=False,
frames_per_buffer=1024,
)
# Read self.CHUNK frames from the stream as a raw byte string.
wf_data = self.input.read(self.CHUNK)
# Decode the bytes into a tuple of self.CHUNK signed 16-bit integers
# ('h' is the struct letter matching paInt16).  The count equals the
# number of frames only because the stream is mono.
wf_data = struct.unpack(str(self.CHUNK) + 'h', wf_data)
# Convert the tuple of samples into a NumPy array.
wf_data = np.array(wf_data)
paInt16和“h”对应。您可以在此处找出与pyaudio格式匹配的字母。
归功于:
处理音频时,您可能需要信号缓冲区的RMS(均方根)值。我相信它可以更好地“查看”音频信号的整体功率
Python 标准库有一个名为 audioop 的模块,该模块有一个名为 rms 的函数
import pyaudio
import time
import audioop
def get_rms():
    """Yield the RMS value of successive chunks read from the microphone.

    Opens a mono, 44.1 kHz, 16-bit input stream and, for every chunk of
    CHUNK frames, yields ``audioop.rms`` of that chunk as an integer.
    The generator never finishes on its own.  The stream and the PyAudio
    instance are released when it is closed or garbage-collected.

    NOTE: ``audioop`` was deprecated in Python 3.11 and removed in 3.13.
    """
    CHUNK = 8
    WIDTH = 2       # bytes per sample
    CHANNELS = 1
    RATE = 44100

    p = pyaudio.PyAudio()
    # Stays None if open() fails, so the cleanup below can tell.
    stream = None
    try:
        stream = p.open(format=p.get_format_from_width(WIDTH),
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=False,
                        frames_per_buffer=CHUNK)
        # wait a second to allow the stream to be setup
        time.sleep(1)
        while True:
            # read the data; input overflows are ignored, not raised
            data = stream.read(CHUNK, exception_on_overflow=False)
            # the width passed to audioop must match the stream's
            # sample width
            yield audioop.rms(data, WIDTH)
    finally:
        # Close the stream before terminating PyAudio.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
您可以像这样使用函数
# get_rms() returns a generator; each iteration reads one chunk from the
# microphone and produces its RMS value.
rms_values = get_rms()
for rms in iter(rms_values):
    print(rms)
谢谢你的回答。我刚刚在 for 循环中添加了一行“decoded = numpy.fromstring(data, 'Float32')”,但结果并不理想。我得到的是一列非常小的数字,如:3.67348991e-40 6.42851276e-40 3.67355998e-40 6.42868091e-40 2.75502285e-40 1.10201895e-39 nan 4.59204105e-40 1.19389508e-39 1.37756747e-39
您需要为数据使用正确的格式。请尝试 decoded = numpy.fromstring(data, dtype=numpy.int16)。我建议使用 numpy.int16,因为您似乎已将流定义为由16位整数样本组成。如果您想尝试不同的样本格式,以下是 numpy 支持的样本格式列表:
感谢您的回答。我刚刚添加了 get_rms,并将值保存在列表中,一切都很好。I g
# Consume the generator: every iteration yields the RMS value of the
# next chunk read from the microphone.
rms_values = get_rms()
for rms in iter(rms_values):
    print(rms)