Python PyAudio-将stream.read转换为int以获得振幅_Python_Python 2.7_Stream_Pyaudio

Python PyAudio-将stream.read转换为int以获得振幅

python python-2.7 stream

Python PyAudio-将stream.read转换为int以获得振幅,python,python-2.7,stream,pyaudio,Python,Python 2.7,Stream,Pyaudio,我试图录制音频，同时打印录制信号的振幅。所以我将所有数据保存在stream.read中。但是当我试图打印它们时，我有一个字节字符串，没有整数。我想知道如何转换这些符号以获得振幅这是我的代码： import pyaudio import wave CHUNK = 1024 FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 44100 RECORD_SECONDS = 5 WAVE_OUTPUT_FILENAME = "output.wav" p

我试图录制音频，同时打印录制信号的振幅。所以我将所有数据保存在stream.read中。但是当我试图打印它们时，我有一个字节字符串，没有整数。我想知道如何转换这些符号以获得振幅

这是我的代码：

import pyaudio
import wave

# Capture settings: mono, 16-bit signed samples at 44.1 kHz, read CHUNK
# frames at a time for RECORD_SECONDS seconds.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)

print("* recording")

# Each read returns one buffer of raw sample bytes
# (2 bytes, i.e. 16 bits, per sample per channel).
total_reads = int(RATE / CHUNK * RECORD_SECONDS)
frames = [stream.read(CHUNK) for _ in range(total_reads)]

print("* done recording")

# Shut the stream down before releasing PortAudio.
stream.stop_stream()
stream.close()
p.terminate()

# Dump every captured buffer as-is (still raw bytes, not integers).
for data in frames:
    print(data)

这就是我得到的：

��

%�� (��)��,��.��%��#��

PyAudio正在以字符串中的字节形式为您提供二进制编码的音频帧。有关如何打印帧的可读表示形式，请参阅此问题的答案：

您当然可以从以下代码中获得启发：

#!/usr/bin/python

# open a microphone in pyAudio and listen for taps

import pyaudio
import struct
import math

# Starting RMS level (on the normalized 0.0-1.0 scale) above which a block
# counts as "noisy"; TapTester adjusts its own copy at run time.
INITIAL_TAP_THRESHOLD = 0.010
# 16-bit signed samples.
FORMAT = pyaudio.paInt16 
# Multiplying a signed 16-bit sample by this maps it into roughly +/- 1.0.
SHORT_NORMALIZE = (1.0/32768.0)
CHANNELS = 2
RATE = 44100  
# Length of one analysis block in seconds, and the same length in frames.
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)
# if we get this many noisy blocks in a row, increase the threshold
# (the number of blocks that make up 15 seconds)
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME                    
# if we get this many quiet blocks in a row, decrease the threshold
# (the number of blocks that make up 120 seconds)
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME 
# if the noise was longer than this many blocks, it's not a 'tap'
# (the number of blocks that make up 0.15 seconds)
MAX_TAP_BLOCKS = 0.15/INPUT_BLOCK_TIME

def get_rms( block ):
    """Return the RMS amplitude of a buffer of 16-bit samples.

    RMS amplitude is the square root of the mean, over time, of the
    squared amplitude.  Each sample is scaled by SHORT_NORMALIZE first,
    so the result is on a 0.0-1.0 scale.

    Args:
        block: raw byte string of native-endian signed 16-bit samples,
            as returned by a paInt16 stream read.

    Returns:
        The RMS amplitude as a float; 0.0 when the block holds no
        complete sample.

    Raises:
        struct.error: if the block length is not a multiple of two bytes.
    """
    # One short comes out for every two bytes in the buffer.  Floor
    # division keeps the count an integer on Python 3 as well.
    count = len(block) // 2
    if count == 0:
        # Nothing to average; avoid dividing by zero below.
        return 0.0

    shorts = struct.unpack("%dh" % count, block)

    sum_squares = 0.0
    for sample in shorts:
        # sample is a signed short in +/- 32768; normalize it to 1.0
        n = sample * SHORT_NORMALIZE
        sum_squares += n*n

    return math.sqrt( sum_squares / count )

class TapTester(object):
    """Listen on a microphone stream and report short, loud 'taps'.

    Each call to listen() reads one block of audio and classifies it as
    noisy or quiet by comparing its RMS amplitude with an adaptive
    threshold.  A run of noisy blocks no longer than MAX_TAP_BLOCKS that
    is followed by a quiet block is reported through tapDetected().
    """

    def __init__(self):
        """Open the input stream and reset the detector state."""
        self.pa = pyaudio.PyAudio()
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so the first quiet block after
        # start-up is not reported as the end of a tap.
        self.noisycount = MAX_TAP_BLOCKS+1
        self.quietcount = 0
        self.errorcount = 0

    def stop(self):
        """Close the input stream and release the PyAudio instance."""
        self.stream.close()
        self.pa.terminate()

    def find_input_device(self):
        """Return the index of the first device that looks like a microphone.

        Every device is printed as it is examined.  A device matches when
        its lower-cased name contains "mic" or "input".

        Returns:
            The matching device index, or None when nothing matched (the
            caller then falls back to the default input device).
        """
        for i in range( self.pa.get_device_count() ):
            devinfo = self.pa.get_device_info_by_index(i)
            print( "Device %d: %s"%(i,devinfo["name"]) )

            for keyword in ["mic","input"]:
                if keyword in devinfo["name"].lower():
                    print( "Found an input: device %d - %s"%(i,devinfo["name"]) )
                    return i

        print( "No preferred input found; using default input device." )
        return None

    def open_mic_stream( self ):
        """Open and return an input stream on the preferred input device."""
        device_index = self.find_input_device()

        stream = self.pa.open(   format = FORMAT,
                                 channels = CHANNELS,
                                 rate = RATE,
                                 input = True,
                                 input_device_index = device_index,
                                 frames_per_buffer = INPUT_FRAMES_PER_BLOCK)

        return stream

    def tapDetected(self):
        """Called once for every detected tap."""
        print("Tap!")

    def listen(self):
        """Read one block of audio and update the tap-detection state."""
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            # The read failed: count the error, report it, and skip
            # this block.
            self.errorcount += 1
            print( "(%d) Error recording: %s"%(self.errorcount,e) )
            self.noisycount = 1
            return

        amplitude = get_rms( block )
        if amplitude > self.tap_threshold:
            # noisy block
            self.quietcount = 0
            self.noisycount += 1
            if self.noisycount > OVERSENSITIVE:
                # turn down the sensitivity
                self.tap_threshold *= 1.1
        else:
            # quiet block: a short enough preceding noisy run was a tap
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1
            if self.quietcount > UNDERSENSITIVE:
                # turn up the sensitivity
                self.tap_threshold *= 0.9

if __name__ == "__main__":
    # Listen for 1000 blocks, then close the stream even if a read raised.
    tt = TapTester()
    try:
        for i in range(1000):
            tt.listen()
    finally:
        tt.stop()

！/usr/bin/python
#在pyAudio中打开麦克风并聆听敲击声
导入pyaudio
导入结构
输入数学
初始_-TAP_阈值=0.010
格式=pyaudio.paInt16
SHORT_NORMALIZE=（1.0/32768.0）
通道=2
费率=44100
输入\块\时间=0.05
每个块的输入帧=int（速率*输入块时间）
#如果我们在一行中获得如此多的噪声块，请增加阈值
过敏=15.0/输入\块\时间
#如果我们连续获得这么多安静的块，请降低阈值
欠灵敏=120.0/输入\块\时间
#如果噪音超过这么多街区，那就不是“敲击声”
最大点击块=0.15/输入块时间
def get_rms（块）：
#均方根振幅定义为
#振幅平方随时间的平均值。
#所以我们需要把这个字节串转换成
#一个由16位样本组成的字符串。。。
#我们每个人少一个
#字符串中有两个字符。
计数=长度（块）/2
format=“%dh”%（计数）
shorts=结构解包（格式、块）
#在块上迭代。
平方和=0.0
对于短裤样品：
#样本为+/-32768中的有符号短路。
#将其标准化为1.0
n=样本*短\标准化
平方和+=n*n
返回数学.sqrt（平方和/计数）
类别测试仪（对象）：
定义初始化（自）：
self.pa=pyaudio.pyaudio（）
self.stream=self.open\u mic\u stream（）
self.tap\u threshold=初始\u tap\u threshold
self.noisycount=最大抽头块数+1
self.quietcount=0
self.errorcount=0
def停止（自）：
self.stream.close（）
def查找输入设备（自身）：
设备索引=无
对于范围内的i（self.pa.get_device_count（））：
devinfo=self.pa.get\u设备\u信息\u索引（i）
打印（“设备%d:%s%”（i，设备信息[“名称]））
对于[“麦克风”、“输入”]中的关键字：
devinfo[“name”]中的if关键字。lower（）
打印（“找到输入：设备%d-%s”%（i，设备信息[“名称”]））
设备索引=i
返回设备索引
如果设备索引==无：
打印（“未找到首选输入；使用默认输入设备。”）
返回设备索引
def open_mic_流（自）：
设备索引=self.find\u输入设备（）
stream=self.pa.open（format=format，
频道=频道，
比率=比率，
输入=真，
输入设备索引=设备索引，
每个缓冲区的帧数=每个块的输入帧数）
回流
检测到def TAP（自身）：
打印“点击！”
def监听（self）：
尝试：
block=self.stream.read（每个块输入帧）
除IOError外，e：
#该死。
self.errorcount+=1
打印（（%d）错误记录：%s“%（self.errorcount，e））
self.noisycount=1
返回
振幅=获取均方根值（块）
如果振幅>自拍阈值：
#噪声块
self.quietcount=0
自噪声计数+=1
如果self.noisycount>过敏感：
#把灵敏度调低
self.tap_阈值*=1.1
其他：
#安静的街区。
我认为你可以这样做：
import struct

data = stream.read(CHUNK)
# Iterating over the buffer directly would yield single bytes, not samples.
# With a paInt16 stream every sample is a 2-byte signed short ("h"), so
# unpack the buffer into integers first and print those.
samples = struct.unpack("%dh" % (len(data) // 2), data)
for each in samples:
    print(each)

我猜这个问题很老了，我一直在寻找其他答案，但在我的项目中，我使用了类似的东西
#Lets assume the constants are defined somewhere

import struct
import pyaudio
import numpy as np

# Open a capture-only stream: mono, 16-bit signed samples at 44.1 kHz.
audio = pyaudio.PyAudio()
self.input = audio.open(
    format=pyaudio.paInt16,
    channels=1,
    rate=44100,
    input=True,
    output=False,
    frames_per_buffer=1024,
)

# Read one chunk of raw bytes, decode it as signed shorts ("h" matches
# paInt16), and wrap the resulting tuple in a NumPy array.
raw_chunk = self.input.read(self.CHUNK)
sample_format = str(self.CHUNK) + 'h'
samples = struct.unpack(sample_format, raw_chunk)
wf_data = np.array(samples)

paInt16和“h”对应。您可以在此处找出与pyaudio格式匹配的字母。

归功于：
处理音频时，您可能需要信号缓冲区的RMS（均方根）值。我相信它可以更好地“查看”音频信号的整体功率
Python标准库有一个名为audioop的模块，该模块有一个名为rms的函数
import pyaudio
import time
import audioop

def get_rms():
    """Yield the RMS value of successive chunks read from an input stream.

    This is a generator: it opens a mono, 16-bit, 44.1 kHz input stream,
    then yields one RMS value per CHUNK frames read, indefinitely.  The
    stream and the PyAudio instance are released when the generator is
    closed or an error propagates out of it.

    Yields:
        The value of audioop.rms() for each chunk read.
    """
    # NOTE: audioop was deprecated in Python 3.11 and removed in 3.13
    # (PEP 594); this function needs an interpreter that still ships it.
    CHUNK = 8
    WIDTH = 2  # bytes per sample
    CHANNELS = 1
    RATE = 44100

    p = pyaudio.PyAudio()
    # Bound up front so the cleanup below is safe even when open() fails.
    stream = None

    try:
        stream = p.open(format=p.get_format_from_width(WIDTH),
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=False,
                        frames_per_buffer=CHUNK)
        # wait a second to allow the stream to be setup
        time.sleep(1)
        while True:
            # read the data
            data = stream.read(CHUNK, exception_on_overflow=False)
            # The width passed to rms() must match the stream's sample width.
            yield audioop.rms(data, WIDTH)
    finally:
        # Close the stream first, then release PortAudio.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

您可以像这样使用函数
# get_rms() is a generator function: calling it returns a generator, and
# each loop iteration pulls the next value it yields and prints it.
rms_values = get_rms()
for rms in rms_values:
    print(rms)

谢谢你的回答。我刚刚在我的for循环中添加了一行 decoded = numpy.fromstring(data, 'Float32')，但结果并不理想。我得到了一个由非常小的数字组成的列表，如：3.67348991e-40 6.42851276e-40 3.67355998e-40 6.42868091e-40 2.75502285e-40 1.10201895e-39 nan 4.59204105e-40 1.19389508e-39 1.37756747e-39

您需要为数据使用正确的格式。请尝试 decoded = numpy.fromstring(data, dtype=numpy.int16)。我建议使用numpy.int16，因为您似乎已将流定义为由16位整数样本组成。如果您想尝试不同的样本格式，以下是numpy支持的样本格式列表：

感谢您的回答。我刚刚添加了get_rms，并将值保存在列表中，一切都很好。I g
# get_rms() is a generator function: calling it returns a generator, and
# each loop iteration pulls the next value it yields and prints it.
rms_values = get_rms()
for rms in rms_values:
    print(rms)