2017-05-18 34 views
4

我似乎難以確定該信任哪個工具……我可以信任哪一種工具?

我一直在測試的工具是 Librosa 和 Kaldi,用它們把音訊檔(audio file)的 40 個濾波器組能量繪製成圖,以建立可視化數據集。

濾波器組能量是在 Kaldi 中使用以下配置提取的。

fbank.conf

--htk-compat=false 
--window-type=hamming 
--sample-frequency=16000 
--num-mel-bins=40 
--use-log-fbank=true 

提取的數據使用 librosa 的繪圖功能作圖。Librosa 使用 matplotlib 的 pcolormesh,這意味着除了 librosa 提供更易用的 API 之外,不應該有任何區別。

# Print basic statistics of the Kaldi static features before plotting.
print static.shape 
print type(static) 
print np.min(static) 
print np.max(static) 
fig = plt.figure() 
# Draw the transposed feature matrix with librosa's spectrogram helper.
librosa.display.specshow(static.T,sr=16000,x_axis='frames',y_axis='mel',hop_length=160,cmap=cm.jet) 
#plt.axis('off') 
plt.title("log mel power spectrum of " + name) 
plt.colorbar(format='%+02.0f dB') 
plt.tight_layout() 
# Save the figure under the `plot` path prefix, then display it.
plt.savefig(plot+"/"+name+"_plot_static_conv.png") 
plt.show() 

輸出:

(474, 40) 
<type 'numpy.ndarray'> 
-1.828067 
22.70058 
Got bus address: "unix:abstract=/tmp/dbus-aYbBS1JWyw,guid=17dd413abcda54272e1d93d159174cdf" 
Connected to accessibility bus at: "unix:abstract=/tmp/dbus-aYbBS1JWyw,guid=17dd413abcda54272e1d93d159174cdf" 
Registered DEC: true 
Registered event listener change listener: true 

enter image description here

在 Librosa 中,類似的圖是這樣創建的:

# Path of the audio file to analyse.
audio_path="../../../../Dropbox/SI1392.wav" 
#audio_path = librosa.util.example_audio_file() 
print "Example audio found" 
# NOTE(review): no `sr` argument is passed here, so librosa presumably
# resamples to its own default rate rather than the 16000 Hz used in the
# Kaldi configuration -- confirm against the librosa.load documentation.
y, sr = librosa.load(audio_path) 
print "Example audio loaded" 
# 40-band mel spectrogram with a 400-sample FFT and a 160-sample hop.
specto = librosa.feature.melspectrogram(y, sr=sr, n_fft=400, hop_length=160, n_mels=40) 
print "Example audio spectogram" 
log_specto = librosa.core.logamplitude(specto) 

print "min and max" 
print np.min(log_specto) 
print np.max(log_specto) 
print "Example audio log specto" 

# Plot the log-scaled spectrogram with the jet colormap.
plt.figure(figsize=(12,4)) 
librosa.display.specshow(log_specto,sr=sr,x_axis='frames', y_axis='mel', hop_length=160,cmap=cm.jet) 

plt.title('mel power spectrogram') 

plt.colorbar(format='%+02.0f dB') 

plt.tight_layout() 
print "See" 

print specto.shape 

print log_specto.shape 
plt.show() 

輸出如下:

libraries loaded! 
Example audio found 
Example audio loaded 
Example audio spectogram 
min and max 
-84.6796661558 
-4.67966615584 
Example audio log specto 
See 
(40, 657) 
(40, 657) 

enter image description here

除了顏色之外,兩者顯示的圖很相似,但能量範圍似乎有些不同。

Kaldi 的最小/最大值爲 -1.828067/22.70058,

而 Librosa 的最小/最大值爲 -84.6796661558/-4.67966615584。

問題是我想把這些圖保存爲 numpy 陣列,用於進一步處理。

這似乎會產生不同的圖……使用 Librosa 數據時,我是這樣創建圖的:

plt.figure() 
# Scale the log spectrogram with the shared MinMaxScaler and colour-map it.
min_max_scaled_log_specto = min_max_scaler.fit_transform(log_specto) 
convert = plt.get_cmap(cm.jet) 
numpy_static = convert(min_max_scaled_log_specto) 
# NOTE: the image shown below is the raw `log_specto`; `numpy_static` (the
# scaled, colour-mapped array) is computed above but never displayed here.
plt.imshow(np.flipud(log_specto), aspect='auto') 
plt.colorbar() 
print "Sooo?" 
plt.show() 

enter image description here

這是完美的......它類似於原始數據集..

但使用 Kaldi 數據時,這段代碼給出的是這張圖:

# Pass the flipped, transposed Kaldi features straight through the jet
# colormap (no normalisation step) and show the resulting array as an image.
convert = plt.get_cmap(cm.jet) 
numpy_output_static = convert(np.flipud(static.T)) 
plt.imshow(numpy_output_static,aspect = 'auto') 
plt.show() 
# Wait for a line of input before continuing.
raw_input("sadas") 

enter image description here

我從以前的帖子得知,出現紅色的原因可能與數值範圍有關,先進行歸一化會有幫助——但結果卻是這樣:

# Rescale the features to the range (0, 1) before applying the colormap.
# NOTE(review): MinMaxScaler presumably rescales each column independently
# rather than the array as a whole -- confirm against the sklearn docs.
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0,1)) 
convert = plt.get_cmap(cm.jet) 
numpy_output_static = convert(min_max_scaler.fit_transform(np.flipud(static.T))) 
plt.imshow(numpy_output_static,aspect = 'auto') 
plt.show() 

enter image description here

但是,這與 Kaldi 的原始圖完全不像……那麼,爲什麼它看起來像這樣?爲什麼我能夠用從 Librosa 提取的能量而不是從 Kaldi 提取的能量來繪製它?

Librosa 的最小工作示例:

# 
# Minimal example of a Librosa plot. 
# Made for testing the plot, and for testing accurate 
# conversion between the two parts. 
# 

import os 
import sys 
from os import listdir 
from os.path import isfile, join 
import numpy as np 
import matplotlib 
matplotlib.use('TkAgg') 
import matplotlib.pyplot as plt 
from mpl_toolkits.mplot3d import Axes3D 
from matplotlib.colors import Normalize 
import matplotlib 
from PIL import Image 
import librosa 
import colormaps as cmaps 
import librosa.display 
import ast 
from scipy.misc import toimage 
from matplotlib import cm 
from sklearn import preprocessing 

print "libraries loaded!" 
# Scaler shared by the plotting code below; maps values to the range (0, 1).
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0,1)) 

# Path of the audio file to analyse.
audio_path="../../../../Dropbox/SI1392.wav" 
#audio_path = librosa.util.example_audio_file() 
print "Example audio found" 
# NOTE(review): no `sr` argument is passed here, so librosa presumably
# resamples to its own default rate rather than the 16000 Hz used in the
# Kaldi configuration -- confirm against the librosa.load documentation.
y, sr = librosa.load(audio_path) 
print "Example audio loaded" 
# 40-band mel spectrogram with a 400-sample FFT and a 160-sample hop.
specto = librosa.feature.melspectrogram(y, sr=sr, n_fft=400, hop_length=160, n_mels=40) 
print "Example audio spectogram" 
log_specto = librosa.core.logamplitude(specto) 

print "min and max" 
print np.min(log_specto) 
print np.max(log_specto) 
print "Example audio log specto" 

# First figure: the log-scaled spectrogram drawn by librosa with the jet colormap.
plt.figure(figsize=(12,4)) 
librosa.display.specshow(log_specto,sr=sr,x_axis='frames', y_axis='mel', hop_length=160,cmap=cm.jet) 

plt.title('mel power spectrogram') 

plt.colorbar(format='%+02.0f dB') 

plt.tight_layout() 
print "See" 
#plt.show() 

print specto.shape 

print log_specto.shape 

# Second figure: the same data drawn with imshow.
plt.figure() 
min_max_scaled_log_specto = min_max_scaler.fit_transform(log_specto) 
convert = plt.get_cmap(cm.jet) 
numpy_static = convert(min_max_scaled_log_specto) 
# NOTE: the image shown below is the raw `log_specto`; `numpy_static` (the
# scaled, colour-mapped array) is computed above but never displayed here.
plt.imshow(np.flipud(log_specto), aspect='auto') 
plt.colorbar() 
print "Sooo?" 
plt.show() 

Kaldi 的最小工作示例 - (真實數據):

# 
# Extracted version: 
# 
# 
# 

import numpy as np 
import matplotlib.pyplot as plt 
import matplotlib 
from PIL import Image 
import librosa 
import librosa.display 
from matplotlib import cm 
from sklearn import preprocessing 
import ast 
import urllib 
import os 
import sys 
from os import listdir 
from os.path import isfile, join 

min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0,1)) 

def make_plot_store_data(name,interweaved,static,delta,delta_delta,isTrain,isTest,isDev):
    """Plot the static features and optionally show a colour-mapped copy.

    Prints the shape, type, minimum and maximum of ``static`` and draws
    ``static.T`` with ``librosa.display.specshow``, titled with ``name``.
    When ``isTrain == True`` a second figure shows the vertically flipped
    ``static.T`` after it has been passed through the module-level
    ``min_max_scaler`` and the jet colormap; the function then waits for a
    line of input.

    ``interweaved``, ``delta``, ``delta_delta``, ``isTest`` and ``isDev``
    are accepted but not used by this function.
    """
    print(static.shape)
    print(type(static))
    print(np.min(static))
    print(np.max(static))

    # First figure: the features as drawn by librosa.
    plt.figure()
    librosa.display.specshow(
        static.T,
        sr=16000,
        hop_length=160,
        x_axis='frames',
        y_axis='mel',
        cmap=cm.jet,
    )
    plt.title("log mel power spectrum of " + name)
    plt.colorbar(format='%+02.0f dB')
    plt.tight_layout()

    # The second figure is drawn only when isTrain == True.
    if not isTrain == True:
        return

    plt.figure()
    jet = plt.get_cmap(cm.jet)
    flipped = np.flipud(static.T)
    coloured = jet(min_max_scaler.fit_transform(flipped))
    plt.imshow(coloured, aspect='auto')
    plt.show()
    raw_input("sadas")

link = "https://gist.githubusercontent.com/Miail/51311b34f5e5333bbddf9cb17c737ea4/raw/786b72477190023e93b9dd0cbbb43284ab59921b/feature.txt"

# Download the feature dump and collect every numeric token into one flat list.
# A line with exactly two tokens supplies the file name (its first token);
# every other non-empty line supplies numbers, with a trailing ']' token
# dropped when present.
temp_list = []
f = urllib.urlopen(link)
try:
    for line in f:
        data_splitted = line.split()
        if not data_splitted:
            # Blank line: nothing to parse (indexing [-1] below would fail).
            continue
        if len(data_splitted) == 2:
            file_name = data_splitted[0]
            continue
        if data_splitted[-1] == ']':
            data_splitted = data_splitted[:-1]
        temp_list.extend(ast.literal_eval(i) for i in data_splitted)
finally:
    # Release the connection even if parsing fails part-way through.
    f.close()

# Each row holds `dimension` values: the first 40 columns are taken as the
# static features, the next 40 as delta and the last 40 as delta-delta.
dimension = 120
# Floor division keeps the reshape arguments integral.
entries = len(temp_list) // dimension
data = np.array(temp_list)
interweaved = data.reshape(entries, dimension)
static = interweaved[:, :-80]
delta = interweaved[:, 40:-40]
delta_delta = interweaved[:, 80:]
plot_interweaved = data.reshape(entries * 3, dimension // 3)
print(static.shape)
print(delta.shape)
print(delta_delta.shape)
make_plot_store_data(file_name, plot_interweaved, static, delta, delta_delta, True, False, False)

回答

1

我似乎從另一個類似的帖子中找到了答案……

問題出在我的歸一化方式……所以,不應該這樣做:

# Call used in the question: scale with MinMaxScaler, then apply the colormap.
numpy_output_static = convert(min_max_scaler.fit_transform(np.flipud(static.T))) 

而應該這樣做:

# Normalise with a single min/max taken over the whole feature matrix, then
# apply the colormap to the flipped, transposed features.
norm_static = matplotlib.colors.Normalize(vmin=static.min(),vmax=static.max())
numpy_output_static = convert(norm_static(np.flipud(static.T)))
相關問題