Read an HTK audio file into an array.
NOTE: Make sure you open the file in binary mode (use 'rb' as the mode).
>>> module_dir, module_name = os.path.split(__file__)
>>> fname0 = os.path.join(module_dir, "gaw0_7_st0451.mfc")
>>> with open(fname0, 'rb') as f:
...     (data0, (kind, qualifiers), samp_period) = read_htk_audio_file(f)
>>> kind
(6, 'MFCC', 'mel-frequency cepstral coefficients')
>>> qualifiers
(('C', 'is compressed'), ('E', 'has energy'), ('K', 'has CRC checksum'))
The samp_period is given in units of 100 nanoseconds (so the value 100000 below is a 10 ms period).
>>> samp_period
100000
The shape of the data is (number of vectors, number of features per vector)
>>> data0.shape
(251, 13)
# Verify that we've correctly read the data by comparing against HTK 'HList -r' output
>>> refname = os.path.join(module_dir, "gaw0_7_st0451.raw")
>>> with open(refname) as f:
...     tempdata = []
...     for line in f:
...         tokens = line.split()
...         vals = [float(t) for t in tokens]
...         tempdata.append(vals)
>>> refdata = np.array(tempdata)
# Here's a different version of the same file without compression
>>> fname1 = os.path.join(module_dir, "gaw0_7_st0451_uncomp.mfc")
>>> with open(fname1, 'rb') as f:
...     (data1, (kind, qualifiers), samp_period) = read_htk_audio_file(f)
>>> kind
(6, 'MFCC', 'mel-frequency cepstral coefficients')
>>> qualifiers
(('E', 'has energy'),)
>>> np.allclose(data0, data1)
True
>>> np.allclose(data0, refdata)
True
# Here's a version of the file where deltas and delta-deltas have been added
>>> fname2 = os.path.join(module_dir, "gaw0_7_st0451_39feats.mfc")
>>> with open(fname2, 'rb') as f:
...     (data2, (kind, qualifiers), samp_period) = read_htk_audio_file(f)
>>> kind
(6, 'MFCC', 'mel-frequency cepstral coefficients')
>>> qualifiers
(('A', 'has acceleration coefficients'), ('E', 'has energy'), ('D', 'has delta coefficients'))
>>> data2.shape
(251, 39)
Write audio data to a file in HTK format. seq is an iterable of frame data where the first index is time.
NOTE: Make sure you open the file in binary mode (use 'wb' as the mode).
>>> module_dir, module_name = os.path.split(__file__)
>>> fname0 = os.path.join(module_dir, "gaw0_7_st0451.mfc")
>>> with open(fname0, 'rb') as f:
...     (data0, (kind0, qualifiers0), samp_period0) = read_htk_audio_file(f)
>>> tmp = StringIO()
>>> write_htk_audio_file(tmp, data0)
>>> tmp.seek(0)
>>> (data1, (kind1, qualifiers1), samp_period1) = read_htk_audio_file(tmp)
>>> np.allclose(data0, data1)
True
>>> tmp.seek(0)
>>> print hashlib.md5(tmp.read()).hexdigest()
4a398c44496245d40695be0b259cdb7c