Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 7e3e23a

Browse files
author
Wei-Ning Hsu
committed
fix bugs; add spectrogram to waveform example
fix hardcoded phn_scp; add example scripts for converting log magnitude spectrogram to waveform
1 parent f3842da commit 7e3e23a
Copy full SHA for 7e3e23a

File tree

3 files changed

+39
-2
lines changed
Filter options

3 files changed

+39
-2
lines changed

‎egs/timit/example_spec2wav.py

Copy file name to clipboard
+37Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/python
2+
3+
# Copyright 2017 Wei-Ning Hsu
4+
# Apache 2.0
5+
6+
import numpy as np
7+
from parsers.dataset_parsers import kaldi_ra_dataset_parser
8+
from datasets.datasets_loaders import datasets_loader
9+
from tools.audio import convert_to_complex_spec, complex_spec_to_audio
10+
11+
# load dataset and dataset configuration
12+
d_conf = kaldi_ra_dataset_parser("data/spec_scp/train/dataset.cfg").get_config()
13+
feat_cfg = d_conf["feat_cfg"]
14+
print "\nSTFT configuration:"
15+
print "\n".join([str(k).ljust(15) + str(v) for k, v in feat_cfg.iteritems()]) + "\n"
16+
[_, _, tt_dset] = datasets_loader(d_conf, False, False, True)
17+
18+
# collect utterance log magnitude spectrogram
19+
utt, idx = "fdhc0_si1559", 1
20+
utt_feats = []
21+
for feats, _, _, _ in tt_dset.iterator_by_label(2048, "uttid", idx):
22+
utt_feats.append(feats)
23+
utt_feats = np.concatenate(utt_feats, axis=0)
24+
logmagspec = np.concatenate(tt_dset.undo_mvn(utt_feats), axis=1)
25+
assert(logmagspec.shape[0] == 1)
26+
27+
# estimate phase spectrogram from log magnitude spectrogram
28+
est_phase_opts = {
29+
"frame_size_n": feat_cfg["stft_cfg"]["frame_size_n"],
30+
"shift_size_n": feat_cfg["stft_cfg"]["shift_size_n"],
31+
"fft_size": feat_cfg["stft_cfg"]["fft_size"]}
32+
complex_spec = convert_to_complex_spec(
33+
logmagspec, None, feat_cfg["decom"], "est", feat_cfg["add_dc"], est_phase_opts)
34+
35+
# write reconstructed waveform
36+
out_path = "%s_griffinlim.wav" % utt
37+
complex_spec_to_audio(complex_spec, out_path, trim=20, **feat_cfg["stft_cfg"])

‎egs/timit/local/fbank_data_prep.sh

Copy file name to clipboardExpand all lines: egs/timit/local/fbank_data_prep.sh
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ if [ $stage -le 3 ]; then
6767
echo "$0: stage 3, prepare utt2phoneid time-aligned labels"
6868
for s in $tr $dt $tt; do
6969
data_dir=$egs_dir/fbank_scp/$s
70-
phn_scp=/data/sls/scratch/wnhsu/vae_gan/audio_encoder/egs/timit/data/${s}/phn.scp
70+
# Build the per-split phone scp path from this loop's iterator ($s);
# ${d} is not the iterator of this loop, so it would not select the split.
phn_scp=$TIMIT_KALDI_EGS/data/local/data/${s}_phn.scp
7171
python src/tools/kaldi/phn_to_talabel.py \
7272
$phn_scp $map_file $data_dir/utt2phoneid.talabel \
7373
$data_dir/phone2phoneid || exit 1;

‎egs/timit/local/spec_data_prep.sh

Copy file name to clipboardExpand all lines: egs/timit/local/spec_data_prep.sh
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ if [ $stage -le 3 ]; then
7272
echo "$0: stage 3, prepare utt2phoneid time-aligned labels"
7373
for d in $required; do
7474
data_dir=$egs_dir/spec_scp/$d
75-
phn_scp=/data/sls/scratch/wnhsu/vae_gan/audio_encoder/egs/timit/data/${d}/phn.scp
75+
phn_scp=$TIMIT_KALDI_EGS/data/local/data/${d}_phn.scp
7676
python src/tools/kaldi/phn_to_talabel.py \
7777
$phn_scp $map_file $data_dir/utt2phoneid.talabel \
7878
$data_dir/phone2phoneid || exit 1;

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.