Skip to content

Commit 070c77e

Browse files
yfyeung, yaozengwei, yifanyang
authored
Add Blankskip to Zipformer+CTC (k2-fsa#730)
* init files * add ctc as auxiliary loss and ctc_decode.py * tuning the scalar of HLG score for 1best, nbest and nbest-oracle * rename to pruned_transducer_stateless7_ctc * fix doc * fix bug, recover the hlg scores * modify ctc_decode.py, move out the hlg scale * fix hlg_scale * add export.py and pretrained.py, and so on * upload files, update README.md and RESULTS.md * add CI test * update .gitignore * create symlinks * Add Blank Skip to Zipformer+CTC * Add warmup to blank skip * Add warmup to blank skip * Add __init__.py * Add parameters_names to Adam * Add warmup to blank skip * Modify frame_reducer * Modify frame_reducer * Add Blank Skip to decode. * Add ctc_decode.py * Add blank skip to Zipformer+CTC * process conflict * process conflict * modify ctc_guild_decode_bk.py * modify Lconv * produce the conflict * Add export.py * finish export * fix for running black * Add ci test * Add ci-test * chmod * chmod * fix bug for ci-test * fix bug for ci-test * fix bug for ci-test * rename the dirname * rename the dirname * change dirname * change dirname * fix notes * add pretrained.py * add pretrained.py * add pretrained.py * add pretrained.py * add pretrained.py * add pretrained.py * fix * fix * fix * finished * add the Copyright info and notes Co-authored-by: Zengwei Yao <[email protected]> Co-authored-by: yifanyang <[email protected]>
1 parent 65d7192 commit 070c77e

31 files changed

+6372
-6
lines changed

.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,4 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
148148
done
149149

150150
rm pruned_transducer_stateless7_ctc/exp/*.pt
151-
fi
151+
fi
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#!/usr/bin/env bash
2+
3+
set -e
4+
5+
log() {
  # Timestamped logger (adapted from espnet).
  # Prints: "<YYYY-mm-dd HH:MM:SS> (<caller file>:<caller line>:<caller func>) <message>"
  # Arguments: $* - message text; backslash escapes in it are interpreted.
  # Outputs:   one line on stdout.
  local src=${BASH_SOURCE[1]##*/}
  local where="${src}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  # %b expands backslash escapes in the argument the same way `echo -e` does.
  printf '%b\n' "$(date '+%Y-%m-%d %H:%M:%S') (${where}) $*"
}
10+
11+
# All relative paths below are resolved from the LibriSpeech ASR recipe directory.
cd egs/librispeech/ASR

repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2022-12-14

log "Downloading pre-trained model from $repo_url"
# GIT_LFS_SKIP_SMUDGE=1 clones without downloading LFS payloads;
# the files that are actually needed are pulled one by one further down.
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
# The clone directory is named after the last path component of the URL.
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo/"
soxi "$repo"/test_wavs/*.wav
ls -lh "$repo"/test_wavs/*.wav

pushd "$repo/exp"
git lfs pull --include "data/lang_bpe_500/HLG.pt"
git lfs pull --include "data/lang_bpe_500/L.pt"
git lfs pull --include "data/lang_bpe_500/LG.pt"
git lfs pull --include "data/lang_bpe_500/Linv.pt"
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/cpu_jit.pt"
git lfs pull --include "exp/pretrained.pt"
# Expose the checkpoint as epoch-99.pt; the export step later in this script
# is invoked with `--epoch 99 --avg 1` against this directory.
ln -s pretrained.pt epoch-99.pt
ls -lh *.pt
popd
35+
36+
log "Export to torchscript model"
# --epoch 99 / --avg 1 with --use-averaged-model false: presumably loads the
# single checkpoint exp/epoch-99.pt (the symlink to pretrained.pt) - confirm
# against export.py.
./pruned_transducer_stateless7_ctc_bs/export.py \
  --exp-dir "$repo/exp" \
  --use-averaged-model false \
  --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
  --epoch 99 \
  --avg 1 \
  --jit 1

ls -lh "$repo"/exp/*.pt

log "Decode with models exported by torch.jit.script()"

# Transducer decoding with the TorchScript model on the three bundled test waves.
./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \
  --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
  --nn-model-filename "$repo/exp/cpu_jit.pt" \
  "$repo/test_wavs/1089-134686-0001.wav" \
  "$repo/test_wavs/1221-135766-0001.wav" \
  "$repo/test_wavs/1221-135766-0002.wav"

# CTC decoding with the TorchScript model, once per decoding method.
# Note: this script takes --model-filename, not --nn-model-filename.
for m in ctc-decoding 1best; do
  ./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
    --model-filename "$repo/exp/cpu_jit.pt" \
    --words-file "$repo/data/lang_bpe_500/words.txt" \
    --HLG "$repo/data/lang_bpe_500/HLG.pt" \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    --method "$m" \
    --sample-rate 16000 \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"
done
68+
69+
# Greedy search from the raw checkpoint, sweeping the per-frame symbol limit.
for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"

  ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame "$sym" \
    --checkpoint "$repo/exp/pretrained.pt" \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"
done

# Beam-search variants from the raw checkpoint, all with beam size 4.
for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"

  ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --checkpoint "$repo/exp/pretrained.pt" \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"
done

# CTC decoding from the raw checkpoint, once per decoding method.
for m in ctc-decoding 1best; do
  ./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
    --checkpoint "$repo/exp/pretrained.pt" \
    --words-file "$repo/data/lang_bpe_500/words.txt" \
    --HLG "$repo/data/lang_bpe_500/HLG.pt" \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    --method "$m" \
    --sample-rate 16000 \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"
done
107+
108+
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"

# Full test-clean/test-other decoding only runs for scheduled builds or when
# the triggering label is "run-decode". Unset variables expand to the empty
# string here, so the branch is simply skipped outside CI.
if [[ "${GITHUB_EVENT_NAME}" == "schedule" || "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
  mkdir -p pruned_transducer_stateless7_ctc_bs/exp
  # Absolute link targets, so the links resolve regardless of where they live.
  ln -s "$PWD/$repo/exp/pretrained.pt" pruned_transducer_stateless7_ctc_bs/exp/epoch-999.pt
  ln -s "$PWD/$repo/data/lang_bpe_500" data/

  ls -lh data
  ls -lh pruned_transducer_stateless7_ctc_bs/exp

  log "Decoding test-clean and test-other"

  # use a small value for decoding with CPU
  max_duration=100

  # Transducer decoding; --epoch 999 --avg 1 refers to the epoch-999.pt link above.
  for method in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $method"

    ./pruned_transducer_stateless7_ctc_bs/decode.py \
      --decoding-method "$method" \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration "$max_duration" \
      --exp-dir pruned_transducer_stateless7_ctc_bs/exp
  done

  # CTC decoding with the same checkpoint.
  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc_bs/ctc_decode.py \
      --epoch 999 \
      --avg 1 \
      --exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \
      --max-duration "$max_duration" \
      --use-averaged-model 0 \
      --decoding-method "$m" \
      --hlg-scale 0.6
  done

  # Remove the checkpoint link(s) from exp/ once decoding is done; the
  # workflow uploads this directory as an artifact afterwards.
  rm pruned_transducer_stateless7_ctc_bs/exp/*.pt
fi
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Copyright 2022 Fangjun Kuang ([email protected])
2+
3+
# See ../../LICENSE for clarification regarding multiple authors
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
name: run-librispeech-2022-12-15-stateless7-ctc-bs
# zipformer

# Triggers: pushes to master, labelled pull requests, and a nightly schedule.
on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]

  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

jobs:
  run_librispeech_2022_12_15_zipformer_ctc_bs:
    # Run on push/schedule, or when the label that was just added is one of
    # 'ready', 'run-decode' or 'blank-skip'.
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]

      fail-fast: false

    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'

      - name: Install Python dependencies
        run: |
          # One `pip install` per non-comment line of requirements-ci.txt.
          # (-n and -L are mutually exclusive in xargs; only -L 1 is kept.)
          grep -v '^#' ./requirements-ci.txt | xargs -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf

      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25

      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh

      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other

      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh

      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh

      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other-v2

      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh

      - name: Inference with pre-trained model
        shell: bash
        env:
          # Read by the CI script to decide whether to run full test-set decoding.
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*

          sudo apt-get -qq install git-lfs tree sox
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

          .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh

      - name: Display decoding results for librispeech pruned_transducer_stateless7_ctc_bs
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          cd egs/librispeech/ASR/
          tree ./pruned_transducer_stateless7_ctc_bs/exp

          cd pruned_transducer_stateless7_ctc_bs
          echo "results for pruned_transducer_stateless7_ctc_bs"
          echo "===greedy search==="
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

          echo "===fast_beam_search==="
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

          echo "===modified beam search==="
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

          echo "===ctc decoding==="
          find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

          echo "===1best==="
          find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

      - name: Upload decoding results for librispeech pruned_transducer_stateless7_ctc_bs
        uses: actions/upload-artifact@v2
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        with:
          # The matrix defines only `os` and `python-version`, so the artifact
          # name is built from those keys (there is no `matrix.torch`).
          name: python-${{ matrix.python-version }}-${{ matrix.os }}-cpu-pruned_transducer_stateless7-ctc-bs-2022-12-15
          path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/exp/

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@ node_modules
3333

3434
*.param
3535
*.bin
36+
.DS_Store

egs/gigaspeech/ASR/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
log-*
2+
.DS_Store

egs/librispeech/ASR/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
log-*
2+
.DS_Store

egs/librispeech/ASR/pruned_transducer_stateless7/optim.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2022 Xiaomi Corp. (authors: Daniel Povey)
1+
# Copyright 2022 Xiaomi Corp. (authors: Daniel Povey)
22
#
33
# See ../LICENSE for clarification regarding multiple authors
44
#

egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
3232
(1) ctc-decoding
3333
./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
34-
--nn-model-filename ./pruned_transducer_stateless7_ctc/exp/cpu_jit.pt \
34+
--model-filename ./pruned_transducer_stateless7_ctc/exp/cpu_jit.pt \
3535
--bpe-model data/lang_bpe_500/bpe.model \
3636
--method ctc-decoding \
3737
--sample-rate 16000 \
@@ -40,7 +40,7 @@
4040
4141
(2) 1best
4242
./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
43-
--nn-model-filename ./pruned_transducer_stateless7_ctc/exp/cpu_jit.pt \
43+
--model-filename ./pruned_transducer_stateless7_ctc/exp/cpu_jit.pt \
4444
--HLG data/lang_bpe_500/HLG.pt \
4545
--words-file data/lang_bpe_500/words.txt \
4646
--method 1best \
@@ -51,7 +51,7 @@
5151
5252
(3) nbest-rescoring
5353
./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
54-
--nn-model-filename ./pruned_transducer_stateless7_ctc/exp/cpu_jit.pt \
54+
--model-filename ./pruned_transducer_stateless7_ctc/exp/cpu_jit.pt \
5555
--HLG data/lang_bpe_500/HLG.pt \
5656
--words-file data/lang_bpe_500/words.txt \
5757
--G data/lm/G_4_gram.pt \
@@ -63,7 +63,7 @@
6363
6464
(4) whole-lattice-rescoring
6565
./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
66-
--nn-model-filename ./pruned_transducer_stateless7_ctc/exp/cpu_jit.pt \
66+
--model-filename ./pruned_transducer_stateless7_ctc/exp/cpu_jit.pt \
6767
--HLG data/lang_bpe_500/HLG.pt \
6868
--words-file data/lang_bpe_500/words.txt \
6969
--G data/lm/G_4_gram.pt \

egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/__init__.py

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../pruned_transducer_stateless2/asr_datamodule.py

0 commit comments

Comments
 (0)