You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from argparse import ArgumentParser
import os
import soundfile as sf
import time
import numpy as np
import torchaudio
import torch
from funasr import AutoModel
def select_shard(lines, shard_index, num_shards=8):
    """Return the contiguous slice of *lines* owned by one worker.

    The list is cut into ``num_shards`` equal pieces of
    ``len(lines) // num_shards`` entries; the last shard
    (``shard_index == num_shards - 1``) additionally receives the
    remainder so that every entry is assigned to exactly one shard.

    Args:
        lines: Sequence of entries to split.
        shard_index: Zero-based index of the shard to return.
        num_shards: Total number of shards.

    Returns:
        The sub-list of ``lines`` for ``shard_index``.

    Raises:
        ValueError: If ``shard_index`` is outside ``[0, num_shards)``.
    """
    if not 0 <= shard_index < num_shards:
        raise ValueError(
            "shard_index must be in [0, {}), got {}".format(num_shards, shard_index)
        )
    per_shard = len(lines) // num_shards
    start = shard_index * per_shard
    if shard_index == num_shards - 1:
        # The last worker also takes the leftover entries.
        return lines[start:]
    return lines[start:start + per_shard]


def main():
    """Run FunASR recognition over this worker's share of ``./wav.scp``.

    Reads the GPU index from ``-g/--gpu_index``, selects the matching
    one-of-eight slice of the paths listed in ``./wav.scp``, builds a FunASR
    ``AutoModel`` on ``cuda:<gpu_index>`` and calls ``generate`` on every
    path in the slice. ``./TRANS_<gpu_index>.txt`` is opened for writing; the
    post-processing that fills it is currently disabled (kept below as
    comments), so the file is left empty.
    """
    save_dir = "/mnt/hdd2/ximalaya_16k_spk"
    scp_dir = './wav.scp'
    num_shards = 8

    # exist_ok avoids the check-then-create race when several workers start
    # at the same time.
    os.makedirs(save_dir, exist_ok=True)

    parser = ArgumentParser()
    parser.add_argument("-g", "--gpu_index", type=int, required=True, help="gpu index")
    args = parser.parse_args()
    gpu_id = args.gpu_index
    if not 0 <= gpu_id < num_shards:
        parser.error("gpu index must be in [0, {})".format(num_shards))

    with open(scp_dir, 'r', encoding='utf-8') as f:
        # Drop blank lines so an empty path is never handed to the model.
        lines = [line.strip() for line in f if line.strip()]
    process_files = select_shard(lines, gpu_id, num_shards)
    trans_txt = './TRANS_'+str(gpu_id)+'.txt'

    # Disabled alternative: the ModelScope pipeline with the same models.
    # inference_pipeline = pipeline(
    #     task=Tasks.auto_speech_recognition,
    #     model='iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch', model_revision="v2.0.4",
    #     vad_model='iic/speech_fsmn_vad_zh-cn-16k-common-pytorch', vad_model_revision="v2.0.4",
    #     punc_model='iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch', punc_model_revision="v2.0.4",
    #     spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
    #     spk_model_revision="v2.0.2",
    #     device='cuda:'+str(gpu_id),
    #     ncpu = 1
    # )

    from funasr import AutoModel
    # NOTE(review): ncpu=1 is requested here; whether it is honoured by every
    # sub-model is not visible from this script - confirm against FunASR, and
    # consider torch.set_num_threads(1) after construction if CPU use is high.
    model = AutoModel(
        model="paraformer-zh", model_revision="v2.0.4",
        vad_model="fsmn-vad", vad_model_revision="v2.0.4",
        punc_model="ct-punc-c", punc_model_revision="v2.0.4",
        spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
        spk_model_revision="v2.0.2",
        device='cuda:'+str(gpu_id),
        ncpu=1,
    )

    with open(trans_txt, 'w', encoding='utf-8') as f:
        for process_file in process_files:
            # cost_st / rec_result are only consumed by the disabled
            # post-processing below.
            cost_st = time.time()
            #rec_result = inference_pipeline(process_file)
            rec_result = model.generate(input=process_file, batch_size_s=300)
            # Disabled post-processing (indentation reconstructed): keep the
            # sentences of the most frequent speaker, cut them out of the
            # waveform, save each cut and log it to the transcript file.
            # if os.path.exists(process_file.strip()):
            #     data, sr = torchaudio.load(process_file.strip())
            #     wav_name = process_file.strip().split('/')[-1]
            #     save_file = process_file.strip().split('/')[-2]
            #     count = 0
            #     spk_type = np.zeros(10)
            #     for resi in rec_result[0]['sentence_info']:
            #         spk_type[resi['spk']] = spk_type[resi['spk']] + 1
            #     max_index = np.argmax(spk_type)
            #     print("wav_dir:{}".format(process_file))
            #     for resi in rec_result[0]['sentence_info']:
            #         if (resi['spk']) == max_index:
            #             save_file_spk = save_file
            #             save_file_spk_dir = os.path.join(save_dir, save_file_spk)
            #             if not os.path.exists(save_file_spk_dir):
            #                 os.mkdir(save_file_spk_dir)
            #             text = resi['text']
            #             st = int(resi['start'])*16
            #             et = int(resi['end'])*16
            #             wav_name_spk = save_file_spk + '_' + wav_name.replace('.wav', '_'+str(count)+'.wav')
            #             save_wav_name_spk = os.path.join(save_file_spk_dir, wav_name_spk)
            #             data_cut = data[:, st:et]
            #             torchaudio.save(save_wav_name_spk, data_cut, sr)
            #             #f.write(wav_name_spk+'\t'+save_file_spk+'\t'+text+'\t'+str(st)+'\t'+str(et)+'\t'+process_file.strip()+'\n')
            #             f.write(wav_name_spk+'\t'+save_file_spk+'\t'+text+'\n')
            #             count = count + 1
            # cost_et = time.time()
            # print("time_cost:{:.03f}".format(cost_et-cost_st))
    print("ASR FINISHED\n")


if __name__ == "__main__":
    main()
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
93612 zhengxi+ 20 0 49.856g 5.562g 0.979g R 5705 2.2 18:12.87 python
78305 zhengxi+ 20 0 44064 5404 3532 R 21.1 0.0 0:53.40 top
56160 zhengxi+ 20 0 11.381g 77364 43004 S 5.3 0.0 0:58.93 node
1 root 20 0 225400 7760 5280 S 0.0 0.0 0:14.52 systemd
2 root 20 0 0 0 0 S 0.0 0.0 0:01.67 kthreadd
What have you tried?
What's your environment?
OS: Linux
FunASR version: 1.1.12
ModelScope version: 1.18.1
PyTorch version: 2.0.1+cu118
How FunASR was installed: pip install funasr
Python version: 3.8.20
GPU: 3090
CUDA/cuDNN version: 11.8
The text was updated successfully, but these errors were encountered:
The call deep_update(kwargs, cfg) in auto_model.py is potentially problematic because it updates kwargs in place.
Replace it with something that builds a new dict object for kwargs, for example kwargs = {**kwargs, **cfg}.
The call deep_update(kwargs, cfg) in auto_model.py is potentially problematic because it updates kwargs in place. Replace it with something that builds a new dict object for kwargs, for example kwargs = {**kwargs, **cfg}.
没看懂,啥意思
deep_update(model_conf, kwargs.get("model_conf", {}))
deep_update(model_conf, kwargs) in auto_model.py是这样子,我应该怎么改
What is your question?
Code
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from argparse import ArgumentParser
import os
import soundfile as sf
import time
import numpy as np
import torchaudio
import torch
from funasr import AutoModel
def main():
    """Select this worker's slice of the paths listed in ``./wav.scp``.

    Reads the GPU index from ``-g/--gpu_index``, makes sure the output
    directory exists, loads every line of ``./wav.scp`` and takes the
    ``gpu_index``-th block of ``len(lines) // 8`` lines.
    """
    save_dir = "/mnt/hdd2/ximalaya_16k_spk"
    scp_dir = './wav.scp'
    # exist_ok avoids the check-then-create race when several workers start
    # at the same time.
    os.makedirs(save_dir, exist_ok=True)

    parser = ArgumentParser()
    # required=True: without an index the slice arithmetic below would fail
    # on None with a TypeError.
    parser.add_argument("-g", "--gpu_index", type=int, required=True, help="gpu index")
    args = parser.parse_args()
    gpu_id = args.gpu_index

    with open(scp_dir, 'r') as f:
        lines = f.readlines()
    avg_file_num = len(lines)//8
    # NOTE: this excerpt ends here; the selected slice is not used further in
    # the visible code.
    process_files = lines[gpu_id*avg_file_num:(gpu_id+1)*avg_file_num]


if __name__ == "__main__":
    main()
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
93612 zhengxi+ 20 0 49.856g 5.562g 0.979g R 5705 2.2 18:12.87 python
78305 zhengxi+ 20 0 44064 5404 3532 R 21.1 0.0 0:53.40 top
56160 zhengxi+ 20 0 11.381g 77364 43004 S 5.3 0.0 0:58.93 node
1 root 20 0 225400 7760 5280 S 0.0 0.0 0:14.52 systemd
2 root 20 0 0 0 0 S 0.0 0.0 0:01.67 kthreadd
What have you tried?
What's your environment?
The text was updated successfully, but these errors were encountered: