aoxiang1221 committed on
Commit
9912004
1 Parent(s): 85ce65e
Files changed (47) hide show
  1. __pycache__/compress_model.cpython-310.pyc +0 -0
  2. __pycache__/models.cpython-310.pyc +0 -0
  3. __pycache__/utils.cpython-310.pyc +0 -0
  4. cluster/__pycache__/__init__.cpython-310.pyc +0 -0
  5. configs/config.json +0 -0
  6. dataset_raw/wav_structure.txt +20 -0
  7. diffusion/__pycache__/__init__.cpython-310.pyc +0 -0
  8. diffusion/__pycache__/diffusion.cpython-310.pyc +0 -0
  9. diffusion/__pycache__/unit2mel.cpython-310.pyc +0 -0
  10. diffusion/__pycache__/vocoder.cpython-310.pyc +0 -0
  11. diffusion/__pycache__/wavenet.cpython-310.pyc +0 -0
  12. edgetts/__pycache__/tts_voices.cpython-310.pyc +0 -0
  13. filelists/test.txt +4 -0
  14. filelists/train.txt +15 -0
  15. filelists/val.txt +4 -0
  16. inference/__pycache__/__init__.cpython-310.pyc +0 -0
  17. inference/__pycache__/infer_tool.cpython-310.pyc +0 -0
  18. inference/__pycache__/slicer.cpython-310.pyc +0 -0
  19. logs/44k/diffusion/put_diffusion_pretrained_model_here +0 -0
  20. logs/44k/put_pretrained_model_here +0 -0
  21. modules/F0Predictor/__pycache__/F0Predictor.cpython-310.pyc +0 -0
  22. modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-310.pyc +0 -0
  23. modules/F0Predictor/__pycache__/__init__.cpython-310.pyc +0 -0
  24. modules/__pycache__/DSConv.cpython-310.pyc +0 -0
  25. modules/__pycache__/__init__.cpython-310.pyc +0 -0
  26. modules/__pycache__/attentions.cpython-310.pyc +0 -0
  27. modules/__pycache__/commons.cpython-310.pyc +0 -0
  28. modules/__pycache__/modules.cpython-310.pyc +0 -0
  29. pretrain/__init__.py +0 -0
  30. pretrain/checkpoint_best_legacy_500.pt +3 -0
  31. pretrain/meta.py +39 -0
  32. pretrain/nsf_hifigan/put_nsf_hifigan_ckpt_here +0 -0
  33. pretrain/put_hubert_ckpt_here +0 -0
  34. raw/put_raw_wav_here +0 -0
  35. trained/nahida/nahida.json +96 -0
  36. trained/nahida/nahida_G_40000.pth +3 -0
  37. vdecoder/__pycache__/__init__.cpython-310.pyc +0 -0
  38. vdecoder/hifigan/__pycache__/env.cpython-310.pyc +0 -0
  39. vdecoder/hifigan/__pycache__/models.cpython-310.pyc +0 -0
  40. vdecoder/hifigan/__pycache__/utils.cpython-310.pyc +0 -0
  41. vdecoder/nsf_hifigan/__pycache__/env.cpython-310.pyc +0 -0
  42. vdecoder/nsf_hifigan/__pycache__/models.cpython-310.pyc +0 -0
  43. vdecoder/nsf_hifigan/__pycache__/nvSTFT.cpython-310.pyc +0 -0
  44. vdecoder/nsf_hifigan/__pycache__/utils.cpython-310.pyc +0 -0
  45. vencoder/__pycache__/ContentVec256L9.cpython-310.pyc +0 -0
  46. vencoder/__pycache__/__init__.cpython-310.pyc +0 -0
  47. vencoder/__pycache__/encoder.cpython-310.pyc +0 -0
__pycache__/compress_model.cpython-310.pyc ADDED
Binary file (2.22 kB). View file
 
__pycache__/models.cpython-310.pyc ADDED
Binary file (14.7 kB). View file
 
__pycache__/utils.cpython-310.pyc ADDED
Binary file (20.5 kB). View file
 
cluster/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.06 kB). View file
 
configs/config.json ADDED
File without changes
dataset_raw/wav_structure.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 数据集准备
2
+
3
+ raw
4
+ ├───speaker0
5
+ │ ├───xxx1-xxx1.wav
6
+ │ ├───...
7
+ │ └───Lxx-0xx8.wav
8
+ └───speaker1
9
+ ├───xx2-0xxx2.wav
10
+ ├───...
11
+ └───xxx7-xxx007.wav
12
+
13
+ 此外还需要编辑config.json
14
+
15
+ "n_speakers": 10
16
+
17
+ "spk":{
18
+ "speaker0": 0,
19
+ "speaker1": 1,
20
+ }
diffusion/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (132 Bytes). View file
 
diffusion/__pycache__/diffusion.cpython-310.pyc ADDED
Binary file (11.3 kB). View file
 
diffusion/__pycache__/unit2mel.cpython-310.pyc ADDED
Binary file (4.9 kB). View file
 
diffusion/__pycache__/vocoder.cpython-310.pyc ADDED
Binary file (3.52 kB). View file
 
diffusion/__pycache__/wavenet.cpython-310.pyc ADDED
Binary file (3.83 kB). View file
 
edgetts/__pycache__/tts_voices.cpython-310.pyc ADDED
Binary file (11.3 kB). View file
 
filelists/test.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ./dataset/44k/taffy/000562.wav
2
+ ./dataset/44k/nyaru/000011.wav
3
+ ./dataset/44k/nyaru/000008.wav
4
+ ./dataset/44k/taffy/000563.wav
filelists/train.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ./dataset/44k/taffy/000549.wav
2
+ ./dataset/44k/nyaru/000004.wav
3
+ ./dataset/44k/nyaru/000006.wav
4
+ ./dataset/44k/taffy/000551.wav
5
+ ./dataset/44k/nyaru/000009.wav
6
+ ./dataset/44k/taffy/000561.wav
7
+ ./dataset/44k/nyaru/000001.wav
8
+ ./dataset/44k/taffy/000553.wav
9
+ ./dataset/44k/nyaru/000002.wav
10
+ ./dataset/44k/taffy/000560.wav
11
+ ./dataset/44k/taffy/000557.wav
12
+ ./dataset/44k/nyaru/000005.wav
13
+ ./dataset/44k/taffy/000554.wav
14
+ ./dataset/44k/taffy/000550.wav
15
+ ./dataset/44k/taffy/000559.wav
filelists/val.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ./dataset/44k/nyaru/000003.wav
2
+ ./dataset/44k/nyaru/000007.wav
3
+ ./dataset/44k/taffy/000558.wav
4
+ ./dataset/44k/taffy/000556.wav
inference/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (132 Bytes). View file
 
inference/__pycache__/infer_tool.cpython-310.pyc ADDED
Binary file (15.4 kB). View file
 
inference/__pycache__/slicer.cpython-310.pyc ADDED
Binary file (3.85 kB). View file
 
logs/44k/diffusion/put_diffusion_pretrained_model_here ADDED
File without changes
logs/44k/put_pretrained_model_here ADDED
File without changes
modules/F0Predictor/__pycache__/F0Predictor.cpython-310.pyc ADDED
Binary file (838 Bytes). View file
 
modules/F0Predictor/__pycache__/HarvestF0Predictor.cpython-310.pyc ADDED
Binary file (2.46 kB). View file
 
modules/F0Predictor/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (142 Bytes). View file
 
modules/__pycache__/DSConv.cpython-310.pyc ADDED
Binary file (2.98 kB). View file
 
modules/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (130 Bytes). View file
 
modules/__pycache__/attentions.cpython-310.pyc ADDED
Binary file (11.1 kB). View file
 
modules/__pycache__/commons.cpython-310.pyc ADDED
Binary file (6.39 kB). View file
 
modules/__pycache__/modules.cpython-310.pyc ADDED
Binary file (9.9 kB). View file
 
pretrain/__init__.py ADDED
File without changes
pretrain/checkpoint_best_legacy_500.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f54b40fd2802423a5643779c4861af1e9ee9c1564dc9d32f54f20b5ffba7db96
3
+ size 189507909
pretrain/meta.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def download_dict():
    """Return the download table for the known speech-encoder checkpoints.

    Returns:
        A new dict on every call, keyed by speech-encoder name. Each value is
        its own dict with two string entries: ``"url"`` (where the checkpoint
        is downloaded from) and ``"output"`` (the local path it is saved to).
    """
    # "vec768l12" and "vec256l9" deliberately point at the same URL and the
    # same output file; they are still listed as separate rows so each key
    # gets its own independent entry dict.
    contentvec_url = "https://ibm.ent.box.com/shared/static/z1wgl1stco8ffooyatzdwsqn2psd9lrr"
    contentvec_path = "./pretrain/checkpoint_best_legacy_500.pt"
    whisper_root = "https://openaipublic.azureedge.net/main/whisper/models/"

    # (encoder name, download url, local output path) -- order is preserved.
    rows = (
        ("vec768l12", contentvec_url, contentvec_path),
        ("vec256l9", contentvec_url, contentvec_path),
        (
            "hubertsoft",
            "https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt",
            "./pretrain/hubert-soft-0d54a1f4.pt",
        ),
        (
            "whisper-ppg-small",
            whisper_root + "9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
            "./pretrain/small.pt",
        ),
        (
            "whisper-ppg",
            whisper_root + "345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
            "./pretrain/medium.pt",
        ),
        (
            "whisper-ppg-large",
            whisper_root + "81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
            "./pretrain/large-v2.pt",
        ),
    )
    return {name: {"url": url, "output": path} for name, url, path in rows}
28
+
29
+
30
def get_speech_encoder(config_path="configs/config.json"):
    """Return the download URL and local path for the configured speech encoder.

    Reads the JSON file at ``config_path``, takes the encoder name stored at
    ``config["model"]["speech_encoder"]`` and looks it up in the table
    returned by ``download_dict()``.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        A ``(url, output)`` tuple taken from the matching table entry.

    Raises:
        OSError: If the configuration file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the config has no ``model``/``speech_encoder`` entry, or
            names an encoder that ``download_dict()`` does not list.
    """
    import json

    # Decode explicitly as UTF-8 instead of the platform locale encoding, so
    # a config containing non-ASCII text loads the same way on every OS.
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    speech_encoder = config["model"]["speech_encoder"]
    # Use a descriptive local name rather than shadowing the builtin `dict`.
    entry = download_dict()[speech_encoder]
    return entry["url"], entry["output"]
pretrain/nsf_hifigan/put_nsf_hifigan_ckpt_here ADDED
File without changes
pretrain/put_hubert_ckpt_here ADDED
File without changes
raw/put_raw_wav_here ADDED
File without changes
trained/nahida/nahida.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 800,
5
+ "seed": 1234,
6
+ "epochs": 10000,
7
+ "learning_rate": 0.00015,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 12,
14
+ "fp16_run": false,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 10240,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0,
21
+ "use_sr": true,
22
+ "max_speclen": 512,
23
+ "port": "8001",
24
+ "keep_ckpts": 3,
25
+ "all_in_mem": false
26
+ },
27
+ "data": {
28
+ "training_files": "filelists/train.txt",
29
+ "validation_files": "filelists/val.txt",
30
+ "max_wav_value": 32768.0,
31
+ "sampling_rate": 44100,
32
+ "filter_length": 2048,
33
+ "hop_length": 512,
34
+ "win_length": 2048,
35
+ "n_mel_channels": 80,
36
+ "mel_fmin": 0.0,
37
+ "mel_fmax": 22050
38
+ },
39
+ "model": {
40
+ "inter_channels": 192,
41
+ "hidden_channels": 192,
42
+ "filter_channels": 768,
43
+ "n_heads": 2,
44
+ "n_layers": 6,
45
+ "kernel_size": 3,
46
+ "p_dropout": 0.1,
47
+ "resblock": "1",
48
+ "resblock_kernel_sizes": [
49
+ 3,
50
+ 7,
51
+ 11
52
+ ],
53
+ "resblock_dilation_sizes": [
54
+ [
55
+ 1,
56
+ 3,
57
+ 5
58
+ ],
59
+ [
60
+ 1,
61
+ 3,
62
+ 5
63
+ ],
64
+ [
65
+ 1,
66
+ 3,
67
+ 5
68
+ ]
69
+ ],
70
+ "upsample_rates": [
71
+ 8,
72
+ 8,
73
+ 2,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4,
83
+ 4
84
+ ],
85
+ "n_layers_q": 3,
86
+ "use_spectral_norm": false,
87
+ "gin_channels": 256,
88
+ "ssl_dim": 256,
89
+ "n_speakers": 1,
90
+ "speech_encoder": "vec256l9",
91
+ "speaker_embedding": false
92
+ },
93
+ "spk": {
94
+ "nahida": 0
95
+ }
96
+ }
trained/nahida/nahida_G_40000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec9506f8a762516e5321bcaedc72b6891d61a631e82ac596fe4e16e45b4652a
3
+ size 542178077
vdecoder/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (131 Bytes). View file
 
vdecoder/hifigan/__pycache__/env.cpython-310.pyc ADDED
Binary file (804 Bytes). View file
 
vdecoder/hifigan/__pycache__/models.cpython-310.pyc ADDED
Binary file (16.3 kB). View file
 
vdecoder/hifigan/__pycache__/utils.cpython-310.pyc ADDED
Binary file (2.3 kB). View file
 
vdecoder/nsf_hifigan/__pycache__/env.cpython-310.pyc ADDED
Binary file (808 Bytes). View file
 
vdecoder/nsf_hifigan/__pycache__/models.cpython-310.pyc ADDED
Binary file (14.2 kB). View file
 
vdecoder/nsf_hifigan/__pycache__/nvSTFT.cpython-310.pyc ADDED
Binary file (4.26 kB). View file
 
vdecoder/nsf_hifigan/__pycache__/utils.cpython-310.pyc ADDED
Binary file (2.35 kB). View file
 
vencoder/__pycache__/ContentVec256L9.cpython-310.pyc ADDED
Binary file (1.55 kB). View file
 
vencoder/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (131 Bytes). View file
 
vencoder/__pycache__/encoder.cpython-310.pyc ADDED
Binary file (757 Bytes). View file