ksang commited on
Commit
448fd25
1 Parent(s): eef135d

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +23 -15
  2. female.mp3 +0 -0
  3. male.mp3 +0 -0
app.py CHANGED
@@ -39,24 +39,32 @@ model.load_state_dict(checkpoint)
39
 
40
  # %%
41
  def predict(input):
42
- waveform, sr = librosa.load(input)
43
- waveform = torch.from_numpy(waveform).unsqueeze(0)
44
- waveform = torchaudio.transforms.Resample(sr, 16_000)(waveform)
45
- inputs = feature_extractor(waveform, sampling_rate=feature_extractor.sampling_rate,
46
- max_length=16000, truncation=True)
47
- tensor = torch.tensor(inputs['input_values'][0])
48
- with torch.no_grad():
49
- output = model(tensor)
50
- logits = output['logits'][0]
51
- label_id = torch.argmax(logits).item()
52
- label_name = id2label[str(label_id)]
53
-
54
- return label_name
 
 
 
 
 
55
  # %%
56
  demo = gr.Interface(
57
  fn=predict,
58
- inputs=gr.Audio(source="microphone", type="filepath", label="Speak to classify your voice!"), # record audio, save in temp file to feed to inference func
59
- outputs="text"
 
 
 
60
  )
61
 
62
  # %%
 
39
 
40
  # %%
41
def predict(input):
    """Classify the gender of the speaker in an audio file.

    Parameters
    ----------
    input : str | None
        Filepath of the recorded/uploaded audio clip. Gradio passes
        ``None`` when the user submits without recording/uploading.

    Returns
    -------
    str
        The predicted label name, or a human-readable error message
        when no valid file was supplied.
    """
    # Guard clauses instead of if/elif/else pyramid; `is None` per PEP 8.
    if input is None:
        return "Please input a valid file or record yourself by clicking the microphone"
    if not input:  # falsy but non-None (e.g. empty path string)
        return "File is not valid"

    # Decode the audio, then resample to the 16 kHz rate the feature
    # extractor expects (assumes the model was trained at 16 kHz — per
    # the max_length/sampling_rate values used below).
    waveform, sr = librosa.load(input)
    waveform = torch.from_numpy(waveform).unsqueeze(0)
    waveform = torchaudio.transforms.Resample(sr, 16_000)(waveform)

    inputs = feature_extractor(waveform, sampling_rate=feature_extractor.sampling_rate,
                               max_length=16000, truncation=True)
    tensor = torch.tensor(inputs['input_values'][0])

    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        output = model(tensor)
    logits = output['logits'][0]
    label_id = torch.argmax(logits).item()
    # id2label is keyed by *string* ids, as seen from the str() cast here.
    label_name = id2label[str(label_id)]

    return label_name
60
  # %%
61
# Gradio UI: wires the microphone/file input to predict() and renders the
# returned label (or error message) as plain text.
demo = gr.Interface(
    fn=predict,
    title="Audio Gender Classification",
    description="Record your voice or upload an audio file to see what gender our model classifies it as",
    # Records audio, saves it to a temp file, and passes its filepath to
    # predict() (type="filepath").
    # NOTE(review): `source=` and `optional=` were removed in Gradio 4.x
    # (replaced by `sources=[...]`); confirm the pinned gradio version
    # still accepts these keyword arguments.
    inputs=gr.Audio(source="microphone", type="filepath", optional=False, label="Speak to classify your voice!"), # record audio, save in temp file to feed to inference func
    outputs="text",
    # Bundled sample clips (committed alongside app.py) shown as
    # clickable examples under the input widget.
    examples= [["male.mp3"], ["female.mp3"]]
)
69
 
70
  # %%
female.mp3 ADDED
Binary file (61.7 kB). View file
 
male.mp3 ADDED
Binary file (18.7 kB). View file