Skip to content

Commit

Permalink
Add video demo, progress bars
Browse files Browse the repository at this point in the history
  • Loading branch information
pie3636 committed Jan 22, 2023
1 parent 77164ef commit 5c71493
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 38 deletions.
Binary file added Demo.mp4
Binary file not shown.
28 changes: 12 additions & 16 deletions Flask_VT/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def forward(self, images, features):
scaler = load('models/scaler.joblib') # The scaler, transforms formants so that they have a mean of 0 and a variance of 1
regressor = torch.load('models/neural_regressor.pt', map_location=torch.device('cpu')) # The vowel detection model

rule_based = True
rule_based = False

idx2key = ['2', '9', 'a', 'a~', 'e', 'E', 'i', 'O', 'o', 'o~', 'u', 'U~+', 'y'] # All possible vowels
valid = [0, 1, 2, 4, 5, 6, 7, 8, 10, 12] # Vowels we consider here (depends on the classifier)
Expand All @@ -95,29 +95,23 @@ def upload():
word_ends_with_r = data['r_word']
input_file = "input.wav"

# TODO remove
#return jsonify(predicted_vowel='a',
# confidence=0.85,
# feedback=vowel_feedback(des_vowel, 'a'),
# add_feedback=pron_hack(des_vowel, 'a'))

audio = base64.b64decode(audio)

with open(input_file, 'wb') as f:
f.write(audio)

# Remove leading and trailing silences
sound = AudioSegment.from_file(input_file)
trim_leading_silence = lambda x: x[detect_leading_silence(x, silence_threshold=-25):]
trim_leading_silence = lambda x: x[detect_leading_silence(x, silence_threshold=-40):]
trimmed = trim_leading_silence(trim_leading_silence(sound).reverse()).reverse()
trimmed.export(tmp_wav, format='wav', bitrate='768k')

# Generate log-melspectrogram
try:
y, sr = librosa.load(tmp_wav)
except ValueError:
print('The file is too silent to analyze!')
return jsonify(error='The file is too silent to analyze!')
print('The file is too silent to analyze! Try speaking louder.')
return jsonify(error='The file is too silent to analyze! Try speaking louder.')

mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, n_fft=512, hop_length=512)
mels = np.log(mels + 1e-9) # add small number to avoid log(0)
Expand All @@ -139,8 +133,8 @@ def upload():
vowel_start = pred[0].item()
vowel_end = pred[1].item()
if vowel_start >= vowel_end:
print('The model predicted that the vowel has negative duration!')
return jsonify(error='The model predicted that the vowel has negative duration!')
print('The model predicted that the vowel has negative duration! Try again.')
return jsonify(error='The model predicted that the vowel has negative duration! Try again.')

# Trim file at start and end to only have the vowel
sample_rate, wave_data = wavfile.read(tmp_wav)
Expand All @@ -149,6 +143,9 @@ def upload():
end_sample = int(duration * vowel_end * sample_rate)
wavfile.write(tmp_wav_2, sample_rate, wave_data[start_sample:end_sample])
duration = len(wave_data[start_sample:end_sample]) / sample_rate
if duration < 0.01:
print('The model predicted that the vowel is too short! Try speaking louder.')
return jsonify(error='The model predicted that the vowel is too short! Try speaking louder.')

if rule_based:
# Extract formants
Expand All @@ -168,8 +165,8 @@ def upload():
for i in range(4):
formants.append(sum(f_lists[i]) / len(f_lists[i]))
except ZeroDivisionError:
print('The file is too short/empty to analyze!')
return jsonify(error='The file is too short/empty to analyze!')
print('The file is too short/empty to analyze! Try speaking louder.')
return jsonify(error='The file is too short/empty to analyze! Try speaking louder.')

# Add additional features (gender, previous phoneme)
input_features = torch.cat([input_features[0:1], input_features[2:]]).cpu()
Expand Down Expand Up @@ -199,12 +196,11 @@ def upload():
print(f'Prediction: /{final_vowel}/, confidence: {final_confidence:.3f}')

return jsonify(predicted_vowel=final_vowel,
confidence=final_confidence,
confidence=float(final_confidence),
feedback=vowel_feedback(des_vowel, final_vowel),
add_feedback=pron_hack(des_vowel, final_vowel)
)


@app.route('/')
def index():
return render_template("index.html")
Expand Down
49 changes: 29 additions & 20 deletions Flask_VT/static/js/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,77 +9,77 @@ let previousPhoneme;
let wordsEndWithR;
let vowel;
let vowel_dict = {
// X-SAMPA, user notation, IPA, words (text, user notation, IPA, previous phoneme, ends with /R/, simple example, more examples)
// Key: X-SAMPA. Value: [user notation, IPA, words (each: text, user notation, IPA, previous phoneme, ends with /R/), simple example, more examples, successes, tries]
"a": ["a", "a", [
["la", "l-a", "la", "l", false],
["ma", "m-a", "ma", "m", false],
["pas", "p-a", "pa", "p", false],
["sa", "s-a", "sa", "s", false],
["ta", "t-a", "ta", "t", false],
], "la", "b<b>a</b>s, m<b>â</b>t"],
], "la", "b<b>a</b>s, m<b>â</b>t", 0, 0],
"i": ["i", "i", [
["lit", "l-i", "li", "l", false],
["mis", "m-i", "mi", "m", false],
["pi", "p-i", "pi", "p", false],
["si", "s-i", "si", "s", false],
["t'y", "t-i", "ti", "t", false],
], "lit", "d<b>i</b>re, f<b>i</b>lle"],
], "lit", "d<b>i</b>re, f<b>i</b>lle", 0, 0],
"u": ["ou", "u", [
["loup", "l-ou", "lu", "l", false],
["mou", "m-ou", "mu", "m", false],
["pou", "p-ou", "pu", "p", false],
["sous", "s-ou", "su", "s", false],
["tout", "t-ou", "tu", "t", false],
], "tout", "l<b>ou</b>p, c<b>oû</b>t, igl<b>oo</b>"],
], "tout", "l<b>ou</b>p, c<b>oû</b>t, igl<b>oo</b>", 0, 0],
"E": ["è", "ɛ", [
["l'air'", "l-è-r", "lɛʁ", "l", true],
["mer", "m-è-r", "mɛʁ", "m", true],
["père", "p-è-r", "pɛʁ", "p", true],
["serre", "s-è-r", "sɛʁ", "s", true],
["terre", "t-è-r", "tɛʁ", "t", true],
], "père", "g<b>è</b>le, m<b>e</b>r, b<b>ê</b>te, f<b>ai</b>te"],
], "père", "g<b>è</b>le, m<b>e</b>r, b<b>ê</b>te, f<b>ai</b>te", 0, 0],
"o": ["ô", "o", [
["lot", "l-ô", "lo", "l", false],
["mot", "m-ô", "mo", "m", false],
["pot", "p-ô", "po", "p", false],
["seau", "s-ô", "so", "s", false],
["tôt", "t-ô", "to", "t", false],
], "mot", "t<b>ô</b>t, l<b>o</b>t, f<b>au</b>x, b<b>eau</b>"],
], "mot", "t<b>ô</b>t, l<b>o</b>t, f<b>au</b>x, b<b>eau</b>", 0, 0],
"y": ["u", "y", [
["lu", "l-u", "ly", "l", false],
["mu", "m-u", "my", "m", false],
["pu", "p-u", "py", "p", false],
["su", "s-u", "sy", "s", false],
["tu", "t-u", "ty", "t", false],
], "tu", "v<b>u</b>, r<b>ue</b>"],
], "tu", "v<b>u</b>, r<b>ue</b>", 0, 0],
"O": ["o (open)", "ɔ", [
["lors'", "l-o (open)-r", "lɔʁ", "l", true],
["lors", "l-o (open)-r", "lɔʁ", "l", true],
["mort", "m-o (open)-r", "mɔʁ", "m", true],
["porc", "p-o (open)-r", "pɔʁ", "p", true],
["sort", "s-o (open)-r", "sɔʁ", "s", true],
["tort", "t-o (open)-r", "tɔʁ", "t", true],
], "fort", "s<b>o</b>l, p<b>o</b>rc"],
], "fort", "s<b>o</b>l, p<b>o</b>rc", 0, 0],
"e": ["é", "e", [
["les", "l-é", "le", "l", false],
["mes", "m-é", "me", "m", false],
["pet", "p-é", "pe", "p", false],
["ses", "s-é", "se", "s", false],
["tes", "t-é", "te", "t", false],
], "les", "n<b>é</b>, nou<b>ée</b>, m<b>es</b>"],
], "les", "n<b>é</b>, nou<b>ée</b>, m<b>es</b>", 0, 0],
"2": ["eu", "ø", [
["le", "l-eu", "lø", "l", false],
["me", "m-eu", "mø", "m", false],
["peu", "p-eu", "pø", "p", false],
["se", "s-eu", "sø", "s", false],
["te", "t-eu", "tø", "t", false],
], "me", "c<b>e</b>, p<b>eu</b>, d<b>eu</b>x"],
], "me", "c<b>e</b>, p<b>eu</b>, d<b>eu</b>x", 0, 0],
"9": ["eu (open)", "œ", [
["leur'", "l-eu (open)-r", "lœʁ", "l", true],
["meurt", "m-eu (open)-r", "mœʁ", "m", true],
["peur", "p-eu (open)-r", "pœʁ", "p", true],
["sœur", "s-eu (open)-r", "sœʁ", "s", true],
["-teur", "t-eu (open)-r", "tœʁ", "t", true],
], "peur", "s<b>eu</b>l, n<b>eu</b>f"]
], "peur", "s<b>eu</b>l, n<b>eu</b>f", 0, 0]
};

const _AudioFormat = "audio/wav";
Expand Down Expand Up @@ -202,6 +202,7 @@ function predictionDone(data) {
changeTab('vowel_prediction_good');
$("#vowel-id-2").html(vowel);
$("#score-good").html(+data['confidence'].toFixed(4)*100 + "%");
vowel_dict[predicted_vowel][5] += 1;
} else {
changeTab('vowel_prediction_bad');
$("#vowel-id-3").html(vowel);
Expand All @@ -213,7 +214,7 @@ function predictionDone(data) {
$("#reg2").html(registered_vowel[1]);
$("#reg3").html(registered_vowel[3]);
$("#reg4").html(registered_vowel[4]);
$("#score-bad").html(+data['confidence'].toFixed(4)*100 + "%");
$("#score-bad").html((+data['confidence']*100).toFixed(2) + "%");
$("#feedback").html(data['feedback']);
if ("add_feedback" in data && data["add_feedback"]) {
$("#feedback-2").html(data['add_feedback']);
Expand All @@ -222,14 +223,22 @@ function predictionDone(data) {
$("#feedback-2-div").hide();
}
}
vowel_dict[vowel][6] += 1;
$("#reg5-" + vowel).hide();
$("#reg6-" + vowel).show();
const vowelBar = $("#reg7-" + vowel);
vowelBar.show();
const progress = 100*vowel_dict[vowel][5] / vowel_dict[vowel][6];
vowelBar.css('width', progress+'%').attr('aria-valuenow', progress).html(vowel_dict[vowel][5] + "/" + vowel_dict[vowel][6]);
vowelBar.removeClass().addClass("progress-bar");
if (progress <= 100/3) {
vowelBar.addClass("progress-bar-danger");
} else if (progress <= 2*100/3) {
vowelBar.addClass("progress-bar-warning");
} else {
vowelBar.addClass("progress-bar-success");
}
}

/*
predicted_vowel='a',
confidence=0.85,
feedback="git gud",
add_feedback
*/
}

function replayAudio() { // TODO test
Expand Down
44 changes: 42 additions & 2 deletions Flask_VT/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
<div class="container-fluid" role="main" id="main">
<div class="alert alert-warning">
<span class="glyphicon glyphicon glyphicon-warning-sign" aria-hidden="true"></span>
<b>Warning:</b> This page is a work in progress. Bugs might still be present.
<b>Warning:</b> This page is a work in progress. Bugs might still be present. In addition, our model is not 100% accurate, please exercise caution.
</div>

<div id="welcome" style="display:none">
Expand Down Expand Up @@ -85,6 +85,10 @@ <h1 id="reg1-a"></h1>
<p>/<span id="reg2-a"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-a"></span></h3>
<h5 id="reg4-a"></h5>
<span id="reg5-a">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-a" style="display: none;">
<div class="progress-bar" id="reg7-a" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="a-btn">Let's go!</button>
</div>
</div>
Expand All @@ -94,6 +98,10 @@ <h1 id="reg1-i"></h1>
<p>/<span id="reg2-i"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-i"></span></h3>
<h5 id="reg4-i"></h5>
<span id="reg5-i">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-i" style="display: none;">
<div class="progress-bar" id="reg7-i" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="i-btn">Let's go!</button>
</div>
</div>
Expand All @@ -103,6 +111,10 @@ <h1 id="reg1-u"></h1>
<p>/<span id="reg2-u"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-u"></span></h3>
<h5 id="reg4-u"></h5>
<span id="reg5-u">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-u" style="display: none;">
<div class="progress-bar" id="reg7-u" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="u-btn">Let's go!</button>
</div>
</div>
Expand All @@ -112,6 +124,10 @@ <h1 id="reg1-E"></h1>
<p>/<span id="reg2-E"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-E"></span></h3>
<h5 id="reg4-E"></h5>
<span id="reg5-E">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-E" style="display: none;">
<div class="progress-bar" id="reg7-E" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="E-btn">Let's go!</button>
</div>
</div>
Expand All @@ -121,6 +137,10 @@ <h1 id="reg1-o"></h1>
<p>/<span id="reg2-o"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-o"></span></h3>
<h5 id="reg4-o"></h5>
<span id="reg5-o">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-o" style="display: none;">
<div class="progress-bar" id="reg7-o" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="o-btn">Let's go!</button>
</div>
</div>
Expand All @@ -132,6 +152,10 @@ <h1 id="reg1-y"></h1>
<p>/<span id="reg2-y"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-y"></span></h3>
<h5 id="reg4-y"></h5>
<span id="reg5-y">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-y" style="display: none;">
<div class="progress-bar" id="reg7-y" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="y-btn">Let's go!</button>
</div>
</div>
Expand All @@ -141,6 +165,10 @@ <h1 id="reg1-O"></h1>
<p>/<span id="reg2-O"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-O"></span></h3>
<h5 id="reg4-O"></h5>
<span id="reg5-O">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-O" style="display: none;">
<div class="progress-bar" id="reg7-O" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="O-btn">Let's go!</button>
</div>
</div>
Expand All @@ -150,6 +178,10 @@ <h1 id="reg1-e"></h1>
<p>/<span id="reg2-e"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-e"></span></h3>
<h5 id="reg4-e"></h5>
<span id="reg5-e">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-e" style="display: none;">
<div class="progress-bar" id="reg7-e" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="e-btn">Let's go!</button>
</div>
</div>
Expand All @@ -159,6 +191,10 @@ <h1 id="reg1-2"></h1>
<p>/<span id="reg2-2"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-2"></span></h3>
<h5 id="reg4-2"></h5>
<span id="reg5-2">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-2" style="display: none;">
<div class="progress-bar" id="reg7-2" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="2-btn">Let's go!</button>
</div>
</div>
Expand All @@ -168,6 +204,10 @@ <h1 id="reg1-9"></h1>
<p>/<span id="reg2-9"></span>/</p>
<h3>as in <span style="font-weight: bold" id="reg3-9"></span></h3>
<h5 id="reg4-9"></h5>
<span id="reg5-9">Not attempted yet</span><br/><br/>
<div class="progress" id="reg6-9" style="display: none;">
<div class="progress-bar" id="reg7-9" role="progressbar" style="width: 0%" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100"></div>
</div>
<button class="btn btn-default" id="9-btn">Let's go!</button>
</div>
</div>
Expand Down Expand Up @@ -198,7 +238,7 @@ <h4><span id="processing" style="display: none;">Processing...</span></h4>
</div>

<div id="vowel_prediction_err" style="display:none">
<div class="alert alert-warning" id="error-banner" style="display: None;">
<div class="alert alert-danger" id="error-banner">
<span class="glyphicon glyphicon glyphicon-remove" aria-hidden="true"></span>
<b>Error:</b> <span id="error-message"></span>
<div class="row">
Expand Down

0 comments on commit 5c71493

Please sign in to comment.