Add voice activity detection

This commit is contained in:
Jonas Herzig 2017-09-20 15:16:49 +02:00
parent c49dabbfc4
commit 80f766379d
6 changed files with 156 additions and 24 deletions

View file

@ -11,7 +11,7 @@ Instead Websockets are used for all communications.
libopus, libcelt (0.7.1) and libsamplerate, compiled to JS via emscripten, are used for audio decoding.
Therefore, at the moment only the Opus and CELT Alpha codecs are supported.
Quite a few features, most noticeably voice activity detection and all
Quite a few features, most noticeably all
administrative functionality, are still missing.
### Installing

View file

@ -67,11 +67,23 @@
<td>
<select data-bind='value: voiceMode'>
<option value="cont">Continuous</option>
<option value="vad" disabled>Voice Activity</option>
<option value="vad">Voice Activity</option>
<option value="ptt">Push To Talk</option>
</td>
</tr>
<tr data-bind="style: {visibility: voiceMode() == 'ptt' ? 'visible' : 'hidden'}">
<tr data-bind="visible: voiceMode() == 'vad'">
<td colspan="2">
<div class="mic-volume-container">
<div class="mic-volume" data-bind="style: {
width: testVadLevel()*100 + '%',
background: testVadActive() ? 'green' : 'red'
}"></div>
</div>
<input type="range" min="0" max="1" step="0.01"
data-bind="value: vadLevel">
</td>
</tr>
<tr data-bind="visible: voiceMode() == 'ptt'">
<td>PTT Key</td>
<td>
<input type="button" data-bind="value: pttKeyDisplay, click: recordPttKey">

View file

@ -9,7 +9,7 @@ import ko from 'knockout'
import _dompurify from 'dompurify'
import keyboardjs from 'keyboardjs'
import { ContinuousVoiceHandler, PushToTalkVoiceHandler, initVoice } from './voice'
import { ContinuousVoiceHandler, PushToTalkVoiceHandler, VADVoiceHandler, initVoice } from './voice'
const dompurify = _dompurify(window)
@ -58,11 +58,34 @@ class SettingsDialog {
this.voiceMode = ko.observable(settings.voiceMode)
this.pttKey = ko.observable(settings.pttKey)
this.pttKeyDisplay = ko.observable(settings.pttKey)
this.vadLevel = ko.observable(settings.vadLevel)
this.testVadLevel = ko.observable(0)
this.testVadActive = ko.observable(false)
this._setupTestVad()
this.vadLevel.subscribe(() => this._setupTestVad())
}
_setupTestVad () {
if (this._testVad) {
this._testVad.end()
}
this._testVad = new VADVoiceHandler(null, this.vadLevel())
this._testVad.on('started_talking', () => this.testVadActive(true))
.on('stopped_talking', () => this.testVadActive(false))
.on('level', level => this.testVadLevel(level))
testVoiceHandler = this._testVad
}
applyTo (settings) {
settings.voiceMode = this.voiceMode()
settings.pttKey = this.pttKey()
settings.vadLevel = this.vadLevel()
}
end () {
this._testVad.end()
testVoiceHandler = null
}
recordPttKey () {
@ -89,14 +112,16 @@ class SettingsDialog {
/**
 * User settings persisted in `window.localStorage` under keys prefixed with
 * `mumble.`.
 */
class Settings {
  /**
   * Loads every setting from local storage, falling back to its default when
   * nothing usable is stored.
   */
  constructor () {
    const load = key => window.localStorage.getItem('mumble.' + key)
    this.voiceMode = load('voiceMode') || 'vad'
    this.pttKey = load('pttKey') || 'ctrl + shift'
    // Local storage only hands back strings, so convert the stored level to a
    // number again; that way `vadLevel` has the same type as its default and a
    // stored level of 0 survives a reload.
    const storedVadLevel = parseFloat(load('vadLevel'))
    this.vadLevel = Number.isFinite(storedVadLevel) ? storedVadLevel : 0.3
  }

  /**
   * Writes every setting back to local storage.
   */
  save () {
    const save = (key, val) => window.localStorage.setItem('mumble.' + key, val)
    save('voiceMode', this.voiceMode)
    save('pttKey', this.pttKey)
    save('vadLevel', this.vadLevel)
  }
}
@ -130,10 +155,13 @@ class GlobalBindings {
this._updateVoiceHandler()
this.settings.save()
this.settingsDialog(null)
this.closeSettings()
}
// Ends the currently open settings dialog (if there is one) and clears the
// `settingsDialog` observable.
this.closeSettings = () => {
  const dialog = this.settingsDialog()
  if (dialog) {
    dialog.end()
  }
  this.settingsDialog(null)
}
@ -360,7 +388,7 @@ class GlobalBindings {
} else if (mode === 'ptt') {
voiceHandler = new PushToTalkVoiceHandler(this.client, this.settings.pttKey)
} else if (mode === 'vad') {
voiceHandler = new VADVoiceHandler(this.client, this.settings.vadLevel)
} else {
log('Unknown voice mode:', mode)
return
@ -586,15 +614,19 @@ function userToState () {
}
// Voice handler that the initVoice data callback below writes microphone data to
var voiceHandler
// Test handler assigned by the settings dialog; the data callback below also writes to it while set
var testVoiceHandler
initVoice(data => {
if (testVoiceHandler) {
testVoiceHandler.write(data)
}
if (!ui.client) {
if (voiceHandler) {
voiceHandler.end()
}
voiceHandler = null
} else if (voiceHandler) {
voiceHandler.write(new Float32Array(data.buffer, data.byteOffset, data.byteLength / 4))
voiceHandler.write(data)
}
}, err => {
log('Cannot initialize user media. Microphone will not work:', err)

View file

@ -1,10 +1,12 @@
import { Writable } from 'stream'
import { Writable, Transform } from 'stream'
import MicrophoneStream from 'microphone-stream'
import audioContext from 'audio-context'
import chunker from 'stream-chunker'
import Resampler from 'libsamplerate.js'
import getUserMedia from 'getusermedia'
import keyboardjs from 'keyboardjs'
import vad from 'voice-activity-detection'
import DropStream from 'drop-stream'
class VoiceHandler extends Writable {
constructor (client) {
@ -15,7 +17,29 @@ class VoiceHandler extends Writable {
_getOrCreateOutbound () {
if (!this._outbound) {
this._outbound = this._client.createVoiceStream()
if (!this._client) {
this._outbound = DropStream.obj()
this.emit('started_talking')
return this._outbound
}
this._outbound = new Resampler({
unsafe: true,
type: Resampler.Type.SINC_FASTEST,
ratio: 48000 / audioContext.sampleRate
})
const buffer2Float32Array = new Transform({
transform (data, _, callback) {
callback(null, new Float32Array(data.buffer, data.byteOffset, data.byteLength / 4))
},
readableObjectMode: true
})
this._outbound
.pipe(chunker(4 * 480))
.pipe(buffer2Float32Array)
.pipe(this._client.createVoiceStream())
this.emit('started_talking')
}
return this._outbound
@ -74,24 +98,76 @@ export class PushToTalkVoiceHandler extends VoiceHandler {
}
}
/**
 * Voice handler that only forwards microphone data while the voice activity
 * detector reports speech.
 *
 * Emits `level` with every value handed to the detector's `onUpdate` callback.
 */
export class VADVoiceHandler extends VoiceHandler {
  /**
   * @param client Forwarded to the VoiceHandler constructor.
   * @param level Used as both `minNoiseLevel` and `maxNoiseLevel` of the detector.
   */
  constructor (client, level) {
    super(client)

    // Need to keep a backlog of the last ~150ms (dependent on sample rate)
    // because VAD will activate with ~125ms delay
    this._backlog = []
    this._backlogLength = 0
    this._backlogLengthMin = 1024 * 6 * 4 // vadBufferLen * (vadDelay + 1) * bytesPerSample

    this._vad = null
    if (theUserMedia) {
      this._vad = vad(audioContext, theUserMedia, {
        onVoiceStart: () => {
          console.log('vad: start')
          this._active = true
        },
        onVoiceStop: () => {
          console.log('vad: stop')
          this._stopOutbound()
          this._active = false
        },
        onUpdate: (val) => {
          this._level = val
          this.emit('level', val)
        },
        noiseCaptureDuration: 0,
        minNoiseLevel: level,
        maxNoiseLevel: level
      })
    } else {
      // `theUserMedia` stays null until getUserMedia has succeeded, so there is
      // no stream to attach a detector to yet. Without a detector this handler
      // never becomes active and only keeps its backlog.
      // NOTE(review): presumably the detector cannot be created without a live
      // stream - confirm against the voice-activity-detection version in use.
      console.warn('vad: no microphone stream available, voice activity detection is disabled')
    }
  }

  /**
   * Forwards `data` to the outbound stream while voice is active, otherwise
   * stores it in the backlog.
   *
   * @param data Audio chunk; its `length` is counted against the backlog size.
   * @param _ Unused encoding argument.
   * @param callback Invoked once the chunk has been handled.
   */
  _write (data, _, callback) {
    if (this._active) {
      // Send what was recorded before the detector triggered first
      if (this._backlog.length > 0) {
        for (const oldData of this._backlog) {
          this._getOrCreateOutbound().write(oldData)
        }
        this._backlog = []
        this._backlogLength = 0
      }
      this._getOrCreateOutbound().write(data, callback)
      return
    }

    // Make sure we always keep the backlog filled if we're not (yet) talking
    this._backlog.push(data)
    this._backlogLength += data.length
    // Discard the oldest elements as long as that does not make the backlog
    // too short; a loop keeps it trimmed even when chunk sizes vary
    while (this._backlogLength - this._backlog[0].length > this._backlogLengthMin) {
      this._backlogLength -= this._backlog.shift().length
    }
    callback()
  }

  /**
   * Runs the parent's `_final` and afterwards destroys the detector, if one
   * was created.
   *
   * @param callback Invoked with the value the parent's `_final` reported.
   */
  _final (callback) {
    super._final(e => {
      if (this._vad) {
        this._vad.destroy()
      }
      callback(e)
    })
  }
}
// Media stream stored by the getUserMedia success callback in initVoice; null until then.
// VADVoiceHandler passes it to the voice activity detector.
var theUserMedia = null
export function initVoice (onData, onUserMediaError) {
var resampler = new Resampler({
unsafe: true,
type: Resampler.Type.SINC_FASTEST,
ratio: 48000 / audioContext.sampleRate
})
resampler.pipe(chunker(4 * 480)).on('data', data => {
onData(data)
})
getUserMedia({ audio: true }, (err, userMedia) => {
if (err) {
onUserMediaError(err)
} else {
var micStream = new MicrophoneStream(userMedia, { objectMode: true })
theUserMedia = userMedia
var micStream = new MicrophoneStream(userMedia, { objectMode: true, bufferSize: 1024 })
micStream.on('data', data => {
resampler.write(Buffer.from(data.getChannelData(0).buffer))
onData(Buffer.from(data.getChannelData(0).buffer))
})
}
})

View file

@ -25,6 +25,7 @@
"brfs": "^1.4.3",
"css-loader": "^0.26.0",
"dompurify": "^0.8.9",
"drop-stream": "^1.0.0",
"duplex-maker": "^1.0.0",
"extract-loader": "^0.1.0",
"file-loader": "^0.9.0",
@ -39,10 +40,11 @@
"regexp-replace-loader": "0.0.1",
"stream-chunker": "^1.2.8",
"transform-loader": "^0.2.3",
"voice-activity-detection": "johni0702/voice-activity-detection#9f8bd90",
"webpack": "^1.13.3",
"webworkify-webpack-dropin": "^1.1.9",
"libsamplerate.js": "^1.0.0",
"mumble-client-codecs-browser": "^1.1.0",
"mumble-client-codecs-browser": "^1.1.1",
"mumble-client-websocket": "^1.0.0",
"mumble-client": "^1.1.0",
"web-audio-buffer-queue": "^1.0.0"

View file

@ -208,8 +208,10 @@ form {
border-bottom: 1px solid darkgray;
}
/* Footer placed at the bottom edge of the dialog; the width leaves room for
   the 10px margin on the left and right side. */
.dialog-footer {
position: absolute;
bottom: 0px;
width: calc(100% - 20px);
margin: 10px;
margin-bottom: 0px;
}
.dialog-close {
float: left;
@ -255,6 +257,14 @@ form {
}
/* Inputs inside the settings table fill their cell without any margin. */
.settings-dialog table input {
width: 100%;
margin: 0px;
}
/* Bordered frame around the microphone level bar of the settings dialog. */
.settings-dialog .mic-volume-container {
height: 10px;
border: 3px solid black;
}
/* Level bar filling the height of its container; its width and background
   are set inline through the data-bind of the settings dialog markup. */
.settings-dialog .mic-volume {
height: 100%;
}
.connect-dialog {
width: 300px;