'use strict';
var Transform = require('stream').Transform;
var util = require('util');
var TARGET_SAMPLE_RATE = 16000;
/**
 * Transforms Buffers or AudioBuffers into a binary stream of l16 (raw wav) audio, downsampling in the process.
 *
 * The watson speech-to-text service works on 16 kHz audio and internally downsamples audio received at higher sample rates.
 * WebAudio is usually 48 kHz, so downsampling here reduces bandwidth usage by 2/3.
 *
 * Format event + stream can be combined with https://www.npmjs.com/package/wav to generate a wav file with a proper header
 *
 * Todo: support multi-channel audio (for use with <audio>/<video> elements) - will require interleaving audio channels
 *
 * @param {Object} [opts] - options, merged over the defaults below and forwarded to stream.Transform
 * @param {Number} [opts.sourceSampleRate=48000] - sample rate of incoming audio (overwritten by the first AudioBuffer in objectMode)
 * @param {Boolean} [opts.writableObjectMode=true] - accept AudioBuffer objects when true, raw Buffers of 32-bit floats when false
 * @param {Boolean} [opts.downsample=true] - downsample to 16 kHz before converting to l16
 * @constructor
 */
function WebAudioL16Stream(opts) {
  // Object.assign replaces the deprecated util._extend (identical shallow-merge semantics)
  opts = this.opts = Object.assign({
    sourceSampleRate: 48000,
    writableObjectMode: true,
    downsample: true
  }, opts);
  Transform.call(this, opts);
  // Samples the FIR filter couldn't consume at the end of a chunk, carried over to the next one
  this.bufferUnusedSamples = [];
  if (opts.writableObjectMode) {
    // objectMode: the true sample rate isn't known until the first AudioBuffer arrives,
    // so defer the 'format' event until then
    this.formatEmitted = false;
    this._transform = this.handleFirstAudioBuffer;
  } else {
    // binary mode: the sample rate comes from opts, so the format can be emitted right away
    // (nextTick gives callers a chance to attach a 'format' listener first)
    this._transform = this.transformBuffer;
    process.nextTick(this.emitFormat.bind(this));
  }
}
util.inherits(WebAudioL16Stream, Transform);
WebAudioL16Stream.prototype.emitFormat = function emitFormat() {
  // Describe the outgoing audio: mono, 16-bit signed little-endian PCM.
  // The rate is 16 kHz when downsampling, otherwise whatever the source provides.
  var format = {
    channels: 1,
    bitDepth: 16,
    sampleRate: this.opts.downsample ? TARGET_SAMPLE_RATE : this.opts.sourceSampleRate,
    signed: true,
    float: false
  };
  // Flag first so handleFirstAudioBuffer won't emit a duplicate.
  this.formatEmitted = true;
  this.emit('format', format);
};
/**
 * Downsamples WebAudio to 16 kHz.
 *
 * Browsers can downsample WebAudio natively with OfflineAudioContext's but it was designed for non-streaming use and
 * requires a new context for each AudioBuffer. Firefox can handle this, but chrome (v47) crashes after a few minutes.
 * So, we'll do it in JS for now.
 *
 * This really belongs in its own stream, but there's no way to create new AudioBuffer instances from JS, so it's
 * fairly coupled to the wav conversion code.
 *
 * @param {Float32Array} bufferNewSamples Microphone/MediaElement audio chunk
 * @return {Float32Array} 'audio/l16' chunk
 */
WebAudioL16Stream.prototype.downsample = function downsample(bufferNewSamples) {
  var buffer = null;
  var newSamples = bufferNewSamples.length;
  var unusedSamples = this.bufferUnusedSamples.length;

  // Prepend any samples left over from the previous chunk so the filter
  // runs seamlessly across chunk boundaries without dropping audio.
  if (unusedSamples > 0) {
    buffer = new Float32Array(unusedSamples + newSamples);
    for (var i = 0; i < unusedSamples; ++i) {
      buffer[i] = this.bufferUnusedSamples[i];
    }
    for (i = 0; i < newSamples; ++i) {
      buffer[unusedSamples + i] = bufferNewSamples[i];
    }
  } else {
    buffer = bufferNewSamples;
  }

  // 21-tap symmetric low-pass FIR filter, applied while decimating to avoid aliasing.
  var filter = [
    -0.037935, -0.00089024, 0.040173, 0.019989, 0.0047792, -0.058675, -0.056487,
    -0.0040653, 0.14527, 0.26927, 0.33913, 0.26927, 0.14527, -0.0040653, -0.056487,
    -0.058675, 0.0047792, 0.019989, 0.040173, -0.00089024, -0.037935
  ];
  var samplingRateRatio = this.opts.sourceSampleRate / TARGET_SAMPLE_RATE;
  var nOutputSamples = Math.floor((buffer.length - filter.length) / samplingRateRatio) + 1;
  var outputBuffer = new Float32Array(nOutputSamples);

  // Each output sample is the filtered value at a source offset advanced by the rate ratio.
  for (var offset, i2 = 0; i2 + filter.length - 1 < buffer.length; i2++) {
    offset = Math.round(samplingRateRatio * i2);
    var sample = 0;
    for (var j = 0; j < filter.length; ++j) {
      sample += buffer[offset + j] * filter[j];
    }
    outputBuffer[i2] = sample;
  }

  // Stash the tail samples the filter window couldn't cover; they are prepended next time.
  var indexSampleAfterLastUsed = Math.round(samplingRateRatio * i2);
  var remaining = buffer.length - indexSampleAfterLastUsed;
  if (remaining > 0) {
    this.bufferUnusedSamples = new Float32Array(remaining);
    for (i = 0; i < remaining; ++i) {
      this.bufferUnusedSamples[i] = buffer[indexSampleAfterLastUsed + i];
    }
  } else {
    this.bufferUnusedSamples = new Float32Array(0);
  }
  return outputBuffer; // fix: previously relied on ASI (missing semicolon)
};
/**
 * Accepts a Float32Array of audio data and converts it to a Buffer of l16 audio data (raw wav)
 *
 * Explanation for the math: The raw values captured from the Web Audio API are
 * in 32-bit Floating Point, between -1 and 1 (per the specification).
 * The values for 16-bit PCM range between -32768 and +32767 (16-bit signed integer).
 * Multiply by 0x8000 (32768) for negative samples and 0x7FFF (32767) for positive ones to convert.
 * Store in little endian.
 *
 * @param {Float32Array} input
 * @returns {Buffer}
 */
WebAudioL16Stream.prototype.floatTo16BitPCM = function(input) {
  var output = new DataView(new ArrayBuffer(input.length * 2)); // length is in bytes (8-bit), so *2 to get 16-bit length
  for (var i = 0; i < input.length; i++) {
    var multiplier = input[i] < 0 ? 0x8000 : 0x7FFF; // 16-bit signed range is -32768 to 32767
    output.setInt16(i * 2, (input[i] * multiplier) | 0, true); // index, value, little endian
  }
  // Buffer.from replaces the deprecated and unsafe `new Buffer()` constructor
  return Buffer.from(output.buffer);
};
/**
 * Does some one-time setup to grab sampleRate and emit format, then sets _transform to the actual audio buffer handler and calls it.
 * @param audioBuffer
 * @param encoding
 * @param next
 */
WebAudioL16Stream.prototype.handleFirstAudioBuffer = function handleFirstAudioBuffer(audioBuffer, encoding, next) {
  // The real sample rate comes from the browser's AudioBuffer, not from our default.
  this.opts.sourceSampleRate = audioBuffer.sampleRate;
  if (!this.formatEmitted) {
    this.emitFormat();
  }
  // Every subsequent chunk skips this setup and goes straight to the real handler.
  this._transform = this.transformAudioBuffer;
  this._transform(audioBuffer, encoding, next);
};
/**
 * Accepts an AudioBuffer (for objectMode), then downsamples to 16000 and converts to a 16-bit pcm
 *
 * @param audioBuffer
 * @param encoding
 * @param next
 */
WebAudioL16Stream.prototype.transformAudioBuffer = function (audioBuffer, encoding, next) {
  // Only the first channel is used; multi-channel support is a TODO (see file header).
  var samples = audioBuffer.getChannelData(0);
  if (this.opts.downsample) {
    samples = this.downsample(samples);
  }
  this.push(this.floatTo16BitPCM(samples));
  next();
};
/**
 * Accepts a Buffer (for binary mode) of 32-bit floats, then downsamples to 16000 and converts to a 16-bit pcm
 *
 * @param nodebuffer
 * @param encoding
 * @param next
 */
WebAudioL16Stream.prototype.transformBuffer = function (nodebuffer, encoding, next) {
  // Node Buffers are frequently small views into a larger shared pool, so respect
  // byteOffset/length rather than reinterpreting the entire backing ArrayBuffer
  // (the old `new Float32Array(nodebuffer.buffer)` read the whole pool, producing garbage audio).
  var source = new Float32Array(
    nodebuffer.buffer,
    nodebuffer.byteOffset,
    nodebuffer.length / Float32Array.BYTES_PER_ELEMENT
  );
  if (this.opts.downsample) {
    source = this.downsample(source);
  }
  this.push(this.floatTo16BitPCM(source));
  next();
};
// Export the stream constructor as the module's sole public API.
module.exports = WebAudioL16Stream;