(question rewritten integrating bits of information from answers, plus making it more concise.)
I use `analyser = audioContext.createAnalyser()` in order to process audio data, and I'm trying to understand the details better.
I choose an `fftSize`, say 2048, then I create a buffer of 2048 floats with `Float32Array`, and then, in an animation loop (called 60 times per second on most machines, via `window.requestAnimationFrame`), I do `analyser.getFloatTimeDomainData(buffer);`, which fills my buffer with 2048 floating point sample data points.
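For reference, a minimal sketch of that setup (the function name `update` is mine, and the processing step is a placeholder):

```js
const audioContext = new AudioContext();
const analyser = audioContext.createAnalyser();
analyser.fftSize = 2048;
// (a source node would be connected to the analyser, as in the full snippet below)
const buffer = new Float32Array(analyser.fftSize);

function update() {
    // fill `buffer` with the most recent fftSize time-domain samples
    analyser.getFloatTimeDomainData(buffer);
    // ... process buffer here ...
    window.requestAnimationFrame(update);
}
window.requestAnimationFrame(update);
```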
When the handler is called the next time, 1/60 of a second has passed. To express that in units of samples, we have to divide it by the duration of one sample; at a sample rate of 44100 Hz that gives (1/60)/(1/44100) = 735. So the next handler call takes place (on average) 735 samples later.
So there is overlap between subsequent buffers: each new buffer repeats most of the previous one, shifted left, and only the last few hundred samples are new.
We know from the spec (search for 'render quantum') that everything happens in chunks of 128 samples. So (in terms of audio processing) one would expect the next handler call to come either 5 * 128 = 640 samples later, or else 6 * 128 = 768 samples later, those being the multiples of 128 closest to 735 samples = 1/60 second.
Calling this amount "Δ-samples": how do I find out, during each handler call, what it is? Is it 640, 768, or something else?
One reliable way is this: consider the 'old buffer' (from the previous handler call). If you delete "Δ-samples" many samples at the beginning, copy the remainder, and then append "Δ-samples" many new samples, that should equal the current buffer. And indeed, I tried that, and it is the case. It turns out "Δ-samples" is often 384, 512, or 896. It is trivial, but time consuming, to determine "Δ-samples" with such a loop.
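For illustration, the search I mean looks like this (a sketch; the helper name `findSampleDiff` is mine, and stepping by 128 relies on the render-quantum observation above):

```js
// Find the shift d such that the current buffer equals the previous
// buffer with its first d samples removed, plus d new samples appended.
function findSampleDiff(prevBuf, buf) {
    // try shifts d = 0, 128, 256, ... up to the full buffer length
    for (let d = 0; d < buf.length; d += 128) {
        let match = true;
        for (let j = 0; j < buf.length - d; j++) {
            if (prevBuf[j + d] !== buf[j]) { match = false; break; }
        }
        // (degenerate input such as silence can match at a too-small d)
        if (match) return d; // Δ-samples
    }
    return buf.length; // no overlap: at least fftSize samples elapsed
}
```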
I would like to compute "Δ-samples" without performing that loop.
One would think the following would work:

    Δ-samples = (audioContext.currentTime - audioContext.currentTime recorded during the previous handler call) / (duration of 1 sample)

(Note that currentTime is a property, not a method.)
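In code, that idea is (a sketch; the name `lastTime` is mine):

```js
let lastTime = null;

function deltaSamplesFromClock() {
    const now = audioContext.currentTime;
    // elapsed context time divided by the duration of one sample
    const delta = lastTime === null ? null
        : Math.round((now - lastTime) * audioContext.sampleRate);
    lastTime = now;
    return delta;
}
```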
I tried that (see the code below, where I also "stitch together" the various buffers, trying to reconstruct the original buffer), and - surprise - it works about 99.9% of the time in Chrome, and about 95% of the time in Firefox.
I also tried `audioContext.getOutputTimestamp().contextTime`, which does not work in Chrome, and works 9?% of the time in Firefox.
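That variant differs only in how the clock is read (again a sketch; `lastTs` is mine):

```js
let lastTs = null;

function deltaSamplesFromOutputTimestamp() {
    // read the context time from the output timestamp instead
    const ts = audioContext.getOutputTimestamp().contextTime;
    const delta = lastTs === null ? null
        : Math.round((ts - lastTs) * audioContext.sampleRate);
    lastTs = ts;
    return delta;
}
```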
Is there any way to find "Δ-samples" (without looking at the buffers) that works reliably?
Second question: the "reconstructed" buffer (all the buffers from the callbacks stitched together) and the original sound buffer are not exactly the same; there is a small but noticeable difference, larger than ordinary rounding error, and it is bigger in Firefox. Where does that come from? As I understand the spec, those should be the same.
```js
var soundFile = 'https://mathheadinclouds.github.io/audio/sounds/la.mp3';
var audioContext = null;
var isPlaying = false;
var sourceNode = null;
var analyser = null;
var theBuffer = null;
var reconstructedBuffer = null;
var soundRequest = null;
var loopCounter = -1;
var FFT_SIZE = 2048;
var rafID = null;
var buffers = [];
var timesSamples = [];
var timeSampleDiffs = [];
var leadingWaste = 0;

window.addEventListener('load', function() {
    soundRequest = new XMLHttpRequest();
    soundRequest.open("GET", soundFile, true);
    soundRequest.responseType = "arraybuffer";
    //soundRequest.onload = function(evt) {}
    soundRequest.send();
    var btn = document.createElement('button');
    btn.textContent = 'go';
    btn.addEventListener('click', function(evt) {
        goButtonClick(this, evt)
    });
    document.body.appendChild(btn);
});

function goButtonClick(elt, evt) {
    initAudioContext(togglePlayback);
    elt.parentElement.removeChild(elt);
}

function initAudioContext(callback) {
    audioContext = new AudioContext();
    audioContext.decodeAudioData(soundRequest.response, function(buffer) {
        theBuffer = buffer;
        callback();
    });
}

function createAnalyser() {
    analyser = audioContext.createAnalyser();
    analyser.fftSize = FFT_SIZE;
}

function startWithSourceNode() {
    sourceNode.connect(analyser);
    analyser.connect(audioContext.destination);
    sourceNode.start(0);
    isPlaying = true;
    sourceNode.addEventListener('ended', function(evt) {
        sourceNode = null;
        analyser = null;
        isPlaying = false;
        loopCounter = -1;
        window.cancelAnimationFrame(rafID);
        console.log('buffer length', theBuffer.length);
        console.log('reconstructedBuffer length', reconstructedBuffer.length);
        console.log('audio callback called counter', buffers.length);
        console.log('root mean square error', Math.sqrt(checkResult() / theBuffer.length));
        console.log('lengths of time between requestAnimationFrame callbacks, measured in audio samples:');
        console.log(timeSampleDiffs);
        // histogram of the observed sample differences
        console.log(
            timeSampleDiffs.filter(function(val) { return val === 384 }).length,
            timeSampleDiffs.filter(function(val) { return val === 512 }).length,
            timeSampleDiffs.filter(function(val) { return val === 640 }).length,
            timeSampleDiffs.filter(function(val) { return val === 768 }).length,
            timeSampleDiffs.filter(function(val) { return val === 896 }).length,
            '*',
            timeSampleDiffs.filter(function(val) { return val > 896 }).length,
            timeSampleDiffs.filter(function(val) { return val < 384 }).length
        );
        console.log(
            timeSampleDiffs.filter(function(val) { return val === 384 }).length +
            timeSampleDiffs.filter(function(val) { return val === 512 }).length +
            timeSampleDiffs.filter(function(val) { return val === 640 }).length +
            timeSampleDiffs.filter(function(val) { return val === 768 }).length +
            timeSampleDiffs.filter(function(val) { return val === 896 }).length
        )
    });
    myAudioCallback();
}

function togglePlayback() {
    sourceNode = audioContext.createBufferSource();
    sourceNode.buffer = theBuffer;
    createAnalyser();
    startWithSourceNode();
}

function myAudioCallback(time) {
    ++loopCounter;
    if (!buffers[loopCounter]) {
        buffers[loopCounter] = new Float32Array(FFT_SIZE);
    }
    var buf = buffers[loopCounter];
    analyser.getFloatTimeDomainData(buf);
    var now = audioContext.currentTime;
    var nowSamp = Math.round(audioContext.sampleRate * now);
    timesSamples[loopCounter] = nowSamp;
    var j, sampDiff;
    if (loopCounter === 0) {
        console.log('start sample: ', nowSamp);
        reconstructedBuffer = new Float32Array(theBuffer.length + FFT_SIZE + nowSamp);
        leadingWaste = nowSamp;
        for (j = 0; j < FFT_SIZE; j++) {
            reconstructedBuffer[nowSamp + j] = buf[j];
        }
    } else {
        sampDiff = nowSamp - timesSamples[loopCounter - 1];
        timeSampleDiffs.push(sampDiff);
        // the first (FFT_SIZE - sampDiff) samples should repeat the previous buffer
        var expectedEqual = FFT_SIZE - sampDiff;
        for (j = 0; j < expectedEqual; j++) {
            if (reconstructedBuffer[nowSamp + j] !== buf[j]) {
                console.error('unexpected error', loopCounter, j);
                // debugger;
            }
        }
        // append the sampDiff new samples
        for (j = expectedEqual; j < FFT_SIZE; j++) {
            reconstructedBuffer[nowSamp + j] = buf[j];
        }
        //console.log(loopCounter, nowSamp, sampDiff);
    }
    rafID = window.requestAnimationFrame(myAudioCallback);
}

function checkResult() {
    var ch0 = theBuffer.getChannelData(0);
    var ch1 = theBuffer.getChannelData(1);
    var sum = 0;
    var idxDelta = leadingWaste + FFT_SIZE;
    for (var i = 0; i < theBuffer.length; i++) {
        var samp0 = ch0[i];
        var samp1 = ch1[i];
        // compare against the average of the two channels
        // (the analyser down-mixes stereo input to mono)
        var samp = (samp0 + samp1) / 2;
        var check = reconstructedBuffer[i + idxDelta];
        var diff = samp - check;
        var sqDiff = diff * diff;
        sum += sqDiff;
    }
    return sum;
}
```
In the above snippet, I do the following. I load, with `XMLHttpRequest`, a 1 second mp3 audio file from my github.io page (I sing 'la' for 1 second). After it has loaded, a button saying 'go' is shown; after pressing that, the audio is played back by putting it into a bufferSource node and calling `.start` on that. The bufferSource is then fed to our analyser, et cetera.
I also have the snippet code on my github.io page, which makes reading the console easier.