Files
element-web/src/audio/RecorderWorklet.ts
László Várady 75c2c1a572 Honor advanced audio processing settings when recording voice messages (#9610)
* VoiceRecordings: honor advanced audio processing settings

Audio processing settings introduced in #8759 is now taken into account
when recording a voice message.

Signed-off-by: László Várady <laszlo.varady@protonmail.com>

* VoiceRecordings: add higher-quality audio recording

When recording non-voice audio (e.g. music, FX), a different Opus encoder
application should be specified. It is also recommended to increase the
bitrate to 64-96 kb/s for musical use.

Note: the HQ mode is currently activated when noise suppression is
turned off. This is a very arbitrary condition.

Signed-off-by: László Várady <laszlo.varady@protonmail.com>

* RecorderWorklet: fix type mismatch

src/audio/VoiceRecording.ts:129:67 - Argument of type 'null' is not
assignable to parameter of type 'string | URL'.

Signed-off-by: László Várady <laszlo.varady@protonmail.com>

* VoiceRecording: test audio settings

Signed-off-by: László Várady <laszlo.varady@protonmail.com>

* Fix typos

Signed-off-by: László Várady <laszlo.varady@protonmail.com>

* VoiceRecording: refactor using destructuring assignment

Signed-off-by: László Várady <laszlo.varady@protonmail.com>

* VoiceRecording: add comments about constants and non-trivial conditions

Signed-off-by: László Várady <laszlo.varady@protonmail.com>

Signed-off-by: László Várady <laszlo.varady@protonmail.com>
2022-12-05 11:19:50 -05:00

89 lines
4.1 KiB
TypeScript

/*
Copyright 2021 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import { IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME } from "./consts";
import { percentageOf } from "../utils/numbers";
// from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope
declare const currentTime: number;
// declare const currentFrame: number;
// declare const sampleRate: number;
// We rate limit here to avoid overloading downstream consumers with amplitude information.
// The two major consumers are the voice message waveform thumbnail (resampled down to an
// appropriate length) and the live waveform shown to the user. Effectively, this controls
// the refresh rate of that live waveform and the number of samples the thumbnail has to
// work with.
const TARGET_AMPLITUDE_FREQUENCY = 16; // Hz
function roundTimeToTargetFreq(seconds: number): number {
// Epsilon helps avoid floating point rounding issues (1 + 1 = 1.999999, etc)
return Math.round((seconds + Number.EPSILON) * TARGET_AMPLITUDE_FREQUENCY) / TARGET_AMPLITUDE_FREQUENCY;
}
function nextTimeForTargetFreq(roundedSeconds: number): number {
// The extra round is just to make sure we cut off any floating point issues
return roundTimeToTargetFreq(roundedSeconds + (1 / TARGET_AMPLITUDE_FREQUENCY));
}
class MxVoiceWorklet extends AudioWorkletProcessor {
private nextAmplitudeSecond = 0;
private amplitudeIndex = 0;
process(inputs, outputs, parameters) {
const currentSecond = roundTimeToTargetFreq(currentTime);
// We special case the first ping because there's a fairly good chance that we'll miss the zeroth
// update. Firefox for instance takes 0.06 seconds (roughly) to call this function for the first
// time. Edge and Chrome occasionally lag behind too, but for the most part are on time.
//
// When this doesn't work properly we end up producing a waveform of nulls and no live preview
// of the recorded message.
if (currentSecond === this.nextAmplitudeSecond || this.nextAmplitudeSecond === 0) {
// We're expecting exactly one mono input source, so just grab the very first frame of
// samples for the analysis.
const monoChan = inputs[0][0];
// The amplitude of the frame's samples is effectively the loudness of the frame. This
// translates into a bar which can be rendered as part of the whole recording clip's
// waveform.
//
// We translate the amplitude down to 0-1 for sanity's sake.
const minVal = Math.min(...monoChan);
const maxVal = Math.max(...monoChan);
const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1);
this.port.postMessage(<IAmplitudePayload>{
ev: PayloadEvent.AmplitudeMark,
amplitude: amplitude,
forIndex: this.amplitudeIndex++,
});
this.nextAmplitudeSecond = nextTimeForTargetFreq(currentSecond);
}
// We mostly use this worklet to fire regular clock updates through to components
this.port.postMessage(<ITimingPayload>{ ev: PayloadEvent.Timekeep, timeSeconds: currentTime });
// We're supposed to return false when we're "done" with the audio clip, but seeing as
// we are acting as a passive processor we are never truly "done". The browser will clean
// us up when it is done with us.
return true;
}
}
registerProcessor(WORKLET_NAME, MxVoiceWorklet);
export default ""; // to appease module loaders (we never use the export)