Include waveform when sending voice messages (#1650)

- New `AudioLevelCalculator` implementation that outputs dBov rescaled to the [0;1] range.
- `VoiceRecorder` now stores the audio levels sampled while recording, then resamples them to 100 samples to use as waveform preview.
- Waveform data is carried all the way as a `List<Float>` and is converted to a `List<Int>` in the [0;1024] range, as per the Matrix spec (MSC3246), only right before sending it.
This commit is contained in:
Marco Romano
2023-10-26 17:37:24 +02:00
committed by GitHub
parent 517b422992
commit 4a390296cf
11 changed files with 199 additions and 70 deletions

View File

@@ -148,6 +148,7 @@ class VoiceMessageComposerPresenter @Inject constructor(
appCoroutineScope.sendMessage(
file = finishedState.file,
mimeType = finishedState.mimeType,
waveform = finishedState.waveform,
).invokeOnCompletion {
isSending = false
}
@@ -207,12 +208,14 @@ class VoiceMessageComposerPresenter @Inject constructor(
}
private fun CoroutineScope.sendMessage(
file: File, mimeType: String,
file: File,
mimeType: String,
waveform: List<Float>
) = launch {
val result = mediaSender.sendVoiceMessage(
uri = file.toUri(),
mimeType = mimeType,
waveForm = emptyList(), // TODO generate waveform
waveForm = waveform.toMSC3246range(),
)
if (result.isFailure) {
@@ -223,3 +226,8 @@ class VoiceMessageComposerPresenter @Inject constructor(
voiceRecorder.deleteRecording()
}
}
/**
 * Converts a waveform of [0;1] floats into the [0;1024] integer range required by the
 * unstable MSC3246 spec.
 *
 * Each sample is scaled by 1024 and truncated towards zero. Samples falling outside of
 * [0;1] (including infinities) are clamped so that the output never leaves the spec range;
 * NaN samples map to 0.
 *
 * @return A list with the same size as the receiver, with every item in [0;1024].
 */
private fun List<Float>.toMSC3246range(): List<Int> = map { (it * 1024).toInt().coerceIn(0, 1024) }

View File

@@ -38,9 +38,11 @@ sealed class VoiceRecorderState {
*
* @property file The recorded file.
* @property mimeType The mime type of the file.
* @property waveform The waveform of the recording.
*/
data class Finished(
val file: File,
val mimeType: String,
// VoiceRecorderImpl fills this with the audio levels sampled while recording,
// resampled to 100 items.
val waveform: List<Float>,
) : VoiceRecorderState()
}

View File

@@ -30,6 +30,7 @@ import io.element.android.libraries.voicerecorder.impl.audio.AudioConfig
import io.element.android.libraries.voicerecorder.impl.audio.AudioLevelCalculator
import io.element.android.libraries.voicerecorder.impl.audio.AudioReader
import io.element.android.libraries.voicerecorder.impl.audio.Encoder
import io.element.android.libraries.voicerecorder.impl.audio.resample
import io.element.android.libraries.voicerecorder.impl.file.VoiceFileConfig
import io.element.android.libraries.voicerecorder.impl.file.VoiceFileManager
import kotlinx.coroutines.CoroutineScope
@@ -65,6 +66,7 @@ class VoiceRecorderImpl @Inject constructor(
private var outputFile: File? = null
private var audioReader: AudioReader? = null
private var recordingJob: Job? = null
private val levels: MutableList<Float> = mutableListOf()
private val _state = MutableStateFlow<VoiceRecorderState>(VoiceRecorderState.Idle)
override val state: StateFlow<VoiceRecorderState> = _state
@@ -74,6 +76,7 @@ class VoiceRecorderImpl @Inject constructor(
Timber.i("Voice recorder started recording")
outputFile = fileManager.createFile()
.also(encoder::init)
levels.clear()
val audioRecorder = audioReaderFactory.create(config, dispatchers).also { audioReader = it }
@@ -94,6 +97,7 @@ class VoiceRecorderImpl @Inject constructor(
is Audio.Data -> {
val audioLevel = audioLevelCalculator.calculateAudioLevel(audio.buffer)
_state.emit(VoiceRecorderState.Recording(elapsedTime, audioLevel))
levels.add(audioLevel)
encoder.encode(audio.buffer, audio.readSize)
}
is Audio.Error -> {
@@ -124,12 +128,17 @@ class VoiceRecorderImpl @Inject constructor(
if (cancelled) {
deleteRecording()
levels.clear()
}
_state.emit(
when (val file = outputFile) {
null -> VoiceRecorderState.Idle
else -> VoiceRecorderState.Finished(file, fileConfig.mimeType)
else -> VoiceRecorderState.Finished(
file = file,
mimeType = fileConfig.mimeType,
waveform = levels.resample(100),
)
}
)
}

View File

@@ -20,9 +20,8 @@ interface AudioLevelCalculator {
/**
* Calculate the audio level of the audio buffer.
*
* @param buffer The audio buffer containing raw audio data.
*
* @return A value between 0 and 1.
* @param buffer The audio buffer containing 16bit PCM audio data.
* @return A float value between 0 and 1 proportional to the audio level.
*/
fun calculateAudioLevel(buffer: ShortArray): Float
}

View File

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2023 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.element.android.libraries.voicerecorder.impl.audio
import com.squareup.anvil.annotations.ContributesBinding
import io.element.android.libraries.di.RoomScope
import javax.inject.Inject
import kotlin.math.log10
import kotlin.math.sqrt
/**
 * [AudioLevelCalculator] backed by a dBov measurement.
 *
 * The RMS of the PCM16 encoded [ShortArray] is converted to dBov, rescaled by the PCM16
 * dynamic range and finally clamped to [0;1].
 * See: https://en.wikipedia.org/wiki/DBFS
 */
@ContributesBinding(RoomScope::class)
class DBovAudioLevelCalculator @Inject constructor() : AudioLevelCalculator {
    override fun calculateAudioLevel(buffer: ShortArray): Float {
        val rootMeanSquare = buffer.rms()
        val decibels = rootMeanSquare.dBov()
        val level = decibels.normalize()
        return level.coerceIn(0f, 1f)
    }
}
/**
 * Computes the root mean square of the given PCM16 encoded [ShortArray], with every
 * sample first divided by [Short.MAX_VALUE] so that full scale maps to roughly 1.0.
 *
 * An empty buffer yields 0 rather than NaN (0 / 0), so callers always get a usable value.
 *
 * @return The RMS value; 0.0 for silence or an empty buffer. It can marginally exceed 1.0
 * because [Short.MIN_VALUE] has a larger magnitude than [Short.MAX_VALUE].
 */
private fun ShortArray.rms(): Float {
    if (isEmpty()) return 0f
    val fullScale = Short.MAX_VALUE.toFloat()
    // Accumulate the squares in a single pass instead of allocating intermediate arrays.
    val sumOfSquares = fold(0.0f) { acc, sample ->
        val scaled = sample / fullScale
        acc + scaled * scaled
    }
    return sqrt(sumOfSquares / size)
}
/**
 * Expresses the receiver, an RMS amplitude where 1.0 is full scale, in decibels
 * relative to overload (dBov): 0.0 for a full scale square wave, negative below it.
 * An RMS of 0 produces negative infinity.
 */
private fun Float.dBov(): Float {
    return log10(this) * 20
}
/** Dynamic range, in dB, used to rescale PCM16 dBov values. */
private const val DYNAMIC_RANGE_PCM16: Float = 96.0f

/**
 * Rescales a dBov value so that -[DYNAMIC_RANGE_PCM16] maps to 0.0 and 0 dBov maps to 1.0.
 * The result is not clamped: values outside of that interval land outside of [0.0, 1.0].
 */
private fun Float.normalize(): Float {
    val shifted = this + DYNAMIC_RANGE_PCM16
    return shifted / DYNAMIC_RANGE_PCM16
}

View File

@@ -1,49 +0,0 @@
/*
* Copyright (c) 2023 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.element.android.libraries.voicerecorder.impl.audio
import com.squareup.anvil.annotations.ContributesBinding
import io.element.android.libraries.di.RoomScope
import javax.inject.Inject
import kotlin.math.log10
import kotlin.math.min
import kotlin.math.sqrt
/**
 * [AudioLevelCalculator] that rates the RMS of the raw PCM16 samples against [REFERENCE_DB].
 */
@ContributesBinding(RoomScope::class)
class DecibelAudioLevelCalculator @Inject constructor() : AudioLevelCalculator {
    companion object {
        private const val REFERENCE_DB = 50.0 // Reference dB for normal conversation
    }

    /**
     * Calculates the audio level of [buffer].
     *
     * @param buffer The audio buffer containing 16 bit PCM samples.
     * @return A value in [0.0, 1.0]. Quiet, silent or empty buffers return 0.
     */
    override fun calculateAudioLevel(buffer: ShortArray): Float {
        val rms = buffer.rootMeanSquare()

        // Convert to decibels. The value is negative when rms < REFERENCE_DB and negative
        // infinity when rms is 0, hence the lower bound applied on top of the upper clip.
        val db = 20 * log10(rms / REFERENCE_DB)
        val clipped = min(db, REFERENCE_DB).coerceAtLeast(0.0)

        // Scale to the range [0.0, 1.0]
        return (clipped / REFERENCE_DB).toFloat()
    }

    /** Root mean square of the samples, or 0 when there are none (avoids 0 / 0 = NaN). */
    private fun ShortArray.rootMeanSquare(): Double {
        if (isEmpty()) return 0.0
        // Use Double to avoid overflow
        val sumOfSquares: Double = sumOf { it.toDouble() * it.toDouble() }
        return sqrt(sumOfSquares / size.toDouble())
    }
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright (c) 2023 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.element.android.libraries.voicerecorder.impl.audio
/**
 * Produces a list of [size] items out of [this] list, linearly interpolating between
 * neighbouring input items.
 *
 * An empty input gives [size] zeros, a single item input gives that item repeated [size]
 * times, and an input that already has [size] items is returned as is.
 *
 * @param size The number of items of the returned list, must be strictly positive.
 * @throws IllegalArgumentException if [size] is not strictly positive.
 */
fun List<Float>.resample(size: Int): List<Float> {
    require(size > 0)
    val source = this
    // Trivial inputs need no interpolation.
    when {
        source.isEmpty() -> return List(size) { 0f }
        source.size == 1 -> return List(size) { source[0] }
        source.size == size -> return source
    }
    // Distance, in input items, between two consecutive output items.
    val ratio: Float = source.size.toFloat() / size.toFloat()
    val lastIndex = source.lastIndex
    return List(size) { index ->
        val position = index * ratio
        val lower = position.toInt()
        // Past the last input item there is nothing to interpolate with, so reuse it.
        val upper = minOf(lower + 1, lastIndex)
        val fraction = position - lower
        source[lower] * (1 - fraction) + source[upper] * fraction
    }
}

View File

@@ -62,7 +62,7 @@ class VoiceRecorderImplTest {
voiceRecorder.startRecord()
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(0.seconds, 1.0f))
timeSource += 1.seconds
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(1.seconds,0.0f))
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(1.seconds, 0.0f))
timeSource += 1.seconds
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(2.seconds, 1.0f))
}
@@ -80,7 +80,13 @@ class VoiceRecorderImplTest {
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(29.minutes, 0.0f))
timeSource += 1.minutes
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Finished(File(FILE_PATH), "audio/ogg"))
assertThat(awaitItem()).isEqualTo(
VoiceRecorderState.Finished(
file = File(FILE_PATH),
mimeType = "audio/ogg",
waveform = List(100) { 1f },
)
)
}
}
@@ -93,7 +99,13 @@ class VoiceRecorderImplTest {
voiceRecorder.startRecord()
skipItems(3)
voiceRecorder.stopRecord()
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Finished(File(FILE_PATH), "audio/ogg"))
assertThat(awaitItem()).isEqualTo(
VoiceRecorderState.Finished(
file = File(FILE_PATH),
mimeType = "audio/ogg",
waveform = List(100) { 1f },
)
)
assertThat(fakeFileSystem.files[File(FILE_PATH)]).isEqualTo(ENCODED_DATA)
}
}

View File

@@ -16,31 +16,35 @@
package io.element.android.libraries.voicerecorder.impl.audio
import com.google.common.truth.Truth
import org.junit.Test
class DecibelAudioLevelCalculatorTest {
class DBovAudioLevelCalculatorTest {
@Test
fun `given max values, it returns values within range`() {
val calculator = DecibelAudioLevelCalculator()
fun `given max values, it returns 1`() {
val calculator = DBovAudioLevelCalculator()
val buffer = ShortArray(100) { Short.MAX_VALUE }
val level = calculator.calculateAudioLevel(buffer)
assert(level in 0.0..1.0)
Truth.assertThat(level).isEqualTo(1.0f)
}
@Test
fun `given mixed values, it returns values within range`() {
val calculator = DecibelAudioLevelCalculator()
val buffer = shortArrayOf(Short.MAX_VALUE, Short.MIN_VALUE, -1, 1)
val calculator = DBovAudioLevelCalculator()
val buffer = shortArrayOf(100, -200, 300, -400, 500, -600, 700, -800, 900, -1000)
val level = calculator.calculateAudioLevel(buffer)
assert(level in 0.0..1.0)
Truth.assertThat(level).apply {
isGreaterThan(0f)
isLessThan(1f)
}
}
@Test
fun `given min values, it returns values within range`() {
val calculator = DecibelAudioLevelCalculator()
val buffer = ShortArray(100) { Short.MIN_VALUE }
fun `given min values, it returns 0`() {
val calculator = DBovAudioLevelCalculator()
val buffer = ShortArray(100) { 0 }
val level = calculator.calculateAudioLevel(buffer)
assert(level in 0.0..1.0)
Truth.assertThat(level).isEqualTo(0.0f)
}
}

View File

@@ -0,0 +1,41 @@
/*
* Copyright (c) 2023 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.element.android.libraries.voicerecorder.impl.audio
import com.google.common.truth.Truth
import org.junit.Test
/**
 * Tests for [resample], one scenario per test so that a failing scenario does not hide
 * the remaining ones.
 */
class ResampleTest {
    @Test
    fun `given an empty list, it returns zeros`() {
        Truth.assertThat(emptyList<Float>().resample(3))
            .isEqualTo(listOf(0.0f, 0.0f, 0.0f))
    }

    @Test
    fun `given a single zero, it repeats it`() {
        Truth.assertThat(listOf(0.0f).resample(10))
            .isEqualTo(listOf(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f))
    }

    @Test
    fun `given a single one, it repeats it`() {
        Truth.assertThat(listOf(1.0f).resample(10))
            .isEqualTo(listOf(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))
    }

    @Test
    fun `given a list of the requested size, it returns it unchanged`() {
        val input = listOf(0.1f, 0.2f, 0.3f)
        Truth.assertThat(input.resample(3)).isEqualTo(input)
    }

    @Test
    fun `given two items, it upsamples by interpolating`() {
        Truth.assertThat(listOf(0.0f, 1.0f).resample(10))
            .isEqualTo(listOf(0.0f, 0.2f, 0.4f, 0.6f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))
    }

    @Test
    fun `given three items, it upsamples by interpolating`() {
        Truth.assertThat(listOf(0.0f, 0.5f, 1.0f).resample(10))
            .isEqualTo(listOf(0.0f, 0.15f, 0.3f, 0.45000002f, 0.6f, 0.75f, 0.90000004f, 1.0f, 1.0f, 1.0f))
    }

    @Test
    fun `given a hundred items, it downsamples`() {
        Truth.assertThat(List(100) { it.toFloat() }.resample(10))
            .isEqualTo(listOf(0.0f, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f))
    }
}

View File

@@ -70,7 +70,11 @@ class FakeVoiceRecorder(
_state.emit(
when (curRecording) {
null -> VoiceRecorderState.Idle
else -> VoiceRecorderState.Finished(curRecording!!, "audio/ogg")
else -> VoiceRecorderState.Finished(
file = curRecording!!,
mimeType = "audio/ogg",
waveform = listOf(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 8f, 7f, 6f, 5f, 4f, 3f, 2f, 1f, 0f),
)
}
)
}