Include waveform when sending voice messages (#1650)
- New `AudioLevelCalculator` that outputs dBov rescaled to the [0;1] range. - `VoiceRecorder` now stores the audio levels sampled while recording, then resamples them to 100 samples to use as a waveform preview. - Waveform data is carried all the way as a `List<Float>` and converted to a `List<Int>` in the [0;1024] range, as per the Matrix spec, only just before sending it.
This commit is contained in:
@@ -148,6 +148,7 @@ class VoiceMessageComposerPresenter @Inject constructor(
|
||||
appCoroutineScope.sendMessage(
|
||||
file = finishedState.file,
|
||||
mimeType = finishedState.mimeType,
|
||||
waveform = finishedState.waveform,
|
||||
).invokeOnCompletion {
|
||||
isSending = false
|
||||
}
|
||||
@@ -207,12 +208,14 @@ class VoiceMessageComposerPresenter @Inject constructor(
|
||||
}
|
||||
|
||||
private fun CoroutineScope.sendMessage(
|
||||
file: File, mimeType: String,
|
||||
file: File,
|
||||
mimeType: String,
|
||||
waveform: List<Float>
|
||||
) = launch {
|
||||
val result = mediaSender.sendVoiceMessage(
|
||||
uri = file.toUri(),
|
||||
mimeType = mimeType,
|
||||
waveForm = emptyList(), // TODO generate waveform
|
||||
waveForm = waveform.toMSC3246range(),
|
||||
)
|
||||
|
||||
if (result.isFailure) {
|
||||
@@ -223,3 +226,8 @@ class VoiceMessageComposerPresenter @Inject constructor(
|
||||
voiceRecorder.deleteRecording()
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts waveform samples from the normalized [0;1] float range to the
 * [0;1024] integer range used by the unstable MSC3246 spec.
 *
 * Each sample is multiplied by 1024 and truncated toward zero; values are not clamped.
 */
private fun List<Float>.toMSC3246range(): List<Int> {
    val scaled = ArrayList<Int>(size)
    for (sample in this) {
        scaled.add((sample * 1024).toInt())
    }
    return scaled
}
|
||||
|
||||
@@ -38,9 +38,11 @@ sealed class VoiceRecorderState {
|
||||
*
|
||||
* @property file The recorded file.
|
||||
* @property mimeType The mime type of the file.
|
||||
* @property waveform The waveform of the recording.
|
||||
*/
|
||||
data class Finished(
|
||||
val file: File,
|
||||
val mimeType: String,
|
||||
val waveform: List<Float>,
|
||||
) : VoiceRecorderState()
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ import io.element.android.libraries.voicerecorder.impl.audio.AudioConfig
|
||||
import io.element.android.libraries.voicerecorder.impl.audio.AudioLevelCalculator
|
||||
import io.element.android.libraries.voicerecorder.impl.audio.AudioReader
|
||||
import io.element.android.libraries.voicerecorder.impl.audio.Encoder
|
||||
import io.element.android.libraries.voicerecorder.impl.audio.resample
|
||||
import io.element.android.libraries.voicerecorder.impl.file.VoiceFileConfig
|
||||
import io.element.android.libraries.voicerecorder.impl.file.VoiceFileManager
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
@@ -65,6 +66,7 @@ class VoiceRecorderImpl @Inject constructor(
|
||||
private var outputFile: File? = null
|
||||
private var audioReader: AudioReader? = null
|
||||
private var recordingJob: Job? = null
|
||||
private val levels: MutableList<Float> = mutableListOf()
|
||||
|
||||
private val _state = MutableStateFlow<VoiceRecorderState>(VoiceRecorderState.Idle)
|
||||
override val state: StateFlow<VoiceRecorderState> = _state
|
||||
@@ -74,6 +76,7 @@ class VoiceRecorderImpl @Inject constructor(
|
||||
Timber.i("Voice recorder started recording")
|
||||
outputFile = fileManager.createFile()
|
||||
.also(encoder::init)
|
||||
levels.clear()
|
||||
|
||||
val audioRecorder = audioReaderFactory.create(config, dispatchers).also { audioReader = it }
|
||||
|
||||
@@ -94,6 +97,7 @@ class VoiceRecorderImpl @Inject constructor(
|
||||
is Audio.Data -> {
|
||||
val audioLevel = audioLevelCalculator.calculateAudioLevel(audio.buffer)
|
||||
_state.emit(VoiceRecorderState.Recording(elapsedTime, audioLevel))
|
||||
levels.add(audioLevel)
|
||||
encoder.encode(audio.buffer, audio.readSize)
|
||||
}
|
||||
is Audio.Error -> {
|
||||
@@ -124,12 +128,17 @@ class VoiceRecorderImpl @Inject constructor(
|
||||
|
||||
if (cancelled) {
|
||||
deleteRecording()
|
||||
levels.clear()
|
||||
}
|
||||
|
||||
_state.emit(
|
||||
when (val file = outputFile) {
|
||||
null -> VoiceRecorderState.Idle
|
||||
else -> VoiceRecorderState.Finished(file, fileConfig.mimeType)
|
||||
else -> VoiceRecorderState.Finished(
|
||||
file = file,
|
||||
mimeType = fileConfig.mimeType,
|
||||
waveform = levels.resample(100),
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -20,9 +20,8 @@ interface AudioLevelCalculator {
|
||||
/**
|
||||
* Calculate the audio level of the audio buffer.
|
||||
*
|
||||
* @param buffer The audio buffer containing raw audio data.
|
||||
*
|
||||
* @return A value between 0 and 1.
|
||||
* @param buffer The audio buffer containing 16bit PCM audio data.
|
||||
* @return A float value between 0 and 1 proportional to the audio level.
|
||||
*/
|
||||
fun calculateAudioLevel(buffer: ShortArray): Float
|
||||
}
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2023 New Vector Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.element.android.libraries.voicerecorder.impl.audio
|
||||
|
||||
import com.squareup.anvil.annotations.ContributesBinding
|
||||
import io.element.android.libraries.di.RoomScope
|
||||
import javax.inject.Inject
|
||||
import kotlin.math.log10
|
||||
import kotlin.math.sqrt
|
||||
|
||||
/**
 * [AudioLevelCalculator] implementation based on dBov.
 *
 * The RMS of the given PCM16 encoded [ShortArray] is converted to dBov,
 * rescaled to the [0;1] range and finally clamped to that range.
 * See: https://en.wikipedia.org/wiki/DBFS
 */
@ContributesBinding(RoomScope::class)
class DBovAudioLevelCalculator @Inject constructor() : AudioLevelCalculator {
    override fun calculateAudioLevel(buffer: ShortArray): Float {
        val rootMeanSquare = buffer.rms()
        val decibels = rootMeanSquare.dBov()
        val level = decibels.normalize()
        // Clamp into [0;1]: an all-zero buffer has an RMS of 0, whose dBov is -Infinity.
        return level.coerceIn(0f, 1f)
    }
}
|
||||
|
||||
/**
 * Computes the root mean square of the given PCM16 encoded [ShortArray],
 * with every sample first divided by [Short.MAX_VALUE].
 *
 * The result is 0.0 for silence and 1.0 for a buffer filled with [Short.MAX_VALUE].
 *
 * @return The RMS of the scaled samples, or 0.0 when the buffer is empty.
 */
private fun ShortArray.rms(): Float {
    // An empty buffer would otherwise divide 0 by 0 and produce NaN,
    // which later range clamping does not remove.
    if (isEmpty()) return 0f

    val fullScale = Short.MAX_VALUE.toFloat()
    // Accumulate in a single pass instead of allocating temporary arrays.
    var sumOfSquares = 0.0f
    for (sample in this) {
        val scaled = sample / fullScale
        sumOfSquares += scaled * scaled
    }
    return sqrt(sumOfSquares / size)
}
|
||||
|
||||
/**
 * Expresses this RMS value in decibels relative to overload (dBov).
 *
 * Computed as `20 * log10(rms)`: an RMS of 1.0 (full scale square wave) gives 0.0,
 * smaller positive values are negative, and an RMS of 0.0 gives negative infinity.
 */
private fun Float.dBov(): Float {
    val exponent = log10(this)
    return 20 * exponent
}
|
||||
|
||||
/**
 * Maps the given dBov value onto the [0.0, 1.0] scale:
 * -[DYNAMIC_RANGE_PCM16] maps to 0.0 and 0.0 maps to 1.0.
 *
 * Inputs outside that interval map outside [0.0, 1.0]; no clamping is done here.
 */
private fun Float.normalize(): Float {
    val shifted = this + DYNAMIC_RANGE_PCM16
    return shifted / DYNAMIC_RANGE_PCM16
}

/** Dynamic range assumed for PCM16 audio, used as the span of the dBov scale. */
private const val DYNAMIC_RANGE_PCM16: Float = 96.0f
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 New Vector Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.element.android.libraries.voicerecorder.impl.audio
|
||||
|
||||
import com.squareup.anvil.annotations.ContributesBinding
|
||||
import io.element.android.libraries.di.RoomScope
|
||||
import javax.inject.Inject
|
||||
import kotlin.math.log10
|
||||
import kotlin.math.min
|
||||
import kotlin.math.sqrt
|
||||
|
||||
/**
 * [AudioLevelCalculator] that expresses the RMS of the buffer in decibels
 * relative to [REFERENCE_DB] and scales the result to the [0.0, 1.0] range.
 */
@ContributesBinding(RoomScope::class)
class DecibelAudioLevelCalculator @Inject constructor() : AudioLevelCalculator {
    companion object {
        private const val REFERENCE_DB = 50.0 // Reference dB for normal conversation
    }

    /**
     * Calculates the audio level of [buffer].
     *
     * @param buffer The audio buffer containing raw audio data.
     * @return A value between 0 and 1; 0 for an empty buffer.
     */
    override fun calculateAudioLevel(buffer: ShortArray): Float {
        // An empty buffer has no RMS (0 / 0 would be NaN), so report the lowest level.
        if (buffer.isEmpty()) return 0f

        val rms = buffer.rootMeanSquare()

        // Convert to decibels relative to the reference value.
        val db = 20 * log10(rms / REFERENCE_DB)

        // Scale by the reference and clamp both ends: quiet buffers give a negative
        // (or -Infinity) dB value, which must not leak out of the [0.0, 1.0] range.
        return (db / REFERENCE_DB).coerceIn(0.0, 1.0).toFloat()
    }

    /**
     * Computes the root mean square of the raw samples.
     */
    private fun ShortArray.rootMeanSquare(): Double {
        // Use Double to avoid overflow
        val sumOfSquares: Double = sumOf { it.toDouble() * it.toDouble() }
        val avgSquare = sumOfSquares / size.toDouble()
        return sqrt(avgSquare)
    }
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (c) 2023 New Vector Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.element.android.libraries.voicerecorder.impl.audio
|
||||
|
||||
/**
 * Resamples [this] list to [size] elements using linear interpolation.
 *
 * Output element `i` is read at position `i * (this.size / size)` of the input,
 * interpolating linearly between the two neighbouring input elements.
 * An empty input yields [size] zeros and a single-element input yields that
 * element repeated [size] times.
 *
 * The returned list is always a new list, never the receiver itself, so later
 * changes to a mutable receiver do not affect the result.
 *
 * @param size The number of elements of the returned list. Must be positive.
 * @return A new list of exactly [size] elements.
 * @throws IllegalArgumentException if [size] is not positive.
 */
fun List<Float>.resample(size: Int): List<Float> {
    require(size > 0) { "size must be positive, was $size" }
    val input = this
    if (input.isEmpty()) return List(size) { 0f } // fast path.
    if (input.size == 1) return List(size) { input[0] } // fast path.
    // fast path: copy instead of returning the receiver, which may be a mutable
    // list that the caller keeps modifying (or clears) afterwards.
    if (input.size == size) return input.toList()
    val step: Float = input.size.toFloat() / size.toFloat()
    return List(size) { i ->
        val position = i * step
        val x0 = position.toInt()
        // The last input element has no right neighbour: reuse it.
        val x1 = (x0 + 1).coerceAtMost(input.size - 1)
        val x = position - x0
        input[x0] * (1 - x) + input[x1] * x
    }
}
|
||||
@@ -62,7 +62,7 @@ class VoiceRecorderImplTest {
|
||||
voiceRecorder.startRecord()
|
||||
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(0.seconds, 1.0f))
|
||||
timeSource += 1.seconds
|
||||
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(1.seconds,0.0f))
|
||||
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(1.seconds, 0.0f))
|
||||
timeSource += 1.seconds
|
||||
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(2.seconds, 1.0f))
|
||||
}
|
||||
@@ -80,7 +80,13 @@ class VoiceRecorderImplTest {
|
||||
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Recording(29.minutes, 0.0f))
|
||||
timeSource += 1.minutes
|
||||
|
||||
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Finished(File(FILE_PATH), "audio/ogg"))
|
||||
assertThat(awaitItem()).isEqualTo(
|
||||
VoiceRecorderState.Finished(
|
||||
file = File(FILE_PATH),
|
||||
mimeType = "audio/ogg",
|
||||
waveform = List(100) { 1f },
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,7 +99,13 @@ class VoiceRecorderImplTest {
|
||||
voiceRecorder.startRecord()
|
||||
skipItems(3)
|
||||
voiceRecorder.stopRecord()
|
||||
assertThat(awaitItem()).isEqualTo(VoiceRecorderState.Finished(File(FILE_PATH), "audio/ogg"))
|
||||
assertThat(awaitItem()).isEqualTo(
|
||||
VoiceRecorderState.Finished(
|
||||
file = File(FILE_PATH),
|
||||
mimeType = "audio/ogg",
|
||||
waveform = List(100) { 1f },
|
||||
)
|
||||
)
|
||||
assertThat(fakeFileSystem.files[File(FILE_PATH)]).isEqualTo(ENCODED_DATA)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,31 +16,35 @@
|
||||
|
||||
package io.element.android.libraries.voicerecorder.impl.audio
|
||||
|
||||
import com.google.common.truth.Truth
|
||||
import org.junit.Test
|
||||
|
||||
class DecibelAudioLevelCalculatorTest {
|
||||
class DBovAudioLevelCalculatorTest {
|
||||
|
||||
@Test
|
||||
fun `given max values, it returns values within range`() {
|
||||
val calculator = DecibelAudioLevelCalculator()
|
||||
fun `given max values, it returns 1`() {
|
||||
val calculator = DBovAudioLevelCalculator()
|
||||
val buffer = ShortArray(100) { Short.MAX_VALUE }
|
||||
val level = calculator.calculateAudioLevel(buffer)
|
||||
assert(level in 0.0..1.0)
|
||||
Truth.assertThat(level).isEqualTo(1.0f)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `given mixed values, it returns values within range`() {
|
||||
val calculator = DecibelAudioLevelCalculator()
|
||||
val buffer = shortArrayOf(Short.MAX_VALUE, Short.MIN_VALUE, -1, 1)
|
||||
val calculator = DBovAudioLevelCalculator()
|
||||
val buffer = shortArrayOf(100, -200, 300, -400, 500, -600, 700, -800, 900, -1000)
|
||||
val level = calculator.calculateAudioLevel(buffer)
|
||||
assert(level in 0.0..1.0)
|
||||
Truth.assertThat(level).apply {
|
||||
isGreaterThan(0f)
|
||||
isLessThan(1f)
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `given min values, it returns values within range`() {
|
||||
val calculator = DecibelAudioLevelCalculator()
|
||||
val buffer = ShortArray(100) { Short.MIN_VALUE }
|
||||
fun `given min values, it returns 0`() {
|
||||
val calculator = DBovAudioLevelCalculator()
|
||||
val buffer = ShortArray(100) { 0 }
|
||||
val level = calculator.calculateAudioLevel(buffer)
|
||||
assert(level in 0.0..1.0)
|
||||
Truth.assertThat(level).isEqualTo(0.0f)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2023 New Vector Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.element.android.libraries.voicerecorder.impl.audio
|
||||
|
||||
import com.google.common.truth.Truth
|
||||
import org.junit.Test
|
||||
|
||||
/**
 * Pins the output of [resample] for a few representative inputs.
 */
class ResampleTest {
    @Test
    fun `resample works`() {
        // A single-element input is repeated to fill the requested size.
        val zeros = listOf(0.0f).resample(10)
        Truth.assertThat(zeros).isEqualTo(listOf(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f))

        val ones = listOf(1.0f).resample(10)
        Truth.assertThat(ones).isEqualTo(listOf(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))

        // Upsampling: values are interpolated, then the last element is held.
        val fromTwo = listOf(0.0f, 1.0f).resample(10)
        Truth.assertThat(fromTwo).isEqualTo(listOf(0.0f, 0.2f, 0.4f, 0.6f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))

        val fromThree = listOf(0.0f, 0.5f, 1.0f).resample(10)
        Truth.assertThat(fromThree).isEqualTo(listOf(0.0f, 0.15f, 0.3f, 0.45000002f, 0.6f, 0.75f, 0.90000004f, 1.0f, 1.0f, 1.0f))

        // Downsampling by an integer factor picks every tenth element.
        val fromHundred = List(100) { it.toFloat() }.resample(10)
        Truth.assertThat(fromHundred).isEqualTo(listOf(0.0f, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f))
    }
}
|
||||
@@ -70,7 +70,11 @@ class FakeVoiceRecorder(
|
||||
_state.emit(
|
||||
when (curRecording) {
|
||||
null -> VoiceRecorderState.Idle
|
||||
else -> VoiceRecorderState.Finished(curRecording!!, "audio/ogg")
|
||||
else -> VoiceRecorderState.Finished(
|
||||
file = curRecording!!,
|
||||
mimeType = "audio/ogg",
|
||||
waveform = listOf(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 8f, 7f, 6f, 5f, 4f, 3f, 2f, 1f, 0f),
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user