From 675e787171a4406a7331612031762abfa9a260da Mon Sep 17 00:00:00 2001 From: Digipom Date: Fri, 16 Dec 2022 12:20:13 -0500 Subject: [PATCH] Add Android sample (#277) * Add Android sample * Use main project C files * Stop existing playback before starting new playback * Make text scrollable * Stop playback when starting to record * Remove extra var --- examples/whisper.android/.gitignore | 15 ++ examples/whisper.android/.idea/.gitignore | 3 + examples/whisper.android/.idea/.name | 1 + examples/whisper.android/.idea/compiler.xml | 6 + examples/whisper.android/.idea/gradle.xml | 18 ++ examples/whisper.android/.idea/misc.xml | 10 + examples/whisper.android/.idea/vcs.xml | 6 + examples/whisper.android/README.md | 10 + examples/whisper.android/app/.gitignore | 1 + examples/whisper.android/app/build.gradle | 76 +++++++ .../whisper.android/app/proguard-rules.pro | 21 ++ .../whispercppdemo/ExampleInstrumentedTest.kt | 24 +++ .../app/src/main/AndroidManifest.xml | 32 +++ .../java/com/whispercppdemo/MainActivity.kt | 22 ++ .../whispercppdemo/media/RiffWaveHelper.kt | 76 +++++++ .../com/whispercppdemo/recorder/Recorder.kt | 88 ++++++++ .../com/whispercppdemo/ui/main/MainScreen.kt | 99 +++++++++ .../ui/main/MainScreenViewModel.kt | 193 ++++++++++++++++++ .../java/com/whispercppdemo/ui/theme/Color.kt | 11 + .../java/com/whispercppdemo/ui/theme/Theme.kt | 68 ++++++ .../java/com/whispercppdemo/ui/theme/Type.kt | 34 +++ .../com/whispercppdemo/whisper/LibWhisper.kt | 61 ++++++ .../app/src/main/jni/whisper/Android.mk | 22 ++ .../app/src/main/jni/whisper/Application.mk | 1 + .../app/src/main/jni/whisper/jni.c | 93 +++++++++ .../res/drawable/ic_launcher_background.xml | 170 +++++++++++++++ .../res/drawable/ic_launcher_foreground.xml | 30 +++ .../main/res/mipmap-anydpi/ic_launcher.xml | 5 + .../app/src/main/res/values/colors.xml | 10 + .../app/src/main/res/values/strings.xml | 3 + .../app/src/main/res/values/themes.xml | 5 + .../app/src/main/res/xml/backup_rules.xml | 13 ++ 
.../main/res/xml/data_extraction_rules.xml | 19 ++ .../com/whispercppdemo/ExampleUnitTest.kt | 17 ++ examples/whisper.android/build.gradle | 6 + examples/whisper.android/gradle.properties | 23 +++ .../gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 59203 bytes .../gradle/wrapper/gradle-wrapper.properties | 6 + examples/whisper.android/gradlew | 185 +++++++++++++++++ examples/whisper.android/gradlew.bat | 89 ++++++++ examples/whisper.android/local.properties | 10 + examples/whisper.android/settings.gradle | 16 ++ 42 files changed, 1598 insertions(+) create mode 100644 examples/whisper.android/.gitignore create mode 100644 examples/whisper.android/.idea/.gitignore create mode 100644 examples/whisper.android/.idea/.name create mode 100644 examples/whisper.android/.idea/compiler.xml create mode 100644 examples/whisper.android/.idea/gradle.xml create mode 100644 examples/whisper.android/.idea/misc.xml create mode 100644 examples/whisper.android/.idea/vcs.xml create mode 100644 examples/whisper.android/README.md create mode 100644 examples/whisper.android/app/.gitignore create mode 100644 examples/whisper.android/app/build.gradle create mode 100644 examples/whisper.android/app/proguard-rules.pro create mode 100644 examples/whisper.android/app/src/androidTest/java/com/whispercppdemo/ExampleInstrumentedTest.kt create mode 100644 examples/whisper.android/app/src/main/AndroidManifest.xml create mode 100644 examples/whisper.android/app/src/main/java/com/whispercppdemo/MainActivity.kt create mode 100644 examples/whisper.android/app/src/main/java/com/whispercppdemo/media/RiffWaveHelper.kt create mode 100644 examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt create mode 100644 examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt create mode 100644 examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt create mode 100644 
examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/theme/Color.kt create mode 100644 examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/theme/Theme.kt create mode 100644 examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/theme/Type.kt create mode 100644 examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt create mode 100644 examples/whisper.android/app/src/main/jni/whisper/Android.mk create mode 100644 examples/whisper.android/app/src/main/jni/whisper/Application.mk create mode 100644 examples/whisper.android/app/src/main/jni/whisper/jni.c create mode 100644 examples/whisper.android/app/src/main/res/drawable/ic_launcher_background.xml create mode 100644 examples/whisper.android/app/src/main/res/drawable/ic_launcher_foreground.xml create mode 100644 examples/whisper.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml create mode 100644 examples/whisper.android/app/src/main/res/values/colors.xml create mode 100644 examples/whisper.android/app/src/main/res/values/strings.xml create mode 100644 examples/whisper.android/app/src/main/res/values/themes.xml create mode 100644 examples/whisper.android/app/src/main/res/xml/backup_rules.xml create mode 100644 examples/whisper.android/app/src/main/res/xml/data_extraction_rules.xml create mode 100644 examples/whisper.android/app/src/test/java/com/whispercppdemo/ExampleUnitTest.kt create mode 100644 examples/whisper.android/build.gradle create mode 100644 examples/whisper.android/gradle.properties create mode 100644 examples/whisper.android/gradle/wrapper/gradle-wrapper.jar create mode 100644 examples/whisper.android/gradle/wrapper/gradle-wrapper.properties create mode 100755 examples/whisper.android/gradlew create mode 100644 examples/whisper.android/gradlew.bat create mode 100644 examples/whisper.android/local.properties create mode 100644 examples/whisper.android/settings.gradle diff --git a/examples/whisper.android/.gitignore 
b/examples/whisper.android/.gitignore new file mode 100644 index 0000000..aa724b7 --- /dev/null +++ b/examples/whisper.android/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/examples/whisper.android/.idea/.gitignore b/examples/whisper.android/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/examples/whisper.android/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/examples/whisper.android/.idea/.name b/examples/whisper.android/.idea/.name new file mode 100644 index 0000000..6e1efd0 --- /dev/null +++ b/examples/whisper.android/.idea/.name @@ -0,0 +1 @@ +WhisperCppDemo \ No newline at end of file diff --git a/examples/whisper.android/.idea/compiler.xml b/examples/whisper.android/.idea/compiler.xml new file mode 100644 index 0000000..fb7f4a8 --- /dev/null +++ b/examples/whisper.android/.idea/compiler.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/examples/whisper.android/.idea/gradle.xml b/examples/whisper.android/.idea/gradle.xml new file mode 100644 index 0000000..a9f4e52 --- /dev/null +++ b/examples/whisper.android/.idea/gradle.xml @@ -0,0 +1,18 @@ + + + + + + \ No newline at end of file diff --git a/examples/whisper.android/.idea/misc.xml b/examples/whisper.android/.idea/misc.xml new file mode 100644 index 0000000..bdd9278 --- /dev/null +++ b/examples/whisper.android/.idea/misc.xml @@ -0,0 +1,10 @@ + + + + + + + + + \ No newline at end of file diff --git a/examples/whisper.android/.idea/vcs.xml b/examples/whisper.android/.idea/vcs.xml new file mode 100644 index 0000000..b2bdec2 --- /dev/null +++ b/examples/whisper.android/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git 
a/examples/whisper.android/README.md b/examples/whisper.android/README.md new file mode 100644 index 0000000..f209194 --- /dev/null +++ b/examples/whisper.android/README.md @@ -0,0 +1,10 @@ +A sample Android app using [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to do voice-to-text transcriptions. + +To use: + +1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1] +2. Copy the model to the "app/src/main/assets/models" folder. +3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)). +4. Copy the sample to the "app/src/main/assets/samples" folder. +5. Select the "release" active build variant, and use Android Studio to run and deploy to your device. +[^1]: I recommend the tiny or base models for running on an Android device. \ No newline at end of file diff --git a/examples/whisper.android/app/.gitignore b/examples/whisper.android/app/.gitignore new file mode 100644 index 0000000..42afabf --- /dev/null +++ b/examples/whisper.android/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/examples/whisper.android/app/build.gradle b/examples/whisper.android/app/build.gradle new file mode 100644 index 0000000..5765cae --- /dev/null +++ b/examples/whisper.android/app/build.gradle @@ -0,0 +1,76 @@ +plugins { + id 'com.android.application' + id 'org.jetbrains.kotlin.android' +} + +android { + namespace 'com.whispercppdemo' + compileSdk 33 + + defaultConfig { + applicationId "com.whispercppdemo" + minSdk 26 + targetSdk 32 + versionCode 1 + versionName "1.0" + + ndk { + abiFilters 'arm64-v8a', 'x86_64' + } + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + vectorDrawables { + useSupportLibrary true + } + } + + buildTypes { + release { + signingConfig signingConfigs.debug + minifyEnabled true + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } 
+ } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = '1.8' + } + buildFeatures { + compose true + } + composeOptions { + kotlinCompilerExtensionVersion '1.3.1' + } + ndkVersion "25.0.8528842" + externalNativeBuild { + ndkBuild { + path 'src/main/jni/whisper/Android.mk' + } + } + packagingOptions { + resources { + excludes += '/META-INF/{AL2.0,LGPL2.1}' + } + } +} + +dependencies { + implementation 'androidx.activity:activity-compose:1.6.1' + implementation 'androidx.compose.material:material-icons-core:1.3.1' + implementation 'androidx.compose.material3:material3:1.0.1' + implementation "androidx.compose.ui:ui:1.3.2" + implementation "androidx.compose.ui:ui-tooling-preview:1.3.2" + implementation 'androidx.lifecycle:lifecycle-viewmodel-compose:2.5.1' + implementation "com.google.accompanist:accompanist-permissions:0.28.0" + implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.4' + + testImplementation 'junit:junit:4.13.2' + androidTestImplementation 'androidx.test.ext:junit:1.1.4' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.0' + androidTestImplementation "androidx.compose.ui:ui-test-junit4:1.3.2" + debugImplementation "androidx.compose.ui:ui-tooling:1.3.2" + debugImplementation "androidx.compose.ui:ui-test-manifest:1.3.2" +} \ No newline at end of file diff --git a/examples/whisper.android/app/proguard-rules.pro b/examples/whisper.android/app/proguard-rules.pro new file mode 100644 index 0000000..481bb43 --- /dev/null +++ b/examples/whisper.android/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. 
+# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/examples/whisper.android/app/src/androidTest/java/com/whispercppdemo/ExampleInstrumentedTest.kt b/examples/whisper.android/app/src/androidTest/java/com/whispercppdemo/ExampleInstrumentedTest.kt new file mode 100644 index 0000000..b7117fc --- /dev/null +++ b/examples/whisper.android/app/src/androidTest/java/com/whispercppdemo/ExampleInstrumentedTest.kt @@ -0,0 +1,24 @@ +package com.whispercppdemo + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. 
+ val appContext = InstrumentationRegistry.getInstrumentation().targetContext + assertEquals("com.whispercppdemo", appContext.packageName) + } +} \ No newline at end of file diff --git a/examples/whisper.android/app/src/main/AndroidManifest.xml b/examples/whisper.android/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000..cf57850 --- /dev/null +++ b/examples/whisper.android/app/src/main/AndroidManifest.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/MainActivity.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/MainActivity.kt new file mode 100644 index 0000000..8bcae71 --- /dev/null +++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/MainActivity.kt @@ -0,0 +1,22 @@ +package com.whispercppdemo + +import android.os.Bundle +import androidx.activity.ComponentActivity +import androidx.activity.compose.setContent +import androidx.activity.viewModels +import com.whispercppdemo.ui.main.MainScreen +import com.whispercppdemo.ui.main.MainScreenViewModel +import com.whispercppdemo.ui.theme.WhisperCppDemoTheme + +class MainActivity : ComponentActivity() { + private val viewModel: MainScreenViewModel by viewModels { MainScreenViewModel.factory() } + + override fun onCreate(savedInstanceState: Bundle?) 
/** Size in bytes of the canonical PCM WAVE header written by [headerBytes] and skipped by [decodeWaveFile]. */
private const val WAVE_HEADER_SIZE = 44

/**
 * Reads [file] as 16-bit little-endian PCM and returns the samples scaled to the range [-1, 1].
 *
 * The first 44 bytes are skipped as the header without being parsed; the sample rate and
 * channel count stored in the file are not inspected.
 *
 * @param file the WAVE file to read.
 * @return one float per 16-bit sample found after the header.
 * @throws IllegalArgumentException if the file is shorter than the 44-byte header.
 */
fun decodeWaveFile(file: File): FloatArray {
    val bytes = file.readBytes()
    require(bytes.size >= WAVE_HEADER_SIZE) {
        "${file.name} is ${bytes.size} bytes, shorter than a $WAVE_HEADER_SIZE-byte WAVE header"
    }
    val buffer = ByteBuffer.wrap(bytes)
    buffer.order(ByteOrder.LITTLE_ENDIAN)
    buffer.position(WAVE_HEADER_SIZE)
    val samples = buffer.asShortBuffer()
    return FloatArray(samples.remaining()) { index ->
        // -32768 / 32767 falls just below -1, hence the clamp.
        (samples.get(index) / 32767.0f).coerceIn(-1f..1f)
    }
}

/**
 * Writes [data] to [file] as a 16 kHz, mono, 16-bit PCM WAVE file.
 *
 * @param file destination; overwritten if it already exists.
 * @param data the PCM samples to store. May be empty, which yields a header-only file.
 */
fun encodeWaveFile(file: File, data: ShortArray) {
    val dataBytes = data.size * 2
    val pcm = ByteBuffer.allocate(dataBytes).order(ByteOrder.LITTLE_ENDIAN)
    pcm.asShortBuffer().put(data)
    file.outputStream().use { out ->
        // headerBytes() expects the length of the whole file, header included.
        out.write(headerBytes(WAVE_HEADER_SIZE + dataBytes))
        out.write(pcm.array())
    }
}

/**
 * Builds the 44-byte RIFF/WAVE header for 16 kHz mono 16-bit PCM.
 *
 * @param totalLength length in bytes of the complete file (header plus sample data).
 * @return the header bytes.
 * @throws IllegalArgumentException if [totalLength] is smaller than the header itself.
 */
private fun headerBytes(totalLength: Int): ByteArray {
    require(totalLength >= WAVE_HEADER_SIZE) {
        "totalLength ($totalLength) must include the $WAVE_HEADER_SIZE-byte header"
    }
    val header = ByteBuffer.allocate(WAVE_HEADER_SIZE).order(ByteOrder.LITTLE_ENDIAN)

    header.put("RIFF".toByteArray(Charsets.US_ASCII))
    header.putInt(totalLength - 8)                     // RIFF chunk size: everything after this field
    header.put("WAVE".toByteArray(Charsets.US_ASCII))

    header.put("fmt ".toByteArray(Charsets.US_ASCII))
    header.putInt(16)                                  // fmt chunk size
    header.putShort(1.toShort())                       // audio format: PCM
    header.putShort(1.toShort())                       // channels
    header.putInt(16000)                               // sample rate
    header.putInt(32000)                               // byte rate = rate * channels * 2
    header.putShort(2.toShort())                       // block align
    header.putShort(16.toShort())                      // bits per sample

    header.put("data".toByteArray(Charsets.US_ASCII))
    header.putInt(totalLength - WAVE_HEADER_SIZE)      // data chunk size

    return header.array()
}
/**
 * Thread that captures 16 kHz mono 16-bit PCM from the microphone until [stopRecording]
 * is called, then writes everything captured to [outputFile] via [encodeWaveFile].
 *
 * Any exception raised while setting up, reading or encoding is passed to [onError]
 * on this thread.
 */
private class AudioRecordThread(
    private val outputFile: File,
    private val onError: (Exception) -> Unit
) : Thread("AudioRecorder") {
    // Set by stopRecording() from another thread; polled by the capture loop.
    private val quit = AtomicBoolean(false)

    @SuppressLint("MissingPermission")
    override fun run() {
        try {
            val bufferSize = AudioRecord.getMinBufferSize(
                16000,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT
            ) * 4
            val buffer = ShortArray(bufferSize / 2)

            val audioRecord = AudioRecord(
                MediaRecorder.AudioSource.MIC,
                16000,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT,
                bufferSize
            )

            try {
                audioRecord.startRecording()

                // Keep each read as a primitive array rather than boxing every sample.
                val chunks = mutableListOf<ShortArray>()
                var totalSamples = 0

                while (!quit.get()) {
                    val read = audioRecord.read(buffer, 0, buffer.size)
                    if (read <= 0) {
                        throw java.lang.RuntimeException("audioRecord.read returned $read")
                    }
                    chunks.add(buffer.copyOf(read))
                    totalSamples += read
                }

                audioRecord.stop()
                encodeWaveFile(outputFile, concatenate(chunks, totalSamples))
            } finally {
                audioRecord.release()
            }
        } catch (e: Exception) {
            onError(e)
        }
    }

    /** Asks the capture loop to finish; the file is written before [run] returns. */
    fun stopRecording() {
        quit.set(true)
    }

    /** Joins [chunks], whose sizes sum to [totalSamples], into one contiguous array. */
    private fun concatenate(chunks: List<ShortArray>, totalSamples: Int): ShortArray {
        val all = ShortArray(totalSamples)
        var offset = 0
        for (chunk in chunks) {
            chunk.copyInto(all, offset)
            offset += chunk.size
        }
        return all
    }
}
/**
 * Entry point for the main screen: reads the current state from [viewModel] and forwards it,
 * together with the view model's action handlers, to the stateless overload below.
 */
@Composable
fun MainScreen(viewModel: MainScreenViewModel) {
    MainScreen(
        canTranscribe = viewModel.canTranscribe,
        isRecording = viewModel.isRecording,
        messageLog = viewModel.dataLog,
        onTranscribeSampleTapped = viewModel::transcribeSample,
        onRecordTapped = viewModel::toggleRecord
    )
}

/**
 * Stateless layout of the main screen: a top app bar, a row with the two action buttons,
 * and the message log underneath.
 *
 * @param canTranscribe enables or disables both buttons.
 * @param isRecording passed to the record button to select its label.
 * @param messageLog text shown in the log area.
 * @param onTranscribeSampleTapped invoked when the "Transcribe sample" button is clicked.
 * @param onRecordTapped passed to the record button as its click action.
 */
@OptIn(ExperimentalMaterial3Api::class)
@Composable
private fun MainScreen(
    canTranscribe: Boolean,
    isRecording: Boolean,
    messageLog: String,
    onTranscribeSampleTapped: () -> Unit,
    onRecordTapped: () -> Unit
) {
    Scaffold(
        topBar = {
            TopAppBar(
                title = { Text(stringResource(R.string.app_name)) }
            )
        },
    ) { innerPadding ->
        Column(
            modifier = Modifier
                // Scaffold's insets first, then the screen's own margin.
                .padding(innerPadding)
                .padding(16.dp)
        ) {
            Row(horizontalArrangement = Arrangement.SpaceBetween) {
                TranscribeSampleButton(enabled = canTranscribe, onClick = onTranscribeSampleTapped)
                RecordButton(
                    enabled = canTranscribe,
                    isRecording = isRecording,
                    onClick = onRecordTapped
                )
            }
            MessageLog(messageLog)
        }
    }
}

/** Shows [log] as vertically scrollable text. */
@Composable
private fun MessageLog(log: String) {
    Text(modifier = Modifier.verticalScroll(rememberScrollState()), text = log)
}

/** Button labelled "Transcribe sample" that calls [onClick]; disabled when [enabled] is false. */
@Composable
private fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) {
    Button(onClick = onClick, enabled = enabled) {
        Text("Transcribe sample")
    }
}
/**
 * State holder for the main screen.
 *
 * On creation it copies the bundled "models" and "samples" assets into the app's files
 * directory and loads the first model found. Afterwards it can transcribe either the first
 * bundled sample or a microphone recording, appending progress and results to [dataLog].
 */
class MainScreenViewModel(private val application: Application) : ViewModel() {
    /** True once a model is loaded and no transcription is in progress. */
    var canTranscribe by mutableStateOf(false)
        private set

    /** Accumulated log text shown on screen. */
    var dataLog by mutableStateOf("")
        private set

    /** True between a successful start and the following stop (or error) of a recording. */
    var isRecording by mutableStateOf(false)
        private set

    private val modelsPath = File(application.filesDir, "models")
    private val samplesPath = File(application.filesDir, "samples")
    private var recorder: Recorder = Recorder()
    private var whisperContext: WhisperContext? = null
    private var mediaPlayer: MediaPlayer? = null
    private var recordedFile: File? = null

    init {
        viewModelScope.launch {
            loadData()
        }
    }

    /** Copies assets and loads the model; failures are logged and shown instead of thrown. */
    private suspend fun loadData() {
        printMessage("Loading data...\n")
        try {
            copyAssets()
            loadBaseModel()
            canTranscribe = true
        } catch (e: Exception) {
            Log.w(LOG_TAG, e)
            printMessage("${e.localizedMessage}\n")
        }
    }

    /** Appends [msg] to [dataLog] on the main dispatcher. */
    private suspend fun printMessage(msg: String) = withContext(Dispatchers.Main) {
        dataLog += msg
    }

    private suspend fun copyAssets() = withContext(Dispatchers.IO) {
        modelsPath.mkdirs()
        samplesPath.mkdirs()
        application.copyData("models", modelsPath, ::printMessage)
        application.copyData("samples", samplesPath, ::printMessage)
        printMessage("All data copied to working directory.\n")
    }

    private suspend fun loadBaseModel() = withContext(Dispatchers.IO) {
        printMessage("Loading model...\n")
        // Fail with an actionable message rather than "null" / "Array is empty."
        val firstModel = modelsPath.listFiles()?.firstOrNull()
            ?: error("No model found in ${modelsPath.absolutePath}. Copy a model to app/src/main/assets/models.")
        whisperContext = WhisperContext.createContext(firstModel.absolutePath)
        printMessage("Loaded model ${firstModel.name}.\n")
    }

    /** Plays and transcribes the first bundled sample. */
    fun transcribeSample() = viewModelScope.launch {
        transcribeAudio(getFirstSample())
    }

    private suspend fun getFirstSample(): File = withContext(Dispatchers.IO) {
        samplesPath.listFiles()?.firstOrNull()
            ?: error("No sample found in ${samplesPath.absolutePath}. Copy a sample to app/src/main/assets/samples.")
    }

    /** Restarts playback with [file] and returns its decoded samples. */
    private suspend fun readAudioSamples(file: File): FloatArray = withContext(Dispatchers.IO) {
        stopPlayback()
        startPlayback(file)
        return@withContext decodeWaveFile(file)
    }

    private suspend fun stopPlayback() = withContext(Dispatchers.Main) {
        releaseMediaPlayer()
    }

    /** Stops and releases the current player, if any. Non-suspending so onCleared() can use it. */
    private fun releaseMediaPlayer() {
        mediaPlayer?.stop()
        mediaPlayer?.release()
        mediaPlayer = null
    }

    private suspend fun startPlayback(file: File) = withContext(Dispatchers.Main) {
        mediaPlayer = MediaPlayer.create(application, file.absolutePath.toUri())
        mediaPlayer?.start()
    }

    /** Transcribes [file] and logs the result; does nothing while [canTranscribe] is false. */
    private suspend fun transcribeAudio(file: File) {
        if (!canTranscribe) {
            return
        }

        canTranscribe = false

        try {
            printMessage("Reading wave samples...\n")
            val data = readAudioSamples(file)
            printMessage("Transcribing data...\n")
            val text = whisperContext?.transcribeData(data)
            printMessage("Done: $text\n")
        } catch (e: Exception) {
            Log.w(LOG_TAG, e)
            printMessage("${e.localizedMessage}\n")
        } finally {
            // Restore the flag even if the catch block itself throws (e.g. on cancellation).
            canTranscribe = true
        }
    }

    /** Starts a recording, or stops the current one and transcribes it. */
    fun toggleRecord() = viewModelScope.launch {
        try {
            if (isRecording) {
                recorder.stopRecording()
                isRecording = false
                recordedFile?.let { transcribeAudio(it) }
            } else {
                stopPlayback()
                val file = getTempFileForRecording()
                recorder.startRecording(file) { e ->
                    // Invoked from the recorder; hop back to this scope to touch UI state.
                    viewModelScope.launch {
                        withContext(Dispatchers.Main) {
                            printMessage("${e.localizedMessage}\n")
                            isRecording = false
                        }
                    }
                }
                isRecording = true
                recordedFile = file
            }
        } catch (e: Exception) {
            Log.w(LOG_TAG, e)
            printMessage("${e.localizedMessage}\n")
            isRecording = false
        }
    }

    private suspend fun getTempFileForRecording() = withContext(Dispatchers.IO) {
        // The suffix is appended verbatim, so it must carry its own dot.
        File.createTempFile("recording", ".wav")
    }

    override fun onCleared() {
        super.onCleared()
        runBlocking {
            whisperContext?.release()
            whisperContext = null
        }
        // Not done inside runBlocking via stopPlayback(): that switches to Dispatchers.Main,
        // and when onCleared() runs on the main thread (the usual case for ViewModel teardown)
        // the blocked thread could never run the posted continuation.
        releaseMediaPlayer()
    }

    companion object {
        /** Factory that obtains the [Application] from the creation extras. */
        fun factory() = viewModelFactory {
            initializer {
                val application =
                    this[ViewModelProvider.AndroidViewModelFactory.APPLICATION_KEY] as Application
                MainScreenViewModel(application)
            }
        }
    }
}

/**
 * Copies every entry of the asset directory [assetDirName] into [destDir], overwriting
 * existing files, and reports each file name through [printMessage].
 */
private suspend fun Context.copyData(
    assetDirName: String,
    destDir: File,
    printMessage: suspend (String) -> Unit
) = withContext(Dispatchers.IO) {
    assets.list(assetDirName)?.forEach { name ->
        val assetPath = "$assetDirName/$name"
        Log.v(LOG_TAG, "Processing $assetPath...")
        val destination = File(destDir, name)
        Log.v(LOG_TAG, "Copying $assetPath to $destination...")
        printMessage("Copying $name...\n")
        assets.open(assetPath).use { input ->
            destination.outputStream().use { output ->
                input.copyTo(output)
            }
        }
        Log.v(LOG_TAG, "Copied $assetPath to $destination")
    }
}
/**
 * Applies the demo's Material 3 theme to [content].
 *
 * @param darkTheme selects the dark palette; defaults to the system setting.
 * @param dynamicColor when true and the device runs Android 12 (API level S) or newer,
 *   the dynamic colour scheme is used instead of the static palettes.
 * @param content the composable tree to theme.
 */
@Composable
fun WhisperCppDemoTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme =
        if (dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            val ctx = LocalContext.current
            if (darkTheme) {
                dynamicDarkColorScheme(ctx)
            } else {
                dynamicLightColorScheme(ctx)
            }
        } else if (darkTheme) {
            DarkColorScheme
        } else {
            LightColorScheme
        }

    val hostView = LocalView.current
    // Skipped when the view reports edit mode.
    if (!hostView.isInEditMode) {
        SideEffect {
            (hostView.context as Activity).window.statusBarColor = colorScheme.primary.toArgb()
            ViewCompat.getWindowInsetsController(hostView)?.isAppearanceLightStatusBars = darkTheme
        }
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}
/**
 * Kotlin handle to a native whisper context.
 *
 * All native calls made through an instance run on that instance's own single-thread
 * dispatcher, so the context is never used from two threads at once.
 */
class WhisperContext private constructor(private var ptr: Long) {
    // Meet Whisper C++ constraint: Don't access from more than one thread at a time.
    private val scope: CoroutineScope = CoroutineScope(
        Executors.newSingleThreadExecutor().asCoroutineDispatcher()
    )

    /**
     * Runs a full transcription of [data] and returns all text segments concatenated.
     *
     * @throws IllegalArgumentException if the context has already been released.
     */
    suspend fun transcribeData(data: FloatArray): String = withContext(scope.coroutineContext) {
        require(ptr != 0L)
        WhisperLib.fullTranscribe(ptr, data)
        val segmentCount = WhisperLib.getTextSegmentCount(ptr)
        val transcript = StringBuilder()
        for (segment in 0 until segmentCount) {
            transcript.append(WhisperLib.getTextSegment(ptr, segment))
        }
        return@withContext transcript.toString()
    }

    /** Frees the native context; calling it again afterwards does nothing. */
    suspend fun release() = withContext(scope.coroutineContext) {
        if (ptr != 0L) {
            val handle = ptr
            WhisperLib.freeContext(handle)
            ptr = 0
        }
    }

    /** Releases the native context if [release] was never called. */
    protected fun finalize() {
        runBlocking { release() }
    }

    companion object {
        /**
         * Loads the model at [filePath] and wraps the resulting native context.
         *
         * @throws RuntimeException if the native side returns a null handle.
         */
        fun createContext(filePath: String): WhisperContext {
            val handle = WhisperLib.initContext(filePath)
            if (handle == 0L) {
                throw java.lang.RuntimeException("Couldn't create context with path $filePath")
            }
            return WhisperContext(handle)
        }
    }
}

/** JNI bindings; the class and companion names are part of the native symbol names. */
private class WhisperLib {
    companion object {
        init {
            System.loadLibrary("whisper")
        }

        // JNI methods
        external fun initContext(modelPath: String): Long
        external fun freeContext(contextPtr: Long)
        external fun fullTranscribe(contextPtr: Long, audioData: FloatArray)
        external fun getTextSegmentCount(contextPtr: Long): Int
        external fun getTextSegment(contextPtr: Long, index: Int): String
    }
}
+ifneq ($(APP_OPTIM),debug)
+    LOCAL_CFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
+    LOCAL_CFLAGS += -ffunction-sections -fdata-sections
+    LOCAL_LDFLAGS += -Wl,--gc-sections
+    LOCAL_LDFLAGS += -Wl,--exclude-libs,ALL
+    LOCAL_LDFLAGS += -flto
+endif
+
+LOCAL_CFLAGS += -DSTDC_HEADERS -std=c11 -I $(WHISPER_LIB_DIR)
+LOCAL_CPPFLAGS += -std=c++11
+LOCAL_SRC_FILES := $(WHISPER_LIB_DIR)/ggml.c \
+                   $(WHISPER_LIB_DIR)/whisper.cpp \
+                   $(LOCAL_PATH)/jni.c
+
+include $(BUILD_SHARED_LIBRARY)
\ No newline at end of file
diff --git a/examples/whisper.android/app/src/main/jni/whisper/Application.mk b/examples/whisper.android/app/src/main/jni/whisper/Application.mk
new file mode 100644
index 0000000..067c76f
--- /dev/null
+++ b/examples/whisper.android/app/src/main/jni/whisper/Application.mk
@@ -0,0 +1 @@
+APP_STL := c++_static
\ No newline at end of file
diff --git a/examples/whisper.android/app/src/main/jni/whisper/jni.c b/examples/whisper.android/app/src/main/jni/whisper/jni.c
new file mode 100644
index 0000000..e3fe695
--- /dev/null
+++ b/examples/whisper.android/app/src/main/jni/whisper/jni.c
@@ -0,0 +1,110 @@
+#include <jni.h>
+#include <android/log.h>
+#include <stdlib.h>
+#include <sys/sysinfo.h>
+#include "whisper.h"
+
+#define UNUSED(x) (void)(x)
+#define TAG "JNI"
+
+#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
+
+static inline int min(int a, int b) {
+    return (a < b) ? a : b;
+}
+
+static inline int max(int a, int b) {
+    return (a > b) ? a : b;
+}
+
+// Creates a whisper context from the model file at model_path_str.
+// Returns the context pointer as a jlong; the Kotlin caller treats 0 as failure.
+JNIEXPORT jlong JNICALL
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
+        JNIEnv *env, jobject thiz, jstring model_path_str) {
+    UNUSED(thiz);
+    struct whisper_context *context = NULL;
+    const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
+    if (model_path_chars == NULL) {
+        // The JVM could not provide the path characters; report "no context".
+        return 0;
+    }
+    context = whisper_init(model_path_chars);
+    (*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
+    return (jlong) context;
+}
+
+// Frees the whisper context previously returned by initContext.
+JNIEXPORT void JNICALL
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_freeContext(
+        JNIEnv *env, jobject thiz, jlong context_ptr) {
+    UNUSED(env);
+    UNUSED(thiz);
+    struct whisper_context *context = (struct whisper_context *) context_ptr;
+    whisper_free(context);
+}
+
+// Runs whisper_full over audio_data with greedy sampling and English as the language.
+// Results stay in the context; read them with getTextSegmentCount / getTextSegment.
+JNIEXPORT void JNICALL
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_fullTranscribe(
+        JNIEnv *env, jobject thiz, jlong context_ptr, jfloatArray audio_data) {
+    UNUSED(thiz);
+    struct whisper_context *context = (struct whisper_context *) context_ptr;
+    jfloat *audio_data_arr = (*env)->GetFloatArrayElements(env, audio_data, NULL);
+    if (audio_data_arr == NULL) {
+        // Nothing to transcribe and nothing to release if the elements are unavailable.
+        LOGI("Failed to access the audio data");
+        return;
+    }
+    const jsize audio_data_length = (*env)->GetArrayLength(env, audio_data);
+
+    // Leave 2 processors free (i.e. the high-efficiency cores).
+    int max_threads = max(1, min(8, get_nprocs() - 2));
+    LOGI("Selecting %d threads", max_threads);
+
+    // The below adapted from the Objective-C iOS sample
+    struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+    params.print_realtime = true;
+    params.print_progress = false;
+    params.print_timestamps = true;
+    params.print_special = false;
+    params.translate = false;
+    params.language = "en";
+    params.n_threads = max_threads;
+    params.offset_ms = 0;
+    params.no_context = true;
+    params.single_segment = false;
+
+    whisper_reset_timings(context);
+
+    LOGI("About to run whisper_full");
+    if (whisper_full(context, params, audio_data_arr, audio_data_length) != 0) {
+        LOGI("Failed to run the model");
+    } else {
+        whisper_print_timings(context);
+    }
+    // JNI_ABORT: the samples were only read, so release without copying anything back.
+    (*env)->ReleaseFloatArrayElements(env, audio_data, audio_data_arr, JNI_ABORT);
+}
+
+// Returns the number of text segments produced by the last fullTranscribe call.
+JNIEXPORT jint JNICALL
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegmentCount(
+        JNIEnv *env, jobject thiz, jlong context_ptr) {
+    UNUSED(env);
+    UNUSED(thiz);
+    struct whisper_context *context = (struct whisper_context *) context_ptr;
+    return whisper_full_n_segments(context);
+}
+
+// Returns the text of segment `index` as a new Java string.
+JNIEXPORT jstring JNICALL
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
+        JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
+    UNUSED(thiz);
+    struct whisper_context *context = (struct whisper_context *) context_ptr;
+    const char *text = whisper_full_get_segment_text(context, index);
+    jstring string = (*env)->NewStringUTF(env, text);
+    return string;
+}
\ No newline at end of file
diff --git a/examples/whisper.android/app/src/main/res/drawable/ic_launcher_background.xml b/examples/whisper.android/app/src/main/res/drawable/ic_launcher_background.xml
new file mode 100644
index 0000000..07d5da9
--- /dev/null
+++ b/examples/whisper.android/app/src/main/res/drawable/ic_launcher_background.xml
@@ -0,0 +1,170 @@
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + diff --git a/examples/whisper.android/app/src/main/res/drawable/ic_launcher_foreground.xml b/examples/whisper.android/app/src/main/res/drawable/ic_launcher_foreground.xml new file mode 100644 index 0000000..2b068d1 --- /dev/null +++ b/examples/whisper.android/app/src/main/res/drawable/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/examples/whisper.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml b/examples/whisper.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml new file mode 100644 index 0000000..eca70cf --- /dev/null +++ b/examples/whisper.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/examples/whisper.android/app/src/main/res/values/colors.xml b/examples/whisper.android/app/src/main/res/values/colors.xml new file mode 100644 index 0000000..f8c6127 --- /dev/null +++ b/examples/whisper.android/app/src/main/res/values/colors.xml @@ -0,0 +1,10 @@ + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + \ No newline at end of file diff --git a/examples/whisper.android/app/src/main/res/values/strings.xml b/examples/whisper.android/app/src/main/res/values/strings.xml new file mode 100644 index 0000000..40804ec --- /dev/null +++ b/examples/whisper.android/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + WhisperCppDemo + \ No newline at end of file diff --git a/examples/whisper.android/app/src/main/res/values/themes.xml b/examples/whisper.android/app/src/main/res/values/themes.xml new file mode 100644 index 0000000..c16729f --- /dev/null +++ b/examples/whisper.android/app/src/main/res/values/themes.xml @@ -0,0 +1,5 @@ + + + +