Initial steps to migrate to the whisper/sherpa setup.

Still missing major changes to the TestModelActivity.kt file, which are coming next...
2026-01-22 19:49:48 +11:00
parent 12c0508713
commit 404bc55ed3
4 changed files with 115 additions and 28 deletions
--- a/app/build.gradle.kts
+++ b/app/build.gradle.kts
@@ -47,12 +47,17 @@ dependencies {
    androidTestImplementation(libs.androidx.espresso.core)
    // 1. The "Brain": Vosk Offline Speech Recognition
-    implementation("com.alphacephei:vosk-android:0.3.47")
+    //implementation("com.alphacephei:vosk-android:0.3.47") - removed as part of the migration to whisper
    // (Optional) Helper for memory management if needed later
    // Removed the following as it was listed as optional and it did cause errors -
    // so to avoid a whole list of duplicate class found errors - this is already required via the VOSK libraries
    // implementation("net.java.dev.jna:jna:5.13.0")
    // New Whisper include...
    // implementation("com.k2fsa.sherpa.onnx:sherpa-onnx:1.12.23") // The engine
    implementation("com.github.k2-fsa:sherpa-onnx:v1.12.23")
    // 2. The "Mouth": USB Serial Driver for Android
    implementation("com.github.mik3y:usb-serial-for-android:3.7.0")
 }
--- a/app/src/main/java/net/mmanningau/speechtokeyboard/MainActivity.kt
+++ b/app/src/main/java/net/mmanningau/speechtokeyboard/MainActivity.kt
@@ -9,8 +9,8 @@ import android.widget.TextView
 import android.widget.Toast
 import androidx.activity.result.contract.ActivityResultContracts
 import androidx.appcompat.app.AppCompatActivity
-import org.vosk.Model
+// import org.vosk.Model
-import org.vosk.android.SpeechService
+// import org.vosk.android.SpeechService    --- removed as part of migration to whisper.cpp
 import java.io.File
 import java.util.zip.ZipInputStream
@@ -19,9 +19,9 @@ class MainActivity : AppCompatActivity() {
    // UI Components
    private lateinit var statusText: TextView
-    // Vosk Components
+    // Vosk Components - removed as part of whisper migration
-    private var model: Model? = null
+    // private var model: Model? = null
-    private var speechService: SpeechService? = null
+    // private var speechService: SpeechService? = null
    // 1. THE FILE PICKER REGISTRY
    // This handles the result when the user picks a ZIP file
@@ -43,7 +43,7 @@ class MainActivity : AppCompatActivity() {
        // ADD THIS LINE AT THE BOTTOM:
        // This attempts to load the model immediately if files exist
-        initVoskModel()
+        // initVoskModel() - removed as part of whisper migration
    }
    // 2. SETUP THE MENU
@@ -113,7 +113,7 @@ class MainActivity : AppCompatActivity() {
                // Back to UI Thread to say success
                runOnUiThread {
                    statusText.text = "Model Installed! Initializing..."
-                    initVoskModel()
+                    //  initVoskModel() - removed as part of the whisper migration
                }
            } catch (e: Exception) {
@@ -127,7 +127,9 @@ class MainActivity : AppCompatActivity() {
    // 6. INITIALIZE VOSK "BRAIN"
    // Replace your existing initVoskModel with this updated version
    /*
    private fun initVoskModel() {
        val modelPath = File(filesDir, "vosk-model")
        // Check if the directory exists before trying to load
@@ -146,4 +148,5 @@ class MainActivity : AppCompatActivity() {
            statusText.text = "Error loading saved model: ${e.message}"
        }
    }
 */
 }
--- a/app/src/main/java/net/mmanningau/speechtokeyboard/TestModelActivity.kt
+++ b/app/src/main/java/net/mmanningau/speechtokeyboard/TestModelActivity.kt
@@ -10,10 +10,14 @@ import androidx.appcompat.app.AppCompatActivity
 import androidx.core.app.ActivityCompat
 import androidx.core.content.ContextCompat
 import org.json.JSONObject
-import org.vosk.Model
+
-import org.vosk.Recognizer
+import com.k2fsa.sherpa.onnx.*  // import for whisper sherpa wrapper
-import org.vosk.android.RecognitionListener
+
-import org.vosk.android.SpeechService
+// import org.vosk.Model  --- migration to whisper removals
 //  import org.vosk.Recognizer
 // import org.vosk.android.RecognitionListener
 // import org.vosk.android.SpeechService
 import java.io.File
 import android.content.Context
@@ -22,14 +26,20 @@ import com.hoho.android.usbserial.driver.UsbSerialPort
 import com.hoho.android.usbserial.driver.UsbSerialProber
 import com.hoho.android.usbserial.util.SerialInputOutputManager
-class TestModelActivity : AppCompatActivity(), RecognitionListener {
+// class TestModelActivity : AppCompatActivity(), RecognitionListener {
 class TestModelActivity : AppCompatActivity() {
    private lateinit var outputText: TextView
    private lateinit var micButton: ImageButton
-    // Vosk Components
+    // Whisper/Sherpa wrapper setup variables here
-    private var model: Model? = null
+    private var audioRecorder: AudioRecorder? = null // You'll need a new recorder helper
-    private var speechService: SpeechService? = null
+    private var recognizer: OnlineRecognizer? = null
    private var stream: OnlineStream? = null
    // Vosk Components - now removed as whisper migration
    // private var model: Model? = null
    // private var speechService: SpeechService? = null
    private var isListening = false
    // USB Components
@@ -94,7 +104,81 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
        }
    }
    //Whisper/Sherpa implementation functions here....
    /**
     * Builds the Sherpa-ONNX streaming recognizer from model files stored in the app's
     * private [filesDir] and opens the decoding stream used by the recording loop.
     *
     * On success [recognizer] and [stream] are populated and [outputText] reports
     * readiness; on any failure both are left untouched and [outputText] shows the reason.
     */
    private fun initSherpaModel() {
        // Example file names - they must match whatever the model installer unpacks.
        val encoderFile = File(filesDir, "encoder-epoch-99-avg-1.onnx")
        val decoderFile = File(filesDir, "decoder-epoch-99-avg-1.onnx")
        val joinerFile = File(filesDir, "joiner-epoch-99-avg-1.onnx")
        // NOTE(review): transducer models need their token table as well; "tokens.txt"
        // is the name used by the published sherpa-onnx model archives - confirm.
        val tokensFile = File(filesDir, "tokens.txt")

        // Check up front so a missing file is reported here instead of being handed
        // to the native loader.
        val missing = listOf(encoderFile, decoderFile, joinerFile, tokensFile).filterNot { it.isFile }
        if (missing.isNotEmpty()) {
            outputText.text = "Error: missing model files: ${missing.joinToString { it.name }}"
            return
        }

        // Sherpa requires specific configuration: the transducer paths and the token
        // table live inside OnlineModelConfig, not directly on the recognizer config.
        val config = OnlineRecognizerConfig(
            featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
            modelConfig = OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = encoderFile.absolutePath,
                    decoder = decoderFile.absolutePath,
                    joiner = joinerFile.absolutePath,
                ),
                tokens = tokensFile.absolutePath,
            ),
            enableEndpoint = true, // Detects when you stop speaking
            ruleFsts = "",
            decodingMethod = "greedy_search",
            maxActivePaths = 4,
        )

        try {
            // The model lives on the filesystem, so no AssetManager is passed; a
            // non-null AssetManager makes the engine look inside the APK assets instead.
            val newRecognizer = OnlineRecognizer(assetManager = null, config = config)
            recognizer = newRecognizer
            stream = newRecognizer.createStream()
            outputText.text = "Whisper/Sherpa Ready!"
        } catch (e: Exception) {
            outputText.text = "Error: ${e.message}"
        }
    }
    /**
     * Captures 16 kHz mono 16-bit PCM from the microphone and streams it into the
     * Sherpa recognizer on a background thread for as long as [isListening] is true.
     *
     * Partial transcripts are shown in [outputText]; when the engine reports an
     * endpoint the finished sentence is passed to [sendToPico] and the stream is reset.
     * If the engine or the microphone is unavailable the function reports the problem
     * in [outputText] and returns without starting the loop.
     *
     * NOTE(review): [isListening] is read from the worker thread; it should be declared
     * `@Volatile` where it is defined so a stop request is seen promptly.
     */
    private fun startRecordingLoop() {
        // Snapshot the engine handles once: the properties are nullable and mutable,
        // so the worker must not re-read them (or see them change) mid-loop.
        val activeRecognizer = recognizer
        val activeStream = stream
        if (activeRecognizer == null || activeStream == null) {
            runOnUiThread { outputText.text = "Error: speech engine is not initialised" }
            return
        }

        val sampleRate = 16000
        val bufferSize = AudioRecord.getMinBufferSize(
            sampleRate,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT,
        )
        // getMinBufferSize signals failure with a negative code (ERROR / ERROR_BAD_VALUE).
        if (bufferSize <= 0) {
            runOnUiThread { outputText.text = "Error: audio recording is not supported on this device" }
            return
        }

        val record = AudioRecord(
            MediaRecorder.AudioSource.MIC,
            sampleRate,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT,
            bufferSize,
        )
        // An uninitialised recorder (e.g. RECORD_AUDIO not granted, mic busy) would
        // throw from startRecording(), so bail out cleanly instead.
        if (record.state != AudioRecord.STATE_INITIALIZED) {
            record.release()
            runOnUiThread { outputText.text = "Error: microphone unavailable (check RECORD_AUDIO permission)" }
            return
        }

        record.startRecording()
        isListening = true

        Thread {
            val buffer = ShortArray(bufferSize / 2)
            try {
                while (isListening) {
                    val read = record.read(buffer, 0, buffer.size)
                    if (read < 0) {
                        // Negative values are AudioRecord error codes; retrying would spin forever.
                        isListening = false
                        break
                    }
                    if (read == 0) continue

                    // 1. Feed audio to engine (16-bit PCM normalised to [-1, 1))
                    val floatSamples = FloatArray(read) { buffer[it] / 32768.0f }
                    activeStream.acceptWaveform(floatSamples, sampleRate)

                    // 2. Decode everything that is ready
                    while (activeRecognizer.isReady(activeStream)) {
                        activeRecognizer.decode(activeStream)
                    }

                    // 3. Get result and endpoint state here on the worker thread, so
                    //    the stream is never touched from the UI thread while audio is
                    //    still being fed into it.
                    val text = activeRecognizer.getResult(activeStream).text
                    val endpointReached = activeRecognizer.isEndpoint(activeStream)
                    if (endpointReached) {
                        activeRecognizer.reset(activeStream) // Clear buffer for next sentence
                    }

                    if (text.isNotEmpty()) {
                        // Sherpa returns the FULL string so far, not just chunks,
                        // so the "current" view is simply overwritten.
                        val cleanText = text.lowercase()
                        runOnUiThread {
                            outputText.text = cleanText
                            if (endpointReached) {
                                sendToPico(cleanText)
                            }
                        }
                    }
                }
            } finally {
                // Always give the microphone back, even if decoding throws.
                if (record.recordingState == AudioRecord.RECORDSTATE_RECORDING) {
                    record.stop()
                }
                record.release()
            }
        }.start()
    }
    /*      ---- removed as part of the whisper migration
     private fun initModel() {
        // We look for the folder inside private storage (same logic as MainActivity)
        val modelPath = File(filesDir, "vosk-model")
@@ -121,7 +205,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
            }
        }.start()
    }
-
+*/
    private fun sendToPico(text: String) {
        if (usbPort == null) return // Safety check
@@ -173,6 +257,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
    }
    // --- Vosk Listener Callbacks ---
 /* removed as part of migration to whisper
    override fun onResult(hypothesis: String?) {
        hypothesis?.let {
@@ -205,6 +290,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
            }
        }
    }
 */
    override fun onFinalResult(hypothesis: String?) {
        // Final flush when stopping
@@ -215,7 +301,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
            }
        }
    }
-
+/* Whisper migration removals
    override fun onError(exception: Exception?) {
        outputText.append("\nError: ${exception?.message}")
    }
@@ -224,15 +310,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
        outputText.append("\nTimeout.")
    }
-    // Helper to clean JSON: {"text": "hello world"} -> "hello world"
+*/
    /**
     * Extracts the `"text"` field from a recognizer result JSON string,
     * e.g. `{"text": "hello world"}` yields `hello world`.
     *
     * @param json raw JSON payload produced by the recognizer.
     * @return the value of `"text"`, or an empty string when the field is absent
     *         or the payload cannot be parsed.
     */
    private fun parseVoskResult(json: String): String =
        try {
            val payload = JSONObject(json)
            payload.optString("text", "")
        } catch (e: Exception) {
            // Malformed payloads are treated the same as "nothing recognised".
            ""
        }
    // Permission Helper
    private fun checkAudioPermission() {
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -10,6 +10,7 @@ material = "1.13.0"
 activity = "1.12.2"
 constraintlayout = "2.2.1"
 [libraries]
 androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
 junit = { group = "junit", name = "junit", version.ref = "junit" }