2 Commits

3 changed files with 80 additions and 90 deletions

build.gradle.kts

@@ -12,7 +12,7 @@ android {
         minSdk = 28
         targetSdk = 36
         versionCode = 10
-        versionName = "1.1"
+        versionName = "1.0"
         testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
     }
@@ -25,6 +25,11 @@ android {
"proguard-rules.pro" "proguard-rules.pro"
) )
} }
debug {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "MyApp (Dev)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
}
} }
compileOptions { compileOptions {
sourceCompatibility = JavaVersion.VERSION_11 sourceCompatibility = JavaVersion.VERSION_11
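For context, the added debug block slots into the existing buildTypes section roughly as sketched below; the release block contents beyond the "proguard-rules.pro" line are assumed, not taken from this diff. One thing to watch: resValue generates the app_name string at build time, so if app_name is also defined in res/values/strings.xml the debug variant will typically fail resource merging with a duplicate-resource error.

buildTypes {
    release {
        // Assumed release configuration; only "proguard-rules.pro" appears in the diff.
        proguardFiles(
            getDefaultProguardFile("proguard-android-optimize.txt"),
            "proguard-rules.pro"
        )
    }
    debug {
        // Installs side by side with the release build under a distinct application ID.
        applicationIdSuffix = ".streaming"
        // Generated resource; keep app_name out of strings.xml (or out of the debug
        // source set) so the merged resources do not collide.
        resValue("string", "app_name", "Speech To Keyboard (Streaming)")
    }
}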

MainActivity.kt

@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
 var entry = zipInputStream.nextEntry
 var foundEncoder = false
 var foundDecoder = false
-// var foundJoiner = false - removed for true Whisper model use
+var foundJoiner = false
 var foundTokens = false
 while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
 val targetFileName = when {
     name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
     name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
-    // name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use
+    name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
     name.contains("tokens.txt") -> "tokens.txt"
     else -> null
 }
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
 when (targetFileName) {
     "encoder.onnx" -> foundEncoder = true
     "decoder.onnx" -> foundDecoder = true
-    // "joiner.onnx" -> foundJoiner = true = re,moved for true Whisper model use
+    "joiner.onnx" -> foundJoiner = true
     "tokens.txt" -> foundTokens = true
 }
@@ -124,8 +124,7 @@ class MainActivity : AppCompatActivity() {
         }
         runOnUiThread {
-            // if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use
-            if (foundEncoder && foundDecoder && foundTokens) {
+            if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
                 statusText.text = "Model Installed Successfully!"
                 Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
             } else {
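Since the restored joiner handling is spread across four small hunks, here is how the MainActivity extraction-and-validation flow reads once they are applied. This is a condensed sketch, not the full activity: the extractModel function name, the InputStream/modelDir parameters, and the copyTo-based file write are assumptions filled in around the lines shown above.

// Sketch: unpack a model zip and verify the four files a streaming
// zipformer transducer needs (encoder, decoder, joiner, tokens).
import java.io.File
import java.io.InputStream
import java.util.zip.ZipInputStream

fun extractModel(zip: InputStream, modelDir: File): Boolean {
    var foundEncoder = false
    var foundDecoder = false
    var foundJoiner = false
    var foundTokens = false

    modelDir.mkdirs()
    ZipInputStream(zip).use { zipInputStream ->
        var entry = zipInputStream.nextEntry
        while (entry != null) {
            val name = entry.name
            val targetFileName = when {
                name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
                name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
                name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
                name.contains("tokens.txt") -> "tokens.txt"
                else -> null
            }
            if (targetFileName != null) {
                // Copy the matching entry into the app's model directory.
                File(modelDir, targetFileName).outputStream().use { out ->
                    zipInputStream.copyTo(out)
                }
                when (targetFileName) {
                    "encoder.onnx" -> foundEncoder = true
                    "decoder.onnx" -> foundDecoder = true
                    "joiner.onnx" -> foundJoiner = true
                    "tokens.txt" -> foundTokens = true
                }
            }
            entry = zipInputStream.nextEntry
        }
    }
    // All four files must be present before the streaming recognizer can load.
    return foundEncoder && foundDecoder && foundJoiner && foundTokens
}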

TestModelActivity.kt

@@ -20,23 +20,11 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
 import com.hoho.android.usbserial.util.SerialInputOutputManager
 import com.k2fsa.sherpa.onnx.EndpointConfig
 import com.k2fsa.sherpa.onnx.EndpointRule
-/*
 import com.k2fsa.sherpa.onnx.FeatureConfig
-import com.k2fsa.sherpa.onnx.OnlineModelConfig
 import com.k2fsa.sherpa.onnx.OnlineRecognizer
 import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
 import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
 import com.k2fsa.sherpa.onnx.OnlineStream
-*/
-// Below for the "offline" libraries and the true Whisper integration
-import com.k2fsa.sherpa.onnx.OfflineRecognizer
-import com.k2fsa.sherpa.onnx.OfflineStream
-import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
-import com.k2fsa.sherpa.onnx.OfflineModelConfig
-import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
-import com.k2fsa.sherpa.onnx.FeatureConfig
 import java.io.File

 class TestModelActivity : AppCompatActivity() {
@@ -46,10 +34,8 @@ class TestModelActivity : AppCompatActivity() {
     private lateinit var micButton: ImageButton

     // Sherpa (Whisper) Components
-    // private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model usa
-    // private var stream: OnlineStream? = null // - Removed for true Whisper model usa
-    private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
-    private var stream: OfflineStream? = null // Was OnlineStream
+    private var recognizer: OnlineRecognizer? = null
+    private var stream: OnlineStream? = null

     private var isRecording = false
     private var recordingThread: Thread? = null
@@ -90,37 +76,46 @@ class TestModelActivity : AppCompatActivity() {
             return
         }

-        // 1. Point to your files
-        val encoderPath = File(modelDir, "encoder.onnx").absolutePath
-        val decoderPath = File(modelDir, "decoder.onnx").absolutePath
-        val tokensPath = File(modelDir, "tokens.txt").absolutePath

         try {
-            // CONFIGURATION FOR WHISPER (OFFLINE)
-            val config = OfflineRecognizerConfig(
-                featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
-                modelConfig = OfflineModelConfig(
-                    // This parameter 'whisper' exists here!
-                    whisper = OfflineWhisperModelConfig(
-                        encoder = encoderPath,
-                        decoder = decoderPath,
-                        // tokenizer is not strictly needed in config here if passed in tokens param below
-                        // but usually standard offline config uses just these two:
-                    ),
-                    tokens = tokensPath,
-                    modelType = "whisper",
-                    debug = false,
-                    numThreads = 1
-                ),
+            // 1. Define Model Paths
+            val transducerConfig = OnlineTransducerModelConfig(
+                encoder = File(modelDir, "encoder.onnx").absolutePath,
+                decoder = File(modelDir, "decoder.onnx").absolutePath,
+                joiner = File(modelDir, "joiner.onnx").absolutePath
+            )
+
+            // 2. Define General Config
+            val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
+                transducer = transducerConfig,
+                tokens = File(modelDir, "tokens.txt").absolutePath,
+                numThreads = 1,
+                debug = false,
+                modelType = "zipformer"
+            )
+
+            // 3. Define Endpoint Rule (The fix for your error)
+            // rule1 = detected silence after speech. We set this to 2.4 seconds.
+            val silenceRule = EndpointRule(
+                mustContainNonSilence = false,
+                minTrailingSilence = 2.4f,
+                minUtteranceLength = 0.0f
+            )
+
+            // 4. Create Recognizer Config
+            val config = OnlineRecognizerConfig(
+                featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
+                modelConfig = onlineModelConfig,
+                endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
+                enableEndpoint = true,
                 decodingMethod = "greedy_search",
                 maxActivePaths = 4
             )

-            // Initialize OFFLINE Engine
-            recognizer = OfflineRecognizer(config = config)
+            // recognizer = OnlineRecognizer(assetManager = assets, config = config)
+            recognizer = OnlineRecognizer(config = config)
             stream = recognizer?.createStream()

-            outputText.text = "Whisper Engine Ready."
+            outputText.text = "Engine Loaded. Ready to Stream."

         } catch (e: Exception) {
             Log.e("Sherpa", "Init Error", e)
@@ -145,9 +140,12 @@ class TestModelActivity : AppCompatActivity() {
             return
         }

-        // Reset the stream for a new session
-        // Note: Sherpa streams can be persistent, but resetting ensures clean start
-        // If you want continuous conversation, don't reset 'committedText'
+        // FIX 1: CLEAR THE BUFFER
+        // This prevents the "ghost text" from the previous session appearing
+        // when you hit record again.
+        stream?.let { activeStream ->
+            recognizer?.reset(activeStream)
+        }

         isRecording = true
         micButton.setColorFilter(android.graphics.Color.RED)
@@ -161,46 +159,19 @@ class TestModelActivity : AppCompatActivity() {
     private fun stopRecording() {
         isRecording = false
-        try {
-            recordingThread?.join() // Wait for loop to finish
-        } catch (e: InterruptedException) {
-            // Handle interruption if necessary
-        }
+        recordingThread?.join()
         micButton.clearColorFilter()

-        // FIX: Safely unwrap 'stream' before passing it to getResult
-        // This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
-        val finalCurrentText = stream?.let { activeStream ->
-            recognizer?.getResult(activeStream)?.text
-        } ?: ""
-        val cleanFinal = finalCurrentText.lowercase()
-        if (cleanFinal.isNotEmpty()) {
-            // 1. Commit to history
-            committedText += "$cleanFinal "
-            // 2. Send to Pico
-            sendToPico("$cleanFinal ")
-            // 3. Update UI
-            outputText.text = "$committedText \n[Stopped]"
-            // 4. Reset for next time
-            // We release the old stream and create a fresh one for the next sentence
-            stream?.release()
-            stream = recognizer?.createStream()
-        } else {
-            outputText.append("\n[Stopped - No Text]")
-        }
+        // Just show what we have, don't overwrite with "[Stopped]"
+        // to prevent visual jarring.
+        outputText.append("\n[Stopped]")
     }

     private fun processAudioLoop() {
         val sampleRate = 16000
         val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)

-        // 1. GUARD CLAUSE (Safely unwrap nullables)
+        // Guard clauses
         val localRec = recognizer ?: return
         val localStream = stream ?: return
@@ -218,28 +189,43 @@ class TestModelActivity : AppCompatActivity() {
             if (ret > 0) {
                 val samples = FloatArray(ret) { buffer[it] / 32768.0f }

-                // 2. Feed Audio
                 localStream.acceptWaveform(samples, sampleRate)

-                // 3. Decode (No isReady check needed for Offline)
-                localRec.decode(localStream)
+                while (localRec.isReady(localStream)) {
+                    localRec.decode(localStream)
+                }

-                // 4. Get Current Text
-                // Whisper updates this string constantly as it hears more
                 val text = localRec.getResult(localStream).text
+                val isEndpoint = localRec.isEndpoint(localStream)

                 if (text.isNotEmpty()) {
                     val cleanText = text.lowercase()
+                    if (isEndpoint) {
+                        // FIX 2: THE ORDER OF OPERATIONS
+                        // A. Update UI first
+                        runOnUiThread {
+                            committedText += "$cleanText "
+                            outputText.text = committedText
+                            sendToPico("$cleanText ")
+                        }
+                        // B. RESET IMMEDIATELY ON BACKGROUND THREAD
+                        // We do this HERE, not inside runOnUiThread.
+                        // This guarantees the stream is clean BEFORE the loop
+                        // reads the next chunk of audio.
+                        localRec.reset(localStream)
+                    } else {
+                        // Standard partial update
                         runOnUiThread {
-                            // Update the screen so user sees what is happening
-                            // We do NOT send to USB yet, because Whisper might change this text
-                            // as you keep speaking.
                             outputText.text = "$committedText $cleanText"
                         }
+                    }
                 }
             }
         }
         record.stop()
         record.release()
     }
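Putting the TestModelActivity hunks together, the new code follows the sherpa-onnx streaming pattern shown in the diff: feed PCM into the stream, drain the decoder while it reports isReady, and reset at each detected endpoint so the committed utterance is not decoded twice. The sketch below condenses that flow using only the sherpa-onnx calls that appear above; the streamMicrophone wrapper, the isRecording/onPartial/onFinal callbacks, and the AudioRecord setup are illustrative assumptions standing in for the activity's fields, UI updates, and sendToPico call.

// Condensed sketch of the streaming recognition loop introduced in this PR.
// Assumes the model files are already unpacked into modelDir and that the
// RECORD_AUDIO permission has been granted before this is called.
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule
import com.k2fsa.sherpa.onnx.FeatureConfig
import com.k2fsa.sherpa.onnx.OnlineModelConfig
import com.k2fsa.sherpa.onnx.OnlineRecognizer
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import java.io.File

fun streamMicrophone(
    modelDir: File,
    isRecording: () -> Boolean,
    onPartial: (String) -> Unit,
    onFinal: (String) -> Unit,
) {
    val config = OnlineRecognizerConfig(
        featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
        modelConfig = OnlineModelConfig(
            transducer = OnlineTransducerModelConfig(
                encoder = File(modelDir, "encoder.onnx").absolutePath,
                decoder = File(modelDir, "decoder.onnx").absolutePath,
                joiner = File(modelDir, "joiner.onnx").absolutePath,
            ),
            tokens = File(modelDir, "tokens.txt").absolutePath,
            numThreads = 1,
            debug = false,
            modelType = "zipformer",
        ),
        // rule1: commit the utterance after 2.4 s of trailing silence.
        endpointConfig = EndpointConfig(
            rule1 = EndpointRule(
                mustContainNonSilence = false,
                minTrailingSilence = 2.4f,
                minUtteranceLength = 0.0f,
            )
        ),
        enableEndpoint = true,
        decodingMethod = "greedy_search",
        maxActivePaths = 4,
    )
    val recognizer = OnlineRecognizer(config = config)
    val stream = recognizer.createStream()

    val sampleRate = 16000
    val bufferSize = AudioRecord.getMinBufferSize(
        sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT
    )
    val record = AudioRecord(
        MediaRecorder.AudioSource.MIC, sampleRate,
        AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize
    )
    record.startRecording()
    val buffer = ShortArray(bufferSize / 2)

    while (isRecording()) {
        val ret = record.read(buffer, 0, buffer.size)
        if (ret <= 0) continue
        // 16-bit PCM -> [-1, 1] floats, then feed the stream.
        val samples = FloatArray(ret) { buffer[it] / 32768.0f }
        stream.acceptWaveform(samples, sampleRate)
        while (recognizer.isReady(stream)) {
            recognizer.decode(stream)
        }
        val text = recognizer.getResult(stream).text
        if (recognizer.isEndpoint(stream)) {
            if (text.isNotEmpty()) onFinal(text.lowercase())
            // Reset before reading the next chunk so the committed
            // utterance is not decoded again.
            recognizer.reset(stream)
        } else if (text.isNotEmpty()) {
            onPartial(text.lowercase())
        }
    }
    record.stop()
    record.release()
}

Resetting on the audio thread right after the endpoint fires, rather than inside runOnUiThread, is what keeps the next audio chunk from landing in a stream that still holds the previous utterance; this mirrors the "FIX 2" comment in the diff.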