4 Commits

5 changed files with 113 additions and 92 deletions

View File

@@ -4,10 +4,10 @@
<selectionStates> <selectionStates>
<SelectionState runConfigName="app"> <SelectionState runConfigName="app">
<option name="selectionMode" value="DROPDOWN" /> <option name="selectionMode" value="DROPDOWN" />
<DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z"> <DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
<Target type="DEFAULT_BOOT"> <Target type="DEFAULT_BOOT">
<handle> <handle>
<DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" /> <DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
</handle> </handle>
</Target> </Target>
</DropdownSelection> </DropdownSelection>

View File

@@ -11,7 +11,7 @@ android {
applicationId = "net.mmanningau.speechtokeyboard" applicationId = "net.mmanningau.speechtokeyboard"
minSdk = 28 minSdk = 28
targetSdk = 36 targetSdk = 36
versionCode = 10 versionCode = 12
versionName = "1.1" versionName = "1.1"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
@@ -25,6 +25,11 @@ android {
"proguard-rules.pro" "proguard-rules.pro"
) )
} }
debug {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "Speech To Keyboard (Streaming)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
}
} }
compileOptions { compileOptions {
sourceCompatibility = JavaVersion.VERSION_11 sourceCompatibility = JavaVersion.VERSION_11

View File

@@ -37,6 +37,8 @@
<activity <activity
android:name=".TestModelActivity" android:name=".TestModelActivity"
android:parentActivityName=".MainActivity" android:parentActivityName=".MainActivity"
android:exported="false"
android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
android:label="Test Microphone" /> android:label="Test Microphone" />
</application> </application>

View File

@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
var entry = zipInputStream.nextEntry var entry = zipInputStream.nextEntry
var foundEncoder = false var foundEncoder = false
var foundDecoder = false var foundDecoder = false
// var foundJoiner = false - removed for true Whisper model use var foundJoiner = false
var foundTokens = false var foundTokens = false
while (entry != null) { while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
val targetFileName = when { val targetFileName = when {
name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx" name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx" name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
// name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
name.contains("tokens.txt") -> "tokens.txt" name.contains("tokens.txt") -> "tokens.txt"
else -> null else -> null
} }
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
when (targetFileName) { when (targetFileName) {
"encoder.onnx" -> foundEncoder = true "encoder.onnx" -> foundEncoder = true
"decoder.onnx" -> foundDecoder = true "decoder.onnx" -> foundDecoder = true
// "joiner.onnx" -> foundJoiner = true - removed for true Whisper model use "joiner.onnx" -> foundJoiner = true
"tokens.txt" -> foundTokens = true "tokens.txt" -> foundTokens = true
} }
} }
@@ -124,8 +124,7 @@ class MainActivity : AppCompatActivity() {
} }
runOnUiThread { runOnUiThread {
// if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
if (foundEncoder && foundDecoder && foundTokens) {
statusText.text = "Model Installed Successfully!" statusText.text = "Model Installed Successfully!"
Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show() Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
} else { } else {

View File

@@ -20,25 +20,17 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule import com.k2fsa.sherpa.onnx.EndpointRule
/*
import com.k2fsa.sherpa.onnx.FeatureConfig import com.k2fsa.sherpa.onnx.FeatureConfig
import com.k2fsa.sherpa.onnx.OnlineModelConfig
import com.k2fsa.sherpa.onnx.OnlineRecognizer import com.k2fsa.sherpa.onnx.OnlineRecognizer
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream import com.k2fsa.sherpa.onnx.OnlineStream
*/
// Below for the "offline" libraries and the true Whisper integration
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineStream
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.OfflineModelConfig
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
import com.k2fsa.sherpa.onnx.FeatureConfig
import java.io.File import java.io.File
import com.k2fsa.sherpa.onnx.OfflinePunctuation
import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
class TestModelActivity : AppCompatActivity() { class TestModelActivity : AppCompatActivity() {
// UI Components // UI Components
@@ -46,13 +38,14 @@ class TestModelActivity : AppCompatActivity() {
private lateinit var micButton: ImageButton private lateinit var micButton: ImageButton
// Sherpa (Whisper) Components // Sherpa (Whisper) Components
// private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model use private var recognizer: OnlineRecognizer? = null
// private var stream: OnlineStream? = null // - Removed for true Whisper model use private var stream: OnlineStream? = null
private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
private var stream: OfflineStream? = null // Was OnlineStream
private var isRecording = false private var isRecording = false
private var recordingThread: Thread? = null private var recordingThread: Thread? = null
// Punctuation variables
private var punctuator: OfflinePunctuation? = null
// USB Components // USB Components
private var usbPort: UsbSerialPort? = null private var usbPort: UsbSerialPort? = null
@@ -90,37 +83,63 @@ class TestModelActivity : AppCompatActivity() {
return return
} }
// 1. Point to your files
val encoderPath = File(modelDir, "encoder.onnx").absolutePath
val decoderPath = File(modelDir, "decoder.onnx").absolutePath
val tokensPath = File(modelDir, "tokens.txt").absolutePath
try { try {
// CONFIGURATION FOR WHISPER (OFFLINE) // 1. Define Model Paths
val config = OfflineRecognizerConfig( val transducerConfig = OnlineTransducerModelConfig(
encoder = File(modelDir, "encoder.onnx").absolutePath,
decoder = File(modelDir, "decoder.onnx").absolutePath,
joiner = File(modelDir, "joiner.onnx").absolutePath
)
// 2. Define General Config
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
transducer = transducerConfig,
tokens = File(modelDir, "tokens.txt").absolutePath,
numThreads = 1,
debug = false,
modelType = "zipformer"
)
// 3. Define Endpoint Rule (The fix for your error)
// rule1 = detected silence after speech. We set this to 2.4 seconds.
val silenceRule = EndpointRule(
mustContainNonSilence = false,
minTrailingSilence = 2.4f,
minUtteranceLength = 0.0f
)
// 4. Create Recognizer Config
val config = OnlineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80), featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = OfflineModelConfig( modelConfig = onlineModelConfig,
// This parameter 'whisper' exists here! endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
whisper = OfflineWhisperModelConfig( enableEndpoint = true,
encoder = encoderPath,
decoder = decoderPath,
// tokenizer is not strictly needed in config here if passed in tokens param below
// but usually standard offline config uses just these two:
),
tokens = tokensPath,
modelType = "whisper",
debug = false,
numThreads = 1
),
decodingMethod = "greedy_search", decodingMethod = "greedy_search",
maxActivePaths = 4 maxActivePaths = 4
) )
// Initialize OFFLINE Engine // recognizer = OnlineRecognizer(assetManager = assets, config = config)
recognizer = OfflineRecognizer(config = config) recognizer = OnlineRecognizer(config = config)
stream = recognizer?.createStream() stream = recognizer?.createStream()
outputText.text = "Whisper Engine Ready." outputText.text = "Engine Loaded. Ready to Stream."
// ... existing recognizer init code ...
// 5. Initialize Punctuation Engine
val punctPath = File(modelDir, "punct_model.onnx").absolutePath
if (File(punctPath).exists()) {
// CORRECTED: Wrap the path inside 'OfflinePunctuationModelConfig'
val punctConfig = OfflinePunctuationConfig(
model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
)
punctuator = OfflinePunctuation(config = punctConfig)
outputText.append("\n+ Punctuation Ready")
} else {
outputText.append("\n(No Punctuation model found)")
}
} catch (e: Exception) { } catch (e: Exception) {
Log.e("Sherpa", "Init Error", e) Log.e("Sherpa", "Init Error", e)
@@ -145,9 +164,12 @@ class TestModelActivity : AppCompatActivity() {
return return
} }
// Reset the stream for a new session // FIX 1: CLEAR THE BUFFER
// Note: Sherpa streams can be persistent, but resetting ensures clean start // This prevents the "ghost text" from the previous session appearing
// If you want continuous conversation, don't reset 'committedText' // when you hit record again.
stream?.let { activeStream ->
recognizer?.reset(activeStream)
}
isRecording = true isRecording = true
micButton.setColorFilter(android.graphics.Color.RED) micButton.setColorFilter(android.graphics.Color.RED)
@@ -161,46 +183,19 @@ class TestModelActivity : AppCompatActivity() {
private fun stopRecording() { private fun stopRecording() {
isRecording = false isRecording = false
try { recordingThread?.join()
recordingThread?.join() // Wait for loop to finish
} catch (e: InterruptedException) {
// Handle interruption if necessary
}
micButton.clearColorFilter() micButton.clearColorFilter()
// FIX: Safely unwrap 'stream' before passing it to getResult // Just show what we have, don't overwrite with "[Stopped]"
// This reads: "If stream is NOT null, call getResult. Otherwise return empty string." // to prevent visual jarring.
val finalCurrentText = stream?.let { activeStream -> outputText.append("\n[Stopped]")
recognizer?.getResult(activeStream)?.text
} ?: ""
val cleanFinal = finalCurrentText.lowercase()
if (cleanFinal.isNotEmpty()) {
// 1. Commit to history
committedText += "$cleanFinal "
// 2. Send to Pico
sendToPico("$cleanFinal ")
// 3. Update UI
outputText.text = "$committedText \n[Stopped]"
// 4. Reset for next time
// We release the old stream and create a fresh one for the next sentence
stream?.release()
stream = recognizer?.createStream()
} else {
outputText.append("\n[Stopped - No Text]")
}
} }
private fun processAudioLoop() { private fun processAudioLoop() {
val sampleRate = 16000 val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT) val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// 1. GUARD CLAUSE (Safely unwrap nullables) // Guard clauses
val localRec = recognizer ?: return val localRec = recognizer ?: return
val localStream = stream ?: return val localStream = stream ?: return
@@ -218,24 +213,44 @@ class TestModelActivity : AppCompatActivity() {
if (ret > 0) { if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f } val samples = FloatArray(ret) { buffer[it] / 32768.0f }
// 2. Feed Audio
localStream.acceptWaveform(samples, sampleRate) localStream.acceptWaveform(samples, sampleRate)
// 3. Decode (No isReady check needed for Offline) while (localRec.isReady(localStream)) {
localRec.decode(localStream) localRec.decode(localStream)
}
// 4. Get Current Text
// Whisper updates this string constantly as it hears more
val text = localRec.getResult(localStream).text val text = localRec.getResult(localStream).text
val isEndpoint = localRec.isEndpoint(localStream)
if (text.isNotEmpty()) { if (text.isNotEmpty()) {
val cleanText = text.lowercase() val cleanText = text.lowercase()
runOnUiThread { if (isEndpoint) {
// Update the screen so user sees what is happening // FIX 2: THE ORDER OF OPERATIONS
// We do NOT send to USB yet, because Whisper might change this text
// as you keep speaking. // A. Update UI first
outputText.text = "$committedText $cleanText" // 1. PUNCTUATE
// We pass the raw text to the punctuator
val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
runOnUiThread {
// 2. Commit the BEAUTIFUL text
committedText += "$punctuatedText "
outputText.text = committedText
sendToPico("$punctuatedText ")
}
// B. RESET IMMEDIATELY ON BACKGROUND THREAD
// We do this HERE, not inside runOnUiThread.
// This guarantees the stream is clean BEFORE the loop
// reads the next chunk of audio.
localRec.reset(localStream)
} else {
// Standard partial update
runOnUiThread {
outputText.text = "$committedText $cleanText"
}
} }
} }
} }