Updated AndroidManifest.xml so that rotating the screen no longer triggers the default activity restart (which exited recording); the app now simply remains inside the "startRecording" function.

Added the punctuation sections to the code, but have not yet added zip extraction to include the punctuation model automatically. Might have to add another upload manager specifically for this one file, I think, as it is really messy to get your hands on through other means.
Updated to finally fix the doubled-up text that sometimes appeared during translation at the start of the second press of the microphone button.
2026-01-26 15:38:34 +11:00 · 2026-01-23 12:52:24 +11:00 · 2026-01-23 12:24:56 +11:00 · 2026-01-23 10:53:03 +11:00
5 changed files with 113 additions and 92 deletions
--- a/.idea/deploymentTargetSelector.xml
+++ b/.idea/deploymentTargetSelector.xml
@@ -4,10 +4,10 @@
    <selectionStates>
      <SelectionState runConfigName="app">
        <option name="selectionMode" value="DROPDOWN" />
-        <DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z">
+        <DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
          <Target type="DEFAULT_BOOT">
            <handle>
-              <DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" />
+              <DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
            </handle>
          </Target>
        </DropdownSelection>
--- a/app/build.gradle.kts
+++ b/app/build.gradle.kts
@@ -11,7 +11,7 @@ android {
        applicationId = "net.mmanningau.speechtokeyboard"
        minSdk = 28
        targetSdk = 36
-        versionCode = 10
+        versionCode = 12
        versionName = "1.1"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
@@ -25,6 +25,11 @@ android {
                "proguard-rules.pro"
            )
        }
+        debug {
+            applicationIdSuffix = ".streaming"
+            // This changes the app name on your homescreen to "Speech To Keyboard (Streaming)"
+            resValue("string", "app_name", "Speech To Keyboard (Streaming)")
+        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_11
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -37,6 +37,8 @@
        <activity
            android:name=".TestModelActivity"
            android:parentActivityName=".MainActivity"
+            android:exported="false"
+            android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
            android:label="Test Microphone" />

    </application>
--- a/app/src/main/java/net/mmanningau/speechtokeyboard/MainActivity.kt
+++ b/app/src/main/java/net/mmanningau/speechtokeyboard/MainActivity.kt
@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
                    var entry = zipInputStream.nextEntry
                    var foundEncoder = false
                    var foundDecoder = false
-                    // var foundJoiner = false - removed for true Whisper model use
+                    var foundJoiner = false
                    var foundTokens = false

                    while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
                        val targetFileName = when {
                            name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
                            name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
-                            // name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use
+                            name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
                            name.contains("tokens.txt") -> "tokens.txt"
                            else -> null
                        }
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
                            when (targetFileName) {
                                "encoder.onnx" -> foundEncoder = true
                                "decoder.onnx" -> foundDecoder = true
-                                // "joiner.onnx" -> foundJoiner = true = re,moved for true Whisper model use
+                                "joiner.onnx" -> foundJoiner = true
                                "tokens.txt" -> foundTokens = true
                            }
                        }
@@ -124,8 +124,7 @@ class MainActivity : AppCompatActivity() {
                    }

                    runOnUiThread {
-                        // if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use
-                        if (foundEncoder && foundDecoder && foundTokens) {
+                        if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
                            statusText.text = "Model Installed Successfully!"
                            Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
                        } else {
--- a/app/src/main/java/net/mmanningau/speechtokeyboard/TestModelActivity.kt
+++ b/app/src/main/java/net/mmanningau/speechtokeyboard/TestModelActivity.kt
@@ -20,25 +20,17 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
 import com.hoho.android.usbserial.util.SerialInputOutputManager
 import com.k2fsa.sherpa.onnx.EndpointConfig
 import com.k2fsa.sherpa.onnx.EndpointRule
-/*
 import com.k2fsa.sherpa.onnx.FeatureConfig
-import com.k2fsa.sherpa.onnx.OnlineModelConfig
 import com.k2fsa.sherpa.onnx.OnlineRecognizer
 import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
 import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
 import com.k2fsa.sherpa.onnx.OnlineStream
-
- */
-// Below for the "offline" libraries and the true Whisper integration
-import com.k2fsa.sherpa.onnx.OfflineRecognizer
-import com.k2fsa.sherpa.onnx.OfflineStream
-import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
-import com.k2fsa.sherpa.onnx.OfflineModelConfig
-import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
-import com.k2fsa.sherpa.onnx.FeatureConfig
-
 import java.io.File

+import com.k2fsa.sherpa.onnx.OfflinePunctuation
+import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
+import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
+
 class TestModelActivity : AppCompatActivity() {

    // UI Components
@@ -46,13 +38,14 @@ class TestModelActivity : AppCompatActivity() {
    private lateinit var micButton: ImageButton

    // Sherpa (Whisper) Components
-    // private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model usa
-    // private var stream: OnlineStream? = null // - Removed for true Whisper model usa
-    private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
-    private var stream: OfflineStream? = null         // Was OnlineStream
+    private var recognizer: OnlineRecognizer? = null
+    private var stream: OnlineStream? = null
    private var isRecording = false
    private var recordingThread: Thread? = null

+    // Punctuation variables
+    private var punctuator: OfflinePunctuation? = null
+
    // USB Components
    private var usbPort: UsbSerialPort? = null

@@ -90,37 +83,63 @@ class TestModelActivity : AppCompatActivity() {
            return
        }

-        // 1. Point to your files
-        val encoderPath = File(modelDir, "encoder.onnx").absolutePath
-        val decoderPath = File(modelDir, "decoder.onnx").absolutePath
-        val tokensPath = File(modelDir, "tokens.txt").absolutePath
-
        try {
-            // CONFIGURATION FOR WHISPER (OFFLINE)
-            val config = OfflineRecognizerConfig(
-                featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
-                modelConfig = OfflineModelConfig(
-                    // This parameter 'whisper' exists here!
-                    whisper = OfflineWhisperModelConfig(
-                        encoder = encoderPath,
-                        decoder = decoderPath,
-                        // tokenizer is not strictly needed in config here if passed in tokens param below
-                        // but usually standard offline config uses just these two:
-                    ),
-                    tokens = tokensPath,
-                    modelType = "whisper",
+            // 1. Define Model Paths
+            val transducerConfig = OnlineTransducerModelConfig(
+                encoder = File(modelDir, "encoder.onnx").absolutePath,
+                decoder = File(modelDir, "decoder.onnx").absolutePath,
+                joiner = File(modelDir, "joiner.onnx").absolutePath
+            )
+
+            // 2. Define General Config
+            val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
+                transducer = transducerConfig,
+                tokens = File(modelDir, "tokens.txt").absolutePath,
+                numThreads = 1,
                debug = false,
-                    numThreads = 1
-                ),
+                modelType = "zipformer"
+            )
+
+            // 3. Define Endpoint Rule (The fix for your error)
+            // rule1 = detected silence after speech. We set this to 2.4 seconds.
+            val silenceRule = EndpointRule(
+                mustContainNonSilence = false,
+                minTrailingSilence = 2.4f,
+                minUtteranceLength = 0.0f
+            )
+
+            // 4. Create Recognizer Config
+            val config = OnlineRecognizerConfig(
+                featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
+                modelConfig = onlineModelConfig,
+                endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
+                enableEndpoint = true,
                decodingMethod = "greedy_search",
                maxActivePaths = 4
            )

-            // Initialize OFFLINE Engine
-            recognizer = OfflineRecognizer(config = config)
+            // recognizer = OnlineRecognizer(assetManager = assets, config = config)
+            recognizer = OnlineRecognizer(config = config)
            stream = recognizer?.createStream()

-            outputText.text = "Whisper Engine Ready."
+            outputText.text = "Engine Loaded. Ready to Stream."
+
+            // ... existing recognizer init code ...
+
+// 5. Initialize Punctuation Engine
+            val punctPath = File(modelDir, "punct_model.onnx").absolutePath
+
+            if (File(punctPath).exists()) {
+                // CORRECTED: Wrap the path inside 'OfflinePunctuationModelConfig'
+                val punctConfig = OfflinePunctuationConfig(
+                    model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
+                )
+
+                punctuator = OfflinePunctuation(config = punctConfig)
+                outputText.append("\n+ Punctuation Ready")
+            } else {
+                outputText.append("\n(No Punctuation model found)")
+            }

        } catch (e: Exception) {
            Log.e("Sherpa", "Init Error", e)
@@ -145,9 +164,12 @@ class TestModelActivity : AppCompatActivity() {
            return
        }

-        // Reset the stream for a new session
-        // Note: Sherpa streams can be persistent, but resetting ensures clean start
-        // If you want continuous conversation, don't reset 'committedText'
+        // FIX 1: CLEAR THE BUFFER
+        // This prevents the "ghost text" from the previous session appearing
+        // when you hit record again.
+        stream?.let { activeStream ->
+            recognizer?.reset(activeStream)
+        }

        isRecording = true
        micButton.setColorFilter(android.graphics.Color.RED)
@@ -161,46 +183,19 @@ class TestModelActivity : AppCompatActivity() {

    private fun stopRecording() {
        isRecording = false
-        try {
-            recordingThread?.join() // Wait for loop to finish
-        } catch (e: InterruptedException) {
-            // Handle interruption if necessary
-        }
-
+        recordingThread?.join()
        micButton.clearColorFilter()

-        // FIX: Safely unwrap 'stream' before passing it to getResult
-        // This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
-        val finalCurrentText = stream?.let { activeStream ->
-            recognizer?.getResult(activeStream)?.text
-        } ?: ""
-
-        val cleanFinal = finalCurrentText.lowercase()
-
-        if (cleanFinal.isNotEmpty()) {
-            // 1. Commit to history
-            committedText += "$cleanFinal "
-
-            // 2. Send to Pico
-            sendToPico("$cleanFinal ")
-
-            // 3. Update UI
-            outputText.text = "$committedText \n[Stopped]"
-
-            // 4. Reset for next time
-            // We release the old stream and create a fresh one for the next sentence
-            stream?.release()
-            stream = recognizer?.createStream()
-        } else {
-            outputText.append("\n[Stopped - No Text]")
-        }
+        // Just show what we have, don't overwrite with "[Stopped]"
+        // to prevent visual jarring.
+        outputText.append("\n[Stopped]")
    }

    private fun processAudioLoop() {
        val sampleRate = 16000
        val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)

-        // 1. GUARD CLAUSE (Safely unwrap nullables)
+        // Guard clauses
        val localRec = recognizer ?: return
        val localStream = stream ?: return

@@ -218,28 +213,48 @@ class TestModelActivity : AppCompatActivity() {
            if (ret > 0) {
                val samples = FloatArray(ret) { buffer[it] / 32768.0f }

-                // 2. Feed Audio
                localStream.acceptWaveform(samples, sampleRate)

-                // 3. Decode (No isReady check needed for Offline)
+                while (localRec.isReady(localStream)) {
                    localRec.decode(localStream)
+                }

-                // 4. Get Current Text
-                // Whisper updates this string constantly as it hears more
                val text = localRec.getResult(localStream).text
+                val isEndpoint = localRec.isEndpoint(localStream)

                if (text.isNotEmpty()) {
                    val cleanText = text.lowercase()

+                    if (isEndpoint) {
+                        // FIX 2: THE ORDER OF OPERATIONS
+
+                        // A. Update UI first
+                        // 1. PUNCTUATE
+                        // We pass the raw text to the punctuator
+                        val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
+
+                        runOnUiThread {
+                            // 2. Commit the BEAUTIFUL text
+                            committedText += "$punctuatedText "
+                            outputText.text = committedText
+                            sendToPico("$punctuatedText ")
+                        }
+
+                        // B. RESET IMMEDIATELY ON BACKGROUND THREAD
+                        // We do this HERE, not inside runOnUiThread.
+                        // This guarantees the stream is clean BEFORE the loop
+                        // reads the next chunk of audio.
+                        localRec.reset(localStream)
+
+                    } else {
+                        // Standard partial update
                        runOnUiThread {
-                        // Update the screen so user sees what is happening
-                        // We do NOT send to USB yet, because Whisper might change this text
-                        // as you keep speaking.
                            outputText.text = "$committedText $cleanText"
                        }
                    }
                }
            }
+        }
        record.stop()
        record.release()
    }
Author	SHA1	Message	Date
mmanningau	2a8f004916	Updated AndroidManifest.xml so that rotating the screen no longer triggers the default activity restart (which exited recording); the app now simply remains inside the "startRecording" function.	2026-01-26 15:38:34 +11:00
mmanningau	ac7d51b46e	Added the punctuation sections to the code, but have not yet added zip extraction to include the punctuation model automatically. Might have to add another upload manager specifically for this one file, I think, as it is really messy to get your hands on through other means.	2026-01-23 12:52:24 +11:00
mmanningau	f17c6ab84e	Updated to finally fix the doubled-up text that sometimes appeared during translation at the start of the second press of the microphone button.	2026-01-23 12:24:56 +11:00
mmanningau	cce093db4e	Added icon for Android display	2026-01-23 10:53:03 +11:00