7 Commits

Author SHA1 Message Date
f96189a509 Updated the processAudioLoop to make sure that the buffer was always flushed after an output stream so we didn't miss any characters. Also made many modifications to the Python script running on the Pico to ensure that we get dual inputs and neater outputs now. 2026-02-09 14:48:55 +11:00
75b63f91ea Updated to continually check for a valid USB connection on each press of the microphone, and also totally rewrote the send-to-Pico code to be more robust... 2026-02-03 20:39:00 +11:00
9f6d67a567 Added the debug.keystore to the repo so that I can move between devices and build the app without getting installation errors on the phone.
Also updated the TestModelActivity to use more robust USB permission and connection testing....
2026-01-30 16:40:42 +11:00
2a8f004916 Updated the AndroidManifest.xml to stop the default function exit call upon screen rotation and just have it remain inside the "startRecording" function. 2026-01-26 15:38:34 +11:00
ac7d51b46e Added the punctuation sections to the code - but have not added the zip extraction to include this automatically - might have to add another upload manager specifically for this one file I think, as it is really messy to get your hands on through other means.... 2026-01-23 12:52:24 +11:00
f17c6ab84e Updated to finally fix the doubled-up text that sometimes appeared during the translation at the start of the second press of the microphone button 2026-01-23 12:24:56 +11:00
cce093db4e Added icon for Android display 2026-01-23 10:53:03 +11:00
28 changed files with 167 additions and 59 deletions

View File

@@ -4,10 +4,10 @@
<selectionStates>
<SelectionState runConfigName="app">
<option name="selectionMode" value="DROPDOWN" />
<DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z">
<DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
<Target type="DEFAULT_BOOT">
<handle>
<DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" />
<DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
</handle>
</Target>
</DropdownSelection>

1
.idea/misc.xml generated
View File

@@ -1,4 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="jbr-21" project-jdk-type="JavaSDK">

2
app/.gitignore vendored
View File

@@ -44,7 +44,7 @@ captures/
# Keystore files (NEVER commit release keys! The *.keystore rule is disabled below so the shared debug.keystore can be tracked.)
*.jks
*.keystore
# *.keystore
*.pem
# OS-specific files

View File

@@ -7,12 +7,22 @@ android {
namespace = "net.mmanningau.speechtokeyboard"
compileSdk = 36
signingConfigs {
getByName("debug") {
// This tells Gradle to look for the key in the same folder as this build file
storeFile = file("debug.keystore")
storePassword = "android"
keyAlias = "androiddebugkey"
keyPassword = "android"
}
}
defaultConfig {
applicationId = "net.mmanningau.speechtokeyboard"
minSdk = 28
targetSdk = 36
versionCode = 9
versionName = "1.0"
versionCode = 15
versionName = "1.1.3"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
}
@@ -25,6 +35,13 @@ android {
"proguard-rules.pro"
)
}
debug {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "Speech To Keyboard (Streaming)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
// Explicitly tell the debug build to use the config we defined above
signingConfig = signingConfigs["debug"]
}
}
compileOptions {
sourceCompatibility = JavaVersion.VERSION_11

BIN
app/debug.keystore Normal file

Binary file not shown.

View File

@@ -37,6 +37,8 @@
<activity
android:name=".TestModelActivity"
android:parentActivityName=".MainActivity"
android:exported="false"
android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
android:label="Test Microphone" />
</application>

Binary file not shown.

After

Width:  |  Height:  |  Size: 411 KiB

View File

@@ -17,7 +17,6 @@ import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import com.hoho.android.usbserial.driver.UsbSerialPort
import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule
import com.k2fsa.sherpa.onnx.FeatureConfig
@@ -26,6 +25,9 @@ import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream
import java.io.File
import com.k2fsa.sherpa.onnx.OfflinePunctuation
import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
class TestModelActivity : AppCompatActivity() {
@@ -39,8 +41,12 @@ class TestModelActivity : AppCompatActivity() {
private var isRecording = false
private var recordingThread: Thread? = null
// Punctuation variables
private var punctuator: OfflinePunctuation? = null
// USB Components
private var usbPort: UsbSerialPort? = null
private val ACTION_USB_PERMISSION = "net.mmanningau.speechtokeyboard.USB_PERMISSION"
// Text History
private var committedText = ""
@@ -54,7 +60,9 @@ class TestModelActivity : AppCompatActivity() {
micButton = findViewById(R.id.btn_mic_toggle)
checkAudioPermission()
connectToPico() // Try to auto-connect USB on start
// Try to connect immediately on startup
attemptUsbConnection()
// Initialize Engine
initSherpaModel()
@@ -66,7 +74,7 @@ class TestModelActivity : AppCompatActivity() {
}
// ----------------------------------------------------------------
// 1. ENGINE INITIALIZATION (The "Missing Code")
// 1. ENGINE INITIALIZATION
// ----------------------------------------------------------------
private fun initSherpaModel() {
val modelDir = File(filesDir, "sherpa-model")
@@ -77,14 +85,12 @@ class TestModelActivity : AppCompatActivity() {
}
try {
// 1. Define Model Paths
val transducerConfig = OnlineTransducerModelConfig(
encoder = File(modelDir, "encoder.onnx").absolutePath,
decoder = File(modelDir, "decoder.onnx").absolutePath,
joiner = File(modelDir, "joiner.onnx").absolutePath
)
// 2. Define General Config
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
transducer = transducerConfig,
tokens = File(modelDir, "tokens.txt").absolutePath,
@@ -93,29 +99,38 @@ class TestModelActivity : AppCompatActivity() {
modelType = "zipformer"
)
// 3. Define Endpoint Rule (The fix for your error)
// rule1 = detected silence after speech. We set this to 2.4 seconds.
val silenceRule = EndpointRule(
mustContainNonSilence = false,
minTrailingSilence = 2.4f,
minTrailingSilence = 1.2f,
minUtteranceLength = 0.0f
)
// 4. Create Recognizer Config
val config = OnlineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = onlineModelConfig,
endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
endpointConfig = EndpointConfig(rule1 = silenceRule),
enableEndpoint = true,
decodingMethod = "greedy_search",
maxActivePaths = 4
)
// recognizer = OnlineRecognizer(assetManager = assets, config = config)
recognizer = OnlineRecognizer(config = config)
stream = recognizer?.createStream()
outputText.text = "Engine Loaded. Ready to Stream."
outputText.text = "Engine Loaded. Ready."
// Initialize Punctuation Engine
val punctPath = File(modelDir, "punct_model.onnx").absolutePath
if (File(punctPath).exists()) {
val punctConfig = OfflinePunctuationConfig(
model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
)
punctuator = OfflinePunctuation(config = punctConfig)
outputText.append("\n+ Punctuation Ready")
} else {
outputText.append("\n(No Punctuation model found)")
}
} catch (e: Exception) {
Log.e("Sherpa", "Init Error", e)
@@ -124,7 +139,7 @@ class TestModelActivity : AppCompatActivity() {
}
// ----------------------------------------------------------------
// 2. AUDIO LOOP (The "Manual" Listener)
// 2. AUDIO LOOP
// ----------------------------------------------------------------
private fun toggleRecording() {
if (isRecording) {
@@ -140,9 +155,14 @@ class TestModelActivity : AppCompatActivity() {
return
}
// Reset the stream for a new session
// Note: Sherpa streams can be persistent, but resetting ensures clean start
// If you want continuous conversation, don't reset 'committedText'
// Before we start, check USB connection again!
if (usbPort == null) {
attemptUsbConnection()
}
stream?.let { activeStream ->
recognizer?.reset(activeStream)
}
isRecording = true
micButton.setColorFilter(android.graphics.Color.RED)
@@ -158,9 +178,6 @@ class TestModelActivity : AppCompatActivity() {
isRecording = false
recordingThread?.join()
micButton.clearColorFilter()
// Just show what we have, don't overwrite with "[Stopped]"
// to prevent visual jarring.
outputText.append("\n[Stopped]")
}
@@ -168,9 +185,6 @@ class TestModelActivity : AppCompatActivity() {
val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// 1. GUARD CLAUSE: Unpack nullables safely
// If recognizer or stream are null, we stop immediately.
// This creates 'localRec' and 'localStream' which are GUARANTEED non-null.
val localRec = recognizer ?: return
val localStream = stream ?: return
@@ -187,8 +201,6 @@ class TestModelActivity : AppCompatActivity() {
val ret = record.read(buffer, 0, buffer.size)
if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
// 2. Use the LOCAL (non-null) variables
localStream.acceptWaveform(samples, sampleRate)
while (localRec.isReady(localStream)) {
@@ -201,50 +213,126 @@ class TestModelActivity : AppCompatActivity() {
if (text.isNotEmpty()) {
val cleanText = text.lowercase()
runOnUiThread {
if (isEndpoint) {
committedText += "$cleanText "
if (isEndpoint) {
// CASE A: Natural Pause (Sentence Finished)
val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
runOnUiThread {
committedText += "$punctuatedText "
outputText.text = committedText
sendToPico("$cleanText ")
localRec.reset(localStream)
} else {
sendToPico("$punctuatedText ")
}
localRec.reset(localStream)
} else {
// CASE B: Partial (Still talking)
// Update screen ONLY, do not send to Pico yet
runOnUiThread {
outputText.text = "$committedText $cleanText"
}
}
}
}
}
// --- NEW: THE FLUSH BLOCK ---
// This runs when you hit "Stop". It grabs any trailing words that were recognized but not yet committed.
val finalTail = localRec.getResult(localStream).text
if (finalTail.isNotEmpty()) {
val cleanTail = finalTail.lowercase()
val punctuatedTail = punctuator?.addPunctuation(cleanTail) ?: cleanTail
runOnUiThread {
committedText += "$punctuatedTail "
outputText.text = committedText
// FORCE SEND the remaining text
sendToPico("$punctuatedTail ")
}
// Reset for next time
localRec.reset(localStream)
}
// -----------------------------
record.stop()
record.release()
}
// ----------------------------------------------------------------
// 3. USB LOGIC (Unchanged from before)
// 3. ROBUST USB LOGIC (FIXED)
// ----------------------------------------------------------------
private fun connectToPico() {
// RENAMED from 'connectToPico' to be clearer
private fun attemptUsbConnection() {
val usbManager = getSystemService(Context.USB_SERVICE) as UsbManager
// 1. Find Driver
val availableDrivers = UsbSerialProber.getDefaultProber().findAllDrivers(usbManager)
if (availableDrivers.isEmpty()) return
if (availableDrivers.isEmpty()) {
Log.d("USB", "No drivers found")
return
}
val driver = availableDrivers[0]
val connection = usbManager.openDevice(driver.device) ?: return
usbPort = driver.ports[0]
// 2. Check Permission
if (!usbManager.hasPermission(driver.device)) {
Log.d("USB", "Requesting Permission...")
val pendingIntent = android.app.PendingIntent.getBroadcast(
this,
0,
android.content.Intent(ACTION_USB_PERMISSION),
android.app.PendingIntent.FLAG_IMMUTABLE
)
usbManager.requestPermission(driver.device, pendingIntent)
return
}
// 3. Open Connection
try {
val connection = usbManager.openDevice(driver.device)
if (connection == null) {
Log.e("USB", "openDevice returned null")
return
}
// Clean up old port if exists
try { usbPort?.close() } catch (e: Exception) {}
usbPort = driver.ports[0]
usbPort?.open(connection)
usbPort?.setParameters(115200, 8, UsbSerialPort.STOPBITS_1, UsbSerialPort.PARITY_NONE)
outputText.append("\n> USB Connected")
Log.d("USB", "Success! Connected at 115200")
// UI Feedback
runOnUiThread {
Toast.makeText(this, "USB Connected", Toast.LENGTH_SHORT).show()
}
} catch (e: Exception) {
outputText.append("\n> USB Error: ${e.message}")
Log.e("USB", "Connection Error", e)
usbPort = null
}
}
private fun sendToPico(text: String) {
if (usbPort == null) return
// AUTO-RECONNECT FEATURE
if (usbPort == null) {
Log.d("USB", "Port null, trying to reconnect...")
attemptUsbConnection()
if (usbPort == null) {
Log.e("USB", "Reconnect failed.")
return
}
}
try {
usbPort?.write(text.toByteArray(Charsets.UTF_8), 500)
val data = text.toByteArray(Charsets.UTF_8)
usbPort?.write(data, 500)
Log.d("USB", "Sent: $text")
} catch (e: Exception) {
// Log error
Log.e("USB", "Write Failed", e)
// Force reset on next try
usbPort = null
}
}

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@color/ic_launcher_background"/>
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
</adaptive-icon>

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@color/ic_launcher_background"/>
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
</adaptive-icon>

View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 982 B

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 3.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.6 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="ic_launcher_background">#0878F5</color>
</resources>