7 Commits

Author SHA1 Message Date
f96189a509 Updated the processAudioLoop to make sure that the buffer was always flushed after an output stream so we didn't miss any characters. Also made many modifications to the python script running on the Pico to ensure that we get dual inputs and neater outputs now. 2026-02-09 14:48:55 +11:00
75b63f91ea Updated to continually check for a valid USB connection on each press of the microphone and also totally rewrote the send to Pico code as well to be more robust... 2026-02-03 20:39:00 +11:00
9f6d67a567 Added the debug.keystore to the repo so that I can move between devices and build the app without getting installation errors on the phone.
Also updated the testModelActivity to a more robust USB connection permissions testing and connection testing....
2026-01-30 16:40:42 +11:00
2a8f004916 Updated the AndroidManifest.xml to stop the default function exit call upon screen rotation and just have it remain inside the "startRecording" function. 2026-01-26 15:38:34 +11:00
ac7d51b46e Added the punctuation sections to the code - but have not added the zip extraction to include this automatically - might have to add another upload manager specifically for this one file I think, as it is really messy to get your hands on through other means.... 2026-01-23 12:52:24 +11:00
f17c6ab84e Updated to finally fix the doubled-up text that sometimes appeared during the translation at the start of the second press of the microphone button 2026-01-23 12:24:56 +11:00
cce093db4e Added icon for Android display 2026-01-23 10:53:03 +11:00
28 changed files with 167 additions and 59 deletions

View File

@@ -4,10 +4,10 @@
<selectionStates> <selectionStates>
<SelectionState runConfigName="app"> <SelectionState runConfigName="app">
<option name="selectionMode" value="DROPDOWN" /> <option name="selectionMode" value="DROPDOWN" />
<DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z"> <DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
<Target type="DEFAULT_BOOT"> <Target type="DEFAULT_BOOT">
<handle> <handle>
<DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" /> <DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
</handle> </handle>
</Target> </Target>
</DropdownSelection> </DropdownSelection>

1
.idea/misc.xml generated
View File

@@ -1,4 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" /> <component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="jbr-21" project-jdk-type="JavaSDK"> <component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="jbr-21" project-jdk-type="JavaSDK">

2
app/.gitignore vendored
View File

@@ -44,7 +44,7 @@ captures/
# Keystore files (NEVER commit these!) # Keystore files (NEVER commit these!)
*.jks *.jks
*.keystore # *.keystore
*.pem *.pem
# OS-specific files # OS-specific files

View File

@@ -7,12 +7,22 @@ android {
namespace = "net.mmanningau.speechtokeyboard" namespace = "net.mmanningau.speechtokeyboard"
compileSdk = 36 compileSdk = 36
signingConfigs {
getByName("debug") {
// This tells Gradle to look for the key in the same folder as this build file
storeFile = file("debug.keystore")
storePassword = "android"
keyAlias = "androiddebugkey"
keyPassword = "android"
}
}
defaultConfig { defaultConfig {
applicationId = "net.mmanningau.speechtokeyboard" applicationId = "net.mmanningau.speechtokeyboard"
minSdk = 28 minSdk = 28
targetSdk = 36 targetSdk = 36
versionCode = 9 versionCode = 15
versionName = "1.0" versionName = "1.1.3"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
} }
@@ -25,6 +35,13 @@ android {
"proguard-rules.pro" "proguard-rules.pro"
) )
} }
debug {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "Speech To Keyboard (Streaming)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
// Explicitly tell the debug build to use the config we defined above
signingConfig = signingConfigs["debug"]
}
} }
compileOptions { compileOptions {
sourceCompatibility = JavaVersion.VERSION_11 sourceCompatibility = JavaVersion.VERSION_11

BIN
app/debug.keystore Normal file

Binary file not shown.

View File

@@ -37,6 +37,8 @@
<activity <activity
android:name=".TestModelActivity" android:name=".TestModelActivity"
android:parentActivityName=".MainActivity" android:parentActivityName=".MainActivity"
android:exported="false"
android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
android:label="Test Microphone" /> android:label="Test Microphone" />
</application> </application>

Binary file not shown.

After

Width:  |  Height:  |  Size: 411 KiB

View File

@@ -17,7 +17,6 @@ import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat import androidx.core.content.ContextCompat
import com.hoho.android.usbserial.driver.UsbSerialPort import com.hoho.android.usbserial.driver.UsbSerialPort
import com.hoho.android.usbserial.driver.UsbSerialProber import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule import com.k2fsa.sherpa.onnx.EndpointRule
import com.k2fsa.sherpa.onnx.FeatureConfig import com.k2fsa.sherpa.onnx.FeatureConfig
@@ -26,6 +25,9 @@ import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream import com.k2fsa.sherpa.onnx.OnlineStream
import java.io.File import java.io.File
import com.k2fsa.sherpa.onnx.OfflinePunctuation
import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
class TestModelActivity : AppCompatActivity() { class TestModelActivity : AppCompatActivity() {
@@ -39,8 +41,12 @@ class TestModelActivity : AppCompatActivity() {
private var isRecording = false private var isRecording = false
private var recordingThread: Thread? = null private var recordingThread: Thread? = null
// Punctuation variables
private var punctuator: OfflinePunctuation? = null
// USB Components // USB Components
private var usbPort: UsbSerialPort? = null private var usbPort: UsbSerialPort? = null
private val ACTION_USB_PERMISSION = "net.mmanningau.speechtokeyboard.USB_PERMISSION"
// Text History // Text History
private var committedText = "" private var committedText = ""
@@ -54,7 +60,9 @@ class TestModelActivity : AppCompatActivity() {
micButton = findViewById(R.id.btn_mic_toggle) micButton = findViewById(R.id.btn_mic_toggle)
checkAudioPermission() checkAudioPermission()
connectToPico() // Try to auto-connect USB on start
// Try to connect immediately on startup
attemptUsbConnection()
// Initialize Engine // Initialize Engine
initSherpaModel() initSherpaModel()
@@ -66,7 +74,7 @@ class TestModelActivity : AppCompatActivity() {
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 1. ENGINE INITIALIZATION (The "Missing Code") // 1. ENGINE INITIALIZATION
// ---------------------------------------------------------------- // ----------------------------------------------------------------
private fun initSherpaModel() { private fun initSherpaModel() {
val modelDir = File(filesDir, "sherpa-model") val modelDir = File(filesDir, "sherpa-model")
@@ -77,14 +85,12 @@ class TestModelActivity : AppCompatActivity() {
} }
try { try {
// 1. Define Model Paths
val transducerConfig = OnlineTransducerModelConfig( val transducerConfig = OnlineTransducerModelConfig(
encoder = File(modelDir, "encoder.onnx").absolutePath, encoder = File(modelDir, "encoder.onnx").absolutePath,
decoder = File(modelDir, "decoder.onnx").absolutePath, decoder = File(modelDir, "decoder.onnx").absolutePath,
joiner = File(modelDir, "joiner.onnx").absolutePath joiner = File(modelDir, "joiner.onnx").absolutePath
) )
// 2. Define General Config
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig( val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
transducer = transducerConfig, transducer = transducerConfig,
tokens = File(modelDir, "tokens.txt").absolutePath, tokens = File(modelDir, "tokens.txt").absolutePath,
@@ -93,29 +99,38 @@ class TestModelActivity : AppCompatActivity() {
modelType = "zipformer" modelType = "zipformer"
) )
// 3. Define Endpoint Rule (The fix for your error)
// rule1 = detected silence after speech. We set this to 2.4 seconds.
val silenceRule = EndpointRule( val silenceRule = EndpointRule(
mustContainNonSilence = false, mustContainNonSilence = false,
minTrailingSilence = 2.4f, minTrailingSilence = 1.2f,
minUtteranceLength = 0.0f minUtteranceLength = 0.0f
) )
// 4. Create Recognizer Config
val config = OnlineRecognizerConfig( val config = OnlineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80), featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = onlineModelConfig, modelConfig = onlineModelConfig,
endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here endpointConfig = EndpointConfig(rule1 = silenceRule),
enableEndpoint = true, enableEndpoint = true,
decodingMethod = "greedy_search", decodingMethod = "greedy_search",
maxActivePaths = 4 maxActivePaths = 4
) )
// recognizer = OnlineRecognizer(assetManager = assets, config = config)
recognizer = OnlineRecognizer(config = config) recognizer = OnlineRecognizer(config = config)
stream = recognizer?.createStream() stream = recognizer?.createStream()
outputText.text = "Engine Loaded. Ready to Stream." outputText.text = "Engine Loaded. Ready."
// Initialize Punctuation Engine
val punctPath = File(modelDir, "punct_model.onnx").absolutePath
if (File(punctPath).exists()) {
val punctConfig = OfflinePunctuationConfig(
model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
)
punctuator = OfflinePunctuation(config = punctConfig)
outputText.append("\n+ Punctuation Ready")
} else {
outputText.append("\n(No Punctuation model found)")
}
} catch (e: Exception) { } catch (e: Exception) {
Log.e("Sherpa", "Init Error", e) Log.e("Sherpa", "Init Error", e)
@@ -124,7 +139,7 @@ class TestModelActivity : AppCompatActivity() {
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 2. AUDIO LOOP (The "Manual" Listener) // 2. AUDIO LOOP
// ---------------------------------------------------------------- // ----------------------------------------------------------------
private fun toggleRecording() { private fun toggleRecording() {
if (isRecording) { if (isRecording) {
@@ -140,9 +155,14 @@ class TestModelActivity : AppCompatActivity() {
return return
} }
// Reset the stream for a new session // Before we start, check USB connection again!
// Note: Sherpa streams can be persistent, but resetting ensures clean start if (usbPort == null) {
// If you want continuous conversation, don't reset 'committedText' attemptUsbConnection()
}
stream?.let { activeStream ->
recognizer?.reset(activeStream)
}
isRecording = true isRecording = true
micButton.setColorFilter(android.graphics.Color.RED) micButton.setColorFilter(android.graphics.Color.RED)
@@ -158,9 +178,6 @@ class TestModelActivity : AppCompatActivity() {
isRecording = false isRecording = false
recordingThread?.join() recordingThread?.join()
micButton.clearColorFilter() micButton.clearColorFilter()
// Just show what we have, don't overwrite with "[Stopped]"
// to prevent visual jarring.
outputText.append("\n[Stopped]") outputText.append("\n[Stopped]")
} }
@@ -168,9 +185,6 @@ class TestModelActivity : AppCompatActivity() {
val sampleRate = 16000 val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT) val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// 1. GUARD CLAUSE: Unpack nullables safely
// If recognizer or stream are null, we stop immediately.
// This creates 'localRec' and 'localStream' which are GUARANTEED non-null.
val localRec = recognizer ?: return val localRec = recognizer ?: return
val localStream = stream ?: return val localStream = stream ?: return
@@ -187,8 +201,6 @@ class TestModelActivity : AppCompatActivity() {
val ret = record.read(buffer, 0, buffer.size) val ret = record.read(buffer, 0, buffer.size)
if (ret > 0) { if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f } val samples = FloatArray(ret) { buffer[it] / 32768.0f }
// 2. Use the LOCAL (non-null) variables
localStream.acceptWaveform(samples, sampleRate) localStream.acceptWaveform(samples, sampleRate)
while (localRec.isReady(localStream)) { while (localRec.isReady(localStream)) {
@@ -201,50 +213,126 @@ class TestModelActivity : AppCompatActivity() {
if (text.isNotEmpty()) { if (text.isNotEmpty()) {
val cleanText = text.lowercase() val cleanText = text.lowercase()
runOnUiThread {
if (isEndpoint) { if (isEndpoint) {
committedText += "$cleanText " // CASE A: Natural Pause (Sentence Finished)
val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
runOnUiThread {
committedText += "$punctuatedText "
outputText.text = committedText outputText.text = committedText
sendToPico("$cleanText ") sendToPico("$punctuatedText ")
}
localRec.reset(localStream) localRec.reset(localStream)
} else { } else {
// CASE B: Partial (Still talking)
// Update screen ONLY, do not send to Pico yet
runOnUiThread {
outputText.text = "$committedText $cleanText" outputText.text = "$committedText $cleanText"
} }
} }
} }
} }
} }
// --- NEW: THE FLUSH BLOCK ---
// This runs when you hit "Stop". It grabs any remaining recognized text that has not been committed yet.
val finalTail = localRec.getResult(localStream).text
if (finalTail.isNotEmpty()) {
val cleanTail = finalTail.lowercase()
val punctuatedTail = punctuator?.addPunctuation(cleanTail) ?: cleanTail
runOnUiThread {
committedText += "$punctuatedTail "
outputText.text = committedText
// FORCE SEND the remaining text
sendToPico("$punctuatedTail ")
}
// Reset for next time
localRec.reset(localStream)
}
// -----------------------------
record.stop() record.stop()
record.release() record.release()
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 3. USB LOGIC (Unchanged from before) // 3. ROBUST USB LOGIC (FIXED)
// ---------------------------------------------------------------- // ----------------------------------------------------------------
private fun connectToPico() {
val usbManager = getSystemService(Context.USB_SERVICE) as UsbManager
val availableDrivers = UsbSerialProber.getDefaultProber().findAllDrivers(usbManager)
if (availableDrivers.isEmpty()) return
// RENAMED from 'connectToPico' to be clearer
private fun attemptUsbConnection() {
val usbManager = getSystemService(Context.USB_SERVICE) as UsbManager
// 1. Find Driver
val availableDrivers = UsbSerialProber.getDefaultProber().findAllDrivers(usbManager)
if (availableDrivers.isEmpty()) {
Log.d("USB", "No drivers found")
return
}
val driver = availableDrivers[0] val driver = availableDrivers[0]
val connection = usbManager.openDevice(driver.device) ?: return
// 2. Check Permission
if (!usbManager.hasPermission(driver.device)) {
Log.d("USB", "Requesting Permission...")
val pendingIntent = android.app.PendingIntent.getBroadcast(
this,
0,
android.content.Intent(ACTION_USB_PERMISSION),
android.app.PendingIntent.FLAG_IMMUTABLE
)
usbManager.requestPermission(driver.device, pendingIntent)
return
}
// 3. Open Connection
try {
val connection = usbManager.openDevice(driver.device)
if (connection == null) {
Log.e("USB", "openDevice returned null")
return
}
// Clean up old port if exists
try { usbPort?.close() } catch (e: Exception) {}
usbPort = driver.ports[0] usbPort = driver.ports[0]
try {
usbPort?.open(connection) usbPort?.open(connection)
usbPort?.setParameters(115200, 8, UsbSerialPort.STOPBITS_1, UsbSerialPort.PARITY_NONE) usbPort?.setParameters(115200, 8, UsbSerialPort.STOPBITS_1, UsbSerialPort.PARITY_NONE)
outputText.append("\n> USB Connected")
Log.d("USB", "Success! Connected at 115200")
// UI Feedback
runOnUiThread {
Toast.makeText(this, "USB Connected", Toast.LENGTH_SHORT).show()
}
} catch (e: Exception) { } catch (e: Exception) {
outputText.append("\n> USB Error: ${e.message}") Log.e("USB", "Connection Error", e)
usbPort = null
} }
} }
private fun sendToPico(text: String) { private fun sendToPico(text: String) {
if (usbPort == null) return // AUTO-RECONNECT FEATURE
if (usbPort == null) {
Log.d("USB", "Port null, trying to reconnect...")
attemptUsbConnection()
if (usbPort == null) {
Log.e("USB", "Reconnect failed.")
return
}
}
try { try {
usbPort?.write(text.toByteArray(Charsets.UTF_8), 500) val data = text.toByteArray(Charsets.UTF_8)
usbPort?.write(data, 500)
Log.d("USB", "Sent: $text")
} catch (e: Exception) { } catch (e: Exception) {
// Log error Log.e("USB", "Write Failed", e)
// Force reset on next try
usbPort = null
} }
} }

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@color/ic_launcher_background"/>
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
</adaptive-icon>

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@color/ic_launcher_background"/>
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
</adaptive-icon>

View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 982 B

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 3.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.6 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="ic_launcher_background">#0878F5</color>
</resources>