7 Commits

Author SHA1 Message Date
f96189a509 Updated the processAudioLoop to make sure that the buffer is always flushed after an output stream so we don't miss any characters. Also made many modifications to the Python script running on the Pico to ensure that we now get dual inputs and neater outputs. 2026-02-09 14:48:55 +11:00
75b63f91ea Updated to continually check for a valid USB connection on each press of the microphone, and also totally rewrote the send-to-Pico code to be more robust... 2026-02-03 20:39:00 +11:00
9f6d67a567 Added the debug.keystore to the repo so that I can move between devices and build the app without getting installation errors on the phone.
Also updated TestModelActivity to use more robust USB permission and connection testing....
2026-01-30 16:40:42 +11:00
2a8f004916 Updated the AndroidManifest.xml to stop the default function exit call upon screen rotation and just have it remain inside the "startRecording" function. 2026-01-26 15:38:34 +11:00
ac7d51b46e Added the punctuation sections to the code, but have not added the zip extraction to include this automatically - might have to add another upload manager specifically for this one file, I think, as it is really messy to get your hands on through other means.... 2026-01-23 12:52:24 +11:00
f17c6ab84e Updated to finally fix the doubled-up text that sometimes appeared during translation at the start of the second press of the microphone button 2026-01-23 12:24:56 +11:00
cce093db4e Added icon for Android display 2026-01-23 10:53:03 +11:00
8 changed files with 191 additions and 112 deletions

View File

@@ -4,10 +4,10 @@
<selectionStates> <selectionStates>
<SelectionState runConfigName="app"> <SelectionState runConfigName="app">
<option name="selectionMode" value="DROPDOWN" /> <option name="selectionMode" value="DROPDOWN" />
<DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z"> <DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
<Target type="DEFAULT_BOOT"> <Target type="DEFAULT_BOOT">
<handle> <handle>
<DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" /> <DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
</handle> </handle>
</Target> </Target>
</DropdownSelection> </DropdownSelection>

1
.idea/misc.xml generated
View File

@@ -1,4 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" /> <component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="jbr-21" project-jdk-type="JavaSDK"> <component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="jbr-21" project-jdk-type="JavaSDK">

2
app/.gitignore vendored
View File

@@ -44,7 +44,7 @@ captures/
# Keystore files (NEVER commit these!) # Keystore files (NEVER commit these!)
*.jks *.jks
*.keystore # *.keystore
*.pem *.pem
# OS-specific files # OS-specific files

View File

@@ -7,12 +7,22 @@ android {
namespace = "net.mmanningau.speechtokeyboard" namespace = "net.mmanningau.speechtokeyboard"
compileSdk = 36 compileSdk = 36
signingConfigs {
getByName("debug") {
// This tells Gradle to look for the key in the same folder as this build file
storeFile = file("debug.keystore")
storePassword = "android"
keyAlias = "androiddebugkey"
keyPassword = "android"
}
}
defaultConfig { defaultConfig {
applicationId = "net.mmanningau.speechtokeyboard" applicationId = "net.mmanningau.speechtokeyboard"
minSdk = 28 minSdk = 28
targetSdk = 36 targetSdk = 36
versionCode = 10 versionCode = 15
versionName = "1.1" versionName = "1.1.3"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
} }
@@ -25,6 +35,13 @@ android {
"proguard-rules.pro" "proguard-rules.pro"
) )
} }
debug {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "Speech To Keyboard (Streaming)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
// Explicitly tell the debug build to use the config we defined above
signingConfig = signingConfigs["debug"]
}
} }
compileOptions { compileOptions {
sourceCompatibility = JavaVersion.VERSION_11 sourceCompatibility = JavaVersion.VERSION_11

BIN
app/debug.keystore Normal file

Binary file not shown.

View File

@@ -37,6 +37,8 @@
<activity <activity
android:name=".TestModelActivity" android:name=".TestModelActivity"
android:parentActivityName=".MainActivity" android:parentActivityName=".MainActivity"
android:exported="false"
android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
android:label="Test Microphone" /> android:label="Test Microphone" />
</application> </application>

View File

@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
var entry = zipInputStream.nextEntry var entry = zipInputStream.nextEntry
var foundEncoder = false var foundEncoder = false
var foundDecoder = false var foundDecoder = false
// var foundJoiner = false - removed for true Whisper model use var foundJoiner = false
var foundTokens = false var foundTokens = false
while (entry != null) { while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
val targetFileName = when { val targetFileName = when {
name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx" name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx" name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
// name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
name.contains("tokens.txt") -> "tokens.txt" name.contains("tokens.txt") -> "tokens.txt"
else -> null else -> null
} }
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
when (targetFileName) { when (targetFileName) {
"encoder.onnx" -> foundEncoder = true "encoder.onnx" -> foundEncoder = true
"decoder.onnx" -> foundDecoder = true "decoder.onnx" -> foundDecoder = true
// "joiner.onnx" -> foundJoiner = true = re,moved for true Whisper model use "joiner.onnx" -> foundJoiner = true
"tokens.txt" -> foundTokens = true "tokens.txt" -> foundTokens = true
} }
} }
@@ -124,8 +124,7 @@ class MainActivity : AppCompatActivity() {
} }
runOnUiThread { runOnUiThread {
// if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
if (foundEncoder && foundDecoder && foundTokens) {
statusText.text = "Model Installed Successfully!" statusText.text = "Model Installed Successfully!"
Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show() Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
} else { } else {

View File

@@ -17,27 +17,17 @@ import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat import androidx.core.content.ContextCompat
import com.hoho.android.usbserial.driver.UsbSerialPort import com.hoho.android.usbserial.driver.UsbSerialPort
import com.hoho.android.usbserial.driver.UsbSerialProber import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule import com.k2fsa.sherpa.onnx.EndpointRule
/*
import com.k2fsa.sherpa.onnx.FeatureConfig import com.k2fsa.sherpa.onnx.FeatureConfig
import com.k2fsa.sherpa.onnx.OnlineModelConfig
import com.k2fsa.sherpa.onnx.OnlineRecognizer import com.k2fsa.sherpa.onnx.OnlineRecognizer
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream import com.k2fsa.sherpa.onnx.OnlineStream
*/
// Below for the "offline" libraries and the true Whisper integration
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineStream
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.OfflineModelConfig
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
import com.k2fsa.sherpa.onnx.FeatureConfig
import java.io.File import java.io.File
import com.k2fsa.sherpa.onnx.OfflinePunctuation
import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
class TestModelActivity : AppCompatActivity() { class TestModelActivity : AppCompatActivity() {
@@ -46,15 +36,17 @@ class TestModelActivity : AppCompatActivity() {
private lateinit var micButton: ImageButton private lateinit var micButton: ImageButton
// Sherpa (Whisper) Components // Sherpa (Whisper) Components
// private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model usa private var recognizer: OnlineRecognizer? = null
// private var stream: OnlineStream? = null // - Removed for true Whisper model usa private var stream: OnlineStream? = null
private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
private var stream: OfflineStream? = null // Was OnlineStream
private var isRecording = false private var isRecording = false
private var recordingThread: Thread? = null private var recordingThread: Thread? = null
// Punctuation variables
private var punctuator: OfflinePunctuation? = null
// USB Components // USB Components
private var usbPort: UsbSerialPort? = null private var usbPort: UsbSerialPort? = null
private val ACTION_USB_PERMISSION = "net.mmanningau.speechtokeyboard.USB_PERMISSION"
// Text History // Text History
private var committedText = "" private var committedText = ""
@@ -68,7 +60,9 @@ class TestModelActivity : AppCompatActivity() {
micButton = findViewById(R.id.btn_mic_toggle) micButton = findViewById(R.id.btn_mic_toggle)
checkAudioPermission() checkAudioPermission()
connectToPico() // Try to auto-connect USB on start
// Try to connect immediately on startup
attemptUsbConnection()
// Initialize Engine // Initialize Engine
initSherpaModel() initSherpaModel()
@@ -80,7 +74,7 @@ class TestModelActivity : AppCompatActivity() {
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 1. ENGINE INITIALIZATION (The "Missing Code") // 1. ENGINE INITIALIZATION
// ---------------------------------------------------------------- // ----------------------------------------------------------------
private fun initSherpaModel() { private fun initSherpaModel() {
val modelDir = File(filesDir, "sherpa-model") val modelDir = File(filesDir, "sherpa-model")
@@ -90,37 +84,53 @@ class TestModelActivity : AppCompatActivity() {
return return
} }
// 1. Point to your files
val encoderPath = File(modelDir, "encoder.onnx").absolutePath
val decoderPath = File(modelDir, "decoder.onnx").absolutePath
val tokensPath = File(modelDir, "tokens.txt").absolutePath
try { try {
// CONFIGURATION FOR WHISPER (OFFLINE) val transducerConfig = OnlineTransducerModelConfig(
val config = OfflineRecognizerConfig( encoder = File(modelDir, "encoder.onnx").absolutePath,
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80), decoder = File(modelDir, "decoder.onnx").absolutePath,
modelConfig = OfflineModelConfig( joiner = File(modelDir, "joiner.onnx").absolutePath
// This parameter 'whisper' exists here! )
whisper = OfflineWhisperModelConfig(
encoder = encoderPath, val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
decoder = decoderPath, transducer = transducerConfig,
// tokenizer is not strictly needed in config here if passed in tokens param below tokens = File(modelDir, "tokens.txt").absolutePath,
// but usually standard offline config uses just these two: numThreads = 1,
),
tokens = tokensPath,
modelType = "whisper",
debug = false, debug = false,
numThreads = 1 modelType = "zipformer"
), )
val silenceRule = EndpointRule(
mustContainNonSilence = false,
minTrailingSilence = 1.2f,
minUtteranceLength = 0.0f
)
val config = OnlineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = onlineModelConfig,
endpointConfig = EndpointConfig(rule1 = silenceRule),
enableEndpoint = true,
decodingMethod = "greedy_search", decodingMethod = "greedy_search",
maxActivePaths = 4 maxActivePaths = 4
) )
// Initialize OFFLINE Engine recognizer = OnlineRecognizer(config = config)
recognizer = OfflineRecognizer(config = config)
stream = recognizer?.createStream() stream = recognizer?.createStream()
outputText.text = "Whisper Engine Ready." outputText.text = "Engine Loaded. Ready."
// Initialize Punctuation Engine
val punctPath = File(modelDir, "punct_model.onnx").absolutePath
if (File(punctPath).exists()) {
val punctConfig = OfflinePunctuationConfig(
model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
)
punctuator = OfflinePunctuation(config = punctConfig)
outputText.append("\n+ Punctuation Ready")
} else {
outputText.append("\n(No Punctuation model found)")
}
} catch (e: Exception) { } catch (e: Exception) {
Log.e("Sherpa", "Init Error", e) Log.e("Sherpa", "Init Error", e)
@@ -129,7 +139,7 @@ class TestModelActivity : AppCompatActivity() {
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 2. AUDIO LOOP (The "Manual" Listener) // 2. AUDIO LOOP
// ---------------------------------------------------------------- // ----------------------------------------------------------------
private fun toggleRecording() { private fun toggleRecording() {
if (isRecording) { if (isRecording) {
@@ -145,9 +155,14 @@ class TestModelActivity : AppCompatActivity() {
return return
} }
// Reset the stream for a new session // Before we start, check USB connection again!
// Note: Sherpa streams can be persistent, but resetting ensures clean start if (usbPort == null) {
// If you want continuous conversation, don't reset 'committedText' attemptUsbConnection()
}
stream?.let { activeStream ->
recognizer?.reset(activeStream)
}
isRecording = true isRecording = true
micButton.setColorFilter(android.graphics.Color.RED) micButton.setColorFilter(android.graphics.Color.RED)
@@ -161,46 +176,15 @@ class TestModelActivity : AppCompatActivity() {
private fun stopRecording() { private fun stopRecording() {
isRecording = false isRecording = false
try { recordingThread?.join()
recordingThread?.join() // Wait for loop to finish
} catch (e: InterruptedException) {
// Handle interruption if necessary
}
micButton.clearColorFilter() micButton.clearColorFilter()
outputText.append("\n[Stopped]")
// FIX: Safely unwrap 'stream' before passing it to getResult
// This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
val finalCurrentText = stream?.let { activeStream ->
recognizer?.getResult(activeStream)?.text
} ?: ""
val cleanFinal = finalCurrentText.lowercase()
if (cleanFinal.isNotEmpty()) {
// 1. Commit to history
committedText += "$cleanFinal "
// 2. Send to Pico
sendToPico("$cleanFinal ")
// 3. Update UI
outputText.text = "$committedText \n[Stopped]"
// 4. Reset for next time
// We release the old stream and create a fresh one for the next sentence
stream?.release()
stream = recognizer?.createStream()
} else {
outputText.append("\n[Stopped - No Text]")
}
} }
private fun processAudioLoop() { private fun processAudioLoop() {
val sampleRate = 16000 val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT) val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// 1. GUARD CLAUSE (Safely unwrap nullables)
val localRec = recognizer ?: return val localRec = recognizer ?: return
val localStream = stream ?: return val localStream = stream ?: return
@@ -217,60 +201,138 @@ class TestModelActivity : AppCompatActivity() {
val ret = record.read(buffer, 0, buffer.size) val ret = record.read(buffer, 0, buffer.size)
if (ret > 0) { if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f } val samples = FloatArray(ret) { buffer[it] / 32768.0f }
// 2. Feed Audio
localStream.acceptWaveform(samples, sampleRate) localStream.acceptWaveform(samples, sampleRate)
// 3. Decode (No isReady check needed for Offline) while (localRec.isReady(localStream)) {
localRec.decode(localStream) localRec.decode(localStream)
}
// 4. Get Current Text
// Whisper updates this string constantly as it hears more
val text = localRec.getResult(localStream).text val text = localRec.getResult(localStream).text
val isEndpoint = localRec.isEndpoint(localStream)
if (text.isNotEmpty()) { if (text.isNotEmpty()) {
val cleanText = text.lowercase() val cleanText = text.lowercase()
if (isEndpoint) {
// CASE A: Natural Pause (Sentence Finished)
val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
runOnUiThread {
committedText += "$punctuatedText "
outputText.text = committedText
sendToPico("$punctuatedText ")
}
localRec.reset(localStream)
} else {
// CASE B: Partial (Still talking)
// Update screen ONLY, do not send to Pico yet
runOnUiThread { runOnUiThread {
// Update the screen so user sees what is happening
// We do NOT send to USB yet, because Whisper might change this text
// as you keep speaking.
outputText.text = "$committedText $cleanText" outputText.text = "$committedText $cleanText"
} }
} }
} }
} }
}
// --- NEW: THE FLUSH BLOCK ---
// This runs when you hit "Stop". It grabs any trailing words that were recognized but not yet committed.
val finalTail = localRec.getResult(localStream).text
if (finalTail.isNotEmpty()) {
val cleanTail = finalTail.lowercase()
val punctuatedTail = punctuator?.addPunctuation(cleanTail) ?: cleanTail
runOnUiThread {
committedText += "$punctuatedTail "
outputText.text = committedText
// FORCE SEND the remaining text
sendToPico("$punctuatedTail ")
}
// Reset for next time
localRec.reset(localStream)
}
// -----------------------------
record.stop() record.stop()
record.release() record.release()
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 3. USB LOGIC (Unchanged from before) // 3. ROBUST USB LOGIC (FIXED)
// ---------------------------------------------------------------- // ----------------------------------------------------------------
private fun connectToPico() {
val usbManager = getSystemService(Context.USB_SERVICE) as UsbManager
val availableDrivers = UsbSerialProber.getDefaultProber().findAllDrivers(usbManager)
if (availableDrivers.isEmpty()) return
// RENAMED from 'connectToPico' to be clearer
private fun attemptUsbConnection() {
val usbManager = getSystemService(Context.USB_SERVICE) as UsbManager
// 1. Find Driver
val availableDrivers = UsbSerialProber.getDefaultProber().findAllDrivers(usbManager)
if (availableDrivers.isEmpty()) {
Log.d("USB", "No drivers found")
return
}
val driver = availableDrivers[0] val driver = availableDrivers[0]
val connection = usbManager.openDevice(driver.device) ?: return
// 2. Check Permission
if (!usbManager.hasPermission(driver.device)) {
Log.d("USB", "Requesting Permission...")
val pendingIntent = android.app.PendingIntent.getBroadcast(
this,
0,
android.content.Intent(ACTION_USB_PERMISSION),
android.app.PendingIntent.FLAG_IMMUTABLE
)
usbManager.requestPermission(driver.device, pendingIntent)
return
}
// 3. Open Connection
try {
val connection = usbManager.openDevice(driver.device)
if (connection == null) {
Log.e("USB", "openDevice returned null")
return
}
// Clean up old port if exists
try { usbPort?.close() } catch (e: Exception) {}
usbPort = driver.ports[0] usbPort = driver.ports[0]
try {
usbPort?.open(connection) usbPort?.open(connection)
usbPort?.setParameters(115200, 8, UsbSerialPort.STOPBITS_1, UsbSerialPort.PARITY_NONE) usbPort?.setParameters(115200, 8, UsbSerialPort.STOPBITS_1, UsbSerialPort.PARITY_NONE)
outputText.append("\n> USB Connected")
Log.d("USB", "Success! Connected at 115200")
// UI Feedback
runOnUiThread {
Toast.makeText(this, "USB Connected", Toast.LENGTH_SHORT).show()
}
} catch (e: Exception) { } catch (e: Exception) {
outputText.append("\n> USB Error: ${e.message}") Log.e("USB", "Connection Error", e)
usbPort = null
} }
} }
private fun sendToPico(text: String) { private fun sendToPico(text: String) {
if (usbPort == null) return // AUTO-RECONNECT FEATURE
if (usbPort == null) {
Log.d("USB", "Port null, trying to reconnect...")
attemptUsbConnection()
if (usbPort == null) {
Log.e("USB", "Reconnect failed.")
return
}
}
try { try {
usbPort?.write(text.toByteArray(Charsets.UTF_8), 500) val data = text.toByteArray(Charsets.UTF_8)
usbPort?.write(data, 500)
Log.d("USB", "Sent: $text")
} catch (e: Exception) { } catch (e: Exception) {
// Log error Log.e("USB", "Write Failed", e)
// Force reset on next try
usbPort = null
} }
} }