5 Commits

7 changed files with 138 additions and 49 deletions

View File

@@ -4,10 +4,10 @@
<selectionStates>
<SelectionState runConfigName="app">
<option name="selectionMode" value="DROPDOWN" />
<DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z">
<DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
<Target type="DEFAULT_BOOT">
<handle>
<DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" />
<DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
</handle>
</Target>
</DropdownSelection>

1
.idea/misc.xml generated
View File

@@ -1,4 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="jbr-21" project-jdk-type="JavaSDK">

2
app/.gitignore vendored
View File

@@ -44,7 +44,7 @@ captures/
# Keystore files (NEVER commit release keystores! Only the shared app/debug.keystore is tracked on purpose.)
*.jks
*.keystore
# *.keystore
*.pem
# OS-specific files

View File

@@ -7,12 +7,22 @@ android {
namespace = "net.mmanningau.speechtokeyboard"
compileSdk = 36
// Overrides the built-in "debug" signing config so it signs with the keystore
// file that sits next to this build file instead of a per-machine one.
// NOTE(review): presumably so debug builds from different machines share one
// signature and can be installed over each other — confirm.
signingConfigs {
getByName("debug") {
// This tells Gradle to look for the key in the same folder as this build file
storeFile = file("debug.keystore")
// NOTE(review): these look like the stock Android debug-keystore credentials;
// they must never be reused for a release signing config — confirm.
storePassword = "android"
keyAlias = "androiddebugkey"
keyPassword = "android"
}
}
defaultConfig {
applicationId = "net.mmanningau.speechtokeyboard"
minSdk = 28
targetSdk = 36
versionCode = 10
versionName = "1.0"
versionCode = 15
versionName = "1.1.3"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
}
@@ -29,6 +39,8 @@ android {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "MyApp (Dev)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
// Explicitly tell the debug build to use the config we defined above
signingConfig = signingConfigs["debug"]
}
}
compileOptions {

BIN
app/debug.keystore Normal file

Binary file not shown.

View File

@@ -37,6 +37,8 @@
<activity
android:name=".TestModelActivity"
android:parentActivityName=".MainActivity"
android:exported="false"
android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
android:label="Test Microphone" />
</application>

View File

@@ -17,7 +17,6 @@ import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import com.hoho.android.usbserial.driver.UsbSerialPort
import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule
import com.k2fsa.sherpa.onnx.FeatureConfig
@@ -26,6 +25,9 @@ import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream
import java.io.File
import com.k2fsa.sherpa.onnx.OfflinePunctuation
import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
class TestModelActivity : AppCompatActivity() {
@@ -39,8 +41,12 @@ class TestModelActivity : AppCompatActivity() {
private var isRecording = false
private var recordingThread: Thread? = null
// Punctuation variables
private var punctuator: OfflinePunctuation? = null
// USB Components
private var usbPort: UsbSerialPort? = null
private val ACTION_USB_PERMISSION = "net.mmanningau.speechtokeyboard.USB_PERMISSION"
// Text History
private var committedText = ""
@@ -54,7 +60,9 @@ class TestModelActivity : AppCompatActivity() {
micButton = findViewById(R.id.btn_mic_toggle)
checkAudioPermission()
connectToPico() // Try to auto-connect USB on start
// Try to connect immediately on startup
attemptUsbConnection()
// Initialize Engine
initSherpaModel()
@@ -66,7 +74,7 @@ class TestModelActivity : AppCompatActivity() {
}
// ----------------------------------------------------------------
// 1. ENGINE INITIALIZATION (The "Missing Code")
// 1. ENGINE INITIALIZATION
// ----------------------------------------------------------------
private fun initSherpaModel() {
val modelDir = File(filesDir, "sherpa-model")
@@ -77,14 +85,12 @@ class TestModelActivity : AppCompatActivity() {
}
try {
// 1. Define Model Paths
val transducerConfig = OnlineTransducerModelConfig(
encoder = File(modelDir, "encoder.onnx").absolutePath,
decoder = File(modelDir, "decoder.onnx").absolutePath,
joiner = File(modelDir, "joiner.onnx").absolutePath
)
// 2. Define General Config
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
transducer = transducerConfig,
tokens = File(modelDir, "tokens.txt").absolutePath,
@@ -93,29 +99,38 @@ class TestModelActivity : AppCompatActivity() {
modelType = "zipformer"
)
// 3. Define Endpoint Rule (The fix for your error)
// rule1 = detected silence after speech. We set this to 2.4 seconds.
val silenceRule = EndpointRule(
mustContainNonSilence = false,
minTrailingSilence = 2.4f,
minTrailingSilence = 1.2f,
minUtteranceLength = 0.0f
)
// 4. Create Recognizer Config
val config = OnlineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = onlineModelConfig,
endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
endpointConfig = EndpointConfig(rule1 = silenceRule),
enableEndpoint = true,
decodingMethod = "greedy_search",
maxActivePaths = 4
)
// recognizer = OnlineRecognizer(assetManager = assets, config = config)
recognizer = OnlineRecognizer(config = config)
stream = recognizer?.createStream()
outputText.text = "Engine Loaded. Ready to Stream."
outputText.text = "Engine Loaded. Ready."
// Initialize Punctuation Engine
val punctPath = File(modelDir, "punct_model.onnx").absolutePath
if (File(punctPath).exists()) {
val punctConfig = OfflinePunctuationConfig(
model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
)
punctuator = OfflinePunctuation(config = punctConfig)
outputText.append("\n+ Punctuation Ready")
} else {
outputText.append("\n(No Punctuation model found)")
}
} catch (e: Exception) {
Log.e("Sherpa", "Init Error", e)
@@ -124,7 +139,7 @@ class TestModelActivity : AppCompatActivity() {
}
// ----------------------------------------------------------------
// 2. AUDIO LOOP (The "Manual" Listener)
// 2. AUDIO LOOP
// ----------------------------------------------------------------
private fun toggleRecording() {
if (isRecording) {
@@ -140,9 +155,11 @@ class TestModelActivity : AppCompatActivity() {
return
}
// FIX 1: CLEAR THE BUFFER
// This prevents the "ghost text" from the previous session appearing
// when you hit record again.
// Before we start, check USB connection again!
if (usbPort == null) {
attemptUsbConnection()
}
stream?.let { activeStream ->
recognizer?.reset(activeStream)
}
@@ -161,9 +178,6 @@ class TestModelActivity : AppCompatActivity() {
isRecording = false
recordingThread?.join()
micButton.clearColorFilter()
// Just show what we have, don't overwrite with "[Stopped]"
// to prevent visual jarring.
outputText.append("\n[Stopped]")
}
@@ -171,7 +185,6 @@ class TestModelActivity : AppCompatActivity() {
val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// Guard clauses
val localRec = recognizer ?: return
val localStream = stream ?: return
@@ -188,7 +201,6 @@ class TestModelActivity : AppCompatActivity() {
val ret = record.read(buffer, 0, buffer.size)
if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
localStream.acceptWaveform(samples, sampleRate)
while (localRec.isReady(localStream)) {
@@ -202,23 +214,18 @@ class TestModelActivity : AppCompatActivity() {
val cleanText = text.lowercase()
if (isEndpoint) {
// FIX 2: THE ORDER OF OPERATIONS
// CASE A: Natural Pause (Sentence Finished)
val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
// A. Update UI first
runOnUiThread {
committedText += "$cleanText "
committedText += "$punctuatedText "
outputText.text = committedText
sendToPico("$cleanText ")
sendToPico("$punctuatedText ")
}
// B. RESET IMMEDIATELY ON BACKGROUND THREAD
// We do this HERE, not inside runOnUiThread.
// This guarantees the stream is clean BEFORE the loop
// reads the next chunk of audio.
localRec.reset(localStream)
} else {
// Standard partial update
// CASE B: Partial (Still talking)
// Update screen ONLY, do not send to Pico yet
runOnUiThread {
outputText.text = "$committedText $cleanText"
}
@@ -226,37 +233,106 @@ class TestModelActivity : AppCompatActivity() {
}
}
}
// --- NEW: THE FLUSH BLOCK ---
// This runs when you hit "Stop". It flushes the last words that were recognized
// but not yet committed by an endpoint, so they are not lost.
val finalTail = localRec.getResult(localStream).text
if (finalTail.isNotEmpty()) {
val cleanTail = finalTail.lowercase()
val punctuatedTail = punctuator?.addPunctuation(cleanTail) ?: cleanTail
runOnUiThread {
committedText += "$punctuatedTail "
outputText.text = committedText
// FORCE SEND the remaining text
sendToPico("$punctuatedTail ")
}
// Reset for next time
localRec.reset(localStream)
}
// -----------------------------
record.stop()
record.release()
}
// ----------------------------------------------------------------
// 3. USB LOGIC (Unchanged from before)
// 3. ROBUST USB LOGIC (FIXED)
// ----------------------------------------------------------------
/**
 * Tries to open the first USB serial device that the prober reports and
 * configures it for 115200 baud, 8 data bits, 1 stop bit, no parity.
 * (Renamed from `connectToPico`.)
 *
 * Outcomes:
 *  - no serial driver found: logs and returns, [usbPort] is left untouched;
 *  - permission not yet granted: asks the system for it and returns; the
 *    connection is retried by a later call;
 *  - success: [usbPort] refers to the opened port and a toast is shown;
 *  - any failure while opening: [usbPort] is set to null so the next send retries.
 */
private fun attemptUsbConnection() {
    val usbManager = getSystemService(Context.USB_SERVICE) as UsbManager

    // 1. Find Driver
    val driver = UsbSerialProber.getDefaultProber().findAllDrivers(usbManager).firstOrNull()
    if (driver == null) {
        Log.d("USB", "No drivers found")
        return
    }

    // 2. Check Permission (must happen before openDevice is attempted)
    if (!usbManager.hasPermission(driver.device)) {
        Log.d("USB", "Requesting Permission...")
        // NOTE(review): no receiver for ACTION_USB_PERMISSION is visible in this
        // file. If one is added to read the grant result, this PendingIntent
        // probably needs to be mutable — confirm against the platform docs.
        val pendingIntent = android.app.PendingIntent.getBroadcast(
            this,
            0,
            android.content.Intent(ACTION_USB_PERMISSION),
            android.app.PendingIntent.FLAG_IMMUTABLE
        )
        usbManager.requestPermission(driver.device, pendingIntent)
        return
    }

    // 3. Open Connection
    try {
        val connection = usbManager.openDevice(driver.device)
        if (connection == null) {
            Log.e("USB", "openDevice returned null")
            return
        }

        // Clean up old port if exists; a failure here is not fatal but is logged.
        try {
            usbPort?.close()
        } catch (e: Exception) {
            Log.w("USB", "Closing previous port failed", e)
        }

        usbPort = driver.ports[0]
        usbPort?.open(connection)
        usbPort?.setParameters(115200, 8, UsbSerialPort.STOPBITS_1, UsbSerialPort.PARITY_NONE)
        Log.d("USB", "Success! Connected at 115200")

        // UI Feedback
        runOnUiThread {
            Toast.makeText(this, "USB Connected", Toast.LENGTH_SHORT).show()
        }
    } catch (e: Exception) {
        Log.e("USB", "Connection Error", e)
        // Release a half-opened port (e.g. open succeeded but setParameters
        // threw) before dropping the reference, so the retry starts clean.
        try {
            usbPort?.close()
        } catch (closeError: Exception) {
            Log.w("USB", "Closing port after failed connect also failed", closeError)
        }
        usbPort = null
    }
}
/**
 * Sends [text] to the USB serial port as UTF-8 bytes.
 *
 * If no port is currently open, one reconnect is attempted first via
 * [attemptUsbConnection]; when that fails the text is dropped and an error is
 * logged. If the write itself throws, [usbPort] is cleared so that the next
 * call goes through the reconnect path again.
 *
 * @param text the text to transmit; it is written exactly once per call.
 */
private fun sendToPico(text: String) {
    // AUTO-RECONNECT FEATURE
    val port = usbPort ?: run {
        Log.d("USB", "Port null, trying to reconnect...")
        attemptUsbConnection()
        usbPort
    }
    if (port == null) {
        Log.e("USB", "Reconnect failed.")
        return
    }

    try {
        val data = text.toByteArray(Charsets.UTF_8)
        // Second argument is the write timeout passed to the serial library.
        port.write(data, 500)
        Log.d("USB", "Sent: $text")
    } catch (e: Exception) {
        Log.e("USB", "Write Failed", e)
        // Force reset on next try
        usbPort = null
    }
}