Compare commits

3 Commits

5 changed files with 113 additions and 55 deletions

1
.idea/misc.xml generated
View File

@@ -1,4 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" /> <component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="jbr-21" project-jdk-type="JavaSDK"> <component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="jbr-21" project-jdk-type="JavaSDK">

2
app/.gitignore vendored
View File

@@ -44,7 +44,7 @@ captures/
# Keystore files (NEVER commit these!) # Keystore files (NEVER commit these!)
*.jks *.jks
*.keystore # *.keystore
*.pem *.pem
# OS-specific files # OS-specific files

View File

@@ -7,12 +7,22 @@ android {
namespace = "net.mmanningau.speechtokeyboard" namespace = "net.mmanningau.speechtokeyboard"
compileSdk = 36 compileSdk = 36
signingConfigs {
getByName("debug") {
// This tells Gradle to look for the key in the same folder as this build file
storeFile = file("debug.keystore")
storePassword = "android"
keyAlias = "androiddebugkey"
keyPassword = "android"
}
}
defaultConfig { defaultConfig {
applicationId = "net.mmanningau.speechtokeyboard" applicationId = "net.mmanningau.speechtokeyboard"
minSdk = 28 minSdk = 28
targetSdk = 36 targetSdk = 36
versionCode = 12 versionCode = 15
versionName = "1.1" versionName = "1.1.3"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
} }
@@ -29,6 +39,8 @@ android {
applicationIdSuffix = ".streaming" applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "MyApp (Dev)" // This changes the app name on your homescreen to "MyApp (Dev)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)") resValue("string", "app_name", "Speech To Keyboard (Streaming)")
// Explicitly tell the debug build to use the config we defined above
signingConfig = signingConfigs["debug"]
} }
} }
compileOptions { compileOptions {

BIN
app/debug.keystore Normal file

Binary file not shown.

View File

@@ -17,7 +17,6 @@ import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat import androidx.core.content.ContextCompat
import com.hoho.android.usbserial.driver.UsbSerialPort import com.hoho.android.usbserial.driver.UsbSerialPort
import com.hoho.android.usbserial.driver.UsbSerialProber import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule import com.k2fsa.sherpa.onnx.EndpointRule
import com.k2fsa.sherpa.onnx.FeatureConfig import com.k2fsa.sherpa.onnx.FeatureConfig
@@ -26,7 +25,6 @@ import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream import com.k2fsa.sherpa.onnx.OnlineStream
import java.io.File import java.io.File
import com.k2fsa.sherpa.onnx.OfflinePunctuation import com.k2fsa.sherpa.onnx.OfflinePunctuation
import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
@@ -48,6 +46,7 @@ class TestModelActivity : AppCompatActivity() {
// USB Components // USB Components
private var usbPort: UsbSerialPort? = null private var usbPort: UsbSerialPort? = null
private val ACTION_USB_PERMISSION = "net.mmanningau.speechtokeyboard.USB_PERMISSION"
// Text History // Text History
private var committedText = "" private var committedText = ""
@@ -61,7 +60,9 @@ class TestModelActivity : AppCompatActivity() {
micButton = findViewById(R.id.btn_mic_toggle) micButton = findViewById(R.id.btn_mic_toggle)
checkAudioPermission() checkAudioPermission()
connectToPico() // Try to auto-connect USB on start
// Try to connect immediately on startup
attemptUsbConnection()
// Initialize Engine // Initialize Engine
initSherpaModel() initSherpaModel()
@@ -73,7 +74,7 @@ class TestModelActivity : AppCompatActivity() {
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 1. ENGINE INITIALIZATION (The "Missing Code") // 1. ENGINE INITIALIZATION
// ---------------------------------------------------------------- // ----------------------------------------------------------------
private fun initSherpaModel() { private fun initSherpaModel() {
val modelDir = File(filesDir, "sherpa-model") val modelDir = File(filesDir, "sherpa-model")
@@ -84,14 +85,12 @@ class TestModelActivity : AppCompatActivity() {
} }
try { try {
// 1. Define Model Paths
val transducerConfig = OnlineTransducerModelConfig( val transducerConfig = OnlineTransducerModelConfig(
encoder = File(modelDir, "encoder.onnx").absolutePath, encoder = File(modelDir, "encoder.onnx").absolutePath,
decoder = File(modelDir, "decoder.onnx").absolutePath, decoder = File(modelDir, "decoder.onnx").absolutePath,
joiner = File(modelDir, "joiner.onnx").absolutePath joiner = File(modelDir, "joiner.onnx").absolutePath
) )
// 2. Define General Config
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig( val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
transducer = transducerConfig, transducer = transducerConfig,
tokens = File(modelDir, "tokens.txt").absolutePath, tokens = File(modelDir, "tokens.txt").absolutePath,
@@ -100,41 +99,33 @@ class TestModelActivity : AppCompatActivity() {
modelType = "zipformer" modelType = "zipformer"
) )
// 3. Define Endpoint Rule (The fix for your error)
// rule1 = detected silence after speech. We set this to 2.4 seconds.
val silenceRule = EndpointRule( val silenceRule = EndpointRule(
mustContainNonSilence = false, mustContainNonSilence = false,
minTrailingSilence = 2.4f, minTrailingSilence = 1.2f,
minUtteranceLength = 0.0f minUtteranceLength = 0.0f
) )
// 4. Create Recognizer Config
val config = OnlineRecognizerConfig( val config = OnlineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80), featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = onlineModelConfig, modelConfig = onlineModelConfig,
endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here endpointConfig = EndpointConfig(rule1 = silenceRule),
enableEndpoint = true, enableEndpoint = true,
decodingMethod = "greedy_search", decodingMethod = "greedy_search",
maxActivePaths = 4 maxActivePaths = 4
) )
// recognizer = OnlineRecognizer(assetManager = assets, config = config)
recognizer = OnlineRecognizer(config = config) recognizer = OnlineRecognizer(config = config)
stream = recognizer?.createStream() stream = recognizer?.createStream()
outputText.text = "Engine Loaded. Ready to Stream." outputText.text = "Engine Loaded. Ready."
// ... existing recognizer init code ... // Initialize Punctuation Engine
// 5. Initialize Punctuation Engine
val punctPath = File(modelDir, "punct_model.onnx").absolutePath val punctPath = File(modelDir, "punct_model.onnx").absolutePath
if (File(punctPath).exists()) { if (File(punctPath).exists()) {
// CORRECTED: Wrap the path inside 'OfflinePunctuationModelConfig'
val punctConfig = OfflinePunctuationConfig( val punctConfig = OfflinePunctuationConfig(
model = OfflinePunctuationModelConfig(ctTransformer = punctPath) model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
) )
punctuator = OfflinePunctuation(config = punctConfig) punctuator = OfflinePunctuation(config = punctConfig)
outputText.append("\n+ Punctuation Ready") outputText.append("\n+ Punctuation Ready")
} else { } else {
@@ -148,7 +139,7 @@ class TestModelActivity : AppCompatActivity() {
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 2. AUDIO LOOP (The "Manual" Listener) // 2. AUDIO LOOP
// ---------------------------------------------------------------- // ----------------------------------------------------------------
private fun toggleRecording() { private fun toggleRecording() {
if (isRecording) { if (isRecording) {
@@ -164,9 +155,11 @@ class TestModelActivity : AppCompatActivity() {
return return
} }
// FIX 1: CLEAR THE BUFFER // Before we start, check USB connection again!
// This prevents the "ghost text" from the previous session appearing if (usbPort == null) {
// when you hit record again. attemptUsbConnection()
}
stream?.let { activeStream -> stream?.let { activeStream ->
recognizer?.reset(activeStream) recognizer?.reset(activeStream)
} }
@@ -185,9 +178,6 @@ class TestModelActivity : AppCompatActivity() {
isRecording = false isRecording = false
recordingThread?.join() recordingThread?.join()
micButton.clearColorFilter() micButton.clearColorFilter()
// Just show what we have, don't overwrite with "[Stopped]"
// to prevent visual jarring.
outputText.append("\n[Stopped]") outputText.append("\n[Stopped]")
} }
@@ -195,7 +185,6 @@ class TestModelActivity : AppCompatActivity() {
val sampleRate = 16000 val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT) val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// Guard clauses
val localRec = recognizer ?: return val localRec = recognizer ?: return
val localStream = stream ?: return val localStream = stream ?: return
@@ -212,7 +201,6 @@ class TestModelActivity : AppCompatActivity() {
val ret = record.read(buffer, 0, buffer.size) val ret = record.read(buffer, 0, buffer.size)
if (ret > 0) { if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f } val samples = FloatArray(ret) { buffer[it] / 32768.0f }
localStream.acceptWaveform(samples, sampleRate) localStream.acceptWaveform(samples, sampleRate)
while (localRec.isReady(localStream)) { while (localRec.isReady(localStream)) {
@@ -226,28 +214,18 @@ class TestModelActivity : AppCompatActivity() {
val cleanText = text.lowercase() val cleanText = text.lowercase()
if (isEndpoint) { if (isEndpoint) {
// FIX 2: THE ORDER OF OPERATIONS // CASE A: Natural Pause (Sentence Finished)
// A. Update UI first
// 1. PUNCTUATE
// We pass the raw text to the punctuator
val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
runOnUiThread { runOnUiThread {
// 2. Commit the BEAUTIFUL text
committedText += "$punctuatedText " committedText += "$punctuatedText "
outputText.text = committedText outputText.text = committedText
sendToPico("$punctuatedText ") sendToPico("$punctuatedText ")
} }
// B. RESET IMMEDIATELY ON BACKGROUND THREAD
// We do this HERE, not inside runOnUiThread.
// This guarantees the stream is clean BEFORE the loop
// reads the next chunk of audio.
localRec.reset(localStream) localRec.reset(localStream)
} else { } else {
// Standard partial update // CASE B: Partial (Still talking)
// Update screen ONLY, do not send to Pico yet
runOnUiThread { runOnUiThread {
outputText.text = "$committedText $cleanText" outputText.text = "$committedText $cleanText"
} }
@@ -255,37 +233,106 @@ class TestModelActivity : AppCompatActivity() {
} }
} }
} }
// --- NEW: THE FLUSH BLOCK ---
// This runs when you hit "Stop". It grabs the last unspoken words.
val finalTail = localRec.getResult(localStream).text
if (finalTail.isNotEmpty()) {
val cleanTail = finalTail.lowercase()
val punctuatedTail = punctuator?.addPunctuation(cleanTail) ?: cleanTail
runOnUiThread {
committedText += "$punctuatedTail "
outputText.text = committedText
// FORCE SEND the remaining text
sendToPico("$punctuatedTail ")
}
// Reset for next time
localRec.reset(localStream)
}
// -----------------------------
record.stop() record.stop()
record.release() record.release()
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// 3. USB LOGIC (Unchanged from before) // 3. ROBUST USB LOGIC (FIXED)
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// RENAMED from 'connectToPico' to be clearer
/**
 * Tries to open a serial link to the first USB serial driver reported by the default prober.
 *
 * Outcomes:
 * - No driver found: logs and returns.
 * - Permission not yet granted: issues a permission request and returns; the caller must
 *   invoke this function again later to actually connect.
 * - Device opened and configured (115200 baud, 8 data bits, 1 stop bit, no parity):
 *   [usbPort] is set to the opened port and a "USB Connected" toast is shown.
 * - Any failure while opening/configuring: the partially opened port is closed
 *   (best effort) and [usbPort] is left null so a later call can retry.
 */
private fun attemptUsbConnection() {
    val usbManager = getSystemService(Context.USB_SERVICE) as UsbManager

    // 1. Find Driver
    val driver = UsbSerialProber.getDefaultProber().findAllDrivers(usbManager).firstOrNull()
    if (driver == null) {
        Log.d("USB", "No drivers found")
        return
    }

    // 2. Check Permission
    if (!usbManager.hasPermission(driver.device)) {
        Log.d("USB", "Requesting Permission...")
        // NOTE(review): FLAG_IMMUTABLE stops the system from attaching result extras to this
        // broadcast. No receiver for ACTION_USB_PERMISSION is visible in this section, so the
        // result appears to be picked up by re-checking hasPermission() on the next attempt —
        // confirm before relying on the broadcast's extras.
        val pendingIntent = android.app.PendingIntent.getBroadcast(
            this,
            0,
            android.content.Intent(ACTION_USB_PERMISSION),
            android.app.PendingIntent.FLAG_IMMUTABLE
        )
        usbManager.requestPermission(driver.device, pendingIntent)
        return
    }

    // 3. Open Connection
    // Held in a local until fully configured so usbPort never points at a half-opened port.
    var candidate: UsbSerialPort? = null
    try {
        val connection = usbManager.openDevice(driver.device)
        if (connection == null) {
            Log.e("USB", "openDevice returned null")
            return
        }

        // Clean up old port if exists; a failure here is not fatal but should be visible.
        try {
            usbPort?.close()
        } catch (e: Exception) {
            Log.w("USB", "Closing previous port failed", e)
        }
        usbPort = null

        candidate = driver.ports.firstOrNull()
        if (candidate == null) {
            Log.e("USB", "Driver exposes no ports")
            connection.close()
            return
        }

        candidate.open(connection)
        candidate.setParameters(115200, 8, UsbSerialPort.STOPBITS_1, UsbSerialPort.PARITY_NONE)
        usbPort = candidate

        Log.d("USB", "Success! Connected at 115200")
        // UI Feedback
        runOnUiThread {
            Toast.makeText(this, "USB Connected", Toast.LENGTH_SHORT).show()
        }
    } catch (e: Exception) {
        Log.e("USB", "Connection Error", e)
        // Best-effort close so a port that opened but failed to configure is not leaked.
        try {
            candidate?.close()
        } catch (closeError: Exception) {
            Log.w("USB", "Closing failed port also failed", closeError)
        }
        usbPort = null
    }
}
/**
 * Writes [text] to the connected USB serial port as UTF-8 bytes.
 *
 * If no port is currently held, one reconnect attempt is made via [attemptUsbConnection];
 * when that does not yield a port the text is dropped (logged, not queued).
 * On a write failure the port is closed and [usbPort] is cleared so the next call
 * starts with a fresh connection attempt.
 *
 * The write is synchronous with a timeout argument of 500 (presumably milliseconds —
 * confirm against the serial library), so the calling thread can block for that long.
 *
 * @param text the text to transmit; sent exactly as given.
 */
private fun sendToPico(text: String) {
    // AUTO-RECONNECT FEATURE
    if (usbPort == null) {
        Log.d("USB", "Port null, trying to reconnect...")
        attemptUsbConnection()
    }

    // Snapshot the property once so the write and the cleanup act on the same port object.
    val port = usbPort
    if (port == null) {
        Log.e("USB", "Reconnect failed.")
        return
    }

    try {
        val data = text.toByteArray(Charsets.UTF_8)
        port.write(data, 500)
        // NOTE(review): this logs the transmitted text verbatim — confirm that is acceptable
        // outside of debugging.
        Log.d("USB", "Sent: $text")
    } catch (e: Exception) {
        Log.e("USB", "Write Failed", e)
        // Release the broken port instead of just dropping the reference, then
        // force a reset on the next try.
        try {
            port.close()
        } catch (closeError: Exception) {
            Log.w("USB", "Closing failed port also failed", closeError)
        }
        usbPort = null
    }
}