1 Commit

5 changed files with 92 additions and 113 deletions

View File

@@ -4,10 +4,10 @@
   <selectionStates>
     <SelectionState runConfigName="app">
       <option name="selectionMode" value="DROPDOWN" />
-      <DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
+      <DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z">
         <Target type="DEFAULT_BOOT">
           <handle>
-            <DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
+            <DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" />
           </handle>
         </Target>
       </DropdownSelection>

View File

@@ -11,7 +11,7 @@ android {
        applicationId = "net.mmanningau.speechtokeyboard"
        minSdk = 28
        targetSdk = 36
-       versionCode = 12
+       versionCode = 10
        versionName = "1.1"
        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
@@ -25,11 +25,6 @@ android {
                "proguard-rules.pro"
            )
        }
-       debug {
-           applicationIdSuffix = ".streaming"
-           // This changes the app name on your homescreen to "MyApp (Dev)"
-           resValue("string", "app_name", "Speech To Keyboard (Streaming)")
-       }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_11

View File

@@ -37,8 +37,6 @@
        <activity
            android:name=".TestModelActivity"
            android:parentActivityName=".MainActivity"
-           android:exported="false"
-           android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
            android:label="Test Microphone" />
    </application>

View File

@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
                var entry = zipInputStream.nextEntry
                var foundEncoder = false
                var foundDecoder = false
-               var foundJoiner = false
+               // var foundJoiner = false - removed for true Whisper model use
                var foundTokens = false
                while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
                    val targetFileName = when {
                        name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
                        name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
-                       name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
+                       // name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use
                        name.contains("tokens.txt") -> "tokens.txt"
                        else -> null
                    }
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
                    when (targetFileName) {
                        "encoder.onnx" -> foundEncoder = true
                        "decoder.onnx" -> foundDecoder = true
-                       "joiner.onnx" -> foundJoiner = true
+                       // "joiner.onnx" -> foundJoiner = true - removed for true Whisper model use
                        "tokens.txt" -> foundTokens = true
                    }
                }
@@ -124,7 +124,8 @@ class MainActivity : AppCompatActivity() {
                }
                runOnUiThread {
-                   if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
+                   // if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use
+                   if (foundEncoder && foundDecoder && foundTokens) {
                        statusText.text = "Model Installed Successfully!"
                        Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
                    } else {
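
Review note: with joiner.onnx no longer required, the unpack check reduces to the three Whisper artifacts. A minimal sketch of that check as a standalone helper, assuming plain java.io.File access to the unpacked model directory (the helper name is illustrative, not part of this commit):

import java.io.File

// Hypothetical helper: true when an unpacked model directory contains
// everything the offline Whisper recognizer needs (no joiner.onnx anymore).
fun isWhisperModelComplete(modelDir: File): Boolean {
    val required = listOf("encoder.onnx", "decoder.onnx", "tokens.txt")
    return required.all { name -> File(modelDir, name).isFile }
}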

View File

@@ -20,16 +20,24 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule
+/*
import com.k2fsa.sherpa.onnx.FeatureConfig
+import com.k2fsa.sherpa.onnx.OnlineModelConfig
import com.k2fsa.sherpa.onnx.OnlineRecognizer
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream
-import java.io.File
-import com.k2fsa.sherpa.onnx.OfflinePunctuation
-import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
-import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
+import com.k2fsa.sherpa.onnx.OfflinePunctuation */
+// Below for the "offline" libraries and the true Whisper integration
+import com.k2fsa.sherpa.onnx.OfflineRecognizer
+import com.k2fsa.sherpa.onnx.OfflineStream
+import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
+import com.k2fsa.sherpa.onnx.OfflineModelConfig
+import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
+import com.k2fsa.sherpa.onnx.FeatureConfig
+import java.io.File
class TestModelActivity : AppCompatActivity() {
@@ -38,14 +46,13 @@ class TestModelActivity : AppCompatActivity() {
    private lateinit var micButton: ImageButton
    // Sherpa (Whisper) Components
-   private var recognizer: OnlineRecognizer? = null
-   private var stream: OnlineStream? = null
+   // private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model use
+   // private var stream: OnlineStream? = null // - Removed for true Whisper model use
+   private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
+   private var stream: OfflineStream? = null // Was OnlineStream
    private var isRecording = false
    private var recordingThread: Thread? = null
-   // Punctuation variables
-   private var punctuator: OfflinePunctuation? = null
    // USB Components
    private var usbPort: UsbSerialPort? = null
@@ -83,63 +90,37 @@ class TestModelActivity : AppCompatActivity() {
            return
        }
+       // 1. Point to your files
+       val encoderPath = File(modelDir, "encoder.onnx").absolutePath
+       val decoderPath = File(modelDir, "decoder.onnx").absolutePath
+       val tokensPath = File(modelDir, "tokens.txt").absolutePath
        try {
-           // 1. Define Model Paths
-           val transducerConfig = OnlineTransducerModelConfig(
-               encoder = File(modelDir, "encoder.onnx").absolutePath,
-               decoder = File(modelDir, "decoder.onnx").absolutePath,
-               joiner = File(modelDir, "joiner.onnx").absolutePath
-           )
-           // 2. Define General Config
-           val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
-               transducer = transducerConfig,
-               tokens = File(modelDir, "tokens.txt").absolutePath,
-               numThreads = 1,
-               debug = false,
-               modelType = "zipformer"
-           )
-           // 3. Define Endpoint Rule (The fix for your error)
-           // rule1 = detected silence after speech. We set this to 2.4 seconds.
-           val silenceRule = EndpointRule(
-               mustContainNonSilence = false,
-               minTrailingSilence = 2.4f,
-               minUtteranceLength = 0.0f
-           )
-           // 4. Create Recognizer Config
-           val config = OnlineRecognizerConfig(
+           // CONFIGURATION FOR WHISPER (OFFLINE)
+           val config = OfflineRecognizerConfig(
                featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
-               modelConfig = onlineModelConfig,
-               endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
-               enableEndpoint = true,
+               modelConfig = OfflineModelConfig(
+                   // This parameter 'whisper' exists here!
+                   whisper = OfflineWhisperModelConfig(
+                       encoder = encoderPath,
+                       decoder = decoderPath,
+                       // tokenizer is not strictly needed in config here if passed in tokens param below
+                       // but usually standard offline config uses just these two:
+                   ),
+                   tokens = tokensPath,
+                   modelType = "whisper",
+                   debug = false,
+                   numThreads = 1
+               ),
                decodingMethod = "greedy_search",
                maxActivePaths = 4
            )
-           // recognizer = OnlineRecognizer(assetManager = assets, config = config)
-           recognizer = OnlineRecognizer(config = config)
+           // Initialize OFFLINE Engine
+           recognizer = OfflineRecognizer(config = config)
            stream = recognizer?.createStream()
-           outputText.text = "Engine Loaded. Ready to Stream."
-           // ... existing recognizer init code ...
-           // 5. Initialize Punctuation Engine
-           val punctPath = File(modelDir, "punct_model.onnx").absolutePath
-           if (File(punctPath).exists()) {
-               // CORRECTED: Wrap the path inside 'OfflinePunctuationModelConfig'
-               val punctConfig = OfflinePunctuationConfig(
-                   model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
-               )
-               punctuator = OfflinePunctuation(config = punctConfig)
-               outputText.append("\n+ Punctuation Ready")
-           } else {
-               outputText.append("\n(No Punctuation model found)")
-           }
+           outputText.text = "Whisper Engine Ready."
        } catch (e: Exception) {
            Log.e("Sherpa", "Init Error", e)
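
Review note: stripped of the diff markers, the new initialization reduces to the block below. This is a sketch that assumes the sherpa-onnx Kotlin API exactly as it is used in this file (OfflineRecognizerConfig, OfflineModelConfig, OfflineWhisperModelConfig, FeatureConfig, OfflineRecognizer); the helper name buildWhisperRecognizer is illustrative, not part of this commit.

import com.k2fsa.sherpa.onnx.FeatureConfig
import com.k2fsa.sherpa.onnx.OfflineModelConfig
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
import java.io.File

// Hypothetical helper: wires up the offline Whisper recognizer from an
// unpacked model directory, mirroring the config built in the try block above.
fun buildWhisperRecognizer(modelDir: File): OfflineRecognizer {
    val config = OfflineRecognizerConfig(
        featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
        modelConfig = OfflineModelConfig(
            whisper = OfflineWhisperModelConfig(
                encoder = File(modelDir, "encoder.onnx").absolutePath,
                decoder = File(modelDir, "decoder.onnx").absolutePath
            ),
            tokens = File(modelDir, "tokens.txt").absolutePath,
            modelType = "whisper",
            debug = false,
            numThreads = 1
        ),
        decodingMethod = "greedy_search",
        maxActivePaths = 4
    )
    return OfflineRecognizer(config = config)
}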
@@ -164,12 +145,9 @@ class TestModelActivity : AppCompatActivity() {
            return
        }
-       // FIX 1: CLEAR THE BUFFER
-       // This prevents the "ghost text" from the previous session appearing
-       // when you hit record again.
-       stream?.let { activeStream ->
-           recognizer?.reset(activeStream)
-       }
+       // Reset the stream for a new session
+       // Note: Sherpa streams can be persistent, but resetting ensures clean start
+       // If you want continuous conversation, don't reset 'committedText'
        isRecording = true
        micButton.setColorFilter(android.graphics.Color.RED)
@@ -183,19 +161,46 @@ class TestModelActivity : AppCompatActivity() {
    private fun stopRecording() {
        isRecording = false
-       recordingThread?.join()
+       try {
+           recordingThread?.join() // Wait for loop to finish
+       } catch (e: InterruptedException) {
+           // Handle interruption if necessary
+       }
        micButton.clearColorFilter()
-       // Just show what we have, don't overwrite with "[Stopped]"
-       // to prevent visual jarring.
-       outputText.append("\n[Stopped]")
+       // FIX: Safely unwrap 'stream' before passing it to getResult
+       // This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
+       val finalCurrentText = stream?.let { activeStream ->
+           recognizer?.getResult(activeStream)?.text
+       } ?: ""
+       val cleanFinal = finalCurrentText.lowercase()
+       if (cleanFinal.isNotEmpty()) {
+           // 1. Commit to history
+           committedText += "$cleanFinal "
+           // 2. Send to Pico
+           sendToPico("$cleanFinal ")
+           // 3. Update UI
+           outputText.text = "$committedText \n[Stopped]"
+           // 4. Reset for next time
+           // We release the old stream and create a fresh one for the next sentence
+           stream?.release()
+           stream = recognizer?.createStream()
+       } else {
+           outputText.append("\n[Stopped - No Text]")
+       }
    }
    private fun processAudioLoop() {
        val sampleRate = 16000
        val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
-       // Guard clauses
+       // 1. GUARD CLAUSE (Safely unwrap nullables)
        val localRec = recognizer ?: return
        val localStream = stream ?: return
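
Review note: the stop path above defines the stream lifecycle this commit relies on: one OfflineStream per utterance, finalized on stop, then replaced with a fresh one. A sketch of that pattern in isolation, using only calls already present in this file (getResult, release, createStream); the helper name and return type are illustrative.

import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineStream

// Hypothetical helper: pulls the final text out of the current stream, then
// swaps in a fresh stream so the next recording starts clean.
fun finalizeUtterance(
    recognizer: OfflineRecognizer,
    stream: OfflineStream
): Pair<String, OfflineStream> {
    val finalText = recognizer.getResult(stream).text.lowercase()
    stream.release()                           // drop the audio already decoded
    val nextStream = recognizer.createStream() // ready for the next utterance
    return finalText to nextStream
}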
@@ -213,44 +218,24 @@ class TestModelActivity : AppCompatActivity() {
                if (ret > 0) {
                    val samples = FloatArray(ret) { buffer[it] / 32768.0f }
+                   // 2. Feed Audio
                    localStream.acceptWaveform(samples, sampleRate)
-                   while (localRec.isReady(localStream)) {
-                       localRec.decode(localStream)
-                   }
+                   // 3. Decode (No isReady check needed for Offline)
+                   localRec.decode(localStream)
+                   // 4. Get Current Text
+                   // Whisper updates this string constantly as it hears more
                    val text = localRec.getResult(localStream).text
-                   val isEndpoint = localRec.isEndpoint(localStream)
                    if (text.isNotEmpty()) {
                        val cleanText = text.lowercase()
-                       if (isEndpoint) {
-                           // FIX 2: THE ORDER OF OPERATIONS
-                           // A. Update UI first
-                           // 1. PUNCTUATE
-                           // We pass the raw text to the punctuator
-                           val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
-                           runOnUiThread {
-                               // 2. Commit the BEAUTIFUL text
-                               committedText += "$punctuatedText "
-                               outputText.text = committedText
-                               sendToPico("$punctuatedText ")
-                           }
-                           // B. RESET IMMEDIATELY ON BACKGROUND THREAD
-                           // We do this HERE, not inside runOnUiThread.
-                           // This guarantees the stream is clean BEFORE the loop
-                           // reads the next chunk of audio.
-                           localRec.reset(localStream)
-                       } else {
-                           // Standard partial update
-                           runOnUiThread {
-                               outputText.text = "$committedText $cleanText"
-                           }
+                       runOnUiThread {
+                           // Update the screen so user sees what is happening
+                           // We do NOT send to USB yet, because Whisper might change this text
+                           // as you keep speaking.
+                           outputText.text = "$committedText $cleanText"
                        }
                    }
                }
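
Review note: the loop above is the heart of the pseudo-streaming approach: each PCM chunk is appended to the same OfflineStream, the accumulated audio is re-decoded, and the partial text may keep changing until stop. A compact sketch of that capture-and-decode loop under the same assumptions (16 kHz mono PCM, RECORD_AUDIO already granted, sherpa-onnx offline API as used in this file); the function and callback names are illustrative.

import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineStream

// Hypothetical sketch mirroring the commit's pattern: repeatedly accept and
// decode on one offline stream, handing the (still changing) partial text out.
fun captureAndDecode(
    recognizer: OfflineRecognizer,
    stream: OfflineStream,
    isRecording: () -> Boolean,
    onPartial: (String) -> Unit
) {
    val sampleRate = 16000
    val bufferSize = AudioRecord.getMinBufferSize(
        sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT
    )
    val recorder = AudioRecord(
        MediaRecorder.AudioSource.MIC, sampleRate,
        AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize
    )
    val buffer = ShortArray(bufferSize / 2)
    recorder.startRecording()
    while (isRecording()) {
        val ret = recorder.read(buffer, 0, buffer.size)
        if (ret > 0) {
            // 16-bit PCM -> float in [-1, 1], the format acceptWaveform expects
            val samples = FloatArray(ret) { buffer[it] / 32768.0f }
            stream.acceptWaveform(samples, sampleRate)
            recognizer.decode(stream)          // no isReady() gate for offline
            onPartial(recognizer.getResult(stream).text)
        }
    }
    recorder.stop()
    recorder.release()
}

Design note: if acceptWaveform accumulates audio on the offline stream (which is what the "updates this string constantly" comment above relies on), each decode() pass re-processes the whole utterance, so per-chunk cost grows with utterance length; that is the trade-off of driving the offline Whisper recognizer in this pseudo-streaming way.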