4 Commits

5 changed files with 113 additions and 92 deletions

View File

@@ -4,10 +4,10 @@
<selectionStates> <selectionStates>
<SelectionState runConfigName="app"> <SelectionState runConfigName="app">
<option name="selectionMode" value="DROPDOWN" /> <option name="selectionMode" value="DROPDOWN" />
<DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z"> <DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
<Target type="DEFAULT_BOOT"> <Target type="DEFAULT_BOOT">
<handle> <handle>
<DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" /> <DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
</handle> </handle>
</Target> </Target>
</DropdownSelection> </DropdownSelection>

View File

@@ -11,7 +11,7 @@ android {
applicationId = "net.mmanningau.speechtokeyboard" applicationId = "net.mmanningau.speechtokeyboard"
minSdk = 28 minSdk = 28
targetSdk = 36 targetSdk = 36
versionCode = 10 versionCode = 12
versionName = "1.1" versionName = "1.1"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
@@ -25,6 +25,11 @@ android {
"proguard-rules.pro" "proguard-rules.pro"
) )
} }
debug {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "Speech To Keyboard (Streaming)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
}
} }
compileOptions { compileOptions {
sourceCompatibility = JavaVersion.VERSION_11 sourceCompatibility = JavaVersion.VERSION_11

View File

@@ -37,6 +37,8 @@
<activity <activity
android:name=".TestModelActivity" android:name=".TestModelActivity"
android:parentActivityName=".MainActivity" android:parentActivityName=".MainActivity"
android:exported="false"
android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
android:label="Test Microphone" /> android:label="Test Microphone" />
</application> </application>

View File

@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
var entry = zipInputStream.nextEntry var entry = zipInputStream.nextEntry
var foundEncoder = false var foundEncoder = false
var foundDecoder = false var foundDecoder = false
// var foundJoiner = false - removed for true Whisper model use var foundJoiner = false
var foundTokens = false var foundTokens = false
while (entry != null) { while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
val targetFileName = when { val targetFileName = when {
name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx" name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx" name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
// name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
name.contains("tokens.txt") -> "tokens.txt" name.contains("tokens.txt") -> "tokens.txt"
else -> null else -> null
} }
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
when (targetFileName) { when (targetFileName) {
"encoder.onnx" -> foundEncoder = true "encoder.onnx" -> foundEncoder = true
"decoder.onnx" -> foundDecoder = true "decoder.onnx" -> foundDecoder = true
// "joiner.onnx" -> foundJoiner = true - removed for true Whisper model use "joiner.onnx" -> foundJoiner = true
"tokens.txt" -> foundTokens = true "tokens.txt" -> foundTokens = true
} }
} }
@@ -124,8 +124,7 @@ class MainActivity : AppCompatActivity() {
} }
runOnUiThread { runOnUiThread {
// if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
if (foundEncoder && foundDecoder && foundTokens) {
statusText.text = "Model Installed Successfully!" statusText.text = "Model Installed Successfully!"
Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show() Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
} else { } else {

View File

@@ -20,25 +20,17 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule import com.k2fsa.sherpa.onnx.EndpointRule
/*
import com.k2fsa.sherpa.onnx.FeatureConfig import com.k2fsa.sherpa.onnx.FeatureConfig
import com.k2fsa.sherpa.onnx.OnlineModelConfig
import com.k2fsa.sherpa.onnx.OnlineRecognizer import com.k2fsa.sherpa.onnx.OnlineRecognizer
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream import com.k2fsa.sherpa.onnx.OnlineStream
*/
// Below for the "offline" libraries and the true Whisper integration
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineStream
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.OfflineModelConfig
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
import com.k2fsa.sherpa.onnx.FeatureConfig
import java.io.File import java.io.File
import com.k2fsa.sherpa.onnx.OfflinePunctuation
import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
class TestModelActivity : AppCompatActivity() { class TestModelActivity : AppCompatActivity() {
// UI Components // UI Components
@@ -46,13 +38,14 @@ class TestModelActivity : AppCompatActivity() {
private lateinit var micButton: ImageButton private lateinit var micButton: ImageButton
// Sherpa (Whisper) Components // Sherpa (Whisper) Components
// private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model use private var recognizer: OnlineRecognizer? = null
// private var stream: OnlineStream? = null // - Removed for true Whisper model use private var stream: OnlineStream? = null
private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
private var stream: OfflineStream? = null // Was OnlineStream
private var isRecording = false private var isRecording = false
private var recordingThread: Thread? = null private var recordingThread: Thread? = null
// Punctuation variables
private var punctuator: OfflinePunctuation? = null
// USB Components // USB Components
private var usbPort: UsbSerialPort? = null private var usbPort: UsbSerialPort? = null
@@ -90,37 +83,63 @@ class TestModelActivity : AppCompatActivity() {
return return
} }
// 1. Point to your files
val encoderPath = File(modelDir, "encoder.onnx").absolutePath
val decoderPath = File(modelDir, "decoder.onnx").absolutePath
val tokensPath = File(modelDir, "tokens.txt").absolutePath
try { try {
// CONFIGURATION FOR WHISPER (OFFLINE) // 1. Define Model Paths
val config = OfflineRecognizerConfig( val transducerConfig = OnlineTransducerModelConfig(
encoder = File(modelDir, "encoder.onnx").absolutePath,
decoder = File(modelDir, "decoder.onnx").absolutePath,
joiner = File(modelDir, "joiner.onnx").absolutePath
)
// 2. Define General Config
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
transducer = transducerConfig,
tokens = File(modelDir, "tokens.txt").absolutePath,
numThreads = 1,
debug = false,
modelType = "zipformer"
)
// 3. Define Endpoint Rule (The fix for your error)
// rule1 = detected silence after speech. We set this to 2.4 seconds.
val silenceRule = EndpointRule(
mustContainNonSilence = false,
minTrailingSilence = 2.4f,
minUtteranceLength = 0.0f
)
// 4. Create Recognizer Config
val config = OnlineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80), featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = OfflineModelConfig( modelConfig = onlineModelConfig,
// This parameter 'whisper' exists here! endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
whisper = OfflineWhisperModelConfig( enableEndpoint = true,
encoder = encoderPath,
decoder = decoderPath,
// tokenizer is not strictly needed in config here if passed in tokens param below
// but usually standard offline config uses just these two:
),
tokens = tokensPath,
modelType = "whisper",
debug = false,
numThreads = 1
),
decodingMethod = "greedy_search", decodingMethod = "greedy_search",
maxActivePaths = 4 maxActivePaths = 4
) )
// Initialize OFFLINE Engine // recognizer = OnlineRecognizer(assetManager = assets, config = config)
recognizer = OfflineRecognizer(config = config) recognizer = OnlineRecognizer(config = config)
stream = recognizer?.createStream() stream = recognizer?.createStream()
outputText.text = "Whisper Engine Ready." outputText.text = "Engine Loaded. Ready to Stream."
// ... existing recognizer init code ...
// 5. Initialize Punctuation Engine
val punctPath = File(modelDir, "punct_model.onnx").absolutePath
if (File(punctPath).exists()) {
// CORRECTED: Wrap the path inside 'OfflinePunctuationModelConfig'
val punctConfig = OfflinePunctuationConfig(
model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
)
punctuator = OfflinePunctuation(config = punctConfig)
outputText.append("\n+ Punctuation Ready")
} else {
outputText.append("\n(No Punctuation model found)")
}
} catch (e: Exception) { } catch (e: Exception) {
Log.e("Sherpa", "Init Error", e) Log.e("Sherpa", "Init Error", e)
@@ -145,9 +164,12 @@ class TestModelActivity : AppCompatActivity() {
return return
} }
// Reset the stream for a new session // FIX 1: CLEAR THE BUFFER
// Note: Sherpa streams can be persistent, but resetting ensures clean start // This prevents the "ghost text" from the previous session appearing
// If you want continuous conversation, don't reset 'committedText' // when you hit record again.
stream?.let { activeStream ->
recognizer?.reset(activeStream)
}
isRecording = true isRecording = true
micButton.setColorFilter(android.graphics.Color.RED) micButton.setColorFilter(android.graphics.Color.RED)
@@ -161,46 +183,19 @@ class TestModelActivity : AppCompatActivity() {
private fun stopRecording() { private fun stopRecording() {
isRecording = false isRecording = false
try { recordingThread?.join()
recordingThread?.join() // Wait for loop to finish
} catch (e: InterruptedException) {
// Handle interruption if necessary
}
micButton.clearColorFilter() micButton.clearColorFilter()
// FIX: Safely unwrap 'stream' before passing it to getResult // Just show what we have, don't overwrite with "[Stopped]"
// This reads: "If stream is NOT null, call getResult. Otherwise return empty string." // to prevent visual jarring.
val finalCurrentText = stream?.let { activeStream -> outputText.append("\n[Stopped]")
recognizer?.getResult(activeStream)?.text
} ?: ""
val cleanFinal = finalCurrentText.lowercase()
if (cleanFinal.isNotEmpty()) {
// 1. Commit to history
committedText += "$cleanFinal "
// 2. Send to Pico
sendToPico("$cleanFinal ")
// 3. Update UI
outputText.text = "$committedText \n[Stopped]"
// 4. Reset for next time
// We release the old stream and create a fresh one for the next sentence
stream?.release()
stream = recognizer?.createStream()
} else {
outputText.append("\n[Stopped - No Text]")
}
} }
private fun processAudioLoop() { private fun processAudioLoop() {
val sampleRate = 16000 val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT) val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// 1. GUARD CLAUSE (Safely unwrap nullables) // Guard clauses
val localRec = recognizer ?: return val localRec = recognizer ?: return
val localStream = stream ?: return val localStream = stream ?: return
@@ -218,24 +213,44 @@ class TestModelActivity : AppCompatActivity() {
if (ret > 0) { if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f } val samples = FloatArray(ret) { buffer[it] / 32768.0f }
// 2. Feed Audio
localStream.acceptWaveform(samples, sampleRate) localStream.acceptWaveform(samples, sampleRate)
// 3. Decode (No isReady check needed for Offline) while (localRec.isReady(localStream)) {
localRec.decode(localStream) localRec.decode(localStream)
}
// 4. Get Current Text
// Whisper updates this string constantly as it hears more
val text = localRec.getResult(localStream).text val text = localRec.getResult(localStream).text
val isEndpoint = localRec.isEndpoint(localStream)
if (text.isNotEmpty()) { if (text.isNotEmpty()) {
val cleanText = text.lowercase() val cleanText = text.lowercase()
runOnUiThread { if (isEndpoint) {
// Update the screen so user sees what is happening // FIX 2: THE ORDER OF OPERATIONS
// We do NOT send to USB yet, because Whisper might change this text
// as you keep speaking. // A. Update UI first
outputText.text = "$committedText $cleanText" // 1. PUNCTUATE
// We pass the raw text to the punctuator
val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
runOnUiThread {
// 2. Commit the BEAUTIFUL text
committedText += "$punctuatedText "
outputText.text = committedText
sendToPico("$punctuatedText ")
}
// B. RESET IMMEDIATELY ON BACKGROUND THREAD
// We do this HERE, not inside runOnUiThread.
// This guarantees the stream is clean BEFORE the loop
// reads the next chunk of audio.
localRec.reset(localStream)
} else {
// Standard partial update
runOnUiThread {
outputText.text = "$committedText $cleanText"
}
} }
} }
} }