Compare commits
4 Commits
master
...
2a8f004916
| Author | SHA1 | Date | |
|---|---|---|---|
| 2a8f004916 | |||
| ac7d51b46e | |||
| f17c6ab84e | |||
| cce093db4e |
4
.idea/deploymentTargetSelector.xml
generated
4
.idea/deploymentTargetSelector.xml
generated
@@ -4,10 +4,10 @@
|
||||
<selectionStates>
|
||||
<SelectionState runConfigName="app">
|
||||
<option name="selectionMode" value="DROPDOWN" />
|
||||
<DropdownSelection timestamp="2026-01-22T04:36:45.393638454Z">
|
||||
<DropdownSelection timestamp="2026-01-23T01:29:57.710335816Z">
|
||||
<Target type="DEFAULT_BOOT">
|
||||
<handle>
|
||||
<DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_5_API_31_Android_12_.avd" />
|
||||
<DeviceId pluginId="PhysicalDevice" identifier="serial=DKTAB13NEU0019483" />
|
||||
</handle>
|
||||
</Target>
|
||||
</DropdownSelection>
|
||||
|
||||
@@ -11,7 +11,7 @@ android {
|
||||
applicationId = "net.mmanningau.speechtokeyboard"
|
||||
minSdk = 28
|
||||
targetSdk = 36
|
||||
versionCode = 10
|
||||
versionCode = 12
|
||||
versionName = "1.1"
|
||||
|
||||
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
|
||||
@@ -25,6 +25,11 @@ android {
|
||||
"proguard-rules.pro"
|
||||
)
|
||||
}
|
||||
debug {
|
||||
applicationIdSuffix = ".streaming"
|
||||
// This changes the app name on your home screen to "Speech To Keyboard (Streaming)"
|
||||
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
|
||||
}
|
||||
}
|
||||
compileOptions {
|
||||
sourceCompatibility = JavaVersion.VERSION_11
|
||||
|
||||
@@ -37,6 +37,8 @@
|
||||
<activity
|
||||
android:name=".TestModelActivity"
|
||||
android:parentActivityName=".MainActivity"
|
||||
android:exported="false"
|
||||
android:configChanges="orientation|screenSize|screenLayout|keyboardHidden"
|
||||
android:label="Test Microphone" />
|
||||
|
||||
</application>
|
||||
|
||||
@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
|
||||
var entry = zipInputStream.nextEntry
|
||||
var foundEncoder = false
|
||||
var foundDecoder = false
|
||||
// var foundJoiner = false - removed for true Whisper model use
|
||||
var foundJoiner = false
|
||||
var foundTokens = false
|
||||
|
||||
while (entry != null) {
|
||||
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
|
||||
val targetFileName = when {
|
||||
name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
|
||||
name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
|
||||
// name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use
|
||||
name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
|
||||
name.contains("tokens.txt") -> "tokens.txt"
|
||||
else -> null
|
||||
}
|
||||
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
|
||||
when (targetFileName) {
|
||||
"encoder.onnx" -> foundEncoder = true
|
||||
"decoder.onnx" -> foundDecoder = true
|
||||
// "joiner.onnx" -> foundJoiner = true - removed for true Whisper model use
|
||||
"joiner.onnx" -> foundJoiner = true
|
||||
"tokens.txt" -> foundTokens = true
|
||||
}
|
||||
}
|
||||
@@ -124,8 +124,7 @@ class MainActivity : AppCompatActivity() {
|
||||
}
|
||||
|
||||
runOnUiThread {
|
||||
// if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use
|
||||
if (foundEncoder && foundDecoder && foundTokens) {
|
||||
if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
|
||||
statusText.text = "Model Installed Successfully!"
|
||||
Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
|
||||
} else {
|
||||
|
||||
@@ -20,25 +20,17 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
|
||||
import com.hoho.android.usbserial.util.SerialInputOutputManager
|
||||
import com.k2fsa.sherpa.onnx.EndpointConfig
|
||||
import com.k2fsa.sherpa.onnx.EndpointRule
|
||||
/*
|
||||
import com.k2fsa.sherpa.onnx.FeatureConfig
|
||||
import com.k2fsa.sherpa.onnx.OnlineModelConfig
|
||||
import com.k2fsa.sherpa.onnx.OnlineRecognizer
|
||||
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
|
||||
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
|
||||
import com.k2fsa.sherpa.onnx.OnlineStream
|
||||
|
||||
*/
|
||||
// Below for the "offline" libraries and the true Whisper integration
|
||||
import com.k2fsa.sherpa.onnx.OfflineRecognizer
|
||||
import com.k2fsa.sherpa.onnx.OfflineStream
|
||||
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
|
||||
import com.k2fsa.sherpa.onnx.OfflineModelConfig
|
||||
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
|
||||
import com.k2fsa.sherpa.onnx.FeatureConfig
|
||||
|
||||
import java.io.File
|
||||
|
||||
import com.k2fsa.sherpa.onnx.OfflinePunctuation
|
||||
import com.k2fsa.sherpa.onnx.OfflinePunctuationConfig
|
||||
import com.k2fsa.sherpa.onnx.OfflinePunctuationModelConfig
|
||||
|
||||
class TestModelActivity : AppCompatActivity() {
|
||||
|
||||
// UI Components
|
||||
@@ -46,13 +38,14 @@ class TestModelActivity : AppCompatActivity() {
|
||||
private lateinit var micButton: ImageButton
|
||||
|
||||
// Sherpa (Whisper) Components
|
||||
// private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model use
|
||||
// private var stream: OnlineStream? = null // - Removed for true Whisper model use
|
||||
private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
|
||||
private var stream: OfflineStream? = null // Was OnlineStream
|
||||
private var recognizer: OnlineRecognizer? = null
|
||||
private var stream: OnlineStream? = null
|
||||
private var isRecording = false
|
||||
private var recordingThread: Thread? = null
|
||||
|
||||
// Punctuation variables
|
||||
private var punctuator: OfflinePunctuation? = null
|
||||
|
||||
// USB Components
|
||||
private var usbPort: UsbSerialPort? = null
|
||||
|
||||
@@ -90,37 +83,63 @@ class TestModelActivity : AppCompatActivity() {
|
||||
return
|
||||
}
|
||||
|
||||
// 1. Point to your files
|
||||
val encoderPath = File(modelDir, "encoder.onnx").absolutePath
|
||||
val decoderPath = File(modelDir, "decoder.onnx").absolutePath
|
||||
val tokensPath = File(modelDir, "tokens.txt").absolutePath
|
||||
|
||||
try {
|
||||
// CONFIGURATION FOR WHISPER (OFFLINE)
|
||||
val config = OfflineRecognizerConfig(
|
||||
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
|
||||
modelConfig = OfflineModelConfig(
|
||||
// This parameter 'whisper' exists here!
|
||||
whisper = OfflineWhisperModelConfig(
|
||||
encoder = encoderPath,
|
||||
decoder = decoderPath,
|
||||
// tokenizer is not strictly needed in config here if passed in tokens param below
|
||||
// but usually standard offline config uses just these two:
|
||||
),
|
||||
tokens = tokensPath,
|
||||
modelType = "whisper",
|
||||
// 1. Define Model Paths
|
||||
val transducerConfig = OnlineTransducerModelConfig(
|
||||
encoder = File(modelDir, "encoder.onnx").absolutePath,
|
||||
decoder = File(modelDir, "decoder.onnx").absolutePath,
|
||||
joiner = File(modelDir, "joiner.onnx").absolutePath
|
||||
)
|
||||
|
||||
// 2. Define General Config
|
||||
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
|
||||
transducer = transducerConfig,
|
||||
tokens = File(modelDir, "tokens.txt").absolutePath,
|
||||
numThreads = 1,
|
||||
debug = false,
|
||||
numThreads = 1
|
||||
),
|
||||
modelType = "zipformer"
|
||||
)
|
||||
|
||||
// 3. Define Endpoint Rule (The fix for your error)
|
||||
// rule1 = detected silence after speech. We set this to 2.4 seconds.
|
||||
val silenceRule = EndpointRule(
|
||||
mustContainNonSilence = false,
|
||||
minTrailingSilence = 2.4f,
|
||||
minUtteranceLength = 0.0f
|
||||
)
|
||||
|
||||
// 4. Create Recognizer Config
|
||||
val config = OnlineRecognizerConfig(
|
||||
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
|
||||
modelConfig = onlineModelConfig,
|
||||
endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
|
||||
enableEndpoint = true,
|
||||
decodingMethod = "greedy_search",
|
||||
maxActivePaths = 4
|
||||
)
|
||||
|
||||
// Initialize OFFLINE Engine
|
||||
recognizer = OfflineRecognizer(config = config)
|
||||
// recognizer = OnlineRecognizer(assetManager = assets, config = config)
|
||||
recognizer = OnlineRecognizer(config = config)
|
||||
stream = recognizer?.createStream()
|
||||
|
||||
outputText.text = "Whisper Engine Ready."
|
||||
outputText.text = "Engine Loaded. Ready to Stream."
|
||||
|
||||
// ... existing recognizer init code ...
|
||||
|
||||
// 5. Initialize Punctuation Engine
|
||||
val punctPath = File(modelDir, "punct_model.onnx").absolutePath
|
||||
|
||||
if (File(punctPath).exists()) {
|
||||
// CORRECTED: Wrap the path inside 'OfflinePunctuationModelConfig'
|
||||
val punctConfig = OfflinePunctuationConfig(
|
||||
model = OfflinePunctuationModelConfig(ctTransformer = punctPath)
|
||||
)
|
||||
|
||||
punctuator = OfflinePunctuation(config = punctConfig)
|
||||
outputText.append("\n+ Punctuation Ready")
|
||||
} else {
|
||||
outputText.append("\n(No Punctuation model found)")
|
||||
}
|
||||
|
||||
} catch (e: Exception) {
|
||||
Log.e("Sherpa", "Init Error", e)
|
||||
@@ -145,9 +164,12 @@ class TestModelActivity : AppCompatActivity() {
|
||||
return
|
||||
}
|
||||
|
||||
// Reset the stream for a new session
|
||||
// Note: Sherpa streams can be persistent, but resetting ensures clean start
|
||||
// If you want continuous conversation, don't reset 'committedText'
|
||||
// FIX 1: CLEAR THE BUFFER
|
||||
// This prevents the "ghost text" from the previous session appearing
|
||||
// when you hit record again.
|
||||
stream?.let { activeStream ->
|
||||
recognizer?.reset(activeStream)
|
||||
}
|
||||
|
||||
isRecording = true
|
||||
micButton.setColorFilter(android.graphics.Color.RED)
|
||||
@@ -161,46 +183,19 @@ class TestModelActivity : AppCompatActivity() {
|
||||
|
||||
private fun stopRecording() {
|
||||
isRecording = false
|
||||
try {
|
||||
recordingThread?.join() // Wait for loop to finish
|
||||
} catch (e: InterruptedException) {
|
||||
// Handle interruption if necessary
|
||||
}
|
||||
|
||||
recordingThread?.join()
|
||||
micButton.clearColorFilter()
|
||||
|
||||
// FIX: Safely unwrap 'stream' before passing it to getResult
|
||||
// This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
|
||||
val finalCurrentText = stream?.let { activeStream ->
|
||||
recognizer?.getResult(activeStream)?.text
|
||||
} ?: ""
|
||||
|
||||
val cleanFinal = finalCurrentText.lowercase()
|
||||
|
||||
if (cleanFinal.isNotEmpty()) {
|
||||
// 1. Commit to history
|
||||
committedText += "$cleanFinal "
|
||||
|
||||
// 2. Send to Pico
|
||||
sendToPico("$cleanFinal ")
|
||||
|
||||
// 3. Update UI
|
||||
outputText.text = "$committedText \n[Stopped]"
|
||||
|
||||
// 4. Reset for next time
|
||||
// We release the old stream and create a fresh one for the next sentence
|
||||
stream?.release()
|
||||
stream = recognizer?.createStream()
|
||||
} else {
|
||||
outputText.append("\n[Stopped - No Text]")
|
||||
}
|
||||
// Just show what we have, don't overwrite with "[Stopped]"
|
||||
// to prevent visual jarring.
|
||||
outputText.append("\n[Stopped]")
|
||||
}
|
||||
|
||||
private fun processAudioLoop() {
|
||||
val sampleRate = 16000
|
||||
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
|
||||
|
||||
// 1. GUARD CLAUSE (Safely unwrap nullables)
|
||||
// Guard clauses
|
||||
val localRec = recognizer ?: return
|
||||
val localStream = stream ?: return
|
||||
|
||||
@@ -218,28 +213,48 @@ class TestModelActivity : AppCompatActivity() {
|
||||
if (ret > 0) {
|
||||
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
||||
|
||||
// 2. Feed Audio
|
||||
localStream.acceptWaveform(samples, sampleRate)
|
||||
|
||||
// 3. Decode (No isReady check needed for Offline)
|
||||
while (localRec.isReady(localStream)) {
|
||||
localRec.decode(localStream)
|
||||
}
|
||||
|
||||
// 4. Get Current Text
|
||||
// Whisper updates this string constantly as it hears more
|
||||
val text = localRec.getResult(localStream).text
|
||||
val isEndpoint = localRec.isEndpoint(localStream)
|
||||
|
||||
if (text.isNotEmpty()) {
|
||||
val cleanText = text.lowercase()
|
||||
|
||||
if (isEndpoint) {
|
||||
// FIX 2: THE ORDER OF OPERATIONS
|
||||
|
||||
// A. Update UI first
|
||||
// 1. PUNCTUATE
|
||||
// We pass the raw text to the punctuator
|
||||
val punctuatedText = punctuator?.addPunctuation(cleanText) ?: cleanText
|
||||
|
||||
runOnUiThread {
|
||||
// 2. Commit the BEAUTIFUL text
|
||||
committedText += "$punctuatedText "
|
||||
outputText.text = committedText
|
||||
sendToPico("$punctuatedText ")
|
||||
}
|
||||
|
||||
// B. RESET IMMEDIATELY ON BACKGROUND THREAD
|
||||
// We do this HERE, not inside runOnUiThread.
|
||||
// This guarantees the stream is clean BEFORE the loop
|
||||
// reads the next chunk of audio.
|
||||
localRec.reset(localStream)
|
||||
|
||||
} else {
|
||||
// Standard partial update
|
||||
runOnUiThread {
|
||||
// Update the screen so user sees what is happening
|
||||
// We do NOT send to USB yet, because Whisper might change this text
|
||||
// as you keep speaking.
|
||||
outputText.text = "$committedText $cleanText"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
record.stop()
|
||||
record.release()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user