2 Commits

3 changed files with 80 additions and 90 deletions

build.gradle.kts

@@ -12,7 +12,7 @@ android {
         minSdk = 28
         targetSdk = 36
         versionCode = 10
-        versionName = "1.1"
+        versionName = "1.0"
         testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
     }
@@ -25,6 +25,11 @@ android {
"proguard-rules.pro" "proguard-rules.pro"
) )
} }
debug {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "MyApp (Dev)"
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
}
} }
compileOptions { compileOptions {
sourceCompatibility = JavaVersion.VERSION_11 sourceCompatibility = JavaVersion.VERSION_11
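For context, the added debug block slots into the existing buildTypes section roughly as sketched below; the release block contents beyond the "proguard-rules.pro" line are assumed, not taken from this diff. One thing to watch: resValue generates the app_name string at build time, so if app_name is also defined in res/values/strings.xml the debug variant will typically fail resource merging with a duplicate-resource error.

buildTypes {
    release {
        // Assumed release configuration; only "proguard-rules.pro" appears in the diff.
        proguardFiles(
            getDefaultProguardFile("proguard-android-optimize.txt"),
            "proguard-rules.pro"
        )
    }
    debug {
        // Installs side by side with the release build under a distinct application ID.
        applicationIdSuffix = ".streaming"
        // Generated resource; keep app_name out of strings.xml (or out of the debug
        // source set) so the merged resources do not collide.
        resValue("string", "app_name", "Speech To Keyboard (Streaming)")
    }
}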

MainActivity.kt

@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
 var entry = zipInputStream.nextEntry
 var foundEncoder = false
 var foundDecoder = false
-// var foundJoiner = false - removed for true Whisper model use
+var foundJoiner = false
 var foundTokens = false
 while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
 val targetFileName = when {
     name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
     name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
-    // name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use
+    name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
     name.contains("tokens.txt") -> "tokens.txt"
     else -> null
 }
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
 when (targetFileName) {
     "encoder.onnx" -> foundEncoder = true
     "decoder.onnx" -> foundDecoder = true
-    // "joiner.onnx" -> foundJoiner = true = re,moved for true Whisper model use
+    "joiner.onnx" -> foundJoiner = true
     "tokens.txt" -> foundTokens = true
 }
@@ -124,8 +124,7 @@ class MainActivity : AppCompatActivity() {
         }
         runOnUiThread {
-            // if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use
-            if (foundEncoder && foundDecoder && foundTokens) {
+            if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
                 statusText.text = "Model Installed Successfully!"
                 Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
             } else {
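Since the restored joiner handling is spread across four small hunks, here is how the MainActivity extraction-and-validation flow reads once they are applied. This is a condensed sketch, not the full activity: the extractModel function name, the InputStream/modelDir parameters, and the copyTo-based file write are assumptions filled in around the lines shown above.

// Sketch: unpack a model zip and verify the four files a streaming
// zipformer transducer needs (encoder, decoder, joiner, tokens).
import java.io.File
import java.io.InputStream
import java.util.zip.ZipInputStream

fun extractModel(zip: InputStream, modelDir: File): Boolean {
    var foundEncoder = false
    var foundDecoder = false
    var foundJoiner = false
    var foundTokens = false

    modelDir.mkdirs()
    ZipInputStream(zip).use { zipInputStream ->
        var entry = zipInputStream.nextEntry
        while (entry != null) {
            val name = entry.name
            val targetFileName = when {
                name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
                name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
                name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
                name.contains("tokens.txt") -> "tokens.txt"
                else -> null
            }
            if (targetFileName != null) {
                // Copy the matching entry into the app's model directory.
                File(modelDir, targetFileName).outputStream().use { out ->
                    zipInputStream.copyTo(out)
                }
                when (targetFileName) {
                    "encoder.onnx" -> foundEncoder = true
                    "decoder.onnx" -> foundDecoder = true
                    "joiner.onnx" -> foundJoiner = true
                    "tokens.txt" -> foundTokens = true
                }
            }
            entry = zipInputStream.nextEntry
        }
    }
    // All four files must be present before the streaming recognizer can load.
    return foundEncoder && foundDecoder && foundJoiner && foundTokens
}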

TestModelActivity.kt

@@ -20,23 +20,11 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
 import com.hoho.android.usbserial.util.SerialInputOutputManager
 import com.k2fsa.sherpa.onnx.EndpointConfig
 import com.k2fsa.sherpa.onnx.EndpointRule
-/*
 import com.k2fsa.sherpa.onnx.FeatureConfig
-import com.k2fsa.sherpa.onnx.OnlineModelConfig
 import com.k2fsa.sherpa.onnx.OnlineRecognizer
 import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
 import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
 import com.k2fsa.sherpa.onnx.OnlineStream
-*/
-// Below for the "offline" libraries and the true Whisper integration
-import com.k2fsa.sherpa.onnx.OfflineRecognizer
-import com.k2fsa.sherpa.onnx.OfflineStream
-import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
-import com.k2fsa.sherpa.onnx.OfflineModelConfig
-import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
-import com.k2fsa.sherpa.onnx.FeatureConfig
 import java.io.File

 class TestModelActivity : AppCompatActivity() {
@@ -46,10 +34,8 @@ class TestModelActivity : AppCompatActivity() {
     private lateinit var micButton: ImageButton

     // Sherpa (Whisper) Components
-    // private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model usa
-    // private var stream: OnlineStream? = null // - Removed for true Whisper model usa
-    private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
-    private var stream: OfflineStream? = null // Was OnlineStream
+    private var recognizer: OnlineRecognizer? = null
+    private var stream: OnlineStream? = null

     private var isRecording = false
     private var recordingThread: Thread? = null
@@ -90,37 +76,46 @@ class TestModelActivity : AppCompatActivity() {
             return
         }

-        // 1. Point to your files
-        val encoderPath = File(modelDir, "encoder.onnx").absolutePath
-        val decoderPath = File(modelDir, "decoder.onnx").absolutePath
-        val tokensPath = File(modelDir, "tokens.txt").absolutePath

         try {
-            // CONFIGURATION FOR WHISPER (OFFLINE)
-            val config = OfflineRecognizerConfig(
-                featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
-                modelConfig = OfflineModelConfig(
-                    // This parameter 'whisper' exists here!
-                    whisper = OfflineWhisperModelConfig(
-                        encoder = encoderPath,
-                        decoder = decoderPath,
-                        // tokenizer is not strictly needed in config here if passed in tokens param below
-                        // but usually standard offline config uses just these two:
-                    ),
-                    tokens = tokensPath,
-                    modelType = "whisper",
-                    debug = false,
-                    numThreads = 1
-                ),
+            // 1. Define Model Paths
+            val transducerConfig = OnlineTransducerModelConfig(
+                encoder = File(modelDir, "encoder.onnx").absolutePath,
+                decoder = File(modelDir, "decoder.onnx").absolutePath,
+                joiner = File(modelDir, "joiner.onnx").absolutePath
+            )
+
+            // 2. Define General Config
+            val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
+                transducer = transducerConfig,
+                tokens = File(modelDir, "tokens.txt").absolutePath,
+                numThreads = 1,
+                debug = false,
+                modelType = "zipformer"
+            )
+
+            // 3. Define Endpoint Rule (The fix for your error)
+            // rule1 = detected silence after speech. We set this to 2.4 seconds.
+            val silenceRule = EndpointRule(
+                mustContainNonSilence = false,
+                minTrailingSilence = 2.4f,
+                minUtteranceLength = 0.0f
+            )
+
+            // 4. Create Recognizer Config
+            val config = OnlineRecognizerConfig(
+                featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
+                modelConfig = onlineModelConfig,
+                endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
+                enableEndpoint = true,
                 decodingMethod = "greedy_search",
                 maxActivePaths = 4
             )

-            // Initialize OFFLINE Engine
-            recognizer = OfflineRecognizer(config = config)
+            // recognizer = OnlineRecognizer(assetManager = assets, config = config)
+            recognizer = OnlineRecognizer(config = config)
             stream = recognizer?.createStream()

-            outputText.text = "Whisper Engine Ready."
+            outputText.text = "Engine Loaded. Ready to Stream."

         } catch (e: Exception) {
             Log.e("Sherpa", "Init Error", e)
@@ -145,9 +140,12 @@ class TestModelActivity : AppCompatActivity() {
             return
         }

-        // Reset the stream for a new session
-        // Note: Sherpa streams can be persistent, but resetting ensures clean start
-        // If you want continuous conversation, don't reset 'committedText'
+        // FIX 1: CLEAR THE BUFFER
+        // This prevents the "ghost text" from the previous session appearing
+        // when you hit record again.
+        stream?.let { activeStream ->
+            recognizer?.reset(activeStream)
+        }

         isRecording = true
         micButton.setColorFilter(android.graphics.Color.RED)
@@ -161,46 +159,19 @@ class TestModelActivity : AppCompatActivity() {
     private fun stopRecording() {
         isRecording = false
-        try {
-            recordingThread?.join() // Wait for loop to finish
-        } catch (e: InterruptedException) {
-            // Handle interruption if necessary
-        }
+        recordingThread?.join()
         micButton.clearColorFilter()

-        // FIX: Safely unwrap 'stream' before passing it to getResult
-        // This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
-        val finalCurrentText = stream?.let { activeStream ->
-            recognizer?.getResult(activeStream)?.text
-        } ?: ""
-        val cleanFinal = finalCurrentText.lowercase()
-        if (cleanFinal.isNotEmpty()) {
-            // 1. Commit to history
-            committedText += "$cleanFinal "
-            // 2. Send to Pico
-            sendToPico("$cleanFinal ")
-            // 3. Update UI
-            outputText.text = "$committedText \n[Stopped]"
-            // 4. Reset for next time
-            // We release the old stream and create a fresh one for the next sentence
-            stream?.release()
-            stream = recognizer?.createStream()
-        } else {
-            outputText.append("\n[Stopped - No Text]")
-        }
+        // Just show what we have, don't overwrite with "[Stopped]"
+        // to prevent visual jarring.
+        outputText.append("\n[Stopped]")
     }

     private fun processAudioLoop() {
         val sampleRate = 16000
         val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)

-        // 1. GUARD CLAUSE (Safely unwrap nullables)
+        // Guard clauses
         val localRec = recognizer ?: return
         val localStream = stream ?: return
@@ -218,28 +189,43 @@ class TestModelActivity : AppCompatActivity() {
             if (ret > 0) {
                 val samples = FloatArray(ret) { buffer[it] / 32768.0f }

-                // 2. Feed Audio
                 localStream.acceptWaveform(samples, sampleRate)

-                // 3. Decode (No isReady check needed for Offline)
-                localRec.decode(localStream)
+                while (localRec.isReady(localStream)) {
+                    localRec.decode(localStream)
+                }

-                // 4. Get Current Text
-                // Whisper updates this string constantly as it hears more
                 val text = localRec.getResult(localStream).text
+                val isEndpoint = localRec.isEndpoint(localStream)

                 if (text.isNotEmpty()) {
                     val cleanText = text.lowercase()
+                    if (isEndpoint) {
+                        // FIX 2: THE ORDER OF OPERATIONS
+                        // A. Update UI first
+                        runOnUiThread {
+                            committedText += "$cleanText "
+                            outputText.text = committedText
+                            sendToPico("$cleanText ")
+                        }
+                        // B. RESET IMMEDIATELY ON BACKGROUND THREAD
+                        // We do this HERE, not inside runOnUiThread.
+                        // This guarantees the stream is clean BEFORE the loop
+                        // reads the next chunk of audio.
+                        localRec.reset(localStream)
+                    } else {
+                        // Standard partial update
                         runOnUiThread {
-                            // Update the screen so user sees what is happening
-                            // We do NOT send to USB yet, because Whisper might change this text
-                            // as you keep speaking.
                             outputText.text = "$committedText $cleanText"
                         }
+                    }
                 }
             }
         }
         record.stop()
         record.release()
     }
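Putting the TestModelActivity hunks together, the new code follows the sherpa-onnx streaming pattern shown in the diff: feed PCM into the stream, drain the decoder while it reports isReady, and reset at each detected endpoint so the committed utterance is not decoded twice. The sketch below condenses that flow using only the sherpa-onnx calls that appear above; the streamMicrophone wrapper, the isRecording/onPartial/onFinal callbacks, and the AudioRecord setup are illustrative assumptions standing in for the activity's fields, UI updates, and sendToPico call.

// Condensed sketch of the streaming recognition loop introduced in this PR.
// Assumes the model files are already unpacked into modelDir and that the
// RECORD_AUDIO permission has been granted before this is called.
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule
import com.k2fsa.sherpa.onnx.FeatureConfig
import com.k2fsa.sherpa.onnx.OnlineModelConfig
import com.k2fsa.sherpa.onnx.OnlineRecognizer
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import java.io.File

fun streamMicrophone(
    modelDir: File,
    isRecording: () -> Boolean,
    onPartial: (String) -> Unit,
    onFinal: (String) -> Unit,
) {
    val config = OnlineRecognizerConfig(
        featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
        modelConfig = OnlineModelConfig(
            transducer = OnlineTransducerModelConfig(
                encoder = File(modelDir, "encoder.onnx").absolutePath,
                decoder = File(modelDir, "decoder.onnx").absolutePath,
                joiner = File(modelDir, "joiner.onnx").absolutePath,
            ),
            tokens = File(modelDir, "tokens.txt").absolutePath,
            numThreads = 1,
            debug = false,
            modelType = "zipformer",
        ),
        // rule1: commit the utterance after 2.4 s of trailing silence.
        endpointConfig = EndpointConfig(
            rule1 = EndpointRule(
                mustContainNonSilence = false,
                minTrailingSilence = 2.4f,
                minUtteranceLength = 0.0f,
            )
        ),
        enableEndpoint = true,
        decodingMethod = "greedy_search",
        maxActivePaths = 4,
    )
    val recognizer = OnlineRecognizer(config = config)
    val stream = recognizer.createStream()

    val sampleRate = 16000
    val bufferSize = AudioRecord.getMinBufferSize(
        sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT
    )
    val record = AudioRecord(
        MediaRecorder.AudioSource.MIC, sampleRate,
        AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize
    )
    record.startRecording()
    val buffer = ShortArray(bufferSize / 2)

    while (isRecording()) {
        val ret = record.read(buffer, 0, buffer.size)
        if (ret <= 0) continue
        // 16-bit PCM -> [-1, 1] floats, then feed the stream.
        val samples = FloatArray(ret) { buffer[it] / 32768.0f }
        stream.acceptWaveform(samples, sampleRate)
        while (recognizer.isReady(stream)) {
            recognizer.decode(stream)
        }
        val text = recognizer.getResult(stream).text
        if (recognizer.isEndpoint(stream)) {
            if (text.isNotEmpty()) onFinal(text.lowercase())
            // Reset before reading the next chunk so the committed
            // utterance is not decoded again.
            recognizer.reset(stream)
        } else if (text.isNotEmpty()) {
            onPartial(text.lowercase())
        }
    }
    record.stop()
    record.release()
}

Resetting on the audio thread right after the endpoint fires, rather than inside runOnUiThread, is what keeps the next audio chunk from landing in a stream that still holds the previous utterance; this mirrors the "FIX 2" comment in the diff.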