2 Commits

3 changed files with 80 additions and 90 deletions

View File

@@ -12,7 +12,7 @@ android {
minSdk = 28
targetSdk = 36
versionCode = 10
versionName = "1.1"
versionName = "1.0"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
}
@@ -25,6 +25,11 @@ android {
"proguard-rules.pro"
)
}
debug {
applicationIdSuffix = ".streaming"
// This changes the app name on your homescreen to "Speech To Keyboard (Streaming)" for debug builds
resValue("string", "app_name", "Speech To Keyboard (Streaming)")
}
}
compileOptions {
sourceCompatibility = JavaVersion.VERSION_11

View File

@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
var entry = zipInputStream.nextEntry
var foundEncoder = false
var foundDecoder = false
// var foundJoiner = false - removed for true Whisper model use
var foundJoiner = false
var foundTokens = false
while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
val targetFileName = when {
name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
// name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use
name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
name.contains("tokens.txt") -> "tokens.txt"
else -> null
}
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
when (targetFileName) {
"encoder.onnx" -> foundEncoder = true
"decoder.onnx" -> foundDecoder = true
// "joiner.onnx" -> foundJoiner = true - removed for true Whisper model use
"joiner.onnx" -> foundJoiner = true
"tokens.txt" -> foundTokens = true
}
}
@@ -124,8 +124,7 @@ class MainActivity : AppCompatActivity() {
}
runOnUiThread {
// if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use
if (foundEncoder && foundDecoder && foundTokens) {
if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
statusText.text = "Model Installed Successfully!"
Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
} else {

View File

@@ -20,23 +20,11 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule
/*
import com.k2fsa.sherpa.onnx.FeatureConfig
import com.k2fsa.sherpa.onnx.OnlineModelConfig
import com.k2fsa.sherpa.onnx.OnlineRecognizer
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream
*/
// Below for the "offline" libraries and the true Whisper integration
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineStream
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.OfflineModelConfig
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
import com.k2fsa.sherpa.onnx.FeatureConfig
import java.io.File
class TestModelActivity : AppCompatActivity() {
@@ -46,10 +34,8 @@ class TestModelActivity : AppCompatActivity() {
private lateinit var micButton: ImageButton
// Sherpa (Whisper) Components
// private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model use
// private var stream: OnlineStream? = null // - Removed for true Whisper model use
private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
private var stream: OfflineStream? = null // Was OnlineStream
private var recognizer: OnlineRecognizer? = null
private var stream: OnlineStream? = null
private var isRecording = false
private var recordingThread: Thread? = null
@@ -90,37 +76,46 @@ class TestModelActivity : AppCompatActivity() {
return
}
// 1. Point to your files
val encoderPath = File(modelDir, "encoder.onnx").absolutePath
val decoderPath = File(modelDir, "decoder.onnx").absolutePath
val tokensPath = File(modelDir, "tokens.txt").absolutePath
try {
// CONFIGURATION FOR WHISPER (OFFLINE)
val config = OfflineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = OfflineModelConfig(
// This parameter 'whisper' exists here!
whisper = OfflineWhisperModelConfig(
encoder = encoderPath,
decoder = decoderPath,
// tokenizer is not strictly needed in config here if passed in tokens param below
// but usually standard offline config uses just these two:
),
tokens = tokensPath,
modelType = "whisper",
// 1. Define Model Paths
val transducerConfig = OnlineTransducerModelConfig(
encoder = File(modelDir, "encoder.onnx").absolutePath,
decoder = File(modelDir, "decoder.onnx").absolutePath,
joiner = File(modelDir, "joiner.onnx").absolutePath
)
// 2. Define General Config
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
transducer = transducerConfig,
tokens = File(modelDir, "tokens.txt").absolutePath,
numThreads = 1,
debug = false,
numThreads = 1
),
modelType = "zipformer"
)
// 3. Define Endpoint Rule (The fix for your error)
// rule1 = endpoint after 2.4 seconds of trailing silence. Because mustContainNonSilence is false, it applies even if no speech was recognized before the silence.
val silenceRule = EndpointRule(
mustContainNonSilence = false,
minTrailingSilence = 2.4f,
minUtteranceLength = 0.0f
)
// 4. Create Recognizer Config
val config = OnlineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = onlineModelConfig,
endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
enableEndpoint = true,
decodingMethod = "greedy_search",
maxActivePaths = 4
)
// Initialize OFFLINE Engine
recognizer = OfflineRecognizer(config = config)
// recognizer = OnlineRecognizer(assetManager = assets, config = config)
recognizer = OnlineRecognizer(config = config)
stream = recognizer?.createStream()
outputText.text = "Whisper Engine Ready."
outputText.text = "Engine Loaded. Ready to Stream."
} catch (e: Exception) {
Log.e("Sherpa", "Init Error", e)
@@ -145,9 +140,12 @@ class TestModelActivity : AppCompatActivity() {
return
}
// Reset the stream for a new session
// Note: Sherpa streams can be persistent, but resetting ensures clean start
// If you want continuous conversation, don't reset 'committedText'
// FIX 1: CLEAR THE BUFFER
// This prevents the "ghost text" from the previous session appearing
// when you hit record again.
stream?.let { activeStream ->
recognizer?.reset(activeStream)
}
isRecording = true
micButton.setColorFilter(android.graphics.Color.RED)
@@ -161,46 +159,19 @@ class TestModelActivity : AppCompatActivity() {
private fun stopRecording() {
isRecording = false
try {
recordingThread?.join() // Wait for loop to finish
} catch (e: InterruptedException) {
// Handle interruption if necessary
}
recordingThread?.join()
micButton.clearColorFilter()
// FIX: Safely unwrap 'stream' before passing it to getResult
// This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
val finalCurrentText = stream?.let { activeStream ->
recognizer?.getResult(activeStream)?.text
} ?: ""
val cleanFinal = finalCurrentText.lowercase()
if (cleanFinal.isNotEmpty()) {
// 1. Commit to history
committedText += "$cleanFinal "
// 2. Send to Pico
sendToPico("$cleanFinal ")
// 3. Update UI
outputText.text = "$committedText \n[Stopped]"
// 4. Reset for next time
// We release the old stream and create a fresh one for the next sentence
stream?.release()
stream = recognizer?.createStream()
} else {
outputText.append("\n[Stopped - No Text]")
}
// Just show what we have, don't overwrite with "[Stopped]"
// to prevent visual jarring.
outputText.append("\n[Stopped]")
}
private fun processAudioLoop() {
val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// 1. GUARD CLAUSE (Safely unwrap nullables)
// Guard clauses
val localRec = recognizer ?: return
val localStream = stream ?: return
@@ -218,28 +189,43 @@ class TestModelActivity : AppCompatActivity() {
if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
// 2. Feed Audio
localStream.acceptWaveform(samples, sampleRate)
// 3. Decode (No isReady check needed for Offline)
while (localRec.isReady(localStream)) {
localRec.decode(localStream)
}
// 4. Get Current Text
// Whisper updates this string constantly as it hears more
val text = localRec.getResult(localStream).text
val isEndpoint = localRec.isEndpoint(localStream)
if (text.isNotEmpty()) {
val cleanText = text.lowercase()
if (isEndpoint) {
// FIX 2: THE ORDER OF OPERATIONS
// A. Update UI first
runOnUiThread {
committedText += "$cleanText "
outputText.text = committedText
sendToPico("$cleanText ")
}
// B. RESET IMMEDIATELY ON BACKGROUND THREAD
// We do this HERE, not inside runOnUiThread.
// This guarantees the stream is clean BEFORE the loop
// reads the next chunk of audio.
localRec.reset(localStream)
} else {
// Standard partial update
runOnUiThread {
// Update the screen so user sees what is happening
// We do NOT send to USB yet, because Whisper might change this text
// as you keep speaking.
outputText.text = "$committedText $cleanText"
}
}
}
}
}
record.stop()
record.release()
}