1 Commits

24 changed files with 101 additions and 72 deletions

View File

@@ -11,8 +11,8 @@ android {
applicationId = "net.mmanningau.speechtokeyboard"
minSdk = 28
targetSdk = 36
versionCode = 9
versionName = "1.0"
versionCode = 10
versionName = "1.1"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 411 KiB

View File

@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
var entry = zipInputStream.nextEntry
var foundEncoder = false
var foundDecoder = false
var foundJoiner = false
// var foundJoiner = false - removed for true Whisper model use
var foundTokens = false
while (entry != null) {
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
val targetFileName = when {
name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
// name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use
name.contains("tokens.txt") -> "tokens.txt"
else -> null
}
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
when (targetFileName) {
"encoder.onnx" -> foundEncoder = true
"decoder.onnx" -> foundDecoder = true
"joiner.onnx" -> foundJoiner = true
// "joiner.onnx" -> foundJoiner = true - removed for true Whisper model use
"tokens.txt" -> foundTokens = true
}
}
@@ -124,7 +124,8 @@ class MainActivity : AppCompatActivity() {
}
runOnUiThread {
if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
// if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use
if (foundEncoder && foundDecoder && foundTokens) {
statusText.text = "Model Installed Successfully!"
Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
} else {

View File

@@ -20,11 +20,23 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
import com.k2fsa.sherpa.onnx.EndpointConfig
import com.k2fsa.sherpa.onnx.EndpointRule
/*
import com.k2fsa.sherpa.onnx.FeatureConfig
import com.k2fsa.sherpa.onnx.OnlineModelConfig
import com.k2fsa.sherpa.onnx.OnlineRecognizer
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
import com.k2fsa.sherpa.onnx.OnlineStream
*/
// Below for the "offline" libraries and the true Whisper integration
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineStream
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.OfflineModelConfig
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
import com.k2fsa.sherpa.onnx.FeatureConfig
import java.io.File
class TestModelActivity : AppCompatActivity() {
@@ -34,8 +46,10 @@ class TestModelActivity : AppCompatActivity() {
private lateinit var micButton: ImageButton
// Sherpa (Whisper) Components
private var recognizer: OnlineRecognizer? = null
private var stream: OnlineStream? = null
// private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model use
// private var stream: OnlineStream? = null // - Removed for true Whisper model use
private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
private var stream: OfflineStream? = null // Was OnlineStream
private var isRecording = false
private var recordingThread: Thread? = null
@@ -76,46 +90,37 @@ class TestModelActivity : AppCompatActivity() {
return
}
// 1. Point to your files
val encoderPath = File(modelDir, "encoder.onnx").absolutePath
val decoderPath = File(modelDir, "decoder.onnx").absolutePath
val tokensPath = File(modelDir, "tokens.txt").absolutePath
try {
// 1. Define Model Paths
val transducerConfig = OnlineTransducerModelConfig(
encoder = File(modelDir, "encoder.onnx").absolutePath,
decoder = File(modelDir, "decoder.onnx").absolutePath,
joiner = File(modelDir, "joiner.onnx").absolutePath
)
// 2. Define General Config
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
transducer = transducerConfig,
tokens = File(modelDir, "tokens.txt").absolutePath,
numThreads = 1,
debug = false,
modelType = "zipformer"
)
// 3. Define Endpoint Rule (The fix for your error)
// rule1 = detected silence after speech. We set this to 2.4 seconds.
val silenceRule = EndpointRule(
mustContainNonSilence = false,
minTrailingSilence = 2.4f,
minUtteranceLength = 0.0f
)
// 4. Create Recognizer Config
val config = OnlineRecognizerConfig(
// CONFIGURATION FOR WHISPER (OFFLINE)
val config = OfflineRecognizerConfig(
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
modelConfig = onlineModelConfig,
endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
enableEndpoint = true,
modelConfig = OfflineModelConfig(
// This parameter 'whisper' exists here!
whisper = OfflineWhisperModelConfig(
encoder = encoderPath,
decoder = decoderPath,
// tokenizer is not strictly needed in config here if passed in tokens param below
// but usually standard offline config uses just these two:
),
tokens = tokensPath,
modelType = "whisper",
debug = false,
numThreads = 1
),
decodingMethod = "greedy_search",
maxActivePaths = 4
)
// recognizer = OnlineRecognizer(assetManager = assets, config = config)
recognizer = OnlineRecognizer(config = config)
// Initialize OFFLINE Engine
recognizer = OfflineRecognizer(config = config)
stream = recognizer?.createStream()
outputText.text = "Engine Loaded. Ready to Stream."
outputText.text = "Whisper Engine Ready."
} catch (e: Exception) {
Log.e("Sherpa", "Init Error", e)
@@ -156,21 +161,46 @@ class TestModelActivity : AppCompatActivity() {
private fun stopRecording() {
isRecording = false
recordingThread?.join()
try {
recordingThread?.join() // Wait for loop to finish
} catch (e: InterruptedException) {
// Handle interruption if necessary
}
micButton.clearColorFilter()
// Just show what we have, don't overwrite with "[Stopped]"
// to prevent visual jarring.
outputText.append("\n[Stopped]")
// FIX: Safely unwrap 'stream' before passing it to getResult
// This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
val finalCurrentText = stream?.let { activeStream ->
recognizer?.getResult(activeStream)?.text
} ?: ""
val cleanFinal = finalCurrentText.lowercase()
if (cleanFinal.isNotEmpty()) {
// 1. Commit to history
committedText += "$cleanFinal "
// 2. Send to Pico
sendToPico("$cleanFinal ")
// 3. Update UI
outputText.text = "$committedText \n[Stopped]"
// 4. Reset for next time
// We release the old stream and create a fresh one for the next sentence
stream?.release()
stream = recognizer?.createStream()
} else {
outputText.append("\n[Stopped - No Text]")
}
}
private fun processAudioLoop() {
val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
// 1. GUARD CLAUSE: Unpack nullables safely
// If recognizer or stream are null, we stop immediately.
// This creates 'localRec' and 'localStream' which are GUARANTEED non-null.
// 1. GUARD CLAUSE (Safely unwrap nullables)
val localRec = recognizer ?: return
val localStream = stream ?: return
@@ -188,32 +218,28 @@ class TestModelActivity : AppCompatActivity() {
if (ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
// 2. Use the LOCAL (non-null) variables
// 2. Feed Audio
localStream.acceptWaveform(samples, sampleRate)
while (localRec.isReady(localStream)) {
// 3. Decode (No isReady check needed for Offline)
localRec.decode(localStream)
}
// 4. Get Current Text
// Whisper updates this string constantly as it hears more
val text = localRec.getResult(localStream).text
val isEndpoint = localRec.isEndpoint(localStream)
if (text.isNotEmpty()) {
val cleanText = text.lowercase()
runOnUiThread {
if (isEndpoint) {
committedText += "$cleanText "
outputText.text = committedText
sendToPico("$cleanText ")
localRec.reset(localStream)
} else {
// Update the screen so user sees what is happening
// We do NOT send to USB yet, because Whisper might change this text
// as you keep speaking.
outputText.text = "$committedText $cleanText"
}
}
}
}
}
record.stop()
record.release()
}

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@color/ic_launcher_background"/>
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
</adaptive-icon>

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@color/ic_launcher_background"/>
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
</adaptive-icon>

View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 982 B

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 3.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.6 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="ic_launcher_background">#0878F5</color>
</resources>