Attempt one after major code update to implement the true Whisper model use and operation
@@ -11,8 +11,8 @@ android {
|
|||||||
applicationId = "net.mmanningau.speechtokeyboard"
|
applicationId = "net.mmanningau.speechtokeyboard"
|
||||||
minSdk = 28
|
minSdk = 28
|
||||||
targetSdk = 36
|
targetSdk = 36
|
||||||
versionCode = 9
|
versionCode = 10
|
||||||
versionName = "1.0"
|
versionName = "1.1"
|
||||||
|
|
||||||
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
|
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
|
||||||
}
|
}
|
||||||
|
|||||||
BIN
app/src/main/ic_launcher-playstore.png
Normal file
|
After Width: | Height: | Size: 411 KiB |
@@ -89,7 +89,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
var entry = zipInputStream.nextEntry
|
var entry = zipInputStream.nextEntry
|
||||||
var foundEncoder = false
|
var foundEncoder = false
|
||||||
var foundDecoder = false
|
var foundDecoder = false
|
||||||
var foundJoiner = false
|
// var foundJoiner = false - removed for true Whisper model use
|
||||||
var foundTokens = false
|
var foundTokens = false
|
||||||
|
|
||||||
while (entry != null) {
|
while (entry != null) {
|
||||||
@@ -100,7 +100,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
val targetFileName = when {
|
val targetFileName = when {
|
||||||
name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
|
name.contains("encoder") && name.endsWith(".onnx") -> "encoder.onnx"
|
||||||
name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
|
name.contains("decoder") && name.endsWith(".onnx") -> "decoder.onnx"
|
||||||
name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx"
|
// name.contains("joiner") && name.endsWith(".onnx") -> "joiner.onnx" - removed for true Whisper model use
|
||||||
name.contains("tokens.txt") -> "tokens.txt"
|
name.contains("tokens.txt") -> "tokens.txt"
|
||||||
else -> null
|
else -> null
|
||||||
}
|
}
|
||||||
@@ -115,7 +115,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
when (targetFileName) {
|
when (targetFileName) {
|
||||||
"encoder.onnx" -> foundEncoder = true
|
"encoder.onnx" -> foundEncoder = true
|
||||||
"decoder.onnx" -> foundDecoder = true
|
"decoder.onnx" -> foundDecoder = true
|
||||||
"joiner.onnx" -> foundJoiner = true
|
// "joiner.onnx" -> foundJoiner = true - removed for true Whisper model use
|
||||||
"tokens.txt" -> foundTokens = true
|
"tokens.txt" -> foundTokens = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -124,7 +124,8 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
runOnUiThread {
|
runOnUiThread {
|
||||||
if (foundEncoder && foundDecoder && foundJoiner && foundTokens) {
|
// if (foundEncoder && foundDecoder && foundJoiner && foundTokens) { - removed for true Whisper model use
|
||||||
|
if (foundEncoder && foundDecoder && foundTokens) {
|
||||||
statusText.text = "Model Installed Successfully!"
|
statusText.text = "Model Installed Successfully!"
|
||||||
Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
|
Toast.makeText(this, "Ready to use!", Toast.LENGTH_SHORT).show()
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -20,11 +20,23 @@ import com.hoho.android.usbserial.driver.UsbSerialProber
|
|||||||
import com.hoho.android.usbserial.util.SerialInputOutputManager
|
import com.hoho.android.usbserial.util.SerialInputOutputManager
|
||||||
import com.k2fsa.sherpa.onnx.EndpointConfig
|
import com.k2fsa.sherpa.onnx.EndpointConfig
|
||||||
import com.k2fsa.sherpa.onnx.EndpointRule
|
import com.k2fsa.sherpa.onnx.EndpointRule
|
||||||
|
/*
|
||||||
import com.k2fsa.sherpa.onnx.FeatureConfig
|
import com.k2fsa.sherpa.onnx.FeatureConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.OnlineModelConfig
|
||||||
import com.k2fsa.sherpa.onnx.OnlineRecognizer
|
import com.k2fsa.sherpa.onnx.OnlineRecognizer
|
||||||
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
|
import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig
|
||||||
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
|
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
|
||||||
import com.k2fsa.sherpa.onnx.OnlineStream
|
import com.k2fsa.sherpa.onnx.OnlineStream
|
||||||
|
|
||||||
|
*/
|
||||||
|
// Below for the "offline" libraries and the true Whisper integration
|
||||||
|
import com.k2fsa.sherpa.onnx.OfflineRecognizer
|
||||||
|
import com.k2fsa.sherpa.onnx.OfflineStream
|
||||||
|
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.OfflineModelConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.FeatureConfig
|
||||||
|
|
||||||
import java.io.File
|
import java.io.File
|
||||||
|
|
||||||
class TestModelActivity : AppCompatActivity() {
|
class TestModelActivity : AppCompatActivity() {
|
||||||
@@ -34,8 +46,10 @@ class TestModelActivity : AppCompatActivity() {
|
|||||||
private lateinit var micButton: ImageButton
|
private lateinit var micButton: ImageButton
|
||||||
|
|
||||||
// Sherpa (Whisper) Components
|
// Sherpa (Whisper) Components
|
||||||
private var recognizer: OnlineRecognizer? = null
|
// private var recognizer: OnlineRecognizer? = null // - Removed for true Whisper model use
|
||||||
private var stream: OnlineStream? = null
|
// private var stream: OnlineStream? = null // - Removed for true Whisper model use
|
||||||
|
private var recognizer: OfflineRecognizer? = null // Was OnlineRecognizer
|
||||||
|
private var stream: OfflineStream? = null // Was OnlineStream
|
||||||
private var isRecording = false
|
private var isRecording = false
|
||||||
private var recordingThread: Thread? = null
|
private var recordingThread: Thread? = null
|
||||||
|
|
||||||
@@ -76,46 +90,37 @@ class TestModelActivity : AppCompatActivity() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 1. Point to your files
|
||||||
|
val encoderPath = File(modelDir, "encoder.onnx").absolutePath
|
||||||
|
val decoderPath = File(modelDir, "decoder.onnx").absolutePath
|
||||||
|
val tokensPath = File(modelDir, "tokens.txt").absolutePath
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 1. Define Model Paths
|
// CONFIGURATION FOR WHISPER (OFFLINE)
|
||||||
val transducerConfig = OnlineTransducerModelConfig(
|
val config = OfflineRecognizerConfig(
|
||||||
encoder = File(modelDir, "encoder.onnx").absolutePath,
|
|
||||||
decoder = File(modelDir, "decoder.onnx").absolutePath,
|
|
||||||
joiner = File(modelDir, "joiner.onnx").absolutePath
|
|
||||||
)
|
|
||||||
|
|
||||||
// 2. Define General Config
|
|
||||||
val onlineModelConfig = com.k2fsa.sherpa.onnx.OnlineModelConfig(
|
|
||||||
transducer = transducerConfig,
|
|
||||||
tokens = File(modelDir, "tokens.txt").absolutePath,
|
|
||||||
numThreads = 1,
|
|
||||||
debug = false,
|
|
||||||
modelType = "zipformer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// 3. Define Endpoint Rule (The fix for your error)
|
|
||||||
// rule1 = detected silence after speech. We set this to 2.4 seconds.
|
|
||||||
val silenceRule = EndpointRule(
|
|
||||||
mustContainNonSilence = false,
|
|
||||||
minTrailingSilence = 2.4f,
|
|
||||||
minUtteranceLength = 0.0f
|
|
||||||
)
|
|
||||||
|
|
||||||
// 4. Create Recognizer Config
|
|
||||||
val config = OnlineRecognizerConfig(
|
|
||||||
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
|
featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
|
||||||
modelConfig = onlineModelConfig,
|
modelConfig = OfflineModelConfig(
|
||||||
endpointConfig = EndpointConfig(rule1 = silenceRule), // Pass the rule object here
|
// This parameter 'whisper' exists here!
|
||||||
enableEndpoint = true,
|
whisper = OfflineWhisperModelConfig(
|
||||||
|
encoder = encoderPath,
|
||||||
|
decoder = decoderPath,
|
||||||
|
// tokenizer is not strictly needed in config here if passed in tokens param below
|
||||||
|
// but usually standard offline config uses just these two:
|
||||||
|
),
|
||||||
|
tokens = tokensPath,
|
||||||
|
modelType = "whisper",
|
||||||
|
debug = false,
|
||||||
|
numThreads = 1
|
||||||
|
),
|
||||||
decodingMethod = "greedy_search",
|
decodingMethod = "greedy_search",
|
||||||
maxActivePaths = 4
|
maxActivePaths = 4
|
||||||
)
|
)
|
||||||
|
|
||||||
// recognizer = OnlineRecognizer(assetManager = assets, config = config)
|
// Initialize OFFLINE Engine
|
||||||
recognizer = OnlineRecognizer(config = config)
|
recognizer = OfflineRecognizer(config = config)
|
||||||
stream = recognizer?.createStream()
|
stream = recognizer?.createStream()
|
||||||
|
|
||||||
outputText.text = "Engine Loaded. Ready to Stream."
|
outputText.text = "Whisper Engine Ready."
|
||||||
|
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
Log.e("Sherpa", "Init Error", e)
|
Log.e("Sherpa", "Init Error", e)
|
||||||
@@ -156,21 +161,46 @@ class TestModelActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
private fun stopRecording() {
|
private fun stopRecording() {
|
||||||
isRecording = false
|
isRecording = false
|
||||||
recordingThread?.join()
|
try {
|
||||||
|
recordingThread?.join() // Wait for loop to finish
|
||||||
|
} catch (e: InterruptedException) {
|
||||||
|
// Handle interruption if necessary
|
||||||
|
}
|
||||||
|
|
||||||
micButton.clearColorFilter()
|
micButton.clearColorFilter()
|
||||||
|
|
||||||
// Just show what we have, don't overwrite with "[Stopped]"
|
// FIX: Safely unwrap 'stream' before passing it to getResult
|
||||||
// to prevent visual jarring.
|
// This reads: "If stream is NOT null, call getResult. Otherwise return empty string."
|
||||||
outputText.append("\n[Stopped]")
|
val finalCurrentText = stream?.let { activeStream ->
|
||||||
|
recognizer?.getResult(activeStream)?.text
|
||||||
|
} ?: ""
|
||||||
|
|
||||||
|
val cleanFinal = finalCurrentText.lowercase()
|
||||||
|
|
||||||
|
if (cleanFinal.isNotEmpty()) {
|
||||||
|
// 1. Commit to history
|
||||||
|
committedText += "$cleanFinal "
|
||||||
|
|
||||||
|
// 2. Send to Pico
|
||||||
|
sendToPico("$cleanFinal ")
|
||||||
|
|
||||||
|
// 3. Update UI
|
||||||
|
outputText.text = "$committedText \n[Stopped]"
|
||||||
|
|
||||||
|
// 4. Reset for next time
|
||||||
|
// We release the old stream and create a fresh one for the next sentence
|
||||||
|
stream?.release()
|
||||||
|
stream = recognizer?.createStream()
|
||||||
|
} else {
|
||||||
|
outputText.append("\n[Stopped - No Text]")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun processAudioLoop() {
|
private fun processAudioLoop() {
|
||||||
val sampleRate = 16000
|
val sampleRate = 16000
|
||||||
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
|
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
|
||||||
|
|
||||||
// 1. GUARD CLAUSE: Unpack nullables safely
|
// 1. GUARD CLAUSE (Safely unwrap nullables)
|
||||||
// If recognizer or stream are null, we stop immediately.
|
|
||||||
// This creates 'localRec' and 'localStream' which are GUARANTEED non-null.
|
|
||||||
val localRec = recognizer ?: return
|
val localRec = recognizer ?: return
|
||||||
val localStream = stream ?: return
|
val localStream = stream ?: return
|
||||||
|
|
||||||
@@ -188,28 +218,24 @@ class TestModelActivity : AppCompatActivity() {
|
|||||||
if (ret > 0) {
|
if (ret > 0) {
|
||||||
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
||||||
|
|
||||||
// 2. Use the LOCAL (non-null) variables
|
// 2. Feed Audio
|
||||||
localStream.acceptWaveform(samples, sampleRate)
|
localStream.acceptWaveform(samples, sampleRate)
|
||||||
|
|
||||||
while (localRec.isReady(localStream)) {
|
// 3. Decode (No isReady check needed for Offline)
|
||||||
localRec.decode(localStream)
|
localRec.decode(localStream)
|
||||||
}
|
|
||||||
|
|
||||||
|
// 4. Get Current Text
|
||||||
|
// Whisper updates this string constantly as it hears more
|
||||||
val text = localRec.getResult(localStream).text
|
val text = localRec.getResult(localStream).text
|
||||||
val isEndpoint = localRec.isEndpoint(localStream)
|
|
||||||
|
|
||||||
if (text.isNotEmpty()) {
|
if (text.isNotEmpty()) {
|
||||||
val cleanText = text.lowercase()
|
val cleanText = text.lowercase()
|
||||||
|
|
||||||
runOnUiThread {
|
runOnUiThread {
|
||||||
if (isEndpoint) {
|
// Update the screen so user sees what is happening
|
||||||
committedText += "$cleanText "
|
// We do NOT send to USB yet, because Whisper might change this text
|
||||||
outputText.text = committedText
|
// as you keep speaking.
|
||||||
sendToPico("$cleanText ")
|
outputText.text = "$committedText $cleanText"
|
||||||
localRec.reset(localStream)
|
|
||||||
} else {
|
|
||||||
outputText.text = "$committedText $cleanText"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
5
app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||||
|
<background android:drawable="@color/ic_launcher_background"/>
|
||||||
|
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
|
||||||
|
</adaptive-icon>
|
||||||
5
app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||||
|
<background android:drawable="@color/ic_launcher_background"/>
|
||||||
|
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
|
||||||
|
</adaptive-icon>
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
|
||||||
<background android:drawable="@drawable/ic_launcher_background" />
|
|
||||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
|
||||||
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
|
|
||||||
</adaptive-icon>
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
|
||||||
<background android:drawable="@drawable/ic_launcher_background" />
|
|
||||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
|
||||||
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
|
|
||||||
</adaptive-icon>
|
|
||||||
|
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 3.9 KiB |
BIN
app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp
Normal file
|
After Width: | Height: | Size: 12 KiB |
|
Before Width: | Height: | Size: 2.8 KiB After Width: | Height: | Size: 5.7 KiB |
|
Before Width: | Height: | Size: 982 B After Width: | Height: | Size: 2.3 KiB |
BIN
app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp
Normal file
|
After Width: | Height: | Size: 5.7 KiB |
|
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 3.2 KiB |
|
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 6.2 KiB |
BIN
app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp
Normal file
|
After Width: | Height: | Size: 20 KiB |
|
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 8.5 KiB |
|
Before Width: | Height: | Size: 2.8 KiB After Width: | Height: | Size: 12 KiB |
BIN
app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp
Normal file
|
After Width: | Height: | Size: 43 KiB |
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 19 KiB |
BIN
app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp
Normal file
|
After Width: | Height: | Size: 79 KiB |
|
Before Width: | Height: | Size: 7.6 KiB After Width: | Height: | Size: 25 KiB |
4
app/src/main/res/values/ic_launcher_background.xml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<resources>
|
||||||
|
<color name="ic_launcher_background">#0878F5</color>
|
||||||
|
</resources>
|
||||||