Initial steps to migrate to the whisper/sherpa setup.

Still missing major changes to the TestModelActivity.kt file, which are coming next...
2026-01-22 19:49:48 +11:00
parent 12c0508713
commit 404bc55ed3
4 changed files with 115 additions and 28 deletions

MainActivity.kt

@@ -9,8 +9,8 @@ import android.widget.TextView
import android.widget.Toast
import androidx.activity.result.contract.ActivityResultContracts
import androidx.appcompat.app.AppCompatActivity
import org.vosk.Model
import org.vosk.android.SpeechService
// import org.vosk.Model
// import org.vosk.android.SpeechService --- removed as part of migration to whisper.cpp
import java.io.File
import java.util.zip.ZipInputStream
@@ -19,9 +19,9 @@ class MainActivity : AppCompatActivity() {
// UI Components
private lateinit var statusText: TextView
// Vosk Components
private var model: Model? = null
private var speechService: SpeechService? = null
// Vosk Components - removed as part of whisper migration
// private var model: Model? = null
// private var speechService: SpeechService? = null
// 1. THE FILE PICKER REGISTRY
// This handles the result when the user picks a ZIP file
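The extraction code itself is outside this diff, but for context, unpacking the picked ZIP into private storage with the already-imported ZipInputStream typically looks something like the sketch below (the function name and error handling are illustrative, not part of this commit):

    // Illustrative only: unpack a user-picked ZIP into app-private storage.
    private fun extractZip(input: java.io.InputStream, targetDir: File) {
        ZipInputStream(input).use { zip ->
            var entry = zip.nextEntry
            while (entry != null) {
                val outFile = File(targetDir, entry.name)
                if (entry.isDirectory) {
                    outFile.mkdirs()
                } else {
                    outFile.parentFile?.mkdirs()
                    outFile.outputStream().use { out -> zip.copyTo(out) }
                }
                zip.closeEntry()
                entry = zip.nextEntry
            }
        }
    }

A production version would also validate entry names so an archive cannot write outside targetDir.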
@@ -43,7 +43,7 @@ class MainActivity : AppCompatActivity() {
// ADD THIS LINE AT THE BOTTOM:
// This attempts to load the model immediately if files exist
initVoskModel()
// initVoskModel() - removed as part of whisper migration
}
// 2. SETUP THE MENU
@@ -113,7 +113,7 @@ class MainActivity : AppCompatActivity() {
// Back to UI Thread to say success
runOnUiThread {
statusText.text = "Model Installed! Initializing..."
initVoskModel()
// initVoskModel() - removed as part of the whisper migration
}
} catch (e: Exception) {
@@ -127,7 +127,9 @@ class MainActivity : AppCompatActivity() {
// 6. INITIALIZE VOSK "BRAIN"
// Replace your existing initVoskModel with this updated version
/*
private fun initVoskModel() {
val modelPath = File(filesDir, "vosk-model")
// Check if the directory exists before trying to load
@@ -146,4 +148,5 @@ class MainActivity : AppCompatActivity() {
statusText.text = "Error loading saved model: ${e.message}"
}
}
*/
}
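For reference, a minimal sketch of what could eventually stand in for initVoskModel() here, assuming the sherpa/whisper ONNX files get installed into filesDir the same way the Vosk model was. The function name and file names below are placeholders mirroring the example paths used in TestModelActivity; they are not part of this commit:

    // Hypothetical replacement for initVoskModel(): verify the ONNX files exist
    // in private storage before reporting the model as ready.
    private fun checkSherpaFiles() {
        val required = listOf(
            "encoder-epoch-99-avg-1.onnx", // example names, matching TestModelActivity
            "decoder-epoch-99-avg-1.onnx",
            "joiner-epoch-99-avg-1.onnx"
        )
        val missing = required.filter { !File(filesDir, it).exists() }
        statusText.text = if (missing.isEmpty()) {
            "Model files found. Ready to test."
        } else {
            "Missing model files: ${missing.joinToString()}"
        }
    }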

TestModelActivity.kt

@@ -10,10 +10,14 @@ import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import org.json.JSONObject
import org.vosk.Model
import org.vosk.Recognizer
import org.vosk.android.RecognitionListener
import org.vosk.android.SpeechService
import com.k2fsa.sherpa.onnx.* // import for whisper sherpa wrapper
import android.media.AudioFormat // android.media imports used by the AudioRecord capture loop in startRecordingLoop()
import android.media.AudioRecord
import android.media.MediaRecorder
// import org.vosk.Model --- migration to whisper removals
// import org.vosk.Recognizer
// import org.vosk.android.RecognitionListener
// import org.vosk.android.SpeechService
import java.io.File
import android.content.Context
@@ -22,14 +26,20 @@ import com.hoho.android.usbserial.driver.UsbSerialPort
import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager
class TestModelActivity : AppCompatActivity(), RecognitionListener {
// class TestModelActivity : AppCompatActivity(), RecognitionListener {
class TestModelActivity : AppCompatActivity() {
private lateinit var outputText: TextView
private lateinit var micButton: ImageButton
// Vosk Components
private var model: Model? = null
private var speechService: SpeechService? = null
// Whisper/Sherpa wrapper setup variables here
// private var audioRecorder: AudioRecorder? = null // placeholder for a future recorder helper; startRecordingLoop() below manages its own AudioRecord
private var recognizer: OnlineRecognizer? = null
private var stream: OnlineStream? = null
// Vosk Components - removed as part of the whisper migration
// private var model: Model? = null
// private var speechService: SpeechService? = null
private var isListening = false
// USB Components
@@ -94,8 +104,82 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
}
}
// Whisper/Sherpa implementation functions here....
private fun initSherpaModel() {
    // Sherpa requires specific configuration.
    // NOTE: this uses sherpa-onnx's streaming (Online) transducer API with example
    // model file names; depending on the sherpa-onnx version, the transducer paths
    // may need to be nested inside a model config along with a tokens file.
    val config = OnlineRecognizerConfig(
        featConfig = FeatureConfig(sampleRate = 16000.0f, featureDim = 80),
        transducerModelConfig = OnlineTransducerModelConfig(
            encoder = "$filesDir/encoder-epoch-99-avg-1.onnx", // Example path
            decoder = "$filesDir/decoder-epoch-99-avg-1.onnx",
            joiner = "$filesDir/joiner-epoch-99-avg-1.onnx",
        ),
        enableEndpoint = true, // Detects when you stop speaking
        ruleFsts = "",
        decodingMethod = "greedy_search",
        maxActivePaths = 4
    )
    try {
        recognizer = OnlineRecognizer(assetManager = assets, config = config)
        stream = recognizer?.createStream()
        outputText.text = "Whisper/Sherpa Ready!"
    } catch (e: Exception) {
        outputText.text = "Error: ${e.message}"
    }
}
private fun startRecordingLoop() {
val sampleRate = 16000
val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
val record = AudioRecord(MediaRecorder.AudioSource.MIC, sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize)
record.startRecording()
isListening = true
Thread {
val buffer = ShortArray(bufferSize / 2)
while (isListening) {
val read = record.read(buffer, 0, buffer.size)
if (read > 0) {
// 1. Feed audio to engine
val floatSamples = FloatArray(read) { buffer[it] / 32768.0f } // Normalize
stream?.acceptWaveform(floatSamples, sampleRate)
// 2. Decode
while (recognizer?.isReady(stream) == true) {
recognizer?.decode(stream)
}
// 3. Get Result
val result = recognizer?.getResult(stream)
val text = result?.text ?: ""
if (text.isNotEmpty()) {
// Update UI
runOnUiThread {
// Sherpa returns the FULL string so far, not just chunks
// So we just overwrite the "Current" view
val cleanText = text.lowercase()
outputText.text = cleanText
// Check if sentence is "Final" (Endpoint detected)
if (recognizer?.isEndpoint(stream) == true) {
sendToPico(cleanText)
recognizer?.reset(stream) // Clear buffer for next sentence
}
}
}
}
}
record.stop()
record.release()
}.start()
}
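A minimal sketch of how these two functions could be wired to the existing mic button, assuming RECORD_AUDIO has already been granted via checkAudioPermission() and that micButton simply toggles listening (the real click handler is not shown in this diff):

    // Hypothetical wiring; not part of this commit.
    micButton.setOnClickListener {
        if (!isListening) {
            if (recognizer == null) initSherpaModel() // build config + recognizer once
            startRecordingLoop()                      // spins up the capture/decode thread
        } else {
            isListening = false                       // the recording thread sees this, stops, and releases the AudioRecord
        }
    }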
/* ---- removed as part of the whisper migration
private fun initModel() {
// We look for the folder inside private storage (same logic as MainActivity)
val modelPath = File(filesDir, "vosk-model")
@@ -121,7 +205,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
}
}.start()
}
*/
private fun sendToPico(text: String) {
if (usbPort == null) return // Safety check
@@ -173,6 +257,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
}
// --- Vosk Listener Callbacks ---
/* removed as part of migration to whisper
override fun onResult(hypothesis: String?) {
hypothesis?.let {
@@ -205,6 +290,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
}
}
}
*/
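// TODO: onFinalResult below is still a Vosk RecognitionListener callback; it needs the same treatment as part of the remaining TestModelActivity changes.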
override fun onFinalResult(hypothesis: String?) {
// Final flush when stopping
@@ -215,7 +301,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
}
}
}
/* Whisper migration removals
override fun onError(exception: Exception?) {
outputText.append("\nError: ${exception?.message}")
}
@@ -224,15 +310,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
outputText.append("\nTimeout.")
}
// Helper to clean JSON: {"text": "hello world"} -> "hello world"
private fun parseVoskResult(json: String): String {
return try {
JSONObject(json).optString("text", "")
} catch (e: Exception) {
""
}
}
*/
// Permission Helper
private fun checkAudioPermission() {
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {