Initial steps to migrate to the whisper/sherpa setup.
Still missing major changes to the TestModelActivity.kt file, which are coming next.
@@ -47,12 +47,17 @@ dependencies {
    androidTestImplementation(libs.androidx.espresso.core)

    // 1. The "Brain": Vosk Offline Speech Recognition
    implementation("com.alphacephei:vosk-android:0.3.47")
    // implementation("com.alphacephei:vosk-android:0.3.47") - removed as part of the migration to whisper

    // (Optional) Helper for memory management if needed later
    // Removed: it was listed as optional and caused a whole list of "duplicate class found" errors,
    // since JNA is already pulled in via the Vosk libraries.
    // implementation("net.java.dev.jna:jna:5.13.0")

    // New Whisper include...
    // implementation("com.k2fsa.sherpa.onnx:sherpa-onnx:1.12.23") // The engine
    implementation("com.github.k2-fsa:sherpa-onnx:v1.12.23")

    // 2. The "Mouth": USB Serial Driver for Android
    implementation("com.github.mik3y:usb-serial-for-android:3.7.0")
}
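
Note: the new com.github.k2-fsa:sherpa-onnx coordinate follows the JitPack naming scheme (com.github.<user>:<repo>:<tag>), unlike the commented-out Maven Central coordinate above. Assuming the dependency really is meant to resolve through JitPack, the repository also needs to be declared - a minimal sketch for settings.gradle.kts, not something this commit includes:

    dependencyResolutionManagement {
        repositories {
            google()
            mavenCentral()
            // JitPack serves artifacts published under com.github.<user> coordinates
            maven("https://jitpack.io")
        }
    }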

@@ -9,8 +9,8 @@ import android.widget.TextView
import android.widget.Toast
import androidx.activity.result.contract.ActivityResultContracts
import androidx.appcompat.app.AppCompatActivity
import org.vosk.Model
import org.vosk.android.SpeechService
// import org.vosk.Model
// import org.vosk.android.SpeechService --- removed as part of migration to whisper.cpp
import java.io.File
import java.util.zip.ZipInputStream

@@ -19,9 +19,9 @@ class MainActivity : AppCompatActivity() {
    // UI Components
    private lateinit var statusText: TextView

    // Vosk Components
    private var model: Model? = null
    private var speechService: SpeechService? = null
    // Vosk Components - removed as part of whisper migration
    // private var model: Model? = null
    // private var speechService: SpeechService? = null

    // 1. THE FILE PICKER REGISTRY
    // This handles the result when the user picks a ZIP file

@@ -43,7 +43,7 @@ class MainActivity : AppCompatActivity() {

        // ADD THIS LINE AT THE BOTTOM:
        // This attempts to load the model immediately if files exist
        initVoskModel()
        // initVoskModel() - removed as part of whisper migration
    }

    // 2. SETUP THE MENU

@@ -113,7 +113,7 @@ class MainActivity : AppCompatActivity() {
                // Back to UI Thread to say success
                runOnUiThread {
                    statusText.text = "Model Installed! Initializing..."
                    initVoskModel()
                    // initVoskModel() - removed as part of the whisper migration
                }

            } catch (e: Exception) {

@@ -127,7 +127,9 @@ class MainActivity : AppCompatActivity() {

    // 6. INITIALIZE VOSK "BRAIN"
    // Replace your existing initVoskModel with this updated version
    /*
    private fun initVoskModel() {

        val modelPath = File(filesDir, "vosk-model")

        // Check if the directory exists before trying to load

@@ -146,4 +148,5 @@ class MainActivity : AppCompatActivity() {
            statusText.text = "Error loading saved model: ${e.message}"
        }
    }
    */
}

@@ -10,10 +10,14 @@ import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import org.json.JSONObject
import org.vosk.Model
import org.vosk.Recognizer
import org.vosk.android.RecognitionListener
import org.vosk.android.SpeechService

import com.k2fsa.sherpa.onnx.* // import for whisper sherpa wrapper

// import org.vosk.Model --- migration to whisper removals
// import org.vosk.Recognizer
// import org.vosk.android.RecognitionListener
// import org.vosk.android.SpeechService

import java.io.File

import android.content.Context

@@ -22,14 +26,20 @@ import com.hoho.android.usbserial.driver.UsbSerialPort
import com.hoho.android.usbserial.driver.UsbSerialProber
import com.hoho.android.usbserial.util.SerialInputOutputManager

class TestModelActivity : AppCompatActivity(), RecognitionListener {
// class TestModelActivity : AppCompatActivity(), RecognitionListener {
class TestModelActivity : AppCompatActivity() {

    private lateinit var outputText: TextView
    private lateinit var micButton: ImageButton

    // Vosk Components
    private var model: Model? = null
    private var speechService: SpeechService? = null
    // Whisper/Sherpa wrapper setup variables here
    private var audioRecorder: AudioRecorder? = null // You'll need a new recorder helper
    private var recognizer: OnlineRecognizer? = null
    private var stream: OnlineStream? = null

    // Vosk Components - now removed as part of the whisper migration
    // private var model: Model? = null
    // private var speechService: SpeechService? = null
    private var isListening = false

    // USB Components

@@ -94,8 +104,82 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
        }
    }

    // Whisper/Sherpa implementation functions here....
    private fun initSherpaModel() {
        // Sherpa requires specific configuration
        val config = OnlineRecognizerConfig(
            featConfig = FeatureConfig(sampleRate = 16000.0f, featureDim = 80),
            transducerModelConfig = OnlineTransducerModelConfig(
                encoder = "$filesDir/encoder-epoch-99-avg-1.onnx", // Example path
                decoder = "$filesDir/decoder-epoch-99-avg-1.onnx",
                joiner = "$filesDir/joiner-epoch-99-avg-1.onnx",
            ),
            enableEndpoint = true, // Detects when you stop speaking
            ruleFsts = "",
            decodingMethod = "greedy_search",
            maxActivePaths = 4
        )

        // Build the recognizer and open a stream from the config above
        try {
            recognizer = OnlineRecognizer(assetManager = assets, config = config)
            stream = recognizer?.createStream()
            outputText.text = "Whisper/Sherpa Ready!"
        } catch (e: Exception) {
            outputText.text = "Error: ${e.message}"
        }
    }
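
    // Not part of this commit: the config above points at .onnx files under filesDir,
    // so they must exist on disk before initSherpaModel() runs. A minimal sketch of
    // copying them out of the APK's assets/ folder on first launch - the file names
    // are the example names used above and are an assumption, not something this
    // commit ships.
    private fun copyModelFromAssets() {
        val names = listOf(
            "encoder-epoch-99-avg-1.onnx",
            "decoder-epoch-99-avg-1.onnx",
            "joiner-epoch-99-avg-1.onnx"
        )
        for (name in names) {
            val target = File(filesDir, name)
            if (!target.exists()) {
                assets.open(name).use { input ->
                    target.outputStream().use { output -> input.copyTo(output) }
                }
            }
        }
    }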

    private fun startRecordingLoop() {
        val sampleRate = 16000
        val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
        val record = AudioRecord(MediaRecorder.AudioSource.MIC, sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize)

        record.startRecording()
        isListening = true

        Thread {
            val buffer = ShortArray(bufferSize / 2)
            while (isListening) {
                val read = record.read(buffer, 0, buffer.size)
                if (read > 0) {
                    // 1. Feed audio to engine
                    val floatSamples = FloatArray(read) { buffer[it] / 32768.0f } // Normalize
                    stream?.acceptWaveform(floatSamples, sampleRate)

                    // 2. Decode
                    while (recognizer?.isReady(stream) == true) {
                        recognizer?.decode(stream)
                    }

                    // 3. Get Result
                    val result = recognizer?.getResult(stream)
                    val text = result?.text ?: ""

                    if (text.isNotEmpty()) {
                        // Update UI
                        runOnUiThread {
                            // Sherpa returns the FULL string so far, not just chunks,
                            // so we just overwrite the "Current" view
                            val cleanText = text.lowercase()
                            outputText.text = cleanText

                            // Check if sentence is "Final" (Endpoint detected)
                            if (recognizer?.isEndpoint(stream) == true) {
                                sendToPico(cleanText)
                                recognizer?.reset(stream) // Clear buffer for next sentence
                            }
                        }
                    }
                }
            }
            record.stop()
            record.release()
        }.start()
    }
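
    // Not part of this commit: the loop above exits when isListening goes false, so a
    // matching stop helper (a sketch - wiring it to the mic button is assumed) is just
    // the flag flip; the capture thread then stops and releases the AudioRecord itself.
    // Note that android.media.AudioRecord / AudioFormat / MediaRecorder imports are
    // also assumed for startRecordingLoop() and are not shown in this diff.
    private fun stopRecordingLoop() {
        isListening = false
    }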

    /* ---- removed as part of the whisper migration
    private fun initModel() {
        // We look for the folder inside private storage (same logic as MainActivity)
        val modelPath = File(filesDir, "vosk-model")

@@ -121,7 +205,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
            }
        }.start()
    }

    */
    private fun sendToPico(text: String) {
        if (usbPort == null) return // Safety check

@@ -173,6 +257,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
    }

    // --- Vosk Listener Callbacks ---
    /* removed as part of migration to whisper

    override fun onResult(hypothesis: String?) {
        hypothesis?.let {

@@ -205,6 +290,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
                }
            }
        }
    */

    override fun onFinalResult(hypothesis: String?) {
        // Final flush when stopping

@@ -215,7 +301,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
            }
        }
    }

    /* Whisper migration removals
    override fun onError(exception: Exception?) {
        outputText.append("\nError: ${exception?.message}")
    }

@@ -224,15 +310,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
        outputText.append("\nTimeout.")
    }

    // Helper to clean JSON: {"text": "hello world"} -> "hello world"
    private fun parseVoskResult(json: String): String {
        return try {
            JSONObject(json).optString("text", "")
        } catch (e: Exception) {
            ""
        }
    }

    */
    // Permission Helper
    private fun checkAudioPermission() {
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {