Initial steps to migrate to the whisper/sherpa setup.
Still missing major changes to the TestModelActivity.kt file, which are coming next...
This commit is contained in:
@@ -47,12 +47,17 @@ dependencies {
|
|||||||
androidTestImplementation(libs.androidx.espresso.core)
|
androidTestImplementation(libs.androidx.espresso.core)
|
||||||
|
|
||||||
// 1. The "Brain": Vosk Offline Speech Recognition
|
// 1. The "Brain": Vosk Offline Speech Recognition
|
||||||
implementation("com.alphacephei:vosk-android:0.3.47")
|
//implementation("com.alphacephei:vosk-android:0.3.47") - removed as part of the migration to whisper
|
||||||
|
|
||||||
// (Optional) Helper for memory management if needed later
|
// (Optional) Helper for memory management if needed later
|
||||||
// Removed the following as it was listed as optional and it did cause errors -
|
// Removed the following as it was listed as optional and it did cause errors -
|
||||||
// so to avoid a whole list of duplicate class found errors - this is already required via the VOSK libraries
|
// so to avoid a whole list of duplicate class found errors - this is already required via the VOSK libraries
|
||||||
// implementation("net.java.dev.jna:jna:5.13.0")
|
// implementation("net.java.dev.jna:jna:5.13.0")
|
||||||
|
|
||||||
|
// New Whisper include...
|
||||||
|
// implementation("com.k2fsa.sherpa.onnx:sherpa-onnx:1.12.23") // The engine
|
||||||
|
implementation("com.github.k2-fsa:sherpa-onnx:v1.12.23")
|
||||||
|
|
||||||
// 2. The "Mouth": USB Serial Driver for Android
|
// 2. The "Mouth": USB Serial Driver for Android
|
||||||
implementation("com.github.mik3y:usb-serial-for-android:3.7.0")
|
implementation("com.github.mik3y:usb-serial-for-android:3.7.0")
|
||||||
}
|
}
|
||||||
@@ -9,8 +9,8 @@ import android.widget.TextView
|
|||||||
import android.widget.Toast
|
import android.widget.Toast
|
||||||
import androidx.activity.result.contract.ActivityResultContracts
|
import androidx.activity.result.contract.ActivityResultContracts
|
||||||
import androidx.appcompat.app.AppCompatActivity
|
import androidx.appcompat.app.AppCompatActivity
|
||||||
import org.vosk.Model
|
// import org.vosk.Model
|
||||||
import org.vosk.android.SpeechService
|
// import org.vosk.android.SpeechService --- removed as part of migration to whisper.cpp
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.util.zip.ZipInputStream
|
import java.util.zip.ZipInputStream
|
||||||
|
|
||||||
@@ -19,9 +19,9 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// UI Components
|
// UI Components
|
||||||
private lateinit var statusText: TextView
|
private lateinit var statusText: TextView
|
||||||
|
|
||||||
// Vosk Components
|
// Vosk Components - removed as part of whisper migration
|
||||||
private var model: Model? = null
|
// private var model: Model? = null
|
||||||
private var speechService: SpeechService? = null
|
// private var speechService: SpeechService? = null
|
||||||
|
|
||||||
// 1. THE FILE PICKER REGISTRY
|
// 1. THE FILE PICKER REGISTRY
|
||||||
// This handles the result when the user picks a ZIP file
|
// This handles the result when the user picks a ZIP file
|
||||||
@@ -43,7 +43,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
// ADD THIS LINE AT THE BOTTOM:
|
// ADD THIS LINE AT THE BOTTOM:
|
||||||
// This attempts to load the model immediately if files exist
|
// This attempts to load the model immediately if files exist
|
||||||
initVoskModel()
|
// initVoskModel() - removed as part of whisper migration
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. SETUP THE MENU
|
// 2. SETUP THE MENU
|
||||||
@@ -113,7 +113,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// Back to UI Thread to say success
|
// Back to UI Thread to say success
|
||||||
runOnUiThread {
|
runOnUiThread {
|
||||||
statusText.text = "Model Installed! Initializing..."
|
statusText.text = "Model Installed! Initializing..."
|
||||||
initVoskModel()
|
// initVoskModel() - removed as part of the whisper migration
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
@@ -127,7 +127,9 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
// 6. INITIALIZE VOSK "BRAIN"
|
// 6. INITIALIZE VOSK "BRAIN"
|
||||||
// Replace your existing initVoskModel with this updated version
|
// Replace your existing initVoskModel with this updated version
|
||||||
|
/*
|
||||||
private fun initVoskModel() {
|
private fun initVoskModel() {
|
||||||
|
|
||||||
val modelPath = File(filesDir, "vosk-model")
|
val modelPath = File(filesDir, "vosk-model")
|
||||||
|
|
||||||
// Check if the directory exists before trying to load
|
// Check if the directory exists before trying to load
|
||||||
@@ -146,4 +148,5 @@ class MainActivity : AppCompatActivity() {
|
|||||||
statusText.text = "Error loading saved model: ${e.message}"
|
statusText.text = "Error loading saved model: ${e.message}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
@@ -10,10 +10,14 @@ import androidx.appcompat.app.AppCompatActivity
|
|||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
import androidx.core.content.ContextCompat
|
import androidx.core.content.ContextCompat
|
||||||
import org.json.JSONObject
|
import org.json.JSONObject
|
||||||
import org.vosk.Model
|
|
||||||
import org.vosk.Recognizer
|
import com.k2fsa.sherpa.onnx.* // import for whisper sherpa wrapper
|
||||||
import org.vosk.android.RecognitionListener
|
|
||||||
import org.vosk.android.SpeechService
|
// import org.vosk.Model --- migration to whisper removals
|
||||||
|
// import org.vosk.Recognizer
|
||||||
|
// import org.vosk.android.RecognitionListener
|
||||||
|
// import org.vosk.android.SpeechService
|
||||||
|
|
||||||
import java.io.File
|
import java.io.File
|
||||||
|
|
||||||
import android.content.Context
|
import android.content.Context
|
||||||
@@ -22,14 +26,20 @@ import com.hoho.android.usbserial.driver.UsbSerialPort
|
|||||||
import com.hoho.android.usbserial.driver.UsbSerialProber
|
import com.hoho.android.usbserial.driver.UsbSerialProber
|
||||||
import com.hoho.android.usbserial.util.SerialInputOutputManager
|
import com.hoho.android.usbserial.util.SerialInputOutputManager
|
||||||
|
|
||||||
class TestModelActivity : AppCompatActivity(), RecognitionListener {
|
// class TestModelActivity : AppCompatActivity(), RecognitionListener {
|
||||||
|
class TestModelActivity : AppCompatActivity() {
|
||||||
|
|
||||||
private lateinit var outputText: TextView
|
private lateinit var outputText: TextView
|
||||||
private lateinit var micButton: ImageButton
|
private lateinit var micButton: ImageButton
|
||||||
|
|
||||||
// Vosk Components
|
// Whisper/Sherpa wrapper setup variables here
|
||||||
private var model: Model? = null
|
private var audioRecorder: AudioRecorder? = null // You'll need a new recorder helper
|
||||||
private var speechService: SpeechService? = null
|
private var recognizer: OnlineRecognizer? = null
|
||||||
|
private var stream: OnlineStream? = null
|
||||||
|
|
||||||
|
// Vosk Components - now removed as whisper migration
|
||||||
|
// private var model: Model? = null
|
||||||
|
// private var speechService: SpeechService? = null
|
||||||
private var isListening = false
|
private var isListening = false
|
||||||
|
|
||||||
// USB Components
|
// USB Components
|
||||||
@@ -94,7 +104,81 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Whisper/Sherpa implementation functions here....
|
||||||
|
/**
 * Builds the sherpa-onnx streaming recognizer from the transducer model files
 * stored under [filesDir] and opens the decoding stream.
 *
 * On success [recognizer] and [stream] are assigned and [outputText] reports
 * readiness. On failure (a model file is missing, or the recognizer constructor
 * throws) both properties are left untouched and the error is shown in [outputText].
 */
private fun initSherpaModel() {
    // Model files are expected directly inside the app's private files directory.
    val encoderPath = "$filesDir/encoder-epoch-99-avg-1.onnx" // Example path
    val decoderPath = "$filesDir/decoder-epoch-99-avg-1.onnx"
    val joinerPath = "$filesDir/joiner-epoch-99-avg-1.onnx"
    // NOTE(review): transducer models ship a token table next to the .onnx files;
    // "tokens.txt" is the conventional name — confirm against the installed model.
    val tokensPath = "$filesDir/tokens.txt"

    // Fail with a readable message instead of handing bad paths to native code.
    val missing = listOf(encoderPath, decoderPath, joinerPath, tokensPath)
        .firstOrNull { !File(it).isFile }
    if (missing != null) {
        outputText.text = "Error: model file missing: ${File(missing).name}"
        return
    }

    // Sherpa requires specific configuration
    val config = OnlineRecognizerConfig(
        // sampleRate is an Int in the sherpa-onnx Kotlin API.
        featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80),
        // The transducer files and token table are nested inside OnlineModelConfig.
        modelConfig = OnlineModelConfig(
            transducer = OnlineTransducerModelConfig(
                encoder = encoderPath,
                decoder = decoderPath,
                joiner = joinerPath,
            ),
            tokens = tokensPath,
        ),
        enableEndpoint = true, // Detects when you stop speaking
        ruleFsts = "",
        decodingMethod = "greedy_search",
        maxActivePaths = 4,
    )

    try {
        // assetManager must be null here: a non-null AssetManager makes the engine
        // resolve the paths inside the APK assets instead of the filesystem.
        val newRecognizer = OnlineRecognizer(assetManager = null, config = config)
        stream = newRecognizer.createStream()
        recognizer = newRecognizer
        outputText.text = "Whisper/Sherpa Ready!"
    } catch (e: Exception) {
        outputText.text = "Error: ${e.message}"
    }
}
|
||||||
|
|
||||||
|
/**
 * Opens the microphone at 16 kHz mono PCM-16 and starts a background thread that
 * streams audio into the sherpa recognizer until [isListening] becomes false.
 *
 * Partial transcripts (lower-cased) are shown in [outputText]; when the recognizer
 * reports an endpoint the current text is passed to [sendToPico] and the stream is
 * reset for the next utterance. Does nothing except report an error if the
 * recognizer is not initialized, the microphone permission is missing, or the
 * audio device cannot be opened.
 *
 * NOTE(review): [isListening] is read from the worker thread but is declared
 * without `@Volatile` in this file; consider marking it so a stop request is
 * reliably observed.
 */
private fun startRecordingLoop() {
    fun fail(message: String) = runOnUiThread { outputText.text = message }

    // Snapshot the nullable properties once: the recognizer API takes a non-null
    // stream, and the worker must keep using the same pair for its whole lifetime.
    val activeRecognizer = recognizer
    val activeStream = stream
    if (activeRecognizer == null || activeStream == null) {
        fail("Error: recognizer not initialized")
        return
    }

    // Creating an AudioRecord without RECORD_AUDIO fails at runtime.
    if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
        fail("Error: microphone permission not granted")
        return
    }

    val sampleRate = 16000
    val bufferSize = AudioRecord.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
    // getMinBufferSize returns a negative error code for unsupported configurations.
    if (bufferSize <= 0) {
        fail("Error: unsupported audio configuration")
        return
    }

    val record = AudioRecord(MediaRecorder.AudioSource.MIC, sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize)
    if (record.state != AudioRecord.STATE_INITIALIZED) {
        record.release()
        fail("Error: could not open microphone")
        return
    }

    record.startRecording()
    isListening = true

    Thread {
        // bufferSize is in bytes; each PCM-16 sample occupies two of them.
        val buffer = ShortArray(bufferSize / 2)
        try {
            while (isListening) {
                val read = record.read(buffer, 0, buffer.size)
                if (read < 0) {
                    // Negative values are AudioRecord error codes; stop instead of spinning.
                    isListening = false
                    break
                }
                if (read == 0) continue

                // 1. Feed audio to engine (normalized to [-1, 1))
                val floatSamples = FloatArray(read) { buffer[it] / 32768.0f }
                activeStream.acceptWaveform(floatSamples, sampleRate)

                // 2. Decode
                while (activeRecognizer.isReady(activeStream)) {
                    activeRecognizer.decode(activeStream)
                }

                // 3. Get result. Endpoint detection and reset stay on this thread so
                // the stream is never touched concurrently from the UI thread.
                val cleanText = activeRecognizer.getResult(activeStream).text.lowercase()
                val endpointReached = activeRecognizer.isEndpoint(activeStream)
                if (endpointReached) {
                    activeRecognizer.reset(activeStream) // Clear buffer for next sentence
                }

                if (cleanText.isNotEmpty()) {
                    runOnUiThread {
                        // Sherpa returns the FULL string so far, not just chunks,
                        // so the current view is simply overwritten.
                        outputText.text = cleanText
                        if (endpointReached) {
                            sendToPico(cleanText)
                        }
                    }
                }
            }
        } finally {
            // Always give the microphone back, even if decoding threw.
            try {
                record.stop()
            } finally {
                record.release()
            }
        }
    }.start()
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ---- removed as part of the whisper migration
|
||||||
private fun initModel() {
|
private fun initModel() {
|
||||||
// We look for the folder inside private storage (same logic as MainActivity)
|
// We look for the folder inside private storage (same logic as MainActivity)
|
||||||
val modelPath = File(filesDir, "vosk-model")
|
val modelPath = File(filesDir, "vosk-model")
|
||||||
@@ -121,7 +205,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
|
|||||||
}
|
}
|
||||||
}.start()
|
}.start()
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
private fun sendToPico(text: String) {
|
private fun sendToPico(text: String) {
|
||||||
if (usbPort == null) return // Safety check
|
if (usbPort == null) return // Safety check
|
||||||
|
|
||||||
@@ -173,6 +257,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --- Vosk Listener Callbacks ---
|
// --- Vosk Listener Callbacks ---
|
||||||
|
/* removed as part of migration to whisper
|
||||||
|
|
||||||
override fun onResult(hypothesis: String?) {
|
override fun onResult(hypothesis: String?) {
|
||||||
hypothesis?.let {
|
hypothesis?.let {
|
||||||
@@ -205,6 +290,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
override fun onFinalResult(hypothesis: String?) {
|
override fun onFinalResult(hypothesis: String?) {
|
||||||
// Final flush when stopping
|
// Final flush when stopping
|
||||||
@@ -215,7 +301,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* Whisper migration removals
|
||||||
override fun onError(exception: Exception?) {
|
override fun onError(exception: Exception?) {
|
||||||
outputText.append("\nError: ${exception?.message}")
|
outputText.append("\nError: ${exception?.message}")
|
||||||
}
|
}
|
||||||
@@ -224,15 +310,7 @@ class TestModelActivity : AppCompatActivity(), RecognitionListener {
|
|||||||
outputText.append("\nTimeout.")
|
outputText.append("\nTimeout.")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper to clean JSON: {"text": "hello world"} -> "hello world"
|
*/
|
||||||
private fun parseVoskResult(json: String): String {
|
|
||||||
return try {
|
|
||||||
JSONObject(json).optString("text", "")
|
|
||||||
} catch (e: Exception) {
|
|
||||||
""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Permission Helper
|
// Permission Helper
|
||||||
private fun checkAudioPermission() {
|
private fun checkAudioPermission() {
|
||||||
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
|
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ material = "1.13.0"
|
|||||||
activity = "1.12.2"
|
activity = "1.12.2"
|
||||||
constraintlayout = "2.2.1"
|
constraintlayout = "2.2.1"
|
||||||
|
|
||||||
|
|
||||||
[libraries]
|
[libraries]
|
||||||
androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
|
androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
|
||||||
junit = { group = "junit", name = "junit", version.ref = "junit" }
|
junit = { group = "junit", name = "junit", version.ref = "junit" }
|
||||||
|
|||||||
Reference in New Issue
Block a user