Confirmed that offline model download and communications are working. Next step: build the JSON schema for local models so they can understand tool calling.

This commit is contained in:
2026-02-28 14:20:22 +11:00
parent 93a2c48e4b
commit d4322740e2
5 changed files with 46 additions and 26 deletions

View File

@@ -4,10 +4,10 @@
<selectionStates>
<SelectionState runConfigName="app">
<option name="selectionMode" value="DROPDOWN" />
<DropdownSelection timestamp="2026-02-25T23:24:39.552459762Z">
<DropdownSelection timestamp="2026-02-28T03:12:27.182833316Z">
<Target type="DEFAULT_BOOT">
<handle>
<DeviceId pluginId="LocalEmulator" identifier="path=/home/michael/.android/avd/Pixel_8_API_35.avd" />
<DeviceId pluginId="PhysicalDevice" identifier="serial=461ed66e" />
</handle>
</Target>
</DropdownSelection>

View File

@@ -1,5 +1,6 @@
package net.mmanningau.alice
import com.llamatik.library.platform.LlamaBridge // The correct Llamatik import!
import dev.langchain4j.data.message.AiMessage
import dev.langchain4j.data.message.ChatMessage
import dev.langchain4j.data.message.SystemMessage
@@ -8,9 +9,10 @@ import dev.langchain4j.model.chat.ChatLanguageModel
import dev.langchain4j.model.output.Response
import java.io.File
import dev.langchain4j.agent.tool.ToolSpecification
class LlamaCppAdapter(private val modelPath: String) : ChatLanguageModel {
// We will initialize the native C++ engine here once the model is downloaded
private var isEngineLoaded = false
private fun getOrInitEngine() {
@@ -19,35 +21,43 @@ class LlamaCppAdapter(private val modelPath: String) : ChatLanguageModel {
if (!modelFile.exists()) {
throw IllegalStateException("Model file not found at: $modelPath. Please download a model first.")
}
// TODO: In the next step, we will call Llamatik's native init function here
// to load the .gguf file into the device's memory.
// Boot the native C++ backend via Llamatik's Kotlin bridge
LlamaBridge.initGenerateModel(modelPath)
isEngineLoaded = true
}
}
override fun generate(messages: List<ChatMessage>): Response<AiMessage> {
// Ensure the engine is booted
getOrInitEngine()
// 1. Translation IN: Convert LangChain4j messages into a raw text prompt format
// Local models usually need a specific prompt template (like ChatML).
// We will build a basic conversational wrapper here.
// 1. Translation IN: Format specifically for Qwen 2.5 (ChatML)
val promptBuilder = java.lang.StringBuilder()
for (message in messages) {
when (message) {
is SystemMessage -> promptBuilder.append("System: ${message.text()}\n")
is UserMessage -> promptBuilder.append("User: ${message.text()}\n")
is AiMessage -> promptBuilder.append("Alice: ${message.text()}\n")
is SystemMessage -> promptBuilder.append("<|im_start|>system\n${message.text()}<|im_end|>\n")
is UserMessage -> promptBuilder.append("<|im_start|>user\n${message.text()}<|im_end|>\n")
is AiMessage -> promptBuilder.append("<|im_start|>assistant\n${message.text()}<|im_end|>\n")
}
}
promptBuilder.append("Alice: ")
// Prompt the AI to start generating its response
promptBuilder.append("<|im_start|>assistant\n")
// 2. Execution: (Placeholder until we wire the specific Llamatik execution call)
// val responseText = LlamatikEngine.generate(promptBuilder.toString())
val responseText = "This is a placeholder response from the local Llama.cpp engine!"
// 2. Execution: Run it on the local hardware using Llamatik
val responseText = LlamaBridge.generate(promptBuilder.toString())
// 3. Translation OUT: Wrap the raw string back into LangChain4j's format
val finalAiMessage = AiMessage(responseText)
return Response.from(finalAiMessage)
// 3. Translation OUT: Clean up any trailing ChatML tags the engine might leave behind
val cleanResponse = responseText.replace("<|im_end|>", "").trim()
return Response.from(AiMessage(cleanResponse))
}
// This catches LangChain4j when it tries to send tools to our local engine
override fun generate(
messages: List<ChatMessage>,
toolSpecifications: List<ToolSpecification>
): Response<AiMessage> {
// For Phase 1, we simply ignore the tools and route it to our standard text generator
// so we can prove the local GPU engine is successfully generating tokens!
return generate(messages)
}
}

View File

@@ -11,6 +11,7 @@ import java.time.Duration
import java.text.SimpleDateFormat
import java.util.Date
import java.util.Locale
import android.content.Context
object LlmManager {
@@ -28,7 +29,7 @@ object LlmManager {
// Initialization now makes the dao optional so the UI can safely call it!
fun initialize(
dao: ChatDao?, mode: String, url: String, modelName: String, apiKey: String, systemPrompt: String
context: Context,dao: ChatDao?, mode: String, url: String, modelName: String, apiKey: String, systemPrompt: String
) {
// Only update the DAO if one was passed in (like on app boot)
if (dao != null) {
@@ -50,8 +51,10 @@ object LlmManager {
.logRequests(true)
.logResponses(true)
.build()
} else {
chatModel = null // MLC Engine goes here later!
} else if (mode == "Local") {
// NEW: Grab the absolute path from the registry and boot the middleman!
val fullPath = ModelRegistry.getModelPath(context, modelName)
chatModel = LlamaCppAdapter(fullPath)
}
// Database Startup Logic
@@ -102,7 +105,7 @@ object LlmManager {
}
fun chat(userText: String): String {
if (currentMode == "MLC") return "System: MLC LLM On-Device engine is selected but not yet installed."
if (currentMode == "Local" && chatModel == null) return "System: Llamatik Ollama On-Device engine is selected but not yet installed."
val currentModel = chatModel ?: return "Error: LLM engine not initialized."
// If the history size is 1, it means only the System prompt exists. This is the first message!

View File

@@ -201,7 +201,7 @@ fun MainChatScreen() {
val response = LlmManager.chat(userText)
messages = messages + ChatMessage(response, false)
} catch (e: Exception) {
messages = messages + ChatMessage("Connection Error: Is the local LLM server running?", false)
messages = messages + ChatMessage("System Error: ${e.message}", false)
}
}
}
@@ -387,7 +387,7 @@ fun SettingsScreen(onBackClicked: () -> Unit) {
.putString("systemPrompt", systemPrompt)
.apply()
LlmManager.initialize(null, llmMode, llmUrl, modelName, apiKey, systemPrompt)
LlmManager.initialize(context, null, llmMode, llmUrl, modelName, apiKey, systemPrompt)
SkillManager.updateDirectory(skillsPath)
onBackClicked()
@@ -481,6 +481,13 @@ fun ModelManagerScreen(onBackClicked: () -> Unit) {
// Save the exact filename so LlmManager knows which one to boot up
prefs.edit().putString("modelName", model.fileName).apply()
activeModelName = model.fileName
// NEW: Hot-reload the LlmManager instantly!
val mode = prefs.getString("llmMode", "Local") ?: "Local"
val url = prefs.getString("llmUrl", "") ?: ""
val apiKey = prefs.getString("apiKey", "") ?: ""
val prompt = prefs.getString("systemPrompt", "You are a helpful AI assistant.") ?: "You are a helpful AI assistant."
LlmManager.initialize(context, null, mode, url, model.fileName, apiKey, prompt)
},
modifier = Modifier.fillMaxWidth(),
colors = ButtonDefaults.buttonColors(

View File

@@ -38,6 +38,6 @@ class AliceApp : Application() {
).allowMainThreadQueries().build() // We use allowMainThreadQueries for immediate boot loading
// Pass the DAO into the manager!
LlmManager.initialize(db.chatDao(), savedMode, savedUrl, savedModel, savedApiKey, savedSystemPrompt)
LlmManager.initialize(this,db.chatDao(), savedMode, savedUrl, savedModel, savedApiKey, savedSystemPrompt)
}
}