macha-autonomous/chat.py
Lily Miller 22ba493d9e Initial commit: Split Macha autonomous system into separate flake
Macha is now a standalone NixOS flake that can be imported into other
systems. This provides:

- Independent versioning
- Easier reusability
- Cleaner separation of concerns
- Better development workflow

Includes:
- Complete autonomous system code
- NixOS module with full configuration options
- Queue-based architecture with priority system
- Chunked map-reduce for large outputs
- ChromaDB knowledge base
- Tool calling system
- Multi-host SSH management
- Gotify notification integration

All capabilities from DESIGN.md are preserved.
2025-10-06 14:32:37 -06:00
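
As a hedged sketch of how a consuming system might import this flake from its own flake.nix (the input URL and module attribute below are assumptions, not confirmed by this page):

    {
      inputs.macha.url = "git+https://example.com/macha-autonomous";  # hypothetical URL

      outputs = { nixpkgs, macha, ... }: {
        nixosConfigurations.myhost = nixpkgs.lib.nixosSystem {
          system = "x86_64-linux";
          modules = [
            macha.nixosModules.default  # assumed module output name
            ./configuration.nix
          ];
        };
      };
    }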


#!/usr/bin/env python3
"""
Interactive chat interface with Macha AI agent.
Allows conversational interaction and directive execution.
"""
import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from agent import MachaAgent
class MachaChatSession:
    """Interactive chat session with Macha"""

    def __init__(self):
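        # Route requests through the shared work queue at INTERACTIVE priority,
        # so interactive chat turns are serviced ahead of background autonomous
        # work (the priority-queue architecture noted in the commit message).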
        self.agent = MachaAgent(use_queue=True, priority="INTERACTIVE")
        self.conversation_history: List[Dict[str, str]] = []
        self.session_start = datetime.now().isoformat()

    def _create_chat_prompt(self, user_message: str) -> str:
        """Create a prompt for the chat session"""
        # Build conversation context
        context = ""
        if self.conversation_history:
            context = "\n\nCONVERSATION HISTORY:\n"
            for entry in self.conversation_history[-10:]:  # Last 10 messages
                role = entry['role'].upper()
                msg = entry['message']
                context += f"{role}: {msg}\n"
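        # `context` ends up as a compact transcript, e.g. (illustrative values):
        #   CONVERSATION HISTORY:
        #   USER: is nginx running?
        #   MACHA: ✅ Command succeeded: ...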
        prompt = f"""{MachaAgent.SYSTEM_PROMPT}

TASK: INTERACTIVE CHAT SESSION

You are in an interactive chat session with the system administrator.
You can have a natural conversation and execute commands when directed.

CAPABILITIES:
- Answer questions about system status
- Explain configurations and issues
- Execute commands when explicitly asked
- Provide guidance and recommendations

COMMAND EXECUTION:
When the user asks you to run a command or perform an action that requires execution:
1. Respond with a JSON object containing the command to execute
2. Format: {{"action": "execute", "command": "the command", "explanation": "why you're running it"}}
3. After seeing the output, continue the conversation naturally

RESPONSE FORMAT:
- For normal conversation: Respond naturally in plain text
- For command execution: Respond with JSON containing action/command/explanation
- Keep responses concise but informative

RULES:
- Only execute commands when explicitly asked or when it's clearly needed
- Explain what you're about to do before executing
- Never execute destructive commands without explicit confirmation
- If unsure, ask for clarification
{context}

USER: {user_message}
MACHA:"""
        return prompt

    def _execute_command(self, command: str) -> Dict[str, Any]:
        """Execute a shell command and return results"""
        try:
            result = subprocess.run(
                command,
                shell=True,
                capture_output=True,
                text=True,
                timeout=30
            )
            # Check if command failed due to permissions
            needs_sudo = False
            permission_errors = [
                'Interactive authentication required',
                'Permission denied',
                'Operation not permitted',
                'Must be root',
                'insufficient privileges',
                'authentication is required'
            ]
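            # These phrases are matched as lowercase substrings against the
            # combined stderr+stdout below; they cover polkit/systemd prompts
            # ("Interactive authentication required") as well as classic Unix
            # permission failures.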
            if result.returncode != 0:
                error_text = (result.stderr + result.stdout).lower()
                for perm_error in permission_errors:
                    if perm_error.lower() in error_text:
                        needs_sudo = True
                        break

            # Retry with sudo if permission error detected
            if needs_sudo and not command.strip().startswith('sudo'):
                print("\n⚠️ Permission denied, retrying with sudo...")
                sudo_command = f"sudo {command}"
                result = subprocess.run(
                    sudo_command,
                    shell=True,
                    capture_output=True,
                    text=True,
                    timeout=30
                )
                return {
                    'success': result.returncode == 0,
                    'exit_code': result.returncode,
                    'stdout': result.stdout,
                    'stderr': result.stderr,
                    'command': sudo_command,
                    'retried_with_sudo': True
                }

            return {
                'success': result.returncode == 0,
                'exit_code': result.returncode,
                'stdout': result.stdout,
                'stderr': result.stderr,
                'command': command,
                'retried_with_sudo': False
            }
        except subprocess.TimeoutExpired:
            return {
                'success': False,
                'exit_code': -1,
                'stdout': '',
                'stderr': 'Command timed out after 30 seconds',
                'command': command,
                'retried_with_sudo': False
            }
        except Exception as e:
            return {
                'success': False,
                'exit_code': -1,
                'stdout': '',
                'stderr': str(e),
                'command': command,
                'retried_with_sudo': False
            }
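
    # Every _execute_command path returns the same dict shape
    # (success / exit_code / stdout / stderr / command / retried_with_sudo),
    # so callers can treat success, failure, and timeout uniformly.
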
    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse AI response to determine if it's a command or text"""
        try:
            # Try to parse as JSON
            parsed = json.loads(response.strip())
            if isinstance(parsed, dict) and 'action' in parsed:
                return parsed
        except json.JSONDecodeError:
            pass
        # It's plain text conversation
        return {'action': 'chat', 'message': response}
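
    # The two shapes _parse_response can return (example values are
    # illustrative, not from a real session):
    #   {"action": "execute", "command": "df -h", "explanation": "check disk usage"}
    #   {"action": "chat", "message": "All filesystems look healthy."}
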
    def _auto_diagnose_ollama(self) -> str:
        """Automatically diagnose Ollama issues"""
        diagnostics = []
        diagnostics.append("🔍 AUTO-DIAGNOSIS: Investigating Ollama failure...\n")

        # Check if Ollama service is running
        try:
            result = subprocess.run(
                ['systemctl', 'is-active', 'ollama.service'],
                capture_output=True,
                text=True,
                timeout=5
            )
            if result.returncode == 0:
                diagnostics.append("✅ Ollama service is active")
            else:
                diagnostics.append(f"❌ Ollama service is NOT active: {result.stdout.strip()}")
                # Get service status
                status_result = subprocess.run(
                    ['systemctl', 'status', 'ollama.service', '--no-pager', '-l'],
                    capture_output=True,
                    text=True,
                    timeout=5
                )
                diagnostics.append(f"\nService status:\n```\n{status_result.stdout[-500:]}\n```")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check service status: {e}")
        # Check memory usage
        try:
            result = subprocess.run(['free', '-h'], capture_output=True, text=True, timeout=5)
            lines = result.stdout.split('\n')
            for line in lines[:3]:  # First 3 lines
                diagnostics.append(f" {line}")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check memory: {e}")

        # Check which models are loaded
        try:
            import requests
            response = requests.get(f"{self.agent.ollama_host}/api/tags", timeout=5)
            if response.status_code == 200:
                models = response.json().get('models', [])
                diagnostics.append(f"\n📦 Loaded models ({len(models)}):")
                for model in models:
                    name = model.get('name', 'unknown')
                    size = model.get('size', 0) / (1024**3)
                    is_current = "← TARGET" if name == self.agent.model else ""
                    diagnostics.append(f"{name} ({size:.1f} GB) {is_current}")
                # Check if target model is loaded
                model_names = [m.get('name') for m in models]
                if self.agent.model not in model_names:
                    diagnostics.append(f"\n❌ TARGET MODEL NOT LOADED: {self.agent.model}")
                    diagnostics.append(f" Available models: {', '.join(model_names)}")
            else:
                diagnostics.append(f"❌ Ollama API returned {response.status_code}")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not query Ollama API: {e}")

        # Check recent Ollama logs
        try:
            result = subprocess.run(
                ['journalctl', '-u', 'ollama.service', '-n', '10', '--no-pager'],
                capture_output=True,
                text=True,
                timeout=5
            )
            if result.stdout:
                diagnostics.append(f"\n📋 Recent Ollama logs (last 10 lines):\n```\n{result.stdout}\n```")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check logs: {e}")

        return "\n".join(diagnostics)
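
    # process_message pipeline:
    #   1. record the user turn in conversation_history
    #   2. assemble recent history (plus knowledge-base context) into chat messages
    #   3. call the tool-aware Ollama endpoint
    #   4. on an exception, empty reply, or error payload, append _auto_diagnose_ollama() output
    #   5. either execute a requested command (with a follow-up analysis) or just chat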
    def process_message(self, user_message: str) -> str:
        """Process a user message and return Macha's response"""
        # Add user message to history
        self.conversation_history.append({
            'role': 'user',
            'message': user_message,
            'timestamp': datetime.now().isoformat()
        })

        # Build chat messages for tool-calling API
        messages = []

        # Query relevant knowledge based on user message
        knowledge_context = self.agent._query_relevant_knowledge(user_message, limit=3)

        # Add recent conversation history (last 15 messages to stay within context limits).
        # With tool calling, messages grow quickly, so we limit more aggressively.
        recent_history = self.conversation_history[-15:]  # Last ~7 exchanges
        for entry in recent_history:
            content = entry['message']
            # Truncate very long messages (e.g., command outputs)
            if len(content) > 3000:
                content = content[:1500] + "\n... [message truncated] ...\n" + content[-1500:]
            # Append knowledge context to the most recent message, if available
            if entry == recent_history[-1] and knowledge_context:
                content += knowledge_context
            messages.append({
                "role": entry['role'],
                "content": content
            })
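        # At this point `messages` is an ordinary chat transcript, e.g.
        # (illustrative values):
        #   [{"role": "user", "content": "check disk space"},
        #    {"role": "macha", "content": "🔧 Checking disk usage..."}]
        # Note that history entries carry the role "macha", which
        # _query_ollama_with_tools is assumed to accept or remap.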
        try:
            # Use tool-aware chat API
            ai_response = self.agent._query_ollama_with_tools(messages)
        except Exception as e:
            error_msg = (
                f"❌ CRITICAL: Failed to communicate with Ollama inference engine\n\n"
                f"Error Type: {type(e).__name__}\n"
                f"Error Message: {str(e)}\n\n"
            )
            # Auto-diagnose the issue
            diagnostics = self._auto_diagnose_ollama()
            return error_msg + "\n" + diagnostics

        if not ai_response:
            error_msg = (
                f"❌ Empty response from Ollama inference engine\n\n"
                f"The request succeeded but returned no data. This usually means:\n"
                f" • The model ({self.agent.model}) is still loading\n"
                f" • Ollama ran out of memory during generation\n"
                f" • The prompt was too large for the context window\n\n"
            )
            # Auto-diagnose the issue
            diagnostics = self._auto_diagnose_ollama()
            return error_msg + "\n" + diagnostics
        # Check if Ollama returned an error
        try:
            error_check = json.loads(ai_response)
            if isinstance(error_check, dict) and 'error' in error_check:
                error_msg = (
                    f"❌ Ollama API Error\n\n"
                    f"Error: {error_check.get('error', 'Unknown error')}\n"
                    f"Diagnosis: {error_check.get('diagnosis', 'No details')}\n\n"
                )
                # Auto-diagnose the issue
                diagnostics = self._auto_diagnose_ollama()
                return error_msg + "\n" + diagnostics
        except json.JSONDecodeError:
            # Not JSON, it's a normal response
            pass
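        # Reaching here means ai_response is a genuine model reply:
        # either plain conversation or an {"action": "execute", ...} request.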
        # Parse response
        parsed = self._parse_response(ai_response)

        if parsed.get('action') == 'execute':
            # AI wants to execute a command
            command = parsed.get('command', '')
            explanation = parsed.get('explanation', '')

            # Show what we're about to do
            response = f"🔧 {explanation}\n\nExecuting: `{command}`\n\n"

            # Execute the command
            result = self._execute_command(command)

            # Show if we retried with sudo
            if result.get('retried_with_sudo'):
                response += f"⚠️ Permission denied, retried as: `{result['command']}`\n\n"

            if result['success']:
                response += "✅ Command succeeded:\n"
                if result['stdout']:
                    response += f"```\n{result['stdout']}\n```"
                else:
                    response += "(no output)"
            else:
                response += f"❌ Command failed (exit code {result['exit_code']}):\n"
                if result['stderr']:
                    response += f"```\n{result['stderr']}\n```"
                elif result['stdout']:
                    response += f"```\n{result['stdout']}\n```"

            # Add command execution to history
            self.conversation_history.append({
                'role': 'macha',
                'message': response,
                'timestamp': datetime.now().isoformat(),
                'command_result': result
            })

            # Now ask AI to respond to the command output
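            # (this follow-up goes through the plain _query_ollama endpoint,
            # not the tool-calling one, since only a short analysis of the
            # captured output is wanted)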
            followup_prompt = f"""The command completed. Here's what happened:

Command: {command}
Success: {result['success']}
Output: {result['stdout'][:500] if result['stdout'] else '(none)'}
Error: {result['stderr'][:500] if result['stderr'] else '(none)'}

Please provide a brief analysis or next steps."""
            followup_response = self.agent._query_ollama(followup_prompt)
            if followup_response:
                response += f"\n\n{followup_response}"
            return response
        else:
            # Normal conversation response
            message = parsed.get('message', ai_response)
            self.conversation_history.append({
                'role': 'macha',
                'message': message,
                'timestamp': datetime.now().isoformat()
            })
            return message

    def run(self):
        """Run the interactive chat session"""
        print("=" * 70)
        print("🌐 MACHA INTERACTIVE CHAT")
        print("=" * 70)
        print("Type your message and press Enter. Commands:")
        print(" /exit or /quit - End the chat session")
        print(" /clear - Clear conversation history")
        print(" /history - Show conversation history")
        print(" /debug - Show Ollama connection status")
        print("=" * 70)
        print()
        while True:
            try:
                # Get user input
                user_input = input("\n💬 YOU: ").strip()
                if not user_input:
                    continue

                # Handle special commands
                if user_input.lower() in ['/exit', '/quit']:
                    print("\n👋 Ending chat session. Goodbye!")
                    break
                elif user_input.lower() == '/clear':
                    self.conversation_history.clear()
                    print("🧹 Conversation history cleared.")
                    continue
                elif user_input.lower() == '/history':
                    print("\n" + "=" * 70)
                    print("CONVERSATION HISTORY")
                    print("=" * 70)
                    for entry in self.conversation_history:
                        role = entry['role'].upper()
                        msg = (entry['message'][:100] + "...") if len(entry['message']) > 100 else entry['message']
                        print(f"{role}: {msg}")
                    print("=" * 70)
                    continue
                elif user_input.lower() == '/debug':
                    print("\n" + "=" * 70)
                    print("MACHA ARCHITECTURE & STATUS")
                    print("=" * 70)
                    print("\n🏗️ SYSTEM ARCHITECTURE:")
                    print(" Hostname: macha.coven.systems")
                    print(" Service: macha-autonomous.service (systemd)")
                    print(" Working Directory: /var/lib/macha")
                    print("\n👤 EXECUTION CONTEXT:")
                    current_user = os.getenv('USER') or os.getenv('USERNAME') or 'unknown'
                    print(f" Current User: {current_user}")
                    print(f" UID: {os.getuid()}")
                    # Check if user has sudo access
                    try:
                        result = subprocess.run(['sudo', '-n', 'true'],
                                                capture_output=True, timeout=1)
                        if result.returncode == 0:
                            print(" Sudo Access: ✓ Yes (passwordless)")
                        else:
                            print(" Sudo Access: ⚠ Requires password")
                    except Exception:
                        print(" Sudo Access: ❌ No")
                    print(" Note: Chat runs as invoking user (you), not as macha-autonomous")
                    print("\n🧠 INFERENCE ENGINE:")
                    print(" Backend: Ollama")
                    print(f" Host: {self.agent.ollama_host}")
                    print(f" Model: {self.agent.model}")
                    print(" Service: ollama.service (systemd)")
                    print("\n💾 DATABASE:")
                    print(" Backend: ChromaDB")
                    print(" Host: http://localhost:8000")
                    print(" Data: /var/lib/chromadb")
                    print(" Service: chromadb.service (systemd)")
                    print("\n🔍 OLLAMA STATUS:")
                    # Try to query Ollama status
                    try:
                        import requests
                        # Check if Ollama is running
                        response = requests.get(f"{self.agent.ollama_host}/api/tags", timeout=5)
                        if response.status_code == 200:
                            models = response.json().get('models', [])
                            print(" Status: ✓ Running")
                            print(f" Loaded models: {len(models)}")
                            for model in models:
                                name = model.get('name', 'unknown')
                                size = model.get('size', 0) / (1024**3)  # GB
                                is_current = "← ACTIVE" if name == self.agent.model else ""
                                print(f" {name} ({size:.1f} GB) {is_current}")
                        else:
                            print(f" Status: ❌ Error (HTTP {response.status_code})")
                    except Exception as e:
                        print(f" Status: ❌ Cannot connect: {e}")
                        print(" Hint: Check 'systemctl status ollama.service'")
                    print("\n💡 CONVERSATION:")
                    print(f" History: {len(self.conversation_history)} messages")
                    print(f" Session started: {self.session_start}")
                    print("=" * 70)
                    continue
                # Process the message
                print("\n🤖 MACHA: ", end='', flush=True)
                response = self.process_message(user_input)
                print(response)

            except KeyboardInterrupt:
                print("\n\n👋 Chat interrupted. Use /exit to quit properly.")
                continue
            except EOFError:
                print("\n\n👋 Ending chat session. Goodbye!")
                break
            except Exception as e:
                print(f"\n❌ Error: {e}")
                continue

def main():
    """Main entry point"""
    session = MachaChatSession()
    session.run()


if __name__ == "__main__":
    main()