Initial commit: Split Macha autonomous system into separate flake

Macha is now a standalone NixOS flake that can be imported into other
systems. This provides:

- Independent versioning
- Easier reuse in other configurations
- Cleaner separation of concerns
- Better development workflow

Includes:
- Complete autonomous system code
- NixOS module with full configuration options
- Queue-based architecture with priority system
- Chunked map-reduce for large outputs
- ChromaDB knowledge base
- Tool calling system
- Multi-host SSH management
- Gotify notification integration

All capabilities from DESIGN.md are preserved.
Author: Lily Miller
Date:   2025-10-06 14:32:37 -06:00
Commit: 22ba493d9e
30 changed files with 10306 additions and 0 deletions

ollama_queue.py (new file, 238 lines)
#!/usr/bin/env python3
"""
Ollama Queue Handler - Serializes all LLM requests to prevent resource contention
"""
import json
import time
import fcntl
import signal
from pathlib import Path
from typing import Dict, Any, Optional, Callable
from datetime import datetime
from enum import IntEnum


class Priority(IntEnum):
    """Request priority levels"""
    INTERACTIVE = 0  # User requests (highest priority)
    AUTONOMOUS = 1   # Background maintenance
    BATCH = 2        # Low priority bulk operations


class OllamaQueue:
    """File-based queue for serializing Ollama requests"""

    def __init__(self, queue_dir: Path = Path("/var/lib/macha/queues/ollama")):
        self.queue_dir = queue_dir
        self.queue_dir.mkdir(parents=True, exist_ok=True)

        self.pending_dir = self.queue_dir / "pending"
        self.processing_dir = self.queue_dir / "processing"
        self.completed_dir = self.queue_dir / "completed"
        self.failed_dir = self.queue_dir / "failed"
        for directory in [self.pending_dir, self.processing_dir, self.completed_dir, self.failed_dir]:
            directory.mkdir(parents=True, exist_ok=True)

        self.lock_file = self.queue_dir / "queue.lock"
        self.running = False

    def submit(
        self,
        request_type: str,  # "generate", "chat", "chat_with_tools"
        payload: Dict[str, Any],
        priority: Priority = Priority.INTERACTIVE,
        callback: Optional[Callable] = None,
        progress_callback: Optional[Callable] = None
    ) -> str:
        """Submit a request to the queue. Returns request ID."""
        request_id = f"{int(time.time() * 1000000)}_{priority.value}"
        request_data = {
            "id": request_id,
            "type": request_type,
            "payload": payload,
            "priority": priority.value,
            "submitted_at": datetime.now().isoformat(),
            "status": "pending"
        }
        request_file = self.pending_dir / f"{request_id}.json"
        request_file.write_text(json.dumps(request_data, indent=2))
        return request_id
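
    # The request file's stem ("<microsecond-timestamp>_<priority>") doubles as
    # the sort key used by _get_queue_position and _process_next_request, so
    # ordering is priority-first (lower Priority value wins), then FIFO within
    # a priority level.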

    def get_status(self, request_id: str) -> Dict[str, Any]:
        """Get the status of a request"""
        # Check pending
        pending_file = self.pending_dir / f"{request_id}.json"
        if pending_file.exists():
            data = json.loads(pending_file.read_text())
            # Calculate position in queue
            position = self._get_queue_position(request_id)
            return {"status": "pending", "position": position, "data": data}

        # Check processing
        processing_file = self.processing_dir / f"{request_id}.json"
        if processing_file.exists():
            data = json.loads(processing_file.read_text())
            return {"status": "processing", "data": data}

        # Check completed
        completed_file = self.completed_dir / f"{request_id}.json"
        if completed_file.exists():
            data = json.loads(completed_file.read_text())
            return {"status": "completed", "result": data.get("result"), "data": data}

        # Check failed
        failed_file = self.failed_dir / f"{request_id}.json"
        if failed_file.exists():
            data = json.loads(failed_file.read_text())
            return {"status": "failed", "error": data.get("error"), "data": data}

        return {"status": "not_found"}

    def _get_queue_position(self, request_id: str) -> int:
        """Get position in queue (1-indexed)"""
        pending_requests = sorted(
            self.pending_dir.glob("*.json"),
            key=lambda p: (int(p.stem.split('_')[1]), int(p.stem.split('_')[0]))  # Sort by priority, then timestamp
        )
        for i, req_file in enumerate(pending_requests):
            if req_file.stem == request_id:
                return i + 1
        return 0

    def wait_for_result(
        self,
        request_id: str,
        timeout: int = 300,
        poll_interval: float = 0.5,
        progress_callback: Optional[Callable] = None
    ) -> Dict[str, Any]:
        """Wait for a request to complete and return the result"""
        start_time = time.time()
        last_status = None

        while time.time() - start_time < timeout:
            status = self.get_status(request_id)

            # Report progress if status changed
            if progress_callback and status != last_status:
                if status["status"] == "pending":
                    progress_callback(f"Queued (position {status.get('position', '?')})")
                elif status["status"] == "processing":
                    progress_callback("Processing...")
                last_status = status

            if status["status"] == "completed":
                return status["result"]
            elif status["status"] == "failed":
                raise Exception(f"Request failed: {status.get('error')}")
            elif status["status"] == "not_found":
                raise Exception(f"Request {request_id} not found")

            time.sleep(poll_interval)

        raise TimeoutError(f"Request {request_id} timed out after {timeout}s")

    def start_worker(self, ollama_client):
        """Start the queue worker (processes requests serially)"""
        self.running = True
        self.ollama_client = ollama_client

        # Set up signal handlers for graceful shutdown
        signal.signal(signal.SIGTERM, self._shutdown_handler)
        signal.signal(signal.SIGINT, self._shutdown_handler)

        print("[OllamaQueue] Worker started, processing requests...")
        while self.running:
            try:
                self._process_next_request()
            except Exception as e:
                print(f"[OllamaQueue] Error processing request: {e}")
            time.sleep(0.1)  # Small sleep to prevent busy-waiting
        print("[OllamaQueue] Worker stopped")

    def _shutdown_handler(self, signum, frame):
        """Handle shutdown signals"""
        print(f"[OllamaQueue] Received signal {signum}, shutting down...")
        self.running = False
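
    # Note: start_worker is expected to run in its own long-lived process
    # (e.g. a systemd service); clients in other processes only call submit()
    # and wait_for_result(), communicating solely through the queue directories.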

    def _process_next_request(self):
        """Process the next request in the queue"""
        # Get pending requests sorted by priority
        pending_requests = sorted(
            self.pending_dir.glob("*.json"),
            key=lambda p: (int(p.stem.split('_')[1]), int(p.stem.split('_')[0]))
        )
        if not pending_requests:
            return

        next_request = pending_requests[0]
        request_id = next_request.stem

        # Move to processing
        request_data = json.loads(next_request.read_text())
        request_data["status"] = "processing"
        request_data["started_at"] = datetime.now().isoformat()
        processing_file = self.processing_dir / f"{request_id}.json"
        processing_file.write_text(json.dumps(request_data, indent=2))
        next_request.unlink()

        try:
            # Process based on type
            result = None
            if request_data["type"] == "generate":
                result = self.ollama_client.generate(request_data["payload"])
            elif request_data["type"] == "chat":
                result = self.ollama_client.chat(request_data["payload"])
            elif request_data["type"] == "chat_with_tools":
                result = self.ollama_client.chat_with_tools(request_data["payload"])
            else:
                raise ValueError(f"Unknown request type: {request_data['type']}")

            # Move to completed
            request_data["status"] = "completed"
            request_data["completed_at"] = datetime.now().isoformat()
            request_data["result"] = result
            completed_file = self.completed_dir / f"{request_id}.json"
            completed_file.write_text(json.dumps(request_data, indent=2))
            processing_file.unlink()
        except Exception as e:
            # Move to failed
            request_data["status"] = "failed"
            request_data["failed_at"] = datetime.now().isoformat()
            request_data["error"] = str(e)
            failed_file = self.failed_dir / f"{request_id}.json"
            failed_file.write_text(json.dumps(request_data, indent=2))
            processing_file.unlink()
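
    # Note: if the worker dies mid-request, the file stays in processing/;
    # nothing here re-queues it, so a supervisor or a startup sweep would be
    # needed to recover such requests.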

    def cleanup_old_requests(self, max_age_seconds: int = 3600):
        """Clean up completed/failed requests older than max_age_seconds"""
        cutoff_time = time.time() - max_age_seconds
        for directory in [self.completed_dir, self.failed_dir]:
            for request_file in directory.glob("*.json"):
                # Extract timestamp from filename
                timestamp = int(request_file.stem.split('_')[0]) / 1000000
                if timestamp < cutoff_time:
                    request_file.unlink()

    def get_queue_stats(self) -> Dict[str, Any]:
        """Get queue statistics"""
        return {
            "pending": len(list(self.pending_dir.glob("*.json"))),
            "processing": len(list(self.processing_dir.glob("*.json"))),
            "completed": len(list(self.completed_dir.glob("*.json"))),
            "failed": len(list(self.failed_dir.glob("*.json")))
        }
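
A minimal sketch of the intended client flow, based on the API above. The model name, payload shape, and progress callback are illustrative assumptions, and the worker is assumed to already be running in a separate process:

from ollama_queue import OllamaQueue, Priority

queue = OllamaQueue()

# Submit an interactive chat request; AUTONOMOUS and BATCH work queues behind it.
request_id = queue.submit(
    request_type="chat",
    payload={"model": "llama3", "messages": [{"role": "user", "content": "Status report?"}]},  # illustrative payload
    priority=Priority.INTERACTIVE,
)

# Block until the worker finishes the request, reporting queue progress as it changes.
result = queue.wait_for_result(
    request_id,
    timeout=120,
    progress_callback=lambda msg: print(f"[client] {msg}"),
)
print(result)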