Initial commit: Split Macha autonomous system into separate flake
Macha is now a standalone NixOS flake that can be imported into other systems. This provides: - Independent versioning - Easier reusability - Cleaner separation of concerns - Better development workflow Includes: - Complete autonomous system code - NixOS module with full configuration options - Queue-based architecture with priority system - Chunked map-reduce for large outputs - ChromaDB knowledge base - Tool calling system - Multi-host SSH management - Gotify notification integration All capabilities from DESIGN.md are preserved.
This commit is contained in:
537
executor.py
Normal file
537
executor.py
Normal file
@@ -0,0 +1,537 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Action Executor - Safely executes proposed fixes with rollback capability
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import shutil
|
||||
from typing import Dict, List, Any, Optional
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import time
|
||||
|
||||
|
||||
class SafeExecutor:
|
||||
"""Executes system maintenance actions with safety checks"""
|
||||
|
||||
# Actions that are considered safe to auto-execute
|
||||
SAFE_ACTIONS = {
|
||||
"systemd_restart", # Restart failed services
|
||||
"cleanup", # Disk cleanup, log rotation
|
||||
"investigation", # Read-only diagnostics
|
||||
}
|
||||
|
||||
# Services that should NEVER be stopped/disabled
|
||||
PROTECTED_SERVICES = {
|
||||
"sshd",
|
||||
"systemd-networkd",
|
||||
"NetworkManager",
|
||||
"systemd-resolved",
|
||||
"dbus",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
state_dir: Path = Path("/var/lib/macha"),
|
||||
autonomy_level: str = "suggest", # observe, suggest, auto-safe, auto-full
|
||||
dry_run: bool = False,
|
||||
agent = None # Optional agent for learning from actions
|
||||
):
|
||||
self.state_dir = state_dir
|
||||
self.state_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.autonomy_level = autonomy_level
|
||||
self.dry_run = dry_run
|
||||
self.agent = agent
|
||||
self.action_log = self.state_dir / "actions.jsonl"
|
||||
self.approval_queue = self.state_dir / "approval_queue.json"
|
||||
|
||||
def execute_action(self, action: Dict[str, Any], monitoring_context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Execute a proposed action with appropriate safety checks"""
|
||||
|
||||
action_type = action.get("action_type", "unknown")
|
||||
risk_level = action.get("risk_level", "high")
|
||||
|
||||
# Determine if we should execute
|
||||
should_execute, reason = self._should_execute(action_type, risk_level)
|
||||
|
||||
if not should_execute:
|
||||
if self.autonomy_level == "suggest":
|
||||
# Queue for approval
|
||||
self._queue_for_approval(action, monitoring_context)
|
||||
return {
|
||||
"executed": False,
|
||||
"status": "queued_for_approval",
|
||||
"reason": reason,
|
||||
"queue_file": str(self.approval_queue)
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"executed": False,
|
||||
"status": "blocked",
|
||||
"reason": reason
|
||||
}
|
||||
|
||||
# Execute the action
|
||||
if self.dry_run:
|
||||
return self._dry_run_action(action)
|
||||
|
||||
return self._execute_action_impl(action, monitoring_context)
|
||||
|
||||
def _should_execute(self, action_type: str, risk_level: str) -> tuple[bool, str]:
|
||||
"""Determine if an action should be auto-executed based on autonomy level"""
|
||||
|
||||
if self.autonomy_level == "observe":
|
||||
return False, "Autonomy level set to observe-only"
|
||||
|
||||
# Auto-approve low-risk investigation actions
|
||||
if action_type == "investigation" and risk_level == "low":
|
||||
return True, "Auto-approved: Low-risk information gathering"
|
||||
|
||||
if self.autonomy_level == "suggest":
|
||||
return False, "Autonomy level requires manual approval"
|
||||
|
||||
if self.autonomy_level == "auto-safe":
|
||||
if action_type in self.SAFE_ACTIONS and risk_level == "low":
|
||||
return True, "Auto-executing safe action"
|
||||
return False, "Action requires higher autonomy level"
|
||||
|
||||
if self.autonomy_level == "auto-full":
|
||||
if risk_level == "high":
|
||||
return False, "High risk actions always require approval"
|
||||
return True, "Auto-executing approved action"
|
||||
|
||||
return False, "Unknown autonomy level"
|
||||
|
||||
def _execute_action_impl(self, action: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Actually execute the action"""
|
||||
|
||||
action_type = action.get("action_type")
|
||||
result = {
|
||||
"executed": True,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"action": action,
|
||||
"success": False,
|
||||
"output": "",
|
||||
"error": None
|
||||
}
|
||||
|
||||
try:
|
||||
if action_type == "systemd_restart":
|
||||
result.update(self._restart_services(action))
|
||||
|
||||
elif action_type == "cleanup":
|
||||
result.update(self._perform_cleanup(action))
|
||||
|
||||
elif action_type == "nix_rebuild":
|
||||
result.update(self._nix_rebuild(action))
|
||||
|
||||
elif action_type == "config_change":
|
||||
result.update(self._apply_config_change(action))
|
||||
|
||||
elif action_type == "investigation":
|
||||
result.update(self._run_investigation(action))
|
||||
|
||||
else:
|
||||
result["error"] = f"Unknown action type: {action_type}"
|
||||
|
||||
except Exception as e:
|
||||
result["error"] = str(e)
|
||||
result["success"] = False
|
||||
|
||||
# Log the action
|
||||
self._log_action(result)
|
||||
|
||||
# Learn from successful operations
|
||||
if result.get("success") and self.agent:
|
||||
try:
|
||||
self.agent.reflect_and_learn(
|
||||
situation=action.get("diagnosis", "Unknown situation"),
|
||||
action_taken=action.get("proposed_action", "Unknown action"),
|
||||
outcome=result.get("output", ""),
|
||||
success=True
|
||||
)
|
||||
except Exception as e:
|
||||
# Don't fail the action if learning fails
|
||||
print(f"Note: Could not record learning: {e}")
|
||||
|
||||
return result
|
||||
|
||||
def _restart_services(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Restart systemd services"""
|
||||
commands = action.get("commands", [])
|
||||
output_lines = []
|
||||
|
||||
for cmd in commands:
|
||||
if not cmd.startswith("systemctl restart "):
|
||||
continue
|
||||
|
||||
service = cmd.split()[-1]
|
||||
|
||||
# Safety check
|
||||
if any(protected in service for protected in self.PROTECTED_SERVICES):
|
||||
output_lines.append(f"BLOCKED: {service} is protected")
|
||||
continue
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["systemctl", "restart", service],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
output_lines.append(f"✓ Restarted {service}")
|
||||
else:
|
||||
output_lines.append(f"✗ Failed to restart {service}: {result.stderr}")
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
output_lines.append(f"✗ Timeout restarting {service}")
|
||||
|
||||
return {
|
||||
"success": len(output_lines) > 0,
|
||||
"output": "\n".join(output_lines)
|
||||
}
|
||||
|
||||
def _perform_cleanup(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Perform system cleanup tasks"""
|
||||
output_lines = []
|
||||
|
||||
# Nix store cleanup
|
||||
if "nix" in action.get("proposed_action", "").lower():
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["nix-collect-garbage", "--delete-old"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300
|
||||
)
|
||||
output_lines.append(f"Nix cleanup: {result.stdout}")
|
||||
except Exception as e:
|
||||
output_lines.append(f"Nix cleanup failed: {e}")
|
||||
|
||||
# Journal cleanup (keep last 7 days)
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["journalctl", "--vacuum-time=7d"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60
|
||||
)
|
||||
output_lines.append(f"Journal cleanup: {result.stdout}")
|
||||
except Exception as e:
|
||||
output_lines.append(f"Journal cleanup failed: {e}")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"output": "\n".join(output_lines)
|
||||
}
|
||||
|
||||
def _nix_rebuild(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Rebuild NixOS configuration"""
|
||||
|
||||
# This is HIGH RISK - always requires approval or full autonomy
|
||||
# And we should test first
|
||||
|
||||
output_lines = []
|
||||
|
||||
# First, try a dry build
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["nixos-rebuild", "dry-build", "--flake", ".#macha"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600,
|
||||
cwd="/home/lily/Documents/nixos-servers"
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
return {
|
||||
"success": False,
|
||||
"output": f"Dry build failed:\n{result.stderr}"
|
||||
}
|
||||
|
||||
output_lines.append("✓ Dry build successful")
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"success": False,
|
||||
"output": f"Dry build error: {e}"
|
||||
}
|
||||
|
||||
# Now do the actual rebuild
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["nixos-rebuild", "switch", "--flake", ".#macha"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=1200,
|
||||
cwd="/home/lily/Documents/nixos-servers"
|
||||
)
|
||||
|
||||
output_lines.append(result.stdout)
|
||||
|
||||
return {
|
||||
"success": result.returncode == 0,
|
||||
"output": "\n".join(output_lines),
|
||||
"error": result.stderr if result.returncode != 0 else None
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"success": False,
|
||||
"output": "\n".join(output_lines),
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
def _apply_config_change(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Apply a configuration file change"""
|
||||
|
||||
config_changes = action.get("config_changes", {})
|
||||
file_path = config_changes.get("file")
|
||||
|
||||
if not file_path:
|
||||
return {
|
||||
"success": False,
|
||||
"output": "No file specified in config_changes"
|
||||
}
|
||||
|
||||
# For now, we DON'T auto-modify configs - too risky
|
||||
# Instead, we create a suggested patch file
|
||||
|
||||
patch_file = self.state_dir / f"suggested_patch_{int(time.time())}.txt"
|
||||
with open(patch_file, 'w') as f:
|
||||
f.write(f"Suggested change to {file_path}:\n\n")
|
||||
f.write(config_changes.get("change", "No change description"))
|
||||
f.write(f"\n\nReasoning: {action.get('reasoning', 'No reasoning provided')}")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"output": f"Config change suggestion saved to {patch_file}\nThis requires manual review and application."
|
||||
}
|
||||
|
||||
def _run_investigation(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Run diagnostic commands"""
|
||||
commands = action.get("commands", [])
|
||||
output_lines = []
|
||||
|
||||
for cmd in commands:
|
||||
# Only allow safe read-only commands
|
||||
safe_commands = ["journalctl", "systemctl status", "df", "free", "ps", "netstat", "ss"]
|
||||
if not any(cmd.startswith(safe) for safe in safe_commands):
|
||||
output_lines.append(f"BLOCKED unsafe command: {cmd}")
|
||||
continue
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
shell=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30
|
||||
)
|
||||
output_lines.append(f"$ {cmd}")
|
||||
output_lines.append(result.stdout)
|
||||
except Exception as e:
|
||||
output_lines.append(f"Error running {cmd}: {e}")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"output": "\n".join(output_lines)
|
||||
}
|
||||
|
||||
def _dry_run_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Simulate action execution"""
|
||||
return {
|
||||
"executed": False,
|
||||
"status": "dry_run",
|
||||
"action": action,
|
||||
"output": "Dry run mode - no actual changes made"
|
||||
}
|
||||
|
||||
def _queue_for_approval(self, action: Dict[str, Any], context: Dict[str, Any]):
|
||||
"""Add action to approval queue"""
|
||||
queue = []
|
||||
if self.approval_queue.exists():
|
||||
with open(self.approval_queue, 'r') as f:
|
||||
queue = json.load(f)
|
||||
|
||||
# Check for duplicate pending actions
|
||||
proposed_action = action.get("proposed_action", "")
|
||||
diagnosis = action.get("diagnosis", "")
|
||||
|
||||
for existing in queue:
|
||||
# Skip already approved/rejected items
|
||||
if existing.get("approved") is not None:
|
||||
continue
|
||||
|
||||
existing_action = existing.get("action", {})
|
||||
existing_proposed = existing_action.get("proposed_action", "")
|
||||
existing_diagnosis = existing_action.get("diagnosis", "")
|
||||
|
||||
# Check if this is essentially the same issue
|
||||
# Match if diagnosis is very similar OR proposed action is very similar
|
||||
if (diagnosis and existing_diagnosis and
|
||||
self._similarity_check(diagnosis, existing_diagnosis) > 0.7):
|
||||
print(f"Skipping duplicate action - similar diagnosis already queued")
|
||||
return
|
||||
|
||||
if (proposed_action and existing_proposed and
|
||||
self._similarity_check(proposed_action, existing_proposed) > 0.7):
|
||||
print(f"Skipping duplicate action - similar proposal already queued")
|
||||
return
|
||||
|
||||
queue.append({
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"action": action,
|
||||
"context": context,
|
||||
"approved": None
|
||||
})
|
||||
|
||||
with open(self.approval_queue, 'w') as f:
|
||||
json.dump(queue, f, indent=2)
|
||||
|
||||
def _similarity_check(self, str1: str, str2: str) -> float:
|
||||
"""Simple similarity check between two strings"""
|
||||
# Normalize strings
|
||||
s1 = str1.lower().strip()
|
||||
s2 = str2.lower().strip()
|
||||
|
||||
# Exact match
|
||||
if s1 == s2:
|
||||
return 1.0
|
||||
|
||||
# Check for significant word overlap
|
||||
words1 = set(s1.split())
|
||||
words2 = set(s2.split())
|
||||
|
||||
# Remove common words that don't indicate similarity
|
||||
common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had'}
|
||||
words1 = words1 - common_words
|
||||
words2 = words2 - common_words
|
||||
|
||||
if not words1 or not words2:
|
||||
return 0.0
|
||||
|
||||
# Calculate Jaccard similarity
|
||||
intersection = len(words1 & words2)
|
||||
union = len(words1 | words2)
|
||||
|
||||
return intersection / union if union > 0 else 0.0
|
||||
|
||||
def _log_action(self, result: Dict[str, Any]):
|
||||
"""Log executed actions"""
|
||||
with open(self.action_log, 'a') as f:
|
||||
f.write(json.dumps(result) + '\n')
|
||||
|
||||
def get_approval_queue(self) -> List[Dict[str, Any]]:
|
||||
"""Get pending actions awaiting approval"""
|
||||
if not self.approval_queue.exists():
|
||||
return []
|
||||
|
||||
with open(self.approval_queue, 'r') as f:
|
||||
return json.load(f)
|
||||
|
||||
def approve_action(self, index: int) -> bool:
|
||||
"""Approve and execute a queued action, then remove it from queue"""
|
||||
queue = self.get_approval_queue()
|
||||
if 0 <= index < len(queue):
|
||||
action_item = queue[index]
|
||||
|
||||
# Execute the approved action
|
||||
result = self._execute_action_impl(action_item["action"], action_item["context"])
|
||||
|
||||
# Archive the action (success or failure)
|
||||
self._archive_action(action_item, result)
|
||||
|
||||
# Remove from queue regardless of outcome
|
||||
queue.pop(index)
|
||||
|
||||
with open(self.approval_queue, 'w') as f:
|
||||
json.dump(queue, f, indent=2)
|
||||
|
||||
return result.get("success", False)
|
||||
|
||||
return False
|
||||
|
||||
def _archive_action(self, action_item: Dict[str, Any], result: Dict[str, Any]):
|
||||
"""Archive an approved action with its execution result"""
|
||||
archive_file = self.state_dir / "approved_actions.jsonl"
|
||||
|
||||
archive_entry = {
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"original_timestamp": action_item.get("timestamp"),
|
||||
"action": action_item.get("action"),
|
||||
"context": action_item.get("context"),
|
||||
"result": result
|
||||
}
|
||||
|
||||
with open(archive_file, 'a') as f:
|
||||
f.write(json.dumps(archive_entry) + '\n')
|
||||
|
||||
def reject_action(self, index: int) -> bool:
|
||||
"""Reject and remove a queued action"""
|
||||
queue = self.get_approval_queue()
|
||||
if 0 <= index < len(queue):
|
||||
removed_action = queue.pop(index)
|
||||
|
||||
with open(self.approval_queue, 'w') as f:
|
||||
json.dump(queue, f, indent=2)
|
||||
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
if sys.argv[1] == "queue":
|
||||
executor = SafeExecutor()
|
||||
queue = executor.get_approval_queue()
|
||||
if queue:
|
||||
print("\n" + "="*70)
|
||||
print(f"PENDING ACTIONS: {len(queue)}")
|
||||
print("="*70)
|
||||
for i, item in enumerate(queue):
|
||||
action = item.get("action", {})
|
||||
timestamp = item.get("timestamp", "unknown")
|
||||
approved = item.get("approved")
|
||||
|
||||
status = "✓ APPROVED" if approved else "⏳ PENDING" if approved is None else "✗ REJECTED"
|
||||
|
||||
print(f"\n[{i}] {status} - {timestamp}")
|
||||
print("-" * 70)
|
||||
print(f"DIAGNOSIS: {action.get('diagnosis', 'N/A')}")
|
||||
print(f"\nPROPOSED ACTION: {action.get('proposed_action', 'N/A')}")
|
||||
print(f"TYPE: {action.get('action_type', 'N/A')}")
|
||||
print(f"RISK: {action.get('risk_level', 'N/A')}")
|
||||
|
||||
if action.get('commands'):
|
||||
print(f"\nCOMMANDS:")
|
||||
for cmd in action['commands']:
|
||||
print(f" - {cmd}")
|
||||
|
||||
if action.get('config_changes'):
|
||||
print(f"\nCONFIG CHANGES:")
|
||||
for key, value in action['config_changes'].items():
|
||||
print(f" {key}: {value}")
|
||||
|
||||
print(f"\nREASONING: {action.get('reasoning', 'N/A')}")
|
||||
print("\n" + "="*70 + "\n")
|
||||
else:
|
||||
print("No pending actions")
|
||||
|
||||
elif sys.argv[1] == "approve" and len(sys.argv) > 2:
|
||||
executor = SafeExecutor()
|
||||
index = int(sys.argv[2])
|
||||
success = executor.approve_action(index)
|
||||
print(f"Approval {'succeeded' if success else 'failed'}")
|
||||
|
||||
elif sys.argv[1] == "reject" and len(sys.argv) > 2:
|
||||
executor = SafeExecutor()
|
||||
index = int(sys.argv[2])
|
||||
success = executor.reject_action(index)
|
||||
print(f"Action {'rejected and removed from queue' if success else 'rejection failed'}")
|
||||
Reference in New Issue
Block a user