#!/usr/bin/env python3
"""
llama.cpp Server GUI
A graphical interface for managing llama.cpp server instances
"""

import json
import os
import select
import shlex
import subprocess
import sys
from pathlib import Path

from PyQt6.QtWidgets import (
    QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
    QPushButton, QLabel, QLineEdit, QTextEdit, QFileDialog,
    QGroupBox, QSpinBox, QComboBox, QCheckBox, QMessageBox,
    QSystemTrayIcon, QMenu, QInputDialog, QStyle,
)
from PyQt6.QtCore import QThread, pyqtSignal, QTimer, Qt
from PyQt6.QtGui import QIcon, QAction

# Directory offered by the binary picker when no binary path has been entered
# yet and this directory exists; otherwise the picker opens in the home dir.
DEFAULT_BINARY_DIR = "/home/xero110/dev/llama.cpp/build/bin"

# Upper bound on the number of paragraphs kept in the log viewer, so a
# long-running server cannot grow the widget without limit.
MAX_LOG_BLOCKS = 10000


class ServerOutputReader(QThread):
    """Thread to read server output without blocking the GUI.

    Every complete line read from the process's stdout/stderr pipes is
    decoded and emitted through ``output_received``.
    """

    output_received = pyqtSignal(str)

    _READ_CHUNK = 65536      # max bytes fetched per os.read() call
    _POLL_SECONDS = 0.1      # select() timeout; bounds how long stop() takes
    _REAP_SECONDS = 2        # how long to wait for the exit status after EOF

    def __init__(self, process):
        """Store the ``subprocess.Popen`` object whose pipes will be read."""
        super().__init__()
        self.process = process
        self.running = True

    def run(self):
        """Read output from the process until EOF or until stop() is called.

        The pipes are read through their raw file descriptors with
        ``os.read`` instead of ``readline()``: ``select()`` only sees data
        still in the OS pipe, so lines already pulled into a Python-level
        buffer by ``readline()`` would stay unreported until new data arrived.
        """
        # fd -> bytes received so far that are not yet terminated by "\n"
        pending = {}
        for stream in (self.process.stdout, self.process.stderr):
            if stream is None:
                continue
            try:
                pending[stream.fileno()] = b""
            except (ValueError, OSError):
                # Pipe already closed
                continue
        had_streams = bool(pending)

        while self.running and pending:
            try:
                readable, _, _ = select.select(
                    list(pending), [], [], self._POLL_SECONDS
                )
            except (ValueError, OSError):
                # File descriptor closed
                break

            if not readable:
                if self.process.poll() is not None:
                    # Process is gone and nothing is left to read, but the
                    # pipe is still open somewhere else; do not wait for EOF.
                    break
                continue

            for fd in readable:
                try:
                    chunk = os.read(fd, self._READ_CHUNK)
                except OSError:
                    chunk = b""
                if not chunk:
                    # EOF on this pipe: flush an unterminated last line
                    tail = pending.pop(fd)
                    if tail:
                        self._emit_line(tail)
                    continue
                pieces = (pending[fd] + chunk).split(b"\n")
                pending[fd] = pieces.pop()  # incomplete trailing line
                for line in pieces:
                    self._emit_line(line)

        reached_eof = had_streams and not pending
        for tail in pending.values():
            if tail:
                self._emit_line(tail)

        if reached_eof:
            # All pipes closed, so the process is exiting. Collect its exit
            # status so that poll() reports it once `finished` is emitted.
            try:
                self.process.wait(timeout=self._REAP_SECONDS)
            except subprocess.TimeoutExpired:
                pass

    def _emit_line(self, raw):
        """Decode one raw output line and emit it.

        Undecodable bytes are replaced rather than raised, so unusual output
        cannot terminate the reader. No [ERROR] prefix is added for stderr -
        llama.cpp uses stderr for normal logging.
        """
        self.output_received.emit(raw.decode("utf-8", errors="replace").strip())

    def read_remaining_output(self):
        """Read any remaining output after process termination.

        ``run()`` reads each pipe to EOF by itself and does not call this;
        the method is kept for callers that drain a finished process manually.
        It blocks until each pipe reaches EOF.
        """
        for stream in (self.process.stdout, self.process.stderr):
            if stream is None:
                continue
            chunks = []
            try:
                fd = stream.fileno()
                while True:
                    chunk = os.read(fd, self._READ_CHUNK)
                    if not chunk:
                        break
                    chunks.append(chunk)
            except (ValueError, OSError):
                pass
            for line in b"".join(chunks).splitlines():
                self._emit_line(line)

    def stop(self):
        """Stop reading output (takes effect within one select() timeout)."""
        self.running = False


class LlamaServerGUI(QMainWindow):
    """Main window: profile management, server options, controls and logs."""

    def __init__(self):
        """Load the configuration, build the UI and restore the last profile."""
        super().__init__()
        self.config_file = Path.home() / ".llama_server_gui_config.json"
        self.server_process = None
        self.output_reader = None
        self.stop_timer = None
        self.stop_attempts = 0
        # True between a stop/kill request and the end of its cleanup, so the
        # reader's `finished` signal is not mistaken for an unexpected exit.
        self._stopping = False
        self.config = self.load_config()

        self.init_ui()
        self.load_last_profile()

        # Auto-start if enabled
        if self.auto_start_checkbox.isChecked():
            QTimer.singleShot(500, self.start_server)

    def init_ui(self):
        """Initialize the user interface"""
        self.setWindowTitle("llama.cpp Server Manager")
        self.setMinimumSize(900, 700)

        # Central widget
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        # Profile management
        main_layout.addWidget(self.create_profile_section())

        # Section 1: Server binary selection
        main_layout.addWidget(self.create_binary_section())

        # Section 2: Model selection
        main_layout.addWidget(self.create_model_section())

        # Section 3: Server options
        main_layout.addWidget(self.create_options_section())

        # Control buttons
        main_layout.addLayout(self.create_control_buttons())

        # Log viewer
        main_layout.addWidget(self.create_log_section())

        # System tray
        self.create_system_tray()

        self.update_button_states()

    def create_profile_section(self):
        """Create profile management section"""
        group = QGroupBox("Profile Management")
        layout = QHBoxLayout()

        layout.addWidget(QLabel("Profile:"))

        self.profile_combo = QComboBox()
        self.profile_combo.setMinimumWidth(200)
        self.profile_combo.currentTextChanged.connect(self.on_profile_selected)
        layout.addWidget(self.profile_combo)

        load_btn = QPushButton("Load")
        load_btn.clicked.connect(self.load_selected_profile)
        load_btn.setMaximumWidth(60)
        layout.addWidget(load_btn)

        save_btn = QPushButton("Save")
        save_btn.clicked.connect(self.save_current_profile)
        save_btn.setMaximumWidth(60)
        layout.addWidget(save_btn)

        delete_btn = QPushButton("Delete")
        delete_btn.clicked.connect(self.delete_profile)
        delete_btn.setMaximumWidth(80)
        layout.addWidget(delete_btn)

        layout.addStretch()

        self.auto_start_checkbox = QCheckBox("Auto-start on launch")
        layout.addWidget(self.auto_start_checkbox)

        group.setLayout(layout)
        self.update_profile_list()
        return group

    def create_binary_section(self):
        """Create server binary selection section"""
        group = QGroupBox("1. Server Binary")
        layout = QHBoxLayout()

        self.binary_path_edit = QLineEdit()
        self.binary_path_edit.setPlaceholderText(
            "Path to llama.cpp server binary (e.g., llama-server)"
        )
        layout.addWidget(self.binary_path_edit)

        browse_btn = QPushButton("Browse...")
        browse_btn.clicked.connect(self.browse_binary)
        layout.addWidget(browse_btn)

        group.setLayout(layout)
        return group

    def create_model_section(self):
        """Create model selection section"""
        group = QGroupBox("2. Model Selection")
        layout = QHBoxLayout()

        self.model_path_edit = QLineEdit()
        self.model_path_edit.setPlaceholderText("Path to model file (e.g., model.gguf)")
        layout.addWidget(self.model_path_edit)

        browse_btn = QPushButton("Browse...")
        browse_btn.clicked.connect(self.browse_model)
        layout.addWidget(browse_btn)

        group.setLayout(layout)
        return group

    def create_options_section(self):
        """Create server options section"""
        group = QGroupBox("3. Server Options")
        layout = QVBoxLayout()

        # Row 1: Host and Port
        row1 = QHBoxLayout()
        row1.addWidget(QLabel("Host:"))
        self.host_edit = QLineEdit("127.0.0.1")
        self.host_edit.setMaximumWidth(150)
        row1.addWidget(self.host_edit)

        row1.addWidget(QLabel("Port:"))
        self.port_spin = QSpinBox()
        self.port_spin.setRange(1, 65535)
        self.port_spin.setValue(8080)
        self.port_spin.setMaximumWidth(100)
        row1.addWidget(self.port_spin)
        row1.addStretch()
        layout.addLayout(row1)

        # Row 2: Context length and GPU layers
        row2 = QHBoxLayout()
        row2.addWidget(QLabel("Context Length:"))
        self.context_spin = QSpinBox()
        self.context_spin.setRange(128, 1048576)
        self.context_spin.setValue(2048)
        self.context_spin.setSingleStep(512)
        self.context_spin.setMaximumWidth(100)
        row2.addWidget(self.context_spin)

        row2.addWidget(QLabel("GPU Layers (ngl):"))
        self.ngl_spin = QSpinBox()
        self.ngl_spin.setRange(-1, 999)
        self.ngl_spin.setValue(33)
        self.ngl_spin.setMaximumWidth(100)
        row2.addWidget(self.ngl_spin)
        row2.addStretch()
        layout.addLayout(row2)

        # Row 3: Threads and batch size
        row3 = QHBoxLayout()
        row3.addWidget(QLabel("Threads:"))
        self.threads_spin = QSpinBox()
        self.threads_spin.setRange(1, 256)
        self.threads_spin.setValue(8)
        self.threads_spin.setMaximumWidth(100)
        row3.addWidget(self.threads_spin)

        row3.addWidget(QLabel("Batch Size:"))
        self.batch_spin = QSpinBox()
        self.batch_spin.setRange(1, 2048)
        self.batch_spin.setValue(512)
        self.batch_spin.setMaximumWidth(100)
        row3.addWidget(self.batch_spin)
        row3.addStretch()
        layout.addLayout(row3)

        # Row 4: Additional options
        row4 = QHBoxLayout()
        row4.addWidget(QLabel("Additional Arguments:"))
        self.additional_args_edit = QLineEdit()
        self.additional_args_edit.setPlaceholderText("e.g., --numa --mlock")
        row4.addWidget(self.additional_args_edit)
        layout.addLayout(row4)

        group.setLayout(layout)
        return group

    def create_control_buttons(self):
        """Create start/stop control buttons"""
        layout = QHBoxLayout()

        self.start_btn = QPushButton("Start Server")
        self.start_btn.clicked.connect(self.start_server)
        self.start_btn.setMinimumHeight(40)
        layout.addWidget(self.start_btn)

        self.stop_btn = QPushButton("Stop Server")
        self.stop_btn.clicked.connect(self.stop_server)
        self.stop_btn.setMinimumHeight(40)
        layout.addWidget(self.stop_btn)

        return layout

    def create_log_section(self):
        """Create log viewer section"""
        group = QGroupBox("Server Logs")
        layout = QVBoxLayout()

        self.log_text = QTextEdit()
        self.log_text.setReadOnly(True)
        self.log_text.setMinimumHeight(200)
        # Drop the oldest paragraphs once the limit is reached
        self.log_text.document().setMaximumBlockCount(MAX_LOG_BLOCKS)
        layout.addWidget(self.log_text)

        clear_btn = QPushButton("Clear Logs")
        clear_btn.clicked.connect(self.log_text.clear)
        layout.addWidget(clear_btn)

        group.setLayout(layout)
        return group

    def create_system_tray(self):
        """Create system tray icon"""
        self.tray_icon = QSystemTrayIcon(self)

        # Use a standard icon shipped with the current Qt style
        icon = QApplication.style().standardIcon(QStyle.StandardPixmap.SP_ComputerIcon)
        self.tray_icon.setIcon(icon)

        # Tray menu
        tray_menu = QMenu()

        show_action = QAction("Show", self)
        show_action.triggered.connect(self.show)
        tray_menu.addAction(show_action)

        tray_menu.addSeparator()

        start_action = QAction("Start Server", self)
        start_action.triggered.connect(self.start_server)
        tray_menu.addAction(start_action)

        stop_action = QAction("Stop Server", self)
        stop_action.triggered.connect(self.stop_server)
        tray_menu.addAction(stop_action)

        tray_menu.addSeparator()

        quit_action = QAction("Quit", self)
        quit_action.triggered.connect(self.quit_application)
        tray_menu.addAction(quit_action)

        self.tray_icon.setContextMenu(tray_menu)
        self.tray_icon.activated.connect(self.tray_icon_activated)
        self.tray_icon.show()

    def tray_icon_activated(self, reason):
        """Handle tray icon activation: a click toggles window visibility."""
        if reason != QSystemTrayIcon.ActivationReason.Trigger:
            return
        if self.isVisible():
            self.hide()
        else:
            self.show()
            self.activateWindow()

    @staticmethod
    def _browse_start_dir(current_path, fallback_dir=None):
        """Pick the directory a file dialog should open in.

        Preference order: the directory of ``current_path`` if it exists,
        then ``fallback_dir`` if it exists, then the user's home directory.
        """
        current_path = current_path.strip()
        if current_path:
            parent = os.path.dirname(current_path)
            if os.path.isdir(parent):
                return parent
        if fallback_dir and os.path.isdir(fallback_dir):
            return fallback_dir
        return str(Path.home())

    def browse_binary(self):
        """Browse for server binary"""
        start_dir = self._browse_start_dir(
            self.binary_path_edit.text(), DEFAULT_BINARY_DIR
        )
        file_path, _ = QFileDialog.getOpenFileName(
            self,
            "Select llama.cpp Server Binary",
            start_dir,
            "Executable Files (*);;All Files (*)"
        )
        if file_path:
            self.binary_path_edit.setText(file_path)

    def browse_model(self):
        """Browse for model file"""
        start_dir = self._browse_start_dir(self.model_path_edit.text())
        file_path, _ = QFileDialog.getOpenFileName(
            self,
            "Select Model File",
            start_dir,
            "GGUF Files (*.gguf);;All Files (*)"
        )
        if file_path:
            self.model_path_edit.setText(file_path)

    def start_server(self):
        """Start the llama.cpp server with the options currently in the UI."""
        binary_path = self.binary_path_edit.text().strip()
        model_path = self.model_path_edit.text().strip()

        if not binary_path:
            QMessageBox.warning(self, "Error", "Please select a server binary")
            return

        if not os.path.exists(binary_path):
            QMessageBox.warning(self, "Error", f"Server binary not found: {binary_path}")
            return

        if not model_path:
            QMessageBox.warning(self, "Error", "Please select a model file")
            return

        if not os.path.exists(model_path):
            QMessageBox.warning(self, "Error", f"Model file not found: {model_path}")
            return

        if self.server_process is not None and self.server_process.poll() is None:
            QMessageBox.warning(self, "Error", "Server is already running")
            return

        # Shell-style splitting keeps quoted arguments (e.g. paths with
        # spaces) together; unbalanced quotes raise ValueError.
        try:
            extra_args = shlex.split(self.additional_args_edit.text())
        except ValueError as e:
            QMessageBox.warning(self, "Error", f"Invalid additional arguments:\n{e}")
            return

        # Build command
        cmd = [
            binary_path,
            "-m", model_path,
            "--host", self.host_edit.text().strip(),
            "--port", str(self.port_spin.value()),
            "-c", str(self.context_spin.value()),
            "-ngl", str(self.ngl_spin.value()),
            "-t", str(self.threads_spin.value()),
            "-b", str(self.batch_spin.value()),
        ]
        cmd.extend(extra_args)

        self.log_text.append(f"Starting server with command:\n{shlex.join(cmd)}\n")

        # A reader left over from a server that exited on its own must be
        # finished before its reference is replaced.
        self._release_reader()

        try:
            # Binary, unbuffered pipes: ServerOutputReader reads and decodes
            # the raw file descriptors itself.
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=0
            )
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            QMessageBox.critical(self, "Error", f"Failed to start server:\n{str(e)}")
            self.log_text.append(f"Error starting server: {str(e)}\n")
            return

        self.server_process = process
        self._stopping = False

        # Start output reader thread
        reader = ServerOutputReader(process)
        reader.output_received.connect(self.append_log)
        reader.finished.connect(self.on_reader_finished)
        self.output_reader = reader
        reader.start()

        self.log_text.append("Server started successfully!\n")
        self.update_button_states()

        self.tray_icon.showMessage(
            "llama.cpp Server",
            "Server started successfully",
            QSystemTrayIcon.MessageIcon.Information,
            2000
        )

    def on_reader_finished(self):
        """Handle the reader thread ending.

        When no stop was requested and the process has exited, the server
        terminated on its own; reset the state so it can be started again.
        """
        if self._stopping or self.server_process is None:
            return
        exit_code = self.server_process.poll()
        if exit_code is None:
            # Process still alive; nothing to clean up
            return
        self.cleanup_after_stop(f"Server process exited (exit code {exit_code})\n")

    def stop_server(self):
        """Stop the llama.cpp server"""
        if not self.server_process or self.server_process.poll() is not None:
            QMessageBox.warning(self, "Error", "Server is not running")
            self.update_button_states()
            return

        if self._stopping:
            # A stop is already in progress (e.g. requested twice from the tray)
            return

        self.log_text.append("Stopping server...\n")

        # Disable stop button while stopping
        self.stop_btn.setEnabled(False)
        self._stopping = True

        # The reader keeps running so the server's shutdown output still
        # reaches the log; it ends by itself when the pipes close.
        self.server_process.terminate()

        # Use a timer to check if process has stopped (non-blocking)
        self.stop_attempts = 0
        if self.stop_timer is None:
            self.stop_timer = QTimer(self)
            self.stop_timer.timeout.connect(self.check_server_stopped)
        self.stop_timer.start(200)  # Check every 200ms

    def check_server_stopped(self):
        """Check if server has stopped (called by timer)"""
        if self.server_process is None:
            # Already cleaned up elsewhere
            self.stop_timer.stop()
            return

        if self.server_process.poll() is not None:
            # Process has terminated
            self.stop_timer.stop()
            self.cleanup_after_stop("Server stopped successfully!\n")
            return

        self.stop_attempts += 1
        if self.stop_attempts >= 25:  # 25 * 200ms = 5 seconds
            # Force kill after 5 seconds
            self.log_text.append("Server not responding, forcing kill...\n")
            self.server_process.kill()
            self.stop_timer.stop()
            # Wait a bit more for kill to take effect
            QTimer.singleShot(
                500, lambda: self.cleanup_after_stop("Server killed (forced)\n")
            )

    def _release_reader(self):
        """Let the output reader finish, then drop the reference to it.

        The thread is first given up to a second to reach EOF by itself so
        trailing output is not lost; only then is it told to stop.
        """
        reader = self.output_reader
        if reader is None:
            return
        if not reader.wait(1000):  # Wait max 1 second
            reader.stop()
            reader.wait(1000)
        self.output_reader = None

    def cleanup_after_stop(self, message):
        """Clean up after server has stopped"""
        self.log_text.append(message)

        # Wait for output reader thread to finish
        self._release_reader()

        if self.server_process is not None:
            # Collect the exit status if that has not happened yet
            self.server_process.poll()
        self.server_process = None
        self._stopping = False
        self.update_button_states()

        self.tray_icon.showMessage(
            "llama.cpp Server",
            "Server stopped",
            QSystemTrayIcon.MessageIcon.Information,
            2000
        )

    def append_log(self, text):
        """Append text to log viewer"""
        self.log_text.append(text)
        # Auto-scroll to bottom
        scrollbar = self.log_text.verticalScrollBar()
        scrollbar.setValue(scrollbar.maximum())

    def update_button_states(self):
        """Update button enabled/disabled states"""
        is_running = self.server_process is not None and self.server_process.poll() is None
        self.start_btn.setEnabled(not is_running)
        self.stop_btn.setEnabled(is_running)

    def get_current_settings(self):
        """Get current settings as dictionary"""
        return {
            "binary_path": self.binary_path_edit.text(),
            "model_path": self.model_path_edit.text(),
            "host": self.host_edit.text(),
            "port": self.port_spin.value(),
            "context": self.context_spin.value(),
            "ngl": self.ngl_spin.value(),
            "threads": self.threads_spin.value(),
            "batch": self.batch_spin.value(),
            "additional_args": self.additional_args_edit.text(),
            "auto_start": self.auto_start_checkbox.isChecked()
        }

    @staticmethod
    def _setting(settings, key, default):
        """Return ``settings[key]`` if it has the same type as ``default``.

        Missing keys and values of another type (e.g. from a hand-edited
        config file) yield ``default`` instead of reaching a Qt setter.
        """
        value = settings.get(key, default)
        return value if isinstance(value, type(default)) else default

    def apply_settings(self, settings):
        """Apply settings to UI"""
        pick = self._setting
        self.log_text.append("Applying settings to UI fields...\n")

        self.binary_path_edit.setText(pick(settings, "binary_path", ""))
        self.model_path_edit.setText(pick(settings, "model_path", ""))
        self.host_edit.setText(pick(settings, "host", "127.0.0.1"))
        self.port_spin.setValue(pick(settings, "port", 8080))
        self.context_spin.setValue(pick(settings, "context", 2048))
        self.ngl_spin.setValue(pick(settings, "ngl", 33))
        self.threads_spin.setValue(pick(settings, "threads", 8))
        self.batch_spin.setValue(pick(settings, "batch", 512))
        self.additional_args_edit.setText(pick(settings, "additional_args", ""))
        self.auto_start_checkbox.setChecked(pick(settings, "auto_start", False))

        self.log_text.append("Settings applied to UI\n")

    def save_current_profile(self):
        """Save current settings as a profile"""
        profile_name, ok = QInputDialog.getText(
            self,
            "Save Profile",
            "Profile name:",
            text=self.profile_combo.currentText()
        )
        profile_name = profile_name.strip()
        if not (ok and profile_name):
            return

        # Get current settings from UI
        settings = self.get_current_settings()

        # Debug: log what we're saving
        self.log_text.append(f"Saving profile '{profile_name}' with settings:\n")
        self.log_text.append(f"  Binary: {settings['binary_path']}\n")
        self.log_text.append(f"  Model: {settings['model_path']}\n")
        self.log_text.append(
            f"  Port: {settings['port']}, Context: {settings['context']}, "
            f"NGL: {settings['ngl']}\n"
        )

        # Save to config
        self.config["profiles"][profile_name] = settings
        self.config["last_profile"] = profile_name
        self.save_config()

        # Update profile list and select the saved profile. Signals are
        # blocked because the UI already shows exactly these settings.
        self.update_profile_list()
        self.profile_combo.blockSignals(True)
        self.profile_combo.setCurrentText(profile_name)
        self.profile_combo.blockSignals(False)

        # Show confirmation
        self.log_text.append(f"Profile '{profile_name}' saved successfully\n")

    def on_profile_selected(self, profile_name):
        """Called when profile selection changes in dropdown (auto-load)"""
        if profile_name and profile_name in self.config["profiles"]:
            self.log_text.append(f"Auto-loading profile '{profile_name}'...\n")
            self.load_profile(profile_name)

    def load_selected_profile(self):
        """Load the currently selected profile (manual load via button)"""
        profile_name = self.profile_combo.currentText()
        if not profile_name:
            QMessageBox.warning(self, "No Profile Selected", "Please select a profile to load")
            return

        if profile_name not in self.config["profiles"]:
            QMessageBox.warning(self, "Profile Not Found", f"Profile '{profile_name}' not found")
            return

        self.log_text.append(f"Manually loading profile '{profile_name}'...\n")
        self.load_profile(profile_name)

    def load_profile(self, profile_name):
        """Load a profile into the UI and remember it as the last used one."""
        if not profile_name or profile_name not in self.config["profiles"]:
            return

        settings = self.config["profiles"][profile_name]

        # Debug: log what we're loading
        self.log_text.append(f"Loading profile '{profile_name}' with settings:\n")
        self.log_text.append(f"  Binary: {settings.get('binary_path', 'N/A')}\n")
        self.log_text.append(f"  Model: {settings.get('model_path', 'N/A')}\n")
        self.log_text.append(
            f"  Port: {settings.get('port', 'N/A')}, "
            f"Context: {settings.get('context', 'N/A')}, "
            f"NGL: {settings.get('ngl', 'N/A')}\n"
        )

        self.apply_settings(settings)
        self.config["last_profile"] = profile_name
        self.save_config()

        self.log_text.append(f"Profile '{profile_name}' loaded successfully\n")

    def load_last_profile(self):
        """Load the last used profile"""
        last_profile = self.config.get("last_profile")
        if not last_profile or last_profile not in self.config["profiles"]:
            return

        # Load explicitly instead of relying on currentTextChanged: that
        # signal does not fire when the profile is already the combo's
        # current item, which would leave the settings unapplied at startup.
        self.profile_combo.blockSignals(True)
        self.profile_combo.setCurrentText(last_profile)
        self.profile_combo.blockSignals(False)
        self.load_profile(last_profile)

    def delete_profile(self):
        """Delete the current profile"""
        profile_name = self.profile_combo.currentText()
        if not profile_name:
            return

        reply = QMessageBox.question(
            self,
            "Delete Profile",
            f"Are you sure you want to delete profile '{profile_name}'?",
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
        )
        if reply != QMessageBox.StandardButton.Yes:
            return

        self.config["profiles"].pop(profile_name, None)
        if self.config.get("last_profile") == profile_name:
            self.config["last_profile"] = None
        self.save_config()
        self.update_profile_list()

    def update_profile_list(self):
        """Update the profile combo box"""
        current = self.profile_combo.currentText()

        # Block signals to prevent triggering load_profile during update
        self.profile_combo.blockSignals(True)
        self.profile_combo.clear()
        self.profile_combo.addItems(sorted(self.config["profiles"].keys()))
        if current in self.config["profiles"]:
            self.profile_combo.setCurrentText(current)
        self.profile_combo.blockSignals(False)

    def load_config(self):
        """Load configuration from file.

        Always returns a dict with a ``"profiles"`` dict and a
        ``"last_profile"`` entry (a string or None), even when the file is
        missing, unreadable or has an unexpected structure.
        """
        config = {"profiles": {}, "last_profile": None}
        if not self.config_file.exists():
            return config

        try:
            with open(self.config_file, 'r', encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax errors and decoding errors
            print(f"Error loading config: {e}")
            return config

        if not isinstance(loaded, dict):
            print("Error loading config: top-level JSON value is not an object")
            return config

        profiles = loaded.get("profiles")
        if isinstance(profiles, dict):
            config["profiles"] = {
                name: settings
                for name, settings in profiles.items()
                if isinstance(settings, dict)
            }

        last_profile = loaded.get("last_profile")
        if isinstance(last_profile, str):
            config["last_profile"] = last_profile

        # Keep any other keys found in the file so saving does not drop them
        for key, value in loaded.items():
            config.setdefault(key, value)
        return config

    def save_config(self):
        """Save configuration to file"""
        try:
            with open(self.config_file, 'w', encoding="utf-8") as f:
                json.dump(self.config, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving config: {e}")
            if hasattr(self, 'log_text'):
                self.log_text.append(f"Error saving config: {e}\n")
            return

        # Debug: log what we saved (the log widget may not exist yet)
        if hasattr(self, 'log_text'):
            profile_count = len(self.config.get("profiles", {}))
            self.log_text.append(f"Config saved: {profile_count} profile(s)\n")

    def _kill_server(self):
        """Force-kill the running server and finish the reader thread.

        Used when closing/quitting, where waiting for a graceful shutdown is
        not needed.
        """
        if self.stop_timer is not None:
            self.stop_timer.stop()
        # Keep on_reader_finished from treating this as an unexpected exit
        self._stopping = True
        self.server_process.kill()
        try:
            self.server_process.wait(timeout=2)
        except subprocess.TimeoutExpired:
            pass
        self._release_reader()

    def closeEvent(self, event):
        """Handle window close event"""
        if self.server_process is not None and self.server_process.poll() is None:
            reply = QMessageBox.question(
                self,
                "Server Running",
                "The server is still running. Do you want to:\n\n"
                "Yes - Minimize to tray\n"
                "No - Stop server and quit\n"
                "Cancel - Do nothing",
                QMessageBox.StandardButton.Yes |
                QMessageBox.StandardButton.No |
                QMessageBox.StandardButton.Cancel
            )

            if reply == QMessageBox.StandardButton.Yes:
                event.ignore()
                self.hide()
            elif reply == QMessageBox.StandardButton.No:
                # main() disables quit-on-last-window-closed, so accepting
                # the event alone would leave the application running.
                event.accept()
                self.quit_application()
            else:
                event.ignore()
        else:
            event.accept()
            if not QSystemTrayIcon.isSystemTrayAvailable():
                # Without a tray there is no way to reach the hidden app again
                QApplication.quit()

    def quit_application(self):
        """Quit the application"""
        if self.server_process is not None and self.server_process.poll() is None:
            # Force kill when quitting (no need to wait gracefully)
            self._kill_server()
        else:
            self._release_reader()

        QApplication.quit()


def main():
    """Create the application and main window and run the Qt event loop."""
    app = QApplication(sys.argv)
    app.setApplicationName("llama.cpp Server Manager")
    # The app lives on in the system tray after the window is closed
    app.setQuitOnLastWindowClosed(False)

    window = LlamaServerGUI()
    window.show()

    sys.exit(app.exec())


if __name__ == "__main__":
    main()