commit 03568719462ab8f07482a55a16fc499938175309 Author: coremaven Date: Fri Dec 12 19:00:43 2025 -0500 Initial commit: llama.cpp Server GUI A professional PyQt6-based GUI for managing llama.cpp server instances. Features: - Server binary and model file selection - Comprehensive server options (host, port, context, GPU layers, etc.) - Start/Stop controls with non-blocking operations - Real-time server log viewer - Profile management (save/load/delete configurations) - Configuration persistence - System tray support - Auto-start option 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..40b557e --- /dev/null +++ b/.gitignore @@ -0,0 +1,23 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ + +# User configuration (don't upload personal settings) +.llama_server_gui_config.json + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db diff --git a/README.md b/README.md new file mode 100644 index 0000000..ec7eb7a --- /dev/null +++ b/README.md @@ -0,0 +1,119 @@ +# llama.cpp Server GUI + +A professional PyQt6-based graphical interface for managing llama.cpp server instances. 
+
+## Features
+
+- **Server Binary Selection**: Browse and select your llama.cpp server binary
+- **Model Selection**: Easy selection of GGUF model files
+- **Comprehensive Options**: Configure host, port, context length, GPU layers, threads, batch size, and more
+- **Start/Stop Controls**: Simple buttons to start and stop the server
+- **Real-time Logs**: View server output and errors in real-time
+- **Profile Management**: Save and load different configurations for different models/use cases
+- **Configuration Persistence**: All settings are saved between sessions
+- **System Tray Icon**: Minimize to tray to keep the server running in the background
+- **Auto-start**: Option to automatically start the server when the GUI launches
+
+## Requirements
+
+- Python 3
+- PyQt6
+- llama.cpp server binary
+
+## Installation
+
+1. Install PyQt6:
+```bash
+sudo apt install python3-pyqt6
+```
+
+2. Make sure you have llama.cpp compiled with the server binary
+
+## Usage
+
+Run the application:
+```bash
+./llama_server_gui.py
+```
+
+Or:
+```bash
+python3 llama_server_gui.py
+```
+
+## Quick Start
+
+1. **Select Server Binary**: Click "Browse..." in the "Server Binary" section and navigate to your llama.cpp server binary (e.g., `/path/to/llama.cpp/build/bin/llama-server`)
+
+2. **Select Model**: Click "Browse..." in the "Model Selection" section and choose your GGUF model file
+
+3. **Configure Options**: Adjust the server options as needed:
+   - Host: IP address to bind to (default: 127.0.0.1)
+   - Port: Port number (default: 8080)
+   - Context Length: Maximum context size (default: 2048)
+   - GPU Layers (ngl): Number of layers to offload to GPU (default: 33)
+   - Threads: CPU threads to use (default: 8)
+   - Batch Size: Batch size for processing (default: 512)
+   - Additional Arguments: Any extra command-line arguments
+
+4. **Start Server**: Click "Start Server"
+
+5. 
**Save Profile**: Once you have a configuration you like, click "Save Profile" to save it for later use + +## Profile Management + +- **Save Profile**: Saves the current configuration with a custom name +- **Load Profile**: Select a profile from the dropdown and click "Load" to load its settings (profiles also auto-load when selected from dropdown) +- **Delete Profile**: Removes the selected profile +- **Auto-start**: Check this option to automatically start the server when the GUI launches + +The GUI now includes detailed logging in the log viewer at the bottom, showing when profiles are saved, loaded, and what settings are being applied. + +## System Tray + +The application includes a system tray icon that allows you to: +- Show/hide the main window +- Start/stop the server from the tray menu +- Quit the application + +When you close the window while the server is running, you can choose to: +- Minimize to tray (server keeps running) +- Stop server and quit +- Cancel the close operation + +## Configuration File + +Settings are stored in `~/.llama_server_gui_config.json` + +## Common Server Options Explained + +- **Context Length (-c)**: Maximum number of tokens the model can process at once. Larger values use more RAM/VRAM. +- **GPU Layers (-ngl)**: Number of model layers to offload to GPU. Higher = faster but uses more VRAM. Set to -1 for all layers. +- **Threads (-t)**: Number of CPU threads for processing. Usually set to your CPU core count or less. +- **Batch Size (-b)**: Number of tokens processed in parallel. Larger = faster but uses more memory. +- **Host**: Network interface to bind to. Use 127.0.0.1 for local-only access, or 0.0.0.0 to allow network access. +- **Port**: Network port for the server API. 
#!/usr/bin/env python3
"""
Simple script to inspect the saved configuration file.

Prints the last-used profile and every saved profile from
~/.llama_server_gui_config.json. Exits non-zero if the file
exists but cannot be read or parsed.
"""

import json
import sys
from pathlib import Path

config_file = Path.home() / ".llama_server_gui_config.json"

if config_file.exists():
    try:
        # Explicit utf-8: the GUI writes JSON, which is utf-8 by convention.
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # A corrupt or unreadable config previously crashed with a raw
        # traceback; report the problem cleanly and signal failure instead.
        print(f"Error reading configuration file {config_file}: {e}")
        sys.exit(1)

    print("=== Configuration File Contents ===")
    print(f"File: {config_file}")
    print(f"\nLast Profile: {config.get('last_profile', 'None')}")
    print(f"\nNumber of Profiles: {len(config.get('profiles', {}))}")

    profiles = config.get('profiles', {})
    if profiles:
        print("\n=== Profiles ===")
        for name, settings in profiles.items():
            print(f"\nProfile: {name}")
            for key, value in settings.items():
                print(f"  {key}: {value}")
    else:
        print("\nNo profiles saved yet")
else:
    print(f"Configuration file not found: {config_file}")
class ServerOutputReader(QThread):
    """Thread to read server output without blocking the GUI.

    Polls the child process's stdout/stderr pipes with select() and re-emits
    each line through the ``output_received`` signal so the GUI thread can
    append it to the log widget.
    """

    # Emitted once per line of server output (stdout and stderr alike).
    output_received = pyqtSignal(str)

    def __init__(self, process):
        super().__init__()
        # process is a subprocess.Popen whose stdout/stderr are text-mode pipes.
        self.process = process
        # Cooperative cancellation flag: run() checks it each loop iteration,
        # stop() clears it.
        self.running = True

    def run(self):
        """Read output from the process"""
        import select
        # NOTE(review): select() on pipe file objects is POSIX-specific —
        # confirm separately if Windows support is ever needed.
        while self.running:
            if self.process.poll() is not None:
                # Process has terminated, read any remaining output
                self.read_remaining_output()
                break

            # Use select to check if there's data available (non-blocking);
            # the 0.1s timeout bounds how long stop() takes to be noticed.
            try:
                readable, _, _ = select.select([self.process.stdout, self.process.stderr], [], [], 0.1)

                if self.process.stdout in readable:
                    output = self.process.stdout.readline()
                    if output:
                        self.output_received.emit(output.strip())

                if self.process.stderr in readable:
                    error = self.process.stderr.readline()
                    if error:
                        # Don't prefix with [ERROR] - llama.cpp uses stderr for normal logging
                        self.output_received.emit(error.strip())
            except (ValueError, OSError):
                # File descriptor closed
                break

    def read_remaining_output(self):
        """Read any remaining output after process termination"""
        try:
            # Read remaining stdout
            for line in self.process.stdout:
                if line:
                    self.output_received.emit(line.strip())

            # Read remaining stderr
            for line in self.process.stderr:
                if line:
                    # Don't prefix with [ERROR] - llama.cpp uses stderr for normal logging
                    self.output_received.emit(line.strip())
        except (ValueError, OSError):
            # Pipes may already be closed during shutdown; nothing left to read.
            pass

    def stop(self):
        """Stop reading output"""
        # Cooperative shutdown: run() exits on its next loop check (<= ~100ms).
        self.running = False
class LlamaServerGUI(QMainWindow):
    """Main window: builds the UI, persists configuration/profiles in
    ~/.llama_server_gui_config.json, and owns the llama-server child process."""

    def __init__(self):
        super().__init__()
        # Per-user settings file holding all profiles plus the last-used one.
        self.config_file = Path.home() / ".llama_server_gui_config.json"
        self.server_process = None  # subprocess.Popen of llama-server, or None
        self.output_reader = None   # ServerOutputReader thread, or None
        self.config = self.load_config()

        self.init_ui()
        self.load_last_profile()

        # Auto-start if enabled. Deferred 500ms so the window is shown and the
        # event loop is running before the server launch and its log output.
        if self.auto_start_checkbox.isChecked():
            QTimer.singleShot(500, self.start_server)

    def init_ui(self):
        """Initialize the user interface: builds all sections top-to-bottom."""
        self.setWindowTitle("llama.cpp Server Manager")
        self.setMinimumSize(900, 700)

        # Central widget
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        # Profile management
        profile_group = self.create_profile_section()
        main_layout.addWidget(profile_group)

        # Section 1: Server binary selection
        binary_group = self.create_binary_section()
        main_layout.addWidget(binary_group)

        # Section 2: Model selection
        model_group = self.create_model_section()
        main_layout.addWidget(model_group)

        # Section 3: Server options
        options_group = self.create_options_section()
        main_layout.addWidget(options_group)

        # Control buttons
        control_layout = self.create_control_buttons()
        main_layout.addLayout(control_layout)

        # Log viewer
        log_group = self.create_log_section()
        main_layout.addWidget(log_group)

        # System tray
        self.create_system_tray()

        self.update_button_states()

    def create_profile_section(self):
        """Create profile management section (combo + Load/Save/Delete, auto-start)."""
        group = QGroupBox("Profile Management")
        layout = QHBoxLayout()

        layout.addWidget(QLabel("Profile:"))

        self.profile_combo = QComboBox()
        self.profile_combo.setMinimumWidth(200)
        # Selecting a profile from the dropdown auto-loads it (see on_profile_selected).
        self.profile_combo.currentTextChanged.connect(self.on_profile_selected)
        layout.addWidget(self.profile_combo)

        load_btn = QPushButton("Load")
        load_btn.clicked.connect(self.load_selected_profile)
        load_btn.setMaximumWidth(60)
        layout.addWidget(load_btn)

        save_btn = QPushButton("Save")
        save_btn.clicked.connect(self.save_current_profile)
        save_btn.setMaximumWidth(60)
        layout.addWidget(save_btn)

        delete_btn = QPushButton("Delete")
        delete_btn.clicked.connect(self.delete_profile)
        delete_btn.setMaximumWidth(80)
        layout.addWidget(delete_btn)

        layout.addStretch()

        # Checked state is saved per-profile; __init__ reads it to auto-launch.
        self.auto_start_checkbox = QCheckBox("Auto-start on launch")
        layout.addWidget(self.auto_start_checkbox)

        group.setLayout(layout)
        self.update_profile_list()
        return group

    def create_binary_section(self):
        """Create server binary selection section (path edit + browse button)."""
        group = QGroupBox("1. Server Binary")
        layout = QHBoxLayout()

        self.binary_path_edit = QLineEdit()
        self.binary_path_edit.setPlaceholderText("Path to llama.cpp server binary (e.g., llama-server)")
        layout.addWidget(self.binary_path_edit)

        browse_btn = QPushButton("Browse...")
        browse_btn.clicked.connect(self.browse_binary)
        layout.addWidget(browse_btn)

        group.setLayout(layout)
        return group

    def create_model_section(self):
        """Create model selection section (path edit + browse button)."""
        group = QGroupBox("2. Model Selection")
        layout = QHBoxLayout()

        self.model_path_edit = QLineEdit()
        self.model_path_edit.setPlaceholderText("Path to model file (e.g., model.gguf)")
        layout.addWidget(self.model_path_edit)

        browse_btn = QPushButton("Browse...")
        browse_btn.clicked.connect(self.browse_model)
        layout.addWidget(browse_btn)

        group.setLayout(layout)
        return group

    def create_options_section(self):
        """Create server options section: host/port, context/ngl, threads/batch,
        and a free-form field for extra command-line arguments."""
        group = QGroupBox("3. Server Options")
        layout = QVBoxLayout()

        # Row 1: Host and Port
        row1 = QHBoxLayout()
        row1.addWidget(QLabel("Host:"))
        self.host_edit = QLineEdit("127.0.0.1")
        self.host_edit.setMaximumWidth(150)
        row1.addWidget(self.host_edit)

        row1.addWidget(QLabel("Port:"))
        self.port_spin = QSpinBox()
        self.port_spin.setRange(1, 65535)
        self.port_spin.setValue(8080)
        self.port_spin.setMaximumWidth(100)
        row1.addWidget(self.port_spin)

        row1.addStretch()
        layout.addLayout(row1)

        # Row 2: Context length and GPU layers
        row2 = QHBoxLayout()
        row2.addWidget(QLabel("Context Length:"))
        self.context_spin = QSpinBox()
        self.context_spin.setRange(128, 1048576)
        self.context_spin.setValue(2048)
        self.context_spin.setSingleStep(512)
        self.context_spin.setMaximumWidth(100)
        row2.addWidget(self.context_spin)

        row2.addWidget(QLabel("GPU Layers (ngl):"))
        self.ngl_spin = QSpinBox()
        # -1 conventionally means "offload all layers" for llama.cpp's -ngl.
        self.ngl_spin.setRange(-1, 999)
        self.ngl_spin.setValue(33)
        self.ngl_spin.setMaximumWidth(100)
        row2.addWidget(self.ngl_spin)

        row2.addStretch()
        layout.addLayout(row2)

        # Row 3: Threads and batch size
        row3 = QHBoxLayout()
        row3.addWidget(QLabel("Threads:"))
        self.threads_spin = QSpinBox()
        self.threads_spin.setRange(1, 256)
        self.threads_spin.setValue(8)
        self.threads_spin.setMaximumWidth(100)
        row3.addWidget(self.threads_spin)

        row3.addWidget(QLabel("Batch Size:"))
        self.batch_spin = QSpinBox()
        self.batch_spin.setRange(1, 2048)
        self.batch_spin.setValue(512)
        self.batch_spin.setMaximumWidth(100)
        row3.addWidget(self.batch_spin)

        row3.addStretch()
        layout.addLayout(row3)

        # Row 4: Additional options
        row4 = QHBoxLayout()
        row4.addWidget(QLabel("Additional Arguments:"))
        self.additional_args_edit = QLineEdit()
        self.additional_args_edit.setPlaceholderText("e.g., --numa --mlock")
        row4.addWidget(self.additional_args_edit)
        layout.addLayout(row4)

        group.setLayout(layout)
        return group
start/stop control buttons""" + layout = QHBoxLayout() + + self.start_btn = QPushButton("Start Server") + self.start_btn.clicked.connect(self.start_server) + self.start_btn.setMinimumHeight(40) + layout.addWidget(self.start_btn) + + self.stop_btn = QPushButton("Stop Server") + self.stop_btn.clicked.connect(self.stop_server) + self.stop_btn.setMinimumHeight(40) + layout.addWidget(self.stop_btn) + + return layout + + def create_log_section(self): + """Create log viewer section""" + group = QGroupBox("Server Logs") + layout = QVBoxLayout() + + self.log_text = QTextEdit() + self.log_text.setReadOnly(True) + self.log_text.setMinimumHeight(200) + layout.addWidget(self.log_text) + + clear_btn = QPushButton("Clear Logs") + clear_btn.clicked.connect(self.log_text.clear) + layout.addWidget(clear_btn) + + group.setLayout(layout) + return group + + def create_system_tray(self): + """Create system tray icon""" + self.tray_icon = QSystemTrayIcon(self) + + # Try to use a default icon, fallback if not available + icon = QApplication.style().standardIcon(QApplication.style().StandardPixmap.SP_ComputerIcon) + self.tray_icon.setIcon(icon) + + # Tray menu + tray_menu = QMenu() + + show_action = QAction("Show", self) + show_action.triggered.connect(self.show) + tray_menu.addAction(show_action) + + tray_menu.addSeparator() + + start_action = QAction("Start Server", self) + start_action.triggered.connect(self.start_server) + tray_menu.addAction(start_action) + + stop_action = QAction("Stop Server", self) + stop_action.triggered.connect(self.stop_server) + tray_menu.addAction(stop_action) + + tray_menu.addSeparator() + + quit_action = QAction("Quit", self) + quit_action.triggered.connect(self.quit_application) + tray_menu.addAction(quit_action) + + self.tray_icon.setContextMenu(tray_menu) + self.tray_icon.activated.connect(self.tray_icon_activated) + self.tray_icon.show() + + def tray_icon_activated(self, reason): + """Handle tray icon activation""" + if reason == 
QSystemTrayIcon.ActivationReason.Trigger: + if self.isVisible(): + self.hide() + else: + self.show() + self.activateWindow() + + def browse_binary(self): + """Browse for server binary""" + default_path = "/home/xero110/dev/llama.cpp/build/bin" + if os.path.exists(default_path): + start_dir = default_path + else: + start_dir = str(Path.home()) + + file_path, _ = QFileDialog.getOpenFileName( + self, + "Select llama.cpp Server Binary", + start_dir, + "Executable Files (*);;All Files (*)" + ) + + if file_path: + self.binary_path_edit.setText(file_path) + + def browse_model(self): + """Browse for model file""" + start_dir = str(Path.home()) + + file_path, _ = QFileDialog.getOpenFileName( + self, + "Select Model File", + start_dir, + "GGUF Files (*.gguf);;All Files (*)" + ) + + if file_path: + self.model_path_edit.setText(file_path) + + def start_server(self): + """Start the llama.cpp server""" + binary_path = self.binary_path_edit.text().strip() + model_path = self.model_path_edit.text().strip() + + if not binary_path: + QMessageBox.warning(self, "Error", "Please select a server binary") + return + + if not os.path.exists(binary_path): + QMessageBox.warning(self, "Error", f"Server binary not found: {binary_path}") + return + + if not model_path: + QMessageBox.warning(self, "Error", "Please select a model file") + return + + if not os.path.exists(model_path): + QMessageBox.warning(self, "Error", f"Model file not found: {model_path}") + return + + if self.server_process is not None and self.server_process.poll() is None: + QMessageBox.warning(self, "Error", "Server is already running") + return + + # Build command + cmd = [ + binary_path, + "-m", model_path, + "--host", self.host_edit.text(), + "--port", str(self.port_spin.value()), + "-c", str(self.context_spin.value()), + "-ngl", str(self.ngl_spin.value()), + "-t", str(self.threads_spin.value()), + "-b", str(self.batch_spin.value()) + ] + + # Add additional arguments + additional_args = 
self.additional_args_edit.text().strip() + if additional_args: + cmd.extend(additional_args.split()) + + self.log_text.append(f"Starting server with command:\n{' '.join(cmd)}\n") + + try: + self.server_process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1 + ) + + # Start output reader thread + self.output_reader = ServerOutputReader(self.server_process) + self.output_reader.output_received.connect(self.append_log) + self.output_reader.start() + + self.log_text.append("Server started successfully!\n") + self.update_button_states() + self.tray_icon.showMessage( + "llama.cpp Server", + "Server started successfully", + QSystemTrayIcon.MessageIcon.Information, + 2000 + ) + + except Exception as e: + QMessageBox.critical(self, "Error", f"Failed to start server:\n{str(e)}") + self.log_text.append(f"Error starting server: {str(e)}\n") + + def stop_server(self): + """Stop the llama.cpp server""" + if not self.server_process or self.server_process.poll() is not None: + QMessageBox.warning(self, "Error", "Server is not running") + return + + self.log_text.append("Stopping server...\n") + + # Disable stop button while stopping + self.stop_btn.setEnabled(False) + + # Stop the output reader thread + if self.output_reader: + self.output_reader.stop() + + # Terminate the process + self.server_process.terminate() + + # Use a timer to check if process has stopped (non-blocking) + self.stop_timer = QTimer() + self.stop_timer.timeout.connect(self.check_server_stopped) + self.stop_attempts = 0 + self.stop_timer.start(200) # Check every 200ms + + def check_server_stopped(self): + """Check if server has stopped (called by timer)""" + if self.server_process.poll() is not None: + # Process has terminated + self.stop_timer.stop() + self.cleanup_after_stop("Server stopped successfully!\n") + else: + self.stop_attempts += 1 + if self.stop_attempts >= 25: # 25 * 200ms = 5 seconds + # Force kill after 5 seconds + 
    def check_server_stopped(self):
        """Check if server has stopped (called by the 200ms stop_timer)."""
        if self.server_process.poll() is not None:
            # Process has terminated
            self.stop_timer.stop()
            self.cleanup_after_stop("Server stopped successfully!\n")
        else:
            self.stop_attempts += 1
            if self.stop_attempts >= 25:  # 25 * 200ms = 5 seconds
                # Force kill after 5 seconds
                self.log_text.append("Server not responding, forcing kill...\n")
                self.server_process.kill()
                self.stop_timer.stop()
                # Wait a bit more for kill to take effect
                QTimer.singleShot(500, lambda: self.cleanup_after_stop("Server killed (forced)\n"))

    def cleanup_after_stop(self, message):
        """Clean up after server has stopped: join the reader thread, clear
        process state, refresh buttons, and notify via the tray icon."""
        self.log_text.append(message)

        # Wait for output reader thread to finish
        if self.output_reader:
            self.output_reader.wait(1000)  # Wait max 1 second
            self.output_reader = None

        self.server_process = None
        self.update_button_states()
        self.tray_icon.showMessage(
            "llama.cpp Server",
            "Server stopped",
            QSystemTrayIcon.MessageIcon.Information,
            2000
        )

    def append_log(self, text):
        """Append text to log viewer (slot for ServerOutputReader.output_received)."""
        self.log_text.append(text)
        # Auto-scroll to bottom
        scrollbar = self.log_text.verticalScrollBar()
        scrollbar.setValue(scrollbar.maximum())

    def update_button_states(self):
        """Update button enabled/disabled states from the live process state."""
        is_running = self.server_process is not None and self.server_process.poll() is None
        self.start_btn.setEnabled(not is_running)
        self.stop_btn.setEnabled(is_running)

    def get_current_settings(self):
        """Get current settings as dictionary (the profile serialization format;
        apply_settings() is its inverse)."""
        return {
            "binary_path": self.binary_path_edit.text(),
            "model_path": self.model_path_edit.text(),
            "host": self.host_edit.text(),
            "port": self.port_spin.value(),
            "context": self.context_spin.value(),
            "ngl": self.ngl_spin.value(),
            "threads": self.threads_spin.value(),
            "batch": self.batch_spin.value(),
            "additional_args": self.additional_args_edit.text(),
            "auto_start": self.auto_start_checkbox.isChecked()
        }

    def apply_settings(self, settings):
        """Apply settings to UI; missing keys fall back to the app defaults."""
        self.log_text.append("Applying settings to UI fields...\n")

        self.binary_path_edit.setText(settings.get("binary_path", ""))
        self.model_path_edit.setText(settings.get("model_path", ""))
        self.host_edit.setText(settings.get("host", "127.0.0.1"))
        self.port_spin.setValue(settings.get("port", 8080))
        self.context_spin.setValue(settings.get("context", 2048))
        self.ngl_spin.setValue(settings.get("ngl", 33))
        self.threads_spin.setValue(settings.get("threads", 8))
        self.batch_spin.setValue(settings.get("batch", 512))
        self.additional_args_edit.setText(settings.get("additional_args", ""))
        self.auto_start_checkbox.setChecked(settings.get("auto_start", False))

        self.log_text.append("Settings applied to UI\n")

    def save_current_profile(self):
        """Save current settings as a profile (prompts for a name; overwrites
        an existing profile of the same name)."""
        from PyQt6.QtWidgets import QInputDialog

        current_name = self.profile_combo.currentText()
        profile_name, ok = QInputDialog.getText(
            self,
            "Save Profile",
            "Profile name:",
            text=current_name if current_name else ""
        )

        if ok and profile_name:
            # Get current settings from UI
            settings = self.get_current_settings()

            # Debug: log what we're saving
            self.log_text.append(f"Saving profile '{profile_name}' with settings:\n")
            self.log_text.append(f"  Binary: {settings['binary_path']}\n")
            self.log_text.append(f"  Model: {settings['model_path']}\n")
            self.log_text.append(f"  Port: {settings['port']}, Context: {settings['context']}, NGL: {settings['ngl']}\n")

            # Save to config
            self.config["profiles"][profile_name] = settings
            self.config["last_profile"] = profile_name
            self.save_config()

            # Update profile list and select the saved profile
            self.update_profile_list()

            # Temporarily block signals, set the text, then unblock — avoids
            # on_profile_selected re-loading the profile we just saved.
            self.profile_combo.blockSignals(True)
            self.profile_combo.setCurrentText(profile_name)
            self.profile_combo.blockSignals(False)

            # Show confirmation
            self.log_text.append(f"Profile '{profile_name}' saved successfully\n")
    def on_profile_selected(self, profile_name):
        """Called when profile selection changes in dropdown (auto-load)."""
        if profile_name and profile_name in self.config["profiles"]:
            self.log_text.append(f"Auto-loading profile '{profile_name}'...\n")
            self.load_profile(profile_name)
        else:
            # Empty selection or invalid profile
            pass

    def load_selected_profile(self):
        """Load the currently selected profile (manual load via Load button)."""
        profile_name = self.profile_combo.currentText()
        if not profile_name:
            QMessageBox.warning(self, "No Profile Selected", "Please select a profile to load")
            return

        if profile_name not in self.config["profiles"]:
            QMessageBox.warning(self, "Profile Not Found", f"Profile '{profile_name}' not found")
            return

        self.log_text.append(f"Manually loading profile '{profile_name}'...\n")
        self.load_profile(profile_name)

    def load_profile(self, profile_name):
        """Load a profile: apply its settings to the UI and persist it as the
        last-used profile. Silently no-op for unknown names."""
        if profile_name and profile_name in self.config["profiles"]:
            settings = self.config["profiles"][profile_name]

            # Debug: log what we're loading
            self.log_text.append(f"Loading profile '{profile_name}' with settings:\n")
            self.log_text.append(f"  Binary: {settings.get('binary_path', 'N/A')}\n")
            self.log_text.append(f"  Model: {settings.get('model_path', 'N/A')}\n")
            self.log_text.append(f"  Port: {settings.get('port', 'N/A')}, Context: {settings.get('context', 'N/A')}, NGL: {settings.get('ngl', 'N/A')}\n")

            self.apply_settings(settings)
            self.config["last_profile"] = profile_name
            self.save_config()
            self.log_text.append(f"Profile '{profile_name}' loaded successfully\n")

    def load_last_profile(self):
        """Load the last used profile on startup. Setting the combo text fires
        currentTextChanged, which triggers on_profile_selected's auto-load."""
        last_profile = self.config.get("last_profile")
        if last_profile and last_profile in self.config["profiles"]:
            self.profile_combo.setCurrentText(last_profile)

    def delete_profile(self):
        """Delete the current profile after a confirmation dialog."""
        profile_name = self.profile_combo.currentText()
        if not profile_name:
            return

        reply = QMessageBox.question(
            self,
            "Delete Profile",
            f"Are you sure you want to delete profile '{profile_name}'?",
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
        )

        if reply == QMessageBox.StandardButton.Yes:
            del self.config["profiles"][profile_name]
            if self.config.get("last_profile") == profile_name:
                self.config["last_profile"] = None
            self.save_config()
            self.update_profile_list()

    def update_profile_list(self):
        """Update the profile combo box from config, preserving the selection
        if it still exists."""
        current = self.profile_combo.currentText()

        # Block signals to prevent triggering load_profile during update
        self.profile_combo.blockSignals(True)
        self.profile_combo.clear()
        self.profile_combo.addItems(sorted(self.config["profiles"].keys()))
        if current in self.config["profiles"]:
            self.profile_combo.setCurrentText(current)
        self.profile_combo.blockSignals(False)

    def load_config(self):
        """Load configuration from file; falls back to an empty default on any
        read/parse error (best-effort by design — the app must still start)."""
        if self.config_file.exists():
            try:
                with open(self.config_file, 'r') as f:
                    return json.load(f)
            except Exception as e:
                print(f"Error loading config: {e}")

        return {"profiles": {}, "last_profile": None}

    def save_config(self):
        """Save configuration to file. Errors are logged, not raised, so a
        failed save never crashes the GUI."""
        try:
            with open(self.config_file, 'w') as f:
                json.dump(self.config, f, indent=2)
            # Debug: log what we saved (hasattr guard: save_config may run
            # before the log widget exists during startup)
            if hasattr(self, 'log_text'):
                profile_count = len(self.config.get("profiles", {}))
                self.log_text.append(f"Config saved: {profile_count} profile(s)\n")
        except Exception as e:
            print(f"Error saving config: {e}")
            if hasattr(self, 'log_text'):
                self.log_text.append(f"Error saving config: {e}\n")

    def closeEvent(self, event):
        """Handle window close event: if the server is running, offer to
        minimize to tray, stop-and-quit, or cancel."""
        if self.server_process is not None and self.server_process.poll() is None:
            reply = QMessageBox.question(
                self,
                "Server Running",
                "The server is still running. Do you want to:\n\n"
                "Yes - Minimize to tray\n"
                "No - Stop server and quit\n"
                "Cancel - Do nothing",
                QMessageBox.StandardButton.Yes |
                QMessageBox.StandardButton.No |
                QMessageBox.StandardButton.Cancel
            )

            if reply == QMessageBox.StandardButton.Yes:
                event.ignore()
                self.hide()
            elif reply == QMessageBox.StandardButton.No:
                # Force kill when closing (no need to wait gracefully)
                if self.output_reader:
                    self.output_reader.stop()
                self.server_process.kill()
                if self.output_reader:
                    self.output_reader.wait(1000)
                event.accept()
            else:
                event.ignore()
        else:
            event.accept()

    def quit_application(self):
        """Quit the application (tray menu action), force-killing any running
        server first."""
        if self.server_process is not None and self.server_process.poll() is None:
            # Force kill when quitting (no need to wait gracefully)
            if self.output_reader:
                self.output_reader.stop()
            self.server_process.kill()
            if self.output_reader:
                self.output_reader.wait(1000)
        QApplication.quit()


def main():
    """Application entry point."""
    app = QApplication(sys.argv)
    app.setApplicationName("llama.cpp Server Manager")
    # Keep running when the window is hidden to the tray.
    app.setQuitOnLastWindowClosed(False)

    window = LlamaServerGUI()
    window.show()

    sys.exit(app.exec())


if __name__ == "__main__":
    main()