From 983a08600deeaa54c0c586db5273a90ffe483c49 Mon Sep 17 00:00:00 2001 From: jeirmeister Date: Wed, 30 Oct 2024 16:53:48 -0700 Subject: [PATCH] fix(system): recover from failed boot due to filesystem issues Recovery process required multiple steps: - Disabled failed swap mount from SteamOS partition in hardware-configuration.nix - Established network connectivity in emergency mode for Jovian packages - Separated boot loader config from configuration.nix into hardware-configuration.nix - Remounted root and /nix/store with write permissions - Rebuilt system with working network connection Added documentation and recovery tools: - Network recovery procedures and scripts - Filesystem troubleshooting guides - Common issues and solutions for Steam Deck NixOS - Prevention strategies for future issues The system now boots successfully into GUI environment. Documentation and scripts are available under documentation/ to assist with similar recovery scenarios in the future. --- documentation/recovery/README.md | 38 +++++ documentation/support/scripts/README.md | 19 +++ .../support/scripts/network-recovery.sh | 146 ++++++++++++++++++ .../support/troubleshooting/common-issues.md | 51 ++++++ .../support/troubleshooting/jovian-nixos.md | 29 ++++ hosts/steamdeck/configuration.nix | 51 +----- hosts/steamdeck/hardware-configuration.nix | 12 +- 7 files changed, 291 insertions(+), 55 deletions(-) create mode 100644 documentation/recovery/README.md create mode 100644 documentation/support/scripts/README.md create mode 100755 documentation/support/scripts/network-recovery.sh create mode 100644 documentation/support/troubleshooting/common-issues.md create mode 100644 documentation/support/troubleshooting/jovian-nixos.md diff --git a/documentation/recovery/README.md b/documentation/recovery/README.md new file mode 100644 index 0000000..a91cf70 --- /dev/null +++ b/documentation/recovery/README.md @@ -0,0 +1,38 @@ +# NixOS Steam Deck Recovery Procedures + +This documentation covers 
recovery procedures for NixOS on Steam Deck. It was created based on real recovery scenarios and provides practical solutions for common issues. + +## Quick Reference + +1. Boot Issues + - [Boot Recovery Guide](./boot-recovery.md) + - Common root causes: filesystem mounts, hardware configuration + +2. Network Issues + - [Network Recovery Guide](./network-recovery.md) + - Essential for rebuilding with Jovian packages + +3. Filesystem Issues + - [Filesystem Troubleshooting](./filesystem-issues.md) + - Handling mount points and permissions + +## Emergency Recovery Steps + +1. Boot into emergency mode +2. Establish network connectivity +3. Fix configuration issues +4. Rebuild system +5. Verify and reboot + +## Prevention Strategies + +1. Maintain separate configurations: + - Main configuration + - Minimal fallback configuration + - Hardware-specific configuration + +2. Keep regular backups of working configurations + +3. Test changes in a VM before applying them + +4. Maintain recovery tools and scripts \ No newline at end of file diff --git a/documentation/support/scripts/README.md b/documentation/support/scripts/README.md new file mode 100644 index 0000000..c2fb956 --- /dev/null +++ b/documentation/support/scripts/README.md @@ -0,0 +1,19 @@ +# Recovery Scripts + +This directory contains scripts for automating common recovery tasks. + +## Available Scripts + +1. `network-recovery.sh` + - Automates network setup in the emergency environment + - Configures a static IPv4 address, default route, and DNS on a wired interface (Wi-Fi association is not handled) + - Usage: `sudo ./network-recovery.sh` + +## Script Development Guidelines + +1. 
All scripts should: + - Include clear documentation + - Handle errors gracefully + - Provide status feedback + - Support both automatic and interactive modes + - Create backups before making changes \ No newline at end of file diff --git a/documentation/support/scripts/network-recovery.sh b/documentation/support/scripts/network-recovery.sh new file mode 100755 index 0000000..4e43dd0 --- /dev/null +++ b/documentation/support/scripts/network-recovery.sh @@ -0,0 +1,146 @@ +#!/usr/bin/env bash + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Message helpers: status goes to stdout; errors and warnings go to stderr so command substitutions never capture them +print_status() { + echo -e "${GREEN}[*]${NC} $1" +} + +print_error() { + echo -e "${RED}[!]${NC} $1" >&2 +} + +print_warning() { + echo -e "${YELLOW}[!]${NC} $1" >&2 +} + +# Print the first enp* (wired) interface name on stdout; return 1 with a message on stderr if there is none +find_interface() { + local interface=$(ip link show | grep enp | cut -d: -f2 | tr -d ' ' | head -n1) + if [ -z "$interface" ]; then + print_error "No ethernet interface found" + return 1 + fi + echo "$interface" +} + +# Prompt for interface, static IP and gateway; sets the globals interface, static_ip and gateway_ip (exits 1 on a malformed IP) +get_network_config() { + local default_interface=$(find_interface) # empty when no wired interface was detected; the user must then type one + + echo "Current network interfaces:" + ip link show + + read -r -p "Enter interface name [$default_interface]: " interface + interface=${interface:-$default_interface} + + read -r -p "Enter static IP address (e.g., 10.0.0.68): " static_ip + read -r -p "Enter gateway IP address (e.g., 10.0.0.1): " gateway_ip + + # Validate IP addresses (dotted-quad shape only; octet ranges are not checked) + if [[ ! $static_ip =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + print_error "Invalid static IP format" + exit 1 + fi + + if [[ ! $gateway_ip =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + print_error "Invalid gateway IP format" + exit 1 + fi +} + +# Apply the static IP (/24), default route, DNS and IPv6 settings from the globals set by get_network_config; exits 1 if the link, address or route step fails +setup_network() { + print_status "Setting up network interface $interface..." + + # Clear any existing IP addresses + ip addr flush dev "$interface" + + # Bring up interface + ip link set "$interface" up + if [ $? 
-ne 0 ]; then + print_error "Failed to bring up interface" + exit 1 + fi + + # Add IP address + ip addr add "${static_ip}/24" dev "$interface" + if [ $? -ne 0 ]; then + print_error "Failed to set IP address" + exit 1 + fi + + # Remove any existing default routes (best effort: an error here just means there was none) + ip route del default 2>/dev/null + + # Add default route + ip route add default via "$gateway_ip" dev "$interface" + if [ $? -ne 0 ]; then + print_error "Failed to add default route" + exit 1 + fi + + # Configure DNS (overwrites /etc/resolv.conf: gateway first, then public resolvers) + print_status "Configuring DNS..." + echo "nameserver $gateway_ip" > /etc/resolv.conf + echo "nameserver 8.8.8.8" >> /etc/resolv.conf + echo "nameserver 8.8.4.4" >> /etc/resolv.conf + + # Disable IPv6 + print_status "Disabling IPv6..." + sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sysctl -w net.ipv6.conf.lo.disable_ipv6=1 +} + +# Ping the gateway, then github.com as a DNS check; returns 0 only if both succeed +test_connectivity() { + print_status "Testing connectivity..." + + # Test local network + print_status "Pinging gateway..." + if ! ping -c 1 "$gateway_ip" >/dev/null 2>&1; then + print_error "Cannot ping gateway" + return 1 + fi + + # Test DNS resolution + print_status "Testing DNS resolution..." + if ! ping -c 1 github.com >/dev/null 2>&1; then + print_warning "DNS resolution failed" + return 1 + fi + + print_status "Network setup complete and functional!" + return 0 +} + +# Entry point: require root, gather settings, configure the interface, then report the connectivity result +main() { + if [[ $EUID -ne 0 ]]; then + print_error "This script must be run as root" + exit 1 + fi + + print_status "NixOS Network Recovery Script" + print_status "===============================" + + get_network_config + setup_network + test_connectivity + + if [ $? 
-eq 0 ]; then + print_status "You should now be able to run nixos-rebuild" + else + print_error "Network setup completed but connectivity test failed" + print_warning "Check your network settings and try again" + fi +} + +# Run main function +main "$@" diff --git a/documentation/support/troubleshooting/common-issues.md b/documentation/support/troubleshooting/common-issues.md new file mode 100644 index 0000000..084112c --- /dev/null +++ b/documentation/support/troubleshooting/common-issues.md @@ -0,0 +1,51 @@ +# Common Issues and Solutions + +## Network Configuration Issues + +### Symptoms +- Unable to fetch packages +- No internet connectivity in emergency mode + +### Solution +1. Use network-recovery.sh script +2. Manual network configuration: + ```bash + ip link set INTERFACE up + ip addr add IP_ADDRESS/24 dev INTERFACE + ip route add default via GATEWAY + echo "nameserver 8.8.8.8" > /etc/resolv.conf + ``` + +## Filesystem Mount Issues + +### Symptoms +- Read-only filesystem errors +- Unable to modify configuration +- Failed mounts during boot + +### Solution +1. Identify problematic mounts: + ```bash + mount | grep ro + lsblk -f + ``` +2. Remount filesystems: + ```bash + mount -o remount,rw / + mount -o remount,rw /nix/store + ``` +3. Check/modify hardware-configuration.nix + +## DBus Issues + +### Symptoms +- Failed to connect to bus +- Service startup failures + +### Solution +1. 
Set up a minimal DBus environment: + ```bash + mountpoint -q /run || mount -t tmpfs tmpfs /run + mkdir -p /run/dbus + dbus-daemon --system --fork + ``` \ No newline at end of file diff --git a/documentation/support/troubleshooting/jovian-nixos.md b/documentation/support/troubleshooting/jovian-nixos.md new file mode 100644 index 0000000..80c61f4 --- /dev/null +++ b/documentation/support/troubleshooting/jovian-nixos.md @@ -0,0 +1,29 @@ +# Jovian NixOS Specific Issues + +## Package Fetching Issues + +### Symptoms +- Unable to fetch Jovian packages +- Build failures related to Jovian components + +### Solution +1. Ensure network connectivity +2. Verify Jovian configuration: + ```nix + jovian = { + hardware.has.amd.gpu = true; + devices.steamdeck.enable = true; + }; + ``` +3. Check Jovian cache availability + +## Hardware Detection Issues + +### Symptoms +- Missing Steam Deck specific features +- Hardware not properly recognized + +### Solution +1. Verify hardware configuration +2. Check kernel modules +3. Review Jovian hardware settings \ No newline at end of file diff --git a/hosts/steamdeck/configuration.nix b/hosts/steamdeck/configuration.nix index 3bb1684..626ae8d 100644 --- a/hosts/steamdeck/configuration.nix +++ b/hosts/steamdeck/configuration.nix @@ -20,10 +20,10 @@ in { imports = [ ./hardware-configuration.nix - (jovianNixos + "/modules") - ]; + (jovianNixos + "/modules") + ]; - nixpkgs = { + nixpkgs = { overlays = [ (import (jovianNixos + "/overlay.nix")) ]; @@ -64,49 +64,6 @@ in workarounds.ignoreMissingKernelModules = true; }; - # Hardware Configuration - hardware = { - enableRedistributableFirmware = lib.mkForce true; - firmware = lib.mkForce [ pkgs.linux-firmware ]; - - graphics = { - enable = true; - enable32Bit = true; - extraPackages = with pkgs; [ - libva - vaapiVdpau # required for Immersed-VR - v4l-utils - ]; - }; - }; - - # Boot Configuration - boot = { - loader = { - systemd-boot.enable = true; - efi.canTouchEfiVariables = true; - }; - - # Combined kernel modules 
configuration - initrd.kernelModules = [ - "amdgpu" - # "v4l2loopback" - ]; - - kernelModules = [ - "amdgpu" - "v4l2loopback" - "snd-aloop" - ]; - # extraModulePackages = [ pkgs.linuxPackages.v4l2loopback ]; - extraModulePackages = with config.boot.kernelPackages; [ - v4l2loopback - ]; - - extraModprobeConfig = '' - options v4l2loopback exclusive_caps=1 video_nr=0 card_label="Immersed Virtual Camera" - ''; - }; # System Services systemd.services = { @@ -261,4 +218,4 @@ in }; system.stateVersion = "24.05"; -} \ No newline at end of file +} diff --git a/hosts/steamdeck/hardware-configuration.nix b/hosts/steamdeck/hardware-configuration.nix index c3d922e..dce37ac 100644 --- a/hosts/steamdeck/hardware-configuration.nix +++ b/hosts/steamdeck/hardware-configuration.nix @@ -8,7 +8,7 @@ [ (modulesPath + "/installer/scan/not-detected.nix") ]; - boot.initrd.availableKernelModules = [ "nvme" "xhci_pci" "usbhid" "usb_storage" "sd_mod" "sdhci_pci" ]; + boot.initrd.availableKernelModules = [ "nvme" "xhci_pci" "usb_storage" "usbhid" "sd_mod" "sdhci_pci" ]; boot.initrd.kernelModules = [ ]; boot.kernelModules = [ "kvm-amd" ]; boot.extraModulePackages = [ ]; @@ -23,19 +23,15 @@ fsType = "vfat"; options = [ "fmask=0077" "dmask=0077" ]; }; - # Mount SteamOS games from NVME - fileSystems."/mnt/steamos" = - { device = "/dev/disk/by-uuid/5261b393-9b3e-4334-9ddf-4a1eb9865905"; - fsType = "ext4"; - }; - swapDevices = [ ]; #TODO: consider releasing the zram swapfile and utilizing the one already on steamos + + swapDevices = [ ]; # Enables DHCP on each ethernet and wireless interface. In case of scripted networking # (the default) this is the recommended approach. When using systemd-networkd it's # still possible to use this option, but it's recommended to use it in conjunction # with explicit per-interface declarations with `networking.interfaces..useDHCP`. 
networking.useDHCP = lib.mkDefault true; - # networking.interfaces.enp4s0f3u1u4u4.useDHCP = lib.mkDefault true; + # networking.interfaces.enp4s0f3u1u4u3.useDHCP = lib.mkDefault true; # networking.interfaces.wlo1.useDHCP = lib.mkDefault true; nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";