diff --git a/src/coreboot/default.nix b/src/coreboot/default.nix index 35adbc6..c837b51 100644 --- a/src/coreboot/default.nix +++ b/src/coreboot/default.nix @@ -108,6 +108,7 @@ stdenv.mkDerivation { ./patches/0003-kgpe-d16-bootblock.c-use-RTC_BOOT_BYTE-even-when-CON.patch ./patches/0004-superio-winbond-w83667hg-a-superio.c-do-not-use-get_.patch ./patches/0001-romstage-print-out-dimm-voltages.patch + ./patches/0002-kgpe-d16-do-not-enable-hw-monitor-until-kernel-boots.patch # am1i patches ./patches/0021-am1i-omit-amdfw.rom-completely-it-has-broken-address.patch diff --git a/src/coreboot/patches/0002-kgpe-d16-do-not-enable-hw-monitor-until-kernel-boots.patch b/src/coreboot/patches/0002-kgpe-d16-do-not-enable-hw-monitor-until-kernel-boots.patch new file mode 100644 index 0000000..677342f --- /dev/null +++ b/src/coreboot/patches/0002-kgpe-d16-do-not-enable-hw-monitor-until-kernel-boots.patch @@ -0,0 +1,53 @@ +From 9ab2d370ea17e86301d8de4134f4c0abf82b211e Mon Sep 17 00:00:00 2001 +From: Your Name +Date: Mon, 12 Jun 2023 02:29:45 -0700 +Subject: [PATCH 2/4] kgpe-d16: do not enable hw monitor until kernel boots + +The hardware monitor is one of the blocks within the w83667hg-a chip +(there are many others). It is basically a bunch of ADCs (analog to +digital converters) hooked up to voltage, current, and temperature +sensors in various locations on the motherboard. + +This block has the ability to generate several different interrupts +(SMI#, OVT#, etc) in response to thermal conditions. It appears to +sometimes (about 10% of boot-ups, depending on temperature) spew +erroneous alarm interrupts the instant you enable it, when doing so +from within coreboot. This causes the w83667hg-a chip and the +entire system to hang, and the watchdog cannot recover from this +state because it is part of the w83667hg-a chip too. + +An even bigger problem is that the hardware monitor is initialized +*before* the fans are brought up to full speed. 
So if the CPU is +above the critical temperature, it will remain there because the fans +are in their default low-speed boot state. The CPU just keeps +getting hotter and hotter -- not enough to damage itself, but hot +enough that it won't come down to an acceptable temperature with +simple reboots and power-cycles; you have to leave the system off +for a while. Since the fans aren't running while the system is off, +this takes quite a while (several minutes). It's a very fussy and +fidgety process, and not something you want to walk a remote-hands +guy at the datacenter through over the phone. + +To avoid this whole mess, let's simply not assign PNP resources to +the hardware monitor from coreboot. Linux doesn't need these +anyway; it communicates with the hardware using I2C. +--- + src/mainboard/asus/kgpe-d16/devicetree.cb | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/mainboard/asus/kgpe-d16/devicetree.cb b/src/mainboard/asus/kgpe-d16/devicetree.cb +index 3be328d1725..5725fce4a09 100644 +--- a/src/mainboard/asus/kgpe-d16/devicetree.cb ++++ b/src/mainboard/asus/kgpe-d16/devicetree.cb +@@ -206,7 +206,7 @@ chip northbridge/amd/amdfam10/root_complex # Root complex + device pnp 2e.209 off end # GPIO4 + device pnp 2e.309 off end # GPIO5 + device pnp 2e.a on end # ACPI +- device pnp 2e.b on # HW Monitor ++ device pnp 2e.b off # HW Monitor + io 0x60 = 0x290 + # IRQ purposefully not assigned to prevent lockups + end +-- +2.39.1 +