This is useful on systems with ECC RAM.
You'll want to start by rebuilding the kernel.
Code:
root@machine:/# cd /usr/src/linux
root@machine:/usr/src/linux# make mrproper
root@machine:/usr/src/linux# cp /boot/config-generic-2.6.37.6 .config
Make the following changes to the kernel configuration:
Code:
root@machine:/usr/src/linux# make menuconfig
Device Drivers
|_---> EDAC reporting
|_---> (M) AMD64
|_---> (*) Sysfs HW Err Injection Facility
|_---> (M) INTEL *
# Select all modules for generic build
The resulting .config should be similar to this:
Code:
root@machine:/usr/src/linux# diff /boot/config-generic-2.6.37.6 .config
4c4
< # Sat Apr 9 12:54:40 2011
---
> # Fri Jan 27 22:00:03 2012
4183c4183,4196
< # CONFIG_EDAC_MM_EDAC is not set
---
> CONFIG_EDAC_MM_EDAC=m
> CONFIG_EDAC_MCE=y
> CONFIG_EDAC_AMD64=m
> CONFIG_EDAC_AMD64_ERROR_INJECTION=y
> CONFIG_EDAC_E752X=m
> CONFIG_EDAC_I82975X=m
> CONFIG_EDAC_I3000=m
> CONFIG_EDAC_I3200=m
> CONFIG_EDAC_X38=m
> CONFIG_EDAC_I5400=m
> CONFIG_EDAC_I7CORE=m
> CONFIG_EDAC_I5000=m
> CONFIG_EDAC_I5100=m
> CONFIG_EDAC_I7300=m
Continue to install the kernel as normal. If you need help, follow the instructions here:
http://blog.tpa.me.uk/slackware-kernel-compile-guide/
For modular kernels, add the appropriate modprobe commands to /etc/rc.d/rc.modules so the EDAC drivers are loaded at boot.
When you boot up, you will see a message like this:
Code:
[ 5.965514] EDAC MC: Ver: 2.1.0 Jan 22 2012
[ 5.965721] EDAC amd64_edac: Ver: 3.3.0 Jan 22 2012
[ 5.965817] EDAC amd64: ECC is enabled by BIOS.
[ 5.965991] EDAC MC: F10h CPU detected
[ 5.965998] EDAC amd64: using x4 syndromes.
[ 5.966084] EDAC MC: DCT0 chip selects:
[ 5.966085] EDAC MC: 0: 2048MB 1: 2048MB
[ 5.966086] EDAC MC: 2: 2048MB 3: 2048MB
[ 5.966088] EDAC MC: 4: 0MB 5: 0MB
[ 5.966089] EDAC MC: 6: 0MB 7: 0MB
[ 5.966218] EDAC MC0: Giving out device to 'amd64_edac' 'Family 10h': DEV 0000:00:18.2
[ 5.966395] EDAC PCI0: Giving out device to module 'amd64_edac' controller 'EDAC PCI controller': DEV '0000:00:18.2' (POLLED)
You can also peek into /sys. For example:
Code:
root@machine:~# cat /sys/devices/system/edac/mc/mc0/mc_name
Family 10h
root@machine:~# cat /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
390720
root@machine:~# cat /sys/devices/system/edac/mc/mc0/size_mb
16384
root@machine:~# cat /sys/devices/system/edac/mc/mc0/csrow0/mem_type
Unbuffered-DDR3
root@machine:~# cat /sys/devices/system/edac/mc/mc0/csrow0/edac_mode
S4ECD4ED
root@machine:~# cat /sys/devices/system/edac/mc/mc0/csrow0/size_mb
4096
EDAC also provides PCI bus error reporting. To enable it:
Code:
echo "1" > /sys/devices/system/edac/pci/check_pci_errors
Code:
root@machine:~# ls -al /sys/devices/system/edac/pci
total 0
drwxr-xr-x 3 root root 0 Jan 27 22:22 ./
drwxr-xr-x 4 root root 0 Jan 27 22:22 ../
-rw-r--r-- 1 root root 4096 Jan 27 22:33 check_pci_errors
-rw-r--r-- 1 root root 4096 Jan 27 22:33 edac_pci_log_npe
-rw-r--r-- 1 root root 4096 Jan 27 22:33 edac_pci_log_pe
-rw-r--r-- 1 root root 4096 Jan 27 22:33 edac_pci_panic_on_pe
drwxr-xr-x 2 root root 0 Jan 27 22:33 pci0/
-r--r--r-- 1 root root 4096 Jan 27 22:33 pci_nonparity_count
-r--r--r-- 1 root root 4096 Jan 27 22:33 pci_parity_count
User-space tools are also available (you must compile and install these separately):
Code:
user@machine:~$ edac-util -v -s
edac-util: EDAC drivers are loaded. 1 MC detected:
mc0:Family 10h
user@machine:~$ edac-util -v -r
mc0: 0 Uncorrected Errors with no DIMM info
mc0: 0 Corrected Errors with no DIMM info
mc0: csrow0: 0 Uncorrected Errors
mc0: csrow0: ch0: 0 Corrected Errors
mc0: csrow0: ch1: 0 Corrected Errors
mc0: csrow1: 0 Uncorrected Errors
mc0: csrow1: ch0: 0 Corrected Errors
mc0: csrow1: ch1: 0 Corrected Errors
mc0: csrow2: 0 Uncorrected Errors
mc0: csrow2: ch0: 0 Corrected Errors
mc0: csrow2: ch1: 0 Corrected Errors
mc0: csrow3: 0 Uncorrected Errors
mc0: csrow3: ch0: 0 Corrected Errors
mc0: csrow3: ch1: 0 Corrected Errors
Most users will want the system to halt on uncorrectable errors. Since the generic Slackware kernel has MCE configured, this should already happen. The following command is only useful on systems without MCE.
Code:
echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
For more information on EDAC and ECC, refer to the following websites:
http://git.kernel.org/?p=linux/kerne...ation/edac.txt
http://bluesmoke.sourceforge.net
http://buttersideup.com/edacwiki/
http://cr.yp.to/hardware/ecc.html
http://www.cs.nmsu.edu/~pfeiffer/cla...notes/ecc.html