root@pve:~# nano /etc/default/grub
# If you change this file, run 'update-grub' afterwards to update
# /boot/grub/grub.cfg.
# For full documentation of the options in this file, see:
#   info -f grub -n 'Simple configuration'
root@pve:~# nano /etc/modules
# /etc/modules: kernel modules to load at boot time.
#
# This file contains the names of kernel modules that should be loaded
# at boot time, one per line. Lines beginning with "#" are ignored.
# Parameters can be specified after the module name.
root@pve:~# dmesg | grep -e DMAR -e IOMMU
[ 0.000000] Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA
[ 0.010536] ACPI: DMAR 0x000000007A22BC50 0000DC (v01 ALASKA A M I 00000001 INTL 20091013)
[ 0.010562] ACPI: Reserving DMAR table memory at [mem 0x7a22bc50-0x7a22bd2b]
[ 0.272257] DMAR: IOMMU enabled
[ 0.727171] DMAR: Host address width 46
[ 0.727173] DMAR: DRHD base: 0x000000fbffd000 flags: 0x0
[ 0.727183] DMAR: dmar0: reg_base_addr fbffd000 ver 1:0 cap 8d2008c10ef0466 ecap f0205b
[ 0.727186] DMAR: DRHD base: 0x000000fbffc000 flags: 0x1
[ 0.727191] DMAR: dmar1: reg_base_addr fbffc000 ver 1:0 cap 8d2078c106f0466 ecap f020df
[ 0.727193] DMAR: RMRR base: 0x0000007b3ee000 end: 0x0000007b3fefff
[ 0.727198] DMAR: ATSR flags: 0x0
[ 0.727200] DMAR: RHSA base: 0x000000fbffc000 proximity domain: 0x0
[ 0.727203] DMAR-IR: IOAPIC id 1 under DRHD base 0xfbffc000 IOMMU 1
[ 0.727205] DMAR-IR: IOAPIC id 2 under DRHD base 0xfbffc000 IOMMU 1
[ 0.727206] DMAR-IR: HPET id 0 under DRHD base 0xfbffc000
[ 0.727207] DMAR-IR: Queued invalidation will be enabled to support x2apic and Intr-remapping.
[ 0.727848] DMAR-IR: Enabled IRQ remapping in x2apic mode
[ 1.059916] DMAR: No SATC found
[ 1.059918] DMAR: IOMMU feature sc_support inconsistent
[ 1.059919] DMAR: IOMMU feature dev_iotlb_support inconsistent
[ 1.059920] DMAR: dmar0: Using Queued invalidation
[ 1.059924] DMAR: dmar1: Using Queued invalidation
[ 1.063715] DMAR: Intel(R) Virtualization Technology for Directed I/O
出现上述信息就算成功,关键是能看到 `DMAR: IOMMU enabled` 这一行(总之不能有 disabled、failed 之类表示否定的词语;上面输出中的 `No SATC found`、`inconsistent` 不算在内)。
需要注意的是,remapping(中断重映射)这个功能需要主板 BIOS 支持,成功的标志就是这段:
[ 0.727206] DMAR-IR: HPET id 0 under DRHD base 0xfbffc000
[ 0.727207] DMAR-IR: Queued invalidation will be enabled to support x2apic and Intr-remapping.
root@pve:~# nano /etc/modprobe.d/pve-blacklist.conf
# This file contains a list of modules which are not supported by Proxmox VE

# nvidiafb see bugreport https://bugzilla.proxmox.com/show_bug.cgi?id=701
blacklist nvidiafb
blacklist nvidia
blacklist nouveau
同时笔者发现一个有意思的项目:exo(Run your own AI cluster at home with everyday devices)。大家都知道 ROCm 在 Windows 上支持不太好(也就是 24 年 6 月份才开始支持的),而 CUDA 在 Windows 上也支持得很好,所以笔者决定把 7900xtx 放到 PVE 上,3090 放日常电脑上,这样说不定就能两两联合跑 qwen2.5:72b 了。假如此法可行,还能把室友的 4070 拉进来一起组算力集群。