[PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

classic Classic list List threaded Threaded
9 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Jordan Hargrave
[PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

This hooks each PCI device and overrides the bus_dmamap_xxx functions to
remap DMA requests into a virtual DMA space.  It prevents devices from
issuing I/O requests to system memory that lies outside the DMA space
requested for them.
---
 sys/arch/amd64/conf/GENERIC          |    1 +
 sys/arch/amd64/conf/RAMDISK          |    1 +
 sys/arch/amd64/conf/RAMDISK_CD       |    1 +
 sys/arch/amd64/include/pci_machdep.h |    3 +-
 sys/arch/amd64/pci/pci_machdep.c     |   15 +-
 sys/dev/acpi/acpi.c                  |    5 +
 sys/dev/acpi/acpidmar.c              | 2988 ++++++++++++++++++++++++++
 sys/dev/acpi/acpidmar.h              |  534 +++++
 sys/dev/acpi/acpireg.h               |   21 +-
 sys/dev/acpi/amd_iommu.h             |  358 +++
 sys/dev/acpi/files.acpi              |    5 +
 sys/dev/pci/pci.c                    |   28 +
 sys/dev/pci/pcivar.h                 |    2 +
 13 files changed, 3959 insertions(+), 3 deletions(-)
 create mode 100644 sys/dev/acpi/acpidmar.c
 create mode 100644 sys/dev/acpi/acpidmar.h
 create mode 100644 sys/dev/acpi/amd_iommu.h

diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index 2c49f91a1..1eda12bc9 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -45,6 +45,7 @@ acpibtn* at acpi?
 acpicpu* at acpi?
 acpicmos* at acpi?
 acpidock* at acpi?
+acpidmar0 at acpi?
 acpiec* at acpi?
 acpipci* at acpi?
 acpiprt* at acpi?
diff --git a/sys/arch/amd64/conf/RAMDISK b/sys/arch/amd64/conf/RAMDISK
index 10148add1..7ab48f32e 100644
--- a/sys/arch/amd64/conf/RAMDISK
+++ b/sys/arch/amd64/conf/RAMDISK
@@ -34,6 +34,7 @@ acpipci* at acpi?
 acpiprt* at acpi?
 acpimadt0 at acpi?
 #acpitz* at acpi?
+acpidmar* at acpi? disable
 
 mpbios0 at bios0
 
diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD
index 91022751e..82a24e210 100644
--- a/sys/arch/amd64/conf/RAMDISK_CD
+++ b/sys/arch/amd64/conf/RAMDISK_CD
@@ -48,6 +48,7 @@ sdhc* at acpi?
 acpihve* at acpi?
 chvgpio*        at acpi?
 glkgpio* at acpi?
+acpidmar* at acpi? disable
 
 mpbios0 at bios0
 
diff --git a/sys/arch/amd64/include/pci_machdep.h b/sys/arch/amd64/include/pci_machdep.h
index bc295cc22..c725bdc73 100644
--- a/sys/arch/amd64/include/pci_machdep.h
+++ b/sys/arch/amd64/include/pci_machdep.h
@@ -91,7 +91,8 @@ void *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
     int, struct cpu_info *,
     int (*)(void *), void *, const char *);
 void pci_intr_disestablish(pci_chipset_tag_t, void *);
-#define pci_probe_device_hook(c, a) (0)
+int pci_probe_device_hook(pci_chipset_tag_t,
+    struct pci_attach_args *);
 
 void pci_dev_postattach(struct device *, struct pci_attach_args *);
 
diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c
index cf4e835de..b700946a4 100644
--- a/sys/arch/amd64/pci/pci_machdep.c
+++ b/sys/arch/amd64/pci/pci_machdep.c
@@ -89,6 +89,11 @@
 #include <machine/mpbiosvar.h>
 #endif
 
+#include "acpi.h"
+#if NACPI > 0
+#include <dev/acpi/acpidmar.h>
+#endif
+
 /*
  * Memory Mapped Configuration space access.
  *
@@ -797,7 +802,15 @@ pci_init_extents(void)
  }
 }
 
-#include "acpi.h"
+int
+pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
+{
+#if NACPI > 0
+ acpidmar_pci_hook(pc, pa);
+#endif
+ return 0;
+}
+
 #if NACPI > 0
 void acpi_pci_match(struct device *, struct pci_attach_args *);
 pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t);
diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c
index a6239198e..ea11483ad 100644
--- a/sys/dev/acpi/acpi.c
+++ b/sys/dev/acpi/acpi.c
@@ -49,6 +49,7 @@
 #include <dev/acpi/amltypes.h>
 #include <dev/acpi/acpidev.h>
 #include <dev/acpi/dsdt.h>
+#include <dev/acpi/acpidmar.h>
 #include <dev/wscons/wsdisplayvar.h>
 
 #include <dev/pci/pcidevs.h>
@@ -2448,6 +2449,8 @@ acpi_sleep_pm(struct acpi_softc *sc, int state)
     sc->sc_fadt->pm2_cnt_blk && sc->sc_fadt->pm2_cnt_len)
  acpi_write_pmreg(sc, ACPIREG_PM2_CNT, 0, ACPI_PM2_ARB_DIS);
 
+ acpidmar_sw(DVACT_SUSPEND);
+
  /* Write SLP_TYPx values */
  rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
  regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
@@ -2483,6 +2486,8 @@ acpi_resume_pm(struct acpi_softc *sc, int fromstate)
 {
  uint16_t rega, regb, en;
 
+ acpidmar_sw(DVACT_RESUME);
+
  /* Write SLP_TYPx values */
  rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
  regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
diff --git a/sys/dev/acpi/acpidmar.c b/sys/dev/acpi/acpidmar.c
new file mode 100644
index 000000000..48506e1b1
--- /dev/null
+++ b/sys/dev/acpi/acpidmar.c
@@ -0,0 +1,2988 @@
+/*
+ * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+#include <sys/types.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/apicvar.h>
+#include <machine/biosvar.h>
+#include <machine/cpuvar.h>
+#include <machine/bus.h>
+
+#include <dev/acpi/acpireg.h>
+#include <dev/acpi/acpivar.h>
+#include <dev/acpi/acpidev.h>
+#include <dev/acpi/amltypes.h>
+#include <dev/acpi/dsdt.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/i8259.h>
+#include <machine/i82093reg.h>
+#include <machine/i82093var.h>
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+
+#include <machine/mpbiosvar.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcidevs.h>
+#include <dev/pci/ppbreg.h>
+
+#include "ioapic.h"
+
+#include "acpidmar.h"
+#include "amd_iommu.h"
+
+#define dprintf(x...)
+
+#ifdef DDB
+int acpidmar_ddb = 0;
+#endif
+
+int intel_iommu_gfx_mapped = 0;
+int force_cm = 1;
+
+void showahci(void *);
+
+/* Page Table Entry per domain */
+struct iommu_softc;
+
+static inline int
+mksid(int b, int d, int f)
+{
+ return (b << 8) + (d << 3) + f;
+}
+
+static inline int
+sid_devfn(int sid)
+{
+ return sid & 0xff;
+}
+
+static inline int
+sid_bus(int sid)
+{
+ return (sid >> 8) & 0xff;
+}
+
+static inline int
+sid_dev(int sid)
+{
+ return (sid >> 3) & 0x1f;
+}
+
+static inline int
+sid_fun(int sid)
+{
+ return (sid >> 0) & 0x7;
+}
+
+/* Page Table Entry per domain */
+static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
+
+/* Alias mapping */
+#define SID_INVALID 0x80000000L
+static uint32_t sid_flag[65536];
+
+struct domain_dev {
+ int sid;
+ int sec;
+ int sub;
+ TAILQ_ENTRY(domain_dev) link;
+};
+
+struct domain {
+ struct iommu_softc *iommu;
+ int did;
+ int gaw;
+ struct pte_entry *pte;
+ paddr_t ptep;
+ struct bus_dma_tag dmat;
+ int flag;
+
+ struct mutex            exlck;
+ char exname[32];
+ struct extent *iovamap;
+ TAILQ_HEAD(,domain_dev) devices;
+ TAILQ_ENTRY(domain) link;
+};
+
+#define DOM_DEBUG 0x1
+#define DOM_NOMAP 0x2
+
+struct dmar_devlist {
+ int type;
+ int bus;
+ int ndp;
+ struct acpidmar_devpath *dp;
+ TAILQ_ENTRY(dmar_devlist) link;
+};
+
+TAILQ_HEAD(devlist_head, dmar_devlist);
+
+struct ivhd_devlist {
+ int start_id;
+ int end_id;
+ int cfg;
+ TAILQ_ENTRY(ivhd_devlist) link;
+};
+
+struct rmrr_softc {
+ TAILQ_ENTRY(rmrr_softc) link;
+ struct devlist_head devices;
+ int segment;
+ uint64_t start;
+ uint64_t end;
+};
+
+struct atsr_softc {
+ TAILQ_ENTRY(atsr_softc) link;
+ struct devlist_head devices;
+ int segment;
+ int flags;
+};
+
+struct iommu_pic {
+ struct pic pic;
+ struct iommu_softc *iommu;
+};
+
+#define IOMMU_FLAGS_CATCHALL 0x1
+#define IOMMU_FLAGS_BAD 0x2
+#define IOMMU_FLAGS_SUSPEND 0x4
+
+struct iommu_softc {
+ TAILQ_ENTRY(iommu_softc)link;
+ struct devlist_head devices;
+ int id;
+ int flags;
+ int segment;
+
+ struct mutex reg_lock;
+
+ bus_space_tag_t iot;
+ bus_space_handle_t ioh;
+
+ uint64_t cap;
+ uint64_t ecap;
+ uint32_t gcmd;
+
+ int mgaw;
+ int agaw;
+ int ndoms;
+
+ struct root_entry *root;
+ struct context_entry *ctx[256];
+
+ void *intr;
+ struct iommu_pic pic;
+ int fedata;
+ uint64_t feaddr;
+ uint64_t rtaddr;
+
+ // Queued Invalidation
+ int qi_head;
+ int qi_tail;
+ paddr_t qip;
+ struct qi_entry *qi;
+
+ struct domain *unity;
+ TAILQ_HEAD(,domain) domains;
+
+ // AMD iommu
+ struct ivhd_dte         *dte;
+ void *cmd_tbl;
+ void *evt_tbl;
+ paddr_t cmd_tblp;
+ paddr_t evt_tblp;
+ uint64_t wv[128] __aligned(4096);
+};
+
+static inline int iommu_bad(struct iommu_softc *sc)
+{
+ return (sc->flags & IOMMU_FLAGS_BAD);
+}
+
+static inline int iommu_enabled(struct iommu_softc *sc)
+{
+ if (sc->dte) {
+ return 1;
+ }
+ return (sc->gcmd & GCMD_TE);
+}
+
+struct acpidmar_softc {
+ struct device sc_dev;
+
+ pci_chipset_tag_t sc_pc;
+ bus_space_tag_t sc_memt;
+ int sc_haw;
+ int sc_flags;
+
+ TAILQ_HEAD(,iommu_softc)sc_drhds;
+ TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
+ TAILQ_HEAD(,atsr_softc) sc_atsrs;
+};
+
+int acpidmar_activate(struct device *, int);
+int acpidmar_match(struct device *, void *, void *);
+void acpidmar_attach(struct device *, struct device *, void *);
+struct domain   *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
+
+struct cfattach acpidmar_ca = {
+ sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
+};
+
+struct cfdriver acpidmar_cd = {
+ NULL, "acpidmar", DV_DULL
+};
+
+struct acpidmar_softc *acpidmar_sc;
+int acpidmar_intr(void *);
+int acpiivhd_intr(void *);
+
+#define DID_UNITY 0x1
+
+void _dumppte(struct pte_entry *, int, vaddr_t);
+
+struct domain *domain_create(struct iommu_softc *, int);
+struct domain *domain_lookup(struct acpidmar_softc *, int, int);
+
+void domain_unload_map(struct domain *, bus_dmamap_t);
+void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
+
+void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_pthru(struct domain *, paddr_t, paddr_t);
+
+void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
+void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
+    struct devlist_head *);
+int  acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
+
+void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
+void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
+void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
+void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
+void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
+
+void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
+    const char *);
+
+void iommu_writel(struct iommu_softc *, int, uint32_t);
+uint32_t iommu_readl(struct iommu_softc *, int);
+void iommu_writeq(struct iommu_softc *, int, uint64_t);
+uint64_t iommu_readq(struct iommu_softc *, int);
+void iommu_showfault(struct iommu_softc *, int,
+    struct fault_entry *);
+void iommu_showcfg(struct iommu_softc *, int);
+
+int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
+    struct acpidmar_drhd *);
+int iommu_enable_translation(struct iommu_softc *, int);
+void iommu_enable_qi(struct iommu_softc *, int);
+void iommu_flush_cache(struct iommu_softc *, void *, size_t);
+void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
+void iommu_flush_write_buffer(struct iommu_softc *);
+void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
+
+void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
+void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
+void iommu_flush_tlb(struct iommu_softc *, int, int);
+void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
+
+void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
+
+const char *dmar_bdf(int);
+
+const char *
+dmar_bdf(int sid)
+{
+ static char bdf[32];
+
+ snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
+    sid_bus(sid), sid_dev(sid), sid_fun(sid));
+
+ return (bdf);
+}
+
+/* busdma */
+static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
+    bus_size_t, int, bus_dmamap_t *);
+static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
+static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
+    struct proc *, int);
+static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
+    int);
+static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
+static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
+    bus_dma_segment_t *, int, bus_size_t, int);
+static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
+static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
+    bus_size_t, int);
+static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
+    bus_dma_segment_t *, int, int *, int);
+static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
+static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
+    caddr_t *, int);
+static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
+static paddr_t dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
+    int, int);
+
+static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
+const char *dom_bdf(struct domain *dom);
+void domain_map_check(struct domain *dom);
+
+struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *npte, vaddr_t va, int shift, uint64_t flags);
+int  ivhd_poll_events(struct iommu_softc *iommu);
+void ivhd_showit(struct iommu_softc *);
+void ivhd_showdte(void);
+void ivhd_showcmd(struct iommu_softc *);
+
+static inline int
+debugme(struct domain *dom)
+{
+ return 0;
+ return (dom->flag & DOM_DEBUG);
+}
+
+void
+domain_map_check(struct domain *dom)
+{
+ struct iommu_softc *iommu;
+ struct domain_dev *dd;
+ struct context_entry *ctx;
+ int v;
+
+ iommu = dom->iommu;
+ TAILQ_FOREACH(dd, &dom->devices, link) {
+ acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
+
+ if (iommu->dte)
+ continue;
+
+ /* Check if this is the first time we are mapped */
+ ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
+ v = context_user(ctx);
+ if (v != 0xA) {
+ printf("  map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
+    iommu->segment,
+    sid_bus(dd->sid),
+    sid_dev(dd->sid),
+    sid_fun(dd->sid),
+    iommu->id,
+    dom->did);
+ context_set_user(ctx, 0xA);
+ }
+ }
+}
+
+/* Map a single page as passthrough - used for DRM */
+void
+dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
+{
+ struct domain *dom = tag->_cookie;
+
+ if (!acpidmar_sc)
+ return;
+ domain_map_check(dom);
+ domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
+}
+
+/* Map a range of pages 1:1 */
+void
+domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
+{
+ domain_map_check(dom);
+ while (start < end) {
+ domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
+ start += VTD_PAGE_SIZE;
+ }
+}
+
+/* Map a single paddr to IOMMU paddr */
+void
+domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
+{
+ paddr_t paddr;
+ struct pte_entry *pte, *npte;
+ int lvl, idx;
+ struct iommu_softc *iommu;
+
+ iommu = dom->iommu;
+ /* Insert physical address into virtual address map
+ * XXX: could we use private pmap here?
+ * essentially doing a pmap_enter(map, va, pa, prot);
+ */
+
+ /* Only handle 4k pages for now */
+ npte = dom->pte;
+ for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl>= VTD_LEVEL0;
+    lvl -= VTD_STRIDE_SIZE) {
+ idx = (va >> lvl) & VTD_STRIDE_MASK;
+ pte = &npte[idx];
+ if (lvl == VTD_LEVEL0) {
+ /* Level 1: Page Table - add physical address */
+ pte->val = pa | flags;
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+ break;
+ } else if (!(pte->val & PTE_P)) {
+ /* Level N: Point to lower level table */
+ iommu_alloc_page(iommu, &paddr);
+ pte->val = paddr | PTE_P | PTE_R | PTE_W;
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+ }
+ npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
+ }
+}
+
+/* Map a single paddr to IOMMU paddr: AMD
+ * physical address breakdown into levels:
+ * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
+ *        5.55555555.44444444.43333333,33222222.22211111.1111----.--------
+ * mode:
+ *  000 = none   shift
+ *  001 = 1 [21].12
+ *  010 = 2 [30].21
+ *  011 = 3 [39].30
+ *  100 = 4 [48].39
+ *  101 = 5 [57]
+ *  110 = 6
+ *  111 = reserved
+ */
+struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
+ int shift, uint64_t flags)
+{
+ paddr_t paddr;
+ int idx;
+
+ idx = (va >> shift) & VTD_STRIDE_MASK;
+ if (!(pte[idx].val & PTE_P)) {
+ /* Page Table entry is not present... create a new page entry */
+ iommu_alloc_page(iommu, &paddr);
+ pte[idx].val = paddr | flags;
+ iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
+ }
+ return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
+}
+
+void
+domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
+{
+ struct pte_entry *pte;
+ struct iommu_softc *iommu;
+ int idx;
+
+ iommu = dom->iommu;
+ /* Insert physical address into virtual address map
+ * XXX: could we use private pmap here?
+ * essentially doing a pmap_enter(map, va, pa, prot);
+ */
+
+ /* Always assume AMD levels=4                           */
+ /*        39        30        21        12              */
+ /* ---------|---------|---------|---------|------------ */
+ pte = dom->pte;
+ //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
+ pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
+ pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
+ //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
+
+ if (flags)
+ flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
+
+ /* Level 1: Page Table - add physical address */
+ idx = (va >> 12) & 0x1FF;
+ pte[idx].val = pa | flags;
+
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+}
+
+static void
+dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
+    const char *lbl)
+{
+ struct domain *dom = tag->_cookie;
+ int i;
+
+ return;
+ if (!debugme(dom))
+ return;
+ printf("%s: %s\n", lbl, dom_bdf(dom));
+ for (i = 0; i < nseg; i++) {
+ printf("  %.16llx %.8x\n",
+    (uint64_t)segs[i].ds_addr,
+    (uint32_t)segs[i].ds_len);
+ }
+}
+
+/* Unload mapping */
+void
+domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
+{
+ bus_dma_segment_t *seg;
+ paddr_t base, end, idx;
+ psize_t alen;
+ int i;
+
+ if (iommu_bad(dom->iommu)) {
+ printf("unload map no iommu\n");
+ return;
+ }
+
+ //acpidmar_intr(dom->iommu);
+ for (i = 0; i < dmam->dm_nsegs; i++) {
+ seg  = &dmam->dm_segs[i];
+
+ base = trunc_page(seg->ds_addr);
+ end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
+ alen = end - base;
+
+ if (debugme(dom)) {
+ printf("  va:%.16llx len:%x\n",
+    (uint64_t)base, (uint32_t)alen);
+ }
+
+ /* Clear PTE */
+ for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
+ domain_map_page(dom, base + idx, 0, 0);
+
+ if (dom->flag & DOM_NOMAP) {
+ //printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
+ continue;
+ }
+
+ mtx_enter(&dom->exlck);
+ if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
+ panic("domain_unload_map: extent_free");
+ }
+ mtx_leave(&dom->exlck);
+ }
+}
+
+/* map.segs[x].ds_addr is modified to IOMMU virtual PA */
+void
+domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
+{
+ bus_dma_segment_t *seg;
+ struct iommu_softc *iommu;
+ paddr_t base, end, idx;
+ psize_t alen;
+ u_long res;
+ int i;
+
+ iommu = dom->iommu;
+ if (!iommu_enabled(iommu)) {
+ /* Lazy enable translation when required */
+ if (iommu_enable_translation(iommu, 1)) {
+ return;
+ }
+ }
+ domain_map_check(dom);
+ //acpidmar_intr(iommu);
+ for (i = 0; i < map->dm_nsegs; i++) {
+ seg = &map->dm_segs[i];
+
+ base = trunc_page(seg->ds_addr);
+ end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
+ alen = end - base;
+ res  = base;
+
+ if (dom->flag & DOM_NOMAP) {
+ goto nomap;
+ }
+
+ /* Allocate DMA Virtual Address */
+ mtx_enter(&dom->exlck);
+ if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
+    map->_dm_boundary, EX_NOWAIT, &res)) {
+ panic("domain_load_map: extent_alloc");
+ }
+ if (res == -1) {
+ panic("got -1 address\n");
+ }
+ mtx_leave(&dom->exlck);
+
+ /* Reassign DMA address */
+ seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
+nomap:
+ if (debugme(dom)) {
+ printf("  LOADMAP: %.16llx %x => %.16llx\n",
+    (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
+    (uint64_t)res);
+ }
+ for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
+ domain_map_page(dom, res + idx, base + idx,
+    PTE_P | pteflag);
+ }
+ }
+ if ((iommu->cap & CAP_CM) || force_cm) {
+ iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+}
+
+const char *
+dom_bdf(struct domain *dom)
+{
+ struct domain_dev *dd;
+ static char mmm[48];
+
+ dd = TAILQ_FIRST(&dom->devices);
+ snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
+    dmar_bdf(dd->sid), dom->iommu->id, dom->did,
+    dom->did == DID_UNITY ? " [unity]" : "");
+ return (mmm);
+}
+
+/* Bus DMA Map functions */
+static int
+dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
+    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
+{
+ int rc;
+
+ rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
+    flags, dmamp);
+ if (!rc) {
+ dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static void
+dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
+{
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
+ _bus_dmamap_destroy(tag, dmam);
+}
+
+static int
+dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
+    bus_size_t buflen, struct proc *p, int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
+    int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
+    int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
+    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static void
+dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
+{
+ struct domain *dom = tag->_cookie;
+
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
+ domain_unload_map(dom, dmam);
+ _bus_dmamap_unload(tag, dmam);
+}
+
+static void
+dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
+    bus_size_t len, int ops)
+{
+#if 0
+ struct domain *dom = tag->_cookie;
+ //int flag;
+
+ flag = PTE_P;
+ //acpidmar_intr(dom->iommu);
+ if (ops == BUS_DMASYNC_PREREAD) {
+ /* make readable */
+ flag |= PTE_R;
+ }
+ else if (ops == BUS_DMASYNC_PREWRITE) {
+ /* make writeable */
+ flag |= PTE_W;
+ }
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
+#endif
+ _bus_dmamap_sync(tag, dmam, offset, len, ops);
+}
+
+static int
+dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
+    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
+    int flags)
+{
+ int rc;
+
+ rc = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
+    rsegs, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
+ }
+ return (rc);
+}
+
+static void
+dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
+{
+ dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
+ _bus_dmamem_free(tag, segs, nsegs);
+}
+
+static int
+dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
+    size_t size, caddr_t *kvap, int flags)
+{
+ dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
+ return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
+}
+
+static void
+dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
+{
+ struct domain *dom = tag->_cookie;
+
+ if (debugme(dom)) {
+ printf("dmamap_unmap: %s\n", dom_bdf(dom));
+ }
+ _bus_dmamem_unmap(tag, kva, size);
+}
+
+static paddr_t
+dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
+    off_t off, int prot, int flags)
+{
+ dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
+ return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
+}
+
+/*===================================
+ * IOMMU code
+ *===================================*/
+
+/* Intel: Set Context Root Address */
+void
+iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
+{
+ int i, sts;
+
+ mtx_enter(&iommu->reg_lock);
+ iommu_writeq(iommu, DMAR_RTADDR_REG, paddr);
+ iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
+ for (i = 0; i < 5; i++) {
+ sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ if (sts & GSTS_RTPS)
+ break;
+ }
+ mtx_leave(&iommu->reg_lock);
+
+ if (i == 5) {
+ printf("set_rtaddr fails\n");
+ }
+}
+
+/* COMMON: Allocate a new memory page */
+void *
+iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
+{
+ void *va;
+
+ *paddr = 0;
+ va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
+ if (va == NULL) {
+ panic("can't allocate page\n");
+ }
+ pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
+ return (va);
+}
+
+
+/* Intel: Issue command via queued invalidation */
+void
+iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
+{
+#if 0
+ struct qi_entry *pi, *pw;
+
+ idx = iommu->qi_head;
+ pi = &iommu->qi[idx];
+ pw = &iommu->qi[(idx+1) % MAXQ];
+ iommu->qi_head = (idx+2) % MAXQ;
+
+ memcpy(pw, &qi, sizeof(qi));
+ issue command;
+ while (pw->xxx)
+ ;
+#endif
+}
+
+/* Intel: Flush TLB entries, Queued Invalidation mode */
+void
+iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
+{
+ struct qi_entry qi;
+
+ /* Use queued invalidation */
+ qi.hi = 0;
+ switch (mode) {
+ case IOTLB_GLOBAL:
+ qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
+ break;
+ case IOTLB_DOMAIN:
+ qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
+    QI_IOTLB_DID(did);
+ break;
+ case IOTLB_PAGE:
+ qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
+ qi.hi = 0;
+ break;
+ }
+ if (iommu->cap & CAP_DRD)
+ qi.lo |= QI_IOTLB_DR;
+ if (iommu->cap & CAP_DWD)
+ qi.lo |= QI_IOTLB_DW;
+ iommu_issue_qi(iommu, &qi);
+}
+
+/* Intel: Flush Context entries, Queued Invalidation mode */
+void
+iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
+    int sid, int fm)
+{
+ struct qi_entry qi;
+
+ /* Use queued invalidation */
+ qi.hi = 0;
+ switch (mode) {
+ case CTX_GLOBAL:
+ qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
+ break;
+ case CTX_DOMAIN:
+ qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
+ break;
+ case CTX_DEVICE:
+ qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
+    QI_CTX_SID(sid) | QI_CTX_FM(fm);
+ break;
+ }
+ iommu_issue_qi(iommu, &qi);
+}
+
+/* Intel: Flush write buffers */
+void
+iommu_flush_write_buffer(struct iommu_softc *iommu)
+{
+ int i, sts;
+
+ if (iommu->dte)
+ return;
+ if (!(iommu->cap & CAP_RWBF))
+ return;
+ printf("writebuf\n");
+ iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
+ for (i = 0; i < 5; i++) {
+ sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ if (sts & GSTS_WBFS)
+ break;
+ delay(10000);
+ }
+ if (i == 5) {
+ printf("write buffer flush fails\n");
+ }
+}
+
+void
+iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
+{
+ if (iommu->dte) {
+ pmap_flush_cache((vaddr_t)addr, size);
+ return;
+ }
+ if (!(iommu->ecap & ECAP_C))
+ pmap_flush_cache((vaddr_t)addr, size);
+}
+
+/*
+ * Intel: Flush IOMMU TLB Entries
+ * Flushing can occur globally, per domain or per page
+ */
+void
+iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
+{
+ int n;
+ uint64_t val;
+
+ /* Call AMD */
+ if (iommu->dte) {
+ ivhd_invalidate_domain(iommu, did);
+ //ivhd_poll_events(iommu);
+ return;
+ }
+ val = IOTLB_IVT;
+ switch (mode) {
+ case IOTLB_GLOBAL:
+ val |= IIG_GLOBAL;
+ break;
+ case IOTLB_DOMAIN:
+ val |= IIG_DOMAIN | IOTLB_DID(did);
+ break;
+ case IOTLB_PAGE:
+ val |= IIG_PAGE | IOTLB_DID(did);
+ break;
+ }
+
+ /* Check for Read/Write Drain */
+ if (iommu->cap & CAP_DRD)
+ val |= IOTLB_DR;
+ if (iommu->cap & CAP_DWD)
+ val |= IOTLB_DW;
+
+ mtx_enter(&iommu->reg_lock);
+
+ iommu_writeq(iommu, DMAR_IOTLB_REG(iommu), val);
+ n = 0;
+ do {
+ val = iommu_readq(iommu, DMAR_IOTLB_REG(iommu));
+ } while (n++ < 5 && val & IOTLB_IVT);
+
+ mtx_leave(&iommu->reg_lock);
+
+#ifdef DEBUG
+ {
+ static int rg;
+ int a, r;
+
+ if (!rg) {
+ a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
+ r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
+ if (a != r) {
+ printf("TLB Requested:%d Actual:%d\n", r, a);
+ rg = 1;
+ }
+ }
+ }
+#endif
+}
+
+/* Intel: Flush IOMMU settings
+ * Flushes can occur globally, per domain, or per device
+ */
+void
+iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
+{
+ uint64_t val;
+ int n;
+
+ if (iommu->dte)
+ return;
+ val = CCMD_ICC;
+ switch (mode) {
+ case CTX_GLOBAL:
+ val |= CIG_GLOBAL;
+ break;
+ case CTX_DOMAIN:
+ val |= CIG_DOMAIN | CCMD_DID(did);
+ break;
+ case CTX_DEVICE:
+ val |= CIG_DEVICE | CCMD_DID(did) |
+    CCMD_SID(sid) | CCMD_FM(fm);
+ break;
+ }
+
+ mtx_enter(&iommu->reg_lock);
+
+ n = 0;
+ iommu_writeq(iommu, DMAR_CCMD_REG, val);
+ do {
+ val = iommu_readq(iommu, DMAR_CCMD_REG);
+ } while (n++ < 5 && val & CCMD_ICC);
+
+ mtx_leave(&iommu->reg_lock);
+
+#ifdef DEBUG
+ {
+ static int rg;
+ int a, r;
+
+ if (!rg) {
+ a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
+ r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
+ if (a != r) {
+ printf("CTX Requested:%d Actual:%d\n", r, a);
+ rg = 1;
+ }
+ }
+ }
+#endif
+}
+
+/* Intel: Enable or disable Queued Invalidation.
+ * No-op when the unit lacks ECAP_QI.  The GSTS poll is bounded to
+ * five reads; the printed count shows how many polls were needed.
+ */
+void
+iommu_enable_qi(struct iommu_softc *iommu, int enable)
+{
+ int n = 0;
+ int sts;
+
+ if (!(iommu->ecap & ECAP_QI))
+ return;
+
+ if (enable) {
+ iommu->gcmd |= GCMD_QIE;
+
+ mtx_enter(&iommu->reg_lock);
+
+ /* Write command, wait for QIES to latch */
+ iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ do {
+ sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ } while (n++ < 5 && !(sts & GSTS_QIES));
+
+ mtx_leave(&iommu->reg_lock);
+
+ printf("set.qie: %d\n", n);
+ } else {
+ iommu->gcmd &= ~GCMD_QIE;
+
+ mtx_enter(&iommu->reg_lock);
+
+ /* Write command, wait for QIES to clear */
+ iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ do {
+ sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ } while (n++ < 5 && sts & GSTS_QIES);
+
+ mtx_leave(&iommu->reg_lock);
+
+ printf("clr.qie: %d\n", n);
+ }
+}
+
+/* Intel: Enable or disable IOMMU address translation.
+ * Returns 0 on success, 1 when the unit failed to signal GSTS_TES in
+ * time, in which case the unit is marked IOMMU_FLAGS_BAD and turned
+ * back off.  Removed leftover bring-up debug prints ("xxx"/"yyy")
+ * that fired on every poll iteration.
+ */
+int
+iommu_enable_translation(struct iommu_softc *iommu, int enable)
+{
+ uint32_t sts;
+ uint64_t reg;
+ int n = 0;
+
+ /* AMD unit (device table present): enabled via IOMMUCTL instead */
+ if (iommu->dte)
+ return (0);
+ reg = 0;
+ if (enable) {
+ printf("enable iommu %d\n", iommu->id);
+ iommu_showcfg(iommu, -1);
+
+ iommu->gcmd |= GCMD_TE;
+
+ /* Enable translation; poll with backoff for TES to latch */
+ printf(" pre tes: ");
+
+ mtx_enter(&iommu->reg_lock);
+ iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ do {
+ sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ delay(n * 10000);
+ } while (n++ < 5 && !(sts & GSTS_TES));
+ mtx_leave(&iommu->reg_lock);
+
+ printf(" set.tes: %d\n", n);
+
+ if (n >= 5) {
+ printf("error.. unable to initialize iommu %d\n",
+    iommu->id);
+ iommu->flags |= IOMMU_FLAGS_BAD;
+
+ /* Disable IOMMU */
+ iommu->gcmd &= ~GCMD_TE;
+ mtx_enter(&iommu->reg_lock);
+ iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ mtx_leave(&iommu->reg_lock);
+
+ return (1);
+ }
+
+ /* Drop any stale cached translations before use */
+ iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
+ iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
+ } else {
+ iommu->gcmd &= ~GCMD_TE;
+
+ mtx_enter(&iommu->reg_lock);
+
+ /* Write command, wait for TES to clear */
+ iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ do {
+ sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ } while (n++ < 5 && sts & GSTS_TES);
+ mtx_leave(&iommu->reg_lock);
+
+ printf(" clr.tes: %d\n", n);
+ }
+
+ return (0);
+}
+
+/* Intel: Initialize IOMMU.
+ * Maps the unit's register window, reads the capability registers,
+ * allocates and installs the root table, establishes the fault
+ * interrupt and caches any state already enabled by firmware.
+ * Returns 0 on success, -1 if the register window could not be
+ * mapped.
+ */
+int
+iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
+    struct acpidmar_drhd *dh)
+{
+ static int niommu;
+ int len = VTD_PAGE_SIZE;
+ int i, gaw;
+ uint32_t sts;
+ paddr_t paddr;
+
+ if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
+ return (-1);
+ }
+
+ TAILQ_INIT(&iommu->domains);
+ iommu->id = ++niommu;
+ iommu->flags = dh->flags;
+ iommu->segment = dh->segment;
+ iommu->iot = sc->sc_memt;
+
+ iommu->cap = iommu_readq(iommu, DMAR_CAP_REG);
+ iommu->ecap = iommu_readq(iommu, DMAR_ECAP_REG);
+ iommu->ndoms = cap_nd(iommu->cap);
+
+ /* Pretty-print supported capability/extended-capability bits */
+ printf("  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
+    iommu->cap & CAP_AFL ? "afl " : "", // adv fault
+    iommu->cap & CAP_RWBF ? "rwbf " : "", // write-buffer flush
+    iommu->cap & CAP_PLMR ? "plmr " : "", // protected lo region
+    iommu->cap & CAP_PHMR ? "phmr " : "", // protected hi region
+    iommu->cap & CAP_CM ? "cm " : "", // caching mode
+    iommu->cap & CAP_ZLR ? "zlr " : "", // zero-length read
+    iommu->cap & CAP_PSI ? "psi " : "", // page invalidate
+    iommu->cap & CAP_DWD ? "dwd " : "", // write drain
+    iommu->cap & CAP_DRD ? "drd " : "", // read drain
+    iommu->cap & CAP_FL1GP ? "Gb " : "", // 1Gb pages
+    iommu->cap & CAP_PI ? "pi " : ""); // posted interrupts
+ printf("  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+    iommu->ecap & ECAP_C ? "c " : "", // coherent
+    iommu->ecap & ECAP_QI ? "qi " : "", // queued invalidate
+    iommu->ecap & ECAP_DT ? "dt " : "", // device iotlb
+    iommu->ecap & ECAP_IR ? "ir " : "", // intr remap
+    iommu->ecap & ECAP_EIM ? "eim " : "", // x2apic
+    iommu->ecap & ECAP_PT ? "pt " : "", // passthrough
+    iommu->ecap & ECAP_SC ? "sc " : "", // snoop control
+    iommu->ecap & ECAP_ECS ? "ecs " : "", // extended context
+    iommu->ecap & ECAP_MTS ? "mts " : "", // memory type
+    iommu->ecap & ECAP_NEST ? "nest " : "", // nested translations
+    iommu->ecap & ECAP_DIS ? "dis " : "", // deferred invalidation
+    iommu->ecap & ECAP_PASID ? "pas " : "", // pasid
+    iommu->ecap & ECAP_PRS ? "prs " : "", // page request
+    iommu->ecap & ECAP_ERS ? "ers " : "", // execute request
+    iommu->ecap & ECAP_SRS ? "srs " : "", // supervisor request
+    iommu->ecap & ECAP_NWFS ? "nwfs " : "", // no write flag
+    iommu->ecap & ECAP_EAFS ? "eafs " : ""); // extended accessed flag
+
+ mtx_init(&iommu->reg_lock, IPL_HIGH);
+
+ /* Clear Interrupt Masking */
+ iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
+
+ iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
+    acpidmar_intr, iommu, "dmarintr");
+
+ /* Enable interrupts */
+ sts = iommu_readl(iommu, DMAR_FECTL_REG);
+ iommu_writel(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);
+
+ /* Allocate root pointer */
+ iommu->root = iommu_alloc_page(iommu, &paddr);
+#ifdef DEBUG
+ printf("Allocated root pointer: pa:%.16llx va:%p\n",
+    (uint64_t)paddr, iommu->root);
+#endif
+ iommu->rtaddr = paddr;
+ iommu_flush_write_buffer(iommu);
+ iommu_set_rtaddr(iommu, paddr);
+
+#if 0
+ if (iommu->ecap & ECAP_QI) {
+ /* Queued Invalidation support */
+ iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
+ iommu_writeq(iommu, DMAR_IQT_REG, 0);
+ iommu_writeq(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
+ }
+ if (iommu->ecap & ECAP_IR) {
+ /* Interrupt remapping support */
+ iommu_writeq(iommu, DMAR_IRTA_REG, 0);
+ }
+#endif
+
+ /* Calculate guest address width and supported guest widths;
+  * the largest supported SAGAW level wins. */
+ gaw = -1;
+ iommu->mgaw = cap_mgaw(iommu->cap);
+ printf("gaw: %d { ", iommu->mgaw);
+ for (i = 0; i < 5; i++) {
+ if (cap_sagaw(iommu->cap) & (1L << i)) {
+ gaw = VTD_LEVELTOAW(i);
+ printf("%d ", gaw);
+ iommu->agaw = gaw;
+ }
+ }
+ printf("}\n");
+
+ /* Cache current status register bits so gcmd mirrors hardware */
+ sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ if (sts & GSTS_TES)
+ iommu->gcmd |= GCMD_TE;
+ if (sts & GSTS_QIES)
+ iommu->gcmd |= GCMD_QIE;
+ if (sts & GSTS_IRES)
+ iommu->gcmd |= GCMD_IRE;
+ if (iommu->gcmd) {
+ printf("gcmd: %x preset\n", iommu->gcmd);
+ }
+ /* Drain any faults already pending at attach time */
+ acpidmar_intr(iommu);
+ return (0);
+}
+
+const char *dmar_rn(int reg);
+
+/* Translate a register offset into a human-readable name for the
+ * debug trace in iommu_readl/iommu_readq.  Only the AMD register
+ * names are active; the Intel set is compiled out below.
+ */
+const char *
+dmar_rn(int reg)
+{
+ switch (reg) {
+ case EVT_HEAD_REG: return "evthead";
+ case EVT_TAIL_REG: return "evttail";
+ case CMD_HEAD_REG: return "cmdhead";
+ case CMD_TAIL_REG: return "cmdtail";
+ case CMD_BASE_REG: return "cmdbase";
+ case EVT_BASE_REG: return "evtbase";
+ case DEV_TAB_BASE_REG: return "devtblbase";
+ case IOMMUCTL_REG: return "iommuctl";
+#if 0
+ case DMAR_VER_REG: return "ver";
+ case DMAR_CAP_REG: return "cap";
+ case DMAR_ECAP_REG: return "ecap";
+ case DMAR_GSTS_REG: return "gsts";
+ case DMAR_GCMD_REG: return "gcmd";
+ case DMAR_FSTS_REG: return "fsts";
+ case DMAR_FECTL_REG: return "fectl";
+ case DMAR_RTADDR_REG: return "rtaddr";
+ case DMAR_FEDATA_REG: return "fedata";
+ case DMAR_FEADDR_REG: return "feaddr";
+ case DMAR_FEUADDR_REG: return "feuaddr";
+ case DMAR_PMEN_REG: return "pmen";
+ case DMAR_IEDATA_REG: return "iedata";
+ case DMAR_IEADDR_REG: return "ieaddr";
+ case DMAR_IEUADDR_REG: return "ieuaddr";
+ case DMAR_IRTA_REG: return "irta";
+ case DMAR_CCMD_REG: return "ccmd";
+ case DMAR_IQH_REG: return "iqh";
+ case DMAR_IQT_REG: return "iqt";
+ case DMAR_IQA_REG: return "iqa";
+#endif
+ }
+ return "unknown";
+}
+
+/* Read a 32-bit IOMMU register.
+ * NOTE(review): "00" is an octal zero, so "reg < 00" is always false
+ * and the trace below is dead code — presumably a manually toggled
+ * debug switch left disabled; confirm intent before removing.
+ */
+uint32_t
+iommu_readl(struct iommu_softc *iommu, int reg)
+{
+ uint32_t v;
+
+ v = bus_space_read_4(iommu->iot, iommu->ioh, reg);
+ if (reg < 00) {
+ printf("iommu%d: read %x %.8lx [%s]\n",
+    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
+ }
+
+ return (v);
+}
+
+
+/* Debug trace for register writes; compiled out (expands to nothing) */
+#define dbprintf(x...)
+
+/* Write a 32-bit IOMMU register */
+void
+iommu_writel(struct iommu_softc *iommu, int reg, uint32_t v)
+{
+ dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
+    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
+ bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
+}
+
+/* Read a 64-bit IOMMU register.
+ * NOTE(review): as in iommu_readl, "reg < 00" is always false, so
+ * the trace is permanently disabled.
+ */
+uint64_t
+iommu_readq(struct iommu_softc *iommu, int reg)
+{
+ uint64_t v;
+
+ v = bus_space_read_8(iommu->iot, iommu->ioh, reg);
+ if (reg < 00) {
+ printf("iommu%d: read %x %.8lx [%s]\n",
+    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
+ }
+
+ return (v);
+}
+
+/* Write a 64-bit IOMMU register */
+void
+iommu_writeq(struct iommu_softc *iommu, int reg, uint64_t v)
+{
+ dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
+    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
+ bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
+}
+
+/* Check if a device is within a device scope.
+ * Returns DMAR_ENDPOINT on an exact bus:dev.fn match, DMAR_BRIDGE if
+ * the device's bus sits behind a bridge listed in the scope, or 0 for
+ * no match.  Bridge paths in the scope are walked hop by hop through
+ * PCI config space to resolve the final bus number.
+ */
+int
+acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
+    int sid)
+{
+ struct dmar_devlist *ds;
+ int sub, sec, i;
+ int bus, dev, fun, sbus;
+ pcireg_t reg;
+ pcitag_t tag;
+
+ sbus = sid_bus(sid);
+ TAILQ_FOREACH(ds, devlist, link) {
+ bus = ds->bus;
+ dev = ds->dp[0].device;
+ fun = ds->dp[0].function;
+ /* Walk PCI bridges in path */
+ for (i = 1; i < ds->ndp; i++) {
+ tag = pci_make_tag(pc, bus, dev, fun);
+ reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
+ bus = PPB_BUSINFO_SECONDARY(reg);
+ dev = ds->dp[i].device;
+ fun = ds->dp[i].function;
+ }
+
+ /* Check for device exact match */
+ if (sid == mksid(bus, dev, fun)) {
+ return DMAR_ENDPOINT;
+ }
+
+ /* Check for device subtree match: target bus within the
+  * bridge's secondary..subordinate range */
+ if (ds->type == DMAR_BRIDGE) {
+ tag = pci_make_tag(pc, bus, dev, fun);
+ reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
+ sec = PPB_BUSINFO_SECONDARY(reg);
+ sub = PPB_BUSINFO_SUBORDINATE(reg);
+ if (sec <= sbus && sbus <= sub) {
+ return DMAR_BRIDGE;
+ }
+ }
+ }
+
+ return (0);
+}
+
+/* Create a new translation domain with domain-id 'did' on an IOMMU.
+ * Allocates the top-level page table, installs the dmar_dmamap_*
+ * overrides into the domain's bus_dma tag, and creates the IOVA
+ * extent map.  The 16MB low region is excluded by the extent start
+ * and the MSI window at 0xFEE00000 is reserved so DVAs never collide
+ * with interrupt addresses.
+ */
+struct domain *
+domain_create(struct iommu_softc *iommu, int did)
+{
+ struct domain *dom;
+ int gaw;
+
+ printf("iommu%d: create domain: %.4x\n", iommu->id, did);
+ dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
+ dom->did = did;
+ dom->iommu = iommu;
+ dom->pte = iommu_alloc_page(iommu, &dom->ptep);
+ TAILQ_INIT(&dom->devices);
+
+ /* Setup DMA */
+ dom->dmat._cookie = dom;
+ dom->dmat._dmamap_create    = dmar_dmamap_create; // nop
+ dom->dmat._dmamap_destroy   = dmar_dmamap_destroy; // nop
+ dom->dmat._dmamap_load      = dmar_dmamap_load; // lm
+ dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf; // lm
+ dom->dmat._dmamap_load_uio  = dmar_dmamap_load_uio; // lm
+ dom->dmat._dmamap_load_raw  = dmar_dmamap_load_raw; // lm
+ dom->dmat._dmamap_unload    = dmar_dmamap_unload; // um
+ dom->dmat._dmamap_sync      = dmar_dmamap_sync; // lm
+ dom->dmat._dmamem_alloc     = dmar_dmamem_alloc; // nop
+ dom->dmat._dmamem_free      = dmar_dmamem_free; // nop
+ dom->dmat._dmamem_map       = dmar_dmamem_map; // nop
+ dom->dmat._dmamem_unmap     = dmar_dmamem_unmap; // nop
+ dom->dmat._dmamem_mmap      = dmar_dmamem_mmap;
+
+ snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
+    iommu->id, dom->did);
+
+ /* Setup IOMMU address map; cap DVA space at the narrower of the
+  * adjusted and maximum guest address widths */
+ gaw = min(iommu->agaw, iommu->mgaw);
+ dom->iovamap = extent_create(dom->exname, 1024*1024*16,
+    (1LL << gaw)-1,
+    M_DEVBUF, NULL, 0,
+    EX_WAITOK|EX_NOCOALESCE);
+
+ /* Zero out Interrupt region */
+ extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
+    EX_WAITOK);
+ mtx_init(&dom->exlck, IPL_HIGH);
+
+ TAILQ_INSERT_TAIL(&iommu->domains, dom, link);
+
+ return dom;
+}
+
+/* Record a device (by source-id) as a member of a domain */
+void domain_add_device(struct domain *dom, int sid)
+{
+ struct domain_dev *ddev;
+
+ printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
+ ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
+ ddev->sid = sid;
+ TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
+
+ /* Should set context entry here?? */
+}
+
+/* Remove a device (by source-id) from a domain and free its entry.
+ * Fixed free(9) argument order: the signature is
+ * free(addr, type, size) — the original passed the size where the
+ * malloc type goes.
+ */
+void domain_remove_device(struct domain *dom, int sid)
+{
+ struct domain_dev *ddev, *tmp;
+
+ TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
+ if (ddev->sid == sid) {
+ TAILQ_REMOVE(&dom->devices, ddev, link);
+ free(ddev, M_DEVBUF, sizeof(*ddev));
+ }
+ }
+}
+
+/* Lookup domain by segment & source id (bus.device.function).
+ * Finds the IOMMU covering the device (by devscope match or the
+ * catchall unit), then returns the domain that already contains the
+ * device, or creates a new one.  When fewer than two domain-ids
+ * remain, devices are lumped into a shared "unity" domain.
+ */
+struct domain *
+domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
+{
+ struct iommu_softc *iommu;
+ struct domain_dev *ddev;
+ struct domain *dom;
+ int rc;
+
+ if (sc == NULL) {
+ return NULL;
+ }
+
+ /* Lookup IOMMU for this device; TAILQ_FOREACH leaves iommu NULL
+  * when the list is exhausted without a match */
+ TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
+ if (iommu->segment != segment)
+ continue;
+ /* Check for devscope match or catchall iommu */
+ rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
+ if (rc != 0 || iommu->flags) {
+ break;
+ }
+ }
+ if (!iommu) {
+ printf("%s: no iommu found\n", dmar_bdf(sid));
+ return NULL;
+ }
+
+ //acpidmar_intr(iommu);
+
+ /* Search domain devices */
+ TAILQ_FOREACH(dom, &iommu->domains, link) {
+ TAILQ_FOREACH(ddev, &dom->devices, link) {
+ /* XXX: match all functions? */
+ if (ddev->sid == sid) {
+ return dom;
+ }
+ }
+ }
+ if (iommu->ndoms <= 2) {
+ /* Running out of domains.. create catchall domain */
+ if (!iommu->unity) {
+ iommu->unity = domain_create(iommu, 1);
+ }
+ dom = iommu->unity;
+ } else {
+ /* Allocate domain-ids downward from the top */
+ dom = domain_create(iommu, --iommu->ndoms);
+ }
+ if (!dom) {
+ printf("no domain here\n");
+ return NULL;
+ }
+
+ /* Add device to domain */
+ domain_add_device(dom, sid);
+
+ return dom;
+}
+
+/* Map Guest Pages into IOMMU.
+ * Maps [gpa, gpa+len) in the domain to the host pages backing the
+ * current process's virtual range starting at va, page by page.
+ * NOTE(review): the pmap_extract() return value is not checked — if
+ * a page of va is unmapped, hpa stays 0 and physical page 0 is
+ * mapped into the domain; confirm callers guarantee wired memory.
+ */
+void  _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
+{
+ bus_size_t i;
+ paddr_t hpa;
+
+ if (dom == NULL) {
+ return;
+ }
+ printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
+ for (i = 0; i < len; i += PAGE_SIZE) {
+ hpa = 0;
+ pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
+ domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
+ gpa += PAGE_SIZE;
+ va  += PAGE_SIZE;
+ }
+}
+
+/* Find IOMMU domain for a given PCI device.
+ * Returns an opaque domain pointer (or NULL) and stores the
+ * domain-id through 'id' when found.
+ */
+void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
+{
+ struct domain *dom;
+
+ dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
+ if (dom) {
+ *id = dom->did;
+ }
+ return dom;
+}
+
+void domain_map_device(struct domain *dom, int sid);
+
+/* Program the translation entry that attaches a device to a domain.
+ * AMD units: fill in the Device Table Entry for the source-id.
+ * Intel units: allocate the per-bus context table on first use, then
+ * fill in the context entry for the devfn.  Both paths are no-ops if
+ * the entry is already valid.
+ */
+void
+domain_map_device(struct domain *dom, int sid)
+{
+ struct iommu_softc *iommu;
+ struct context_entry *ctx;
+ paddr_t paddr;
+ int bus, devfn;
+ int tt, lvl;
+
+ iommu = dom->iommu;
+
+ bus = sid_bus(sid);
+ devfn = sid_devfn(sid);
+ /* AMD attach device */
+ if (iommu->dte) {
+ struct ivhd_dte *dte = &iommu->dte[sid];
+ if (!dte->dw0) {
+ /* Setup Device Table Entry: bus.devfn */
+ printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
+ dte_set_host_page_table_root_ptr(dte, dom->ptep);
+ dte_set_domain(dte, dom->did);
+ dte_set_mode(dte, 3);  // Set 4 level PTE
+ dte_set_tv(dte);
+ dte_set_valid(dte);
+ ivhd_flush_devtab(iommu, dom->did);
+ //ivhd_showit(iommu);
+ ivhd_showdte();
+ }
+ //ivhd_poll_events(iommu);
+ return;
+ }
+
+ /* Create Bus mapping */
+ if (!root_entry_is_valid(&iommu->root[bus])) {
+ iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
+ iommu->root[bus].lo = paddr | ROOT_P;
+ iommu_flush_cache(iommu, &iommu->root[bus],
+    sizeof(struct root_entry));
+ dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
+    iommu->id, bus, (uint64_t)paddr,
+    iommu->ctx[bus]);
+ }
+
+ /* Create DevFn mapping */
+ ctx = iommu->ctx[bus] + devfn;
+ if (!context_entry_is_valid(ctx)) {
+ tt = CTX_T_MULTI;
+ lvl = VTD_AWTOLEVEL(iommu->agaw);
+
+ /* Initialize context */
+ context_set_slpte(ctx, dom->ptep);
+ context_set_translation_type(ctx, tt);
+ context_set_domain_id(ctx, dom->did);
+ context_set_address_width(ctx, lvl);
+ context_set_present(ctx);
+
+ /* Flush it: caching-mode units need explicit invalidation,
+  * otherwise a write-buffer flush suffices */
+ iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
+ if ((iommu->cap & CAP_CM) || force_cm) {
+ iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
+ iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+ dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
+    iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
+    dom->did, tt);
+ }
+}
+
+/* Lookup (or create) the domain for a device; optionally program its
+ * context/device-table entry right away.
+ * Dropped the needless 'static' on the local pointer — it served no
+ * purpose and made the function non-reentrant.
+ */
+struct domain *
+acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
+{
+ struct domain *dom;
+
+ dom = domain_lookup(sc, segment, sid);
+ if (!dom) {
+ printf("no domain: %s\n", dmar_bdf(sid));
+ return NULL;
+ }
+
+ if (mapctx) {
+ domain_map_device(dom, sid);
+ }
+
+ return dom;
+}
+
+/* Placeholder device filter: currently every bus/dev/fun is mapped */
+int ismap(int bus, int dev, int fun) {
+ return 1;
+}
+
+/* Hook called for each attaching PCI device.
+ * Places the device in an IOMMU domain and swaps the attach args'
+ * DMA tag for the domain's remapping tag.  VGA devices are left in a
+ * no-map domain; ISA bridges get the low 16MB identity-mapped.
+ */
+void
+acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
+{
+ int bus, dev, fun, sid;
+ struct domain *dom;
+ pcireg_t reg;
+
+ if (!acpidmar_sc) {
+ /* No DMAR, ignore */
+ return;
+ }
+
+ /* Add device to our list */
+ pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
+ sid = mksid(bus, dev, fun);
+ if (sid_flag[sid] & SID_INVALID)
+ return;
+
+ reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
+#if 0
+ if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
+    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
+ printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
+    pa->pa_domain, bus, dev, fun);
+ return;
+ }
+#endif
+ /* Add device to domain */
+ dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain, sid, 0);
+ if (dom == NULL)
+ return;
+
+ if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
+    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
+ dom->flag = DOM_NOMAP;
+ }
+ if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
+    PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
+ /* For ISA Bridges, map 0-16Mb as 1:1 */
+ printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
+    pa->pa_domain, bus, dev, fun);
+ domain_map_pthru(dom, 0x00, 16*1024*1024);
+ }
+
+ /* Change DMA tag */
+ pa->pa_dmat = &dom->dmat;
+}
+
+/* Create list of device scope entries from ACPI table.
+ * Walks the device-scope records starting at byte offset 'off' within
+ * the DMAR sub-table 'de' and appends bridge/endpoint entries to
+ * 'devlist'.  Each path element is two bytes (device, function), so
+ * ndp = payload/2.
+ */
+void
+acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
+    struct devlist_head *devlist)
+{
+ struct acpidmar_devscope *ds;
+ struct dmar_devlist *d;
+ int dplen, i;
+
+ TAILQ_INIT(devlist);
+ while (off < de->length) {
+ ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
+ off += ds->length;
+
+ /* We only care about bridges and endpoints */
+ if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
+ continue;
+
+ /* Path data is allocated inline after the list node */
+ dplen = ds->length - sizeof(*ds);
+ d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
+ d->bus  = ds->bus;
+ d->type = ds->type;
+ d->ndp  = dplen / 2;
+ d->dp   = (void *)&d[1];
+ memcpy(d->dp, &ds[1], dplen);
+ TAILQ_INSERT_TAIL(devlist, d, link);
+
+ printf("  %8s  %.4x:%.2x.%.2x.%x {",
+    ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
+    segment, ds->bus,
+    d->dp[0].device,
+    d->dp[0].function);
+
+ for (i = 1; i < d->ndp; i++) {
+ printf(" %2x.%x ",
+    d->dp[i].device,
+    d->dp[i].function);
+ }
+ printf("}\n");
+ }
+}
+
+/* DMA Remapping Hardware Unit.
+ * Parses one DRHD sub-table, initializes the unit and inserts it into
+ * the DRHD list (catchall units at the tail so devscope matches are
+ * preferred).  Now checks the iommu_init() return value: previously a
+ * unit whose register window failed to map was still inserted and
+ * later dereferenced half-initialized.
+ */
+void
+acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
+{
+ struct iommu_softc *iommu;
+ struct dmar_devlist *dl, *tmp;
+
+ printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
+    de->drhd.segment,
+    de->drhd.address,
+    de->drhd.flags);
+ iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
+ acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
+    &iommu->devices);
+ if (iommu_init(sc, iommu, &de->drhd) != 0) {
+ /* Register window unmappable; discard the unit */
+ printf("DRHD: iommu init failed, skipping unit\n");
+ TAILQ_FOREACH_SAFE(dl, &iommu->devices, link, tmp) {
+ TAILQ_REMOVE(&iommu->devices, dl, link);
+ free(dl, M_DEVBUF, 0);
+ }
+ free(iommu, M_DEVBUF, sizeof(*iommu));
+ return;
+ }
+
+ if (de->drhd.flags) {
+ /* Catchall IOMMU goes at end of list */
+ TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
+ } else {
+ TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
+ }
+}
+
+/* Reserved Memory Region Reporting.
+ * Records a BIOS-declared RMRR (page-aligned) plus its device scope.
+ * If the RMRR sits inside a larger E820-reserved block the whole
+ * adjacent reserved range is used instead, since some BIOSes DMA
+ * outside the declared RMRR.
+ */
+void
+acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
+{
+ struct rmrr_softc *rmrr;
+ bios_memmap_t *im, *jm;
+ uint64_t start, end;
+
+ printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
+    de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
+ if (de->rmrr.limit <= de->rmrr.base) {
+ printf("  buggy BIOS\n");
+ return;
+ }
+
+ rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
+ rmrr->start = trunc_page(de->rmrr.base);
+ rmrr->end = round_page(de->rmrr.limit);
+ rmrr->segment = de->rmrr.segment;
+ acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
+    &rmrr->devices);
+
+ for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
+ if (im->type != BIOS_MAP_RES)
+ continue;
+ /* Search for adjacent reserved regions */
+ start = im->addr;
+ end   = im->addr+im->size;
+ for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
+    jm++) {
+ end = jm->addr+jm->size;
+ }
+ printf("e820: %.16llx - %.16llx\n", start, end);
+ if (start <= rmrr->start && rmrr->end <= end) {
+ /* Bah.. some buggy BIOS stomp outside RMRR */
+ printf("  ** inside E820 Reserved %.16llx %.16llx\n",
+    start, end);
+ rmrr->start = trunc_page(start);
+ rmrr->end   = round_page(end);
+ break;
+ }
+ }
+ TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
+}
+
+/* Root Port ATS Reporting.
+ * Records an ATSR sub-table (flags + device scope) for later use;
+ * no hardware is touched here.
+ */
+void
+acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
+{
+ struct atsr_softc *atsr;
+
+ printf("ATSR: segment:%.4x flags:%x\n",
+    de->atsr.segment,
+    de->atsr.flags);
+
+ atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
+ atsr->flags = de->atsr.flags;
+ atsr->segment = de->atsr.segment;
+ acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
+    &atsr->devices);
+
+ TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
+}
+
+/* Parse the ACPI DMAR table and bring up all Intel VT-d units.
+ * Walks the DRHD/RMRR/ATSR sub-tables, pre-creates domains for
+ * devices named in devscopes, and identity-maps RMRR ranges.
+ * Fixes: extent_alloc_region(9) takes (start, SIZE, flags) — the
+ * original passed the region end as the size; the result is now also
+ * checked instead of assigning it to a dead variable.
+ */
+void
+acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
+{
+ struct rmrr_softc *rmrr;
+ struct iommu_softc *iommu;
+ struct domain *dom;
+ struct dmar_devlist *dl;
+ union acpidmar_entry *de;
+ int off, sid, rc;
+
+ domain_map_page = domain_map_page_intel;
+ printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
+    dmar->haw+1,
+    !!(dmar->flags & 0x1),
+    !!(dmar->flags & 0x2));
+ sc->sc_haw = dmar->haw+1;
+ sc->sc_flags = dmar->flags;
+
+ TAILQ_INIT(&sc->sc_drhds);
+ TAILQ_INIT(&sc->sc_rmrrs);
+ TAILQ_INIT(&sc->sc_atsrs);
+
+ /* Walk the variable-length sub-tables */
+ off = sizeof(*dmar);
+ while (off < dmar->hdr.length) {
+ de = (union acpidmar_entry *)((unsigned char *)dmar + off);
+ switch (de->type) {
+ case DMAR_DRHD:
+ acpidmar_drhd(sc, de);
+ break;
+ case DMAR_RMRR:
+ acpidmar_rmrr(sc, de);
+ break;
+ case DMAR_ATSR:
+ acpidmar_atsr(sc, de);
+ break;
+ default:
+ printf("DMAR: unknown %x\n", de->type);
+ break;
+ }
+ off += de->length;
+ }
+
+ /* Pre-create domains for iommu devices */
+ TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
+ TAILQ_FOREACH(dl, &iommu->devices, link) {
+ sid = mksid(dl->bus, dl->dp[0].device,
+    dl->dp[0].function);
+ dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
+ if (dom != NULL) {
+ printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
+    iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
+    iommu->id, dom->did);
+ }
+ }
+ }
+ /* Map passthrough pages for RMRR and reserve the DVA range so the
+  * allocator never hands it out */
+ TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
+ TAILQ_FOREACH(dl, &rmrr->devices, link) {
+ sid = mksid(dl->bus, dl->dp[0].device,
+    dl->dp[0].function);
+ dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
+ if (dom != NULL) {
+ printf("%s map ident: %.16llx %.16llx\n",
+    dom_bdf(dom), rmrr->start, rmrr->end);
+ domain_map_pthru(dom, rmrr->start, rmrr->end);
+ rc = extent_alloc_region(dom->iovamap,
+    rmrr->start, rmrr->end - rmrr->start,
+    EX_WAITOK);
+ if (rc != 0) {
+ printf("%s: rmrr reserve failed: %d\n",
+    dom_bdf(dom), rc);
+ }
+ }
+ }
+ }
+}
+
+
+/*=====================================================
+ * AMD Vi
+ *=====================================================*/
+void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
+int acpiivrs_iommu_match(struct pci_attach_args *);
+int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
+ struct acpi_ivhd *);
+void iommu_ivhd_add(struct iommu_softc *, int, int, int);
+int _ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *);
+void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
+int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
+int ivhd_invalidate_domain(struct iommu_softc *, int);
+void ivhd_intr_map(struct iommu_softc *, int);
+
+/* AMD IOMMU interrupt handler: drain pending hardware events.
+ * Returns 1 when handled, 0 when this is not an AMD unit.
+ */
+int
+acpiivhd_intr(void *ctx)
+{
+ struct iommu_softc *iommu = ctx;
+
+ if (!iommu->dte)
+ return (0);
+ ivhd_poll_events(iommu);
+ return (1);
+}
+
+/* Setup MSI interrupt delivery for an AMD IOMMU unit.
+ * Idempotent: returns immediately if the handler is already wired.
+ */
+void
+ivhd_intr_map(struct iommu_softc *iommu, int devid)
+{
+ pci_intr_handle_t ih;
+
+ /* Already established */
+ if (iommu->intr != NULL)
+ return;
+
+ ih.tag = pci_make_tag(NULL, sid_bus(devid), sid_dev(devid), sid_fun(devid));
+ ih.line = APIC_INT_VIA_MSG;
+ ih.pin = 0;
+ iommu->intr = pci_intr_establish(NULL, ih, IPL_NET | IPL_MPSAFE,
+ acpiivhd_intr, iommu, "amd_iommu");
+ printf("amd iommu intr: %p\n", iommu->intr);
+}
+
+/* Recursively dump a populated IOMMU page table (debug aid).
+ * lvl counts down from the top level; leaf entries print the mapped
+ * virtual address reconstructed from the index path.
+ */
+void _dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
+{
+ char *pfx[] = { "    ", "   ", "  ", " ", "" };
+ uint64_t i, sh;
+ struct pte_entry *npte;
+
+ for (i = 0; i < 512; i++) {
+ /* VA bits contributed by this index at this level */
+ sh = (i << (((lvl-1) * 9) + 12));
+ if (pte[i].val & PTE_P) {
+ if (lvl > 1) {
+ npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
+ printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl,
+    pte[i].val, (pte[i].val >> 9) & 7);
+ _dumppte(npte, lvl-1, va | sh);
+ }
+ else {
+ printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl,
+    pte[i].val, va | sh);
+ }
+ }
+ }
+}
+
+/* Debug: dump the DTE and page-table tree for a faulting device.
+ * Rate-limited to the first 10 invocations to avoid flooding the
+ * console during fault storms.
+ */
+void showpage(int sid, paddr_t paddr)
+{
+ struct domain *dom;
+ static int show = 0;
+
+ if (show > 10)
+ return;
+ show++;
+ dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
+ if (!dom)
+ return;
+ printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hwdte[sid].dw0,
+ hwdte[sid].dw1,
+ hwdte[sid].dw2,
+ hwdte[sid].dw3,
+ hwdte[sid].dw4,
+ hwdte[sid].dw5,
+ hwdte[sid].dw6,
+ hwdte[sid].dw7);
+ _dumppte(dom->pte, 3, 0);
+}
+
+/* Display AMD IOMMU Error.
+ * Decodes one event-log entry (source-id, domain-id, fault address,
+ * type and flag bits), prints a human-readable description, and zeroes
+ * the entry so stale data is not re-reported.
+ */
+void
+ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
+{
+ int type, sid, did, flag;
+ uint64_t address;
+
+ /* Get Device, Domain, Address and Type of event */
+ sid  = __EXTRACT(evt->dw0, EVT_SID);
+ type = __EXTRACT(evt->dw1, EVT_TYPE);
+ did  = __EXTRACT(evt->dw1, EVT_DID);
+ flag = __EXTRACT(evt->dw1, EVT_FLAG);
+ address = _get64(&evt->dw2);
+
+ printf("=== IOMMU Error[%.4x]: ", head);
+ switch (type) {
+ case ILLEGAL_DEV_TABLE_ENTRY: // ok
+ printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
+   dmar_bdf(sid), address,
+   evt->dw1 & EVT_TR ? "translation" : "transaction",
+   evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
+   evt->dw1 & EVT_RW ? "write" : "read",
+   evt->dw1 & EVT_I  ? "interrupt" : "memory");
+ ivhd_showdte();
+ break;
+ case IO_PAGE_FAULT: // ok
+ printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
+   dmar_bdf(sid), did, address,
+   evt->dw1 & EVT_TR ? "translation" : "transaction",
+   evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
+   evt->dw1 & EVT_PE ? "no perm" : "perm",
+   evt->dw1 & EVT_RW ? "write" : "read",
+   evt->dw1 & EVT_PR ? "present" : "not present",
+   evt->dw1 & EVT_I  ? "interrupt" : "memory");
+ ivhd_showdte();
+ showpage(sid, address);
+ break;
+ case DEV_TAB_HARDWARE_ERROR: // ok
+ printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
+    dmar_bdf(sid), address,
+   evt->dw1 & EVT_TR ? "translation" : "transaction",
+   evt->dw1 & EVT_RW ? "write" : "read",
+   evt->dw1 & EVT_I  ? "interrupt" : "memory");
+ ivhd_showdte();
+ break;
+ case PAGE_TAB_HARDWARE_ERROR:
+ printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
+   dmar_bdf(sid), address,
+   evt->dw1 & EVT_TR ? "translation" : "transaction",
+   evt->dw1 & EVT_RW ? "write" : "read",
+   evt->dw1 & EVT_I  ? "interrupt" : "memory");
+ ivhd_showdte();
+ break;
+ case ILLEGAL_COMMAND_ERROR: // ok
+ printf("illegal command addr=0x%.16llx\n", address);
+ ivhd_showcmd(iommu);
+ break;
+ case COMMAND_HARDWARE_ERROR:
+ printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
+   address, flag);
+ ivhd_showcmd(iommu);
+ break;
+ case IOTLB_INV_TIMEOUT:
+ printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
+   dmar_bdf(sid), address);
+ break;
+ case INVALID_DEVICE_REQUEST:
+ printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
+   dmar_bdf(sid), address, flag);
+ break;
+ default:
+ printf("unknown type=0x%.2x\n", type);
+ break;
+ }
+ //ivhd_showdte();
+ /* Clear old event */
+ evt->dw0 = 0;
+ evt->dw1 = 0;
+ evt->dw2 = 0;
+ evt->dw3 = 0;
+}
+
+/* AMD: Process IOMMU error from hardware.
+ * Drains the event log between the head and tail registers, then
+ * writes the new head back.  head/tail are byte offsets into the
+ * ring.
+ * NOTE(review): "iommu->evt_tbl + head" adds a BYTE offset via
+ * pointer arithmetic — this is only correct if evt_tbl is declared
+ * as a byte-sized pointer; if it is (struct ivhd_event *) the offset
+ * is scaled by 16 and indexes past the table.  Confirm evt_tbl's
+ * declared type.
+ */
+int
+ivhd_poll_events(struct iommu_softc *iommu)
+{
+ uint32_t head, tail;
+ int sz;
+
+ sz = sizeof(struct ivhd_event);
+ head = iommu_readl(iommu, EVT_HEAD_REG);
+ tail = iommu_readl(iommu, EVT_TAIL_REG);
+ if (head == tail) {
+ /* No pending events */
+ return (0);
+ }
+ while (head != tail) {
+ ivhd_show_event(iommu, iommu->evt_tbl + head, head);
+ head = (head + sz) % EVT_TBL_SIZE;
+ }
+ iommu_writel(iommu, EVT_HEAD_REG, head);
+ return (0);
+}
+
+/* AMD: Issue command to IOMMU queue.
+ * Copies the 16-byte command into the ring at the tail and advances
+ * the tail register, with interrupts disabled around the
+ * read-modify-write.  Returns the slot index (tail/sz) on success or
+ * -EBUSY when the ring is full.
+ */
+int
+_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
+{
+ u_long rf;
+ uint32_t head, tail, next;
+ int sz;
+
+ head = iommu_readl(iommu, CMD_HEAD_REG);
+ sz = sizeof(*cmd);
+ rf = intr_disable();
+ tail = iommu_readl(iommu, CMD_TAIL_REG);
+ next = (tail + sz) % CMD_TBL_SIZE;
+ if (next == head) {
+ printf("FULL\n");
+ /* Queue is full */
+ intr_restore(rf);
+ return -EBUSY;
+ }
+ memcpy(iommu->cmd_tbl + tail, cmd, sz);
+ iommu_writel(iommu, CMD_TAIL_REG, next);
+ intr_restore(rf);
+ return (tail / sz);
+}
+
+/* Maximum exponential-backoff polls while waiting for completion */
+#define IVHD_MAXDELAY 8
+
+/* AMD: Issue a command, optionally waiting for completion.
+ * When 'wait' is set, a COMPLETION_WAIT command is queued after the
+ * caller's command; hardware stores the dw2/dw3 payload to the
+ * physical address of the stack variable 'wv', which we poll with
+ * exponential backoff (DELAY 10 << i) up to IVHD_MAXDELAY tries.
+ * Returns the slot index of the last queued command, or negative on
+ * queue-full.
+ */
+int
+ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
+{
+ struct ivhd_command wq = { 0 };
+ volatile uint64_t wv __aligned(16) = 0LL;
+ paddr_t paddr;
+ int rc, i;
+ static int mi;
+
+ rc = _ivhd_issue_command(iommu, cmd);
+ if (rc >= 0 && wait) {
+ /* Wait for previous commands to complete.
+ * Store address of completion variable to command */
+ pmap_extract(pmap_kernel(), (vaddr_t)&wv, &paddr);
+ wq.dw0 = (paddr & ~0xF) | 0x1;
+ wq.dw1 = (COMPLETION_WAIT << CMD_SHIFT) | ((paddr >> 32) & 0xFFFFF);
+ wq.dw2 = 0xDEADBEEF;
+ wq.dw3 = 0xFEEDC0DE;
+
+ rc = _ivhd_issue_command(iommu, &wq);
+ /* wv will change to value in dw2/dw3 when command is complete */
+ for (i = 0; i < IVHD_MAXDELAY && !wv; i++) {
+ DELAY(10 << i);
+ }
+ /* Track the worst-case wait seen so far (debug) */
+ if (mi < i && mi != IVHD_MAXDELAY) {
+ printf("maxdel: %d\n", i);
+ mi = i;
+ }
+ if (i == IVHD_MAXDELAY) {
+ printf("ivhd command timeout: %.8x %.8x %.8x %.8x wv:%llx idx:%x\n",
+ cmd->dw0, cmd->dw1, cmd->dw2, cmd->dw3, wv, rc);
+ }
+ }
+ return rc;
+
+}
+
+/* AMD: Flush changes to Device Table Entry for a specific domain.
+ * NOTE(review): INVALIDATE_DEVTAB_ENTRY carries a DEVICE-id in dw0,
+ * but every caller passes a DOMAIN-id here — confirm against the AMD
+ * IOMMU specification; flushing the wrong entry would leave stale
+ * DTEs cached.
+ */
+int ivhd_flush_devtab(struct iommu_softc *iommu, int did)
+{
+ struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_DEVTAB_ENTRY << CMD_SHIFT };
+ return ivhd_issue_command(iommu, &cmd, 1);
+}
+
+/* AMD: Invalidate all IOMMU device and page tables (fire-and-forget,
+ * no completion wait) */
+int ivhd_invalidate_iommu_all(struct iommu_softc *iommu)
+{
+ struct ivhd_command cmd = { .dw1 = INVALIDATE_IOMMU_ALL << CMD_SHIFT };
+ return ivhd_issue_command(iommu, &cmd, 0);
+}
+
+/* AMD: Invalidate the interrupt remapping table for a device-table
+ * entry (no completion wait) */
+int ivhd_invalidate_interrupt_table(struct iommu_softc *iommu, int did)
+{
+ struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_INTERRUPT_TABLE << CMD_SHIFT };
+ return ivhd_issue_command(iommu, &cmd, 0);
+}
+
+/* AMD: Invalidate all page tables in a domain.
+ * dw2/dw3 encode address -1 with the S (size) and PDE bits set, i.e.
+ * "all pages"; waits for completion.
+ */
+int ivhd_invalidate_domain(struct iommu_softc *iommu, int did)
+{
+ struct ivhd_command cmd = { .dw1 = did | (INVALIDATE_IOMMU_PAGES << CMD_SHIFT) };
+
+ cmd.dw2 = 0xFFFFF000 | 0x3;
+ cmd.dw3 = 0x7FFFFFFF;
+ return ivhd_issue_command(iommu, &cmd, 1);
+}
+
+/* AMD: Display the base/control/status registers and the command and
+ * event queue pointers (debug aid) */
+void ivhd_showit(struct iommu_softc *iommu)
+{
+ printf("---- dt:%.16llx cmd:%.16llx evt:%.16llx ctl:%.16llx sts:%.16llx\n",
+ iommu_readq(iommu, DEV_TAB_BASE_REG),
+ iommu_readq(iommu, CMD_BASE_REG),
+ iommu_readq(iommu, EVT_BASE_REG),
+ iommu_readq(iommu, IOMMUCTL_REG),
+ iommu_readq(iommu, IOMMUSTS_REG));
+ printf("---- cmd queue:%.16llx %.16llx evt queue:%.16llx %.16llx\n",
+ iommu_readq(iommu, CMD_HEAD_REG),
+ iommu_readq(iommu, CMD_TAIL_REG),
+ iommu_readq(iommu, EVT_HEAD_REG),
+ iommu_readq(iommu, EVT_TAIL_REG));
+}
+
+/* AMD: Deliberately generate errors to exercise the event handler
+ * (debug-only; corrupts DTE 0x2303 and issues an all-ones command) */
+void ivhd_checkerr(struct iommu_softc *iommu);
+void ivhd_checkerr(struct iommu_softc *iommu)
+{
+ struct ivhd_command cmd = { -1, -1, -1, -1 };
+
+ /* Generate ILLEGAL DEV TAB entry? */
+ iommu->dte[0x2303].dw0 = -1;      // invalid
+ iommu->dte[0x2303].dw2 = 0x1234;  // domain
+ iommu->dte[0x2303].dw7 = -1;      // reserved
+ ivhd_flush_devtab(iommu, 0x1234);
+ ivhd_poll_events(iommu);
+
+ /* Generate ILLEGAL_COMMAND_ERROR : ok */
+ ivhd_issue_command(iommu, &cmd, 0);
+ ivhd_poll_events(iommu);
+
+ /* Generate page hardware error */
+}
+
+/* AMD: Show Device Table Entry.
+ * Walks all 64k source ids; the index encodes bus:dev.fn (8/5/3 bits),
+ * printing only entries whose first word is non-zero. */
+void ivhd_showdte(void)
+{
+ int i;
+
+ for (i = 0; i < 65536; i++) {
+ if (hwdte[i].dw0) {
+ printf("%.2x:%.2x.%x: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ i >> 8, (i >> 3) & 0x1F, i & 0x7,
+ hwdte[i].dw0, hwdte[i].dw1,
+ hwdte[i].dw2, hwdte[i].dw3,
+ hwdte[i].dw4, hwdte[i].dw5,
+ hwdte[i].dw6, hwdte[i].dw7);
+ }
+ }
+}
+
+/* AMD: Show command entries.
+ * Dumps entries of the 4k command buffer along with each entry's
+ * physical address.
+ * NOTE(review): 4096 / 128 == 32, but entries are sizeof(*ihd) == 16
+ * bytes, so a 4k table holds 256 entries -- this dumps only the first
+ * 32.  Confirm whether the divisor was meant to be sizeof(*ihd). */
+void ivhd_showcmd(struct iommu_softc *iommu)
+{
+ struct ivhd_command *ihd;
+ paddr_t phd;
+ int i;
+
+ ihd = iommu->cmd_tbl;
+ phd = iommu_readq(iommu, CMD_BASE_REG) & CMD_BASE_MASK;
+ for (i = 0; i < 4096 / 128; i++) {
+ printf("%.2x: %.16llx %.8x %.8x %.8x %.8x\n", i,
+ (uint64_t)phd + i * sizeof(*ihd),
+ ihd[i].dw0,ihd[i].dw1,ihd[i].dw2,ihd[i].dw3);
+ }
+}
+
+#define _c(x) (int)((iommu->ecap >> x ##_SHIFT) & x ## _MASK)
+
+/* AMD: Initialize IOMMU.
+ * Maps the MMIO register window described by the IVHD, allocates the
+ * command buffer and event log, points the hardware at the shared
+ * device table (hwdte) and enables translation with the IVHD flags.
+ * Returns 0 on success, -1 on failure.
+ * NOTE(review): failure paths after _bus_space_map() do not unmap the
+ * register window; acceptable at boot, but worth a TODO. */
+int
+ivhd_iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
+ struct acpi_ivhd *ivhd)
+{
+ static int niommu;
+ paddr_t paddr;
+ uint64_t ov;
+
+ if (sc == NULL || iommu == NULL || ivhd == NULL) {
+ printf("Bad pointer to iommu_init!\n");
+ return -1;
+ }
+ /* Map the 512k register window declared in the IVHD */
+ if (_bus_space_map(sc->sc_memt, ivhd->address, 0x80000, 0, &iommu->ioh) != 0) {
+ printf("Bus Space Map fails\n");
+ return -1;
+ }
+ TAILQ_INIT(&iommu->domains);
+ TAILQ_INIT(&iommu->devices);
+
+ /* Setup address width and number of domains */
+ iommu->id = ++niommu;
+ iommu->iot = sc->sc_memt;
+ iommu->mgaw = 48;
+ iommu->agaw = 48;
+ iommu->flags = 1;
+ iommu->segment = 0;
+ iommu->ndoms = 256;
+
+ /* Decode and report the extended feature register */
+ iommu->ecap = iommu_readq(iommu, EXTFEAT_REG);
+ printf("ecap = %.16llx\n", iommu->ecap);
+ printf("%s%s%s%s%s%s%s%s\n",
+ iommu->ecap & EFR_PREFSUP ? "pref " : "",
+ iommu->ecap & EFR_PPRSUP  ? "ppr " : "",
+ iommu->ecap & EFR_NXSUP   ? "nx " : "",
+ iommu->ecap & EFR_GTSUP   ? "gt " : "",
+ iommu->ecap & EFR_IASUP   ? "ia " : "",
+ iommu->ecap & EFR_GASUP   ? "ga " : "",
+ iommu->ecap & EFR_HESUP   ? "he " : "",
+ iommu->ecap & EFR_PCSUP   ? "pc " : "");
+ printf("hats:%x gats:%x glxsup:%x smif:%x smifrc:%x gam:%x\n",
+ _c(EFR_HATS), _c(EFR_GATS), _c(EFR_GLXSUP), _c(EFR_SMIFSUP),
+ _c(EFR_SMIFRC), _c(EFR_GAMSUP));
+
+ /* Turn off iommu while we program its tables */
+ ov = iommu_readq(iommu, IOMMUCTL_REG);
+ iommu_writeq(iommu, IOMMUCTL_REG, ov & ~(CTL_IOMMUEN | CTL_COHERENT |
+ CTL_HTTUNEN | CTL_RESPASSPW | CTL_PASSPW | CTL_ISOC));
+
+ /* Enable intr */
+ sid_flag[ivhd->devid] |= SID_INVALID;
+ ivhd_intr_map(iommu, ivhd->devid);
+
+ /* Setup command buffer with 4k buffer */
+ iommu->cmd_tbl = iommu_alloc_page(iommu, &paddr);
+ if (iommu->cmd_tbl == NULL) {
+ printf("iommu%d: no memory for command buffer\n", iommu->id);
+ return -1;
+ }
+ iommu_writeq(iommu, CMD_BASE_REG, (paddr & CMD_BASE_MASK) | CMD_TBL_LEN_4K);
+ iommu_writel(iommu, CMD_HEAD_REG, 0x00);
+ iommu_writel(iommu, CMD_TAIL_REG, 0x00);
+ iommu->cmd_tblp = paddr;
+
+ /* Setup event log with 4k buffer */
+ iommu->evt_tbl = iommu_alloc_page(iommu, &paddr);
+ if (iommu->evt_tbl == NULL) {
+ printf("iommu%d: no memory for event log\n", iommu->id);
+ return -1;
+ }
+ iommu_writeq(iommu, EVT_BASE_REG, (paddr & EVT_BASE_MASK) | EVT_TBL_LEN_4K);
+ iommu_writel(iommu, EVT_HEAD_REG, 0x00);
+ iommu_writel(iommu, EVT_TAIL_REG, 0x00);
+ iommu->evt_tblp = paddr;
+
+ /* Setup device table
+ * 1 entry per source ID (bus:device:function - 64k entries)
+ */
+ iommu->dte = hwdte;
+ if (pmap_extract(pmap_kernel(), (vaddr_t)iommu->dte, &paddr) == 0) {
+ printf("iommu%d: can't get device table PA\n", iommu->id);
+ return -1;
+ }
+ iommu_writeq(iommu, DEV_TAB_BASE_REG, (paddr & DEV_TAB_MASK) | DEV_TAB_LEN);
+
+ /* Enable IOMMU, carrying over the cache/passthru flags from the IVHD */
+ ov |= (CTL_IOMMUEN | CTL_EVENTLOGEN | CTL_CMDBUFEN | CTL_EVENTINTEN | CTL_COMWAITINTEN);
+ if (ivhd->flags & IVHD_COHERENT)
+ ov |= CTL_COHERENT;
+ if (ivhd->flags & IVHD_HTTUNEN)
+ ov |= CTL_HTTUNEN;
+ if (ivhd->flags & IVHD_RESPASSPW)
+ ov |= CTL_RESPASSPW;
+ if (ivhd->flags & IVHD_PASSPW)
+ ov |= CTL_PASSPW;
+ if (ivhd->flags & IVHD_ISOC)
+ ov |= CTL_ISOC;
+ /* Program a 1ms invalidation timeout */
+ ov &= ~(CTL_INVTIMEOUT_MASK << CTL_INVTIMEOUT_SHIFT);
+ ov |=  (CTL_INVTIMEOUT_1MS  << CTL_INVTIMEOUT_SHIFT);
+ iommu_writeq(iommu, IOMMUCTL_REG, ov);
+
+ /* Drop any stale translations before first use */
+ ivhd_invalidate_iommu_all(iommu);
+ //ivhd_checkerr(iommu);
+
+ TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
+ return 0;
+}
+
+/*
+ * AMD: Record a device-id range with its DTE configuration bits.
+ *
+ * NOTE(review): the allocated entry is never linked into any list and
+ * the pointer is dropped when the function returns -- this leaks 'idev'
+ * and the recorded data is never consumed.  TODO: attach it to a
+ * per-iommu device list (or remove the function if unused).
+ */
+void
+iommu_ivhd_add(struct iommu_softc *iommu, int start, int end, int cfg)
+{
+ struct ivhd_devlist *idev;
+
+ idev = malloc(sizeof(*idev), M_DEVBUF, M_ZERO | M_WAITOK);
+ idev->start_id = start;
+ idev->end_id = end;
+ idev->cfg = cfg;
+}
+
+/* Return 1 if this PCI function is an IOMMU (class System, subclass IOMMU) */
+int acpiivrs_iommu_match(struct pci_attach_args *pa)
+{
+ int bus, dev, fun;
+
+ pci_decompose_tag(pa->pa_pc, pa->pa_tag, &bus, &dev, &fun);
+ printf(" matchdev: %d.%d.%d\n", bus, dev, fun);
+ if (PCI_CLASS(pa->pa_class) != PCI_CLASS_SYSTEM ||
+    PCI_SUBCLASS(pa->pa_class) != PCI_SUBCLASS_SYSTEM_IOMMU)
+ return (0);
+ printf("iziommu\n");
+ return (1);
+}
+
+/*
+ * AMD: Parse one IVHD (I/O Virtualization Hardware Definition) block
+ * from the IVRS table.  Prints the header/feature flags, initializes
+ * the IOMMU for the extended (type 0x11) form, then walks the trailing
+ * device entries that select which source ids get which DTE settings.
+ *
+ * Fix: the IVHD_ALL case printed 'dte', which is uninitialized at that
+ * point; the value just stored is 'all_dte'.
+ */
+void
+acpiivrs_ivhd(struct acpidmar_softc *sc, struct acpi_ivhd *ivhd)
+{
+ struct iommu_softc *iommu;
+ struct acpi_ivhd_ext *ext;
+ union acpi_ivhd_entry *ie;
+ int start, off, dte, all_dte = 0;
+
+ if (ivhd->type == IVRS_IVHD_EXT) {
+ ext = (struct acpi_ivhd_ext *)ivhd;
+ printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x %.16llx\n",
+       ext->type, ext->flags, ext->length,
+             ext->segment, dmar_bdf(ext->devid), ext->cap,
+             ext->address, ext->info,
+       ext->attrib, ext->efr);
+ if (ext->flags & IVHD_PPRSUP)
+ printf(" PPRSup");
+ if (ext->flags & IVHD_PREFSUP)
+ printf(" PreFSup");
+ if (ext->flags & IVHD_COHERENT)
+ printf(" Coherent");
+ if (ext->flags & IVHD_IOTLB)
+ printf(" Iotlb");
+ if (ext->flags & IVHD_ISOC)
+ printf(" ISoc");
+ if (ext->flags & IVHD_RESPASSPW)
+ printf(" ResPassPW");
+ if (ext->flags & IVHD_PASSPW)
+ printf(" PassPW");
+ if (ext->flags & IVHD_HTTUNEN)
+ printf( " HtTunEn");
+ if (ext->flags)
+ printf("\n");
+ off = sizeof(*ext);
+ /* Common prefix of acpi_ivhd/acpi_ivhd_ext is identical, so the
+  * base pointer is safe to hand to ivhd_iommu_init() */
+ iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO|M_WAITOK);
+ ivhd_iommu_init(sc, iommu, ivhd);
+ } else {
+ /* NOTE(review): the legacy (type 0x10) form is only printed; no
+  * iommu is initialized for it -- confirm this is intentional */
+ printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x\n",
+       ivhd->type, ivhd->flags, ivhd->length,
+             ivhd->segment, dmar_bdf(ivhd->devid), ivhd->cap,
+             ivhd->address, ivhd->info,
+       ivhd->feature);
+ if (ivhd->flags & IVHD_PPRSUP)
+ printf(" PPRSup");
+ if (ivhd->flags & IVHD_PREFSUP)
+ printf(" PreFSup");
+ if (ivhd->flags & IVHD_COHERENT)
+ printf(" Coherent");
+ if (ivhd->flags & IVHD_IOTLB)
+ printf(" Iotlb");
+ if (ivhd->flags & IVHD_ISOC)
+ printf(" ISoc");
+ if (ivhd->flags & IVHD_RESPASSPW)
+ printf(" ResPassPW");
+ if (ivhd->flags & IVHD_PASSPW)
+ printf(" PassPW");
+ if (ivhd->flags & IVHD_HTTUNEN)
+ printf( " HtTunEn");
+ if (ivhd->flags)
+ printf("\n");
+ off = sizeof(*ivhd);
+ }
+ /* Walk variable-length device entries following the header */
+ while (off < ivhd->length) {
+ ie = (void *)ivhd + off;
+ switch (ie->type) {
+ case IVHD_ALL:
+ all_dte = ie->all.data;
+ printf(" ALL %.4x\n", all_dte);
+ off += sizeof(ie->all);
+ break;
+ case IVHD_SEL:
+ dte = ie->sel.data;
+ printf(" SELECT: %s %.4x\n", dmar_bdf(ie->sel.devid), dte);
+ off += sizeof(ie->sel);
+ break;
+ case IVHD_SOR:
+ dte = ie->sor.data;
+ start = ie->sor.devid;
+ printf(" SOR: %s %.4x\n", dmar_bdf(start), dte);
+ off += sizeof(ie->sor);
+ break;
+ case IVHD_EOR:
+ printf(" EOR: %s\n", dmar_bdf(ie->eor.devid));
+ off += sizeof(ie->eor);
+ break;
+ case IVHD_ALIAS_SEL:
+ dte = ie->alias.data;
+ printf(" ALIAS: src=%s: ", dmar_bdf(ie->alias.srcid));
+ printf(" %s %.4x\n", dmar_bdf(ie->alias.devid), dte);
+ off += sizeof(ie->alias);
+ break;
+ case IVHD_ALIAS_SOR:
+ dte = ie->alias.data;
+ printf(" ALIAS_SOR: %s %.4x ", dmar_bdf(ie->alias.devid), dte);
+ printf(" src=%s\n", dmar_bdf(ie->alias.srcid));
+ off += sizeof(ie->alias);
+ break;
+ case IVHD_EXT_SEL:
+ dte = ie->ext.data;
+ printf(" EXT SEL: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
+ dte, ie->ext.extdata);
+ off += sizeof(ie->ext);
+ break;
+ case IVHD_EXT_SOR:
+ dte = ie->ext.data;
+ printf(" EXT SOR: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
+       dte, ie->ext.extdata);
+ off += sizeof(ie->ext);
+ break;
+ case IVHD_SPECIAL:
+ printf(" SPECIAL\n");
+ off += sizeof(ie->special);
+ break;
+ default:
+ /* Unknown entry: length is unrecoverable, stop the walk */
+ printf(" 2:unknown %x\n", ie->type);
+ off = ivhd->length;
+ break;
+ }
+ }
+}
+
+/*
+ * AMD: Parse the IVRS ACPI table.  Selects the AMD page-map routine,
+ * then walks the table's IVHD/IVMD sub-entries, handing IVHDs to
+ * acpiivrs_ivhd().
+ * NOTE(review): a malformed entry with length 0 would loop forever;
+ * consider a sanity check on ie->length.
+ */
+void
+acpiivrs_init(struct acpidmar_softc *sc, struct acpi_ivrs *ivrs)
+{
+ union acpi_ivrs_entry *ie;
+ int off;
+
+ /* AMD page tables use a different PTE layout than VT-d */
+ domain_map_page = domain_map_page_amd;
+ printf("IVRS Version: %d\n", ivrs->hdr.revision);
+ printf(" VA Size: %d\n", (ivrs->ivinfo >> IVRS_VASIZE_SHIFT) & IVRS_VASIZE_MASK);
+ printf(" PA Size: %d\n", (ivrs->ivinfo >> IVRS_PASIZE_SHIFT) & IVRS_PASIZE_MASK);
+
+ TAILQ_INIT(&sc->sc_drhds);
+ TAILQ_INIT(&sc->sc_rmrrs);
+ TAILQ_INIT(&sc->sc_atsrs);
+
+ printf("======== IVRS\n");
+ off = sizeof(*ivrs);
+ while (off < ivrs->hdr.length) {
+ ie = (void *)ivrs + off;
+ switch (ie->type) {
+ case IVRS_IVHD:
+ case IVRS_IVHD_EXT:
+ acpiivrs_ivhd(sc, &ie->ivhd);
+ break;
+ case IVRS_IVMD_ALL:
+ case IVRS_IVMD_SPECIFIED:
+ case IVRS_IVMD_RANGE:
+ /* Memory definition blocks: not handled yet */
+ printf("ivmd\n");
+ break;
+ default:
+ printf("1:unknown: %x\n", ie->type);
+ break;
+ }
+ off += ie->length;
+ }
+ printf("======== End IVRS\n");
+}
+
+/* AMD: record suspend/resume state in the per-iommu flags */
+static int
+acpiivhd_activate(struct iommu_softc *iommu, int act)
+{
+ if (act == DVACT_SUSPEND)
+ iommu->flags |= IOMMU_FLAGS_SUSPEND;
+ else if (act == DVACT_RESUME)
+ iommu->flags &= ~IOMMU_FLAGS_SUSPEND;
+ return (0);
+}
+
+/*
+ * Suspend/resume handler for all attached IOMMUs.
+ * On resume, VT-d units must have their root table pointer and fault
+ * event registers reprogrammed (register state is lost across sleep);
+ * AMD units (iommu->dte != NULL) only track state via acpiivhd_activate.
+ */
+int
+acpidmar_activate(struct device *self, int act)
+{
+ struct acpidmar_softc *sc = (struct acpidmar_softc *)self;
+ struct iommu_softc *iommu;
+
+ printf("called acpidmar_activate %d %p\n", act, sc);
+
+ if (sc == NULL) {
+ return (0);
+ }
+
+ switch (act) {
+ case DVACT_RESUME:
+ TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
+ printf("iommu%d resume\n", iommu->id);
+ /* dte set means AMD-Vi: flag-tracking only */
+ if (iommu->dte) {
+ acpiivhd_activate(iommu, act);
+ continue;
+ }
+ /* VT-d: restore root table + fault interrupt routing */
+ iommu_flush_write_buffer(iommu);
+ iommu_set_rtaddr(iommu, iommu->rtaddr);
+ iommu_writel(iommu, DMAR_FEDATA_REG, iommu->fedata);
+ iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
+ iommu_writel(iommu, DMAR_FEUADDR_REG,
+    iommu->feaddr >> 32);
+ /* Only re-enable translation if it was active at suspend
+  * and the unit is not marked bad */
+ if ((iommu->flags & (IOMMU_FLAGS_BAD|IOMMU_FLAGS_SUSPEND)) ==
+    IOMMU_FLAGS_SUSPEND) {
+ printf("enable wakeup translation\n");
+ iommu_enable_translation(iommu, 1);
+ }
+ iommu_showcfg(iommu, -1);
+ }
+ break;
+ case DVACT_SUSPEND:
+ TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
+ printf("iommu%d suspend\n", iommu->id);
+ if (iommu->flags & IOMMU_FLAGS_BAD)
+ continue;
+ if (iommu->dte) {
+ acpiivhd_activate(iommu, act);
+ continue;
+ }
+ iommu->flags |= IOMMU_FLAGS_SUSPEND;
+ iommu_enable_translation(iommu, 0);
+ iommu_showcfg(iommu, -1);
+ }
+ break;
+ }
+ return (0);
+}
+
+/* Entry point for suspend/resume notifications from machdep code */
+void
+acpidmar_sw(int act)
+{
+ acpidmar_activate((struct device *)acpidmar_sc, act);
+}
+
+/* Attach when the ACPI table is a DMAR (Intel VT-d) or IVRS (AMD-Vi) table */
+int
+acpidmar_match(struct device *parent, void *match, void *aux)
+{
+ struct acpi_attach_args *aaa = aux;
+ struct acpi_table_header *hdr;
+
+ /* If we do not have a table, it is not us */
+ if (aaa->aaa_table == NULL)
+ return (0);
+
+ hdr = (struct acpi_table_header *)aaa->aaa_table;
+ if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0 ||
+    memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0)
+ return (1);
+
+ return (0);
+}
+
+/* Attach: parse whichever of DMAR/IVRS was matched (mutually exclusive) */
+void
+acpidmar_attach(struct device *parent, struct device *self, void *aux)
+{
+ struct acpidmar_softc *sc = (void *)self;
+ struct acpi_attach_args *aaa = aux;
+ struct acpi_table_header *hdr;
+
+ hdr = (struct acpi_table_header *)aaa->aaa_table;
+ sc->sc_memt = aaa->aaa_memt;
+ if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0) {
+ acpidmar_sc = sc;
+ acpidmar_init(sc, (struct acpi_dmar *)aaa->aaa_table);
+ } else if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0) {
+ acpidmar_sc = sc;
+ acpiivrs_init(sc, (struct acpi_ivrs *)aaa->aaa_table);
+ }
+}
+
+/* Interrupt handling: MSI routing for the IOMMU fault event interrupt */
+void acpidmar_msi_hwmask(struct pic *, int);
+void acpidmar_msi_hwunmask(struct pic *, int);
+void acpidmar_msi_addroute(struct pic *, struct cpu_info *, int, int, int);
+void acpidmar_msi_delroute(struct pic *, struct cpu_info *, int, int, int);
+
+/* Mask the VT-d fault event interrupt (sets FECTL.IM).
+ * The read-back forces the write to post before releasing the lock. */
+void
+acpidmar_msi_hwmask(struct pic *pic, int pin)
+{
+ struct iommu_pic *ip = (void *)pic;
+ struct iommu_softc *iommu = ip->iommu;
+
+ printf("msi_hwmask\n");
+
+ mtx_enter(&iommu->reg_lock);
+
+ iommu_writel(iommu, DMAR_FECTL_REG, FECTL_IM);
+ iommu_readl(iommu, DMAR_FECTL_REG);
+
+ mtx_leave(&iommu->reg_lock);
+}
+
+/* Unmask the VT-d fault event interrupt (clears FECTL.IM).
+ * The read-back forces the write to post before releasing the lock. */
+void
+acpidmar_msi_hwunmask(struct pic *pic, int pin)
+{
+ struct iommu_pic *ip = (void *)pic;
+ struct iommu_softc *iommu = ip->iommu;
+
+ printf("msi_hwunmask\n");
+
+ mtx_enter(&iommu->reg_lock);
+
+ iommu_writel(iommu, DMAR_FECTL_REG, 0);
+ iommu_readl(iommu, DMAR_FECTL_REG);
+
+ mtx_leave(&iommu->reg_lock);
+}
+
+/* Program the fault event MSI: vector into FEDATA, target LAPIC
+ * (0xfee00000 | apicid << 12) into FEADDR/FEUADDR.  The values are
+ * cached in the softc so resume can reprogram them. */
+void
+acpidmar_msi_addroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
+    int type)
+{
+ struct iommu_pic *ip = (void *)pic;
+ struct iommu_softc *iommu = ip->iommu;
+
+ mtx_enter(&iommu->reg_lock);
+
+ iommu->fedata = vec;
+ iommu->feaddr = 0xfee00000L | (ci->ci_apicid << 12);
+ iommu_writel(iommu, DMAR_FEDATA_REG, vec);
+ iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
+ iommu_writel(iommu, DMAR_FEUADDR_REG, iommu->feaddr >> 32);
+
+ mtx_leave(&iommu->reg_lock);
+}
+
+/* Route removal is a no-op; the fault interrupt is never torn down */
+void
+acpidmar_msi_delroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
+    int type)
+{
+ printf("msi_delroute\n");
+}
+
+/*
+ * Establish the IOMMU fault interrupt: builds a private MSI-style pic
+ * backed by the acpidmar_msi_* routines and registers 'func' on it.
+ * Returns the interrupt handle from intr_establish().
+ */
+void *
+acpidmar_intr_establish(void *ctx, int level, int (*func)(void *),
+    void *arg, const char *what)
+{
+ struct iommu_softc *iommu = ctx;
+ struct pic *pic;
+
+ pic = &iommu->pic.pic;
+ iommu->pic.iommu = iommu;
+
+ strlcpy(pic->pic_dev.dv_xname, "dmarpic",
+ sizeof(pic->pic_dev.dv_xname));
+ pic->pic_type = PIC_MSI;
+ pic->pic_hwmask = acpidmar_msi_hwmask;
+ pic->pic_hwunmask = acpidmar_msi_hwunmask;
+ pic->pic_addroute = acpidmar_msi_addroute;
+ pic->pic_delroute = acpidmar_msi_delroute;
+ pic->pic_edge_stubs = ioapic_edge_stubs;
+#ifdef MULTIPROCESSOR
+ mtx_init(&pic->pic_mutex, level);
+#endif
+
+ /* -1 pin: vector allocated by the interrupt layer */
+ return intr_establish(-1, pic, 0, IST_PULSE, level, NULL, func, arg, what);
+}
+
+/*
+ * VT-d fault interrupt handler: walks the fault recording ring and
+ * prints each new fault once (duplicates of the last fault are
+ * suppressed via the static 'ofe' cache), then acknowledges the
+ * primary-fault and overflow bits in FSTS.
+ */
+int
+acpidmar_intr(void *ctx)
+{
+ struct iommu_softc *iommu = ctx;
+ struct fault_entry fe;
+ static struct fault_entry ofe;
+ int fro, nfr, fri, i;
+ uint32_t sts;
+
+ //splassert(IPL_HIGH);
+
+ /* Translation disabled: nothing to report */
+ if (!(iommu->gcmd & GCMD_TE)) {
+ return (1);
+ }
+ mtx_enter(&iommu->reg_lock);
+ sts = iommu_readl(iommu, DMAR_FECTL_REG);
+ sts = iommu_readl(iommu, DMAR_FSTS_REG);
+
+ if (!(sts & FSTS_PPF)) {
+ mtx_leave(&iommu->reg_lock);
+ return (1);
+ }
+
+ /* nfr entries of 16 bytes each starting at capability offset fro */
+ nfr = cap_nfr(iommu->cap);
+ fro = cap_fro(iommu->cap);
+ fri = (sts >> FSTS_FRI_SHIFT) & FSTS_FRI_MASK;
+ for (i = 0; i < nfr; i++) {
+ fe.hi = iommu_readq(iommu, fro + (fri*16) + 8);
+ if (!(fe.hi & FRCD_HI_F))
+ break;
+
+ fe.lo = iommu_readq(iommu, fro + (fri*16));
+ if (ofe.hi != fe.hi || ofe.lo != fe.lo) {
+ iommu_showfault(iommu, fri, &fe);
+ ofe.hi = fe.hi;
+ ofe.lo = fe.lo;
+ }
+ fri = (fri + 1) % nfr;
+ }
+
+ iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
+
+ mtx_leave(&iommu->reg_lock);
+
+ return (1);
+}
+
+/* VT-d fault reason strings, indexed by the FRCD fault-reason field */
+const char *vtd_faults[] = {
+ "Software",
+ "Root Entry Not Present", /* ok (rtaddr + 4096) */
+ "Context Entry Not Present", /* ok (no CTX_P) */
+ "Context Entry Invalid", /* ok (tt = 3) */
+ "Address Beyond MGAW",
+ "Write", /* ok */
+ "Read", /* ok */
+ "Paging Entry Invalid", /* ok */
+ "Root Table Invalid",
+ "Context Table Invalid",
+ "Root Entry Reserved",          /* ok (root.lo |= 0x4) */
+ "Context Entry Reserved",
+ "Paging Entry Reserved",
+ "Context Entry TT",
+ "Reserved",
+};
+
+void iommu_showpte(uint64_t, int, uint64_t);
+
+/*
+ * Recursively dump a VT-d page table: 'ptep' is the physical address of
+ * a 512-entry table, 'lvl' the shift of this level (12 at the leaf),
+ * 'base' the virtual DMA address covered so far.  Stops descending once
+ * an identity mapping (nb == pb) is found at the leaf.
+ * NOTE(review): PTE_R is defined as 0x00, so "pte[i].val == PTE_R" can
+ * never be true for a present entry -- the 'r' flag is never printed.
+ * Probably meant to print 'r' unconditionally; confirm intent.
+ */
+void
+iommu_showpte(uint64_t ptep, int lvl, uint64_t base)
+{
+ uint64_t nb, pb, i;
+ struct pte_entry *pte;
+
+ pte = (void *)PMAP_DIRECT_MAP(ptep);
+ for (i = 0; i < 512; i++) {
+ if (!(pte[i].val & PTE_P))
+ continue;
+ nb = base + (i << lvl);
+ pb = pte[i].val & ~VTD_PAGE_MASK;
+ if(lvl == VTD_LEVEL0) {
+ printf("   %3llx %.16llx = %.16llx %c%c %s\n",
+    i, nb, pb,
+    pte[i].val == PTE_R ? 'r' : ' ',
+    pte[i].val & PTE_W ? 'w' : ' ',
+    (nb == pb) ? " ident" : "");
+ if (nb == pb)
+ return;
+ } else {
+ iommu_showpte(pb, lvl - VTD_STRIDE_SIZE, nb);
+ }
+ }
+}
+
+/*
+ * Dump the VT-d configuration: global command/status, then every valid
+ * root entry (per bus) and context entry (per devfn) with its address
+ * width, domain id, translation type and PTE pointer.  The 'sid'
+ * argument is currently unused (all entries are printed).
+ */
+void
+iommu_showcfg(struct iommu_softc *iommu, int sid)
+{
+ int i, j, sts, cmd;
+ struct context_entry *ctx;
+ pcitag_t tag;
+ pcireg_t clc;
+
+ cmd = iommu_readl(iommu, DMAR_GCMD_REG);
+ sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ printf("iommu%d: flags:%d root pa:%.16llx %s %s %s %.8x %.8x\n",
+    iommu->id, iommu->flags, iommu_readq(iommu, DMAR_RTADDR_REG),
+    sts & GSTS_TES ? "enabled" : "disabled",
+    sts & GSTS_QIES ? "qi" : "ccmd",
+    sts & GSTS_IRES ? "ir" : "",
+    cmd, sts);
+ for (i = 0; i < 256; i++) {
+ if (!root_entry_is_valid(&iommu->root[i])) {
+ continue;
+ }
+ for (j = 0; j < 256; j++) {
+ ctx = iommu->ctx[i] + j;
+ if (!context_entry_is_valid(ctx)) {
+ continue;
+ }
+ /* j encodes devfn: device in bits 3..7, function in bits 0..2 */
+ tag = pci_make_tag(NULL, i, (j >> 3), j & 0x7);
+ clc = pci_conf_read(NULL, tag, 0x08) >> 8;
+ printf("  %.2x:%.2x.%x lvl:%d did:%.4x tt:%d ptep:%.16llx flag:%x cc:%.6x\n",
+    i, (j >> 3), j & 7,
+    context_address_width(ctx),
+    context_domain_id(ctx),
+    context_translation_type(ctx),
+    context_pte(ctx),
+    context_user(ctx),
+    clc);
+#if 0
+ /* dump pagetables */
+ iommu_showpte(ctx->lo & ~VTD_PAGE_MASK, iommu->agaw -
+    VTD_STRIDE_SIZE, 0);
+#endif
+ }
+ }
+}
+
+/*
+ * Decode and print one fault record: source bus:dev.fn, read/write,
+ * faulting DMA address, fault reason string and whether the device's
+ * context entry was ever set up (user bits == 0xA marks bus_space_map).
+ * Also reports if the address lies in an e820 reserved region.
+ * NOTE(review): vtd_faults[] has 15 entries (0..14) but the guard is
+ * "fr <= 13", so reason 14 prints "unknown" instead of "Reserved".
+ */
+void
+iommu_showfault(struct iommu_softc *iommu, int fri, struct fault_entry *fe)
+{
+ int bus, dev, fun, type, fr, df;
+ bios_memmap_t *im;
+ const char *mapped;
+
+ if (!(fe->hi & FRCD_HI_F))
+ return;
+ type = (fe->hi & FRCD_HI_T) ? 'r' : 'w';
+ fr = (fe->hi >> FRCD_HI_FR_SHIFT) & FRCD_HI_FR_MASK;
+ bus = (fe->hi >> FRCD_HI_BUS_SHIFT) & FRCD_HI_BUS_MASK;
+ dev = (fe->hi >> FRCD_HI_DEV_SHIFT) & FRCD_HI_DEV_MASK;
+ fun = (fe->hi >> FRCD_HI_FUN_SHIFT) & FRCD_HI_FUN_MASK;
+ df  = (fe->hi >> FRCD_HI_FUN_SHIFT) & 0xFF;
+ iommu_showcfg(iommu, mksid(bus,dev,fun));
+ if (!iommu->ctx[bus]) {
+ /* Bus is not initialized */
+ mapped = "nobus";
+ } else if (!context_entry_is_valid(&iommu->ctx[bus][df])) {
+ /* DevFn not initialized */
+ mapped = "nodevfn";
+ } else if (context_user(&iommu->ctx[bus][df]) != 0xA) {
+ /* no bus_space_map */
+ mapped = "nomap";
+ } else {
+ /* bus_space_map */
+ mapped = "mapped";
+ }
+ printf("fri%d: dmar: %.2x:%.2x.%x %s error at %llx fr:%d [%s] iommu:%d [%s]\n",
+    fri, bus, dev, fun,
+    type == 'r' ? "read" : "write",
+    fe->lo,
+    fr, fr <= 13 ? vtd_faults[fr] : "unknown",
+    iommu->id,
+    mapped);
+ for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
+ if ((im->type == BIOS_MAP_RES) &&
+    (im->addr <= fe->lo) &&
+    (fe->lo <= im->addr+im->size)) {
+ printf("mem in e820.reserved\n");
+ }
+ }
+#ifdef DDB
+ if (acpidmar_ddb)
+ db_enter();
+#endif
+}
+
+
diff --git a/sys/dev/acpi/acpidmar.h b/sys/dev/acpi/acpidmar.h
new file mode 100644
index 000000000..33659ecaf
--- /dev/null
+++ b/sys/dev/acpi/acpidmar.h
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _DEV_ACPI_DMARREG_H_
+#define _DEV_ACPI_DMARREG_H_
+
+#define VTD_STRIDE_MASK 0x1FF
+#define VTD_STRIDE_SIZE 9
+#define VTD_PAGE_SIZE   4096
+#define VTD_PAGE_MASK   0xFFF
+#define VTD_PTE_MASK    0x0000FFFFFFFFF000LL
+
+#define VTD_LEVEL0 12
+#define VTD_LEVEL1 21
+#define VTD_LEVEL2 30 /* Minimum level supported */
+#define VTD_LEVEL3 39 /* Also supported */
+#define VTD_LEVEL4 48
+#define VTD_LEVEL5 57
+
+#define _xbit(x,y) (((x)>> (y)) & 1)
+#define _xfld(x,y) (uint32_t)(((x)>> y##_SHIFT) & y##_MASK)
+
+#define VTD_AWTOLEVEL(x)    (((x) - 30) / VTD_STRIDE_SIZE)
+#define VTD_LEVELTOAW(x)    (((x) * VTD_STRIDE_SIZE) + 30)
+
+#define DMAR_VER_REG 0x00    /* 32:Arch version supported by this IOMMU */
+#define DMAR_RTADDR_REG 0x20    /* 64:Root entry table */
+#define DMAR_FEDATA_REG 0x3c    /* 32:Fault event interrupt data register */
+#define DMAR_FEADDR_REG 0x40    /* 32:Fault event interrupt addr register */
+#define DMAR_FEUADDR_REG 0x44    /* 32:Upper address register */
+#define DMAR_AFLOG_REG 0x58    /* 64:Advanced Fault control */
+#define DMAR_PMEN_REG 0x64    /* 32:Enable Protected Memory Region */
+#define DMAR_PLMBASE_REG 0x68    /* 32:PMRR Low addr */
+#define DMAR_PLMLIMIT_REG 0x6c    /* 32:PMRR low limit */
+#define DMAR_PHMBASE_REG 0x70    /* 64:pmrr high base addr */
+#define DMAR_PHMLIMIT_REG 0x78    /* 64:pmrr high limit */
+#define DMAR_ICS_REG 0x9C    /* 32:Invalidation complete status register */
+#define DMAR_IECTL_REG 0xa0    /* 32:Invalidation event control register */
+#define DMAR_IEDATA_REG 0xa4    /* 32:Invalidation event data register */
+#define DMAR_IEADDR_REG 0xa8    /* 32:Invalidation event address register */
+#define DMAR_IEUADDR_REG 0xac    /* 32:Invalidation event upper address register */
+#define DMAR_IRTA_REG 0xb8    /* 64:Interrupt remapping table addr register */
+#define DMAR_CAP_REG 0x08    /* 64:Hardware supported capabilities */
+#define   CAP_PI (1LL << 59)
+#define   CAP_FL1GP (1LL << 56)
+#define   CAP_DRD (1LL << 55)
+#define   CAP_DWD (1LL << 54)
+#define   CAP_MAMV_MASK 0x3F
+#define   CAP_MAMV_SHIFT 48LL
+#define   cap_mamv(x) _xfld(x,CAP_MAMV)
+#define   CAP_NFR_MASK 0xFF
+#define   CAP_NFR_SHIFT 40LL
+#define   cap_nfr(x) (_xfld(x,CAP_NFR) + 1)
+#define   CAP_PSI (1LL << 39)
+#define   CAP_SLLPS_MASK 0xF
+#define   CAP_SLLPS_SHIFT 34LL
+#define   cap_sllps(x) _xfld(x,CAP_SLLPS)
+#define   CAP_FRO_MASK 0x3FF
+#define   CAP_FRO_SHIFT 24LL
+#define   cap_fro(x) (_xfld(x,CAP_FRO) * 16)
+#define   CAP_ZLR (1LL << 22)
+#define   CAP_MGAW_MASK 0x3F
+#define   CAP_MGAW_SHIFT 16LL
+#define   cap_mgaw(x) (_xfld(x,CAP_MGAW) + 1)
+#define   CAP_SAGAW_MASK 0x1F
+#define   CAP_SAGAW_SHIFT 8LL
+#define   cap_sagaw(x) _xfld(x,CAP_SAGAW)
+#define   CAP_CM (1LL << 7)
+#define   CAP_PHMR (1LL << 6)
+#define   CAP_PLMR (1LL << 5)
+#define   CAP_RWBF (1LL << 4)
+#define   CAP_AFL (1LL << 3)
+#define   CAP_ND_MASK 0x7
+#define   CAP_ND_SHIFT 0x00
+#define   cap_nd(x) (16 << (((x) & CAP_ND_MASK) << 1))
+
+#define DMAR_ECAP_REG 0x10 /* 64:Extended capabilities supported */
+#define   ECAP_PSS_MASK 0x1F
+#define   ECAP_PSS_SHIFT 35
+#define   ECAP_EAFS (1LL << 34)
+#define   ECAP_NWFS (1LL << 33)
+#define   ECAP_SRS (1LL << 31)
+#define   ECAP_ERS (1LL << 30)
+#define   ECAP_PRS (1LL << 29)
+#define   ECAP_PASID (1LL << 28)
+#define   ECAP_DIS (1LL << 27)
+#define   ECAP_NEST (1LL << 26)
+#define   ECAP_MTS (1LL << 25)
+#define   ECAP_ECS (1LL << 24)
+#define   ECAP_MHMV_MASK 0xF
+#define   ECAP_MHMV_SHIFT 0x20
+#define   ecap_mhmv(x) _xfld(x,ECAP_MHMV)
+#define   ECAP_IRO_MASK 0x3FF /* IOTLB Register */
+#define   ECAP_IRO_SHIFT 0x8
+#define   ecap_iro(x) (_xfld(x,ECAP_IRO) * 16)
+#define   ECAP_SC (1LL << 7) /* Snoop Control */
+#define   ECAP_PT (1LL << 6) /* HW Passthru */
+#define   ECAP_EIM (1LL << 4)
+#define   ECAP_IR (1LL << 3) /* Interrupt remap */
+#define   ECAP_DT (1LL << 2) /* Device IOTLB */
+#define   ECAP_QI (1LL << 1) /* Queued Invalidation */
+#define   ECAP_C (1LL << 0) /* Coherent cache */
+
+#define DMAR_GCMD_REG 0x18 /* 32:Global command register */
+#define   GCMD_TE (1LL << 31)
+#define   GCMD_SRTP (1LL << 30)
+#define   GCMD_SFL (1LL << 29)
+#define   GCMD_EAFL (1LL << 28)
+#define   GCMD_WBF (1LL << 27)
+#define   GCMD_QIE (1LL << 26)
+#define   GCMD_IRE (1LL << 25)
+#define   GCMD_SIRTP (1LL << 24)
+#define   GCMD_CFI (1LL << 23)
+
+#define DMAR_GSTS_REG 0x1c /* 32:Global status register */
+#define   GSTS_TES (1LL << 31)
+#define   GSTS_RTPS (1LL << 30)
+#define   GSTS_FLS (1LL << 29)
+#define   GSTS_AFLS (1LL << 28)
+#define   GSTS_WBFS (1LL << 27)
+#define   GSTS_QIES (1LL << 26)
+#define   GSTS_IRES (1LL << 25)
+#define   GSTS_IRTPS (1LL << 24)
+#define   GSTS_CFIS (1LL << 23)
+
+#define DMAR_CCMD_REG 0x28 /* 64:Context command reg */
+#define   CCMD_ICC (1LL << 63)
+#define   CCMD_CIRG_MASK 0x3
+#define   CCMD_CIRG_SHIFT 61
+#define   CCMD_CIRG(x) ((uint64_t)(x) << CCMD_CIRG_SHIFT)
+#define   CCMD_CAIG_MASK 0x3
+#define   CCMD_CAIG_SHIFT 59
+#define   CCMD_FM_MASK 0x3
+#define   CCMD_FM_SHIFT 32
+#define   CCMD_FM(x) (((uint64_t)(x) << CCMD_FM_SHIFT))
+#define   CCMD_SID_MASK 0xFFFF
+#define   CCMD_SID_SHIFT 8
+#define   CCMD_SID(x) (((x) << CCMD_SID_SHIFT))
+#define   CCMD_DID_MASK 0xFFFF
+#define   CCMD_DID_SHIFT 0
+#define   CCMD_DID(x) (((x) << CCMD_DID_SHIFT))
+
+#define CIG_GLOBAL CCMD_CIRG(CTX_GLOBAL)
+#define CIG_DOMAIN CCMD_CIRG(CTX_DOMAIN)
+#define CIG_DEVICE CCMD_CIRG(CTX_DEVICE)
+
+
+#define DMAR_FSTS_REG 0x34 /* 32:Fault Status register */
+#define   FSTS_FRI_MASK 0xFF
+#define   FSTS_FRI_SHIFT 8
+#define   FSTS_PRO (1LL << 7)
+#define   FSTS_ITE (1LL << 6)
+#define   FSTS_ICE (1LL << 5)
+#define   FSTS_IQE (1LL << 4)
+#define   FSTS_APF (1LL << 3)
+#define   FSTS_APO (1LL << 2)
+#define   FSTS_PPF (1LL << 1)
+#define   FSTS_PFO (1LL << 0)
+
+#define DMAR_FECTL_REG 0x38 /* 32:Fault control register */
+#define   FECTL_IM (1LL << 31)
+#define   FECTL_IP (1LL << 30)
+
+#define FRCD_HI_F (1LL << (127-64))
+#define FRCD_HI_T (1LL << (126-64))
+#define FRCD_HI_AT_MASK 0x3
+#define FRCD_HI_AT_SHIFT (124-64)
+#define FRCD_HI_PV_MASK 0xFFFFF
+#define FRCD_HI_PV_SHIFT (104-64)
+#define FRCD_HI_FR_MASK 0xFF
+#define FRCD_HI_FR_SHIFT (96-64)
+#define FRCD_HI_PP (1LL << (95-64))
+
+#define FRCD_HI_SID_MASK 0xFF
+#define FRCD_HI_SID_SHIFT 0
+#define FRCD_HI_BUS_SHIFT 8
+#define FRCD_HI_BUS_MASK 0xFF
+#define FRCD_HI_DEV_SHIFT 3
+#define FRCD_HI_DEV_MASK 0x1F
+#define FRCD_HI_FUN_SHIFT 0
+#define FRCD_HI_FUN_MASK 0x7
+
+#define DMAR_IOTLB_REG(x) (ecap_iro((x)->ecap) + 8)
+#define DMAR_IVA_REG(x) (ecap_iro((x)->ecap) + 0)
+
+#define DMAR_FRIH_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 8)
+#define DMAR_FRIL_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 0)
+
+#define IOTLB_IVT (1LL << 63)
+#define IOTLB_IIRG_MASK 0x3
+#define IOTLB_IIRG_SHIFT 60
+#define IOTLB_IIRG(x) ((uint64_t)(x) << IOTLB_IIRG_SHIFT)
+#define IOTLB_IAIG_MASK 0x3
+#define IOTLB_IAIG_SHIFT 57
+#define IOTLB_DR (1LL << 49)
+#define IOTLB_DW (1LL << 48)
+#define IOTLB_DID_MASK 0xFFFF
+#define IOTLB_DID_SHIFT 32
+#define IOTLB_DID(x) ((uint64_t)(x) << IOTLB_DID_SHIFT)
+
+#define IIG_GLOBAL IOTLB_IIRG(IOTLB_GLOBAL)
+#define IIG_DOMAIN IOTLB_IIRG(IOTLB_DOMAIN)
+#define IIG_PAGE IOTLB_IIRG(IOTLB_PAGE)
+
+#define DMAR_IQH_REG 0x80 /* 64:Invalidation queue head register */
+#define DMAR_IQT_REG 0x88 /* 64:Invalidation queue tail register */
+#define DMAR_IQA_REG 0x90 /* 64:Invalidation queue addr register */
+#define IQA_QS_256 0 /* 256 entries */
+#define IQA_QS_512 1 /* 512 */
+#define IQA_QS_1K 2 /* 1024 */
+#define IQA_QS_2K 3 /* 2048 */
+#define IQA_QS_4K 4 /* 4096 */
+#define IQA_QS_8K 5 /* 8192 */
+#define IQA_QS_16K 6 /* 16384 */
+#define IQA_QS_32K 7 /* 32768 */
+
+/* Read-Modify-Write helpers */
+/* Replace the (mask << shift) field of a 32-bit value with nv.
+ * Assumes shift + field width <= 32 (32-bit register fields only). */
+static inline void iommu_rmw32(void *ov, uint32_t mask, uint32_t shift, uint32_t nv)
+{
+ *(uint32_t *)ov &= ~(mask << shift);
+ *(uint32_t *)ov |= (nv & mask) << shift;
+}
+/* Replace the (mask << shift) field of a 64-bit value with nv.
+ * The mask must be widened to 64 bits before shifting: shifting a
+ * uint32_t by 32 or more (fields in the high word, e.g. domain-id at
+ * bit 40+) is undefined behavior and would corrupt the clear mask. */
+static inline void iommu_rmw64(void *ov, uint32_t mask, uint32_t shift, uint64_t nv)
+{
+ *(uint64_t *)ov &= ~((uint64_t)mask << shift);
+ *(uint64_t *)ov |= (nv & mask) << shift;
+}
+
+/*
+ * Root Entry: one per bus (256 x 128 bit = 4k)
+ *   0        = Present
+ *   1:11     = Reserved
+ *   12:HAW-1 = Context Table Pointer
+ *   HAW:63   = Reserved
+ *   64:127   = Reserved
+ */
+/* Bit 0 of the low word is the Present bit */
+#define ROOT_P (1L << 0)
+struct root_entry {
+ uint64_t lo;
+ uint64_t hi;
+};
+
+/* Check if root entry is valid */
+static inline bool
+root_entry_is_valid(struct root_entry *re)
+{
+ return (re->lo & ROOT_P);
+}
+
+/*
+ * Context Entry: one per devfn (256 x 128 bit = 4k)
+ *   0      = Present
+ *   1      = Fault Processing Disable
+ *   2:3    = Translation Type
+ *   4:11   = Reserved
+ *   12:63  = Second Level Page Translation
+ *   64:66  = Address Width (# PTE levels)
+ *   67:70  = Ignore
+ *   71     = Reserved
+ *   72:87  = Domain ID
+ *   88:127 = Reserved
+ */
+#define CTX_P (1L << 0)
+#define CTX_FPD (1L << 1)
+#define CTX_T_MASK 0x3
+#define CTX_T_SHIFT 2
+enum {
+ CTX_T_MULTI,
+ CTX_T_IOTLB,
+ CTX_T_PASSTHRU
+};
+
+#define CTX_H_AW_MASK 0x7
+#define CTX_H_AW_SHIFT 0
+#define CTX_H_USER_MASK 0xF
+#define CTX_H_USER_SHIFT 3
+#define CTX_H_DID_MASK 0xFFFF
+#define CTX_H_DID_SHIFT 8
+
+struct context_entry {
+ uint64_t lo;
+ uint64_t hi;
+};
+
+/* Accessors for the 128-bit VT-d context entry (layout documented above).
+ * Setters clear the target field before OR-ing in the new value. */
+
+/* Set fault processing enable/disable */
+static inline void
+context_set_fpd(struct context_entry *ce, int enable)
+{
+ ce->lo &= ~CTX_FPD;
+ if (enable)
+ ce->lo |= CTX_FPD;
+}
+
+/* Set context entry present */
+static inline void
+context_set_present(struct context_entry *ce)
+{
+ ce->lo |= CTX_P;
+}
+
+/* Set Second Level Page Table Entry PA */
+static inline void
+context_set_slpte(struct context_entry *ce, paddr_t slpte)
+{
+ /* Keep the low flag bits, replace the page-aligned address */
+ ce->lo &= VTD_PAGE_MASK;
+ ce->lo |= (slpte & ~VTD_PAGE_MASK);
+}
+
+/* Set translation type */
+static inline void
+context_set_translation_type(struct context_entry *ce, int tt)
+{
+ ce->lo &= ~(CTX_T_MASK << CTX_T_SHIFT);
+ ce->lo |= ((tt & CTX_T_MASK) << CTX_T_SHIFT);
+}
+
+/* Set Address Width (# of Page Table levels) */
+static inline void
+context_set_address_width(struct context_entry *ce, int lvl)
+{
+ ce->hi &= ~(CTX_H_AW_MASK << CTX_H_AW_SHIFT);
+ ce->hi |= ((lvl & CTX_H_AW_MASK) << CTX_H_AW_SHIFT);
+}
+
+/* Set domain ID */
+static inline void
+context_set_domain_id(struct context_entry *ce, int did)
+{
+ ce->hi &= ~(CTX_H_DID_MASK << CTX_H_DID_SHIFT);
+ ce->hi |= ((did & CTX_H_DID_MASK) << CTX_H_DID_SHIFT);
+}
+
+/* Get Second Level Page Table PA */
+static inline uint64_t
+context_pte(struct context_entry *ce)
+{
+ return (ce->lo & ~VTD_PAGE_MASK);
+}
+
+/* Get translation type */
+static inline int
+context_translation_type(struct context_entry *ce)
+{
+ return (ce->lo >> CTX_T_SHIFT) & CTX_T_MASK;
+}
+
+/* Get domain ID */
+static inline int
+context_domain_id(struct context_entry *ce)
+{
+ return (ce->hi >> CTX_H_DID_SHIFT) & CTX_H_DID_MASK;
+}
+
+/* Get Address Width */
+static inline int
+context_address_width(struct context_entry *ce)
+{
+ return VTD_LEVELTOAW((ce->hi >> CTX_H_AW_SHIFT) & CTX_H_AW_MASK);
+}
+
+/* Check if context entry is valid */
+static inline bool
+context_entry_is_valid(struct context_entry *ce)
+{
+ return (ce->lo & CTX_P);
+}
+
+/* User-available bits in context entry */
+static inline int
+context_user(struct context_entry *ce)
+{
+ return (ce->hi >> CTX_H_USER_SHIFT) & CTX_H_USER_MASK;
+}
+
+static inline void
+context_set_user(struct context_entry *ce, int v)
+{
+ ce->hi &= ~(CTX_H_USER_MASK << CTX_H_USER_SHIFT);
+ ce->hi |=  ((v & CTX_H_USER_MASK) << CTX_H_USER_SHIFT);
+}
+
+/*
+ * Fault entry
+ *   0..HAW-1 = Fault address
+ *   HAW:63   = Reserved
+ *   64:71    = Source ID
+ *   96:103   = Fault Reason
+ *   104:123  = PV
+ *   124:125  = Address Translation type
+ *   126      = Type (0 = Read, 1 = Write)
+ *   127      = Fault bit
+ */
+struct fault_entry
+{
+ uint64_t lo;
+ uint64_t hi;
+};
+
+/* PTE Entry: 512 x 64-bit = 4k */
+#define PTE_P (1L << 0)
+#define PTE_R 0x00
+#define PTE_W (1L << 1)
+#define PTE_US  (1L << 2)
+#define PTE_PWT (1L << 3)
+#define PTE_PCD (1L << 4)
+#define PTE_A   (1L << 5)
+#define PTE_D   (1L << 6)
+#define PTE_PAT (1L << 7)
+#define PTE_G   (1L << 8)
+#define PTE_EA  (1L << 10)
+#define PTE_XD  (1LL << 63)
+
+/* PDE Level entry */
+#define PTE_PS  (1L << 7)
+
+/* PDPE Level entry */
+
+/* ----------------------------------------------------------------
+ * 5555555444444444333333333222222222111111111000000000------------
+ * [PML4 ->] PDPE.1GB
+ * [PML4 ->] PDPE.PDE -> PDE.2MB
+ * [PML4 ->] PDPE.PDE -> PDE -> PTE
+ * GAW0 = (12.20) (PTE)
+ * GAW1 = (21.29) (PDE)
+ * GAW2 = (30.38) (PDPE)
+ * GAW3 = (39.47) (PML4)
+ * GAW4 = (48.57) (n/a)
+ * GAW5 = (58.63) (n/a)
+ */
+struct pte_entry {
+ uint64_t val;
+};
+
+/*
+ * Queued Invalidation entry
+ *  0:3   = 01h
+ *  4:5   = Granularity
+ *  6:15  = Reserved
+ *  16:31 = Domain ID
+ *  32:47 = Source ID
+ *  48:49 = FM
+ */
+
+/* Invalidate Context Entry */
+#define QI_CTX_DID_MASK 0xFFFF
+#define QI_CTX_DID_SHIFT 16
+#define QI_CTX_SID_MASK 0xFFFF
+#define QI_CTX_SID_SHIFT 32
+#define QI_CTX_FM_MASK 0x3
+#define QI_CTX_FM_SHIFT 48
+#define QI_CTX_IG_MASK 0x3
+#define QI_CTX_IG_SHIFT 4
+#define QI_CTX_DID(x) (((uint64_t)(x) << QI_CTX_DID_SHIFT))
+#define QI_CTX_SID(x) (((uint64_t)(x) << QI_CTX_SID_SHIFT))
+#define QI_CTX_FM(x) (((uint64_t)(x) << QI_CTX_FM_SHIFT))
+
+#define QI_CTX_IG_GLOBAL (CTX_GLOBAL << QI_CTX_IG_SHIFT)
+#define QI_CTX_IG_DOMAIN (CTX_DOMAIN << QI_CTX_IG_SHIFT)
+#define QI_CTX_IG_DEVICE (CTX_DEVICE << QI_CTX_IG_SHIFT)
+
+/* Invalidate IOTLB Entry */
+#define QI_IOTLB_DID_MASK 0xFFFF
+#define QI_IOTLB_DID_SHIFT 16
+#define QI_IOTLB_IG_MASK 0x3
+#define QI_IOTLB_IG_SHIFT 4
+#define QI_IOTLB_DR (1LL << 6)
+#define QI_IOTLB_DW (1LL << 5)
+#define QI_IOTLB_DID(x) (((uint64_t)(x) << QI_IOTLB_DID_SHIFT))
+
+#define QI_IOTLB_IG_GLOBAL (1 << QI_IOTLB_IG_SHIFT)
+#define QI_IOTLB_IG_DOMAIN (2 << QI_IOTLB_IG_SHIFT)
+#define QI_IOTLB_IG_PAGE (3 << QI_IOTLB_IG_SHIFT)
+
+/* QI Commands */
+#define QI_CTX 0x1
+#define QI_IOTLB 0x2
+#define QI_DEVTLB 0x3
+#define QI_INTR 0x4
+#define QI_WAIT 0x5
+#define QI_EXTTLB 0x6
+#define QI_PAS 0x7
+#define QI_EXTDEV 0x8
+
+struct qi_entry {
+ uint64_t lo;
+ uint64_t hi;
+};
+
+enum {
+ CTX_GLOBAL = 1,
+ CTX_DOMAIN,
+ CTX_DEVICE,
+
+ IOTLB_GLOBAL = 1,
+ IOTLB_DOMAIN,
+ IOTLB_PAGE,
+};
+
+enum {
+ VTD_FAULT_ROOT_P = 0x1,         /* P field in root entry is 0 */
+ VTD_FAULT_CTX_P = 0x2,          /* P field in context entry is 0 */
+ VTD_FAULT_CTX_INVAL = 0x3,      /* context AW/TT/SLPPTR invalid */
+ VTD_FAULT_LIMIT = 0x4,          /* Address is outside of MGAW */
+ VTD_FAULT_WRITE = 0x5,          /* Address-translation fault, non-writable */
+ VTD_FAULT_READ = 0x6,           /* Address-translation fault, non-readable */
+ VTD_FAULT_PTE_INVAL = 0x7,      /* page table hw access error */
+ VTD_FAULT_ROOT_INVAL = 0x8,     /* root table hw access error */
+ VTD_FAULT_CTX_TBL_INVAL = 0x9,  /* context entry hw access error */
+ VTD_FAULT_ROOT_RESERVED = 0xa,  /* non-zero reserved field in root entry */
+ VTD_FAULT_CTX_RESERVED = 0xb,   /* non-zero reserved field in context entry */
+ VTD_FAULT_PTE_RESERVED = 0xc,   /* non-zero reserved field in paging entry */
+ VTD_FAULT_CTX_TT = 0xd,         /* invalid translation type */
+};
+
+#endif
+
+void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
+void dmar_ptmap(bus_dma_tag_t, bus_addr_t);
+void acpidmar_sw(int);
+
+#define __EXTRACT(v,m) (((v) >> m##_SHIFT) & m##_MASK)
diff --git a/sys/dev/acpi/acpireg.h b/sys/dev/acpi/acpireg.h
index bfbb73ce2..8ba55c8ee 100644
--- a/sys/dev/acpi/acpireg.h
+++ b/sys/dev/acpi/acpireg.h
@@ -623,6 +623,9 @@ struct acpi_ivmd {
 struct acpi_ivhd {
  uint8_t type;
  uint8_t flags;
+#define IVHD_PPRSUP (1L << 7)
+#define IVHD_PREFSUP (1L << 6)
+#define IVHD_COHERENT (1L << 5)
 #define IVHD_IOTLB (1L << 4)
 #define IVHD_ISOC (1L << 3)
 #define IVHD_RESPASSPW (1L << 2)
@@ -638,13 +641,28 @@ struct acpi_ivhd {
 #define IVHD_UNITID_MASK 0x1F
 #define IVHD_MSINUM_SHIFT 0
 #define IVHD_MSINUM_MASK 0x1F
- uint32_t reserved;
+ uint32_t feature;
 } __packed;
 
+struct acpi_ivhd_ext {
+ uint8_t type;
+ uint8_t flags;
+ uint16_t length;
+ uint16_t devid;
+ uint16_t cap;
+ uint64_t address;
+ uint16_t segment;
+ uint16_t info;
+ uint32_t attrib;
+ uint64_t efr;
+ uint8_t reserved[8];
+} __packed;
+
 union acpi_ivrs_entry {
  struct {
  uint8_t type;
 #define IVRS_IVHD 0x10
+#define IVRS_IVHD_EXT 0x11
 #define IVRS_IVMD_ALL 0x20
 #define IVRS_IVMD_SPECIFIED 0x21
 #define IVRS_IVMD_RANGE 0x22
@@ -652,6 +670,7 @@ union acpi_ivrs_entry {
  uint16_t length;
  } __packed;
  struct acpi_ivhd ivhd;
+ struct acpi_ivhd_ext ivhd_ext;
  struct acpi_ivmd ivmd;
 } __packed;
 
diff --git a/sys/dev/acpi/amd_iommu.h b/sys/dev/acpi/amd_iommu.h
new file mode 100644
index 000000000..db6d371aa
--- /dev/null
+++ b/sys/dev/acpi/amd_iommu.h
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2019 Jordan Hargrave <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef __amd_iommu_h__
+#define __amd_iommu_h__
+
+#define DEV_TAB_BASE_REG 0x0000
+#define CMD_BASE_REG 0x0008
+#define EVT_BASE_REG 0x0010
+
+#define EXCL_BASE_REG 0x0020
+#define EXCL_LIMIT_REG 0x0028
+
+/* Extended Feature Register */
+#define EXTFEAT_REG 0x0030
+#define  EFR_PREFSUP (1L << 0)
+#define  EFR_PPRSUP (1L << 1)
+#define  EFR_NXSUP (1L << 3)
+#define  EFR_GTSUP (1L << 4)
+#define  EFR_IASUP (1L << 6)
+#define  EFR_GASUP (1L << 7)
+#define  EFR_HESUP (1L << 8)
+#define  EFR_PCSUP (1L << 9)
+#define  EFR_HATS_SHIFT 10
+#define  EFR_HATS_MASK 0x3
+#define  EFR_GATS_SHIFT 12
+#define  EFR_GATS_MASK 0x3
+#define  EFR_GLXSUP_SHIFT 14
+#define  EFR_GLXSUP_MASK 0x3
+#define  EFR_SMIFSUP_SHIFT 16
+#define  EFR_SMIFSUP_MASK 0x3        
+#define  EFR_SMIFRC_SHIFT 18
+#define  EFR_SMIFRC_MASK 0x7
+#define  EFR_GAMSUP_SHIFT 21
+#define  EFR_GAMSUP_MASK 0x7
+
+#define CMD_HEAD_REG 0x2000
+#define CMD_TAIL_REG 0x2008
+#define EVT_HEAD_REG 0x2010
+#define EVT_TAIL_REG 0x2018
+
+#define IOMMUSTS_REG 0x2020
+
+#define DEV_TAB_MASK 0x000FFFFFFFFFF000LL
+#define DEV_TAB_LEN 0x1FF
+
+/* IOMMU Control */
+#define IOMMUCTL_REG 0x0018
+#define  CTL_IOMMUEN (1L << 0)
+#define  CTL_HTTUNEN (1L << 1)
+#define  CTL_EVENTLOGEN (1L << 2)
+#define  CTL_EVENTINTEN (1L << 3)
+#define  CTL_COMWAITINTEN (1L << 4)
+#define  CTL_INVTIMEOUT_SHIFT 5
+#define  CTL_INVTIMEOUT_MASK   0x7
+#define  CTL_INVTIMEOUT_NONE 0
+#define  CTL_INVTIMEOUT_1MS     1
+#define  CTL_INVTIMEOUT_10MS    2
+#define  CTL_INVTIMEOUT_100MS   3
+#define  CTL_INVTIMEOUT_1S      4
+#define  CTL_INVTIMEOUT_10S     5
+#define  CTL_INVTIMEOUT_100S    6
+#define  CTL_PASSPW (1L << 8)
+#define  CTL_RESPASSPW (1L << 9)
+#define  CTL_COHERENT (1L << 10)
+#define  CTL_ISOC (1L << 11)
+#define  CTL_CMDBUFEN (1L << 12)
+#define  CTL_PPRLOGEN (1L << 13)
+#define  CTL_PPRINTEN (1L << 14)
+#define  CTL_PPREN (1L << 15)
+#define  CTL_GTEN (1L << 16)
+#define  CTL_GAEN (1L << 17)
+#define  CTL_CRW_SHIFT 18
+#define  CTL_CRW_MASK 0xF
+#define  CTL_SMIFEN (1L << 22)
+#define  CTL_SLFWBDIS (1L << 23)
+#define  CTL_SMIFLOGEN (1L << 24)
+#define  CTL_GAMEN_SHIFT 25
+#define  CTL_GAMEN_MASK 0x7
+#define  CTL_GALOGEN (1L << 28)
+#define  CTL_GAINTEN (1L << 29)
+#define  CTL_DUALPPRLOGEN_SHIFT 30
+#define  CTL_DUALPPRLOGEN_MASK 0x3
+#define  CTL_DUALEVTLOGEN_SHIFT 32
+#define  CTL_DUALEVTLOGEN_MASK 0x3
+#define  CTL_DEVTBLSEGEN_SHIFT 34
+#define  CTL_DEVTBLSEGEN_MASK 0x7
+#define  CTL_PRIVABRTEN_SHIFT 37
+#define  CTL_PRIVABRTEN_MASK 0x3
+#define  CTL_PPRAUTORSPEN (1LL << 39)
+#define  CTL_MARCEN (1LL << 40)
+#define  CTL_BLKSTOPMRKEN (1LL << 41)
+#define  CTL_PPRAUTOSPAON (1LL << 42)
+#define  CTL_DOMAINIDPNE (1LL << 43)
+
+#define CMD_BASE_MASK 0x000FFFFFFFFFF000LL
+#define CMD_TBL_SIZE 4096
+#define CMD_TBL_LEN_4K (8LL << 56)
+#define CMD_TBL_LEN_8K (9LL << 56)
+
+#define EVT_BASE_MASK 0x000FFFFFFFFFF000LL
+#define EVT_TBL_SIZE 4096
+#define EVT_TBL_LEN_4K (8LL << 56)
+#define EVT_TBL_LEN_8K (9LL << 56)
+
+/*========================
+ * DEVICE TABLE ENTRY
+ * Contains mapping of bus-device-function
+ *
+ *  0       Valid (V)
+ *  1       Translation Valid (TV)
+ *  7:8     Host Address Dirty (HAD)
+ *  9:11    Page Table Depth (usually 4)
+ *  12:51   Page Table Physical Address
+ *  52      PPR Enable
+ *  53      GPRP
+ *  54      Guest I/O Protection Valid (GIoV)
+ *  55      Guest Translation Valid (GV)
+ *  56:57   Guest Levels translated (GLX)
+ *  58:60   Guest CR3 bits 12:14 (GCR3TRP)
+ *  61      I/O Read Permission (IR)
+ *  62      I/O Write Permission (IW)
+ *  64:79   Domain ID
+ *  80:95   Guest CR3 bits 15:30 (GCR3TRP)
+ *  96      IOTLB Enable (I)
+ *  97      Suppress multiple I/O page faults (SE)
+ *  98      Suppress all I/O page faults (SA)
+ *  99:100  Port I/O Control (IoCTL)
+ *  101     Cache IOTLB Hint
+ *  102     Snoop Disable (SD)
+ *  103     Allow Exclusion (EX)
+ *  104:105 System Management Message (SysMgt)
+ *  107:127 Guest CR3 bits 31:51 (GCR3TRP)
+ *  128     Interrupt Map Valid (IV)
+ *  129:132 Interrupt Table Length (IntTabLen)
+ *========================*/
+struct ivhd_dte {
+ uint32_t dw0;
+ uint32_t dw1;
+ uint32_t dw2;
+ uint32_t dw3;
+ uint32_t dw4;
+ uint32_t dw5;
+ uint32_t dw6;
+ uint32_t dw7;
+} __packed;
+
+#define DTE_V (1L << 0) // dw0
+#define DTE_TV (1L << 1) // dw0
+#define DTE_LEVEL_SHIFT 9 // dw0
+#define DTE_LEVEL_MASK 0x7 // dw0
+#define DTE_HPTRP_MASK 0x000FFFFFFFFFF000LL // dw0,1
+
+#define DTE_PPR (1L << 20) // dw1
+#define DTE_GPRP (1L << 21) // dw1
+#define DTE_GIOV (1L << 22) // dw1
+#define DTE_GV (1L << 23) // dw1
+#define DTE_IR (1L << 29) // dw1
+#define DTE_IW (1L << 30) // dw1
+
+#define DTE_DID_MASK 0xFFFF // dw2
+
+#define DTE_IV (1L << 0) // dw3
+#define DTE_SE (1L << 1)
+#define DTE_SA (1L << 2)
+#define DTE_INTTABLEN_SHIFT 1
+#define DTE_INTTABLEN_MASK 0xF
+#define DTE_IRTP_MASK 0x000FFFFFFFFFFFC0LL
+
+#define PTE_LVL5                48
+#define PTE_LVL4                39
+#define PTE_LVL3                30
+#define PTE_LVL2                21
+#define PTE_LVL1                12
+
+#define PTE_NXTLVL(x)           (((x) & 0x7) << 9)
+#define PTE_PADDR_MASK 0x000FFFFFFFFFF000LL
+#define PTE_IR                  (1LL << 61)
+#define PTE_IW                  (1LL << 62)
+
+#define DTE_GCR312_MASK 0x3
+#define DTE_GCR312_SHIFT 24
+
+#define DTE_GCR315_MASK 0xFFFF
+#define DTE_GCR315_SHIFT 16
+
+#define DTE_GCR331_MASK 0xFFFFF
+#define DTE_GCR331_SHIFT 12
+
+#define _get64(x)   *(uint64_t *)(x)
+#define _put64(x,v) *(uint64_t *)(x) = (v)
+
+/* Set Guest CR3 address */
+static inline void
+dte_set_guest_cr3(struct ivhd_dte *dte, paddr_t paddr)
+{
+ iommu_rmw32(&dte->dw1, DTE_GCR312_MASK, DTE_GCR312_SHIFT, paddr >> 12);
+ iommu_rmw32(&dte->dw2, DTE_GCR315_MASK, DTE_GCR315_SHIFT, paddr >> 15);
+ iommu_rmw32(&dte->dw3, DTE_GCR331_MASK, DTE_GCR331_SHIFT, paddr >> 31);
+}
+
+/* Set Interrupt Remapping Root Pointer */
+static inline void
+dte_set_interrupt_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
+{
+ uint64_t ov = _get64(&dte->dw4);
+ _put64(&dte->dw4, (ov & ~DTE_IRTP_MASK) | (paddr & DTE_IRTP_MASK));
+}
+
+/* Set Interrupt Remapping Table length */
+static inline void
+dte_set_interrupt_table_length(struct ivhd_dte *dte, int nEnt)
+{
+ iommu_rmw32(&dte->dw4, DTE_INTTABLEN_MASK, DTE_INTTABLEN_SHIFT, nEnt);
+}
+
+/* Set Interrupt Remapping Valid */
+static inline void
+dte_set_interrupt_valid(struct ivhd_dte *dte)
+{
+ dte->dw4 |= DTE_IV;
+}
+
+/* Set Domain ID in Device Table Entry */
+static inline void
+dte_set_domain(struct ivhd_dte *dte, uint16_t did)
+{
+ dte->dw2 = (dte->dw2 & ~DTE_DID_MASK) | (did & DTE_DID_MASK);
+}
+
+/* Set Page Table Pointer for device */
+static inline void
+dte_set_host_page_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
+{
+ uint64_t ov;
+
+ ov = _get64(&dte->dw0) & ~DTE_HPTRP_MASK;
+ ov |= (paddr & DTE_HPTRP_MASK) | PTE_IW | PTE_IR;
+
+ _put64(&dte->dw0, ov);
+}
+
+/* Set Page Table Levels Mask */
+static inline void
+dte_set_mode(struct ivhd_dte *dte, int mode)
+{
+ iommu_rmw32(&dte->dw0, DTE_LEVEL_MASK, DTE_LEVEL_SHIFT, mode);
+}
+
+static inline void
+dte_set_tv(struct ivhd_dte *dte)
+{
+ dte->dw0 |= DTE_TV;
+}
+
+/* Set Device Table Entry valid.
+ * Domain/Level/Mode/PageTable should already be set
+ */
+static inline void
+dte_set_valid(struct ivhd_dte *dte)
+{
+ dte->dw0 |= DTE_V;
+}
+
+/* Check if Device Table Entry is valid */
+static inline int
+dte_is_valid(struct ivhd_dte *dte)
+{
+ return (dte->dw0 & DTE_V);
+}
+
+/*=========================================
+ * COMMAND
+ *=========================================*/
+struct ivhd_command {
+ uint32_t dw0;
+ uint32_t dw1;
+ uint32_t dw2;
+ uint32_t dw3;
+} __packed;
+
+#define CMD_SHIFT 28
+
+enum {
+ COMPLETION_WAIT = 0x01,
+ INVALIDATE_DEVTAB_ENTRY = 0x02,
+ INVALIDATE_IOMMU_PAGES = 0x03,
+ INVALIDATE_IOTLB_PAGES = 0x04,
+ INVALIDATE_INTERRUPT_TABLE = 0x05,
+ PREFETCH_IOMMU_PAGES = 0x06,
+ COMPLETE_PPR_REQUEST = 0x07,
+ INVALIDATE_IOMMU_ALL = 0x08,
+};
+
+/*=========================================
+ * EVENT
+ *=========================================*/
+struct ivhd_event {
+ uint32_t dw0;
+ uint32_t dw1;
+ uint32_t dw2;   // address.lo
+ uint32_t dw3; // address.hi
+} __packed;
+
+#define EVT_TYPE_SHIFT 28       // dw1.0xF0000000
+#define EVT_TYPE_MASK 0xF
+#define EVT_SID_SHIFT 0        // dw0.0x0000FFFF
+#define EVT_SID_MASK 0xFFFF
+#define EVT_DID_SHIFT 0
+#define EVT_DID_MASK 0xFFFF   // dw1.0x0000FFFF
+#define EVT_FLAG_SHIFT   16
+#define EVT_FLAG_MASK   0xFFF    // dw1.0x0FFF0000
+
+/* IOMMU Fault reasons */
+enum {
+ ILLEGAL_DEV_TABLE_ENTRY = 0x1,
+ IO_PAGE_FAULT = 0x2,
+ DEV_TAB_HARDWARE_ERROR = 0x3,
+ PAGE_TAB_HARDWARE_ERROR = 0x4,
+ ILLEGAL_COMMAND_ERROR = 0x5,
+ COMMAND_HARDWARE_ERROR = 0x6,
+ IOTLB_INV_TIMEOUT = 0x7,
+ INVALID_DEVICE_REQUEST = 0x8,
+};
+
+#define EVT_GN (1L << 16)
+#define EVT_NX (1L << 17)
+#define EVT_US (1L << 18)
+#define EVT_I (1L << 19)
+#define EVT_PR (1L << 20)
+#define EVT_RW (1L << 21)
+#define EVT_PE (1L << 22)
+#define EVT_RZ (1L << 23)
+#define EVT_TR (1L << 24)
+
+struct iommu_softc;
+
+int ivhd_flush_devtab(struct iommu_softc *, int);
+int ivhd_invalidate_iommu_all(struct iommu_softc *);
+int ivhd_invalidate_interrupt_table(struct iommu_softc *, int);
+int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
+int ivhd_invalidate_domain(struct iommu_softc *, int);
+
+void _dumppte(struct pte_entry *, int, vaddr_t);
+
+#endif
diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi
index e57c39938..1cf6f2fbb 100644
--- a/sys/dev/acpi/files.acpi
+++ b/sys/dev/acpi/files.acpi
@@ -70,6 +70,11 @@ device acpiprt
 attach acpiprt at acpi
 file dev/acpi/acpiprt.c acpiprt needs-flag
 
+# DMAR device
+device acpidmar
+attach acpidmar at acpi
+file dev/acpi/acpidmar.c acpidmar
+
 # Docking station
 device acpidock
 attach acpidock at acpi

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Daniel Dickman
> [PATCH] Add IOMMU support for Intel VT-d and AMD Vi
>
> This hooks each pci device and overrides bus_dmamap_xxx to issue
> remap of DMA requests to virtual DMA space.  It protects devices
> from issuing I/O requests to memory in the system that is outside
> the requested DMA space.

Hi Jordan, thanks for working on this. I would like to see iommu
support...

> + uint64_t efr;
> + uint8_t reserved[8];
> +} __packd;

...that being said, is the above a typo?

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Jordan Hargrave
Oh good catch thanks. Weird, it does compile!

________________________________
From: Daniel Dickman <[hidden email]>
Sent: Tuesday, September 1, 2020 11:23 PM
To: Jordan Hargrave <[hidden email]>
Cc: [hidden email] <[hidden email]>
Subject: Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

> [PATCH] Add IOMMU support for Intel VT-d and AMD Vi
>
> This hooks each pci device and overrides bus_dmamap_xxx to issue
> remap of DMA requests to virtual DMA space.  It protects devices
> from issuing I/O requests to memory in the system that is outside
> the requested DMA space.

Hi Jordan, thanks for working on this. I would like to see iommu
support...

> +     uint64_t        efr;
> +     uint8_t         reserved[8];
> +} __packd;

...that being said, is the above a typo?
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Mark Kettenis
In reply to this post by Jordan Hargrave
> Date: Tue, 1 Sep 2020 17:20:19 -0500
> From: Jordan Hargrave <[hidden email]>
>
> [PATCH] Add IOMMU support for Intel VT-d and AMD Vi
>
> This hooks each pci device and overrides bus_dmamap_xxx to issue
> remap of DMA requests to virtual DMA space.  It protects devices
> from issuing I/O requests to memory in the system that is outside
> the requested DMA space.
> ---
>  sys/arch/amd64/conf/GENERIC          |    1 +
>  sys/arch/amd64/conf/RAMDISK          |    1 +
>  sys/arch/amd64/conf/RAMDISK_CD       |    1 +
>  sys/arch/amd64/include/pci_machdep.h |    3 +-
>  sys/arch/amd64/pci/pci_machdep.c     |   15 +-
>  sys/dev/acpi/acpi.c                  |    5 +
>  sys/dev/acpi/acpidmar.c              | 2988 ++++++++++++++++++++++++++
>  sys/dev/acpi/acpidmar.h              |  534 +++++
>  sys/dev/acpi/acpireg.h               |   21 +-
>  sys/dev/acpi/amd_iommu.h             |  358 +++
>  sys/dev/acpi/files.acpi              |    5 +
>  sys/dev/pci/pci.c                    |   28 +
>  sys/dev/pci/pcivar.h                 |    2 +
>  13 files changed, 3959 insertions(+), 3 deletions(-)
>  create mode 100644 sys/dev/acpi/acpidmar.c
>  create mode 100644 sys/dev/acpi/acpidmar.h
>  create mode 100644 sys/dev/acpi/amd_iommu.h

This needs some further cleanup and style love.  But let's leave that
alone for now.

How much of this code is really shared between DMAR and IVRS?  It
would be nice to split it out between those two if we can avoid code
duplication.

iommu_writel(), iommu_readl(), iommu_writeq() etc., are a bit too
Linuxy; iommu_write_4(), iommu_read_4(), iommu_write_8() would be
better names.

I don't fully grasp why you need acpidmar_intr_establish().  I can see
that MSI interrupts from devices behind the IOMMU need to go through
the IOMMU since they're essentially memory transaction.  But your code
seems to only deal with the IOMMU's error interrupt.  Does the IOMMU
interrupt itself go through the IOMMU as well?

Why do you need to explicitly call acpidmar_sw()?  Naively I would
think that you need to call this fairly late, but you call it before
config_suspend_all(DVACT_SUSPEND) happens.  Is there a reason why this
can't happen as part of normal config_suspend_all(DVACT_SUSPEND)
processing?

I think the way you use pci_probe_device_hook() is fine.

What is the point of having function that start with an underscore?
Feels like another Linux-ism to me...

A few more random things in the code below...


> diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
> index 2c49f91a1..1eda12bc9 100644
> --- a/sys/arch/amd64/conf/GENERIC
> +++ b/sys/arch/amd64/conf/GENERIC
> @@ -45,6 +45,7 @@ acpibtn* at acpi?
>  acpicpu* at acpi?
>  acpicmos* at acpi?
>  acpidock* at acpi?
> +acpidmar0 at acpi?
>  acpiec* at acpi?
>  acpipci* at acpi?
>  acpiprt* at acpi?
> diff --git a/sys/arch/amd64/conf/RAMDISK b/sys/arch/amd64/conf/RAMDISK
> index 10148add1..7ab48f32e 100644
> --- a/sys/arch/amd64/conf/RAMDISK
> +++ b/sys/arch/amd64/conf/RAMDISK
> @@ -34,6 +34,7 @@ acpipci* at acpi?
>  acpiprt* at acpi?
>  acpimadt0 at acpi?
>  #acpitz* at acpi?
> +acpidmar* at acpi? disable
>  
>  mpbios0 at bios0
>  
> diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD
> index 91022751e..82a24e210 100644
> --- a/sys/arch/amd64/conf/RAMDISK_CD
> +++ b/sys/arch/amd64/conf/RAMDISK_CD
> @@ -48,6 +48,7 @@ sdhc* at acpi?
>  acpihve* at acpi?
>  chvgpio*        at acpi?
>  glkgpio* at acpi?
> +acpidmar* at acpi? disable
>  
>  mpbios0 at bios0
>  
> diff --git a/sys/arch/amd64/include/pci_machdep.h b/sys/arch/amd64/include/pci_machdep.h
> index bc295cc22..c725bdc73 100644
> --- a/sys/arch/amd64/include/pci_machdep.h
> +++ b/sys/arch/amd64/include/pci_machdep.h
> @@ -91,7 +91,8 @@ void *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
>      int, struct cpu_info *,
>      int (*)(void *), void *, const char *);
>  void pci_intr_disestablish(pci_chipset_tag_t, void *);
> -#define pci_probe_device_hook(c, a) (0)
> +int pci_probe_device_hook(pci_chipset_tag_t,
> +    struct pci_attach_args *);
>  
>  void pci_dev_postattach(struct device *, struct pci_attach_args *);
>  
> diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c
> index cf4e835de..b700946a4 100644
> --- a/sys/arch/amd64/pci/pci_machdep.c
> +++ b/sys/arch/amd64/pci/pci_machdep.c
> @@ -89,6 +89,11 @@
>  #include <machine/mpbiosvar.h>
>  #endif
>  
> +#include "acpi.h"
> +#if NACPI > 0
> +#include <dev/acpi/acpidmar.h>
> +#endif
> +
>  /*
>   * Memory Mapped Configuration space access.
>   *
> @@ -797,7 +802,15 @@ pci_init_extents(void)
>   }
>  }
>  
> -#include "acpi.h"
> +int
> +pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> +{
> +#if NACPI > 0
> + acpidmar_pci_hook(pc, pa);
> +#endif
> + return 0;
> +}
> +
>  #if NACPI > 0
>  void acpi_pci_match(struct device *, struct pci_attach_args *);
>  pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t);
> diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c
> index a6239198e..ea11483ad 100644
> --- a/sys/dev/acpi/acpi.c
> +++ b/sys/dev/acpi/acpi.c
> @@ -49,6 +49,7 @@
>  #include <dev/acpi/amltypes.h>
>  #include <dev/acpi/acpidev.h>
>  #include <dev/acpi/dsdt.h>
> +#include <dev/acpi/acpidmar.h>
>  #include <dev/wscons/wsdisplayvar.h>
>  
>  #include <dev/pci/pcidevs.h>
> @@ -2448,6 +2449,8 @@ acpi_sleep_pm(struct acpi_softc *sc, int state)
>      sc->sc_fadt->pm2_cnt_blk && sc->sc_fadt->pm2_cnt_len)
>   acpi_write_pmreg(sc, ACPIREG_PM2_CNT, 0, ACPI_PM2_ARB_DIS);
>  
> + acpidmar_sw(DVACT_SUSPEND);
> +
>   /* Write SLP_TYPx values */
>   rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
>   regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
> @@ -2483,6 +2486,8 @@ acpi_resume_pm(struct acpi_softc *sc, int fromstate)
>  {
>   uint16_t rega, regb, en;
>  
> + acpidmar_sw(DVACT_RESUME);
> +
>   /* Write SLP_TYPx values */
>   rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
>   regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
> diff --git a/sys/dev/acpi/acpidmar.c b/sys/dev/acpi/acpidmar.c
> new file mode 100644
> index 000000000..48506e1b1
> --- /dev/null
> +++ b/sys/dev/acpi/acpidmar.c
> @@ -0,0 +1,2988 @@
> +/*
> + * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +#include <sys/kernel.h>
> +#include <sys/device.h>
> +#include <sys/malloc.h>
> +#include <sys/queue.h>
> +#include <sys/types.h>
> +#include <sys/mbuf.h>
> +#include <sys/proc.h>
> +
> +#include <uvm/uvm_extern.h>
> +
> +#include <machine/apicvar.h>
> +#include <machine/biosvar.h>
> +#include <machine/cpuvar.h>
> +#include <machine/bus.h>
> +
> +#include <dev/acpi/acpireg.h>
> +#include <dev/acpi/acpivar.h>
> +#include <dev/acpi/acpidev.h>
> +#include <dev/acpi/amltypes.h>
> +#include <dev/acpi/dsdt.h>
> +
> +#include <uvm/uvm_extern.h>
> +
> +#include <machine/i8259.h>
> +#include <machine/i82093reg.h>
> +#include <machine/i82093var.h>
> +#include <machine/i82489reg.h>
> +#include <machine/i82489var.h>
> +
> +#include <machine/mpbiosvar.h>
> +
> +#include <dev/pci/pcireg.h>
> +#include <dev/pci/pcivar.h>
> +#include <dev/pci/pcidevs.h>
> +#include <dev/pci/ppbreg.h>
> +
> +#include "ioapic.h"
> +
> +#include "acpidmar.h"
> +#include "amd_iommu.h"
> +
> +#define dprintf(x...)
> +
> +#ifdef DDB
> +int acpidmar_ddb = 0;
> +#endif
> +
> +int intel_iommu_gfx_mapped = 0;
> +int force_cm = 1;
> +
> +void showahci(void *);
> +
> +/* Page Table Entry per domain */
> +struct iommu_softc;
> +
> +static inline int
> +mksid(int b, int d, int f)
> +{
> + return (b << 8) + (d << 3) + f;
> +}
> +
> +static inline int
> +sid_devfn(int sid)
> +{
> + return sid & 0xff;
> +}
> +
> +static inline int
> +sid_bus(int sid)
> +{
> + return (sid >> 8) & 0xff;
> +}
> +
> +static inline int
> +sid_dev(int sid)
> +{
> + return (sid >> 3) & 0x1f;
> +}
> +
> +static inline int
> +sid_fun(int sid)
> +{
> + return (sid >> 0) & 0x7;
> +}
> +
> +/* Page Table Entry per domain */
> +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> +
> +/* Alias mapping */
> +#define SID_INVALID 0x80000000L
> +static uint32_t sid_flag[65536];

Can we avoid having these large arrays, or at least allocate them
dynamically?  That would also avoid the explicit alignment which is
somewhat nasty since it affects the entire kernel.

> +
> +struct domain_dev {
> + int sid;
> + int sec;
> + int sub;
> + TAILQ_ENTRY(domain_dev) link;
> +};
> +
> +struct domain {
> + struct iommu_softc *iommu;
> + int did;
> + int gaw;
> + struct pte_entry *pte;
> + paddr_t ptep;
> + struct bus_dma_tag dmat;
> + int flag;
> +
> + struct mutex            exlck;
> + char exname[32];
> + struct extent *iovamap;
> + TAILQ_HEAD(,domain_dev) devices;
> + TAILQ_ENTRY(domain) link;
> +};
> +
> +#define DOM_DEBUG 0x1
> +#define DOM_NOMAP 0x2
> +
> +struct dmar_devlist {
> + int type;
> + int bus;
> + int ndp;
> + struct acpidmar_devpath *dp;
> + TAILQ_ENTRY(dmar_devlist) link;
> +};
> +
> +TAILQ_HEAD(devlist_head, dmar_devlist);
> +
> +struct ivhd_devlist {
> + int start_id;
> + int end_id;
> + int cfg;
> + TAILQ_ENTRY(ivhd_devlist) link;
> +};
> +
> +struct rmrr_softc {
> + TAILQ_ENTRY(rmrr_softc) link;
> + struct devlist_head devices;
> + int segment;
> + uint64_t start;
> + uint64_t end;
> +};
> +
> +struct atsr_softc {
> + TAILQ_ENTRY(atsr_softc) link;
> + struct devlist_head devices;
> + int segment;
> + int flags;
> +};
> +
> +struct iommu_pic {
> + struct pic pic;
> + struct iommu_softc *iommu;
> +};
> +
> +#define IOMMU_FLAGS_CATCHALL 0x1
> +#define IOMMU_FLAGS_BAD 0x2
> +#define IOMMU_FLAGS_SUSPEND 0x4
> +
> +struct iommu_softc {
> + TAILQ_ENTRY(iommu_softc)link;
> + struct devlist_head devices;
> + int id;
> + int flags;
> + int segment;
> +
> + struct mutex reg_lock;
> +
> + bus_space_tag_t iot;
> + bus_space_handle_t ioh;
> +
> + uint64_t cap;
> + uint64_t ecap;
> + uint32_t gcmd;
> +
> + int mgaw;
> + int agaw;
> + int ndoms;
> +
> + struct root_entry *root;
> + struct context_entry *ctx[256];
> +
> + void *intr;
> + struct iommu_pic pic;
> + int fedata;
> + uint64_t feaddr;
> + uint64_t rtaddr;
> +
> + // Queued Invalidation
> + int qi_head;
> + int qi_tail;
> + paddr_t qip;
> + struct qi_entry *qi;
> +
> + struct domain *unity;
> + TAILQ_HEAD(,domain) domains;
> +
> + // AMD iommu
> + struct ivhd_dte         *dte;
> + void *cmd_tbl;
> + void *evt_tbl;
> + paddr_t cmd_tblp;
> + paddr_t evt_tblp;
> + uint64_t wv[128] __aligned(4096);

This wv array isn't used as far as I can tell.

> +};
> +
> +static inline int iommu_bad(struct iommu_softc *sc)
> +{
> + return (sc->flags & IOMMU_FLAGS_BAD);
> +}
> +
> +static inline int iommu_enabled(struct iommu_softc *sc)
> +{
> + if (sc->dte) {
> + return 1;
> + }
> + return (sc->gcmd & GCMD_TE);
> +}
> +
> +struct acpidmar_softc {
> + struct device sc_dev;
> +
> + pci_chipset_tag_t sc_pc;
> + bus_space_tag_t sc_memt;
> + int sc_haw;
> + int sc_flags;
> +
> + TAILQ_HEAD(,iommu_softc)sc_drhds;
> + TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
> + TAILQ_HEAD(,atsr_softc) sc_atsrs;
> +};
> +
> +int acpidmar_activate(struct device *, int);
> +int acpidmar_match(struct device *, void *, void *);
> +void acpidmar_attach(struct device *, struct device *, void *);
> +struct domain   *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
> +
> +struct cfattach acpidmar_ca = {
> + sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
> +};
> +
> +struct cfdriver acpidmar_cd = {
> + NULL, "acpidmar", DV_DULL
> +};
> +
> +struct acpidmar_softc *acpidmar_sc;
> +int acpidmar_intr(void *);
> +int acpiivhd_intr(void *);
> +
> +#define DID_UNITY 0x1
> +
> +void _dumppte(struct pte_entry *, int, vaddr_t);
> +
> +struct domain *domain_create(struct iommu_softc *, int);
> +struct domain *domain_lookup(struct acpidmar_softc *, int, int);
> +
> +void domain_unload_map(struct domain *, bus_dmamap_t);
> +void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
> +
> +void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
> +void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
> +void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
> +void domain_map_pthru(struct domain *, paddr_t, paddr_t);
> +
> +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
> +void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
> +    struct devlist_head *);
> +int  acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
> +
> +void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
> +void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
> +void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
> +void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
> +void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
> +
> +void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
> +    const char *);
> +
> +void iommu_writel(struct iommu_softc *, int, uint32_t);
> +uint32_t iommu_readl(struct iommu_softc *, int);
> +void iommu_writeq(struct iommu_softc *, int, uint64_t);
> +uint64_t iommu_readq(struct iommu_softc *, int);
> +void iommu_showfault(struct iommu_softc *, int,
> +    struct fault_entry *);
> +void iommu_showcfg(struct iommu_softc *, int);
> +
> +int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> +    struct acpidmar_drhd *);
> +int iommu_enable_translation(struct iommu_softc *, int);
> +void iommu_enable_qi(struct iommu_softc *, int);
> +void iommu_flush_cache(struct iommu_softc *, void *, size_t);
> +void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
> +void iommu_flush_write_buffer(struct iommu_softc *);
> +void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
> +
> +void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
> +void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
> +void iommu_flush_tlb(struct iommu_softc *, int, int);
> +void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
> +
> +void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
> +
> +const char *dmar_bdf(int);
> +
> +const char *
> +dmar_bdf(int sid)
> +{
> + static char bdf[32];
> +
> + snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
> +    sid_bus(sid), sid_dev(sid), sid_fun(sid));
> +
> + return (bdf);
> +}
> +
> +/* busdma */
> +static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
> +    bus_size_t, int, bus_dmamap_t *);
> +static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
> +static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
> +    struct proc *, int);
> +static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
> +    int);
> +static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
> +static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
> +    bus_dma_segment_t *, int, bus_size_t, int);
> +static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
> +static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
> +    bus_size_t, int);
> +static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
> +    bus_dma_segment_t *, int, int *, int);
> +static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
> +static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
> +    caddr_t *, int);
> +static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
> +static paddr_t dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
> +    int, int);
> +
> +static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
> +const char *dom_bdf(struct domain *dom);
> +void domain_map_check(struct domain *dom);
> +
> +struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *npte, vaddr_t va, int shift, uint64_t flags);
> +int  ivhd_poll_events(struct iommu_softc *iommu);
> +void ivhd_showit(struct iommu_softc *);
> +void ivhd_showdte(void);
> +void ivhd_showcmd(struct iommu_softc *);
> +
> +static inline int
> +debugme(struct domain *dom)
> +{
> + return 0;
> + return (dom->flag & DOM_DEBUG);
> +}
> +
> +void
> +domain_map_check(struct domain *dom)
> +{
> + struct iommu_softc *iommu;
> + struct domain_dev *dd;
> + struct context_entry *ctx;
> + int v;
> +
> + iommu = dom->iommu;
> + TAILQ_FOREACH(dd, &dom->devices, link) {
> + acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
> +
> + if (iommu->dte)
> + continue;
> +
> + /* Check if this is the first time we are mapped */
> + ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
> + v = context_user(ctx);
> + if (v != 0xA) {
> + printf("  map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> +    iommu->segment,
> +    sid_bus(dd->sid),
> +    sid_dev(dd->sid),
> +    sid_fun(dd->sid),
> +    iommu->id,
> +    dom->did);
> + context_set_user(ctx, 0xA);
> + }
> + }
> +}
> +
> +/* Map a single page as passthrough - used for DRM */
> +void
> +dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
> +{
> + struct domain *dom = tag->_cookie;
> +
> + if (!acpidmar_sc)
> + return;
> + domain_map_check(dom);
> + domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
> +}
> +
> +/* Map a range of pages 1:1 */
> +void
> +domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
> +{
> + domain_map_check(dom);
> + while (start < end) {
> + domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
> + start += VTD_PAGE_SIZE;
> + }
> +}
> +
/* Map a single paddr to IOMMU paddr (Intel VT-d second-level tables).
 * Walks from the top level down to level 0, allocating intermediate
 * tables on demand, then installs (or clears, when flags == 0) the
 * 4k leaf entry.
 */
void
domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
{
	paddr_t paddr;
	struct pte_entry *pte, *npte;
	int lvl, idx;
	struct iommu_softc *iommu;

	iommu = dom->iommu;
	/* Insert physical address into virtual address map
	 * XXX: could we use private pmap here?
	 * essentially doing a pmap_enter(map, va, pa, prot);
	 */

	/* Only handle 4k pages for now */
	npte = dom->pte;
	for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl >= VTD_LEVEL0;
	    lvl -= VTD_STRIDE_SIZE) {
		idx = (va >> lvl) & VTD_STRIDE_MASK;
		pte = &npte[idx];
		if (lvl == VTD_LEVEL0) {
			/* Level 1: Page Table - add physical address */
			pte->val = pa | flags;
			iommu_flush_cache(iommu, pte, sizeof(*pte));
			break;
		} else if (!(pte->val & PTE_P)) {
			/* Level N: Point to lower level table */
			iommu_alloc_page(iommu, &paddr);
			pte->val = paddr | PTE_P | PTE_R | PTE_W;
			iommu_flush_cache(iommu, pte, sizeof(*pte));
		}
		/* Descend into the next-level table via the direct map. */
		npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
	}
}
> +
> +/* Map a single paddr to IOMMU paddr: AMD
> + * physical address breakdown into levels:
> + * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
> + *        5.55555555.44444444.43333333,33222222.22211111.1111----.--------
> + * mode:
> + *  000 = none   shift
> + *  001 = 1 [21].12
> + *  010 = 2 [30].21
> + *  011 = 3 [39].30
> + *  100 = 4 [48].39
> + *  101 = 5 [57]
> + *  110 = 6
> + *  111 = reserved
> + */
> +struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
> + int shift, uint64_t flags)
> +{
> + paddr_t paddr;
> + int idx;
> +
> + idx = (va >> shift) & VTD_STRIDE_MASK;
> + if (!(pte[idx].val & PTE_P)) {
> + /* Page Table entry is not present... create a new page entry */
> + iommu_alloc_page(iommu, &paddr);
> + pte[idx].val = paddr | flags;
> + iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
> + }
> + return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
> +}
> +
> +void
> +domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
> +{
> + struct pte_entry *pte;
> + struct iommu_softc *iommu;
> + int idx;
> +
> + iommu = dom->iommu;
> + /* Insert physical address into virtual address map
> + * XXX: could we use private pmap here?
> + * essentially doing a pmap_enter(map, va, pa, prot);
> + */
> +
> + /* Always assume AMD levels=4                           */
> + /*        39        30        21        12              */
> + /* ---------|---------|---------|---------|------------ */
> + pte = dom->pte;
> + //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
> + pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
> + pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
> + //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
> +
> + if (flags)
> + flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
> +
> + /* Level 1: Page Table - add physical address */
> + idx = (va >> 12) & 0x1FF;
> + pte[idx].val = pa | flags;
> +
> + iommu_flush_cache(iommu, pte, sizeof(*pte));
> +}
> +
> +static void
> +dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
> +    const char *lbl)
> +{
> + struct domain *dom = tag->_cookie;
> + int i;
> +
> + return;
> + if (!debugme(dom))
> + return;
> + printf("%s: %s\n", lbl, dom_bdf(dom));
> + for (i = 0; i < nseg; i++) {
> + printf("  %.16llx %.8x\n",
> +    (uint64_t)segs[i].ds_addr,
> +    (uint32_t)segs[i].ds_len);
> + }
> +}
> +
/* Unload mapping: clear the PTEs installed by domain_load_map() and
 * return each segment's IOVA range to the domain's extent allocator.
 */
void
domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
{
	bus_dma_segment_t *seg;
	paddr_t base, end, idx;
	psize_t alen;
	int i;

	if (iommu_bad(dom->iommu)) {
		printf("unload map no iommu\n");
		return;
	}

	//acpidmar_intr(dom->iommu);
	for (i = 0; i < dmam->dm_nsegs; i++) {
		seg  = &dmam->dm_segs[i];

		/* Page-align the segment before walking it. */
		base = trunc_page(seg->ds_addr);
		end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
		alen = end - base;

		if (debugme(dom)) {
			printf("  va:%.16llx len:%x\n",
			    (uint64_t)base, (uint32_t)alen);
		}

		/* Clear PTE */
		for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
			domain_map_page(dom, base + idx, 0, 0);

		/* Pass-through domains never allocated from the extent. */
		if (dom->flag & DOM_NOMAP) {
			//printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
			continue;
		}

		mtx_enter(&dom->exlck);
		if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
			panic("domain_unload_map: extent_free");
		}
		mtx_leave(&dom->exlck);
	}
}
> +
/* map.segs[x].ds_addr is modified to IOMMU virtual PA.
 * For each DMA segment: allocate an IOVA range, install PTEs pointing
 * at the original physical pages, and rewrite the segment address so
 * the device uses the translated range.  Pass-through (DOM_NOMAP)
 * domains are mapped 1:1 instead.
 */
void
domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
{
	bus_dma_segment_t *seg;
	struct iommu_softc *iommu;
	paddr_t base, end, idx;
	psize_t alen;
	u_long res;
	int i;

	iommu = dom->iommu;
	if (!iommu_enabled(iommu)) {
		/* Lazy enable translation when required */
		if (iommu_enable_translation(iommu, 1)) {
			return;
		}
	}
	domain_map_check(dom);
	//acpidmar_intr(iommu);
	for (i = 0; i < map->dm_nsegs; i++) {
		seg = &map->dm_segs[i];

		base = trunc_page(seg->ds_addr);
		end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
		alen = end - base;
		res  = base;	/* 1:1 fallback for DOM_NOMAP */

		if (dom->flag & DOM_NOMAP) {
			goto nomap;
		}

		/* Allocate DMA Virtual Address */
		mtx_enter(&dom->exlck);
		if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
		    map->_dm_boundary, EX_NOWAIT, &res)) {
			panic("domain_load_map: extent_alloc");
		}
		if (res == -1) {
			panic("got -1 address\n");
		}
		mtx_leave(&dom->exlck);

		/* Reassign DMA address, preserving the sub-page offset */
		seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
nomap:
		if (debugme(dom)) {
			printf("  LOADMAP: %.16llx %x => %.16llx\n",
			    (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
			    (uint64_t)res);
		}
		for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
			domain_map_page(dom, res + idx, base + idx,
			    PTE_P | pteflag);
		}
	}
	/* Caching-mode (or forced) hardware requires a TLB flush;
	 * otherwise flushing the write buffer suffices. */
	if ((iommu->cap & CAP_CM) || force_cm) {
		iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}
> +
/*
 * Format "bus.dev.fun iommu:N did:XXXX" for the first device of a domain.
 * NOTE(review): returns a static buffer — not reentrant; debug use only.
 */
const char *
dom_bdf(struct domain *dom)
{
	struct domain_dev *dd;
	static char mmm[48];

	dd = TAILQ_FIRST(&dom->devices);
	snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
	    dmar_bdf(dd->sid), dom->iommu->id, dom->did,
	    dom->did == DID_UNITY ? " [unity]" : "");
	return (mmm);
}
> +
> +/* Bus DMA Map functions */
> +static int
> +dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
> +    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
> +{
> + int rc;
> +
> + rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
> +    flags, dmamp);
> + if (!rc) {
> + dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static void
> +dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
> +{
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> + _bus_dmamap_destroy(tag, dmam);
> +}
> +
> +static int
> +dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
> +    bus_size_t buflen, struct proc *p, int flags)
> +{
> + struct domain *dom = tag->_cookie;
> + int rc;
> +
> + rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static int
> +dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
> +    int flags)
> +{
> + struct domain *dom = tag->_cookie;
> + int rc;
> +
> + rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static int
> +dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
> +    int flags)
> +{
> + struct domain *dom = tag->_cookie;
> + int rc;
> +
> + rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static int
> +dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
> +    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
> +{
> + struct domain *dom = tag->_cookie;
> + int rc;
> +
> + rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static void
> +dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
> +{
> + struct domain *dom = tag->_cookie;
> +
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> + domain_unload_map(dom, dmam);
> + _bus_dmamap_unload(tag, dmam);
> +}
> +
> +static void
> +dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
> +    bus_size_t len, int ops)
> +{
> +#if 0
> + struct domain *dom = tag->_cookie;
> + //int flag;
> +
> + flag = PTE_P;
> + //acpidmar_intr(dom->iommu);
> + if (ops == BUS_DMASYNC_PREREAD) {
> + /* make readable */
> + flag |= PTE_R;
> + }
> + else if (ops == BUS_DMASYNC_PREWRITE) {
> + /* make writeable */
> + flag |= PTE_W;
> + }
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> +#endif
> + _bus_dmamap_sync(tag, dmam, offset, len, ops);
> +}
> +
> +static int
> +dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
> +    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
> +    int flags)
> +{
> + int rc;
> +
> + rc = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
> +    rsegs, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static void
> +dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
> +{
> + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> + _bus_dmamem_free(tag, segs, nsegs);
> +}
> +
> +static int
> +dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> +    size_t size, caddr_t *kvap, int flags)
> +{
> + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> + return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
> +}
> +
> +static void
> +dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
> +{
> + struct domain *dom = tag->_cookie;
> +
> + if (debugme(dom)) {
> + printf("dmamap_unmap: %s\n", dom_bdf(dom));
> + }
> + _bus_dmamem_unmap(tag, kva, size);
> +}
> +
> +static paddr_t
> +dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> +    off_t off, int prot, int flags)
> +{
> + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> + return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
> +}
> +
> +/*===================================
> + * IOMMU code
> + *===================================*/
> +
/* Intel: Set Context Root Address.
 * Writes the root-table pointer, latches it with GCMD.SRTP, and polls
 * GSTS.RTPS for completion.
 */
void
iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
{
	int i, sts;

	mtx_enter(&iommu->reg_lock);
	iommu_writeq(iommu, DMAR_RTADDR_REG, paddr);
	iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
	for (i = 0; i < 5; i++) {
		/* NOTE(review): tight 5-read poll with no delay —
		 * confirm this is always long enough for hardware. */
		sts = iommu_readl(iommu, DMAR_GSTS_REG);
		if (sts & GSTS_RTPS)
			break;
	}
	mtx_leave(&iommu->reg_lock);

	if (i == 5) {
		printf("set_rtaddr fails\n");
	}
}
> +
> +/* COMMON: Allocate a new memory page */
> +void *
> +iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
> +{
> + void *va;
> +
> + *paddr = 0;
> + va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
> + if (va == NULL) {
> + panic("can't allocate page\n");
> + }
> + pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
> + return (va);
> +}
> +
> +
/* Intel: Issue command via queued invalidation (stub).
 * Queued invalidation is not wired up yet; the compiled-out outline
 * below sketches the intended ring-buffer submit path.
 */
void
iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
{
#if 0
	struct qi_entry *pi, *pw;

	idx = iommu->qi_head;
	pi = &iommu->qi[idx];
	pw = &iommu->qi[(idx+1) % MAXQ];
	iommu->qi_head = (idx+2) % MAXQ;

	memcpy(pw, &qi, sizeof(qi));
	issue command;
	while (pw->xxx)
		;
#endif
}
> +
> +/* Intel: Flush TLB entries, Queued Invalidation mode */
> +void
> +iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
> +{
> + struct qi_entry qi;
> +
> + /* Use queued invalidation */
> + qi.hi = 0;
> + switch (mode) {
> + case IOTLB_GLOBAL:
> + qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
> + break;
> + case IOTLB_DOMAIN:
> + qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
> +    QI_IOTLB_DID(did);
> + break;
> + case IOTLB_PAGE:
> + qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
> + qi.hi = 0;
> + break;
> + }
> + if (iommu->cap & CAP_DRD)
> + qi.lo |= QI_IOTLB_DR;
> + if (iommu->cap & CAP_DWD)
> + qi.lo |= QI_IOTLB_DW;
> + iommu_issue_qi(iommu, &qi);
> +}
> +
> +/* Intel: Flush Context entries, Queued Invalidation mode */
> +void
> +iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
> +    int sid, int fm)
> +{
> + struct qi_entry qi;
> +
> + /* Use queued invalidation */
> + qi.hi = 0;
> + switch (mode) {
> + case CTX_GLOBAL:
> + qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
> + break;
> + case CTX_DOMAIN:
> + qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
> + break;
> + case CTX_DEVICE:
> + qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
> +    QI_CTX_SID(sid) | QI_CTX_FM(fm);
> + break;
> + }
> + iommu_issue_qi(iommu, &qi);
> +}
> +
> +/* Intel: Flush write buffers */
> +void
> +iommu_flush_write_buffer(struct iommu_softc *iommu)
> +{
> + int i, sts;
> +
> + if (iommu->dte)
> + return;
> + if (!(iommu->cap & CAP_RWBF))
> + return;
> + printf("writebuf\n");
> + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
> + for (i = 0; i < 5; i++) {
> + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> + if (sts & GSTS_WBFS)
> + break;
> + delay(10000);
> + }
> + if (i == 5) {
> + printf("write buffer flush fails\n");
> + }
> +}
> +
> +void
> +iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
> +{
> + if (iommu->dte) {
> + pmap_flush_cache((vaddr_t)addr, size);
> + return;
> + }
> + if (!(iommu->ecap & ECAP_C))
> + pmap_flush_cache((vaddr_t)addr, size);
> +}
> +
/*
 * Intel: Flush IOMMU TLB Entries
 * Flushing can occur globally, per domain or per page.
 * AMD units are dispatched to ivhd_invalidate_domain() instead.
 */
void
iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
{
	int n;
	uint64_t val;

	/* Call AMD */
	if (iommu->dte) {
		ivhd_invalidate_domain(iommu, did);
		//ivhd_poll_events(iommu);
		return;
	}
	val = IOTLB_IVT;
	switch (mode) {
	case IOTLB_GLOBAL:
		val |= IIG_GLOBAL;
		break;
	case IOTLB_DOMAIN:
		val |= IIG_DOMAIN | IOTLB_DID(did);
		break;
	case IOTLB_PAGE:
		val |= IIG_PAGE | IOTLB_DID(did);
		break;
	}

	/* Check for Read/Write Drain */
	if (iommu->cap & CAP_DRD)
		val |= IOTLB_DR;
	if (iommu->cap & CAP_DWD)
		val |= IOTLB_DW;

	mtx_enter(&iommu->reg_lock);

	/* Write the command, then poll until hardware clears IVT. */
	iommu_writeq(iommu, DMAR_IOTLB_REG(iommu), val);
	n = 0;
	do {
		val = iommu_readq(iommu, DMAR_IOTLB_REG(iommu));
	} while (n++ < 5 && val & IOTLB_IVT);

	mtx_leave(&iommu->reg_lock);

#ifdef DEBUG
	{
		/* Warn once if hardware downgraded the requested scope. */
		static int rg;
		int a, r;

		if (!rg) {
			a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
			r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
			if (a != r) {
				printf("TLB Requested:%d Actual:%d\n", r, a);
				rg = 1;
			}
		}
	}
#endif
}
> +
/* Intel: Flush IOMMU settings
 * Flushes can occur globally, per domain, or per device.
 * No-op on AMD units (context cache is Intel-specific).
 */
void
iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
{
	uint64_t val;
	int n;

	if (iommu->dte)
		return;
	val = CCMD_ICC;
	switch (mode) {
	case CTX_GLOBAL:
		val |= CIG_GLOBAL;
		break;
	case CTX_DOMAIN:
		val |= CIG_DOMAIN | CCMD_DID(did);
		break;
	case CTX_DEVICE:
		val |= CIG_DEVICE | CCMD_DID(did) |
		    CCMD_SID(sid) | CCMD_FM(fm);
		break;
	}

	mtx_enter(&iommu->reg_lock);

	/* Write the command, then poll until hardware clears ICC. */
	n = 0;
	iommu_writeq(iommu, DMAR_CCMD_REG, val);
	do {
		val = iommu_readq(iommu, DMAR_CCMD_REG);
	} while (n++ < 5 && val & CCMD_ICC);

	mtx_leave(&iommu->reg_lock);

#ifdef DEBUG
	{
		/* Warn once if hardware downgraded the requested scope. */
		static int rg;
		int a, r;

		if (!rg) {
			a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
			r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
			if (a != r) {
				printf("CTX Requested:%d Actual:%d\n", r, a);
				rg = 1;
			}
		}
	}
#endif
}
> +
/* Intel: Enable Queued Invalidation.
 * Sets or clears GCMD.QIE and polls GSTS.QIES until the hardware
 * reflects the new state (bounded poll).  No-op without ECAP_QI.
 */
void
iommu_enable_qi(struct iommu_softc *iommu, int enable)
{
	int n = 0;
	int sts;

	if (!(iommu->ecap & ECAP_QI))
		return;

	if (enable) {
		iommu->gcmd |= GCMD_QIE;

		mtx_enter(&iommu->reg_lock);

		iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
		do {
			sts = iommu_readl(iommu, DMAR_GSTS_REG);
		} while (n++ < 5 && !(sts & GSTS_QIES));

		mtx_leave(&iommu->reg_lock);

		printf("set.qie: %d\n", n);
	} else {
		iommu->gcmd &= ~GCMD_QIE;

		mtx_enter(&iommu->reg_lock);

		iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
		do {
			sts = iommu_readl(iommu, DMAR_GSTS_REG);
		} while (n++ < 5 && sts & GSTS_QIES);

		mtx_leave(&iommu->reg_lock);

		printf("clr.qie: %d\n", n);
	}
}
> +
> +/* Intel: Enable IOMMU translation */
> +int
> +iommu_enable_translation(struct iommu_softc *iommu, int enable)
> +{
> + uint32_t sts;
> + uint64_t reg;
> + int n = 0;
> +
> + if (iommu->dte)
> + return (0);
> + reg = 0;
> + if (enable) {
> + printf("enable iommu %d\n", iommu->id);
> + iommu_showcfg(iommu, -1);
> +
> + iommu->gcmd |= GCMD_TE;
> +
> + /* Enable translation */
> + printf(" pre tes: ");
> +
> + mtx_enter(&iommu->reg_lock);
> + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> + printf("xxx");
> + do {
> + printf("yyy");
> + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> + delay(n * 10000);
> + } while (n++ < 5 && !(sts & GSTS_TES));
> + mtx_leave(&iommu->reg_lock);
> +
> + printf(" set.tes: %d\n", n);
> +
> + if (n >= 5) {
> + printf("error.. unable to initialize iommu %d\n",
> +    iommu->id);
> + iommu->flags |= IOMMU_FLAGS_BAD;
> +
> + /* Disable IOMMU */
> + iommu->gcmd &= ~GCMD_TE;
> + mtx_enter(&iommu->reg_lock);
> + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> + mtx_leave(&iommu->reg_lock);
> +
> + return (1);
> + }
> +
> + iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
> + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> + } else {
> + iommu->gcmd &= ~GCMD_TE;
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> + do {
> + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> + } while (n++ < 5 && sts & GSTS_TES);
> + mtx_leave(&iommu->reg_lock);
> +
> + printf(" clr.tes: %d\n", n);
> + }
> +
> + return (0);
> +}
> +
/* Intel: Initialize IOMMU.
 * Maps the unit's registers, reads capabilities, sets up the fault
 * interrupt, allocates and installs the root table, and computes the
 * supported guest address widths.  Returns 0 on success, -1 if the
 * register window could not be mapped.
 */
int
iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
    struct acpidmar_drhd *dh)
{
	static int niommu;
	int len = VTD_PAGE_SIZE;
	int i, gaw;
	uint32_t sts;
	paddr_t paddr;

	if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
		return (-1);
	}

	TAILQ_INIT(&iommu->domains);
	iommu->id = ++niommu;
	iommu->flags = dh->flags;
	iommu->segment = dh->segment;
	iommu->iot = sc->sc_memt;

	/* Read and report hardware capability bits. */
	iommu->cap = iommu_readq(iommu, DMAR_CAP_REG);
	iommu->ecap = iommu_readq(iommu, DMAR_ECAP_REG);
	iommu->ndoms = cap_nd(iommu->cap);

	printf("  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
	    iommu->cap & CAP_AFL ? "afl " : "", // adv fault
	    iommu->cap & CAP_RWBF ? "rwbf " : "", // write-buffer flush
	    iommu->cap & CAP_PLMR ? "plmr " : "", // protected lo region
	    iommu->cap & CAP_PHMR ? "phmr " : "", // protected hi region
	    iommu->cap & CAP_CM ? "cm " : "", // caching mode
	    iommu->cap & CAP_ZLR ? "zlr " : "", // zero-length read
	    iommu->cap & CAP_PSI ? "psi " : "", // page invalidate
	    iommu->cap & CAP_DWD ? "dwd " : "", // write drain
	    iommu->cap & CAP_DRD ? "drd " : "", // read drain
	    iommu->cap & CAP_FL1GP ? "Gb " : "", // 1Gb pages
	    iommu->cap & CAP_PI ? "pi " : ""); // posted interrupts
	printf("  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
	    iommu->ecap & ECAP_C ? "c " : "", // coherent
	    iommu->ecap & ECAP_QI ? "qi " : "", // queued invalidate
	    iommu->ecap & ECAP_DT ? "dt " : "", // device iotlb
	    iommu->ecap & ECAP_IR ? "ir " : "", // intr remap
	    iommu->ecap & ECAP_EIM ? "eim " : "", // x2apic
	    iommu->ecap & ECAP_PT ? "pt " : "", // passthrough
	    iommu->ecap & ECAP_SC ? "sc " : "", // snoop control
	    iommu->ecap & ECAP_ECS ? "ecs " : "", // extended context
	    iommu->ecap & ECAP_MTS ? "mts " : "", // memory type
	    iommu->ecap & ECAP_NEST ? "nest " : "", // nested translations
	    iommu->ecap & ECAP_DIS ? "dis " : "", // deferred invalidation
	    iommu->ecap & ECAP_PASID ? "pas " : "", // pasid
	    iommu->ecap & ECAP_PRS ? "prs " : "", // page request
	    iommu->ecap & ECAP_ERS ? "ers " : "", // execute request
	    iommu->ecap & ECAP_SRS ? "srs " : "", // supervisor request
	    iommu->ecap & ECAP_NWFS ? "nwfs " : "", // no write flag
	    iommu->ecap & ECAP_EAFS ? "eafs " : ""); // extended accessed flag

	mtx_init(&iommu->reg_lock, IPL_HIGH);

	/* Clear Interrupt Masking */
	iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);

	/* Hook up the fault-reporting interrupt handler. */
	iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
	    acpidmar_intr, iommu, "dmarintr");

	/* Enable interrupts */
	sts = iommu_readl(iommu, DMAR_FECTL_REG);
	iommu_writel(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);

	/* Allocate root pointer */
	iommu->root = iommu_alloc_page(iommu, &paddr);
#ifdef DEBUG
	printf("Allocated root pointer: pa:%.16llx va:%p\n",
	    (uint64_t)paddr, iommu->root);
#endif
	iommu->rtaddr = paddr;
	iommu_flush_write_buffer(iommu);
	iommu_set_rtaddr(iommu, paddr);

#if 0
	if (iommu->ecap & ECAP_QI) {
		/* Queued Invalidation support */
		iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
		iommu_writeq(iommu, DMAR_IQT_REG, 0);
		iommu_writeq(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
	}
	if (iommu->ecap & ECAP_IR) {
		/* Interrupt remapping support */
		iommu_writeq(iommu, DMAR_IRTA_REG, 0);
	}
#endif

	/* Calculate guest address width and supported guest widths;
	 * the last (largest) supported width wins. */
	gaw = -1;
	iommu->mgaw = cap_mgaw(iommu->cap);
	printf("gaw: %d { ", iommu->mgaw);
	for (i = 0; i < 5; i++) {
		if (cap_sagaw(iommu->cap) & (1L << i)) {
			gaw = VTD_LEVELTOAW(i);
			printf("%d ", gaw);
			iommu->agaw = gaw;
		}
	}
	printf("}\n");

	/* Cache current status register bits so gcmd writes preserve
	 * whatever firmware already enabled. */
	sts = iommu_readl(iommu, DMAR_GSTS_REG);
	if (sts & GSTS_TES)
		iommu->gcmd |= GCMD_TE;
	if (sts & GSTS_QIES)
		iommu->gcmd |= GCMD_QIE;
	if (sts & GSTS_IRES)
		iommu->gcmd |= GCMD_IRE;
	if (iommu->gcmd) {
		printf("gcmd: %x preset\n", iommu->gcmd);
	}
	acpidmar_intr(iommu);
	return (0);
}
> +
const char *dmar_rn(int reg);

/* Return a printable name for an IOMMU register offset (debug aid).
 * Only the AMD IVHD registers are decoded; the Intel DMAR names are
 * compiled out — presumably because their offsets overlap the AMD
 * values in a single switch (TODO confirm).
 */
const char *
dmar_rn(int reg)
{
	switch (reg) {
	case EVT_HEAD_REG: return "evthead";
	case EVT_TAIL_REG: return "evttail";
	case CMD_HEAD_REG: return "cmdhead";
	case CMD_TAIL_REG: return "cmdtail";
	case CMD_BASE_REG: return "cmdbase";
	case EVT_BASE_REG: return "evtbase";
	case DEV_TAB_BASE_REG: return "devtblbase";
	case IOMMUCTL_REG: return "iommuctl";
#if 0
	case DMAR_VER_REG: return "ver";
	case DMAR_CAP_REG: return "cap";
	case DMAR_ECAP_REG: return "ecap";
	case DMAR_GSTS_REG: return "gsts";
	case DMAR_GCMD_REG: return "gcmd";
	case DMAR_FSTS_REG: return "fsts";
	case DMAR_FECTL_REG: return "fectl";
	case DMAR_RTADDR_REG: return "rtaddr";
	case DMAR_FEDATA_REG: return "fedata";
	case DMAR_FEADDR_REG: return "feaddr";
	case DMAR_FEUADDR_REG: return "feuaddr";
	case DMAR_PMEN_REG: return "pmen";
	case DMAR_IEDATA_REG: return "iedata";
	case DMAR_IEADDR_REG: return "ieaddr";
	case DMAR_IEUADDR_REG: return "ieuaddr";
	case DMAR_IRTA_REG: return "irta";
	case DMAR_CCMD_REG: return "ccmd";
	case DMAR_IQH_REG: return "iqh";
	case DMAR_IQT_REG: return "iqt";
	case DMAR_IQA_REG: return "iqa";
#endif
	}
	return "unknown";
}
> +
> +/* Read/Write IOMMU register */
> +uint32_t
> +iommu_readl(struct iommu_softc *iommu, int reg)
> +{
> + uint32_t v;
> +
> + v = bus_space_read_4(iommu->iot, iommu->ioh, reg);
> + if (reg < 00) {
> + printf("iommu%d: read %x %.8lx [%s]\n",
> +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> + }
> +
> + return (v);
> +}
> +
> +
> +#define dbprintf(x...)
> +
> +void
> +iommu_writel(struct iommu_softc *iommu, int reg, uint32_t v)
> +{
> + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> + bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
> +}
> +
> +uint64_t
> +iommu_readq(struct iommu_softc *iommu, int reg)
> +{
> + uint64_t v;
> +
> + v = bus_space_read_8(iommu->iot, iommu->ioh, reg);
> + if (reg < 00) {
> + printf("iommu%d: read %x %.8lx [%s]\n",
> +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> + }
> +
> + return (v);
> +}
> +
> +void
> +iommu_writeq(struct iommu_softc *iommu, int reg, uint64_t v)
> +{
> + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> + bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
> +}
> +
/* Check if a device is within a device scope.
 * Walks each devscope entry's bridge path to resolve the final
 * bus/dev/fun, then checks for an exact endpoint match or, for bridge
 * entries, whether the device's bus falls within the bridge's
 * secondary..subordinate range.  Returns DMAR_ENDPOINT, DMAR_BRIDGE,
 * or 0 for no match.
 */
int
acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
    int sid)
{
	struct dmar_devlist *ds;
	int sub, sec, i;
	int bus, dev, fun, sbus;
	pcireg_t reg;
	pcitag_t tag;

	sbus = sid_bus(sid);
	TAILQ_FOREACH(ds, devlist, link) {
		bus = ds->bus;
		dev = ds->dp[0].device;
		fun = ds->dp[0].function;
		/* Walk PCI bridges in path */
		for (i = 1; i < ds->ndp; i++) {
			tag = pci_make_tag(pc, bus, dev, fun);
			reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
			bus = PPB_BUSINFO_SECONDARY(reg);
			dev = ds->dp[i].device;
			fun = ds->dp[i].function;
		}

		/* Check for device exact match */
		if (sid == mksid(bus, dev, fun)) {
			return DMAR_ENDPOINT;
		}

		/* Check for device subtree match */
		if (ds->type == DMAR_BRIDGE) {
			tag = pci_make_tag(pc, bus, dev, fun);
			reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
			sec = PPB_BUSINFO_SECONDARY(reg);
			sub = PPB_BUSINFO_SUBORDINATE(reg);
			if (sec <= sbus && sbus <= sub) {
				return DMAR_BRIDGE;
			}
		}
	}

	return (0);
}
> +
/* Create a new translation domain on an IOMMU: allocate the top-level
 * page table, install the bus_dma override vector, and set up the IOVA
 * extent (with the MSI window at 0xFEE00000 reserved).
 */
struct domain *
domain_create(struct iommu_softc *iommu, int did)
{
	struct domain *dom;
	int gaw;

	printf("iommu%d: create domain: %.4x\n", iommu->id, did);
	dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
	dom->did = did;
	dom->iommu = iommu;
	dom->pte = iommu_alloc_page(iommu, &dom->ptep);
	TAILQ_INIT(&dom->devices);

	/* Setup DMA: route every bus_dma op through the dmar_* wrappers */
	dom->dmat._cookie = dom;
	dom->dmat._dmamap_create    = dmar_dmamap_create; // nop
	dom->dmat._dmamap_destroy   = dmar_dmamap_destroy; // nop
	dom->dmat._dmamap_load      = dmar_dmamap_load; // lm
	dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf; // lm
	dom->dmat._dmamap_load_uio  = dmar_dmamap_load_uio; // lm
	dom->dmat._dmamap_load_raw  = dmar_dmamap_load_raw; // lm
	dom->dmat._dmamap_unload    = dmar_dmamap_unload; // um
	dom->dmat._dmamap_sync      = dmar_dmamap_sync; // lm
	dom->dmat._dmamem_alloc     = dmar_dmamem_alloc; // nop
	dom->dmat._dmamem_free      = dmar_dmamem_free; // nop
	dom->dmat._dmamem_map       = dmar_dmamem_map; // nop
	dom->dmat._dmamem_unmap     = dmar_dmamem_unmap; // nop
	dom->dmat._dmamem_mmap      = dmar_dmamem_mmap;

	snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
	    iommu->id, dom->did);

	/* Setup IOMMU address map: IOVAs from 16MB up to the smaller of
	 * the adjusted and maximum guest address widths. */
	gaw = min(iommu->agaw, iommu->mgaw);
	dom->iovamap = extent_create(dom->exname, 1024*1024*16,
	    (1LL << gaw)-1,
	    M_DEVBUF, NULL, 0,
	    EX_WAITOK|EX_NOCOALESCE);

	/* Zero out Interrupt region: keep the MSI window unmapped. */
	extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
	    EX_WAITOK);
	mtx_init(&dom->exlck, IPL_HIGH);

	TAILQ_INSERT_TAIL(&iommu->domains, dom, link);

	return dom;
}
> +
> +void domain_add_device(struct domain *dom, int sid)
> +{
> + struct domain_dev *ddev;
> +
> + printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
> + ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
> + ddev->sid = sid;
> + TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
> +
> + /* Should set context entry here?? */
> +}
> +
> +void domain_remove_device(struct domain *dom, int sid)
> +{
> + struct domain_dev *ddev, *tmp;
> +
> + TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
> + if (ddev->sid == sid) {
> + TAILQ_REMOVE(&dom->devices, ddev, link);
> + free(ddev, sizeof(*ddev), M_DEVBUF);
> + }
> + }
> +}
> +
/* Lookup domain by segment & source id (bus.device.function).
 * Finds the responsible IOMMU via devscope (or a catchall unit), then
 * returns the device's existing domain or creates one — falling back
 * to a shared "unity" domain when domain ids run low.
 */
struct domain *
domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
{
	struct iommu_softc *iommu;
	struct domain_dev *ddev;
	struct domain *dom;
	int rc;

	if (sc == NULL) {
		return NULL;
	}

	/* Lookup IOMMU for this device */
	TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
		if (iommu->segment != segment)
			continue;
		/* Check for devscope match or catchall iommu */
		rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
		if (rc != 0 || iommu->flags) {
			break;
		}
	}
	/* iommu is NULL when the loop ran to completion without a match */
	if (!iommu) {
		printf("%s: no iommu found\n", dmar_bdf(sid));
		return NULL;
	}

	//acpidmar_intr(iommu);

	/* Search domain devices */
	TAILQ_FOREACH(dom, &iommu->domains, link) {
		TAILQ_FOREACH(ddev, &dom->devices, link) {
			/* XXX: match all functions? */
			if (ddev->sid == sid) {
				return dom;
			}
		}
	}
	if (iommu->ndoms <= 2) {
		/* Running out of domains.. create catchall domain */
		if (!iommu->unity) {
			iommu->unity = domain_create(iommu, 1);
		}
		dom = iommu->unity;
	} else {
		/* Allocate domain ids from the top down */
		dom = domain_create(iommu, --iommu->ndoms);
	}
	if (!dom) {
		printf("no domain here\n");
		return NULL;
	}

	/* Add device to domain */
	domain_add_device(dom, sid);

	return dom;
}
> +
> +/* Map Guest Pages into IOMMU */
> +void  _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
> +{
> + bus_size_t i;
> + paddr_t hpa;
> +
> + if (dom == NULL) {
> + return;
> + }
> + printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
> + for (i = 0; i < len; i += PAGE_SIZE) {
> + hpa = 0;
> + pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
> + domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
> + gpa += PAGE_SIZE;
> + va  += PAGE_SIZE;
> + }
> +}
> +
> +/* Find IOMMU for a given PCI device */
> +void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
> +{
> + struct domain *dom;
> +
> + dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
> + if (dom) {
> + *id = dom->did;
> + }
> + return dom;
> +}
> +
void domain_map_device(struct domain *dom, int sid);

/*
 * Attach device `sid' (PCI bus:dev.fn source-id) to domain `dom' by
 * programming the IOMMU's in-memory tables.  The AMD path (device
 * table entry) is taken when iommu->dte is set; otherwise the Intel
 * VT-d root/context entries are populated.
 */
void
domain_map_device(struct domain *dom, int sid)
{
	struct iommu_softc *iommu;
	struct context_entry *ctx;
	paddr_t paddr;
	int bus, devfn;
	int tt, lvl;

	iommu = dom->iommu;

	bus = sid_bus(sid);
	devfn = sid_devfn(sid);
	/* AMD attach device: fill in the Device Table Entry once */
	if (iommu->dte) {
		struct ivhd_dte *dte = &iommu->dte[sid];
		if (!dte->dw0) {
			/* Setup Device Table Entry: bus.devfn */
			printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
			dte_set_host_page_table_root_ptr(dte, dom->ptep);
			dte_set_domain(dte, dom->did);
			dte_set_mode(dte, 3);	/* 4-level page table */
			dte_set_tv(dte);
			dte_set_valid(dte);
			/* NOTE(review): flush takes dom->did here, but
			 * INVALIDATE_DEVTAB_ENTRY expects a device id per
			 * the AMD IOMMU spec -- verify. */
			ivhd_flush_devtab(iommu, dom->did);
			//ivhd_showit(iommu);
			ivhd_showdte();
		}
		//ivhd_poll_events(iommu);
		return;
	}

	/* Create Bus mapping: allocate per-bus context table on first use */
	if (!root_entry_is_valid(&iommu->root[bus])) {
		iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
		iommu->root[bus].lo = paddr | ROOT_P;
		iommu_flush_cache(iommu, &iommu->root[bus],
		    sizeof(struct root_entry));
		dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
		    iommu->id, bus, (uint64_t)paddr,
		    iommu->ctx[bus]);
	}

	/* Create DevFn mapping: fill in the context entry once */
	ctx = iommu->ctx[bus] + devfn;
	if (!context_entry_is_valid(ctx)) {
		tt = CTX_T_MULTI;
		lvl = VTD_AWTOLEVEL(iommu->agaw);

		/* Initialize context */
		context_set_slpte(ctx, dom->ptep);
		context_set_translation_type(ctx, tt);
		context_set_domain_id(ctx, dom->did);
		context_set_address_width(ctx, lvl);
		context_set_present(ctx);

		/* Flush it: caching-mode hardware (CAP_CM) needs explicit
		 * context + IOTLB invalidation, otherwise a write-buffer
		 * flush suffices. */
		iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
		if ((iommu->cap & CAP_CM) || force_cm) {
			iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
			iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
		} else {
			iommu_flush_write_buffer(iommu);
		}
		dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
		    iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
		    dom->did, tt);
	}
}
> +
> +struct domain *
> +acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
> +{
> + static struct domain *dom;
> +
> + dom = domain_lookup(sc, segment, sid);
> + if (!dom) {
> + printf("no domain: %s\n", dmar_bdf(sid));
> + return NULL;
> + }
> +
> + if (mapctx) {
> + domain_map_device(dom, sid);
> + }
> +
> + return dom;
> +}
> +
/* Placeholder predicate: DMA remapping is enabled for every device. */
int
ismap(int bus, int dev, int fun)
{
	return (1);
}
> +
> +void
> +acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> +{
> + int bus, dev, fun, sid;
> + struct domain *dom;
> + pcireg_t reg;
> +
> + if (!acpidmar_sc) {
> + /* No DMAR, ignore */
> + return;
> + }
> +
> + /* Add device to our list */
> + pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
> + sid = mksid(bus, dev, fun);
> + if (sid_flag[sid] & SID_INVALID)
> + return;
> +
> + reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
> +#if 0
> + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
> +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
> + printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
> +    pa->pa_domain, bus, dev, fun);
> + return;
> + }
> +#endif
> + /* Add device to domain */
> + dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain, sid, 0);
> + if (dom == NULL)
> + return;
> +
> + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
> +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
> + dom->flag = DOM_NOMAP;
> + }
> + if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
> +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
> + /* For ISA Bridges, map 0-16Mb as 1:1 */
> + printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
> +    pa->pa_domain, bus, dev, fun);
> + domain_map_pthru(dom, 0x00, 16*1024*1024);
> + }
> +
> + /* Change DMA tag */
> + pa->pa_dmat = &dom->dmat;
> +}
> +
> +/* Create list of device scope entries from ACPI table */
> +void
> +acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
> +    struct devlist_head *devlist)
> +{
> + struct acpidmar_devscope *ds;
> + struct dmar_devlist *d;
> + int dplen, i;
> +
> + TAILQ_INIT(devlist);
> + while (off < de->length) {
> + ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
> + off += ds->length;
> +
> + /* We only care about bridges and endpoints */
> + if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
> + continue;
> +
> + dplen = ds->length - sizeof(*ds);
> + d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
> + d->bus  = ds->bus;
> + d->type = ds->type;
> + d->ndp  = dplen / 2;
> + d->dp   = (void *)&d[1];
> + memcpy(d->dp, &ds[1], dplen);
> + TAILQ_INSERT_TAIL(devlist, d, link);
> +
> + printf("  %8s  %.4x:%.2x.%.2x.%x {",
> +    ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
> +    segment, ds->bus,
> +    d->dp[0].device,
> +    d->dp[0].function);
> +
> + for (i = 1; i < d->ndp; i++) {
> + printf(" %2x.%x ",
> +    d->dp[i].device,
> +    d->dp[i].function);
> + }
> + printf("}\n");
> + }
> +}
> +
> +/* DMA Remapping Hardware Unit */
> +void
> +acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
> +{
> + struct iommu_softc *iommu;
> +
> + printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
> +    de->drhd.segment,
> +    de->drhd.address,
> +    de->drhd.flags);
> + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
> + acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
> +    &iommu->devices);
> + iommu_init(sc, iommu, &de->drhd);
> +
> + if (de->drhd.flags) {
> + /* Catchall IOMMU goes at end of list */
> + TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
> + } else {
> + TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
> + }
> +}
> +
/* Reserved Memory Region Reporting.
 * RMRRs are BIOS-declared ranges that devices may DMA into before the
 * OS takes control (e.g. USB legacy emulation); they must remain
 * identity-mapped after translation is enabled. */
void
acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
{
	struct rmrr_softc *rmrr;
	bios_memmap_t *im, *jm;
	uint64_t start, end;

	printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
	    de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
	if (de->rmrr.limit <= de->rmrr.base) {
		printf("  buggy BIOS\n");
		return;
	}

	rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
	rmrr->start = trunc_page(de->rmrr.base);
	rmrr->end = round_page(de->rmrr.limit);
	rmrr->segment = de->rmrr.segment;
	acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
	    &rmrr->devices);

	/* If the RMRR lies inside an E820 reserved area, widen it to
	 * cover the whole merged reserved range: some BIOSes DMA outside
	 * the region they declared. */
	for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
		if (im->type != BIOS_MAP_RES)
			continue;
		/* Search for adjacent reserved regions */
		start = im->addr;
		end   = im->addr+im->size;
		for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
		    jm++) {
			end = jm->addr+jm->size;
		}
		printf("e820: %.16llx - %.16llx\n", start, end);
		if (start <= rmrr->start && rmrr->end <= end) {
			/* Bah.. some buggy BIOS stomp outside RMRR */
			printf("  ** inside E820 Reserved %.16llx %.16llx\n",
			    start, end);
			rmrr->start = trunc_page(start);
			rmrr->end   = round_page(end);
			break;
		}
	}
	TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
}
> +
> +/* Root Port ATS Reporting */
> +void
> +acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
> +{
> + struct atsr_softc *atsr;
> +
> + printf("ATSR: segment:%.4x flags:%x\n",
> +    de->atsr.segment,
> +    de->atsr.flags);
> +
> + atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
> + atsr->flags = de->atsr.flags;
> + atsr->segment = de->atsr.segment;
> + acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
> +    &atsr->devices);
> +
> + TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
> +}
> +
> +void
> +acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
> +{
> + struct rmrr_softc *rmrr;
> + struct iommu_softc *iommu;
> + struct domain *dom;
> + struct dmar_devlist *dl;
> + union acpidmar_entry *de;
> + int off, sid, rc;
> +
> + domain_map_page = domain_map_page_intel;
> + printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
> +    dmar->haw+1,
> +    !!(dmar->flags & 0x1),
> +    !!(dmar->flags & 0x2));
> + sc->sc_haw = dmar->haw+1;
> + sc->sc_flags = dmar->flags;
> +
> + TAILQ_INIT(&sc->sc_drhds);
> + TAILQ_INIT(&sc->sc_rmrrs);
> + TAILQ_INIT(&sc->sc_atsrs);
> +
> + off = sizeof(*dmar);
> + while (off < dmar->hdr.length) {
> + de = (union acpidmar_entry *)((unsigned char *)dmar + off);
> + switch (de->type) {
> + case DMAR_DRHD:
> + acpidmar_drhd(sc, de);
> + break;
> + case DMAR_RMRR:
> + acpidmar_rmrr(sc, de);
> + break;
> + case DMAR_ATSR:
> + acpidmar_atsr(sc, de);
> + break;
> + default:
> + printf("DMAR: unknown %x\n", de->type);
> + break;
> + }
> + off += de->length;
> + }
> +
> + /* Pre-create domains for iommu devices */
> + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> + TAILQ_FOREACH(dl, &iommu->devices, link) {
> + sid = mksid(dl->bus, dl->dp[0].device,
> +    dl->dp[0].function);
> + dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
> + if (dom != NULL) {
> + printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> +    iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
> +    iommu->id, dom->did);
> + }
> + }
> + }
> + /* Map passthrough pages for RMRR */
> + TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
> + TAILQ_FOREACH(dl, &rmrr->devices, link) {
> + sid = mksid(dl->bus, dl->dp[0].device,
> +    dl->dp[0].function);
> + dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
> + if (dom != NULL) {
> + printf("%s map ident: %.16llx %.16llx\n",
> +    dom_bdf(dom), rmrr->start, rmrr->end);
> + domain_map_pthru(dom, rmrr->start, rmrr->end);
> + rc = extent_alloc_region(dom->iovamap,
> +    rmrr->start, rmrr->end, EX_WAITOK);
> + }
> + }
> + }
> +}
> +
> +
> +/*=====================================================
> + * AMD Vi
> + *=====================================================*/
> +void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
> +int acpiivrs_iommu_match(struct pci_attach_args *);
> +int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> + struct acpi_ivhd *);
> +void iommu_ivhd_add(struct iommu_softc *, int, int, int);
> +int _ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *);
> +void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
> +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
> +int ivhd_invalidate_domain(struct iommu_softc *, int);
> +void ivhd_intr_map(struct iommu_softc *, int);
> +
> +int
> +acpiivhd_intr(void *ctx)
> +{
> + struct iommu_softc *iommu = ctx;
> +
> + if (!iommu->dte)
> + return (0);
> + ivhd_poll_events(iommu);
> + return (1);
> +}
> +
> +/* Setup interrupt for AMD */
> +void
> +ivhd_intr_map(struct iommu_softc *iommu, int devid) {
> + pci_intr_handle_t ih;
> +
> + if (iommu->intr)
> + return;
> + ih.tag = pci_make_tag(NULL, sid_bus(devid), sid_dev(devid), sid_fun(devid));
> + ih.line = APIC_INT_VIA_MSG;
> + ih.pin = 0;
> + iommu->intr = pci_intr_establish(NULL, ih, IPL_NET | IPL_MPSAFE,
> + acpiivhd_intr, iommu, "amd_iommu");
> + printf("amd iommu intr: %p\n", iommu->intr);
> +}
> +
> +void _dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
> +{
> + char *pfx[] = { "    ", "   ", "  ", " ", "" };
> + uint64_t i, sh;
> + struct pte_entry *npte;
> +  
> + for (i = 0; i < 512; i++) {
> + sh = (i << (((lvl-1) * 9) + 12));
> + if (pte[i].val & PTE_P) {
> + if (lvl > 1) {
> + npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
> + printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl,
> +    pte[i].val, (pte[i].val >> 9) & 7);
> + _dumppte(npte, lvl-1, va | sh);
> + }
> + else {
> + printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl,
> +    pte[i].val, va | sh);
> + }
> + }
> + }
> +}
> +
> +void showpage(int sid, paddr_t paddr)
> +{
> + struct domain *dom;
> + static int show = 0;
> +
> + if (show > 10)
> + return;
> + show++;
> + dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
> + if (!dom)
> + return;
> + printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
> + hwdte[sid].dw0,
> + hwdte[sid].dw1,
> + hwdte[sid].dw2,
> + hwdte[sid].dw3,
> + hwdte[sid].dw4,
> + hwdte[sid].dw5,
> + hwdte[sid].dw6,
> + hwdte[sid].dw7);
> + _dumppte(dom->pte, 3, 0);
> +}
> +
/* Display AMD IOMMU Error.
 * Decodes one entry from the hardware event log (at ring offset
 * `head'), prints a human-readable diagnostic, then zeroes the entry
 * so stale data is never re-reported. */
void
ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
{
	int type, sid, did, flag;
	uint64_t address;

	/* Get Device, Domain, Address and Type of event */
	sid  = __EXTRACT(evt->dw0, EVT_SID);
	type = __EXTRACT(evt->dw1, EVT_TYPE);
	did  = __EXTRACT(evt->dw1, EVT_DID);
	flag = __EXTRACT(evt->dw1, EVT_FLAG);
	address = _get64(&evt->dw2);	/* 64-bit address spans dw2/dw3 */

	printf("=== IOMMU Error[%.4x]: ", head);
	switch (type) {
	case ILLEGAL_DEV_TABLE_ENTRY: // ok
		printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
		    dmar_bdf(sid), address,
		    evt->dw1 & EVT_TR ? "translation" : "transaction",
		    evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
		    evt->dw1 & EVT_RW ? "write" : "read",
		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
		ivhd_showdte();
		break;
	case IO_PAGE_FAULT: // ok
		printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
		    dmar_bdf(sid), did, address,
		    evt->dw1 & EVT_TR ? "translation" : "transaction",
		    evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
		    evt->dw1 & EVT_PE ? "no perm" : "perm",
		    evt->dw1 & EVT_RW ? "write" : "read",
		    evt->dw1 & EVT_PR ? "present" : "not present",
		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
		ivhd_showdte();
		/* Dump the offending translation for debugging */
		showpage(sid, address);
		break;
	case DEV_TAB_HARDWARE_ERROR: // ok
		printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
		    dmar_bdf(sid), address,
		    evt->dw1 & EVT_TR ? "translation" : "transaction",
		    evt->dw1 & EVT_RW ? "write" : "read",
		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
		ivhd_showdte();
		break;
	case PAGE_TAB_HARDWARE_ERROR:
		printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
		    dmar_bdf(sid), address,
		    evt->dw1 & EVT_TR ? "translation" : "transaction",
		    evt->dw1 & EVT_RW ? "write" : "read",
		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
		ivhd_showdte();
		break;
	case ILLEGAL_COMMAND_ERROR: // ok
		printf("illegal command addr=0x%.16llx\n", address);
		ivhd_showcmd(iommu);
		break;
	case COMMAND_HARDWARE_ERROR:
		printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
		    address, flag);
		ivhd_showcmd(iommu);
		break;
	case IOTLB_INV_TIMEOUT:
		printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
		    dmar_bdf(sid), address);
		break;
	case INVALID_DEVICE_REQUEST:
		printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
		    dmar_bdf(sid), address, flag);
		break;
	default:
		printf("unknown type=0x%.2x\n", type);
		break;
	}
	//ivhd_showdte();
	/* Clear old event */
	evt->dw0 = 0;
	evt->dw1 = 0;
	evt->dw2 = 0;
	evt->dw3 = 0;
}
> +
> +/* AMD: Process IOMMU error from hardware */
> +int
> +ivhd_poll_events(struct iommu_softc *iommu)
> +{
> + uint32_t head, tail;
> + int sz;
> +
> + sz = sizeof(struct ivhd_event);
> + head = iommu_readl(iommu, EVT_HEAD_REG);
> + tail = iommu_readl(iommu, EVT_TAIL_REG);
> + if (head == tail) {
> + /* No pending events */
> + return (0);
> + }
> + while (head != tail) {
> + ivhd_show_event(iommu, iommu->evt_tbl + head, head);
> + head = (head + sz) % EVT_TBL_SIZE;
> + }
> + iommu_writel(iommu, EVT_HEAD_REG, head);
> + return (0);
> +}
> +
/* AMD: Issue command to IOMMU queue.
 * Copies one 16-byte command into the circular command buffer and
 * bumps CMD_TAIL_REG.  Returns the slot index (tail/entry-size) on
 * success, -EBUSY when the ring is full.
 *
 * Interrupts are blocked around the tail read-modify-write so it is
 * not interleaved on this CPU.  `head' is sampled before that window:
 * a concurrently draining queue can only make the ring look fuller
 * than it really is, which is safe.
 *
 * NOTE(review): cmd_tbl is indexed with the byte offset `tail' --
 * assumes cmd_tbl is declared as a byte-granular pointer; verify its
 * declaration. */
int
_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
{
	u_long rf;
	uint32_t head, tail, next;
	int sz;

	head = iommu_readl(iommu, CMD_HEAD_REG);
	sz = sizeof(*cmd);
	rf = intr_disable();
	tail = iommu_readl(iommu, CMD_TAIL_REG);
	next = (tail + sz) % CMD_TBL_SIZE;
	if (next == head) {
		printf("FULL\n");
		/* Queue is full */
		intr_restore(rf);
		return -EBUSY;
	}
	memcpy(iommu->cmd_tbl + tail, cmd, sz);
	iommu_writel(iommu, CMD_TAIL_REG, next);
	intr_restore(rf);
	return (tail / sz);
}
> +
> +#define IVHD_MAXDELAY 8
> +
/* AMD: Issue a command and optionally wait for completion.
 * When `wait' is set, a COMPLETION_WAIT command is appended whose
 * store-address points at the stack variable `wv'; the hardware DMAs
 * a nonzero value (dw2/dw3) there when every prior command has
 * finished, and we busy-poll for it with exponential backoff. */
int
ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
{
	struct ivhd_command wq = { 0 };
	/* 16-byte alignment required for a COMPLETION_WAIT store target */
	volatile uint64_t wv __aligned(16) = 0LL;
	paddr_t paddr;
	int rc, i;
	static int mi;	/* high-water mark of observed wait iterations */

	rc = _ivhd_issue_command(iommu, cmd);
	if (rc >= 0 && wait) {
		/* Wait for previous commands to complete.
		 * Store address of completion variable to command */
		pmap_extract(pmap_kernel(), (vaddr_t)&wv, &paddr);
		/* dw0 low bit = Store enable; upper address bits go in dw1 */
		wq.dw0 = (paddr & ~0xF) | 0x1;
		wq.dw1 = (COMPLETION_WAIT << CMD_SHIFT) | ((paddr >> 32) & 0xFFFFF);
		wq.dw2 = 0xDEADBEEF;	/* marker value the hardware stores */
		wq.dw3 = 0xFEEDC0DE;

		rc = _ivhd_issue_command(iommu, &wq);
		/* wv will change to value in dw2/dw3 when command is complete */
		for (i = 0; i < IVHD_MAXDELAY && !wv; i++) {
			DELAY(10 << i);
		}
		if (mi < i && mi != IVHD_MAXDELAY) {
			printf("maxdel: %d\n", i);
			mi = i;
		}
		if (i == IVHD_MAXDELAY) {
			printf("ivhd command timeout: %.8x %.8x %.8x %.8x wv:%llx idx:%x\n",
			    cmd->dw0, cmd->dw1, cmd->dw2, cmd->dw3, wv, rc);
		}
	}
	return rc;

}
> +
> +/* AMD: Flush changes to Device Table Entry for a specific domain */
> +int ivhd_flush_devtab(struct iommu_softc *iommu, int did)
> +{
> + struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_DEVTAB_ENTRY << CMD_SHIFT };
> + return ivhd_issue_command(iommu, &cmd, 1);
> +}
> +
> +/* AMD: Invalidate all IOMMU device and page tables */
> +int ivhd_invalidate_iommu_all(struct iommu_softc *iommu)
> +{
> + struct ivhd_command cmd = { .dw1 = INVALIDATE_IOMMU_ALL << CMD_SHIFT };
> + return ivhd_issue_command(iommu, &cmd, 0);
> +}
> +
> +/* AMD: Invalidate interrupt remapping */
> +int ivhd_invalidate_interrupt_table(struct iommu_softc *iommu, int did)
> +{
> + struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_INTERRUPT_TABLE << CMD_SHIFT };
> + return ivhd_issue_command(iommu, &cmd, 0);
> +}
> +
> +/* AMD: Invalidate all page tables in a domain */
> +int ivhd_invalidate_domain(struct iommu_softc *iommu, int did)
> +{
> + struct ivhd_command cmd = { .dw1 = did | (INVALIDATE_IOMMU_PAGES << CMD_SHIFT) };
> +
> + cmd.dw2 = 0xFFFFF000 | 0x3;
> + cmd.dw3 = 0x7FFFFFFF;
> + return ivhd_issue_command(iommu, &cmd, 1);
> +}
> +
> +/* AMD: Display Registers */
> +void ivhd_showit(struct iommu_softc *iommu)
> +{
> + printf("---- dt:%.16llx cmd:%.16llx evt:%.16llx ctl:%.16llx sts:%.16llx\n",
> + iommu_readq(iommu, DEV_TAB_BASE_REG),
> + iommu_readq(iommu, CMD_BASE_REG),
> + iommu_readq(iommu, EVT_BASE_REG),
> + iommu_readq(iommu, IOMMUCTL_REG),
> + iommu_readq(iommu, IOMMUSTS_REG));
> + printf("---- cmd queue:%.16llx %.16llx evt queue:%.16llx %.16llx\n",
> + iommu_readq(iommu, CMD_HEAD_REG),
> + iommu_readq(iommu, CMD_TAIL_REG),
> + iommu_readq(iommu, EVT_HEAD_REG),
> + iommu_readq(iommu, EVT_TAIL_REG));
> +}
> +
/* AMD: Generate Errors to test event handler.
 * Deliberately corrupts one device table entry and issues an all-ones
 * (invalid) command so the event log and its decoder can be exercised.
 * Debug-only; the caller is commented out in ivhd_iommu_init(). */
void ivhd_checkerr(struct iommu_softc *iommu);
void ivhd_checkerr(struct iommu_softc *iommu)
{
	/* All-ones command: guaranteed ILLEGAL_COMMAND_ERROR */
	struct ivhd_command cmd = { -1, -1, -1, -1 };

	/* Generate ILLEGAL DEV TAB entry? (sid 0x2303 chosen arbitrarily) */
	iommu->dte[0x2303].dw0 = -1;      // invalid
	iommu->dte[0x2303].dw2 = 0x1234;  // domain
	iommu->dte[0x2303].dw7 = -1;      // reserved
	ivhd_flush_devtab(iommu, 0x1234);
	ivhd_poll_events(iommu);

	/* Generate ILLEGAL_COMMAND_ERROR : ok */
	ivhd_issue_command(iommu, &cmd, 0);
	ivhd_poll_events(iommu);

	/* Generate page hardware error */
}
> +
> +/* AMD: Show Device Table Entry */
> +void ivhd_showdte(void)
> +{
> + int i;
> +
> + for (i = 0; i < 65536; i++) {
> + if (hwdte[i].dw0) {
> + printf("%.2x:%.2x.%x: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
> + i >> 8, (i >> 3) & 0x1F, i & 0x7,
> + hwdte[i].dw0, hwdte[i].dw1,
> + hwdte[i].dw2, hwdte[i].dw3,
> + hwdte[i].dw4, hwdte[i].dw5,
> + hwdte[i].dw6, hwdte[i].dw7);
> + }
> + }
> +}
> +
/* AMD: Show command entries.
 * Prints queue slots with their bus addresses (CMD_BASE_REG + offset).
 * NOTE(review): the bound 4096/128 walks only 32 of the 256 16-byte
 * entries in the 4k ring -- confirm whether the partial dump is
 * intentional or the divisor should be sizeof(*ihd). */
void ivhd_showcmd(struct iommu_softc *iommu)
{
	struct ivhd_command *ihd;
	paddr_t phd;
	int i;

	ihd = iommu->cmd_tbl;
	phd = iommu_readq(iommu, CMD_BASE_REG) & CMD_BASE_MASK;
	for (i = 0; i < 4096 / 128; i++) {
		printf("%.2x: %.16llx %.8x %.8x %.8x %.8x\n", i,
		    (uint64_t)phd + i * sizeof(*ihd),
		    ihd[i].dw0,ihd[i].dw1,ihd[i].dw2,ihd[i].dw3);
	}
}
> +
> +#define _c(x) (int)((iommu->ecap >> x ##_SHIFT) & x ## _MASK)
> +
/* AMD: Initialize IOMMU.
 * Maps the unit's MMIO registers, allocates the command and event
 * rings, points the hardware at the shared device table (hwdte) and
 * enables translation.  Returns 0 on success, -1 on bad arguments or
 * failed register mapping. */
int
ivhd_iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
    struct acpi_ivhd *ivhd)
{
	static int niommu;	/* running unit number for iommu->id */
	paddr_t paddr;
	uint64_t ov;

	if (sc == NULL || iommu == NULL || ivhd == NULL) {
		printf("Bad pointer to iommu_init!\n");
		return -1;
	}
	if (_bus_space_map(sc->sc_memt, ivhd->address, 0x80000, 0, &iommu->ioh) != 0) {
		printf("Bus Space Map fails\n");
		return -1;
	}
	TAILQ_INIT(&iommu->domains);
	TAILQ_INIT(&iommu->devices);

	/* Setup address width and number of domains */
	iommu->id = ++niommu;
	iommu->iot = sc->sc_memt;
	iommu->mgaw = 48;
	iommu->agaw = 48;
	iommu->flags = 1;
	iommu->segment = 0;
	iommu->ndoms = 256;

	/* Decode and report the extended feature register */
	iommu->ecap = iommu_readq(iommu, EXTFEAT_REG);
	printf("ecap = %.16llx\n", iommu->ecap);
	printf("%s%s%s%s%s%s%s%s\n",
	    iommu->ecap & EFR_PREFSUP ? "pref " : "",
	    iommu->ecap & EFR_PPRSUP  ? "ppr " : "",
	    iommu->ecap & EFR_NXSUP   ? "nx " : "",
	    iommu->ecap & EFR_GTSUP   ? "gt " : "",
	    iommu->ecap & EFR_IASUP   ? "ia " : "",
	    iommu->ecap & EFR_GASUP   ? "ga " : "",
	    iommu->ecap & EFR_HESUP   ? "he " : "",
	    iommu->ecap & EFR_PCSUP   ? "pc " : "");
	printf("hats:%x gats:%x glxsup:%x smif:%x smifrc:%x gam:%x\n",
	    _c(EFR_HATS), _c(EFR_GATS), _c(EFR_GLXSUP), _c(EFR_SMIFSUP),
	    _c(EFR_SMIFRC), _c(EFR_GAMSUP));

	/* Turn off iommu while reprogramming it */
	ov = iommu_readq(iommu, IOMMUCTL_REG);
	iommu_writeq(iommu, IOMMUCTL_REG, ov & ~(CTL_IOMMUEN | CTL_COHERENT |
	    CTL_HTTUNEN | CTL_RESPASSPW | CTL_PASSPW | CTL_ISOC));

	/* Enable intr, mark the IOMMU's own sid so it is never remapped */
	sid_flag[ivhd->devid] |= SID_INVALID;
	ivhd_intr_map(iommu, ivhd->devid);

	/* Setup command buffer with 4k buffer (128 entries) */
	iommu->cmd_tbl = iommu_alloc_page(iommu, &paddr);
	iommu_writeq(iommu, CMD_BASE_REG, (paddr & CMD_BASE_MASK) | CMD_TBL_LEN_4K);
	iommu_writel(iommu, CMD_HEAD_REG, 0x00);
	iommu_writel(iommu, CMD_TAIL_REG, 0x00);
	iommu->cmd_tblp = paddr;

	/* Setup event log with 4k buffer (128 entries) */
	iommu->evt_tbl = iommu_alloc_page(iommu, &paddr);
	iommu_writeq(iommu, EVT_BASE_REG, (paddr & EVT_BASE_MASK) | EVT_TBL_LEN_4K);
	iommu_writel(iommu, EVT_HEAD_REG, 0x00);
	iommu_writel(iommu, EVT_TAIL_REG, 0x00);
	iommu->evt_tblp = paddr;

	/* Setup device table
	 * 1 entry per source ID (bus:device:function - 64k entries)
	 * All units share the global hwdte table. */
	iommu->dte = hwdte;
	pmap_extract(pmap_kernel(), (vaddr_t)iommu->dte, &paddr);
	iommu_writeq(iommu, DEV_TAB_BASE_REG, (paddr & DEV_TAB_MASK) | DEV_TAB_LEN);

	/* Enable IOMMU, propagating the tuning flags the IVHD reported */
	ov |= (CTL_IOMMUEN | CTL_EVENTLOGEN | CTL_CMDBUFEN | CTL_EVENTINTEN | CTL_COMWAITINTEN);
	if (ivhd->flags & IVHD_COHERENT)
		ov |= CTL_COHERENT;
	if (ivhd->flags & IVHD_HTTUNEN)
		ov |= CTL_HTTUNEN;
	if (ivhd->flags & IVHD_RESPASSPW)
		ov |= CTL_RESPASSPW;
	if (ivhd->flags & IVHD_PASSPW)
		ov |= CTL_PASSPW;
	if (ivhd->flags & IVHD_ISOC)
		ov |= CTL_ISOC;
	ov &= ~(CTL_INVTIMEOUT_MASK << CTL_INVTIMEOUT_SHIFT);
	ov |=  (CTL_INVTIMEOUT_1MS  << CTL_INVTIMEOUT_SHIFT);
	iommu_writeq(iommu, IOMMUCTL_REG, ov);

	ivhd_invalidate_iommu_all(iommu);
	//ivhd_checkerr(iommu);

	TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
	return 0;
}
> +
/*
 * Record a device-id range parsed from an IVHD entry.
 * BUG(review): the allocated entry is never inserted into any list and
 * the pointer is dropped when the function returns -- the allocation
 * leaks and the recorded range is never consulted.  Presumably it
 * should be appended to a per-iommu device list; confirm and fix.
 */
void
iommu_ivhd_add(struct iommu_softc *iommu, int start, int end, int cfg)
{
	struct ivhd_devlist *idev;

	idev = malloc(sizeof(*idev), M_DEVBUF, M_ZERO | M_WAITOK);
	idev->start_id = start;
	idev->end_id = end;
	idev->cfg = cfg;
}
> +
> +int acpiivrs_iommu_match(struct pci_attach_args *pa)
> +{
> + int b,d,f;
> +
> + pci_decompose_tag(pa->pa_pc, pa->pa_tag, &b, &d, &f);
> + printf(" matchdev: %d.%d.%d\n", b, d, f);
> + if (PCI_CLASS(pa->pa_class) == PCI_CLASS_SYSTEM &&
> +    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_SYSTEM_IOMMU) {
> + printf("iziommu\n");
> + return (1);
> + }
> + return (0);
> +}
> +
> +void
> +acpiivrs_ivhd(struct acpidmar_softc *sc, struct acpi_ivhd *ivhd)
> +{
> + struct iommu_softc *iommu;
> + struct acpi_ivhd_ext *ext;
> + union acpi_ivhd_entry *ie;
> + int start, off, dte, all_dte = 0;
> +
> + if (ivhd->type == IVRS_IVHD_EXT) {
> + ext = (struct acpi_ivhd_ext *)ivhd;
> + printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x %.16llx\n",
> +       ext->type, ext->flags, ext->length,
> +             ext->segment, dmar_bdf(ext->devid), ext->cap,
> +             ext->address, ext->info,
> +       ext->attrib, ext->efr);
> + if (ext->flags & IVHD_PPRSUP)
> + printf(" PPRSup");
> + if (ext->flags & IVHD_PREFSUP)
> + printf(" PreFSup");
> + if (ext->flags & IVHD_COHERENT)
> + printf(" Coherent");
> + if (ext->flags & IVHD_IOTLB)
> + printf(" Iotlb");
> + if (ext->flags & IVHD_ISOC)
> + printf(" ISoc");
> + if (ext->flags & IVHD_RESPASSPW)
> + printf(" ResPassPW");
> + if (ext->flags & IVHD_PASSPW)
> + printf(" PassPW");
> + if (ext->flags & IVHD_HTTUNEN)
> + printf( " HtTunEn");
> + if (ext->flags)
> + printf("\n");
> + off = sizeof(*ext);
> + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO|M_WAITOK);
> + ivhd_iommu_init(sc, iommu, ivhd);
> + } else {
> + printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x\n",
> +       ivhd->type, ivhd->flags, ivhd->length,
> +             ivhd->segment, dmar_bdf(ivhd->devid), ivhd->cap,
> +             ivhd->address, ivhd->info,
> +       ivhd->feature);
> + if (ivhd->flags & IVHD_PPRSUP)
> + printf(" PPRSup");
> + if (ivhd->flags & IVHD_PREFSUP)
> + printf(" PreFSup");
> + if (ivhd->flags & IVHD_COHERENT)
> + printf(" Coherent");
> + if (ivhd->flags & IVHD_IOTLB)
> + printf(" Iotlb");
> + if (ivhd->flags & IVHD_ISOC)
> + printf(" ISoc");
> + if (ivhd->flags & IVHD_RESPASSPW)
> + printf(" ResPassPW");
> + if (ivhd->flags & IVHD_PASSPW)
> + printf(" PassPW");
> + if (ivhd->flags & IVHD_HTTUNEN)
> + printf( " HtTunEn");
> + if (ivhd->flags)
> + printf("\n");
> + off = sizeof(*ivhd);
> + }
> + while (off < ivhd->length) {
> + ie = (void *)ivhd + off;
> + switch (ie->type) {
> + case IVHD_ALL:
> + all_dte = ie->all.data;
> + printf(" ALL %.4x\n", dte);
> + off += sizeof(ie->all);
> + break;
> + case IVHD_SEL:
> + dte = ie->sel.data;
> + printf(" SELECT: %s %.4x\n", dmar_bdf(ie->sel.devid), dte);
> + off += sizeof(ie->sel);
> + break;
> + case IVHD_SOR:
> + dte = ie->sor.data;
> + start = ie->sor.devid;
> + printf(" SOR: %s %.4x\n", dmar_bdf(start), dte);
> + off += sizeof(ie->sor);
> + break;
> + case IVHD_EOR:
> + printf(" EOR: %s\n", dmar_bdf(ie->eor.devid));
> + off += sizeof(ie->eor);
> + break;
> + case IVHD_ALIAS_SEL:
> + dte = ie->alias.data;
> + printf(" ALIAS: src=%s: ", dmar_bdf(ie->alias.srcid));
> + printf(" %s %.4x\n", dmar_bdf(ie->alias.devid), dte);
> + off += sizeof(ie->alias);
> + break;
> + case IVHD_ALIAS_SOR:
> + dte = ie->alias.data;
> + printf(" ALIAS_SOR: %s %.4x ", dmar_bdf(ie->alias.devid), dte);
> + printf(" src=%s\n", dmar_bdf(ie->alias.srcid));
> + off += sizeof(ie->alias);
> + break;
> + case IVHD_EXT_SEL:
> + dte = ie->ext.data;
> + printf(" EXT SEL: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
> + dte, ie->ext.extdata);
> + off += sizeof(ie->ext);
> + break;
> + case IVHD_EXT_SOR:
> + dte = ie->ext.data;
> + printf(" EXT SOR: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
> +       dte, ie->ext.extdata);
> + off += sizeof(ie->ext);
> + break;
> + case IVHD_SPECIAL:
> + printf(" SPECIAL\n");
> + off += sizeof(ie->special);
> + break;
> + default:
> + printf(" 2:unknown %x\n", ie->type);
> + off = ivhd->length;
> + break;
> + }
> + }
> +}
> +
> +void
> +acpiivrs_init(struct acpidmar_softc *sc, struct acpi_ivrs *ivrs)
> +{
> + union acpi_ivrs_entry *ie;
> + int off;
> +
> + domain_map_page = domain_map_page_amd;
> + printf("IVRS Version: %d\n", ivrs->hdr.revision);
> + printf(" VA Size: %d\n", (ivrs->ivinfo >> IVRS_VASIZE_SHIFT) & IVRS_VASIZE_MASK);
> + printf(" PA Size: %d\n", (ivrs->ivinfo >> IVRS_PASIZE_SHIFT) & IVRS_PASIZE_MASK);
> +
> + TAILQ_INIT(&sc->sc_drhds);
> + TAILQ_INIT(&sc->sc_rmrrs);
> + TAILQ_INIT(&sc->sc_atsrs);
> +
> + printf("======== IVRS\n");
> + off = sizeof(*ivrs);
> + while (off < ivrs->hdr.length) {
> + ie = (void *)ivrs + off;
> + switch (ie->type) {
> + case IVRS_IVHD:
> + case IVRS_IVHD_EXT:
> + acpiivrs_ivhd(sc, &ie->ivhd);
> + break;
> + case IVRS_IVMD_ALL:
> + case IVRS_IVMD_SPECIFIED:
> + case IVRS_IVMD_RANGE:
> + printf("ivmd\n");
> + break;
> + default:
> + printf("1:unknown: %x\n", ie->type);
> + break;
> + }
> + off += ie->length;
> + }
> + printf("======== End IVRS\n");
> +}
> +
> +static int
> +acpiivhd_activate(struct iommu_softc *iommu, int act)
> +{
> + switch (act) {
> + case DVACT_SUSPEND:
> + iommu->flags |= IOMMU_FLAGS_SUSPEND;
> + break;
> + case DVACT_RESUME:
> + iommu->flags &= ~IOMMU_FLAGS_SUSPEND;
> + break;
> + }
> + return (0);
> +}
> +
/*
 * Power-management hook: on suspend, disable translation on every
 * Intel unit; on resume, restore the root table pointer and fault
 * interrupt programming (lost across S3) and re-enable translation.
 * AMD units (iommu->dte set) only track state via acpiivhd_activate().
 */
int
acpidmar_activate(struct device *self, int act)
{
	struct acpidmar_softc *sc = (struct acpidmar_softc *)self;
	struct iommu_softc *iommu;

	printf("called acpidmar_activate %d %p\n", act, sc);

	if (sc == NULL) {
		return (0);
	}

	switch (act) {
	case DVACT_RESUME:
		TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
			printf("iommu%d resume\n", iommu->id);
			if (iommu->dte) {
				acpiivhd_activate(iommu, act);
				continue;
			}
			/* Reload registers the hardware lost across suspend */
			iommu_flush_write_buffer(iommu);
			iommu_set_rtaddr(iommu, iommu->rtaddr);
			iommu_writel(iommu, DMAR_FEDATA_REG, iommu->fedata);
			iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
			iommu_writel(iommu, DMAR_FEUADDR_REG,
			    iommu->feaddr >> 32);
			/* Only units we suspended (and are not BAD) get
			 * translation turned back on */
			if ((iommu->flags & (IOMMU_FLAGS_BAD|IOMMU_FLAGS_SUSPEND)) ==
			    IOMMU_FLAGS_SUSPEND) {
				printf("enable wakeup translation\n");
				iommu_enable_translation(iommu, 1);
			}
			iommu_showcfg(iommu, -1);
		}
		break;
	case DVACT_SUSPEND:
		TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
			printf("iommu%d suspend\n", iommu->id);
			if (iommu->flags & IOMMU_FLAGS_BAD)
				continue;
			if (iommu->dte) {
				acpiivhd_activate(iommu, act);
				continue;
			}
			iommu->flags |= IOMMU_FLAGS_SUSPEND;
			iommu_enable_translation(iommu, 0);
			iommu_showcfg(iommu, -1);
		}
		break;
	}
	return (0);
}
> +
> +void
> +acpidmar_sw(int act)
> +{
> + acpidmar_activate((struct device *)acpidmar_sc, act);
> +}
> +
> +int
> +acpidmar_match(struct device *parent, void *match, void *aux)
> +{
> + struct acpi_attach_args *aaa = aux;
> + struct acpi_table_header *hdr;
> +
> + /* If we do not have a table, it is not us */
> + if (aaa->aaa_table == NULL)
> + return (0);
> +
> + /* If it is an DMAR table, we can attach */
> + hdr = (struct acpi_table_header *)aaa->aaa_table;
> + if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0)
> + return (1);
> + if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0)
> + return (1);
> +
> + return (0);
> +}
> +
> +void
> +acpidmar_attach(struct device *parent, struct device *self, void *aux)
> +{
> + struct acpidmar_softc *sc = (void *)self;
> + struct acpi_attach_args *aaa = aux;
> + struct acpi_dmar *dmar = (struct acpi_dmar *)aaa->aaa_table;
> + struct acpi_ivrs        *ivrs = (struct acpi_ivrs *)aaa->aaa_table;
> + struct acpi_table_header *hdr;
> +
> + hdr = (struct acpi_table_header *)aaa->aaa_table;
> + sc->sc_memt = aaa->aaa_memt;
> + if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0) {
> + acpidmar_sc = sc;
> + acpidmar_init(sc, dmar);
> + }
> + if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0) {
> + acpidmar_sc = sc;
> + acpiivrs_init(sc, ivrs);
> + }
> +}
> +
> +/* Interrupt shiz */
> +void acpidmar_msi_hwmask(struct pic *, int);
> +void acpidmar_msi_hwunmask(struct pic *, int);
> +void acpidmar_msi_addroute(struct pic *, struct cpu_info *, int, int, int);
> +void acpidmar_msi_delroute(struct pic *, struct cpu_info *, int, int, int);
> +
> +void
> +acpidmar_msi_hwmask(struct pic *pic, int pin)
> +{
> + struct iommu_pic *ip = (void *)pic;
> + struct iommu_softc *iommu = ip->iommu;
> +
> + printf("msi_hwmask\n");
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu_writel(iommu, DMAR_FECTL_REG, FECTL_IM);
> + iommu_readl(iommu, DMAR_FECTL_REG);
> +
> + mtx_leave(&iommu->reg_lock);
> +}
> +
> +void
> +acpidmar_msi_hwunmask(struct pic *pic, int pin)
> +{
> + struct iommu_pic *ip = (void *)pic;
> + struct iommu_softc *iommu = ip->iommu;
> +
> + printf("msi_hwunmask\n");
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu_writel(iommu, DMAR_FECTL_REG, 0);
> + iommu_readl(iommu, DMAR_FECTL_REG);
> +
> + mtx_leave(&iommu->reg_lock);
> +}
> +
> +void
> +acpidmar_msi_addroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
> +    int type)
> +{
> + struct iommu_pic *ip = (void *)pic;
> + struct iommu_softc *iommu = ip->iommu;
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu->fedata = vec;
> + iommu->feaddr = 0xfee00000L | (ci->ci_apicid << 12);
> + iommu_writel(iommu, DMAR_FEDATA_REG, vec);
> + iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
> + iommu_writel(iommu, DMAR_FEUADDR_REG, iommu->feaddr >> 32);
> +
> + mtx_leave(&iommu->reg_lock);
> +}
> +
> +void
> +acpidmar_msi_delroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
> +    int type)
> +{
> + printf("msi_delroute\n");
> +}
> +
> +void *
> +acpidmar_intr_establish(void *ctx, int level, int (*func)(void *),
> +    void *arg, const char *what)
> +{
> + struct iommu_softc *iommu = ctx;
> + struct pic *pic;
> +
> + pic = &iommu->pic.pic;
> + iommu->pic.iommu = iommu;
> +
> + strlcpy(pic->pic_dev.dv_xname, "dmarpic",
> + sizeof(pic->pic_dev.dv_xname));
> + pic->pic_type = PIC_MSI;
> + pic->pic_hwmask = acpidmar_msi_hwmask;
> + pic->pic_hwunmask = acpidmar_msi_hwunmask;
> + pic->pic_addroute = acpidmar_msi_addroute;
> + pic->pic_delroute = acpidmar_msi_delroute;
> + pic->pic_edge_stubs = ioapic_edge_stubs;
> +#ifdef MULTIPROCESSOR
> + mtx_init(&pic->pic_mutex, level);
> +#endif
> +
> + return intr_establish(-1, pic, 0, IST_PULSE, level, NULL, func, arg, what);
> +}
> +
> +int
> +acpidmar_intr(void *ctx)
> +{
> + struct iommu_softc *iommu = ctx;
> + struct fault_entry fe;
> + static struct fault_entry ofe;
> + int fro, nfr, fri, i;
> + uint32_t sts;
> +
> + //splassert(IPL_HIGH);
> +
> + if (!(iommu->gcmd & GCMD_TE)) {
> + return (1);
> + }
> + mtx_enter(&iommu->reg_lock);
> + sts = iommu_readl(iommu, DMAR_FECTL_REG);
> + sts = iommu_readl(iommu, DMAR_FSTS_REG);
> +
> + if (!(sts & FSTS_PPF)) {
> + mtx_leave(&iommu->reg_lock);
> + return (1);
> + }
> +
> + nfr = cap_nfr(iommu->cap);
> + fro = cap_fro(iommu->cap);
> + fri = (sts >> FSTS_FRI_SHIFT) & FSTS_FRI_MASK;
> + for (i = 0; i < nfr; i++) {
> + fe.hi = iommu_readq(iommu, fro + (fri*16) + 8);
> + if (!(fe.hi & FRCD_HI_F))
> + break;
> +
> + fe.lo = iommu_readq(iommu, fro + (fri*16));
> + if (ofe.hi != fe.hi || ofe.lo != fe.lo) {
> + iommu_showfault(iommu, fri, &fe);
> + ofe.hi = fe.hi;
> + ofe.lo = fe.lo;
> + }
> + fri = (fri + 1) % nfr;
> + }
> +
> + iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
> +
> + mtx_leave(&iommu->reg_lock);
> +
> + return (1);
> +}
> +
> +const char *vtd_faults[] = {
> + "Software",
> + "Root Entry Not Present", /* ok (rtaddr + 4096) */
> + "Context Entry Not Present", /* ok (no CTX_P) */
> + "Context Entry Invalid", /* ok (tt = 3) */
> + "Address Beyond MGAW",
> + "Write", /* ok */
> + "Read", /* ok */
> + "Paging Entry Invalid", /* ok */
> + "Root Table Invalid",
> + "Context Table Invalid",
> + "Root Entry Reserved",          /* ok (root.lo |= 0x4) */
> + "Context Entry Reserved",
> + "Paging Entry Reserved",
> + "Context Entry TT",
> + "Reserved",
> +};
> +
> +void iommu_showpte(uint64_t, int, uint64_t);
> +
> +void
> +iommu_showpte(uint64_t ptep, int lvl, uint64_t base)
> +{
> + uint64_t nb, pb, i;
> + struct pte_entry *pte;
> +
> + pte = (void *)PMAP_DIRECT_MAP(ptep);
> + for (i = 0; i < 512; i++) {
> + if (!(pte[i].val & PTE_P))
> + continue;
> + nb = base + (i << lvl);
> + pb = pte[i].val & ~VTD_PAGE_MASK;
> + if(lvl == VTD_LEVEL0) {
> + printf("   %3llx %.16llx = %.16llx %c%c %s\n",
> +    i, nb, pb,
> +    pte[i].val & PTE_P ? 'r' : ' ',
> +    pte[i].val & PTE_W ? 'w' : ' ',
> +    (nb == pb) ? " ident" : "");
> + if (nb == pb)
> + return;
> + } else {
> + iommu_showpte(pb, lvl - VTD_STRIDE_SIZE, nb);
> + }
> + }
> +}
> +
> +void
> +iommu_showcfg(struct iommu_softc *iommu, int sid)
> +{
> + int i, j, sts, cmd;
> + struct context_entry *ctx;
> + pcitag_t tag;
> + pcireg_t clc;
> +
> + cmd = iommu_readl(iommu, DMAR_GCMD_REG);
> + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> + printf("iommu%d: flags:%d root pa:%.16llx %s %s %s %.8x %.8x\n",
> +    iommu->id, iommu->flags, iommu_readq(iommu, DMAR_RTADDR_REG),
> +    sts & GSTS_TES ? "enabled" : "disabled",
> +    sts & GSTS_QIES ? "qi" : "ccmd",
> +    sts & GSTS_IRES ? "ir" : "",
> +    cmd, sts);
> + for (i = 0; i < 256; i++) {
> + if (!root_entry_is_valid(&iommu->root[i])) {
> + continue;
> + }
> + for (j = 0; j < 256; j++) {
> + ctx = iommu->ctx[i] + j;
> + if (!context_entry_is_valid(ctx)) {
> + continue;
> + }
> + tag = pci_make_tag(NULL, i, (j >> 3), j & 0x7);
> + clc = pci_conf_read(NULL, tag, 0x08) >> 8;
> + printf("  %.2x:%.2x.%x lvl:%d did:%.4x tt:%d ptep:%.16llx flag:%x cc:%.6x\n",
> +    i, (j >> 3), j & 7,
> +    context_address_width(ctx),
> +    context_domain_id(ctx),
> +    context_translation_type(ctx),
> +    context_pte(ctx),
> +    context_user(ctx),
> +    clc);
> +#if 0
> + /* dump pagetables */
> + iommu_showpte(ctx->lo & ~VTD_PAGE_MASK, iommu->agaw -
> +    VTD_STRIDE_SIZE, 0);
> +#endif
> + }
> + }
> +}
> +
> +void
> +iommu_showfault(struct iommu_softc *iommu, int fri, struct fault_entry *fe)
> +{
> + int bus, dev, fun, type, fr, df;
> + bios_memmap_t *im;
> + const char *mapped;
> +
> + if (!(fe->hi & FRCD_HI_F))
> + return;
> + type = (fe->hi & FRCD_HI_T) ? 'r' : 'w';
> + fr = (fe->hi >> FRCD_HI_FR_SHIFT) & FRCD_HI_FR_MASK;
> + bus = (fe->hi >> FRCD_HI_BUS_SHIFT) & FRCD_HI_BUS_MASK;
> + dev = (fe->hi >> FRCD_HI_DEV_SHIFT) & FRCD_HI_DEV_MASK;
> + fun = (fe->hi >> FRCD_HI_FUN_SHIFT) & FRCD_HI_FUN_MASK;
> + df  = (fe->hi >> FRCD_HI_FUN_SHIFT) & 0xFF;
> + iommu_showcfg(iommu, mksid(bus,dev,fun));
> + if (!iommu->ctx[bus]) {
> + /* Bus is not initialized */
> + mapped = "nobus";
> + } else if (!context_entry_is_valid(&iommu->ctx[bus][df])) {
> + /* DevFn not initialized */
> + mapped = "nodevfn";
> + } else if (context_user(&iommu->ctx[bus][df]) != 0xA) {
> + /* no bus_space_map */
> + mapped = "nomap";
> + } else {
> + /* bus_space_map */
> + mapped = "mapped";
> + }
> + printf("fri%d: dmar: %.2x:%.2x.%x %s error at %llx fr:%d [%s] iommu:%d [%s]\n",
> +    fri, bus, dev, fun,
> +    type == 'r' ? "read" : "write",
> +    fe->lo,
> +    fr, fr <= 13 ? vtd_faults[fr] : "unknown",
> +    iommu->id,
> +    mapped);
> + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
> + if ((im->type == BIOS_MAP_RES) &&
> +    (im->addr <= fe->lo) &&
> +    (fe->lo <= im->addr+im->size)) {
> + printf("mem in e820.reserved\n");
> + }
> + }
> +#ifdef DDB
> + if (acpidmar_ddb)
> + db_enter();
> +#endif
> +}
> +
> +
> diff --git a/sys/dev/acpi/acpidmar.h b/sys/dev/acpi/acpidmar.h
> new file mode 100644
> index 000000000..33659ecaf
> --- /dev/null
> +++ b/sys/dev/acpi/acpidmar.h
> @@ -0,0 +1,534 @@
> +/*
> + * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#ifndef _DEV_ACPI_DMARREG_H_
> +#define _DEV_ACPI_DMARREG_H_
> +
> +#define VTD_STRIDE_MASK 0x1FF
> +#define VTD_STRIDE_SIZE 9
> +#define VTD_PAGE_SIZE   4096
> +#define VTD_PAGE_MASK   0xFFF
> +#define VTD_PTE_MASK    0x0000FFFFFFFFF000LL
> +
> +#define VTD_LEVEL0 12
> +#define VTD_LEVEL1 21
> +#define VTD_LEVEL2 30 /* Minimum level supported */
> +#define VTD_LEVEL3 39 /* Also supported */
> +#define VTD_LEVEL4 48
> +#define VTD_LEVEL5 57
> +
> +#define _xbit(x,y) (((x)>> (y)) & 1)
> +#define _xfld(x,y) (uint32_t)(((x)>> y##_SHIFT) & y##_MASK)
> +
> +#define VTD_AWTOLEVEL(x)    (((x) - 30) / VTD_STRIDE_SIZE)
> +#define VTD_LEVELTOAW(x)    (((x) * VTD_STRIDE_SIZE) + 30)
> +
> +#define DMAR_VER_REG 0x00    /* 32:Arch version supported by this IOMMU */
> +#define DMAR_RTADDR_REG 0x20    /* 64:Root entry table */
> +#define DMAR_FEDATA_REG 0x3c    /* 32:Fault event interrupt data register */
> +#define DMAR_FEADDR_REG 0x40    /* 32:Fault event interrupt addr register */
> +#define DMAR_FEUADDR_REG 0x44    /* 32:Upper address register */
> +#define DMAR_AFLOG_REG 0x58    /* 64:Advanced Fault control */
> +#define DMAR_PMEN_REG 0x64    /* 32:Enable Protected Memory Region */
> +#define DMAR_PLMBASE_REG 0x68    /* 32:PMRR Low addr */
> +#define DMAR_PLMLIMIT_REG 0x6c    /* 32:PMRR low limit */
> +#define DMAR_PHMBASE_REG 0x70    /* 64:pmrr high base addr */
> +#define DMAR_PHMLIMIT_REG 0x78    /* 64:pmrr high limit */
> +#define DMAR_ICS_REG 0x9C    /* 32:Invalidation complete status register */
> +#define DMAR_IECTL_REG 0xa0    /* 32:Invalidation event control register */
> +#define DMAR_IEDATA_REG 0xa4    /* 32:Invalidation event data register */
> +#define DMAR_IEADDR_REG 0xa8    /* 32:Invalidation event address register */
> +#define DMAR_IEUADDR_REG 0xac    /* 32:Invalidation event upper address register */
> +#define DMAR_IRTA_REG 0xb8    /* 64:Interrupt remapping table addr register */
> +#define DMAR_CAP_REG 0x08    /* 64:Hardware supported capabilities */
> +#define   CAP_PI (1LL << 59)
> +#define   CAP_FL1GP (1LL << 56)
> +#define   CAP_DRD (1LL << 55)
> +#define   CAP_DWD (1LL << 54)
> +#define   CAP_MAMV_MASK 0x3F
> +#define   CAP_MAMV_SHIFT 48LL
> +#define   cap_mamv(x) _xfld(x,CAP_MAMV)
> +#define   CAP_NFR_MASK 0xFF
> +#define   CAP_NFR_SHIFT 40LL
> +#define   cap_nfr(x) (_xfld(x,CAP_NFR) + 1)
> +#define   CAP_PSI (1LL << 39)
> +#define   CAP_SLLPS_MASK 0xF
> +#define   CAP_SLLPS_SHIFT 34LL
> +#define   cap_sllps(x) _xfld(x,CAP_SLLPS)
> +#define   CAP_FRO_MASK 0x3FF
> +#define   CAP_FRO_SHIFT 24LL
> +#define   cap_fro(x) (_xfld(x,CAP_FRO) * 16)
> +#define   CAP_ZLR (1LL << 22)
> +#define   CAP_MGAW_MASK 0x3F
> +#define   CAP_MGAW_SHIFT 16LL
> +#define   cap_mgaw(x) (_xfld(x,CAP_MGAW) + 1)
> +#define   CAP_SAGAW_MASK 0x1F
> +#define   CAP_SAGAW_SHIFT 8LL
> +#define   cap_sagaw(x) _xfld(x,CAP_SAGAW)
> +#define   CAP_CM (1LL << 7)
> +#define   CAP_PHMR (1LL << 6)
> +#define   CAP_PLMR (1LL << 5)
> +#define   CAP_RWBF (1LL << 4)
> +#define   CAP_AFL (1LL << 3)
> +#define   CAP_ND_MASK 0x7
> +#define   CAP_ND_SHIFT 0x00
> +#define   cap_nd(x) (16 << (((x) & CAP_ND_MASK) << 1))
> +
> +#define DMAR_ECAP_REG 0x10 /* 64:Extended capabilities supported */
> +#define   ECAP_PSS_MASK 0x1F
> +#define   ECAP_PSS_SHIFT 35
> +#define   ECAP_EAFS (1LL << 34)
> +#define   ECAP_NWFS (1LL << 33)
> +#define   ECAP_SRS (1LL << 31)
> +#define   ECAP_ERS (1LL << 30)
> +#define   ECAP_PRS (1LL << 29)
> +#define   ECAP_PASID (1LL << 28)
> +#define   ECAP_DIS (1LL << 27)
> +#define   ECAP_NEST (1LL << 26)
> +#define   ECAP_MTS (1LL << 25)
> +#define   ECAP_ECS (1LL << 24)
> +#define   ECAP_MHMV_MASK 0xF
> +#define   ECAP_MHMV_SHIFT 0x20
> +#define   ecap_mhmv(x) _xfld(x,ECAP_MHMV)
> +#define   ECAP_IRO_MASK 0x3FF /* IOTLB Register */
> +#define   ECAP_IRO_SHIFT 0x8
> +#define   ecap_iro(x) (_xfld(x,ECAP_IRO) * 16)
> +#define   ECAP_SC (1LL << 7) /* Snoop Control */
> +#define   ECAP_PT (1LL << 6) /* HW Passthru */
> +#define   ECAP_EIM (1LL << 4)
> +#define   ECAP_IR (1LL << 3) /* Interrupt remap */
> +#define   ECAP_DT (1LL << 2) /* Device IOTLB */
> +#define   ECAP_QI (1LL << 1) /* Queued Invalidation */
> +#define   ECAP_C (1LL << 0) /* Coherent cache */
> +
> +#define DMAR_GCMD_REG 0x18 /* 32:Global command register */
> +#define   GCMD_TE (1LL << 31)
> +#define   GCMD_SRTP (1LL << 30)
> +#define   GCMD_SFL (1LL << 29)
> +#define   GCMD_EAFL (1LL << 28)
> +#define   GCMD_WBF (1LL << 27)
> +#define   GCMD_QIE (1LL << 26)
> +#define   GCMD_IRE (1LL << 25)
> +#define   GCMD_SIRTP (1LL << 24)
> +#define   GCMD_CFI (1LL << 23)
> +
> +#define DMAR_GSTS_REG 0x1c /* 32:Global status register */
> +#define   GSTS_TES (1LL << 31)
> +#define   GSTS_RTPS (1LL << 30)
> +#define   GSTS_FLS (1LL << 29)
> +#define   GSTS_AFLS (1LL << 28)
> +#define   GSTS_WBFS (1LL << 27)
> +#define   GSTS_QIES (1LL << 26)
> +#define   GSTS_IRES (1LL << 25)
> +#define   GSTS_IRTPS (1LL << 24)
> +#define   GSTS_CFIS (1LL << 23)
> +
> +#define DMAR_CCMD_REG 0x28 /* 64:Context command reg */
> +#define   CCMD_ICC (1LL << 63)
> +#define   CCMD_CIRG_MASK 0x3
> +#define   CCMD_CIRG_SHIFT 61
> +#define   CCMD_CIRG(x) ((uint64_t)(x) << CCMD_CIRG_SHIFT)
> +#define   CCMD_CAIG_MASK 0x3
> +#define   CCMD_CAIG_SHIFT 59
> +#define   CCMD_FM_MASK 0x3
> +#define   CCMD_FM_SHIFT 32
> +#define   CCMD_FM(x) (((uint64_t)(x) << CCMD_FM_SHIFT))
> +#define   CCMD_SID_MASK 0xFFFF
> +#define   CCMD_SID_SHIFT 8
> +#define   CCMD_SID(x) (((x) << CCMD_SID_SHIFT))
> +#define   CCMD_DID_MASK 0xFFFF
> +#define   CCMD_DID_SHIFT 0
> +#define   CCMD_DID(x) (((x) << CCMD_DID_SHIFT))
> +
> +#define CIG_GLOBAL CCMD_CIRG(CTX_GLOBAL)
> +#define CIG_DOMAIN CCMD_CIRG(CTX_DOMAIN)
> +#define CIG_DEVICE CCMD_CIRG(CTX_DEVICE)
> +
> +
> +#define DMAR_FSTS_REG 0x34 /* 32:Fault Status register */
> +#define   FSTS_FRI_MASK 0xFF
> +#define   FSTS_FRI_SHIFT 8
> +#define   FSTS_PRO (1LL << 7)
> +#define   FSTS_ITE (1LL << 6)
> +#define   FSTS_ICE (1LL << 5)
> +#define   FSTS_IQE (1LL << 4)
> +#define   FSTS_APF (1LL << 3)
> +#define   FSTS_APO (1LL << 2)
> +#define   FSTS_PPF (1LL << 1)
> +#define   FSTS_PFO (1LL << 0)
> +
> +#define DMAR_FECTL_REG 0x38 /* 32:Fault control register */
> +#define   FECTL_IM (1LL << 31)
> +#define   FECTL_IP (1LL << 30)
> +
> +#define FRCD_HI_F (1LL << (127-64))
> +#define FRCD_HI_T (1LL << (126-64))
> +#define FRCD_HI_AT_MASK 0x3
> +#define FRCD_HI_AT_SHIFT (124-64)
> +#define FRCD_HI_PV_MASK 0xFFFFF
> +#define FRCD_HI_PV_SHIFT (104-64)
> +#define FRCD_HI_FR_MASK 0xFF
> +#define FRCD_HI_FR_SHIFT (96-64)
> +#define FRCD_HI_PP (1LL << (95-64))
> +
> +#define FRCD_HI_SID_MASK 0xFF
> +#define FRCD_HI_SID_SHIFT 0
> +#define FRCD_HI_BUS_SHIFT 8
> +#define FRCD_HI_BUS_MASK 0xFF
> +#define FRCD_HI_DEV_SHIFT 3
> +#define FRCD_HI_DEV_MASK 0x1F
> +#define FRCD_HI_FUN_SHIFT 0
> +#define FRCD_HI_FUN_MASK 0x7
> +
> +#define DMAR_IOTLB_REG(x) (ecap_iro((x)->ecap) + 8)
> +#define DMAR_IVA_REG(x) (ecap_iro((x)->ecap) + 0)
> +
> +#define DMAR_FRIH_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 8)
> +#define DMAR_FRIL_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 0)
> +
> +#define IOTLB_IVT (1LL << 63)
> +#define IOTLB_IIRG_MASK 0x3
> +#define IOTLB_IIRG_SHIFT 60
> +#define IOTLB_IIRG(x) ((uint64_t)(x) << IOTLB_IIRG_SHIFT)
> +#define IOTLB_IAIG_MASK 0x3
> +#define IOTLB_IAIG_SHIFT 57
> +#define IOTLB_DR (1LL << 49)
> +#define IOTLB_DW (1LL << 48)
> +#define IOTLB_DID_MASK 0xFFFF
> +#define IOTLB_DID_SHIFT 32
> +#define IOTLB_DID(x) ((uint64_t)(x) << IOTLB_DID_SHIFT)
> +
> +#define IIG_GLOBAL IOTLB_IIRG(IOTLB_GLOBAL)
> +#define IIG_DOMAIN IOTLB_IIRG(IOTLB_DOMAIN)
> +#define IIG_PAGE IOTLB_IIRG(IOTLB_PAGE)
> +
> +#define DMAR_IQH_REG 0x80 /* 64:Invalidation queue head register */
> +#define DMAR_IQT_REG 0x88 /* 64:Invalidation queue tail register */
> +#define DMAR_IQA_REG 0x90 /* 64:Invalidation queue addr register */
> +#define IQA_QS_256 0 /* 256 entries */
> +#define IQA_QS_512 1 /* 512 */
> +#define IQA_QS_1K 2 /* 1024 */
> +#define IQA_QS_2K 3 /* 2048 */
> +#define IQA_QS_4K 4 /* 4096 */
> +#define IQA_QS_8K 5 /* 8192 */
> +#define IQA_QS_16K 6 /* 16384 */
> +#define IQA_QS_32K 7 /* 32768 */
> +
> +/* Read-Modify-Write helpers */
> +static inline void iommu_rmw32(void *ov, uint32_t mask, uint32_t shift, uint32_t nv)
> +{
> + *(uint32_t *)ov &= ~(mask << shift);
> + *(uint32_t *)ov |= (nv & mask) << shift;
> +}
> +static inline void iommu_rmw64(void *ov, uint32_t mask, uint32_t shift, uint64_t nv)
> +{
> + *(uint64_t *)ov &= ~((uint64_t)mask << shift);
> + *(uint64_t *)ov |= (nv & (uint64_t)mask) << shift;
> +}
> +
> +/*
> + * Root Entry: one per bus (256 x 128 bit = 4k)
> + *   0        = Present
> + *   1:11     = Reserved
> + *   12:HAW-1 = Context Table Pointer
> + *   HAW:63   = Reserved
> + *   64:127   = Reserved
> + */
> +#define ROOT_P (1L << 0)
> +struct root_entry {
> + uint64_t lo;
> + uint64_t hi;
> +};
> +
> +/* Check if root entry is valid */
> +static inline bool
> +root_entry_is_valid(struct root_entry *re)
> +{
> + return (re->lo & ROOT_P);
> +}
> +
> +/*
> + * Context Entry: one per devfn (256 x 128 bit = 4k)
> + *   0      = Present
> + *   1      = Fault Processing Disable
> + *   2:3    = Translation Type
> + *   4:11   = Reserved
> + *   12:63  = Second Level Page Translation
> + *   64:66  = Address Width (# PTE levels)
> + *   67:70  = Ignore
> + *   71     = Reserved
> + *   72:87  = Domain ID
> + *   88:127 = Reserved
> + */
> +#define CTX_P (1L << 0)
> +#define CTX_FPD (1L << 1)
> +#define CTX_T_MASK 0x3
> +#define CTX_T_SHIFT 2
> +enum {
> + CTX_T_MULTI,
> + CTX_T_IOTLB,
> + CTX_T_PASSTHRU
> +};
> +
> +#define CTX_H_AW_MASK 0x7
> +#define CTX_H_AW_SHIFT 0
> +#define CTX_H_USER_MASK 0xF
> +#define CTX_H_USER_SHIFT 3
> +#define CTX_H_DID_MASK 0xFFFF
> +#define CTX_H_DID_SHIFT 8
> +
> +struct context_entry {
> + uint64_t lo;
> + uint64_t hi;
> +};
> +
> +/* Set fault processing enable/disable */
> +static inline void
> +context_set_fpd(struct context_entry *ce, int enable)
> +{
> + ce->lo &= ~CTX_FPD;
> + if (enable)
> + ce->lo |= CTX_FPD;
> +}
> +
> +/* Set context entry present */
> +static inline void
> +context_set_present(struct context_entry *ce)
> +{
> + ce->lo |= CTX_P;
> +}
> +
> +/* Set Second Level Page Table Entry PA */
> +static inline void
> +context_set_slpte(struct context_entry *ce, paddr_t slpte)
> +{
> + ce->lo &= VTD_PAGE_MASK;
> + ce->lo |= (slpte & ~VTD_PAGE_MASK);
> +}
> +
> +/* Set translation type */
> +static inline void
> +context_set_translation_type(struct context_entry *ce, int tt)
> +{
> + ce->lo &= ~(CTX_T_MASK << CTX_T_SHIFT);
> + ce->lo |= ((tt & CTX_T_MASK) << CTX_T_SHIFT);
> +}
> +
> +/* Set Address Width (# of Page Table levels) */
> +static inline void
> +context_set_address_width(struct context_entry *ce, int lvl)
> +{
> + ce->hi &= ~(CTX_H_AW_MASK << CTX_H_AW_SHIFT);
> + ce->hi |= ((lvl & CTX_H_AW_MASK) << CTX_H_AW_SHIFT);
> +}
> +
> +/* Set domain ID */
> +static inline void
> +context_set_domain_id(struct context_entry *ce, int did)
> +{
> + ce->hi &= ~(CTX_H_DID_MASK << CTX_H_DID_SHIFT);
> + ce->hi |= ((did & CTX_H_DID_MASK) << CTX_H_DID_SHIFT);
> +}
> +
> +/* Get Second Level Page Table PA */
> +static inline uint64_t
> +context_pte(struct context_entry *ce)
> +{
> + return (ce->lo & ~VTD_PAGE_MASK);
> +}
> +
> +/* Get translation type */
> +static inline int
> +context_translation_type(struct context_entry *ce)
> +{
> + return (ce->lo >> CTX_T_SHIFT) & CTX_T_MASK;
> +}
> +
> +/* Get domain ID */
> +static inline int
> +context_domain_id(struct context_entry *ce)
> +{
> + return (ce->hi >> CTX_H_DID_SHIFT) & CTX_H_DID_MASK;
> +}
> +
> +/* Get Address Width */
> +static inline int
> +context_address_width(struct context_entry *ce)
> +{
> + return VTD_LEVELTOAW((ce->hi >> CTX_H_AW_SHIFT) & CTX_H_AW_MASK);
> +}
> +
> +/* Check if context entry is valid */
> +static inline bool
> +context_entry_is_valid(struct context_entry *ce)
> +{
> + return (ce->lo & CTX_P);
> +}
> +
> +/* User-available bits in context entry */
> +static inline int
> +context_user(struct context_entry *ce)
> +{
> + return (ce->hi >> CTX_H_USER_SHIFT) & CTX_H_USER_MASK;
> +}
> +
> +static inline void
> +context_set_user(struct context_entry *ce, int v)
> +{
> + ce->hi &= ~(CTX_H_USER_MASK << CTX_H_USER_SHIFT);
> + ce->hi |=  ((v & CTX_H_USER_MASK) << CTX_H_USER_SHIFT);
> +}
> +
> +/*
> + * Fault entry
> + *   0..HAW-1 = Fault address
> + *   HAW:63   = Reserved
> + *   64:71    = Source ID
> + *   96:103   = Fault Reason
> + *   104:123  = PV
> + *   124:125  = Address Translation type
> + *   126      = Type (0 = Read, 1 = Write)
> + *   127      = Fault bit
> + */
> +struct fault_entry
> +{
> + uint64_t lo;
> + uint64_t hi;
> +};
> +
> +/* PTE Entry: 512 x 64-bit = 4k */
> +#define PTE_P (1L << 0)
> +#define PTE_R 0x00
> +#define PTE_W (1L << 1)
> +#define PTE_US  (1L << 2)
> +#define PTE_PWT (1L << 3)
> +#define PTE_PCD (1L << 4)
> +#define PTE_A   (1L << 5)
> +#define PTE_D   (1L << 6)
> +#define PTE_PAT (1L << 7)
> +#define PTE_G   (1L << 8)
> +#define PTE_EA  (1L << 10)
> +#define PTE_XD  (1LL << 63)
> +
> +/* PDE Level entry */
> +#define PTE_PS  (1L << 7)
> +
> +/* PDPE Level entry */
> +
> +/* ----------------------------------------------------------------
> + * 5555555444444444333333333222222222111111111000000000------------
> + * [PML4 ->] PDPE.1GB
> + * [PML4 ->] PDPE.PDE -> PDE.2MB
> + * [PML4 ->] PDPE.PDE -> PDE -> PTE
> + * GAW0 = (12.20) (PTE)
> + * GAW1 = (21.29) (PDE)
> + * GAW2 = (30.38) (PDPE)
> + * GAW3 = (39.47) (PML4)
> + * GAW4 = (48.57) (n/a)
> + * GAW5 = (58.63) (n/a)
> + */
> +struct pte_entry {
> + uint64_t val;
> +};
> +
> +/*
> + * Queued Invalidation entry
> + *  0:3   = 01h
> + *  4:5   = Granularity
> + *  6:15  = Reserved
> + *  16:31 = Domain ID
> + *  32:47 = Source ID
> + *  48:49 = FM
> + */
> +
> +/* Invalidate Context Entry */
> +#define QI_CTX_DID_MASK 0xFFFF
> +#define QI_CTX_DID_SHIFT 16
> +#define QI_CTX_SID_MASK 0xFFFF
> +#define QI_CTX_SID_SHIFT 32
> +#define QI_CTX_FM_MASK 0x3
> +#define QI_CTX_FM_SHIFT 48
> +#define QI_CTX_IG_MASK 0x3
> +#define QI_CTX_IG_SHIFT 4
> +#define QI_CTX_DID(x) (((uint64_t)(x) << QI_CTX_DID_SHIFT))
> +#define QI_CTX_SID(x) (((uint64_t)(x) << QI_CTX_SID_SHIFT))
> +#define QI_CTX_FM(x) (((uint64_t)(x) << QI_CTX_FM_SHIFT))
> +
> +#define QI_CTX_IG_GLOBAL (CTX_GLOBAL << QI_CTX_IG_SHIFT)
> +#define QI_CTX_IG_DOMAIN (CTX_DOMAIN << QI_CTX_IG_SHIFT)
> +#define QI_CTX_IG_DEVICE (CTX_DEVICE << QI_CTX_IG_SHIFT)
> +
> +/* Invalidate IOTLB Entry */
> +#define QI_IOTLB_DID_MASK 0xFFFF
> +#define QI_IOTLB_DID_SHIFT 16
> +#define QI_IOTLB_IG_MASK 0x3
> +#define QI_IOTLB_IG_SHIFT 4
> +#define QI_IOTLB_DR (1LL << 6)
> +#define QI_IOTLB_DW (1LL << 5)
> +#define QI_IOTLB_DID(x) (((uint64_t)(x) << QI_IOTLB_DID_SHIFT))
> +
> +#define QI_IOTLB_IG_GLOBAL (1 << QI_IOTLB_IG_SHIFT)
> +#define QI_IOTLB_IG_DOMAIN (2 << QI_IOTLB_IG_SHIFT)
> +#define QI_IOTLB_IG_PAGE (3 << QI_IOTLB_IG_SHIFT)
> +
> +/* QI Commands */
> +#define QI_CTX 0x1
> +#define QI_IOTLB 0x2
> +#define QI_DEVTLB 0x3
> +#define QI_INTR 0x4
> +#define QI_WAIT 0x5
> +#define QI_EXTTLB 0x6
> +#define QI_PAS 0x7
> +#define QI_EXTDEV 0x8
> +
> +struct qi_entry {
> + uint64_t lo;
> + uint64_t hi;
> +};
> +
> +enum {
> + CTX_GLOBAL = 1,
> + CTX_DOMAIN,
> + CTX_DEVICE,
> +
> + IOTLB_GLOBAL = 1,
> + IOTLB_DOMAIN,
> + IOTLB_PAGE,
> +};
> +
> +enum {
> + VTD_FAULT_ROOT_P = 0x1,         /* P field in root entry is 0 */
> + VTD_FAULT_CTX_P = 0x2,          /* P field in context entry is 0 */
> + VTD_FAULT_CTX_INVAL = 0x3,      /* context AW/TT/SLPPTR invalid */
> + VTD_FAULT_LIMIT = 0x4,          /* Address is outside of MGAW */
> + VTD_FAULT_WRITE = 0x5,          /* Address-translation fault, non-writable */
> + VTD_FAULT_READ = 0x6,           /* Address-translation fault, non-readable */
> + VTD_FAULT_PTE_INVAL = 0x7,      /* page table hw access error */
> + VTD_FAULT_ROOT_INVAL = 0x8,     /* root table hw access error */
> + VTD_FAULT_CTX_TBL_INVAL = 0x9,  /* context entry hw access error */
> + VTD_FAULT_ROOT_RESERVED = 0xa,  /* non-zero reserved field in root entry */
> + VTD_FAULT_CTX_RESERVED = 0xb,   /* non-zero reserved field in context entry */
> + VTD_FAULT_PTE_RESERVED = 0xc,   /* non-zero reserved field in paging entry */
> + VTD_FAULT_CTX_TT = 0xd,         /* invalid translation type */
> +};
> +
> +#endif
> +
> +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
> +void dmar_ptmap(bus_dma_tag_t, bus_addr_t);
> +void acpidmar_sw(int);
> +
> +#define __EXTRACT(v,m) (((v) >> m##_SHIFT) & m##_MASK)
> diff --git a/sys/dev/acpi/acpireg.h b/sys/dev/acpi/acpireg.h
> index bfbb73ce2..8ba55c8ee 100644
> --- a/sys/dev/acpi/acpireg.h
> +++ b/sys/dev/acpi/acpireg.h
> @@ -623,6 +623,9 @@ struct acpi_ivmd {
>  struct acpi_ivhd {
>   uint8_t type;
>   uint8_t flags;
> +#define IVHD_PPRSUP (1L << 7)
> +#define IVHD_PREFSUP (1L << 6)
> +#define IVHD_COHERENT (1L << 5)
>  #define IVHD_IOTLB (1L << 4)
>  #define IVHD_ISOC (1L << 3)
>  #define IVHD_RESPASSPW (1L << 2)
> @@ -638,13 +641,28 @@ struct acpi_ivhd {
>  #define IVHD_UNITID_MASK 0x1F
>  #define IVHD_MSINUM_SHIFT 0
>  #define IVHD_MSINUM_MASK 0x1F
> - uint32_t reserved;
> + uint32_t feature;
>  } __packed;
>  
> +struct acpi_ivhd_ext {
> + uint8_t type;
> + uint8_t flags;
> + uint16_t length;
> + uint16_t devid;
> + uint16_t cap;
> + uint64_t address;
> + uint16_t segment;
> + uint16_t info;
> + uint32_t attrib;
> + uint64_t efr;
> + uint8_t reserved[8];
> +} __packed;
> +
>  union acpi_ivrs_entry {
>   struct {
>   uint8_t type;
>  #define IVRS_IVHD 0x10
> +#define IVRS_IVHD_EXT 0x11
>  #define IVRS_IVMD_ALL 0x20
>  #define IVRS_IVMD_SPECIFIED 0x21
>  #define IVRS_IVMD_RANGE 0x22
> @@ -652,6 +670,7 @@ union acpi_ivrs_entry {
>   uint16_t length;
>   } __packed;
>   struct acpi_ivhd ivhd;
> + struct acpi_ivhd_ext ivhd_ext;
>   struct acpi_ivmd ivmd;
>  } __packed;
>  
> diff --git a/sys/dev/acpi/amd_iommu.h b/sys/dev/acpi/amd_iommu.h
> new file mode 100644
> index 000000000..db6d371aa
> --- /dev/null
> +++ b/sys/dev/acpi/amd_iommu.h
> @@ -0,0 +1,358 @@
> +/*
> + * Copyright (c) 2019 Jordan Hargrave <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +#ifndef __amd_iommu_h__
> +#define __amd_iommu_h__
> +
> +#define DEV_TAB_BASE_REG 0x0000
> +#define CMD_BASE_REG 0x0008
> +#define EVT_BASE_REG 0x0010
> +
> +#define EXCL_BASE_REG 0x0020
> +#define EXCL_LIMIT_REG 0x0028
> +
> +/* Extended Feature Register */
> +#define EXTFEAT_REG 0x0030
> +#define  EFR_PREFSUP (1L << 0)
> +#define  EFR_PPRSUP (1L << 1)
> +#define  EFR_NXSUP (1L << 3)
> +#define  EFR_GTSUP (1L << 4)
> +#define  EFR_IASUP (1L << 6)
> +#define  EFR_GASUP (1L << 7)
> +#define  EFR_HESUP (1L << 8)
> +#define  EFR_PCSUP (1L << 9)
> +#define  EFR_HATS_SHIFT 10
> +#define  EFR_HATS_MASK 0x3
> +#define  EFR_GATS_SHIFT 12
> +#define  EFR_GATS_MASK 0x3
> +#define  EFR_GLXSUP_SHIFT 14
> +#define  EFR_GLXSUP_MASK 0x3
> +#define  EFR_SMIFSUP_SHIFT 16
> +#define  EFR_SMIFSUP_MASK 0x3        
> +#define  EFR_SMIFRC_SHIFT 18
> +#define  EFR_SMIFRC_MASK 0x7
> +#define  EFR_GAMSUP_SHIFT 21
> +#define  EFR_GAMSUP_MASK 0x7
> +
> +#define CMD_HEAD_REG 0x2000
> +#define CMD_TAIL_REG 0x2008
> +#define EVT_HEAD_REG 0x2010
> +#define EVT_TAIL_REG 0x2018
> +
> +#define IOMMUSTS_REG 0x2020
> +
> +#define DEV_TAB_MASK 0x000FFFFFFFFFF000LL
> +#define DEV_TAB_LEN 0x1FF
> +
> +/* IOMMU Control */
> +#define IOMMUCTL_REG 0x0018
> +#define  CTL_IOMMUEN (1L << 0)
> +#define  CTL_HTTUNEN (1L << 1)
> +#define  CTL_EVENTLOGEN (1L << 2)
> +#define  CTL_EVENTINTEN (1L << 3)
> +#define  CTL_COMWAITINTEN (1L << 4)
> +#define  CTL_INVTIMEOUT_SHIFT 5
> +#define  CTL_INVTIMEOUT_MASK   0x7
> +#define  CTL_INVTIMEOUT_NONE 0
> +#define  CTL_INVTIMEOUT_1MS     1
> +#define  CTL_INVTIMEOUT_10MS    2
> +#define  CTL_INVTIMEOUT_100MS   3
> +#define  CTL_INVTIMEOUT_1S      4
> +#define  CTL_INVTIMEOUT_10S     5
> +#define  CTL_INVTIMEOUT_100S    6
> +#define  CTL_PASSPW (1L << 8)
> +#define  CTL_RESPASSPW (1L << 9)
> +#define  CTL_COHERENT (1L << 10)
> +#define  CTL_ISOC (1L << 11)
> +#define  CTL_CMDBUFEN (1L << 12)
> +#define  CTL_PPRLOGEN (1L << 13)
> +#define  CTL_PPRINTEN (1L << 14)
> +#define  CTL_PPREN (1L << 15)
> +#define  CTL_GTEN (1L << 16)
> +#define  CTL_GAEN (1L << 17)
> +#define  CTL_CRW_SHIFT 18
> +#define  CTL_CRW_MASK 0xF
> +#define  CTL_SMIFEN (1L << 22)
> +#define  CTL_SLFWBDIS (1L << 23)
> +#define  CTL_SMIFLOGEN (1L << 24)
> +#define  CTL_GAMEN_SHIFT 25
> +#define  CTL_GAMEN_MASK 0x7
> +#define  CTL_GALOGEN (1L << 28)
> +#define  CTL_GAINTEN (1L << 29)
> +#define  CTL_DUALPPRLOGEN_SHIFT 30
> +#define  CTL_DUALPPRLOGEN_MASK 0x3
> +#define  CTL_DUALEVTLOGEN_SHIFT 32
> +#define  CTL_DUALEVTLOGEN_MASK 0x3
> +#define  CTL_DEVTBLSEGEN_SHIFT 34
> +#define  CTL_DEVTBLSEGEN_MASK 0x7
> +#define  CTL_PRIVABRTEN_SHIFT 37
> +#define  CTL_PRIVABRTEN_MASK 0x3
> +#define  CTL_PPRAUTORSPEN (1LL << 39)
> +#define  CTL_MARCEN (1LL << 40)
> +#define  CTL_BLKSTOPMRKEN (1LL << 41)
> +#define  CTL_PPRAUTOSPAON (1LL << 42)
> +#define  CTL_DOMAINIDPNE (1LL << 43)
> +
> +#define CMD_BASE_MASK 0x000FFFFFFFFFF000LL
> +#define CMD_TBL_SIZE 4096
> +#define CMD_TBL_LEN_4K (8LL << 56)
> +#define CMD_TBL_LEN_8K (9LL << 56)
> +
> +#define EVT_BASE_MASK 0x000FFFFFFFFFF000LL
> +#define EVT_TBL_SIZE 4096
> +#define EVT_TBL_LEN_4K (8LL << 56)
> +#define EVT_TBL_LEN_8K (9LL << 56)
> +
> +/*========================
> + * DEVICE TABLE ENTRY
> + * Contains mapping of bus-device-function
> + *
> + *  0       Valid (V)
> + *  1       Translation Valid (TV)
> + *  7:8     Host Address Dirty (HAD)
> + *  9:11    Page Table Depth (usually 4)
> + *  12:51   Page Table Physical Address
> + *  52      PPR Enable
> + *  53      GPRP
> + *  54      Guest I/O Protection Valid (GIoV)
> + *  55      Guest Translation Valid (GV)
> + *  56:57   Guest Levels translated (GLX)
> + *  58:60   Guest CR3 bits 12:14 (GCR3TRP)
> + *  61      I/O Read Permission (IR)
> + *  62      I/O Write Permission (IW)
> + *  64:79   Domain ID
> + *  80:95   Guest CR3 bits 15:30 (GCR3TRP)
> + *  96      IOTLB Enable (I)
> + *  97      Suppress multiple I/O page faults (SE)
> + *  98      Suppress all I/O page faults (SA)
> + *  99:100  Port I/O Control (IoCTL)
> + *  101     Cache IOTLB Hint
> + *  102     Snoop Disable (SD)
> + *  103     Allow Exclusion (EX)
> + *  104:105 System Management Message (SysMgt)
> + *  107:127 Guest CR3 bits 31:51 (GCR3TRP)
> + *  128     Interrupt Map Valid (IV)
> + *  129:132 Interrupt Table Length (IntTabLen)
> + *========================*/
> +struct ivhd_dte {
> + uint32_t dw0;
> + uint32_t dw1;
> + uint32_t dw2;
> + uint32_t dw3;
> + uint32_t dw4;
> + uint32_t dw5;
> + uint32_t dw6;
> + uint32_t dw7;
> +} __packed;
> +
> +#define DTE_V (1L << 0) // dw0
> +#define DTE_TV (1L << 1) // dw0
> +#define DTE_LEVEL_SHIFT 9 // dw0
> +#define DTE_LEVEL_MASK 0x7 // dw0
> +#define DTE_HPTRP_MASK 0x000FFFFFFFFFF000LL // dw0,1
> +
> +#define DTE_PPR (1L << 20) // dw1
> +#define DTE_GPRP (1L << 21) // dw1
> +#define DTE_GIOV (1L << 22) // dw1
> +#define DTE_GV (1L << 23) // dw1
> +#define DTE_IR (1L << 29) // dw1
> +#define DTE_IW (1L << 30) // dw1
> +
> +#define DTE_DID_MASK 0xFFFF // dw2
> +
> +#define DTE_IV (1L << 0) // dw4
> +#define DTE_SE (1L << 1)
> +#define DTE_SA (1L << 2)
> +#define DTE_INTTABLEN_SHIFT 1
> +#define DTE_INTTABLEN_MASK 0xF
> +#define DTE_IRTP_MASK 0x000FFFFFFFFFFFC0LL
> +
> +#define PTE_LVL5                48
> +#define PTE_LVL4                39
> +#define PTE_LVL3                30
> +#define PTE_LVL2                21
> +#define PTE_LVL1                12
> +
> +#define PTE_NXTLVL(x)           (((x) & 0x7) << 9)
> +#define PTE_PADDR_MASK 0x000FFFFFFFFFF000LL
> +#define PTE_IR                  (1LL << 61)
> +#define PTE_IW                  (1LL << 62)
> +
> +#define DTE_GCR312_MASK 0x3
> +#define DTE_GCR312_SHIFT 24
> +
> +#define DTE_GCR315_MASK 0xFFFF
> +#define DTE_GCR315_SHIFT 16
> +
> +#define DTE_GCR331_MASK 0xFFFFF
> +#define DTE_GCR331_SHIFT 12
> +
> +#define _get64(x)   *(uint64_t *)(x)
> +#define _put64(x,v) *(uint64_t *)(x) = (v)
> +
> +/* Set Guest CR3 address */
> +static inline void
> +dte_set_guest_cr3(struct ivhd_dte *dte, paddr_t paddr)
> +{
> + iommu_rmw32(&dte->dw1, DTE_GCR312_MASK, DTE_GCR312_SHIFT, paddr >> 12);
> + iommu_rmw32(&dte->dw2, DTE_GCR315_MASK, DTE_GCR315_SHIFT, paddr >> 15);
> + iommu_rmw32(&dte->dw3, DTE_GCR331_MASK, DTE_GCR331_SHIFT, paddr >> 31);
> +}
> +
> +/* Set Interrupt Remapping Root Pointer */
> +static inline void
> +dte_set_interrupt_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
> +{
> + uint64_t ov = _get64(&dte->dw4);
> + _put64(&dte->dw4, (ov & ~DTE_IRTP_MASK) | (paddr & DTE_IRTP_MASK));
> +}
> +
> +/* Set Interrupt Remapping Table length */
> +static inline void
> +dte_set_interrupt_table_length(struct ivhd_dte *dte, int nEnt)
> +{
> + iommu_rmw32(&dte->dw4, DTE_INTTABLEN_MASK, DTE_INTTABLEN_SHIFT, nEnt);
> +}
> +
> +/* Set Interrupt Remapping Valid */
> +static inline void
> +dte_set_interrupt_valid(struct ivhd_dte *dte)
> +{
> + dte->dw4 |= DTE_IV;
> +}
> +
> +/* Set Domain ID in Device Table Entry */
> +static inline void
> +dte_set_domain(struct ivhd_dte *dte, uint16_t did)
> +{
> + dte->dw2 = (dte->dw2 & ~DTE_DID_MASK) | (did & DTE_DID_MASK);
> +}
> +
> +/* Set Page Table Pointer for device */
> +static inline void
> +dte_set_host_page_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
> +{
> + uint64_t ov;
> +
> + ov = _get64(&dte->dw0) & ~DTE_HPTRP_MASK;
> + ov |= (paddr & DTE_HPTRP_MASK) | PTE_IW | PTE_IR;
> +
> + _put64(&dte->dw0, ov);
> +}
> +
> +/* Set Page Table Levels Mask */
> +static inline void
> +dte_set_mode(struct ivhd_dte *dte, int mode)
> +{
> + iommu_rmw32(&dte->dw0, DTE_LEVEL_MASK, DTE_LEVEL_SHIFT, mode);
> +}
> +
> +static inline void
> +dte_set_tv(struct ivhd_dte *dte)
> +{
> + dte->dw0 |= DTE_TV;
> +}
> +
> +/* Set Device Table Entry valid.
> + * Domain/Level/Mode/PageTable should already be set
> + */
> +static inline void
> +dte_set_valid(struct ivhd_dte *dte)
> +{
> + dte->dw0 |= DTE_V;
> +}
> +
> +/* Check if Device Table Entry is valid */
> +static inline int
> +dte_is_valid(struct ivhd_dte *dte)
> +{
> + return (dte->dw0 & DTE_V);
> +}
> +
> +/*=========================================
> + * COMMAND
> + *=========================================*/
> +struct ivhd_command {
> + uint32_t dw0;
> + uint32_t dw1;
> + uint32_t dw2;
> + uint32_t dw3;
> +} __packed;
> +
> +#define CMD_SHIFT 28
> +
> +enum {
> + COMPLETION_WAIT = 0x01,
> + INVALIDATE_DEVTAB_ENTRY = 0x02,
> + INVALIDATE_IOMMU_PAGES = 0x03,
> + INVALIDATE_IOTLB_PAGES = 0x04,
> + INVALIDATE_INTERRUPT_TABLE = 0x05,
> + PREFETCH_IOMMU_PAGES = 0x06,
> + COMPLETE_PPR_REQUEST = 0x07,
> + INVALIDATE_IOMMU_ALL = 0x08,
> +};
> +
> +/*=========================================
> + * EVENT
> + *=========================================*/
> +struct ivhd_event {
> + uint32_t dw0;
> + uint32_t dw1;
> + uint32_t dw2;   // address.lo
> + uint32_t dw3; // address.hi
> +} __packed;
> +
> +#define EVT_TYPE_SHIFT 28       // dw1.0xF0000000
> +#define EVT_TYPE_MASK 0xF
> +#define EVT_SID_SHIFT 0        // dw0.0x0000FFFF
> +#define EVT_SID_MASK 0xFFFF
> +#define EVT_DID_SHIFT 0
> +#define EVT_DID_MASK 0xFFFF   // dw1.0x0000FFFF
> +#define EVT_FLAG_SHIFT   16
> +#define EVT_FLAG_MASK   0xFFF    // dw1.0x0FFF0000
> +
> +/* IOMMU Fault reasons */
> +enum {
> + ILLEGAL_DEV_TABLE_ENTRY = 0x1,
> + IO_PAGE_FAULT = 0x2,
> + DEV_TAB_HARDWARE_ERROR = 0x3,
> + PAGE_TAB_HARDWARE_ERROR = 0x4,
> + ILLEGAL_COMMAND_ERROR = 0x5,
> + COMMAND_HARDWARE_ERROR = 0x6,
> + IOTLB_INV_TIMEOUT = 0x7,
> + INVALID_DEVICE_REQUEST = 0x8,
> +};
> +
> +#define EVT_GN (1L << 16)
> +#define EVT_NX (1L << 17)
> +#define EVT_US (1L << 18)
> +#define EVT_I (1L << 19)
> +#define EVT_PR (1L << 20)
> +#define EVT_RW (1L << 21)
> +#define EVT_PE (1L << 22)
> +#define EVT_RZ (1L << 23)
> +#define EVT_TR (1L << 24)
> +
> +struct iommu_softc;
> +
> +int ivhd_flush_devtab(struct iommu_softc *, int);
> +int ivhd_invalidate_iommu_all(struct iommu_softc *);
> +int ivhd_invalidate_interrupt_table(struct iommu_softc *, int);
> +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
> +int ivhd_invalidate_domain(struct iommu_softc *, int);
> +
> +void _dumppte(struct pte_entry *, int, vaddr_t);
> +
> +#endif
> diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi
> index e57c39938..1cf6f2fbb 100644
> --- a/sys/dev/acpi/files.acpi
> +++ b/sys/dev/acpi/files.acpi
> @@ -70,6 +70,11 @@ device acpiprt
>  attach acpiprt at acpi
>  file dev/acpi/acpiprt.c acpiprt needs-flag
>  
> +# DMAR device
> +device acpidmar
> +attach acpidmar at acpi
> +file dev/acpi/acpidmar.c acpidmar
> +
>  # Docking station
>  device acpidock
>  attach acpidock at acpi
>
>

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Jordan Hargrave
On Thu, Sep 03, 2020 at 09:06:59PM +0200, Mark Kettenis wrote:

> > Date: Tue, 1 Sep 2020 17:20:19 -0500
> > From: Jordan Hargrave <[hidden email]>
> >
> > [PATCH] Add IOMMU support for Intel VT-d and AMD Vi
> >
> > This hooks each pci device and overrides bus_dmamap_xxx to issue
> > remap of DMA requests to virtual DMA space.  It protects devices
> > from issuing I/O requests to memory in the system that is outside
> > the requested DMA space.
> > ---
> >  sys/arch/amd64/conf/GENERIC          |    1 +
> >  sys/arch/amd64/conf/RAMDISK          |    1 +
> >  sys/arch/amd64/conf/RAMDISK_CD       |    1 +
> >  sys/arch/amd64/include/pci_machdep.h |    3 +-
> >  sys/arch/amd64/pci/pci_machdep.c     |   15 +-
> >  sys/dev/acpi/acpi.c                  |    5 +
> >  sys/dev/acpi/acpidmar.c              | 2988 ++++++++++++++++++++++++++
> >  sys/dev/acpi/acpidmar.h              |  534 +++++
> >  sys/dev/acpi/acpireg.h               |   21 +-
> >  sys/dev/acpi/amd_iommu.h             |  358 +++
> >  sys/dev/acpi/files.acpi              |    5 +
> >  sys/dev/pci/pci.c                    |   28 +
> >  sys/dev/pci/pcivar.h                 |    2 +
> >  13 files changed, 3959 insertions(+), 3 deletions(-)
> >  create mode 100644 sys/dev/acpi/acpidmar.c
> >  create mode 100644 sys/dev/acpi/acpidmar.h
> >  create mode 100644 sys/dev/acpi/amd_iommu.h
>
> This needs some further cleanup and style love.  But let's leave that
> alone for now.
>
> How much of this code is really shared between DMAR and IVRS?  It
> would be nice to split it out between those two if we can avoid code
> duplication.
>

Yes that could be possible, and have a common iommu attach function?
I wrote that Intel code like 5 years ago... then kinda bolted the
AMD stuff on top last year.

> iommu_writel(), iommu_readl(), iommu_writeq() etc., are a bit too
> Linuxy; iommu_write_4(), iommu_read_4(), iommu_write_8() would be
> better names.

Fair enough
>
> I don't fully grasp why you need acpidmar_intr_establish().  I can see
> that MSI interrupts from devices behind the IOMMU need to go through
> the IOMMU since they're essentially memory transaction.  But your code
> seems to only deal with the IOMMU's error interrupt.  Does the IOMMU
> interrupt itself go through the IOMMU as well?
>
The Intel interrupt is a bit weird. It's not on a PCI device, so
pci_map_msi or something similar doesn't work.  There isn't a vector
number or anything that's provided in the DMAR structure.  So I went
with what Linux was doing for establishing the fault handler IRQ.

> Why do you need to explicitly call acpidmar_sw()?  Naively I would
> think that you need to call this fairly late, but you call it before
> config_suspend_all(DVACT_SUSPEND) happens.  Is there a reason why this
> can't happen as part of normal config_suspend_all(DVACT_SUSPEND)
> processing?

I haven't looked at the suspend/resume in a long while. I did have it
working on Intel at one point, but only one system worked and years ago.
Suspend/Resume doesn't even work at all on my current laptops, even without
these patches.

>
> I think the way you use pci_probe_device_hook() is fine.
>
> What is the point of having function that start with an underscore?
> Feels like another Linux-ism to me...
>
> A few more random things in the code below...
>
>
> > diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
> > index 2c49f91a1..1eda12bc9 100644
> > --- a/sys/arch/amd64/conf/GENERIC
> > +++ b/sys/arch/amd64/conf/GENERIC
> > @@ -45,6 +45,7 @@ acpibtn* at acpi?
> >  acpicpu* at acpi?
> >  acpicmos* at acpi?
> >  acpidock* at acpi?
> > +acpidmar0 at acpi?
> >  acpiec* at acpi?
> >  acpipci* at acpi?
> >  acpiprt* at acpi?
> > diff --git a/sys/arch/amd64/conf/RAMDISK b/sys/arch/amd64/conf/RAMDISK
> > index 10148add1..7ab48f32e 100644
> > --- a/sys/arch/amd64/conf/RAMDISK
> > +++ b/sys/arch/amd64/conf/RAMDISK
> > @@ -34,6 +34,7 @@ acpipci* at acpi?
> >  acpiprt* at acpi?
> >  acpimadt0 at acpi?
> >  #acpitz* at acpi?
> > +acpidmar* at acpi? disable
> >  
> >  mpbios0 at bios0
> >  
> > diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD
> > index 91022751e..82a24e210 100644
> > --- a/sys/arch/amd64/conf/RAMDISK_CD
> > +++ b/sys/arch/amd64/conf/RAMDISK_CD
> > @@ -48,6 +48,7 @@ sdhc* at acpi?
> >  acpihve* at acpi?
> >  chvgpio*        at acpi?
> >  glkgpio* at acpi?
> > +acpidmar* at acpi? disable
> >  
> >  mpbios0 at bios0
> >  
> > diff --git a/sys/arch/amd64/include/pci_machdep.h b/sys/arch/amd64/include/pci_machdep.h
> > index bc295cc22..c725bdc73 100644
> > --- a/sys/arch/amd64/include/pci_machdep.h
> > +++ b/sys/arch/amd64/include/pci_machdep.h
> > @@ -91,7 +91,8 @@ void *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
> >      int, struct cpu_info *,
> >      int (*)(void *), void *, const char *);
> >  void pci_intr_disestablish(pci_chipset_tag_t, void *);
> > -#define pci_probe_device_hook(c, a) (0)
> > +int pci_probe_device_hook(pci_chipset_tag_t,
> > +    struct pci_attach_args *);
> >  
> >  void pci_dev_postattach(struct device *, struct pci_attach_args *);
> >  
> > diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c
> > index cf4e835de..b700946a4 100644
> > --- a/sys/arch/amd64/pci/pci_machdep.c
> > +++ b/sys/arch/amd64/pci/pci_machdep.c
> > @@ -89,6 +89,11 @@
> >  #include <machine/mpbiosvar.h>
> >  #endif
> >  
> > +#include "acpi.h"
> > +#if NACPI > 0
> > +#include <dev/acpi/acpidmar.h>
> > +#endif
> > +
> >  /*
> >   * Memory Mapped Configuration space access.
> >   *
> > @@ -797,7 +802,15 @@ pci_init_extents(void)
> >   }
> >  }
> >  
> > -#include "acpi.h"
> > +int
> > +pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> > +{
> > +#if NACPI > 0
> > + acpidmar_pci_hook(pc, pa);
> > +#endif
> > + return 0;
> > +}
> > +
> >  #if NACPI > 0
> >  void acpi_pci_match(struct device *, struct pci_attach_args *);
> >  pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t);
> > diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c
> > index a6239198e..ea11483ad 100644
> > --- a/sys/dev/acpi/acpi.c
> > +++ b/sys/dev/acpi/acpi.c
> > @@ -49,6 +49,7 @@
> >  #include <dev/acpi/amltypes.h>
> >  #include <dev/acpi/acpidev.h>
> >  #include <dev/acpi/dsdt.h>
> > +#include <dev/acpi/acpidmar.h>
> >  #include <dev/wscons/wsdisplayvar.h>
> >  
> >  #include <dev/pci/pcidevs.h>
> > @@ -2448,6 +2449,8 @@ acpi_sleep_pm(struct acpi_softc *sc, int state)
> >      sc->sc_fadt->pm2_cnt_blk && sc->sc_fadt->pm2_cnt_len)
> >   acpi_write_pmreg(sc, ACPIREG_PM2_CNT, 0, ACPI_PM2_ARB_DIS);
> >  
> > + acpidmar_sw(DVACT_SUSPEND);
> > +
> >   /* Write SLP_TYPx values */
> >   rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
> >   regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
> > @@ -2483,6 +2486,8 @@ acpi_resume_pm(struct acpi_softc *sc, int fromstate)
> >  {
> >   uint16_t rega, regb, en;
> >  
> > + acpidmar_sw(DVACT_RESUME);
> > +
> >   /* Write SLP_TYPx values */
> >   rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
> >   regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
> > diff --git a/sys/dev/acpi/acpidmar.c b/sys/dev/acpi/acpidmar.c
> > new file mode 100644
> > index 000000000..48506e1b1
> > --- /dev/null
> > +++ b/sys/dev/acpi/acpidmar.c
> > @@ -0,0 +1,2988 @@
> > +/*
> > + * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
> > + *
> > + * Permission to use, copy, modify, and distribute this software for any
> > + * purpose with or without fee is hereby granted, provided that the above
> > + * copyright notice and this permission notice appear in all copies.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> > + */
> > +
> > +#include <sys/param.h>
> > +#include <sys/systm.h>
> > +#include <sys/kernel.h>
> > +#include <sys/device.h>
> > +#include <sys/malloc.h>
> > +#include <sys/queue.h>
> > +#include <sys/types.h>
> > +#include <sys/mbuf.h>
> > +#include <sys/proc.h>
> > +
> > +#include <uvm/uvm_extern.h>
> > +
> > +#include <machine/apicvar.h>
> > +#include <machine/biosvar.h>
> > +#include <machine/cpuvar.h>
> > +#include <machine/bus.h>
> > +
> > +#include <dev/acpi/acpireg.h>
> > +#include <dev/acpi/acpivar.h>
> > +#include <dev/acpi/acpidev.h>
> > +#include <dev/acpi/amltypes.h>
> > +#include <dev/acpi/dsdt.h>
> > +
> > +#include <uvm/uvm_extern.h>
> > +
> > +#include <machine/i8259.h>
> > +#include <machine/i82093reg.h>
> > +#include <machine/i82093var.h>
> > +#include <machine/i82489reg.h>
> > +#include <machine/i82489var.h>
> > +
> > +#include <machine/mpbiosvar.h>
> > +
> > +#include <dev/pci/pcireg.h>
> > +#include <dev/pci/pcivar.h>
> > +#include <dev/pci/pcidevs.h>
> > +#include <dev/pci/ppbreg.h>
> > +
> > +#include "ioapic.h"
> > +
> > +#include "acpidmar.h"
> > +#include "amd_iommu.h"
> > +
> > +#define dprintf(x...)
> > +
> > +#ifdef DDB
> > +int acpidmar_ddb = 0;
> > +#endif
> > +
> > +int intel_iommu_gfx_mapped = 0;
> > +int force_cm = 1;
> > +
> > +void showahci(void *);
> > +
> > +/* Page Table Entry per domain */
> > +struct iommu_softc;
> > +
> > +static inline int
> > +mksid(int b, int d, int f)
> > +{
> > + return (b << 8) + (d << 3) + f;
> > +}
> > +
> > +static inline int
> > +sid_devfn(int sid)
> > +{
> > + return sid & 0xff;
> > +}
> > +
> > +static inline int
> > +sid_bus(int sid)
> > +{
> > + return (sid >> 8) & 0xff;
> > +}
> > +
> > +static inline int
> > +sid_dev(int sid)
> > +{
> > + return (sid >> 3) & 0x1f;
> > +}
> > +
> > +static inline int
> > +sid_fun(int sid)
> > +{
> > + return (sid >> 0) & 0x7;
> > +}
> > +
> > +/* Page Table Entry per domain */
> > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > +
> > +/* Alias mapping */
> > +#define SID_INVALID 0x80000000L
> > +static uint32_t sid_flag[65536];
>
> Can we avoid having these large arrays, or at least allocate them
> dynamically?  That would also avoid the explicit alignment which is
> somewhat nasty since it affects the entire kernel.

OK. But the hwdte does need the 2M area to be all contiguous but it is not
needed for DMAR/Intel.  You *can* have up to 8 different device table entries
though to split up the area.  

>
> > +
> > +struct domain_dev {
> > + int sid;
> > + int sec;
> > + int sub;
> > + TAILQ_ENTRY(domain_dev) link;
> > +};
> > +
> > +struct domain {
> > + struct iommu_softc *iommu;
> > + int did;
> > + int gaw;
> > + struct pte_entry *pte;
> > + paddr_t ptep;
> > + struct bus_dma_tag dmat;
> > + int flag;
> > +
> > + struct mutex            exlck;
> > + char exname[32];
> > + struct extent *iovamap;
> > + TAILQ_HEAD(,domain_dev) devices;
> > + TAILQ_ENTRY(domain) link;
> > +};
> > +
> > +#define DOM_DEBUG 0x1
> > +#define DOM_NOMAP 0x2
> > +
> > +struct dmar_devlist {
> > + int type;
> > + int bus;
> > + int ndp;
> > + struct acpidmar_devpath *dp;
> > + TAILQ_ENTRY(dmar_devlist) link;
> > +};
> > +
> > +TAILQ_HEAD(devlist_head, dmar_devlist);
> > +
> > +struct ivhd_devlist {
> > + int start_id;
> > + int end_id;
> > + int cfg;
> > + TAILQ_ENTRY(ivhd_devlist) link;
> > +};
> > +
> > +struct rmrr_softc {
> > + TAILQ_ENTRY(rmrr_softc) link;
> > + struct devlist_head devices;
> > + int segment;
> > + uint64_t start;
> > + uint64_t end;
> > +};
> > +
> > +struct atsr_softc {
> > + TAILQ_ENTRY(atsr_softc) link;
> > + struct devlist_head devices;
> > + int segment;
> > + int flags;
> > +};
> > +
> > +struct iommu_pic {
> > + struct pic pic;
> > + struct iommu_softc *iommu;
> > +};
> > +
> > +#define IOMMU_FLAGS_CATCHALL 0x1
> > +#define IOMMU_FLAGS_BAD 0x2
> > +#define IOMMU_FLAGS_SUSPEND 0x4
> > +
> > +struct iommu_softc {
> > + TAILQ_ENTRY(iommu_softc)link;
> > + struct devlist_head devices;
> > + int id;
> > + int flags;
> > + int segment;
> > +
> > + struct mutex reg_lock;
> > +
> > + bus_space_tag_t iot;
> > + bus_space_handle_t ioh;
> > +
> > + uint64_t cap;
> > + uint64_t ecap;
> > + uint32_t gcmd;
> > +
> > + int mgaw;
> > + int agaw;
> > + int ndoms;
> > +
> > + struct root_entry *root;
> > + struct context_entry *ctx[256];
> > +
> > + void *intr;
> > + struct iommu_pic pic;
> > + int fedata;
> > + uint64_t feaddr;
> > + uint64_t rtaddr;
> > +
> > + // Queued Invalidation
> > + int qi_head;
> > + int qi_tail;
> > + paddr_t qip;
> > + struct qi_entry *qi;
> > +
> > + struct domain *unity;
> > + TAILQ_HEAD(,domain) domains;
> > +
> > + // AMD iommu
> > + struct ivhd_dte         *dte;
> > + void *cmd_tbl;
> > + void *evt_tbl;
> > + paddr_t cmd_tblp;
> > + paddr_t evt_tblp;
> > + uint64_t wv[128] __aligned(4096);
>
> This wv array isn't used as far as I can tell.

Ah I was doing some testing on the commands.. I keep getting iommu command timeouts

>
> > +};
> > +
> > +static inline int iommu_bad(struct iommu_softc *sc)
> > +{
> > + return (sc->flags & IOMMU_FLAGS_BAD);
> > +}
> > +
> > +static inline int iommu_enabled(struct iommu_softc *sc)
> > +{
> > + if (sc->dte) {
> > + return 1;
> > + }
> > + return (sc->gcmd & GCMD_TE);
> > +}
> > +
> > +struct acpidmar_softc {
> > + struct device sc_dev;
> > +
> > + pci_chipset_tag_t sc_pc;
> > + bus_space_tag_t sc_memt;
> > + int sc_haw;
> > + int sc_flags;
> > +
> > + TAILQ_HEAD(,iommu_softc)sc_drhds;
> > + TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
> > + TAILQ_HEAD(,atsr_softc) sc_atsrs;
> > +};
> > +
> > +int acpidmar_activate(struct device *, int);
> > +int acpidmar_match(struct device *, void *, void *);
> > +void acpidmar_attach(struct device *, struct device *, void *);
> > +struct domain   *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
> > +
> > +struct cfattach acpidmar_ca = {
> > + sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
> > +};
> > +
> > +struct cfdriver acpidmar_cd = {
> > + NULL, "acpidmar", DV_DULL
> > +};
> > +
> > +struct acpidmar_softc *acpidmar_sc;
> > +int acpidmar_intr(void *);
> > +int acpiivhd_intr(void *);
> > +
> > +#define DID_UNITY 0x1
> > +
> > +void _dumppte(struct pte_entry *, int, vaddr_t);
> > +
> > +struct domain *domain_create(struct iommu_softc *, int);
> > +struct domain *domain_lookup(struct acpidmar_softc *, int, int);
> > +
> > +void domain_unload_map(struct domain *, bus_dmamap_t);
> > +void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
> > +
> > +void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
> > +void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
> > +void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
> > +void domain_map_pthru(struct domain *, paddr_t, paddr_t);
> > +
> > +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
> > +void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
> > +    struct devlist_head *);
> > +int  acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
> > +
> > +void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
> > +void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
> > +void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
> > +void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
> > +void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
> > +
> > +void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
> > +    const char *);
> > +
> > +void iommu_writel(struct iommu_softc *, int, uint32_t);
> > +uint32_t iommu_readl(struct iommu_softc *, int);
> > +void iommu_writeq(struct iommu_softc *, int, uint64_t);
> > +uint64_t iommu_readq(struct iommu_softc *, int);
> > +void iommu_showfault(struct iommu_softc *, int,
> > +    struct fault_entry *);
> > +void iommu_showcfg(struct iommu_softc *, int);
> > +
> > +int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> > +    struct acpidmar_drhd *);
> > +int iommu_enable_translation(struct iommu_softc *, int);
> > +void iommu_enable_qi(struct iommu_softc *, int);
> > +void iommu_flush_cache(struct iommu_softc *, void *, size_t);
> > +void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
> > +void iommu_flush_write_buffer(struct iommu_softc *);
> > +void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
> > +
> > +void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
> > +void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
> > +void iommu_flush_tlb(struct iommu_softc *, int, int);
> > +void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
> > +
> > +void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
> > +
> > +const char *dmar_bdf(int);
> > +
> > +const char *
> > +dmar_bdf(int sid)
> > +{
> > + static char bdf[32];
> > +
> > + snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
> > +    sid_bus(sid), sid_dev(sid), sid_fun(sid));
> > +
> > + return (bdf);
> > +}
> > +
> > +/* busdma */
> > +static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
> > +    bus_size_t, int, bus_dmamap_t *);
> > +static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
> > +static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
> > +    struct proc *, int);
> > +static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
> > +    int);
> > +static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
> > +static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
> > +    bus_dma_segment_t *, int, bus_size_t, int);
> > +static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
> > +static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
> > +    bus_size_t, int);
> > +static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
> > +    bus_dma_segment_t *, int, int *, int);
> > +static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
> > +static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
> > +    caddr_t *, int);
> > +static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
> > +static paddr_t dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
> > +    int, int);
> > +
> > +static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
> > +const char *dom_bdf(struct domain *dom);
> > +void domain_map_check(struct domain *dom);
> > +
> > +struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *npte, vaddr_t va, int shift, uint64_t flags);
> > +int  ivhd_poll_events(struct iommu_softc *iommu);
> > +void ivhd_showit(struct iommu_softc *);
> > +void ivhd_showdte(void);
> > +void ivhd_showcmd(struct iommu_softc *);
> > +
> > +static inline int
> > +debugme(struct domain *dom)
> > +{
> > + return 0;
> > + return (dom->flag & DOM_DEBUG);
> > +}
> > +
> > +void
> > +domain_map_check(struct domain *dom)
> > +{
> > + struct iommu_softc *iommu;
> > + struct domain_dev *dd;
> > + struct context_entry *ctx;
> > + int v;
> > +
> > + iommu = dom->iommu;
> > + TAILQ_FOREACH(dd, &dom->devices, link) {
> > + acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
> > +
> > + if (iommu->dte)
> > + continue;
> > +
> > + /* Check if this is the first time we are mapped */
> > + ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
> > + v = context_user(ctx);
> > + if (v != 0xA) {
> > + printf("  map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> > +    iommu->segment,
> > +    sid_bus(dd->sid),
> > +    sid_dev(dd->sid),
> > +    sid_fun(dd->sid),
> > +    iommu->id,
> > +    dom->did);
> > + context_set_user(ctx, 0xA);
> > + }
> > + }
> > +}
> > +
> > +/* Map a single page as passthrough - used for DRM */
> > +void
> > +dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
> > +{
> > + struct domain *dom = tag->_cookie;
> > +
> > + if (!acpidmar_sc)
> > + return;
> > + domain_map_check(dom);
> > + domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
> > +}
> > +
> > +/* Map a range of pages 1:1 */
> > +void
> > +domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
> > +{
> > + domain_map_check(dom);
> > + while (start < end) {
> > + domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
> > + start += VTD_PAGE_SIZE;
> > + }
> > +}
> > +
> > +/* Map a single paddr to IOMMU paddr */
> > +void
> > +domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
> > +{
> > + paddr_t paddr;
> > + struct pte_entry *pte, *npte;
> > + int lvl, idx;
> > + struct iommu_softc *iommu;
> > +
> > + iommu = dom->iommu;
> > + /* Insert physical address into virtual address map
> > + * XXX: could we use private pmap here?
> > + * essentially doing a pmap_enter(map, va, pa, prot);
> > + */
> > +
> > + /* Only handle 4k pages for now */
> > + npte = dom->pte;
> > + for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl>= VTD_LEVEL0;
> > +    lvl -= VTD_STRIDE_SIZE) {
> > + idx = (va >> lvl) & VTD_STRIDE_MASK;
> > + pte = &npte[idx];
> > + if (lvl == VTD_LEVEL0) {
> > + /* Level 1: Page Table - add physical address */
> > + pte->val = pa | flags;
> > + iommu_flush_cache(iommu, pte, sizeof(*pte));
> > + break;
> > + } else if (!(pte->val & PTE_P)) {
> > + /* Level N: Point to lower level table */
> > + iommu_alloc_page(iommu, &paddr);
> > + pte->val = paddr | PTE_P | PTE_R | PTE_W;
> > + iommu_flush_cache(iommu, pte, sizeof(*pte));
> > + }
> > + npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
> > + }
> > +}
> > +
> > +/* Map a single paddr to IOMMU paddr: AMD
> > + * physical address breakdown into levels:
> > + * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
> > + *        5.55555555.44444444.43333333,33222222.22211111.1111----.--------
> > + * mode:
> > + *  000 = none   shift
> > + *  001 = 1 [21].12
> > + *  010 = 2 [30].21
> > + *  011 = 3 [39].30
> > + *  100 = 4 [48].39
> > + *  101 = 5 [57]
> > + *  110 = 6
> > + *  111 = reserved
> > + */
> > +struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
> > + int shift, uint64_t flags)
> > +{
> > + paddr_t paddr;
> > + int idx;
> > +
> > + idx = (va >> shift) & VTD_STRIDE_MASK;
> > + if (!(pte[idx].val & PTE_P)) {
> > + /* Page Table entry is not present... create a new page entry */
> > + iommu_alloc_page(iommu, &paddr);
> > + pte[idx].val = paddr | flags;
> > + iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
> > + }
> > + return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
> > +}
> > +
> > +void
> > +domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
> > +{
> > + struct pte_entry *pte;
> > + struct iommu_softc *iommu;
> > + int idx;
> > +
> > + iommu = dom->iommu;
> > + /* Insert physical address into virtual address map
> > + * XXX: could we use private pmap here?
> > + * essentially doing a pmap_enter(map, va, pa, prot);
> > + */
> > +
> > + /* Always assume AMD levels=4                           */
> > + /*        39        30        21        12              */
> > + /* ---------|---------|---------|---------|------------ */
> > + pte = dom->pte;
> > + //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
> > + pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
> > + pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
> > + //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
> > +
> > + if (flags)
> > + flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
> > +
> > + /* Level 1: Page Table - add physical address */
> > + idx = (va >> 12) & 0x1FF;
> > + pte[idx].val = pa | flags;
> > +
> > + iommu_flush_cache(iommu, pte, sizeof(*pte));
> > +}
> > +
> > +static void
> > +dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
> > +    const char *lbl)
> > +{
> > + struct domain *dom = tag->_cookie;
> > + int i;
> > +
> > + return;
> > + if (!debugme(dom))
> > + return;
> > + printf("%s: %s\n", lbl, dom_bdf(dom));
> > + for (i = 0; i < nseg; i++) {
> > + printf("  %.16llx %.8x\n",
> > +    (uint64_t)segs[i].ds_addr,
> > +    (uint32_t)segs[i].ds_len);
> > + }
> > +}
> > +
> > +/* Unload mapping */
> > +void
> > +domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
> > +{
> > + bus_dma_segment_t *seg;
> > + paddr_t base, end, idx;
> > + psize_t alen;
> > + int i;
> > +
> > + if (iommu_bad(dom->iommu)) {
> > + printf("unload map no iommu\n");
> > + return;
> > + }
> > +
> > + //acpidmar_intr(dom->iommu);
> > + for (i = 0; i < dmam->dm_nsegs; i++) {
> > + seg  = &dmam->dm_segs[i];
> > +
> > + base = trunc_page(seg->ds_addr);
> > + end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
> > + alen = end - base;
> > +
> > + if (debugme(dom)) {
> > + printf("  va:%.16llx len:%x\n",
> > +    (uint64_t)base, (uint32_t)alen);
> > + }
> > +
> > + /* Clear PTE */
> > + for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
> > + domain_map_page(dom, base + idx, 0, 0);
> > +
> > + if (dom->flag & DOM_NOMAP) {
> > + //printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
> > + continue;
> > + }
> > +
> > + mtx_enter(&dom->exlck);
> > + if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
> > + panic("domain_unload_map: extent_free");
> > + }
> > + mtx_leave(&dom->exlck);
> > + }
> > +}
> > +
> > +/* map.segs[x].ds_addr is modified to IOMMU virtual PA */
> > +void
> > +domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
> > +{
> > + bus_dma_segment_t *seg;
> > + struct iommu_softc *iommu;
> > + paddr_t base, end, idx;
> > + psize_t alen;
> > + u_long res;
> > + int i;
> > +
> > + iommu = dom->iommu;
> > + if (!iommu_enabled(iommu)) {
> > + /* Lazy enable translation when required */
> > + if (iommu_enable_translation(iommu, 1)) {
> > + return;
> > + }
> > + }
> > + domain_map_check(dom);
> > + //acpidmar_intr(iommu);
> > + for (i = 0; i < map->dm_nsegs; i++) {
> > + seg = &map->dm_segs[i];
> > +
> > + base = trunc_page(seg->ds_addr);
> > + end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
> > + alen = end - base;
> > + res  = base;
> > +
> > + if (dom->flag & DOM_NOMAP) {
> > + goto nomap;
> > + }
> > +
> > + /* Allocate DMA Virtual Address */
> > + mtx_enter(&dom->exlck);
> > + if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
> > +    map->_dm_boundary, EX_NOWAIT, &res)) {
> > + panic("domain_load_map: extent_alloc");
> > + }
> > + if (res == -1) {
> > + panic("got -1 address\n");
> > + }
> > + mtx_leave(&dom->exlck);
> > +
> > + /* Reassign DMA address */
> > + seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
> > +nomap:
> > + if (debugme(dom)) {
> > + printf("  LOADMAP: %.16llx %x => %.16llx\n",
> > +    (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
> > +    (uint64_t)res);
> > + }
> > + for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
> > + domain_map_page(dom, res + idx, base + idx,
> > +    PTE_P | pteflag);
> > + }
> > + }
> > + if ((iommu->cap & CAP_CM) || force_cm) {
> > + iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
> > + } else {
> > + iommu_flush_write_buffer(iommu);
> > + }
> > +}
> > +
> > +const char *
> > +dom_bdf(struct domain *dom)
> > +{
> > + struct domain_dev *dd;
> > + static char mmm[48];
> > +
> > + dd = TAILQ_FIRST(&dom->devices);
> > + snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
> > +    dmar_bdf(dd->sid), dom->iommu->id, dom->did,
> > +    dom->did == DID_UNITY ? " [unity]" : "");
> > + return (mmm);
> > +}
> > +
> > +/* Bus DMA Map functions */
> > +static int
> > +dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
> > +    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
> > +{
> > + int rc;
> > +
> > + rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
> > +    flags, dmamp);
> > + if (!rc) {
> > + dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
> > +    __FUNCTION__);
> > + }
> > + return (rc);
> > +}
> > +
> > +static void
> > +dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
> > +{
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > + _bus_dmamap_destroy(tag, dmam);
> > +}
> > +
> > +static int
> > +dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
> > +    bus_size_t buflen, struct proc *p, int flags)
> > +{
> > + struct domain *dom = tag->_cookie;
> > + int rc;
> > +
> > + rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
> > + if (!rc) {
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > +    __FUNCTION__);
> > + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > +    __FUNCTION__);
> > + }
> > + return (rc);
> > +}
> > +
> > +static int
> > +dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
> > +    int flags)
> > +{
> > + struct domain *dom = tag->_cookie;
> > + int rc;
> > +
> > + rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
> > + if (!rc) {
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > +    __FUNCTION__);
> > + domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > +    __FUNCTION__);
> > + }
> > + return (rc);
> > +}
> > +
> > +static int
> > +dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
> > +    int flags)
> > +{
> > + struct domain *dom = tag->_cookie;
> > + int rc;
> > +
> > + rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
> > + if (!rc) {
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > +    __FUNCTION__);
> > + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > +    __FUNCTION__);
> > + }
> > + return (rc);
> > +}
> > +
> > +static int
> > +dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
> > +    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
> > +{
> > + struct domain *dom = tag->_cookie;
> > + int rc;
> > +
> > + rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
> > + if (!rc) {
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > +    __FUNCTION__);
> > + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > +    __FUNCTION__);
> > + }
> > + return (rc);
> > +}
> > +
> > +static void
> > +dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
> > +{
> > + struct domain *dom = tag->_cookie;
> > +
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > + domain_unload_map(dom, dmam);
> > + _bus_dmamap_unload(tag, dmam);
> > +}
> > +
> > +static void
> > +dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
> > +    bus_size_t len, int ops)
> > +{
> > +#if 0
> > + struct domain *dom = tag->_cookie;
> > + //int flag;
> > +
> > + flag = PTE_P;
> > + //acpidmar_intr(dom->iommu);
> > + if (ops == BUS_DMASYNC_PREREAD) {
> > + /* make readable */
> > + flag |= PTE_R;
> > + }
> > + else if (ops == BUS_DMASYNC_PREWRITE) {
> > + /* make writeable */
> > + flag |= PTE_W;
> > + }
> > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > +#endif
> > + _bus_dmamap_sync(tag, dmam, offset, len, ops);
> > +}
> > +
> > +static int
> > +dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
> > +    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
> > +    int flags)
> > +{
> > + int rc;
> > +
> > + rc = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
> > +    rsegs, flags);
> > + if (!rc) {
> > + dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
> > + }
> > + return (rc);
> > +}
> > +
> > +static void
> > +dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
> > +{
> > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > + _bus_dmamem_free(tag, segs, nsegs);
> > +}
> > +
> > +static int
> > +dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> > +    size_t size, caddr_t *kvap, int flags)
> > +{
> > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > + return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
> > +}
> > +
> > +static void
> > +dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
> > +{
> > + struct domain *dom = tag->_cookie;
> > +
> > + if (debugme(dom)) {
> > + printf("dmamap_unmap: %s\n", dom_bdf(dom));
> > + }
> > + _bus_dmamem_unmap(tag, kva, size);
> > +}
> > +
> > +static paddr_t
> > +dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> > +    off_t off, int prot, int flags)
> > +{
> > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > + return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
> > +}
> > +
> > +/*===================================
> > + * IOMMU code
> > + *===================================*/
> > +
> > +/* Intel: Set Context Root Address */
> > +void
> > +iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
> > +{
> > + int i, sts;
> > +
> > + mtx_enter(&iommu->reg_lock);
> > + iommu_writeq(iommu, DMAR_RTADDR_REG, paddr);
> > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
> > + for (i = 0; i < 5; i++) {
> > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > + if (sts & GSTS_RTPS)
> > + break;
> > + }
> > + mtx_leave(&iommu->reg_lock);
> > +
> > + if (i == 5) {
> > + printf("set_rtaddr fails\n");
> > + }
> > +}
> > +
> > +/* COMMON: Allocate a new memory page */
> > +void *
> > +iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
> > +{
> > + void *va;
> > +
> > + *paddr = 0;
> > + va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
> > + if (va == NULL) {
> > + panic("can't allocate page\n");
> > + }
> > + pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
> > + return (va);
> > +}
> > +
> > +
> > +/* Intel: Issue command via queued invalidation */
> > +void
> > +iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
> > +{
> > +#if 0
> > + struct qi_entry *pi, *pw;
> > +
> > + idx = iommu->qi_head;
> > + pi = &iommu->qi[idx];
> > + pw = &iommu->qi[(idx+1) % MAXQ];
> > + iommu->qi_head = (idx+2) % MAXQ;
> > +
> > + memcpy(pw, &qi, sizeof(qi));
> > + issue command;
> > + while (pw->xxx)
> > + ;
> > +#endif
> > +}
> > +
> > +/* Intel: Flush TLB entries, Queued Invalidation mode */
> > +void
> > +iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
> > +{
> > + struct qi_entry qi;
> > +
> > + /* Use queued invalidation */
> > + qi.hi = 0;
> > + switch (mode) {
> > + case IOTLB_GLOBAL:
> > + qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
> > + break;
> > + case IOTLB_DOMAIN:
> > + qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
> > +    QI_IOTLB_DID(did);
> > + break;
> > + case IOTLB_PAGE:
> > + qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
> > + qi.hi = 0;
> > + break;
> > + }
> > + if (iommu->cap & CAP_DRD)
> > + qi.lo |= QI_IOTLB_DR;
> > + if (iommu->cap & CAP_DWD)
> > + qi.lo |= QI_IOTLB_DW;
> > + iommu_issue_qi(iommu, &qi);
> > +}
> > +
> > +/* Intel: Flush Context entries, Queued Invalidation mode */
> > +void
> > +iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
> > +    int sid, int fm)
> > +{
> > + struct qi_entry qi;
> > +
> > + /* Use queued invalidation */
> > + qi.hi = 0;
> > + switch (mode) {
> > + case CTX_GLOBAL:
> > + qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
> > + break;
> > + case CTX_DOMAIN:
> > + qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
> > + break;
> > + case CTX_DEVICE:
> > + qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
> > +    QI_CTX_SID(sid) | QI_CTX_FM(fm);
> > + break;
> > + }
> > + iommu_issue_qi(iommu, &qi);
> > +}
> > +
> > +/* Intel: Flush write buffers */
> > +void
> > +iommu_flush_write_buffer(struct iommu_softc *iommu)
> > +{
> > + int i, sts;
> > +
> > + if (iommu->dte)
> > + return;
> > + if (!(iommu->cap & CAP_RWBF))
> > + return;
> > + printf("writebuf\n");
> > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
> > + for (i = 0; i < 5; i++) {
> > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > + if (sts & GSTS_WBFS)
> > + break;
> > + delay(10000);
> > + }
> > + if (i == 5) {
> > + printf("write buffer flush fails\n");
> > + }
> > +}
> > +
> > +void
> > +iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
> > +{
> > + if (iommu->dte) {
> > + pmap_flush_cache((vaddr_t)addr, size);
> > + return;
> > + }
> > + if (!(iommu->ecap & ECAP_C))
> > + pmap_flush_cache((vaddr_t)addr, size);
> > +}
> > +
> > +/*
> > + * Intel: Flush IOMMU TLB Entries
> > + * Flushing can occur globally, per domain or per page
> > + */
> > +void
> > +iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
> > +{
> > + int n;
> > + uint64_t val;
> > +
> > + /* Call AMD */
> > + if (iommu->dte) {
> > + ivhd_invalidate_domain(iommu, did);
> > + //ivhd_poll_events(iommu);
> > + return;
> > + }
> > + val = IOTLB_IVT;
> > + switch (mode) {
> > + case IOTLB_GLOBAL:
> > + val |= IIG_GLOBAL;
> > + break;
> > + case IOTLB_DOMAIN:
> > + val |= IIG_DOMAIN | IOTLB_DID(did);
> > + break;
> > + case IOTLB_PAGE:
> > + val |= IIG_PAGE | IOTLB_DID(did);
> > + break;
> > + }
> > +
> > + /* Check for Read/Write Drain */
> > + if (iommu->cap & CAP_DRD)
> > + val |= IOTLB_DR;
> > + if (iommu->cap & CAP_DWD)
> > + val |= IOTLB_DW;
> > +
> > + mtx_enter(&iommu->reg_lock);
> > +
> > + iommu_writeq(iommu, DMAR_IOTLB_REG(iommu), val);
> > + n = 0;
> > + do {
> > + val = iommu_readq(iommu, DMAR_IOTLB_REG(iommu));
> > + } while (n++ < 5 && val & IOTLB_IVT);
> > +
> > + mtx_leave(&iommu->reg_lock);
> > +
> > +#ifdef DEBUG
> > + {
> > + static int rg;
> > + int a, r;
> > +
> > + if (!rg) {
> > + a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
> > + r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
> > + if (a != r) {
> > + printf("TLB Requested:%d Actual:%d\n", r, a);
> > + rg = 1;
> > + }
> > + }
> > + }
> > +#endif
> > +}
> > +
> > +/* Intel: Flush IOMMU settings
> > + * Flushes can occur globally, per domain, or per device
> > + */
> > +void
> > +iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
> > +{
> > + uint64_t val;
> > + int n;
> > +
> > + if (iommu->dte)
> > + return;
> > + val = CCMD_ICC;
> > + switch (mode) {
> > + case CTX_GLOBAL:
> > + val |= CIG_GLOBAL;
> > + break;
> > + case CTX_DOMAIN:
> > + val |= CIG_DOMAIN | CCMD_DID(did);
> > + break;
> > + case CTX_DEVICE:
> > + val |= CIG_DEVICE | CCMD_DID(did) |
> > +    CCMD_SID(sid) | CCMD_FM(fm);
> > + break;
> > + }
> > +
> > + mtx_enter(&iommu->reg_lock);
> > +
> > + n = 0;
> > + iommu_writeq(iommu, DMAR_CCMD_REG, val);
> > + do {
> > + val = iommu_readq(iommu, DMAR_CCMD_REG);
> > + } while (n++ < 5 && val & CCMD_ICC);
> > +
> > + mtx_leave(&iommu->reg_lock);
> > +
> > +#ifdef DEBUG
> > + {
> > + static int rg;
> > + int a, r;
> > +
> > + if (!rg) {
> > + a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
> > + r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
> > + if (a != r) {
> > + printf("CTX Requested:%d Actual:%d\n", r, a);
> > + rg = 1;
> > + }
> > + }
> > + }
> > +#endif
> > +}
> > +
> > +/* Intel: Enable Queued Invalidation */
> > +void
> > +iommu_enable_qi(struct iommu_softc *iommu, int enable)
> > +{
> > + int n = 0;
> > + int sts;
> > +
> > + if (!(iommu->ecap & ECAP_QI))
> > + return;
> > +
> > + if (enable) {
> > + iommu->gcmd |= GCMD_QIE;
> > +
> > + mtx_enter(&iommu->reg_lock);
> > +
> > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > + do {
> > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > + } while (n++ < 5 && !(sts & GSTS_QIES));
> > +
> > + mtx_leave(&iommu->reg_lock);
> > +
> > + printf("set.qie: %d\n", n);
> > + } else {
> > + iommu->gcmd &= ~GCMD_QIE;
> > +
> > + mtx_enter(&iommu->reg_lock);
> > +
> > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > + do {
> > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > + } while (n++ < 5 && sts & GSTS_QIES);
> > +
> > + mtx_leave(&iommu->reg_lock);
> > +
> > + printf("clr.qie: %d\n", n);
> > + }
> > +}
> > +
> > +/* Intel: Enable IOMMU translation */
> > +int
> > +iommu_enable_translation(struct iommu_softc *iommu, int enable)
> > +{
> > + uint32_t sts;
> > + uint64_t reg;
> > + int n = 0;
> > +
> > + if (iommu->dte)
> > + return (0);
> > + reg = 0;
> > + if (enable) {
> > + printf("enable iommu %d\n", iommu->id);
> > + iommu_showcfg(iommu, -1);
> > +
> > + iommu->gcmd |= GCMD_TE;
> > +
> > + /* Enable translation */
> > + printf(" pre tes: ");
> > +
> > + mtx_enter(&iommu->reg_lock);
> > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > + printf("xxx");
> > + do {
> > + printf("yyy");
> > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > + delay(n * 10000);
> > + } while (n++ < 5 && !(sts & GSTS_TES));
> > + mtx_leave(&iommu->reg_lock);
> > +
> > + printf(" set.tes: %d\n", n);
> > +
> > + if (n >= 5) {
> > + printf("error.. unable to initialize iommu %d\n",
> > +    iommu->id);
> > + iommu->flags |= IOMMU_FLAGS_BAD;
> > +
> > + /* Disable IOMMU */
> > + iommu->gcmd &= ~GCMD_TE;
> > + mtx_enter(&iommu->reg_lock);
> > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > + mtx_leave(&iommu->reg_lock);
> > +
> > + return (1);
> > + }
> > +
> > + iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
> > + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> > + } else {
> > + iommu->gcmd &= ~GCMD_TE;
> > +
> > + mtx_enter(&iommu->reg_lock);
> > +
> > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > + do {
> > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > + } while (n++ < 5 && sts & GSTS_TES);
> > + mtx_leave(&iommu->reg_lock);
> > +
> > + printf(" clr.tes: %d\n", n);
> > + }
> > +
> > + return (0);
> > +}
> > +
> > +/* Intel: Initialize IOMMU */
> > +int
> > +iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
> > +    struct acpidmar_drhd *dh)
> > +{
> > + static int niommu;
> > + int len = VTD_PAGE_SIZE;
> > + int i, gaw;
> > + uint32_t sts;
> > + paddr_t paddr;
> > +
> > + if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
> > + return (-1);
> > + }
> > +
> > + TAILQ_INIT(&iommu->domains);
> > + iommu->id = ++niommu;
> > + iommu->flags = dh->flags;
> > + iommu->segment = dh->segment;
> > + iommu->iot = sc->sc_memt;
> > +
> > + iommu->cap = iommu_readq(iommu, DMAR_CAP_REG);
> > + iommu->ecap = iommu_readq(iommu, DMAR_ECAP_REG);
> > + iommu->ndoms = cap_nd(iommu->cap);
> > +
> > + printf("  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
> > +    iommu->cap & CAP_AFL ? "afl " : "", // adv fault
> > +    iommu->cap & CAP_RWBF ? "rwbf " : "", // write-buffer flush
> > +    iommu->cap & CAP_PLMR ? "plmr " : "", // protected lo region
> > +    iommu->cap & CAP_PHMR ? "phmr " : "", // protected hi region
> > +    iommu->cap & CAP_CM ? "cm " : "", // caching mode
> > +    iommu->cap & CAP_ZLR ? "zlr " : "", // zero-length read
> > +    iommu->cap & CAP_PSI ? "psi " : "", // page invalidate
> > +    iommu->cap & CAP_DWD ? "dwd " : "", // write drain
> > +    iommu->cap & CAP_DRD ? "drd " : "", // read drain
> > +    iommu->cap & CAP_FL1GP ? "Gb " : "", // 1Gb pages
> > +    iommu->cap & CAP_PI ? "pi " : ""); // posted interrupts
> > + printf("  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
> > +    iommu->ecap & ECAP_C ? "c " : "", // coherent
> > +    iommu->ecap & ECAP_QI ? "qi " : "", // queued invalidate
> > +    iommu->ecap & ECAP_DT ? "dt " : "", // device iotlb
> > +    iommu->ecap & ECAP_IR ? "ir " : "", // intr remap
> > +    iommu->ecap & ECAP_EIM ? "eim " : "", // x2apic
> > +    iommu->ecap & ECAP_PT ? "pt " : "", // passthrough
> > +    iommu->ecap & ECAP_SC ? "sc " : "", // snoop control
> > +    iommu->ecap & ECAP_ECS ? "ecs " : "", // extended context
> > +    iommu->ecap & ECAP_MTS ? "mts " : "", // memory type
> > +    iommu->ecap & ECAP_NEST ? "nest " : "", // nested translations
> > +    iommu->ecap & ECAP_DIS ? "dis " : "", // deferred invalidation
> > +    iommu->ecap & ECAP_PASID ? "pas " : "", // pasid
> > +    iommu->ecap & ECAP_PRS ? "prs " : "", // page request
> > +    iommu->ecap & ECAP_ERS ? "ers " : "", // execute request
> > +    iommu->ecap & ECAP_SRS ? "srs " : "", // supervisor request
> > +    iommu->ecap & ECAP_NWFS ? "nwfs " : "", // no write flag
> > +    iommu->ecap & ECAP_EAFS ? "eafs " : ""); // extended accessed flag
> > +
> > + mtx_init(&iommu->reg_lock, IPL_HIGH);
> > +
> > + /* Clear Interrupt Masking */
> > + iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
> > +
> > + iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
> > +    acpidmar_intr, iommu, "dmarintr");
> > +
> > + /* Enable interrupts */
> > + sts = iommu_readl(iommu, DMAR_FECTL_REG);
> > + iommu_writel(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);
> > +
> > + /* Allocate root pointer */
> > + iommu->root = iommu_alloc_page(iommu, &paddr);
> > +#ifdef DEBUG
> > + printf("Allocated root pointer: pa:%.16llx va:%p\n",
> > +    (uint64_t)paddr, iommu->root);
> > +#endif
> > + iommu->rtaddr = paddr;
> > + iommu_flush_write_buffer(iommu);
> > + iommu_set_rtaddr(iommu, paddr);
> > +
> > +#if 0
> > + if (iommu->ecap & ECAP_QI) {
> > + /* Queued Invalidation support */
> > + iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
> > + iommu_writeq(iommu, DMAR_IQT_REG, 0);
> > + iommu_writeq(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
> > + }
> > + if (iommu->ecap & ECAP_IR) {
> > + /* Interrupt remapping support */
> > + iommu_writeq(iommu, DMAR_IRTA_REG, 0);
> > + }
> > +#endif
> > +
> > + /* Calculate guest address width and supported guest widths */
> > + gaw = -1;
> > + iommu->mgaw = cap_mgaw(iommu->cap);
> > + printf("gaw: %d { ", iommu->mgaw);
> > + for (i = 0; i < 5; i++) {
> > + if (cap_sagaw(iommu->cap) & (1L << i)) {
> > + gaw = VTD_LEVELTOAW(i);
> > + printf("%d ", gaw);
> > + iommu->agaw = gaw;
> > + }
> > + }
> > + printf("}\n");
> > +
> > + /* Cache current status register bits */
> > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > + if (sts & GSTS_TES)
> > + iommu->gcmd |= GCMD_TE;
> > + if (sts & GSTS_QIES)
> > + iommu->gcmd |= GCMD_QIE;
> > + if (sts & GSTS_IRES)
> > + iommu->gcmd |= GCMD_IRE;
> > + if (iommu->gcmd) {
> > + printf("gcmd: %x preset\n", iommu->gcmd);
> > + }
> > + acpidmar_intr(iommu);
> > + return (0);
> > +}
> > +
> > +const char *dmar_rn(int reg);
> > +
> > +const char *
> > +dmar_rn(int reg)
> > +{
> > + switch (reg) {
> > + case EVT_HEAD_REG: return "evthead";
> > + case EVT_TAIL_REG: return "evttail";
> > + case CMD_HEAD_REG: return "cmdhead";
> > + case CMD_TAIL_REG: return "cmdtail";
> > + case CMD_BASE_REG: return "cmdbase";
> > + case EVT_BASE_REG: return "evtbase";
> > + case DEV_TAB_BASE_REG: return "devtblbase";
> > + case IOMMUCTL_REG: return "iommuctl";
> > +#if 0
> > + case DMAR_VER_REG: return "ver";
> > + case DMAR_CAP_REG: return "cap";
> > + case DMAR_ECAP_REG: return "ecap";
> > + case DMAR_GSTS_REG: return "gsts";
> > + case DMAR_GCMD_REG: return "gcmd";
> > + case DMAR_FSTS_REG: return "fsts";
> > + case DMAR_FECTL_REG: return "fectl";
> > + case DMAR_RTADDR_REG: return "rtaddr";
> > + case DMAR_FEDATA_REG: return "fedata";
> > + case DMAR_FEADDR_REG: return "feaddr";
> > + case DMAR_FEUADDR_REG: return "feuaddr";
> > + case DMAR_PMEN_REG: return "pmen";
> > + case DMAR_IEDATA_REG: return "iedata";
> > + case DMAR_IEADDR_REG: return "ieaddr";
> > + case DMAR_IEUADDR_REG: return "ieuaddr";
> > + case DMAR_IRTA_REG: return "irta";
> > + case DMAR_CCMD_REG: return "ccmd";
> > + case DMAR_IQH_REG: return "iqh";
> > + case DMAR_IQT_REG: return "iqt";
> > + case DMAR_IQA_REG: return "iqa";
> > +#endif
> > + }
> > + return "unknown";
> > +}
> > +
> > +/* Read/Write IOMMU register */
> > +uint32_t
> > +iommu_readl(struct iommu_softc *iommu, int reg)
> > +{
> > + uint32_t v;
> > +
> > + v = bus_space_read_4(iommu->iot, iommu->ioh, reg);
> > + if (reg < 00) {
> > + printf("iommu%d: read %x %.8lx [%s]\n",
> > +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > + }
> > +
> > + return (v);
> > +}
> > +
> > +
> > +#define dbprintf(x...)
> > +
> > +void
> > +iommu_writel(struct iommu_softc *iommu, int reg, uint32_t v)
> > +{
> > + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> > +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > + bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
> > +}
> > +
> > +uint64_t
> > +iommu_readq(struct iommu_softc *iommu, int reg)
> > +{
> > + uint64_t v;
> > +
> > + v = bus_space_read_8(iommu->iot, iommu->ioh, reg);
> > + if (reg < 00) {
> > + printf("iommu%d: read %x %.8lx [%s]\n",
> > +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > + }
> > +
> > + return (v);
> > +}
> > +
> > +void
> > +iommu_writeq(struct iommu_softc *iommu, int reg, uint64_t v)
> > +{
> > + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> > +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > + bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
> > +}
> > +
> > +/* Check if a device is within a device scope */
> > +int
> > +acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
> > +    int sid)
> > +{
> > + struct dmar_devlist *ds;
> > + int sub, sec, i;
> > + int bus, dev, fun, sbus;
> > + pcireg_t reg;
> > + pcitag_t tag;
> > +
> > + sbus = sid_bus(sid);
> > + TAILQ_FOREACH(ds, devlist, link) {
> > + bus = ds->bus;
> > + dev = ds->dp[0].device;
> > + fun = ds->dp[0].function;
> > + /* Walk PCI bridges in path */
> > + for (i = 1; i < ds->ndp; i++) {
> > + tag = pci_make_tag(pc, bus, dev, fun);
> > + reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
> > + bus = PPB_BUSINFO_SECONDARY(reg);
> > + dev = ds->dp[i].device;
> > + fun = ds->dp[i].function;
> > + }
> > +
> > + /* Check for device exact match */
> > + if (sid == mksid(bus, dev, fun)) {
> > + return DMAR_ENDPOINT;
> > + }
> > +
> > + /* Check for device subtree match */
> > + if (ds->type == DMAR_BRIDGE) {
> > + tag = pci_make_tag(pc, bus, dev, fun);
> > + reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
> > + sec = PPB_BUSINFO_SECONDARY(reg);
> > + sub = PPB_BUSINFO_SUBORDINATE(reg);
> > + if (sec <= sbus && sbus <= sub) {
> > + return DMAR_BRIDGE;
> > + }
> > + }
> > + }
> > +
> > + return (0);
> > +}
> > +
> > +struct domain *
> > +domain_create(struct iommu_softc *iommu, int did)
> > +{
> > + struct domain *dom;
> > + int gaw;
> > +
> > + printf("iommu%d: create domain: %.4x\n", iommu->id, did);
> > + dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
> > + dom->did = did;
> > + dom->iommu = iommu;
> > + dom->pte = iommu_alloc_page(iommu, &dom->ptep);
> > + TAILQ_INIT(&dom->devices);
> > +
> > + /* Setup DMA */
> > + dom->dmat._cookie = dom;
> > + dom->dmat._dmamap_create    = dmar_dmamap_create; // nop
> > + dom->dmat._dmamap_destroy   = dmar_dmamap_destroy; // nop
> > + dom->dmat._dmamap_load      = dmar_dmamap_load; // lm
> > + dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf; // lm
> > + dom->dmat._dmamap_load_uio  = dmar_dmamap_load_uio; // lm
> > + dom->dmat._dmamap_load_raw  = dmar_dmamap_load_raw; // lm
> > + dom->dmat._dmamap_unload    = dmar_dmamap_unload; // um
> > + dom->dmat._dmamap_sync      = dmar_dmamap_sync; // lm
> > + dom->dmat._dmamem_alloc     = dmar_dmamem_alloc; // nop
> > + dom->dmat._dmamem_free      = dmar_dmamem_free; // nop
> > + dom->dmat._dmamem_map       = dmar_dmamem_map; // nop
> > + dom->dmat._dmamem_unmap     = dmar_dmamem_unmap; // nop
> > + dom->dmat._dmamem_mmap      = dmar_dmamem_mmap;
> > +
> > + snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
> > +    iommu->id, dom->did);
> > +
> > + /* Setup IOMMU address map */
> > + gaw = min(iommu->agaw, iommu->mgaw);
> > + dom->iovamap = extent_create(dom->exname, 1024*1024*16,
> > +    (1LL << gaw)-1,
> > +    M_DEVBUF, NULL, 0,
> > +    EX_WAITOK|EX_NOCOALESCE);
> > +
> > + /* Zero out Interrupt region */
> > + extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
> > +    EX_WAITOK);
> > + mtx_init(&dom->exlck, IPL_HIGH);
> > +
> > + TAILQ_INSERT_TAIL(&iommu->domains, dom, link);
> > +
> > + return dom;
> > +}
> > +
> > +void domain_add_device(struct domain *dom, int sid)
> > +{
> > + struct domain_dev *ddev;
> > +
> > + printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
> > + ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
> > + ddev->sid = sid;
> > + TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
> > +
> > + /* Should set context entry here?? */
> > +}
> > +
> > +void domain_remove_device(struct domain *dom, int sid)
> > +{
> > + struct domain_dev *ddev, *tmp;
> > +
> > + TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
> > + if (ddev->sid == sid) {
> > + TAILQ_REMOVE(&dom->devices, ddev, link);
> > + free(ddev, sizeof(*ddev), M_DEVBUF);
> > + }
> > + }
> > +}
> > +
> > +/* Lookup domain by segment & source id (bus.device.function) */
> > +struct domain *
> > +domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
> > +{
> > + struct iommu_softc *iommu;
> > + struct domain_dev *ddev;
> > + struct domain *dom;
> > + int rc;
> > +
> > + if (sc == NULL) {
> > + return NULL;
> > + }
> > +
> > + /* Lookup IOMMU for this device */
> > + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> > + if (iommu->segment != segment)
> > + continue;
> > + /* Check for devscope match or catchall iommu */
> > + rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
> > + if (rc != 0 || iommu->flags) {
> > + break;
> > + }
> > + }
> > + if (!iommu) {
> > + printf("%s: no iommu found\n", dmar_bdf(sid));
> > + return NULL;
> > + }
> > +
> > + //acpidmar_intr(iommu);
> > +
> > + /* Search domain devices */
> > + TAILQ_FOREACH(dom, &iommu->domains, link) {
> > + TAILQ_FOREACH(ddev, &dom->devices, link) {
> > + /* XXX: match all functions? */
> > + if (ddev->sid == sid) {
> > + return dom;
> > + }
> > + }
> > + }
> > + if (iommu->ndoms <= 2) {
> > + /* Running out of domains.. create catchall domain */
> > + if (!iommu->unity) {
> > + iommu->unity = domain_create(iommu, 1);
> > + }
> > + dom = iommu->unity;
> > + } else {
> > + dom = domain_create(iommu, --iommu->ndoms);
> > + }
> > + if (!dom) {
> > + printf("no domain here\n");
> > + return NULL;
> > + }
> > +
> > + /* Add device to domain */
> > + domain_add_device(dom, sid);
> > +
> > + return dom;
> > +}
> > +
> > +/* Map Guest Pages into IOMMU */
> > +void  _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
> > +{
> > + bus_size_t i;
> > + paddr_t hpa;
> > +
> > + if (dom == NULL) {
> > + return;
> > + }
> > + printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
> > + for (i = 0; i < len; i += PAGE_SIZE) {
> > + hpa = 0;
> > + pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
> > + domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
> > + gpa += PAGE_SIZE;
> > + va  += PAGE_SIZE;
> > + }
> > +}
> > +
> > +/* Find IOMMU for a given PCI device */
> > +void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
> > +{
> > + struct domain *dom;
> > +
> > + dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
> > + if (dom) {
> > + *id = dom->did;
> > + }
> > + return dom;
> > +}
> > +
> > +void domain_map_device(struct domain *dom, int sid);
> > +
> > +void
> > +domain_map_device(struct domain *dom, int sid)
> > +{
> > + struct iommu_softc *iommu;
> > + struct context_entry *ctx;
> > + paddr_t paddr;
> > + int bus, devfn;
> > + int tt, lvl;
> > +
> > + iommu = dom->iommu;
> > +
> > + bus = sid_bus(sid);
> > + devfn = sid_devfn(sid);
> > + /* AMD attach device */
> > + if (iommu->dte) {
> > + struct ivhd_dte *dte = &iommu->dte[sid];
> > + if (!dte->dw0) {
> > + /* Setup Device Table Entry: bus.devfn */
> > + printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
> > + dte_set_host_page_table_root_ptr(dte, dom->ptep);
> > + dte_set_domain(dte, dom->did);
> > + dte_set_mode(dte, 3);  // Set 4 level PTE
> > + dte_set_tv(dte);
> > + dte_set_valid(dte);
> > + ivhd_flush_devtab(iommu, dom->did);
> > + //ivhd_showit(iommu);
> > + ivhd_showdte();
> > + }
> > + //ivhd_poll_events(iommu);
> > + return;
> > + }
> > +
> > + /* Create Bus mapping */
> > + if (!root_entry_is_valid(&iommu->root[bus])) {
> > + iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
> > + iommu->root[bus].lo = paddr | ROOT_P;
> > + iommu_flush_cache(iommu, &iommu->root[bus],
> > +    sizeof(struct root_entry));
> > + dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
> > +    iommu->id, bus, (uint64_t)paddr,
> > +    iommu->ctx[bus]);
> > + }
> > +
> > + /* Create DevFn mapping */
> > + ctx = iommu->ctx[bus] + devfn;
> > + if (!context_entry_is_valid(ctx)) {
> > + tt = CTX_T_MULTI;
> > + lvl = VTD_AWTOLEVEL(iommu->agaw);
> > +
> > + /* Initialize context */
> > + context_set_slpte(ctx, dom->ptep);
> > + context_set_translation_type(ctx, tt);
> > + context_set_domain_id(ctx, dom->did);
> > + context_set_address_width(ctx, lvl);
> > + context_set_present(ctx);
> > +
> > + /* Flush it */
> > + iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
> > + if ((iommu->cap & CAP_CM) || force_cm) {
> > + iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
> > + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> > + } else {
> > + iommu_flush_write_buffer(iommu);
> > + }
> > + dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
> > +    iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
> > +    dom->did, tt);
> > + }
> > +}
> > +
> > +struct domain *
> > +acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
> > +{
> > + static struct domain *dom;
> > +
> > + dom = domain_lookup(sc, segment, sid);
> > + if (!dom) {
> > + printf("no domain: %s\n", dmar_bdf(sid));
> > + return NULL;
> > + }
> > +
> > + if (mapctx) {
> > + domain_map_device(dom, sid);
> > + }
> > +
> > + return dom;
> > +}
> > +
/*
 * Placeholder predicate: report whether bus:dev.fun should be remapped.
 * Currently every device is eligible.
 */
int
ismap(int bus, int dev, int fun)
{
	return (1);
}
> > +
/*
 * Per-device PCI attach hook: place the device into an IOMMU domain and
 * replace its bus_dma tag so all subsequent bus_dmamap_* operations are
 * routed through the remapper.
 */
void
acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
{
	int bus, dev, fun, sid;
	struct domain *dom;
	pcireg_t reg;

	if (!acpidmar_sc) {
		/* No DMAR, ignore */
		return;
	}

	/* Add device to our list */
	pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
	sid = mksid(bus, dev, fun);
	/* Skip devices flagged as invalid (e.g. the IOMMU itself) */
	if (sid_flag[sid] & SID_INVALID)
		return;

	reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
#if 0
	if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
	    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
		printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
		    pa->pa_domain, bus, dev, fun);
		return;
	}
#endif
	/* Add device to domain */
	dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain, sid, 0);
	if (dom == NULL)
		return;

	/*
	 * VGA devices get a DOM_NOMAP domain -- presumably their DMA is
	 * left untranslated; confirm against the DOM_NOMAP consumers.
	 */
	if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
	    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
		dom->flag = DOM_NOMAP;
	}
	if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
	    PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
		/* For ISA Bridges, map 0-16Mb as 1:1 */
		printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
		    pa->pa_domain, bus, dev, fun);
		domain_map_pthru(dom, 0x00, 16*1024*1024);
	}

	/* Change DMA tag */
	pa->pa_dmat = &dom->dmat;
}
> > +
/* Create list of device scope entries from ACPI table */
/*
 * Walk the variable-length device-scope records that follow a DRHD/RMRR/
 * ATSR header (starting at byte offset `off' within `de') and build a
 * TAILQ of dmar_devlist records.  Only BRIDGE and ENDPOINT scopes are
 * retained; the trailing path bytes (device/function pairs) are copied
 * after each record.
 */
void
acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
    struct devlist_head *devlist)
{
	struct acpidmar_devscope *ds;
	struct dmar_devlist *d;
	int dplen, i;

	TAILQ_INIT(devlist);
	while (off < de->length) {
		ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
		off += ds->length;

		/* We only care about bridges and endpoints */
		if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
			continue;

		/* Path entries are 2 bytes each (device, function) */
		dplen = ds->length - sizeof(*ds);
		d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
		d->bus  = ds->bus;
		d->type = ds->type;
		d->ndp  = dplen / 2;
		d->dp   = (void *)&d[1];	/* path data lives after the header */
		memcpy(d->dp, &ds[1], dplen);
		TAILQ_INSERT_TAIL(devlist, d, link);

		printf("  %8s  %.4x:%.2x.%.2x.%x {",
		    ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
		    segment, ds->bus,
		    d->dp[0].device,
		    d->dp[0].function);

		for (i = 1; i < d->ndp; i++) {
			printf(" %2x.%x ",
			    d->dp[i].device,
			    d->dp[i].function);
		}
		printf("}\n");
	}
}
> > +
/* DMA Remapping Hardware Unit */
/*
 * Parse one DRHD (remapping hardware unit) entry: allocate and initialize
 * an iommu_softc for it and queue it on sc_drhds.  A unit with the
 * INCLUDE_PCI_ALL flag set is the catch-all and must be searched last,
 * hence the tail insertion.
 */
void
acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
{
	struct iommu_softc *iommu;

	printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
	    de->drhd.segment,
	    de->drhd.address,
	    de->drhd.flags);
	iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
	acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
	    &iommu->devices);
	iommu_init(sc, iommu, &de->drhd);

	if (de->drhd.flags) {
		/* Catchall IOMMU goes at end of list */
		TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
	} else {
		TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
	}
}
> > +
/* Reserved Memory Region Reporting */
/*
 * Parse one RMRR entry: a BIOS-declared physical range that the listed
 * devices may DMA into at any time and must therefore stay identity
 * mapped.  The range is widened to cover any adjacent e820 reserved
 * block that fully contains it, to tolerate firmware that DMAs outside
 * the declared window.
 */
void
acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
{
	struct rmrr_softc *rmrr;
	bios_memmap_t *im, *jm;
	uint64_t start, end;

	printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
	    de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
	if (de->rmrr.limit <= de->rmrr.base) {
		/* Zero or negative sized range: ignore it */
		printf("  buggy BIOS\n");
		return;
	}

	rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
	rmrr->start = trunc_page(de->rmrr.base);
	rmrr->end = round_page(de->rmrr.limit);
	rmrr->segment = de->rmrr.segment;
	acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
	    &rmrr->devices);

	for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
		if (im->type != BIOS_MAP_RES)
			continue;
		/* Search for adjacent reserved regions */
		start = im->addr;
		end   = im->addr+im->size;
		for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
		    jm++) {
			end = jm->addr+jm->size;
		}
		printf("e820: %.16llx - %.16llx\n", start, end);
		if (start <= rmrr->start && rmrr->end <= end) {
			/* Bah.. some buggy BIOS stomp outside RMRR */
			printf("  ** inside E820 Reserved %.16llx %.16llx\n",
			    start, end);
			rmrr->start = trunc_page(start);
			rmrr->end   = round_page(end);
			break;
		}
	}
	TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
}
> > +
/* Root Port ATS Reporting */
/*
 * Parse one ATSR entry (root ports supporting Address Translation
 * Services) and queue it on sc_atsrs.  The entries are recorded but the
 * visible code does not act on them further.
 */
void
acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
{
	struct atsr_softc *atsr;

	printf("ATSR: segment:%.4x flags:%x\n",
	    de->atsr.segment,
	    de->atsr.flags);

	atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
	atsr->flags = de->atsr.flags;
	atsr->segment = de->atsr.segment;
	acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
	    &atsr->devices);

	TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
}
> > +
> > +void
> > +acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
> > +{
> > + struct rmrr_softc *rmrr;
> > + struct iommu_softc *iommu;
> > + struct domain *dom;
> > + struct dmar_devlist *dl;
> > + union acpidmar_entry *de;
> > + int off, sid, rc;
> > +
> > + domain_map_page = domain_map_page_intel;
> > + printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
> > +    dmar->haw+1,
> > +    !!(dmar->flags & 0x1),
> > +    !!(dmar->flags & 0x2));
> > + sc->sc_haw = dmar->haw+1;
> > + sc->sc_flags = dmar->flags;
> > +
> > + TAILQ_INIT(&sc->sc_drhds);
> > + TAILQ_INIT(&sc->sc_rmrrs);
> > + TAILQ_INIT(&sc->sc_atsrs);
> > +
> > + off = sizeof(*dmar);
> > + while (off < dmar->hdr.length) {
> > + de = (union acpidmar_entry *)((unsigned char *)dmar + off);
> > + switch (de->type) {
> > + case DMAR_DRHD:
> > + acpidmar_drhd(sc, de);
> > + break;
> > + case DMAR_RMRR:
> > + acpidmar_rmrr(sc, de);
> > + break;
> > + case DMAR_ATSR:
> > + acpidmar_atsr(sc, de);
> > + break;
> > + default:
> > + printf("DMAR: unknown %x\n", de->type);
> > + break;
> > + }
> > + off += de->length;
> > + }
> > +
> > + /* Pre-create domains for iommu devices */
> > + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> > + TAILQ_FOREACH(dl, &iommu->devices, link) {
> > + sid = mksid(dl->bus, dl->dp[0].device,
> > +    dl->dp[0].function);
> > + dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
> > + if (dom != NULL) {
> > + printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> > +    iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
> > +    iommu->id, dom->did);
> > + }
> > + }
> > + }
> > + /* Map passthrough pages for RMRR */
> > + TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
> > + TAILQ_FOREACH(dl, &rmrr->devices, link) {
> > + sid = mksid(dl->bus, dl->dp[0].device,
> > +    dl->dp[0].function);
> > + dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
> > + if (dom != NULL) {
> > + printf("%s map ident: %.16llx %.16llx\n",
> > +    dom_bdf(dom), rmrr->start, rmrr->end);
> > + domain_map_pthru(dom, rmrr->start, rmrr->end);
> > + rc = extent_alloc_region(dom->iovamap,
> > +    rmrr->start, rmrr->end, EX_WAITOK);
> > + }
> > + }
> > + }
> > +}
> > +
> > +
> > +/*=====================================================
> > + * AMD Vi
> > + *=====================================================*/
> > +void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
> > +int acpiivrs_iommu_match(struct pci_attach_args *);
> > +int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> > + struct acpi_ivhd *);
> > +void iommu_ivhd_add(struct iommu_softc *, int, int, int);
> > +int _ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *);
> > +void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
> > +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
> > +int ivhd_invalidate_domain(struct iommu_softc *, int);
> > +void ivhd_intr_map(struct iommu_softc *, int);
> > +
> > +int
> > +acpiivhd_intr(void *ctx)
> > +{
> > + struct iommu_softc *iommu = ctx;
> > +
> > + if (!iommu->dte)
> > + return (0);
> > + ivhd_poll_events(iommu);
> > + return (1);
> > +}
> > +
/* Setup interrupt for AMD */
/*
 * Establish the MSI interrupt for an AMD IOMMU, identified by the PCI
 * source-id `devid' of the IOMMU function itself.  Idempotent: does
 * nothing once iommu->intr is set.
 */
void
ivhd_intr_map(struct iommu_softc *iommu, int devid) {
	pci_intr_handle_t ih;

	if (iommu->intr)
		return;
	/* Build a synthetic interrupt handle for MSI delivery */
	ih.tag = pci_make_tag(NULL, sid_bus(devid), sid_dev(devid), sid_fun(devid));
	ih.line = APIC_INT_VIA_MSG;
	ih.pin = 0;
	iommu->intr = pci_intr_establish(NULL, ih, IPL_NET | IPL_MPSAFE,
	    acpiivhd_intr, iommu, "amd_iommu");
	printf("amd iommu intr: %p\n", iommu->intr);
}
> > +
/*
 * Debug: recursively dump all present entries of a 512-entry page table
 * level.  `lvl' counts down toward 1 (leaf); `va' accumulates the
 * virtual address bits decoded so far.  Indentation (pfx) shrinks with
 * depth so the output reads as a tree.
 */
void _dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
{
	char *pfx[] = { "    ", "   ", "  ", " ", "" };
	uint64_t i, sh;
	struct pte_entry *npte;

	for (i = 0; i < 512; i++) {
		/* VA bits contributed by index i at this level (9 bits/level, 4K pages) */
		sh = (i << (((lvl-1) * 9) + 12));
		if (pte[i].val & PTE_P) {
			if (lvl > 1) {
				/* Follow the physical next-level pointer via the direct map */
				npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
				printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl,
				    pte[i].val, (pte[i].val >> 9) & 7);
				_dumppte(npte, lvl-1, va | sh);
			}
			else {
				printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl,
				    pte[i].val, va | sh);
			}
		}
	}
}
> > +
/*
 * Debug: print the device table entry and page-table tree for `sid'.
 * Rate limited to the first 10 invocations (static counter) so a fault
 * storm does not flood the console.  `paddr' is currently unused.
 */
void showpage(int sid, paddr_t paddr)
{
	struct domain *dom;
	static int show = 0;

	if (show > 10)
		return;
	show++;
	dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
	if (!dom)
		return;
	printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
	    hwdte[sid].dw0,
	    hwdte[sid].dw1,
	    hwdte[sid].dw2,
	    hwdte[sid].dw3,
	    hwdte[sid].dw4,
	    hwdte[sid].dw5,
	    hwdte[sid].dw6,
	    hwdte[sid].dw7);
	_dumppte(dom->pte, 3, 0);
}
> > +
/* Display AMD IOMMU Error */
/*
 * Decode and print one entry from the AMD IOMMU event log, then zero it.
 * `head' is the byte offset of the entry (printed for correlation).
 * The EVT_* field extraction follows the IVHD event log record layout.
 */
void
ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
{
	int type, sid, did, flag;
	uint64_t address;

	/* Get Device, Domain, Address and Type of event */
	sid  = __EXTRACT(evt->dw0, EVT_SID);
	type = __EXTRACT(evt->dw1, EVT_TYPE);
	did  = __EXTRACT(evt->dw1, EVT_DID);
	flag = __EXTRACT(evt->dw1, EVT_FLAG);
	address = _get64(&evt->dw2);

	printf("=== IOMMU Error[%.4x]: ", head);
	switch (type) {
	case ILLEGAL_DEV_TABLE_ENTRY: // ok
		printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
		    dmar_bdf(sid), address,
		    evt->dw1 & EVT_TR ? "translation" : "transaction",
		    evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
		    evt->dw1 & EVT_RW ? "write" : "read",
		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
		ivhd_showdte();
		break;
	case IO_PAGE_FAULT: // ok
		printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
		    dmar_bdf(sid), did, address,
		    evt->dw1 & EVT_TR ? "translation" : "transaction",
		    evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
		    evt->dw1 & EVT_PE ? "no perm" : "perm",
		    evt->dw1 & EVT_RW ? "write" : "read",
		    evt->dw1 & EVT_PR ? "present" : "not present",
		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
		ivhd_showdte();
		showpage(sid, address);
		break;
	case DEV_TAB_HARDWARE_ERROR: // ok
		printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
		    dmar_bdf(sid), address,
		    evt->dw1 & EVT_TR ? "translation" : "transaction",
		    evt->dw1 & EVT_RW ? "write" : "read",
		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
		ivhd_showdte();
		break;
	case PAGE_TAB_HARDWARE_ERROR:
		printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
		    dmar_bdf(sid), address,
		    evt->dw1 & EVT_TR ? "translation" : "transaction",
		    evt->dw1 & EVT_RW ? "write" : "read",
		    evt->dw1 & EVT_I  ? "interrupt" : "memory");
		ivhd_showdte();
		break;
	case ILLEGAL_COMMAND_ERROR: // ok
		printf("illegal command addr=0x%.16llx\n", address);
		ivhd_showcmd(iommu);
		break;
	case COMMAND_HARDWARE_ERROR:
		printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
		    address, flag);
		ivhd_showcmd(iommu);
		break;
	case IOTLB_INV_TIMEOUT:
		printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
		    dmar_bdf(sid), address);
		break;
	case INVALID_DEVICE_REQUEST:
		printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
		    dmar_bdf(sid), address, flag);
		break;
	default:
		printf("unknown type=0x%.2x\n", type);
		break;
	}
	//ivhd_showdte();
	/* Clear old event so the slot reads as empty next pass */
	evt->dw0 = 0;
	evt->dw1 = 0;
	evt->dw2 = 0;
	evt->dw3 = 0;
}
> > +
> > +/* AMD: Process IOMMU error from hardware */
> > +int
> > +ivhd_poll_events(struct iommu_softc *iommu)
> > +{
> > + uint32_t head, tail;
> > + int sz;
> > +
> > + sz = sizeof(struct ivhd_event);
> > + head = iommu_readl(iommu, EVT_HEAD_REG);
> > + tail = iommu_readl(iommu, EVT_TAIL_REG);
> > + if (head == tail) {
> > + /* No pending events */
> > + return (0);
> > + }
> > + while (head != tail) {
> > + ivhd_show_event(iommu, iommu->evt_tbl + head, head);
> > + head = (head + sz) % EVT_TBL_SIZE;
> > + }
> > + iommu_writel(iommu, EVT_HEAD_REG, head);
> > + return (0);
> > +}
> > +
/* AMD: Issue command to IOMMU queue */
/*
 * Append one 16-byte command to the circular command buffer and bump the
 * tail register.  Interrupts are disabled around the tail read/update so
 * the slot reservation is not raced on this CPU.
 *
 * Returns the entry index (tail / sizeof(cmd)) on success, or -EBUSY if
 * the ring is full.
 */
int
_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
{
	u_long rf;
	uint32_t head, tail, next;
	int sz;

	head = iommu_readl(iommu, CMD_HEAD_REG);
	sz = sizeof(*cmd);
	rf = intr_disable();
	tail = iommu_readl(iommu, CMD_TAIL_REG);
	next = (tail + sz) % CMD_TBL_SIZE;
	if (next == head) {
		printf("FULL\n");
		/* Queue is full */
		intr_restore(rf);
		return -EBUSY;
	}
	memcpy(iommu->cmd_tbl + tail, cmd, sz);
	iommu_writel(iommu, CMD_TAIL_REG, next);
	intr_restore(rf);
	return (tail / sz);
}
> > +
/* Max number of exponential-backoff delay steps while waiting for completion */
#define IVHD_MAXDELAY 8

/*
 * Queue `cmd'; when `wait' is set, also queue a COMPLETION_WAIT command
 * whose store-address points at the local `wv' variable, then spin with
 * exponential backoff (DELAY(10 << i)) until the hardware stores to it
 * or IVHD_MAXDELAY steps elapse.  Returns the index from the underlying
 * _ivhd_issue_command() call (negative on queue-full).
 */
int
ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
{
	struct ivhd_command wq = { 0 };
	/* Completion flag written by the IOMMU; 16-byte aligned per hardware requirement */
	volatile uint64_t wv __aligned(16) = 0LL;
	paddr_t paddr;
	int rc, i;
	static int mi;	/* high-water mark of observed wait steps (debug) */

	rc = _ivhd_issue_command(iommu, cmd);
	if (rc >= 0 && wait) {
		/* Wait for previous commands to complete.
		 * Store address of completion variable to command */
		pmap_extract(pmap_kernel(), (vaddr_t)&wv, &paddr);
		wq.dw0 = (paddr & ~0xF) | 0x1;
		wq.dw1 = (COMPLETION_WAIT << CMD_SHIFT) | ((paddr >> 32) & 0xFFFFF);
		wq.dw2 = 0xDEADBEEF;	/* value the IOMMU will store into wv (low) */
		wq.dw3 = 0xFEEDC0DE;	/* value the IOMMU will store into wv (high) */

		rc = _ivhd_issue_command(iommu, &wq);
		/* wv will change to value in dw2/dw3 when command is complete */
		for (i = 0; i < IVHD_MAXDELAY && !wv; i++) {
			DELAY(10 << i);
		}
		if (mi < i && mi != IVHD_MAXDELAY) {
			printf("maxdel: %d\n", i);
			mi = i;
		}
		if (i == IVHD_MAXDELAY) {
			printf("ivhd command timeout: %.8x %.8x %.8x %.8x wv:%llx idx:%x\n",
			    cmd->dw0, cmd->dw1, cmd->dw2, cmd->dw3, wv, rc);
		}
	}
	return rc;

}
> > +
> > +/* AMD: Flush changes to Device Table Entry for a specific domain */
> > +int ivhd_flush_devtab(struct iommu_softc *iommu, int did)
> > +{
> > + struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_DEVTAB_ENTRY << CMD_SHIFT };
> > + return ivhd_issue_command(iommu, &cmd, 1);
> > +}
> > +
> > +/* AMD: Invalidate all IOMMU device and page tables */
> > +int ivhd_invalidate_iommu_all(struct iommu_softc *iommu)
> > +{
> > + struct ivhd_command cmd = { .dw1 = INVALIDATE_IOMMU_ALL << CMD_SHIFT };
> > + return ivhd_issue_command(iommu, &cmd, 0);
> > +}
> > +
> > +/* AMD: Invalidate interrupt remapping */
> > +int ivhd_invalidate_interrupt_table(struct iommu_softc *iommu, int did)
> > +{
> > + struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_INTERRUPT_TABLE << CMD_SHIFT };
> > + return ivhd_issue_command(iommu, &cmd, 0);
> > +}
> > +
> > +/* AMD: Invalidate all page tables in a domain */
> > +int ivhd_invalidate_domain(struct iommu_softc *iommu, int did)
> > +{
> > + struct ivhd_command cmd = { .dw1 = did | (INVALIDATE_IOMMU_PAGES << CMD_SHIFT) };
> > +
> > + cmd.dw2 = 0xFFFFF000 | 0x3;
> > + cmd.dw3 = 0x7FFFFFFF;
> > + return ivhd_issue_command(iommu, &cmd, 1);
> > +}
> > +
/* AMD: Display Registers */
/*
 * Debug: raw dump of the main AMD IOMMU MMIO registers (table bases,
 * control/status, and both ring head/tail pairs).
 */
void ivhd_showit(struct iommu_softc *iommu)
{
	printf("---- dt:%.16llx cmd:%.16llx evt:%.16llx ctl:%.16llx sts:%.16llx\n",
	    iommu_readq(iommu, DEV_TAB_BASE_REG),
	    iommu_readq(iommu, CMD_BASE_REG),
	    iommu_readq(iommu, EVT_BASE_REG),
	    iommu_readq(iommu, IOMMUCTL_REG),
	    iommu_readq(iommu, IOMMUSTS_REG));
	printf("---- cmd queue:%.16llx %.16llx evt queue:%.16llx %.16llx\n",
	    iommu_readq(iommu, CMD_HEAD_REG),
	    iommu_readq(iommu, CMD_TAIL_REG),
	    iommu_readq(iommu, EVT_HEAD_REG),
	    iommu_readq(iommu, EVT_TAIL_REG));
}
> > +
/* AMD: Generate Errors to test event handler */
/*
 * Debug-only fault injector: deliberately corrupts the device table
 * entry for the hard-coded test sid 0x2303 and queues an all-ones
 * (illegal) command, then polls the event log to verify the handler
 * reports both.  Not called in normal operation.
 */
void ivhd_checkerr(struct iommu_softc *iommu);
void ivhd_checkerr(struct iommu_softc *iommu)
{
	struct ivhd_command cmd = { -1, -1, -1, -1 };

	/* Generate ILLEGAL DEV TAB entry? */
	iommu->dte[0x2303].dw0 = -1;      // invalid
	iommu->dte[0x2303].dw2 = 0x1234;  // domain
	iommu->dte[0x2303].dw7 = -1;      // reserved
	ivhd_flush_devtab(iommu, 0x1234);
	ivhd_poll_events(iommu);

	/* Generate ILLEGAL_COMMAND_ERROR : ok */
	ivhd_issue_command(iommu, &cmd, 0);
	ivhd_poll_events(iommu);

	/* Generate page hardware error */
}
> > +
> > +/* AMD: Show Device Table Entry */
> > +void ivhd_showdte(void)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < 65536; i++) {
> > + if (hwdte[i].dw0) {
> > + printf("%.2x:%.2x.%x: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
> > + i >> 8, (i >> 3) & 0x1F, i & 0x7,
> > + hwdte[i].dw0, hwdte[i].dw1,
> > + hwdte[i].dw2, hwdte[i].dw3,
> > + hwdte[i].dw4, hwdte[i].dw5,
> > + hwdte[i].dw6, hwdte[i].dw7);
> > + }
> > + }
> > +}
> > +
/* AMD: Show command entries */
/*
 * Debug: dump the head of the command ring with each entry's physical
 * address.  NOTE(review): a command entry is 16 bytes, so a 4k ring
 * holds 256 entries; the 4096/128 bound only prints the first 32 --
 * confirm whether that truncation is intentional.
 */
void ivhd_showcmd(struct iommu_softc *iommu)
{
	struct ivhd_command *ihd;
	paddr_t phd;
	int i;

	ihd = iommu->cmd_tbl;
	phd = iommu_readq(iommu, CMD_BASE_REG) & CMD_BASE_MASK;
	for (i = 0; i < 4096 / 128; i++) {
		printf("%.2x: %.16llx %.8x %.8x %.8x %.8x\n", i,
		    (uint64_t)phd + i * sizeof(*ihd),
		    ihd[i].dw0,ihd[i].dw1,ihd[i].dw2,ihd[i].dw3);
	}
}
> > +
/* Extract a bitfield from the extended-feature register (uses X_SHIFT/X_MASK) */
#define _c(x) (int)((iommu->ecap >> x ##_SHIFT) & x ## _MASK)

/* AMD: Initialize IOMMU */
/*
 * Bring up one AMD IOMMU: map its MMIO window, disable translation while
 * configuring, hook the MSI interrupt, allocate command/event rings,
 * point the hardware at the shared device table, then re-enable with the
 * feature bits requested by the IVHD header.  Returns 0 on success.
 */
int
ivhd_iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
    struct acpi_ivhd *ivhd)
{
	static int niommu;	/* running id assigned to each unit */
	paddr_t paddr;
	uint64_t ov;

	if (sc == NULL || iommu == NULL || ivhd == NULL) {
		printf("Bad pointer to iommu_init!\n");
		return -1;
	}
	/* Map 512K of IOMMU MMIO register space */
	if (_bus_space_map(sc->sc_memt, ivhd->address, 0x80000, 0, &iommu->ioh) != 0) {
		printf("Bus Space Map fails\n");
		return -1;
	}
	TAILQ_INIT(&iommu->domains);
	TAILQ_INIT(&iommu->devices);

	/* Setup address width and number of domains */
	iommu->id = ++niommu;
	iommu->iot = sc->sc_memt;
	iommu->mgaw = 48;
	iommu->agaw = 48;
	iommu->flags = 1;
	iommu->segment = 0;
	iommu->ndoms = 256;

	/* Report extended feature bits (informational only) */
	iommu->ecap = iommu_readq(iommu, EXTFEAT_REG);
	printf("ecap = %.16llx\n", iommu->ecap);
	printf("%s%s%s%s%s%s%s%s\n",
	    iommu->ecap & EFR_PREFSUP ? "pref " : "",
	    iommu->ecap & EFR_PPRSUP  ? "ppr " : "",
	    iommu->ecap & EFR_NXSUP   ? "nx " : "",
	    iommu->ecap & EFR_GTSUP   ? "gt " : "",
	    iommu->ecap & EFR_IASUP   ? "ia " : "",
	    iommu->ecap & EFR_GASUP   ? "ga " : "",
	    iommu->ecap & EFR_HESUP   ? "he " : "",
	    iommu->ecap & EFR_PCSUP   ? "pc " : "");
	printf("hats:%x gats:%x glxsup:%x smif:%x smifrc:%x gam:%x\n",
	    _c(EFR_HATS), _c(EFR_GATS), _c(EFR_GLXSUP), _c(EFR_SMIFSUP),
	    _c(EFR_SMIFRC), _c(EFR_GAMSUP));

	/* Turn off iommu while reconfiguring */
	ov = iommu_readq(iommu, IOMMUCTL_REG);
	iommu_writeq(iommu, IOMMUCTL_REG, ov & ~(CTL_IOMMUEN | CTL_COHERENT |
	    CTL_HTTUNEN | CTL_RESPASSPW | CTL_PASSPW | CTL_ISOC));

	/*
	 * Enable intr.  Marking the IOMMU's own devid SID_INVALID also
	 * keeps acpidmar_pci_hook() from remapping the IOMMU itself.
	 */
	sid_flag[ivhd->devid] |= SID_INVALID;
	ivhd_intr_map(iommu, ivhd->devid);

	/* Setup command buffer with 4k buffer (128 entries) */
	iommu->cmd_tbl = iommu_alloc_page(iommu, &paddr);
	iommu_writeq(iommu, CMD_BASE_REG, (paddr & CMD_BASE_MASK) | CMD_TBL_LEN_4K);
	iommu_writel(iommu, CMD_HEAD_REG, 0x00);
	iommu_writel(iommu, CMD_TAIL_REG, 0x00);
	iommu->cmd_tblp = paddr;

	/* Setup event log with 4k buffer (128 entries) */
	iommu->evt_tbl = iommu_alloc_page(iommu, &paddr);
	iommu_writeq(iommu, EVT_BASE_REG, (paddr & EVT_BASE_MASK) | EVT_TBL_LEN_4K);
	iommu_writel(iommu, EVT_HEAD_REG, 0x00);
	iommu_writel(iommu, EVT_TAIL_REG, 0x00);
	iommu->evt_tblp = paddr;

	/* Setup device table
	 * 1 entry per source ID (bus:device:function - 64k entries)
	 * NOTE(review): hwdte must be physically contiguous for a single
	 * pmap_extract of its base to be valid -- confirm its allocation.
	 */
	iommu->dte = hwdte;
	pmap_extract(pmap_kernel(), (vaddr_t)iommu->dte, &paddr);
	iommu_writeq(iommu, DEV_TAB_BASE_REG, (paddr & DEV_TAB_MASK) | DEV_TAB_LEN);

	/* Enable IOMMU, carrying over tuning flags from the IVHD header */
	ov |= (CTL_IOMMUEN | CTL_EVENTLOGEN | CTL_CMDBUFEN | CTL_EVENTINTEN | CTL_COMWAITINTEN);
	if (ivhd->flags & IVHD_COHERENT)
		ov |= CTL_COHERENT;
	if (ivhd->flags & IVHD_HTTUNEN)
		ov |= CTL_HTTUNEN;
	if (ivhd->flags & IVHD_RESPASSPW)
		ov |= CTL_RESPASSPW;
	if (ivhd->flags & IVHD_PASSPW)
		ov |= CTL_PASSPW;
	if (ivhd->flags & IVHD_ISOC)
		ov |= CTL_ISOC;
	ov &= ~(CTL_INVTIMEOUT_MASK << CTL_INVTIMEOUT_SHIFT);
	ov |=  (CTL_INVTIMEOUT_1MS  << CTL_INVTIMEOUT_SHIFT);
	iommu_writeq(iommu, IOMMUCTL_REG, ov);

	ivhd_invalidate_iommu_all(iommu);
	//ivhd_checkerr(iommu);

	TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
	return 0;
}
> > +
/*
 * Record a start..end source-id range with its IVHD config byte.
 * NOTE(review): the allocated record is never inserted into any list and
 * the pointer is dropped on return -- as written this function leaks
 * memory and has no effect.  It needs a TAILQ_INSERT onto the intended
 * device-range list (not identifiable from this file chunk).
 */
void
iommu_ivhd_add(struct iommu_softc *iommu, int start, int end, int cfg)
{
	struct ivhd_devlist *idev;

	idev = malloc(sizeof(*idev), M_DEVBUF, M_ZERO | M_WAITOK);
	idev->start_id = start;
	idev->end_id = end;
	idev->cfg = cfg;
}
> > +
> > +int acpiivrs_iommu_match(struct pci_attach_args *pa)
> > +{
> > + int b,d,f;
> > +
> > + pci_decompose_tag(pa->pa_pc, pa->pa_tag, &b, &d, &f);
> > + printf(" matchdev: %d.%d.%d\n", b, d, f);
> > + if (PCI_CLASS(pa->pa_class) == PCI_CLASS_SYSTEM &&
> > +    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_SYSTEM_IOMMU) {
> > + printf("iziommu\n");
> > + return (1);
> > + }
> > + return (0);
> > +}
> > +
> > +void
> > +acpiivrs_ivhd(struct acpidmar_softc *sc, struct acpi_ivhd *ivhd)
> > +{
> > + struct iommu_softc *iommu;
> > + struct acpi_ivhd_ext *ext;
> > + union acpi_ivhd_entry *ie;
> > + int start, off, dte, all_dte = 0;
> > +
> > + if (ivhd->type == IVRS_IVHD_EXT) {
> > + ext = (struct acpi_ivhd_ext *)ivhd;
> > + printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x %.16llx\n",
> > +       ext->type, ext->flags, ext->length,
> > +             ext->segment, dmar_bdf(ext->devid), ext->cap,
> > +             ext->address, ext->info,
> > +       ext->attrib, ext->efr);
> > + if (ext->flags & IVHD_PPRSUP)
> > + printf(" PPRSup");
> > + if (ext->flags & IVHD_PREFSUP)
> > + printf(" PreFSup");
> > + if (ext->flags & IVHD_COHERENT)
> > + printf(" Coherent");
> > + if (ext->flags & IVHD_IOTLB)
> > + printf(" Iotlb");
> > + if (ext->flags & IVHD_ISOC)
> > + printf(" ISoc");
> > + if (ext->flags & IVHD_RESPASSPW)
> > + printf(" ResPassPW");
> > + if (ext->flags & IVHD_PASSPW)
> > + printf(" PassPW");
> > + if (ext->flags & IVHD_HTTUNEN)
> > + printf( " HtTunEn");
> > + if (ext->flags)
> > + printf("\n");
> > + off = sizeof(*ext);
> > + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO|M_WAITOK);
> > + ivhd_iommu_init(sc, iommu, ivhd);
> > + } else {
> > + printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x\n",
> > +       ivhd->type, ivhd->flags, ivhd->length,
> > +             ivhd->segment, dmar_bdf(ivhd->devid), ivhd->cap,
> > +             ivhd->address, ivhd->info,
> > +       ivhd->feature);
> > + if (ivhd->flags & IVHD_PPRSUP)
> > + printf(" PPRSup");
> > + if (ivhd->flags & IVHD_PREFSUP)
> > + printf(" PreFSup");
> > + if (ivhd->flags & IVHD_COHERENT)
> > + printf(" Coherent");
> > + if (ivhd->flags & IVHD_IOTLB)
> > + printf(" Iotlb");
> > + if (ivhd->flags & IVHD_ISOC)
> > + printf(" ISoc");
> > + if (ivhd->flags & IVHD_RESPASSPW)
> > + printf(" ResPassPW");
> > + if (ivhd->flags & IVHD_PASSPW)
> > + printf(" PassPW");
> > + if (ivhd->flags & IVHD_HTTUNEN)
> > + printf( " HtTunEn");
> > + if (ivhd->flags)
> > + printf("\n");
> > + off = sizeof(*ivhd);
> > + }
> > + while (off < ivhd->length) {
> > + ie = (void *)ivhd + off;
> > + switch (ie->type) {
> > + case IVHD_ALL:
> > + all_dte = ie->all.data;
> > + printf(" ALL %.4x\n", dte);
> > + off += sizeof(ie->all);
> > + break;
> > + case IVHD_SEL:
> > + dte = ie->sel.data;
> > + printf(" SELECT: %s %.4x\n", dmar_bdf(ie->sel.devid), dte);
> > + off += sizeof(ie->sel);
> > + break;
> > + case IVHD_SOR:
> > + dte = ie->sor.data;
> > + start = ie->sor.devid;
> > + printf(" SOR: %s %.4x\n", dmar_bdf(start), dte);
> > + off += sizeof(ie->sor);
> > + break;
> > + case IVHD_EOR:
> > + printf(" EOR: %s\n", dmar_bdf(ie->eor.devid));
> > + off += sizeof(ie->eor);
> > + break;
> > + case IVHD_ALIAS_SEL:
> > + dte = ie->alias.data;
> > + printf(" ALIAS: src=%s: ", dmar_bdf(ie->alias.srcid));
> > + printf(" %s %.4x\n", dmar_bdf(ie->alias.devid), dte);
> > + off += sizeof(ie->alias);
> > + break;
> > + case IVHD_ALIAS_SOR:
> > + dte = ie->alias.data;
> > + printf(" ALIAS_SOR: %s %.4x ", dmar_bdf(ie->alias.devid), dte);
> > + printf(" src=%s\n", dmar_bdf(ie->alias.srcid));
> > + off += sizeof(ie->alias);
> > + break;
> > + case IVHD_EXT_SEL:
> > + dte = ie->ext.data;
> > + printf(" EXT SEL: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
> > + dte, ie->ext.extdata);
> > + off += sizeof(ie->ext);
> > + break;
> > + case IVHD_EXT_SOR:
> > + dte = ie->ext.data;
> > + printf(" EXT SOR: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
> > +       dte, ie->ext.extdata);
> > + off += sizeof(ie->ext);
> > + break;
> > + case IVHD_SPECIAL:
> > + printf(" SPECIAL\n");
> > + off += sizeof(ie->special);
> > + break;
> > + default:
> > + printf(" 2:unknown %x\n", ie->type);
> > + off = ivhd->length;
> > + break;
> > + }
> > + }
> > +}
> > +
/*
 * Parse the ACPI IVRS table (AMD-Vi): select the AMD page-table mapper
 * and walk the IVHD/IVMD sub-tables.  IVMD entries are only reported,
 * not yet acted upon.
 */
void
acpiivrs_init(struct acpidmar_softc *sc, struct acpi_ivrs *ivrs)
{
	union acpi_ivrs_entry *ie;
	int off;

	domain_map_page = domain_map_page_amd;
	printf("IVRS Version: %d\n", ivrs->hdr.revision);
	printf(" VA Size: %d\n", (ivrs->ivinfo >> IVRS_VASIZE_SHIFT) & IVRS_VASIZE_MASK);
	printf(" PA Size: %d\n", (ivrs->ivinfo >> IVRS_PASIZE_SHIFT) & IVRS_PASIZE_MASK);

	TAILQ_INIT(&sc->sc_drhds);
	TAILQ_INIT(&sc->sc_rmrrs);
	TAILQ_INIT(&sc->sc_atsrs);

	printf("======== IVRS\n");
	off = sizeof(*ivrs);
	while (off < ivrs->hdr.length) {
		ie = (void *)ivrs + off;
		switch (ie->type) {
		case IVRS_IVHD:
		case IVRS_IVHD_EXT:
			acpiivrs_ivhd(sc, &ie->ivhd);
			break;
		case IVRS_IVMD_ALL:
		case IVRS_IVMD_SPECIFIED:
		case IVRS_IVMD_RANGE:
			/* Memory definition blocks: not handled yet */
			printf("ivmd\n");
			break;
		default:
			printf("1:unknown: %x\n", ie->type);
			break;
		}
		off += ie->length;
	}
	printf("======== End IVRS\n");
}
> > +
> > +static int
> > +acpiivhd_activate(struct iommu_softc *iommu, int act)
> > +{
> > + switch (act) {
> > + case DVACT_SUSPEND:
> > + iommu->flags |= IOMMU_FLAGS_SUSPEND;
> > + break;
> > + case DVACT_RESUME:
> > + iommu->flags &= ~IOMMU_FLAGS_SUSPEND;
> > + break;
> > + }
> > + return (0);
> > +}
> > +
/*
 * Suspend/resume entry point.  On resume, Intel units have their root
 * table address, fault-event MSI registers, and (when previously
 * enabled) translation restored; AMD units (iommu->dte set) only update
 * their state flags via acpiivhd_activate().  On suspend, translation
 * is disabled on Intel units.
 */
int
acpidmar_activate(struct device *self, int act)
{
	struct acpidmar_softc *sc = (struct acpidmar_softc *)self;
	struct iommu_softc *iommu;

	printf("called acpidmar_activate %d %p\n", act, sc);

	if (sc == NULL) {
		return (0);
	}

	switch (act) {
	case DVACT_RESUME:
		TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
			printf("iommu%d resume\n", iommu->id);
			/* AMD units are distinguished by a device table pointer */
			if (iommu->dte) {
				acpiivhd_activate(iommu, act);
				continue;
			}
			iommu_flush_write_buffer(iommu);
			iommu_set_rtaddr(iommu, iommu->rtaddr);
			/* Restore fault-event MSI data/address registers */
			iommu_writel(iommu, DMAR_FEDATA_REG, iommu->fedata);
			iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
			iommu_writel(iommu, DMAR_FEUADDR_REG,
			    iommu->feaddr >> 32);
			/* Re-enable translation only if it was on before suspend */
			if ((iommu->flags & (IOMMU_FLAGS_BAD|IOMMU_FLAGS_SUSPEND)) ==
			    IOMMU_FLAGS_SUSPEND) {
				printf("enable wakeup translation\n");
				iommu_enable_translation(iommu, 1);
			}
			iommu_showcfg(iommu, -1);
		}
		break;
	case DVACT_SUSPEND:
		TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
			printf("iommu%d suspend\n", iommu->id);
			if (iommu->flags & IOMMU_FLAGS_BAD)
				continue;
			if (iommu->dte) {
				acpiivhd_activate(iommu, act);
				continue;
			}
			iommu->flags |= IOMMU_FLAGS_SUSPEND;
			iommu_enable_translation(iommu, 0);
			iommu_showcfg(iommu, -1);
		}
		break;
	}
	return (0);
}
> > +
> > +void
> > +acpidmar_sw(int act)
> > +{
> > + acpidmar_activate((struct device *)acpidmar_sc, act);
> > +}
> > +
> > +int
> > +acpidmar_match(struct device *parent, void *match, void *aux)
> > +{
> > + struct acpi_attach_args *aaa = aux;
> > + struct acpi_table_header *hdr;
> > +
> > + /* If we do not have a table, it is not us */
> > + if (aaa->aaa_table == NULL)
> > + return (0);
> > +
> > + /* If it is an DMAR table, we can attach */
> > + hdr = (struct acpi_table_header *)aaa->aaa_table;
> > + if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0)
> > + return (1);
> > + if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0)
> > + return (1);
> > +
> > + return (0);
> > +}
> > +
> > +void
> > +acpidmar_attach(struct device *parent, struct device *self, void *aux)
> > +{
> > + struct acpidmar_softc *sc = (void *)self;
> > + struct acpi_attach_args *aaa = aux;
> > + struct acpi_dmar *dmar = (struct acpi_dmar *)aaa->aaa_table;
> > + struct acpi_ivrs        *ivrs = (struct acpi_ivrs *)aaa->aaa_table;
> > + struct acpi_table_header *hdr;
> > +
> > + hdr = (struct acpi_table_header *)aaa->aaa_table;
> > + sc->sc_memt = aaa->aaa_memt;
> > + if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0) {
> > + acpidmar_sc = sc;
> > + acpidmar_init(sc, dmar);
> > + }
> > + if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0) {
> > + acpidmar_sc = sc;
> > + acpiivrs_init(sc, ivrs);
> > + }
> > +}
> > +
> > +/* Interrupt shiz */
> > +void acpidmar_msi_hwmask(struct pic *, int);
> > +void acpidmar_msi_hwunmask(struct pic *, int);
> > +void acpidmar_msi_addroute(struct pic *, struct cpu_info *, int, int, int);
> > +void acpidmar_msi_delroute(struct pic *, struct cpu_info *, int, int, int);
> > +
> > +void
> > +acpidmar_msi_hwmask(struct pic *pic, int pin)
> > +{
> > + struct iommu_pic *ip = (void *)pic;
> > + struct iommu_softc *iommu = ip->iommu;
> > +
> > + printf("msi_hwmask\n");
> > +
> > + mtx_enter(&iommu->reg_lock);
> > +
> > + iommu_writel(iommu, DMAR_FECTL_REG, FECTL_IM);
> > + iommu_readl(iommu, DMAR_FECTL_REG);
> > +
> > + mtx_leave(&iommu->reg_lock);
> > +}
> > +
> > +void
> > +acpidmar_msi_hwunmask(struct pic *pic, int pin)
> > +{
> > + struct iommu_pic *ip = (void *)pic;
> > + struct iommu_softc *iommu = ip->iommu;
> > +
> > + printf("msi_hwunmask\n");
> > +
> > + mtx_enter(&iommu->reg_lock);
> > +
> > + iommu_writel(iommu, DMAR_FECTL_REG, 0);
> > + iommu_readl(iommu, DMAR_FECTL_REG);
> > +
> > + mtx_leave(&iommu->reg_lock);
> > +}
> > +
> > +void
> > +acpidmar_msi_addroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
> > +    int type)
> > +{
> > + struct iommu_pic *ip = (void *)pic;
> > + struct iommu_softc *iommu = ip->iommu;
> > +
> > + mtx_enter(&iommu->reg_lock);
> > +
> > + iommu->fedata = vec;
> > + iommu->feaddr = 0xfee00000L | (ci->ci_apicid << 12);
> > + iommu_writel(iommu, DMAR_FEDATA_REG, vec);
> > + iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
> > + iommu_writel(iommu, DMAR_FEUADDR_REG, iommu->feaddr >> 32);
> > +
> > + mtx_leave(&iommu->reg_lock);
> > +}
> > +
> > +void
> > +acpidmar_msi_delroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
> > +    int type)
> > +{
> > + printf("msi_delroute\n");
> > +}
> > +
> > +void *
> > +acpidmar_intr_establish(void *ctx, int level, int (*func)(void *),
> > +    void *arg, const char *what)
> > +{
> > + struct iommu_softc *iommu = ctx;
> > + struct pic *pic;
> > +
> > + pic = &iommu->pic.pic;
> > + iommu->pic.iommu = iommu;
> > +
> > + strlcpy(pic->pic_dev.dv_xname, "dmarpic",
> > + sizeof(pic->pic_dev.dv_xname));
> > + pic->pic_type = PIC_MSI;
> > + pic->pic_hwmask = acpidmar_msi_hwmask;
> > + pic->pic_hwunmask = acpidmar_msi_hwunmask;
> > + pic->pic_addroute = acpidmar_msi_addroute;
> > + pic->pic_delroute = acpidmar_msi_delroute;
> > + pic->pic_edge_stubs = ioapic_edge_stubs;
> > +#ifdef MULTIPROCESSOR
> > + mtx_init(&pic->pic_mutex, level);
> > +#endif
> > +
> > + return intr_establish(-1, pic, 0, IST_PULSE, level, NULL, func, arg, what);
> > +}
> > +
> > +int
> > +acpidmar_intr(void *ctx)
> > +{
> > + struct iommu_softc *iommu = ctx;
> > + struct fault_entry fe;
> > + static struct fault_entry ofe;
> > + int fro, nfr, fri, i;
> > + uint32_t sts;
> > +
> > + //splassert(IPL_HIGH);
> > +
> > + if (!(iommu->gcmd & GCMD_TE)) {
> > + return (1);
> > + }
> > + mtx_enter(&iommu->reg_lock);
> > + sts = iommu_readl(iommu, DMAR_FECTL_REG);
> > + sts = iommu_readl(iommu, DMAR_FSTS_REG);
> > +
> > + if (!(sts & FSTS_PPF)) {
> > + mtx_leave(&iommu->reg_lock);
> > + return (1);
> > + }
> > +
> > + nfr = cap_nfr(iommu->cap);
> > + fro = cap_fro(iommu->cap);
> > + fri = (sts >> FSTS_FRI_SHIFT) & FSTS_FRI_MASK;
> > + for (i = 0; i < nfr; i++) {
> > + fe.hi = iommu_readq(iommu, fro + (fri*16) + 8);
> > + if (!(fe.hi & FRCD_HI_F))
> > + break;
> > +
> > + fe.lo = iommu_readq(iommu, fro + (fri*16));
> > + if (ofe.hi != fe.hi || ofe.lo != fe.lo) {
> > + iommu_showfault(iommu, fri, &fe);
> > + ofe.hi = fe.hi;
> > + ofe.lo = fe.lo;
> > + }
> > + fri = (fri + 1) % nfr;
> > + }
> > +
> > + iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
> > +
> > + mtx_leave(&iommu->reg_lock);
> > +
> > + return (1);
> > +}
> > +
> > +const char *vtd_faults[] = {
> > + "Software",
> > + "Root Entry Not Present", /* ok (rtaddr + 4096) */
> > + "Context Entry Not Present", /* ok (no CTX_P) */
> > + "Context Entry Invalid", /* ok (tt = 3) */
> > + "Address Beyond MGAW",
> > + "Write", /* ok */
> > + "Read", /* ok */
> > + "Paging Entry Invalid", /* ok */
> > + "Root Table Invalid",
> > + "Context Table Invalid",
> > + "Root Entry Reserved",          /* ok (root.lo |= 0x4) */
> > + "Context Entry Reserved",
> > + "Paging Entry Reserved",
> > + "Context Entry TT",
> > + "Reserved",
> > +};
> > +
> > +void iommu_showpte(uint64_t, int, uint64_t);
> > +
> > +void
> > +iommu_showpte(uint64_t ptep, int lvl, uint64_t base)
> > +{
> > + uint64_t nb, pb, i;
> > + struct pte_entry *pte;
> > +
> > + pte = (void *)PMAP_DIRECT_MAP(ptep);
> > + for (i = 0; i < 512; i++) {
> > + if (!(pte[i].val & PTE_P))
> > + continue;
> > + nb = base + (i << lvl);
> > + pb = pte[i].val & ~VTD_PAGE_MASK;
> > + if(lvl == VTD_LEVEL0) {
> > + printf("   %3llx %.16llx = %.16llx %c%c %s\n",
> > +    i, nb, pb,
> > +    pte[i].val == PTE_R ? 'r' : ' ',
> > +    pte[i].val & PTE_W ? 'w' : ' ',
> > +    (nb == pb) ? " ident" : "");
> > + if (nb == pb)
> > + return;
> > + } else {
> > + iommu_showpte(pb, lvl - VTD_STRIDE_SIZE, nb);
> > + }
> > + }
> > +}
> > +
> > +void
> > +iommu_showcfg(struct iommu_softc *iommu, int sid)
> > +{
> > + int i, j, sts, cmd;
> > + struct context_entry *ctx;
> > + pcitag_t tag;
> > + pcireg_t clc;
> > +
> > + cmd = iommu_readl(iommu, DMAR_GCMD_REG);
> > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > + printf("iommu%d: flags:%d root pa:%.16llx %s %s %s %.8x %.8x\n",
> > +    iommu->id, iommu->flags, iommu_readq(iommu, DMAR_RTADDR_REG),
> > +    sts & GSTS_TES ? "enabled" : "disabled",
> > +    sts & GSTS_QIES ? "qi" : "ccmd",
> > +    sts & GSTS_IRES ? "ir" : "",
> > +    cmd, sts);
> > + for (i = 0; i < 256; i++) {
> > + if (!root_entry_is_valid(&iommu->root[i])) {
> > + continue;
> > + }
> > + for (j = 0; j < 256; j++) {
> > + ctx = iommu->ctx[i] + j;
> > + if (!context_entry_is_valid(ctx)) {
> > + continue;
> > + }
> > + tag = pci_make_tag(NULL, i, (j >> 3), j & 0x7);
> > + clc = pci_conf_read(NULL, tag, 0x08) >> 8;
> > + printf("  %.2x:%.2x.%x lvl:%d did:%.4x tt:%d ptep:%.16llx flag:%x cc:%.6x\n",
> > +    i, (j >> 3), j & 7,
> > +    context_address_width(ctx),
> > +    context_domain_id(ctx),
> > +    context_translation_type(ctx),
> > +    context_pte(ctx),
> > +    context_user(ctx),
> > +    clc);
> > +#if 0
> > + /* dump pagetables */
> > + iommu_showpte(ctx->lo & ~VTD_PAGE_MASK, iommu->agaw -
> > +    VTD_STRIDE_SIZE, 0);
> > +#endif
> > + }
> > + }
> > +}
> > +
> > +void
> > +iommu_showfault(struct iommu_softc *iommu, int fri, struct fault_entry *fe)
> > +{
> > + int bus, dev, fun, type, fr, df;
> > + bios_memmap_t *im;
> > + const char *mapped;
> > +
> > + if (!(fe->hi & FRCD_HI_F))
> > + return;
> > + type = (fe->hi & FRCD_HI_T) ? 'r' : 'w';
> > + fr = (fe->hi >> FRCD_HI_FR_SHIFT) & FRCD_HI_FR_MASK;
> > + bus = (fe->hi >> FRCD_HI_BUS_SHIFT) & FRCD_HI_BUS_MASK;
> > + dev = (fe->hi >> FRCD_HI_DEV_SHIFT) & FRCD_HI_DEV_MASK;
> > + fun = (fe->hi >> FRCD_HI_FUN_SHIFT) & FRCD_HI_FUN_MASK;
> > + df  = (fe->hi >> FRCD_HI_FUN_SHIFT) & 0xFF;
> > + iommu_showcfg(iommu, mksid(bus,dev,fun));
> > + if (!iommu->ctx[bus]) {
> > + /* Bus is not initialized */
> > + mapped = "nobus";
> > + } else if (!context_entry_is_valid(&iommu->ctx[bus][df])) {
> > + /* DevFn not initialized */
> > + mapped = "nodevfn";
> > + } else if (context_user(&iommu->ctx[bus][df]) != 0xA) {
> > + /* no bus_space_map */
> > + mapped = "nomap";
> > + } else {
> > + /* bus_space_map */
> > + mapped = "mapped";
> > + }
> > + printf("fri%d: dmar: %.2x:%.2x.%x %s error at %llx fr:%d [%s] iommu:%d [%s]\n",
> > +    fri, bus, dev, fun,
> > +    type == 'r' ? "read" : "write",
> > +    fe->lo,
> > +    fr, fr <= 13 ? vtd_faults[fr] : "unknown",
> > +    iommu->id,
> > +    mapped);
> > + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
> > + if ((im->type == BIOS_MAP_RES) &&
> > +    (im->addr <= fe->lo) &&
> > +    (fe->lo <= im->addr+im->size)) {
> > + printf("mem in e820.reserved\n");
> > + }
> > + }
> > +#ifdef DDB
> > + if (acpidmar_ddb)
> > + db_enter();
> > +#endif
> > +}
> > +
> > +
> > diff --git a/sys/dev/acpi/acpidmar.h b/sys/dev/acpi/acpidmar.h
> > new file mode 100644
> > index 000000000..33659ecaf
> > --- /dev/null
> > +++ b/sys/dev/acpi/acpidmar.h
> > @@ -0,0 +1,534 @@
> > +/*
> > + * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
> > + *
> > + * Permission to use, copy, modify, and distribute this software for any
> > + * purpose with or without fee is hereby granted, provided that the above
> > + * copyright notice and this permission notice appear in all copies.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> > + */
> > +
> > +#ifndef _DEV_ACPI_DMARREG_H_
> > +#define _DEV_ACPI_DMARREG_H_
> > +
> > +#define VTD_STRIDE_MASK 0x1FF
> > +#define VTD_STRIDE_SIZE 9
> > +#define VTD_PAGE_SIZE   4096
> > +#define VTD_PAGE_MASK   0xFFF
> > +#define VTD_PTE_MASK    0x0000FFFFFFFFF000LL
> > +
> > +#define VTD_LEVEL0 12
> > +#define VTD_LEVEL1 21
> > +#define VTD_LEVEL2 30 /* Minimum level supported */
> > +#define VTD_LEVEL3 39 /* Also supported */
> > +#define VTD_LEVEL4 48
> > +#define VTD_LEVEL5 57
> > +
> > +#define _xbit(x,y) (((x)>> (y)) & 1)
> > +#define _xfld(x,y) (uint32_t)(((x)>> y##_SHIFT) & y##_MASK)
> > +
> > +#define VTD_AWTOLEVEL(x)    (((x) - 30) / VTD_STRIDE_SIZE)
> > +#define VTD_LEVELTOAW(x)    (((x) * VTD_STRIDE_SIZE) + 30)
> > +
> > +#define DMAR_VER_REG 0x00    /* 32:Arch version supported by this IOMMU */
> > +#define DMAR_RTADDR_REG 0x20    /* 64:Root entry table */
> > +#define DMAR_FEDATA_REG 0x3c    /* 32:Fault event interrupt data register */
> > +#define DMAR_FEADDR_REG 0x40    /* 32:Fault event interrupt addr register */
> > +#define DMAR_FEUADDR_REG 0x44    /* 32:Upper address register */
> > +#define DMAR_AFLOG_REG 0x58    /* 64:Advanced Fault control */
> > +#define DMAR_PMEN_REG 0x64    /* 32:Enable Protected Memory Region */
> > +#define DMAR_PLMBASE_REG 0x68    /* 32:PMRR Low addr */
> > +#define DMAR_PLMLIMIT_REG 0x6c    /* 32:PMRR low limit */
> > +#define DMAR_PHMBASE_REG 0x70    /* 64:pmrr high base addr */
> > +#define DMAR_PHMLIMIT_REG 0x78    /* 64:pmrr high limit */
> > +#define DMAR_ICS_REG 0x9C    /* 32:Invalidation complete status register */
> > +#define DMAR_IECTL_REG 0xa0    /* 32:Invalidation event control register */
> > +#define DMAR_IEDATA_REG 0xa4    /* 32:Invalidation event data register */
> > +#define DMAR_IEADDR_REG 0xa8    /* 32:Invalidation event address register */
> > +#define DMAR_IEUADDR_REG 0xac    /* 32:Invalidation event upper address register */
> > +#define DMAR_IRTA_REG 0xb8    /* 64:Interrupt remapping table addr register */
> > +#define DMAR_CAP_REG 0x08    /* 64:Hardware supported capabilities */
> > +#define   CAP_PI (1LL << 59)
> > +#define   CAP_FL1GP (1LL << 56)
> > +#define   CAP_DRD (1LL << 55)
> > +#define   CAP_DWD (1LL << 54)
> > +#define   CAP_MAMV_MASK 0x3F
> > +#define   CAP_MAMV_SHIFT 48LL
> > +#define   cap_mamv(x) _xfld(x,CAP_MAMV)
> > +#define   CAP_NFR_MASK 0xFF
> > +#define   CAP_NFR_SHIFT 40LL
> > +#define   cap_nfr(x) (_xfld(x,CAP_NFR) + 1)
> > +#define   CAP_PSI (1LL << 39)
> > +#define   CAP_SLLPS_MASK 0xF
> > +#define   CAP_SLLPS_SHIFT 34LL
> > +#define   cap_sllps(x) _xfld(x,CAP_SLLPS)
> > +#define   CAP_FRO_MASK 0x3FF
> > +#define   CAP_FRO_SHIFT 24LL
> > +#define   cap_fro(x) (_xfld(x,CAP_FRO) * 16)
> > +#define   CAP_ZLR (1LL << 22)
> > +#define   CAP_MGAW_MASK 0x3F
> > +#define   CAP_MGAW_SHIFT 16LL
> > +#define   cap_mgaw(x) (_xfld(x,CAP_MGAW) + 1)
> > +#define   CAP_SAGAW_MASK 0x1F
> > +#define   CAP_SAGAW_SHIFT 8LL
> > +#define   cap_sagaw(x) _xfld(x,CAP_SAGAW)
> > +#define   CAP_CM (1LL << 7)
> > +#define   CAP_PHMR (1LL << 6)
> > +#define   CAP_PLMR (1LL << 5)
> > +#define   CAP_RWBF (1LL << 4)
> > +#define   CAP_AFL (1LL << 3)
> > +#define   CAP_ND_MASK 0x7
> > +#define   CAP_ND_SHIFT 0x00
> > +#define   cap_nd(x) (16 << (((x) & CAP_ND_MASK) << 1))
> > +
> > +#define DMAR_ECAP_REG 0x10 /* 64:Extended capabilities supported */
> > +#define   ECAP_PSS_MASK 0x1F
> > +#define   ECAP_PSS_SHIFT 35
> > +#define   ECAP_EAFS (1LL << 34)
> > +#define   ECAP_NWFS (1LL << 33)
> > +#define   ECAP_SRS (1LL << 31)
> > +#define   ECAP_ERS (1LL << 30)
> > +#define   ECAP_PRS (1LL << 29)
> > +#define   ECAP_PASID (1LL << 28)
> > +#define   ECAP_DIS (1LL << 27)
> > +#define   ECAP_NEST (1LL << 26)
> > +#define   ECAP_MTS (1LL << 25)
> > +#define   ECAP_ECS (1LL << 24)
> > +#define   ECAP_MHMV_MASK 0xF
> > +#define   ECAP_MHMV_SHIFT 0x20
> > +#define   ecap_mhmv(x) _xfld(x,ECAP_MHMV)
> > +#define   ECAP_IRO_MASK 0x3FF /* IOTLB Register */
> > +#define   ECAP_IRO_SHIFT 0x8
> > +#define   ecap_iro(x) (_xfld(x,ECAP_IRO) * 16)
> > +#define   ECAP_SC (1LL << 7) /* Snoop Control */
> > +#define   ECAP_PT (1LL << 6) /* HW Passthru */
> > +#define   ECAP_EIM (1LL << 4)
> > +#define   ECAP_IR (1LL << 3) /* Interrupt remap */
> > +#define   ECAP_DT (1LL << 2) /* Device IOTLB */
> > +#define   ECAP_QI (1LL << 1) /* Queued Invalidation */
> > +#define   ECAP_C (1LL << 0) /* Coherent cache */
> > +
> > +#define DMAR_GCMD_REG 0x18 /* 32:Global command register */
> > +#define   GCMD_TE (1LL << 31)
> > +#define   GCMD_SRTP (1LL << 30)
> > +#define   GCMD_SFL (1LL << 29)
> > +#define   GCMD_EAFL (1LL << 28)
> > +#define   GCMD_WBF (1LL << 27)
> > +#define   GCMD_QIE (1LL << 26)
> > +#define   GCMD_IRE (1LL << 25)
> > +#define   GCMD_SIRTP (1LL << 24)
> > +#define   GCMD_CFI (1LL << 23)
> > +
> > +#define DMAR_GSTS_REG 0x1c /* 32:Global status register */
> > +#define   GSTS_TES (1LL << 31)
> > +#define   GSTS_RTPS (1LL << 30)
> > +#define   GSTS_FLS (1LL << 29)
> > +#define   GSTS_AFLS (1LL << 28)
> > +#define   GSTS_WBFS (1LL << 27)
> > +#define   GSTS_QIES (1LL << 26)
> > +#define   GSTS_IRES (1LL << 25)
> > +#define   GSTS_IRTPS (1LL << 24)
> > +#define   GSTS_CFIS (1LL << 23)
> > +
> > +#define DMAR_CCMD_REG 0x28 /* 64:Context command reg */
> > +#define   CCMD_ICC (1LL << 63)
> > +#define   CCMD_CIRG_MASK 0x3
> > +#define   CCMD_CIRG_SHIFT 61
> > +#define   CCMD_CIRG(x) ((uint64_t)(x) << CCMD_CIRG_SHIFT)
> > +#define   CCMD_CAIG_MASK 0x3
> > +#define   CCMD_CAIG_SHIFT 59
> > +#define   CCMD_FM_MASK 0x3
> > +#define   CCMD_FM_SHIFT 32
> > +#define   CCMD_FM(x) (((uint64_t)(x) << CCMD_FM_SHIFT))
> > +#define   CCMD_SID_MASK 0xFFFF
> > +#define   CCMD_SID_SHIFT 8
> > +#define   CCMD_SID(x) (((x) << CCMD_SID_SHIFT))
> > +#define   CCMD_DID_MASK 0xFFFF
> > +#define   CCMD_DID_SHIFT 0
> > +#define   CCMD_DID(x) (((x) << CCMD_DID_SHIFT))
> > +
> > +#define CIG_GLOBAL CCMD_CIRG(CTX_GLOBAL)
> > +#define CIG_DOMAIN CCMD_CIRG(CTX_DOMAIN)
> > +#define CIG_DEVICE CCMD_CIRG(CTX_DEVICE)
> > +
> > +
> > +#define DMAR_FSTS_REG 0x34 /* 32:Fault Status register */
> > +#define   FSTS_FRI_MASK 0xFF
> > +#define   FSTS_FRI_SHIFT 8
> > +#define   FSTS_PRO (1LL << 7)
> > +#define   FSTS_ITE (1LL << 6)
> > +#define   FSTS_ICE (1LL << 5)
> > +#define   FSTS_IQE (1LL << 4)
> > +#define   FSTS_APF (1LL << 3)
> > +#define   FSTS_APO (1LL << 2)
> > +#define   FSTS_PPF (1LL << 1)
> > +#define   FSTS_PFO (1LL << 0)
> > +
> > +#define DMAR_FECTL_REG 0x38 /* 32:Fault control register */
> > +#define   FECTL_IM (1LL << 31)
> > +#define   FECTL_IP (1LL << 30)
> > +
> > +#define FRCD_HI_F (1LL << (127-64))
> > +#define FRCD_HI_T (1LL << (126-64))
> > +#define FRCD_HI_AT_MASK 0x3
> > +#define FRCD_HI_AT_SHIFT (124-64)
> > +#define FRCD_HI_PV_MASK 0xFFFFF
> > +#define FRCD_HI_PV_SHIFT (104-64)
> > +#define FRCD_HI_FR_MASK 0xFF
> > +#define FRCD_HI_FR_SHIFT (96-64)
> > +#define FRCD_HI_PP (1LL << (95-64))
> > +
> > +#define FRCD_HI_SID_MASK 0xFF
> > +#define FRCD_HI_SID_SHIFT 0
> > +#define FRCD_HI_BUS_SHIFT 8
> > +#define FRCD_HI_BUS_MASK 0xFF
> > +#define FRCD_HI_DEV_SHIFT 3
> > +#define FRCD_HI_DEV_MASK 0x1F
> > +#define FRCD_HI_FUN_SHIFT 0
> > +#define FRCD_HI_FUN_MASK 0x7
> > +
> > +#define DMAR_IOTLB_REG(x) (ecap_iro((x)->ecap) + 8)
> > +#define DMAR_IVA_REG(x) (ecap_iro((x)->ecap) + 0)
> > +
> > +#define DMAR_FRIH_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 8)
> > +#define DMAR_FRIL_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 0)
> > +
> > +#define IOTLB_IVT (1LL << 63)
> > +#define IOTLB_IIRG_MASK 0x3
> > +#define IOTLB_IIRG_SHIFT 60
> > +#define IOTLB_IIRG(x) ((uint64_t)(x) << IOTLB_IIRG_SHIFT)
> > +#define IOTLB_IAIG_MASK 0x3
> > +#define IOTLB_IAIG_SHIFT 57
> > +#define IOTLB_DR (1LL << 49)
> > +#define IOTLB_DW (1LL << 48)
> > +#define IOTLB_DID_MASK 0xFFFF
> > +#define IOTLB_DID_SHIFT 32
> > +#define IOTLB_DID(x) ((uint64_t)(x) << IOTLB_DID_SHIFT)
> > +
> > +#define IIG_GLOBAL IOTLB_IIRG(IOTLB_GLOBAL)
> > +#define IIG_DOMAIN IOTLB_IIRG(IOTLB_DOMAIN)
> > +#define IIG_PAGE IOTLB_IIRG(IOTLB_PAGE)
> > +
> > +#define DMAR_IQH_REG 0x80 /* 64:Invalidation queue head register */
> > +#define DMAR_IQT_REG 0x88 /* 64:Invalidation queue tail register */
> > +#define DMAR_IQA_REG 0x90 /* 64:Invalidation queue addr register */
> > +#define IQA_QS_256 0 /* 256 entries */
> > +#define IQA_QS_512 1 /* 512 */
> > +#define IQA_QS_1K 2 /* 1024 */
> > +#define IQA_QS_2K 3 /* 2048 */
> > +#define IQA_QS_4K 4 /* 4096 */
> > +#define IQA_QS_8K 5 /* 8192 */
> > +#define IQA_QS_16K 6 /* 16384 */
> > +#define IQA_QS_32K 7 /* 32768 */
> > +
> > +/* Read-Modify-Write helpers */
> > +static inline void iommu_rmw32(void *ov, uint32_t mask, uint32_t shift, uint32_t nv)
> > +{
> > + *(uint32_t *)ov &= ~(mask << shift);
> > + *(uint32_t *)ov |= (nv & mask) << shift;
> > +}
> > +static inline void iommu_rmw64(void *ov, uint32_t mask, uint32_t shift, uint64_t nv)
> > +{
> > + *(uint64_t *)ov &= ~((uint64_t)mask << shift);
> > + *(uint64_t *)ov |= (nv & mask) << shift;
> > +}
> > +
> > +/*
> > + * Root Entry: one per bus (256 x 128 bit = 4k)
> > + *   0        = Present
> > + *   1:11     = Reserved
> > + *   12:HAW-1 = Context Table Pointer
> > + *   HAW:63   = Reserved
> > + *   64:127   = Reserved
> > + */
> > +#define ROOT_P (1L << 0)
> > +struct root_entry {
> > + uint64_t lo;
> > + uint64_t hi;
> > +};
> > +
> > +/* Check if root entry is valid */
> > +static inline bool
> > +root_entry_is_valid(struct root_entry *re)
> > +{
> > + return (re->lo & ROOT_P);
> > +}
> > +
> > +/*
> > + * Context Entry: one per devfn (256 x 128 bit = 4k)
> > + *   0      = Present
> > + *   1      = Fault Processing Disable
> > + *   2:3    = Translation Type
> > + *   4:11   = Reserved
> > + *   12:63  = Second Level Page Translation
> > + *   64:66  = Address Width (# PTE levels)
> > + *   67:70  = Ignore
> > + *   71     = Reserved
> > + *   72:87  = Domain ID
> > + *   88:127 = Reserved
> > + */
> > +#define CTX_P (1L << 0)
> > +#define CTX_FPD (1L << 1)
> > +#define CTX_T_MASK 0x3
> > +#define CTX_T_SHIFT 2
> > +enum {
> > + CTX_T_MULTI,
> > + CTX_T_IOTLB,
> > + CTX_T_PASSTHRU
> > +};
> > +
> > +#define CTX_H_AW_MASK 0x7
> > +#define CTX_H_AW_SHIFT 0
> > +#define CTX_H_USER_MASK 0xF
> > +#define CTX_H_USER_SHIFT 3
> > +#define CTX_H_DID_MASK 0xFFFF
> > +#define CTX_H_DID_SHIFT 8
> > +
> > +struct context_entry {
> > + uint64_t lo;
> > + uint64_t hi;
> > +};
> > +
> > +/* Set fault processing enable/disable */
> > +static inline void
> > +context_set_fpd(struct context_entry *ce, int enable)
> > +{
> > + ce->lo &= ~CTX_FPD;
> > + if (enable)
> > + ce->lo |= CTX_FPD;
> > +}
> > +
> > +/* Set context entry present */
> > +static inline void
> > +context_set_present(struct context_entry *ce)
> > +{
> > + ce->lo |= CTX_P;
> > +}
> > +
> > +/* Set Second Level Page Table Entry PA */
> > +static inline void
> > +context_set_slpte(struct context_entry *ce, paddr_t slpte)
> > +{
> > + ce->lo &= VTD_PAGE_MASK;
> > + ce->lo |= (slpte & ~VTD_PAGE_MASK);
> > +}
> > +
> > +/* Set translation type */
> > +static inline void
> > +context_set_translation_type(struct context_entry *ce, int tt)
> > +{
> > + ce->lo &= ~(CTX_T_MASK << CTX_T_SHIFT);
> > + ce->lo |= ((tt & CTX_T_MASK) << CTX_T_SHIFT);
> > +}
> > +
> > +/* Set Address Width (# of Page Table levels) */
> > +static inline void
> > +context_set_address_width(struct context_entry *ce, int lvl)
> > +{
> > + ce->hi &= ~(CTX_H_AW_MASK << CTX_H_AW_SHIFT);
> > + ce->hi |= ((lvl & CTX_H_AW_MASK) << CTX_H_AW_SHIFT);
> > +}
> > +
> > +/* Set domain ID */
> > +static inline void
> > +context_set_domain_id(struct context_entry *ce, int did)
> > +{
> > + ce->hi &= ~(CTX_H_DID_MASK << CTX_H_DID_SHIFT);
> > + ce->hi |= ((did & CTX_H_DID_MASK) << CTX_H_DID_SHIFT);
> > +}
> > +
> > +/* Get Second Level Page Table PA */
> > +static inline uint64_t
> > +context_pte(struct context_entry *ce)
> > +{
> > + return (ce->lo & ~VTD_PAGE_MASK);
> > +}
> > +
> > +/* Get translation type */
> > +static inline int
> > +context_translation_type(struct context_entry *ce)
> > +{
> > + return (ce->lo >> CTX_T_SHIFT) & CTX_T_MASK;
> > +}
> > +
> > +/* Get domain ID */
> > +static inline int
> > +context_domain_id(struct context_entry *ce)
> > +{
> > + return (ce->hi >> CTX_H_DID_SHIFT) & CTX_H_DID_MASK;
> > +}
> > +
> > +/* Get Address Width */
> > +static inline int
> > +context_address_width(struct context_entry *ce)
> > +{
> > + return VTD_LEVELTOAW((ce->hi >> CTX_H_AW_SHIFT) & CTX_H_AW_MASK);
> > +}
> > +
> > +/* Check if context entry is valid */
> > +static inline bool
> > +context_entry_is_valid(struct context_entry *ce)
> > +{
> > + return (ce->lo & CTX_P);
> > +}
> > +
> > +/* User-available bits in context entry */
> > +static inline int
> > +context_user(struct context_entry *ce)
> > +{
> > + return (ce->hi >> CTX_H_USER_SHIFT) & CTX_H_USER_MASK;
> > +}
> > +
> > +static inline void
> > +context_set_user(struct context_entry *ce, int v)
> > +{
> > + ce->hi &= ~(CTX_H_USER_MASK << CTX_H_USER_SHIFT);
> > + ce->hi |=  ((v & CTX_H_USER_MASK) << CTX_H_USER_SHIFT);
> > +}
> > +
> > +/*
> > + * Fault entry
> > + *   0..HAW-1 = Fault address
> > + *   HAW:63   = Reserved
> > + *   64:71    = Source ID
> > + *   96:103   = Fault Reason
> > + *   104:123  = PV
> > + *   124:125  = Address Translation type
> > + *   126      = Type (0 = Read, 1 = Write)
> > + *   127      = Fault bit
> > + */
> > +struct fault_entry
> > +{
> > + uint64_t lo;
> > + uint64_t hi;
> > +};
> > +
> > +/* PTE Entry: 512 x 64-bit = 4k */
> > +#define PTE_P (1L << 0)
> > +#define PTE_R 0x00
> > +#define PTE_W (1L << 1)
> > +#define PTE_US  (1L << 2)
> > +#define PTE_PWT (1L << 3)
> > +#define PTE_PCD (1L << 4)
> > +#define PTE_A   (1L << 5)
> > +#define PTE_D   (1L << 6)
> > +#define PTE_PAT (1L << 7)
> > +#define PTE_G   (1L << 8)
> > +#define PTE_EA  (1L << 10)
> > +#define PTE_XD  (1LL << 63)
> > +
> > +/* PDE Level entry */
> > +#define PTE_PS  (1L << 7)
> > +
> > +/* PDPE Level entry */
> > +
> > +/* ----------------------------------------------------------------
> > + * 5555555444444444333333333222222222111111111000000000------------
> > + * [PML4 ->] PDPE.1GB
> > + * [PML4 ->] PDPE.PDE -> PDE.2MB
> > + * [PML4 ->] PDPE.PDE -> PDE -> PTE
> > + * GAW0 = (12.20) (PTE)
> > + * GAW1 = (21.29) (PDE)
> > + * GAW2 = (30.38) (PDPE)
> > + * GAW3 = (39.47) (PML4)
> > + * GAW4 = (48.57) (n/a)
> > + * GAW5 = (58.63) (n/a)
> > + */
> > +struct pte_entry {
> > + uint64_t val;
> > +};
> > +
> > +/*
> > + * Queued Invalidation entry
> > + *  0:3   = 01h
> > + *  4:5   = Granularity
> > + *  6:15  = Reserved
> > + *  16:31 = Domain ID
> > + *  32:47 = Source ID
> > + *  48:49 = FM
> > + */
> > +
> > +/* Invalidate Context Entry */
> > +#define QI_CTX_DID_MASK 0xFFFF
> > +#define QI_CTX_DID_SHIFT 16
> > +#define QI_CTX_SID_MASK 0xFFFF
> > +#define QI_CTX_SID_SHIFT 32
> > +#define QI_CTX_FM_MASK 0x3
> > +#define QI_CTX_FM_SHIFT 48
> > +#define QI_CTX_IG_MASK 0x3
> > +#define QI_CTX_IG_SHIFT 4
> > +#define QI_CTX_DID(x) (((uint64_t)(x) << QI_CTX_DID_SHIFT))
> > +#define QI_CTX_SID(x) (((uint64_t)(x) << QI_CTX_SID_SHIFT))
> > +#define QI_CTX_FM(x) (((uint64_t)(x) << QI_CTX_FM_SHIFT))
> > +
> > +#define QI_CTX_IG_GLOBAL (CTX_GLOBAL << QI_CTX_IG_SHIFT)
> > +#define QI_CTX_IG_DOMAIN (CTX_DOMAIN << QI_CTX_IG_SHIFT)
> > +#define QI_CTX_IG_DEVICE (CTX_DEVICE << QI_CTX_IG_SHIFT)
> > +
> > +/* Invalidate IOTLB Entry */
> > +#define QI_IOTLB_DID_MASK 0xFFFF
> > +#define QI_IOTLB_DID_SHIFT 16
> > +#define QI_IOTLB_IG_MASK 0x3
> > +#define QI_IOTLB_IG_SHIFT 4
> > +#define QI_IOTLB_DR (1LL << 6)
> > +#define QI_IOTLB_DW (1LL << 5)
> > +#define QI_IOTLB_DID(x) (((uint64_t)(x) << QI_IOTLB_DID_SHIFT))
> > +
> > +#define QI_IOTLB_IG_GLOBAL (1 << QI_IOTLB_IG_SHIFT)
> > +#define QI_IOTLB_IG_DOMAIN (2 << QI_IOTLB_IG_SHIFT)
> > +#define QI_IOTLB_IG_PAGE (3 << QI_IOTLB_IG_SHIFT)
> > +
> > +/* QI Commands */
> > +#define QI_CTX 0x1
> > +#define QI_IOTLB 0x2
> > +#define QI_DEVTLB 0x3
> > +#define QI_INTR 0x4
> > +#define QI_WAIT 0x5
> > +#define QI_EXTTLB 0x6
> > +#define QI_PAS 0x7
> > +#define QI_EXTDEV 0x8
> > +
> > +struct qi_entry {
> > + uint64_t lo;
> > + uint64_t hi;
> > +};
> > +
> > +enum {
> > + CTX_GLOBAL = 1,
> > + CTX_DOMAIN,
> > + CTX_DEVICE,
> > +
> > + IOTLB_GLOBAL = 1,
> > + IOTLB_DOMAIN,
> > + IOTLB_PAGE,
> > +};
> > +
> > +enum {
> > + VTD_FAULT_ROOT_P = 0x1,         /* P field in root entry is 0 */
> > + VTD_FAULT_CTX_P = 0x2,          /* P field in context entry is 0 */
> > + VTD_FAULT_CTX_INVAL = 0x3,      /* context AW/TT/SLPPTR invalid */
> > + VTD_FAULT_LIMIT = 0x4,          /* Address is outside of MGAW */
> > + VTD_FAULT_WRITE = 0x5,          /* Address-translation fault, non-writable */
> > + VTD_FAULT_READ = 0x6,           /* Address-translation fault, non-readable */
> > + VTD_FAULT_PTE_INVAL = 0x7,      /* page table hw access error */
> > + VTD_FAULT_ROOT_INVAL = 0x8,     /* root table hw access error */
> > + VTD_FAULT_CTX_TBL_INVAL = 0x9,  /* context entry hw access error */
> > + VTD_FAULT_ROOT_RESERVED = 0xa,  /* non-zero reserved field in root entry */
> > + VTD_FAULT_CTX_RESERVED = 0xb,   /* non-zero reserved field in context entry */
> > + VTD_FAULT_PTE_RESERVED = 0xc,   /* non-zero reserved field in paging entry */
> > + VTD_FAULT_CTX_TT = 0xd,         /* invalid translation type */
> > +};
> > +
> > +#endif
> > +
> > +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
> > +void dmar_ptmap(bus_dma_tag_t, bus_addr_t);
> > +void acpidmar_sw(int);
> > +
> > +#define __EXTRACT(v,m) (((v) >> m##_SHIFT) & m##_MASK)
> > diff --git a/sys/dev/acpi/acpireg.h b/sys/dev/acpi/acpireg.h
> > index bfbb73ce2..8ba55c8ee 100644
> > --- a/sys/dev/acpi/acpireg.h
> > +++ b/sys/dev/acpi/acpireg.h
> > @@ -623,6 +623,9 @@ struct acpi_ivmd {
> >  struct acpi_ivhd {
> >   uint8_t type;
> >   uint8_t flags;
> > +#define IVHD_PPRSUP (1L << 7)
> > +#define IVHD_PREFSUP (1L << 6)
> > +#define IVHD_COHERENT (1L << 5)
> >  #define IVHD_IOTLB (1L << 4)
> >  #define IVHD_ISOC (1L << 3)
> >  #define IVHD_RESPASSPW (1L << 2)
> > @@ -638,13 +641,28 @@ struct acpi_ivhd {
> >  #define IVHD_UNITID_MASK 0x1F
> >  #define IVHD_MSINUM_SHIFT 0
> >  #define IVHD_MSINUM_MASK 0x1F
> > - uint32_t reserved;
> > + uint32_t feature;
> >  } __packed;
> >  
> > +struct acpi_ivhd_ext {
> > + uint8_t type;
> > + uint8_t flags;
> > + uint16_t length;
> > + uint16_t devid;
> > + uint16_t cap;
> > + uint64_t address;
> > + uint16_t segment;
> > + uint16_t info;
> > + uint32_t attrib;
> > + uint64_t efr;
> > + uint8_t reserved[8];
> > +} __packed;
> > +
> >  union acpi_ivrs_entry {
> >   struct {
> >   uint8_t type;
> >  #define IVRS_IVHD 0x10
> > +#define IVRS_IVHD_EXT 0x11
> >  #define IVRS_IVMD_ALL 0x20
> >  #define IVRS_IVMD_SPECIFIED 0x21
> >  #define IVRS_IVMD_RANGE 0x22
> > @@ -652,6 +670,7 @@ union acpi_ivrs_entry {
> >   uint16_t length;
> >   } __packed;
> >   struct acpi_ivhd ivhd;
> > + struct acpi_ivhd_ext ivhd_ext;
> >   struct acpi_ivmd ivmd;
> >  } __packed;
> >  
> > diff --git a/sys/dev/acpi/amd_iommu.h b/sys/dev/acpi/amd_iommu.h
> > new file mode 100644
> > index 000000000..db6d371aa
> > --- /dev/null
> > +++ b/sys/dev/acpi/amd_iommu.h
> > @@ -0,0 +1,358 @@
> > +/*
> > + * Copyright (c) 2019 Jordan Hargrave <[hidden email]>
> > + *
> > + * Permission to use, copy, modify, and distribute this software for any
> > + * purpose with or without fee is hereby granted, provided that the above
> > + * copyright notice and this permission notice appear in all copies.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> > + */
> > +#ifndef __amd_iommu_h__
> > +#define __amd_iommu_h__
> > +
> > +#define DEV_TAB_BASE_REG 0x0000
> > +#define CMD_BASE_REG 0x0008
> > +#define EVT_BASE_REG 0x0010
> > +
> > +#define EXCL_BASE_REG 0x0020
> > +#define EXCL_LIMIT_REG 0x0028
> > +
> > +/* Extended Feature Register */
> > +#define EXTFEAT_REG 0x0030
> > +#define  EFR_PREFSUP (1L << 0)
> > +#define  EFR_PPRSUP (1L << 1)
> > +#define  EFR_NXSUP (1L << 3)
> > +#define  EFR_GTSUP (1L << 4)
> > +#define  EFR_IASUP (1L << 6)
> > +#define  EFR_GASUP (1L << 7)
> > +#define  EFR_HESUP (1L << 8)
> > +#define  EFR_PCSUP (1L << 9)
> > +#define  EFR_HATS_SHIFT 10
> > +#define  EFR_HATS_MASK 0x3
> > +#define  EFR_GATS_SHIFT 12
> > +#define  EFR_GATS_MASK 0x3
> > +#define  EFR_GLXSUP_SHIFT 14
> > +#define  EFR_GLXSUP_MASK 0x3
> > +#define  EFR_SMIFSUP_SHIFT 16
> > +#define  EFR_SMIFSUP_MASK 0x3        
> > +#define  EFR_SMIFRC_SHIFT 18
> > +#define  EFR_SMIFRC_MASK 0x7
> > +#define  EFR_GAMSUP_SHIFT 21
> > +#define  EFR_GAMSUP_MASK 0x7
> > +
> > +#define CMD_HEAD_REG 0x2000
> > +#define CMD_TAIL_REG 0x2008
> > +#define EVT_HEAD_REG 0x2010
> > +#define EVT_TAIL_REG 0x2018
> > +
> > +#define IOMMUSTS_REG 0x2020
> > +
> > +#define DEV_TAB_MASK 0x000FFFFFFFFFF000LL
> > +#define DEV_TAB_LEN 0x1FF
> > +
> > +/* IOMMU Control */
> > +#define IOMMUCTL_REG 0x0018
> > +#define  CTL_IOMMUEN (1L << 0)
> > +#define  CTL_HTTUNEN (1L << 1)
> > +#define  CTL_EVENTLOGEN (1L << 2)
> > +#define  CTL_EVENTINTEN (1L << 3)
> > +#define  CTL_COMWAITINTEN (1L << 4)
> > +#define  CTL_INVTIMEOUT_SHIFT 5
> > +#define  CTL_INVTIMEOUT_MASK   0x7
> > +#define  CTL_INVTIMEOUT_NONE 0
> > +#define  CTL_INVTIMEOUT_1MS     1
> > +#define  CTL_INVTIMEOUT_10MS    2
> > +#define  CTL_INVTIMEOUT_100MS   3
> > +#define  CTL_INVTIMEOUT_1S      4
> > +#define  CTL_INVTIMEOUT_10S     5
> > +#define  CTL_INVTIMEOUT_100S    6
> > +#define  CTL_PASSPW (1L << 8)
> > +#define  CTL_RESPASSPW (1L << 9)
> > +#define  CTL_COHERENT (1L << 10)
> > +#define  CTL_ISOC (1L << 11)
> > +#define  CTL_CMDBUFEN (1L << 12)
> > +#define  CTL_PPRLOGEN (1L << 13)
> > +#define  CTL_PPRINTEN (1L << 14)
> > +#define  CTL_PPREN (1L << 15)
> > +#define  CTL_GTEN (1L << 16)
> > +#define  CTL_GAEN (1L << 17)
> > +#define  CTL_CRW_SHIFT 18
> > +#define  CTL_CRW_MASK 0xF
> > +#define  CTL_SMIFEN (1L << 22)
> > +#define  CTL_SLFWBDIS (1L << 23)
> > +#define  CTL_SMIFLOGEN (1L << 24)
> > +#define  CTL_GAMEN_SHIFT 25
> > +#define  CTL_GAMEN_MASK 0x7
> > +#define  CTL_GALOGEN (1L << 28)
> > +#define  CTL_GAINTEN (1L << 29)
> > +#define  CTL_DUALPPRLOGEN_SHIFT 30
> > +#define  CTL_DUALPPRLOGEN_MASK 0x3
> > +#define  CTL_DUALEVTLOGEN_SHIFT 32
> > +#define  CTL_DUALEVTLOGEN_MASK 0x3
> > +#define  CTL_DEVTBLSEGEN_SHIFT 34
> > +#define  CTL_DEVTBLSEGEN_MASK 0x7
> > +#define  CTL_PRIVABRTEN_SHIFT 37
> > +#define  CTL_PRIVABRTEN_MASK 0x3
> > +#define  CTL_PPRAUTORSPEN (1LL << 39)
> > +#define  CTL_MARCEN (1LL << 40)
> > +#define  CTL_BLKSTOPMRKEN (1LL << 41)
> > +#define  CTL_PPRAUTOSPAON (1LL << 42)
> > +#define  CTL_DOMAINIDPNE (1LL << 43)
> > +
> > +#define CMD_BASE_MASK 0x000FFFFFFFFFF000LL
> > +#define CMD_TBL_SIZE 4096
> > +#define CMD_TBL_LEN_4K (8LL << 56)
> > +#define CMD_TBL_LEN_8K (9LL << 56)
> > +
> > +#define EVT_BASE_MASK 0x000FFFFFFFFFF000LL
> > +#define EVT_TBL_SIZE 4096
> > +#define EVT_TBL_LEN_4K (8LL << 56)
> > +#define EVT_TBL_LEN_8K (9LL << 56)
> > +
> > +/*========================
> > + * DEVICE TABLE ENTRY
> > + * Contains mapping of bus-device-function
> > + *
> > + *  0       Valid (V)
> > + *  1       Translation Valid (TV)
> > + *  7:8     Host Address Dirty (HAD)
> > + *  9:11    Page Table Depth (usually 4)
> > + *  12:51   Page Table Physical Address
> > + *  52      PPR Enable
> > + *  53      GPRP
> > + *  54      Guest I/O Protection Valid (GIoV)
> > + *  55      Guest Translation Valid (GV)
> > + *  56:57   Guest Levels translated (GLX)
> > + *  58:60   Guest CR3 bits 12:14 (GCR3TRP)
> > + *  61      I/O Read Permission (IR)
> > + *  62      I/O Write Permission (IW)
> > + *  64:79   Domain ID
> > + *  80:95   Guest CR3 bits 15:30 (GCR3TRP)
> > + *  96      IOTLB Enable (I)
> > + *  97      Suppress multiple I/O page faults (SE)
> > + *  98      Suppress all I/O page faults (SA)
> > + *  99:100  Port I/O Control (IoCTL)
> > + *  101     Cache IOTLB Hint
> > + *  102     Snoop Disable (SD)
> > + *  103     Allow Exclusion (EX)
> > + *  104:105 System Management Message (SysMgt)
> > + *  107:127 Guest CR3 bits 31:51 (GCR3TRP)
> > + *  128     Interrupt Map Valid (IV)
> > + *  129:132 Interrupt Table Length (IntTabLen)
> > + *========================*/
> > +struct ivhd_dte {
> > + uint32_t dw0;
> > + uint32_t dw1;
> > + uint32_t dw2;
> > + uint32_t dw3;
> > + uint32_t dw4;
> > + uint32_t dw5;
> > + uint32_t dw6;
> > + uint32_t dw7;
> > +} __packed;
> > +
> > +#define DTE_V (1L << 0) // dw0
> > +#define DTE_TV (1L << 1) // dw0
> > +#define DTE_LEVEL_SHIFT 9 // dw0
> > +#define DTE_LEVEL_MASK 0x7 // dw0
> > +#define DTE_HPTRP_MASK 0x000FFFFFFFFFF000LL // dw0,1
> > +
> > +#define DTE_PPR (1L << 20) // dw1
> > +#define DTE_GPRP (1L << 21) // dw1
> > +#define DTE_GIOV (1L << 22) // dw1
> > +#define DTE_GV (1L << 23) // dw1
> > +#define DTE_IR (1L << 29) // dw1
> > +#define DTE_IW (1L << 30) // dw1
> > +
> > +#define DTE_DID_MASK 0xFFFF // dw2
> > +
> > +#define DTE_IV (1L << 0) // dw3
> > +#define DTE_SE (1L << 1)
> > +#define DTE_SA (1L << 2)
> > +#define DTE_INTTABLEN_SHIFT 1
> > +#define DTE_INTTABLEN_MASK 0xF
> > +#define DTE_IRTP_MASK 0x000FFFFFFFFFFFC0LL
> > +
> > +#define PTE_LVL5                48
> > +#define PTE_LVL4                39
> > +#define PTE_LVL3                30
> > +#define PTE_LVL2                21
> > +#define PTE_LVL1                12
> > +
> > +#define PTE_NXTLVL(x)           (((x) & 0x7) << 9)
> > +#define PTE_PADDR_MASK 0x000FFFFFFFFFF000LL
> > +#define PTE_IR                  (1LL << 61)
> > +#define PTE_IW                  (1LL << 62)
> > +
> > +#define DTE_GCR312_MASK 0x3
> > +#define DTE_GCR312_SHIFT 24
> > +
> > +#define DTE_GCR315_MASK 0xFFFF
> > +#define DTE_GCR315_SHIFT 16
> > +
> > +#define DTE_GCR331_MASK 0xFFFFF
> > +#define DTE_GCR331_SHIFT 12
> > +
> > +#define _get64(x)   *(uint64_t *)(x)
> > +#define _put64(x,v) *(uint64_t *)(x) = (v)
> > +
> > +/* Set Guest CR3 address */
> > +static inline void
> > +dte_set_guest_cr3(struct ivhd_dte *dte, paddr_t paddr)
> > +{
> > + iommu_rmw32(&dte->dw1, DTE_GCR312_MASK, DTE_GCR312_SHIFT, paddr >> 12);
> > + iommu_rmw32(&dte->dw2, DTE_GCR315_MASK, DTE_GCR315_SHIFT, paddr >> 15);
> > + iommu_rmw32(&dte->dw3, DTE_GCR331_MASK, DTE_GCR331_SHIFT, paddr >> 31);
> > +}
> > +
> > +/* Set Interrupt Remapping Root Pointer */
> > +static inline void
> > +dte_set_interrupt_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
> > +{
> > + uint64_t ov = _get64(&dte->dw4);
> > + _put64(&dte->dw4, (ov & ~DTE_IRTP_MASK) | (paddr & DTE_IRTP_MASK));
> > +}
> > +
> > +/* Set Interrupt Remapping Table length */
> > +static inline void
> > +dte_set_interrupt_table_length(struct ivhd_dte *dte, int nEnt)
> > +{
> > + iommu_rmw32(&dte->dw4, DTE_INTTABLEN_MASK, DTE_INTTABLEN_SHIFT, nEnt);
> > +}
> > +
> > +/* Set Interrupt Remapping Valid */
> > +static inline void
> > +dte_set_interrupt_valid(struct ivhd_dte *dte)
> > +{
> > + dte->dw4 |= DTE_IV;
> > +}
> > +
> > +/* Set Domain ID in Device Table Entry */
> > +static inline void
> > +dte_set_domain(struct ivhd_dte *dte, uint16_t did)
> > +{
> > + dte->dw2 = (dte->dw2 & ~DTE_DID_MASK) | (did & DTE_DID_MASK);
> > +}
> > +
> > +/* Set Page Table Pointer for device */
> > +static inline void
> > +dte_set_host_page_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
> > +{
> > + uint64_t ov;
> > +
> > + ov = _get64(&dte->dw0) & ~DTE_HPTRP_MASK;
> > + ov |= (paddr & DTE_HPTRP_MASK) | PTE_IW | PTE_IR;
> > +
> > + _put64(&dte->dw0, ov);
> > +}
> > +
> > +/* Set Page Table Levels Mask */
> > +static inline void
> > +dte_set_mode(struct ivhd_dte *dte, int mode)
> > +{
> > + iommu_rmw32(&dte->dw0, DTE_LEVEL_MASK, DTE_LEVEL_SHIFT, mode);
> > +}
> > +
> > +static inline void
> > +dte_set_tv(struct ivhd_dte *dte)
> > +{
> > + dte->dw0 |= DTE_TV;
> > +}
> > +
> > +/* Set Device Table Entry valid.
> > + * Domain/Level/Mode/PageTable should already be set
> > + */
> > +static inline void
> > +dte_set_valid(struct ivhd_dte *dte)
> > +{
> > + dte->dw0 |= DTE_V;
> > +}
> > +
> > +/* Check if Device Table Entry is valid */
> > +static inline int
> > +dte_is_valid(struct ivhd_dte *dte)
> > +{
> > + return (dte->dw0 & DTE_V);
> > +}
> > +
> > +/*=========================================
> > + * COMMAND
> > + *=========================================*/
> > +struct ivhd_command {
> > + uint32_t dw0;
> > + uint32_t dw1;
> > + uint32_t dw2;
> > + uint32_t dw3;
> > +} __packed;
> > +
> > +#define CMD_SHIFT 28
> > +
> > +enum {
> > + COMPLETION_WAIT = 0x01,
> > + INVALIDATE_DEVTAB_ENTRY = 0x02,
> > + INVALIDATE_IOMMU_PAGES = 0x03,
> > + INVALIDATE_IOTLB_PAGES = 0x04,
> > + INVALIDATE_INTERRUPT_TABLE = 0x05,
> > + PREFETCH_IOMMU_PAGES = 0x06,
> > + COMPLETE_PPR_REQUEST = 0x07,
> > + INVALIDATE_IOMMU_ALL = 0x08,
> > +};
> > +
> > +/*=========================================
> > + * EVENT
> > + *=========================================*/
> > +struct ivhd_event {
> > + uint32_t dw0;
> > + uint32_t dw1;
> > + uint32_t dw2;   // address.lo
> > + uint32_t dw3; // address.hi
> > +} __packed;
> > +
> > +#define EVT_TYPE_SHIFT 28       // dw1.0xF0000000
> > +#define EVT_TYPE_MASK 0xF
> > +#define EVT_SID_SHIFT 0        // dw0.0x0000FFFF
> > +#define EVT_SID_MASK 0xFFFF
> > +#define EVT_DID_SHIFT 0
> > +#define EVT_DID_MASK 0xFFFF   // dw1.0x0000FFFF
> > +#define EVT_FLAG_SHIFT   16
> > +#define EVT_FLAG_MASK   0xFFF    // dw1.0x0FFF0000
> > +
> > +/* IOMMU Fault reasons */
> > +enum {
> > + ILLEGAL_DEV_TABLE_ENTRY = 0x1,
> > + IO_PAGE_FAULT = 0x2,
> > + DEV_TAB_HARDWARE_ERROR = 0x3,
> > + PAGE_TAB_HARDWARE_ERROR = 0x4,
> > + ILLEGAL_COMMAND_ERROR = 0x5,
> > + COMMAND_HARDWARE_ERROR = 0x6,
> > + IOTLB_INV_TIMEOUT = 0x7,
> > + INVALID_DEVICE_REQUEST = 0x8,
> > +};
> > +
> > +#define EVT_GN (1L << 16)
> > +#define EVT_NX (1L << 17)
> > +#define EVT_US (1L << 18)
> > +#define EVT_I (1L << 19)
> > +#define EVT_PR (1L << 20)
> > +#define EVT_RW (1L << 21)
> > +#define EVT_PE (1L << 22)
> > +#define EVT_RZ (1L << 23)
> > +#define EVT_TR (1L << 24)
> > +
> > +struct iommu_softc;
> > +
> > +int ivhd_flush_devtab(struct iommu_softc *, int);
> > +int ivhd_invalidate_iommu_all(struct iommu_softc *);
> > +int ivhd_invalidate_interrupt_table(struct iommu_softc *, int);
> > +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
> > +int ivhd_invalidate_domain(struct iommu_softc *, int);
> > +
> > +void _dumppte(struct pte_entry *, int, vaddr_t);
> > +
> > +#endif
> > diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi
> > index e57c39938..1cf6f2fbb 100644
> > --- a/sys/dev/acpi/files.acpi
> > +++ b/sys/dev/acpi/files.acpi
> > @@ -70,6 +70,11 @@ device acpiprt
> >  attach acpiprt at acpi
> >  file dev/acpi/acpiprt.c acpiprt needs-flag
> >  
> > +# DMAR device
> > +device acpidmar
> > +attach acpidmar at acpi
> > +file dev/acpi/acpidmar.c acpidmar
> > +
> >  # Docking station
> >  device acpidock
> >  attach acpidock at acpi
> >
> >

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Mark Kettenis
> Date: Fri, 4 Sep 2020 00:50:44 -0500
> From: Jordan Hargrave <[hidden email]>

A few hints below...

> > > +
> > > +/* Page Table Entry per domain */
> > > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > > +
> > > +/* Alias mapping */
> > > +#define SID_INVALID 0x80000000L
> > > +static uint32_t sid_flag[65536];
> >
> > Can we avoid having these large arrays, or at least allocate them
> > dynamically?  That would also avoid the explicit alignment which is
> > somewhat nasty since it affects the entire kernel.
>
> OK. But the hwdte does need the 2M area to be all contiguous but it is not
> needed for DMAR/Intel.  You *can* have up to 8 different device table entries
> though to split up the area.

The appropriate interface to use in this context is
bus_dmamem_alloc(9).  You can specify alignment, and if you set nsegs
to 1, you will get memory that is physically contiguous.

To map the memory into kernel address space you'll need create a map
using bus_dmamap_create(9) and map it using bus_dmamem_map(9).  Then
instead of using pmap_extract(9) you use bus_dmamap_load_raw(9) which
then populates the physical addresses.

Many of the drivers written by dlg@ define convenience functions to do
all these steps, although interestingly enough he tends to use
bus_dmamap_load(9) instead of bus_dmamap_load_raw(9) which is
sub-optimal.

> > > +
> > > +struct domain_dev {
> > > + int sid;
> > > + int sec;
> > > + int sub;
> > > + TAILQ_ENTRY(domain_dev) link;
> > > +};
> > > +
> > > +struct domain {
> > > + struct iommu_softc *iommu;
> > > + int did;
> > > + int gaw;
> > > + struct pte_entry *pte;
> > > + paddr_t ptep;
> > > + struct bus_dma_tag dmat;
> > > + int flag;
> > > +
> > > + struct mutex            exlck;
> > > + char exname[32];
> > > + struct extent *iovamap;
> > > + TAILQ_HEAD(,domain_dev) devices;
> > > + TAILQ_ENTRY(domain) link;
> > > +};
> > > +
> > > +#define DOM_DEBUG 0x1
> > > +#define DOM_NOMAP 0x2
> > > +
> > > +struct dmar_devlist {
> > > + int type;
> > > + int bus;
> > > + int ndp;
> > > + struct acpidmar_devpath *dp;
> > > + TAILQ_ENTRY(dmar_devlist) link;
> > > +};
> > > +
> > > +TAILQ_HEAD(devlist_head, dmar_devlist);
> > > +
> > > +struct ivhd_devlist {
> > > + int start_id;
> > > + int end_id;
> > > + int cfg;
> > > + TAILQ_ENTRY(ivhd_devlist) link;
> > > +};
> > > +
> > > +struct rmrr_softc {
> > > + TAILQ_ENTRY(rmrr_softc) link;
> > > + struct devlist_head devices;
> > > + int segment;
> > > + uint64_t start;
> > > + uint64_t end;
> > > +};
> > > +
> > > +struct atsr_softc {
> > > + TAILQ_ENTRY(atsr_softc) link;
> > > + struct devlist_head devices;
> > > + int segment;
> > > + int flags;
> > > +};
> > > +
> > > +struct iommu_pic {
> > > + struct pic pic;
> > > + struct iommu_softc *iommu;
> > > +};
> > > +
> > > +#define IOMMU_FLAGS_CATCHALL 0x1
> > > +#define IOMMU_FLAGS_BAD 0x2
> > > +#define IOMMU_FLAGS_SUSPEND 0x4
> > > +
> > > +struct iommu_softc {
> > > + TAILQ_ENTRY(iommu_softc)link;
> > > + struct devlist_head devices;
> > > + int id;
> > > + int flags;
> > > + int segment;
> > > +
> > > + struct mutex reg_lock;
> > > +
> > > + bus_space_tag_t iot;
> > > + bus_space_handle_t ioh;
> > > +
> > > + uint64_t cap;
> > > + uint64_t ecap;
> > > + uint32_t gcmd;
> > > +
> > > + int mgaw;
> > > + int agaw;
> > > + int ndoms;
> > > +
> > > + struct root_entry *root;
> > > + struct context_entry *ctx[256];
> > > +
> > > + void *intr;
> > > + struct iommu_pic pic;
> > > + int fedata;
> > > + uint64_t feaddr;
> > > + uint64_t rtaddr;
> > > +
> > > + // Queued Invalidation
> > > + int qi_head;
> > > + int qi_tail;
> > > + paddr_t qip;
> > > + struct qi_entry *qi;
> > > +
> > > + struct domain *unity;
> > > + TAILQ_HEAD(,domain) domains;
> > > +
> > > + // AMD iommu
> > > + struct ivhd_dte         *dte;
> > > + void *cmd_tbl;
> > > + void *evt_tbl;
> > > + paddr_t cmd_tblp;
> > > + paddr_t evt_tblp;
> > > + uint64_t wv[128] __aligned(4096);
> >
> > This wv array isn't used as far as I can tell.
>
> Ah I was doing some testing on the commands.. I keep getting iommu
> command timeouts

Hmm, yes, using the wv variable on the stack as you do below is a bit
suspect.  Using __aligned() for stack variables may not give you the
proper alignment if the stack alignment is smaller.

> > > +};
> > > +
> > > +static inline int iommu_bad(struct iommu_softc *sc)
> > > +{
> > > + return (sc->flags & IOMMU_FLAGS_BAD);
> > > +}
> > > +
> > > +static inline int iommu_enabled(struct iommu_softc *sc)
> > > +{
> > > + if (sc->dte) {
> > > + return 1;
> > > + }
> > > + return (sc->gcmd & GCMD_TE);
> > > +}
> > > +
> > > +struct acpidmar_softc {
> > > + struct device sc_dev;
> > > +
> > > + pci_chipset_tag_t sc_pc;
> > > + bus_space_tag_t sc_memt;
> > > + int sc_haw;
> > > + int sc_flags;
> > > +
> > > + TAILQ_HEAD(,iommu_softc)sc_drhds;
> > > + TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
> > > + TAILQ_HEAD(,atsr_softc) sc_atsrs;
> > > +};
> > > +
> > > +int acpidmar_activate(struct device *, int);
> > > +int acpidmar_match(struct device *, void *, void *);
> > > +void acpidmar_attach(struct device *, struct device *, void *);
> > > +struct domain   *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
> > > +
> > > +struct cfattach acpidmar_ca = {
> > > + sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
> > > +};
> > > +
> > > +struct cfdriver acpidmar_cd = {
> > > + NULL, "acpidmar", DV_DULL
> > > +};
> > > +
> > > +struct acpidmar_softc *acpidmar_sc;
> > > +int acpidmar_intr(void *);
> > > +int acpiivhd_intr(void *);
> > > +
> > > +#define DID_UNITY 0x1
> > > +
> > > +void _dumppte(struct pte_entry *, int, vaddr_t);
> > > +
> > > +struct domain *domain_create(struct iommu_softc *, int);
> > > +struct domain *domain_lookup(struct acpidmar_softc *, int, int);
> > > +
> > > +void domain_unload_map(struct domain *, bus_dmamap_t);
> > > +void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
> > > +
> > > +void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
> > > +void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
> > > +void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
> > > +void domain_map_pthru(struct domain *, paddr_t, paddr_t);
> > > +
> > > +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
> > > +void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
> > > +    struct devlist_head *);
> > > +int  acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
> > > +
> > > +void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
> > > +void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
> > > +void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
> > > +void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
> > > +void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
> > > +
> > > +void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
> > > +    const char *);
> > > +
> > > +void iommu_writel(struct iommu_softc *, int, uint32_t);
> > > +uint32_t iommu_readl(struct iommu_softc *, int);
> > > +void iommu_writeq(struct iommu_softc *, int, uint64_t);
> > > +uint64_t iommu_readq(struct iommu_softc *, int);
> > > +void iommu_showfault(struct iommu_softc *, int,
> > > +    struct fault_entry *);
> > > +void iommu_showcfg(struct iommu_softc *, int);
> > > +
> > > +int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> > > +    struct acpidmar_drhd *);
> > > +int iommu_enable_translation(struct iommu_softc *, int);
> > > +void iommu_enable_qi(struct iommu_softc *, int);
> > > +void iommu_flush_cache(struct iommu_softc *, void *, size_t);
> > > +void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
> > > +void iommu_flush_write_buffer(struct iommu_softc *);
> > > +void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
> > > +
> > > +void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
> > > +void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
> > > +void iommu_flush_tlb(struct iommu_softc *, int, int);
> > > +void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
> > > +
> > > +void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
> > > +
> > > +const char *dmar_bdf(int);
> > > +
> > > +const char *
> > > +dmar_bdf(int sid)
> > > +{
> > > + static char bdf[32];
> > > +
> > > + snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
> > > +    sid_bus(sid), sid_dev(sid), sid_fun(sid));
> > > +
> > > + return (bdf);
> > > +}
> > > +
> > > +/* busdma */
> > > +static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
> > > +    bus_size_t, int, bus_dmamap_t *);
> > > +static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
> > > +static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
> > > +    struct proc *, int);
> > > +static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
> > > +    int);
> > > +static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
> > > +static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
> > > +    bus_dma_segment_t *, int, bus_size_t, int);
> > > +static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
> > > +static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
> > > +    bus_size_t, int);
> > > +static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
> > > +    bus_dma_segment_t *, int, int *, int);
> > > +static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
> > > +static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
> > > +    caddr_t *, int);
> > > +static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
> > > +static paddr_t dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
> > > +    int, int);
> > > +
> > > +static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
> > > +const char *dom_bdf(struct domain *dom);
> > > +void domain_map_check(struct domain *dom);
> > > +
> > > +struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *npte, vaddr_t va, int shift, uint64_t flags);
> > > +int  ivhd_poll_events(struct iommu_softc *iommu);
> > > +void ivhd_showit(struct iommu_softc *);
> > > +void ivhd_showdte(void);
> > > +void ivhd_showcmd(struct iommu_softc *);
> > > +
> > > +static inline int
> > > +debugme(struct domain *dom)
> > > +{
> > > + return 0;
> > > + return (dom->flag & DOM_DEBUG);
> > > +}
> > > +
> > > +void
> > > +domain_map_check(struct domain *dom)
> > > +{
> > > + struct iommu_softc *iommu;
> > > + struct domain_dev *dd;
> > > + struct context_entry *ctx;
> > > + int v;
> > > +
> > > + iommu = dom->iommu;
> > > + TAILQ_FOREACH(dd, &dom->devices, link) {
> > > + acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
> > > +
> > > + if (iommu->dte)
> > > + continue;
> > > +
> > > + /* Check if this is the first time we are mapped */
> > > + ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
> > > + v = context_user(ctx);
> > > + if (v != 0xA) {
> > > + printf("  map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> > > +    iommu->segment,
> > > +    sid_bus(dd->sid),
> > > +    sid_dev(dd->sid),
> > > +    sid_fun(dd->sid),
> > > +    iommu->id,
> > > +    dom->did);
> > > + context_set_user(ctx, 0xA);
> > > + }
> > > + }
> > > +}
> > > +
> > > +/* Map a single page as passthrough - used for DRM */
> > > +void
> > > +dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > +
> > > + if (!acpidmar_sc)
> > > + return;
> > > + domain_map_check(dom);
> > > + domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
> > > +}
> > > +
> > > +/* Map a range of pages 1:1 */
> > > +void
> > > +domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
> > > +{
> > > + domain_map_check(dom);
> > > + while (start < end) {
> > > + domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
> > > + start += VTD_PAGE_SIZE;
> > > + }
> > > +}
> > > +
> > > +/* Map a single paddr to IOMMU paddr */
> > > +void
> > > +domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
> > > +{
> > > + paddr_t paddr;
> > > + struct pte_entry *pte, *npte;
> > > + int lvl, idx;
> > > + struct iommu_softc *iommu;
> > > +
> > > + iommu = dom->iommu;
> > > + /* Insert physical address into virtual address map
> > > + * XXX: could we use private pmap here?
> > > + * essentially doing a pmap_enter(map, va, pa, prot);
> > > + */
> > > +
> > > + /* Only handle 4k pages for now */
> > > + npte = dom->pte;
> > > + for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl>= VTD_LEVEL0;
> > > +    lvl -= VTD_STRIDE_SIZE) {
> > > + idx = (va >> lvl) & VTD_STRIDE_MASK;
> > > + pte = &npte[idx];
> > > + if (lvl == VTD_LEVEL0) {
> > > + /* Level 1: Page Table - add physical address */
> > > + pte->val = pa | flags;
> > > + iommu_flush_cache(iommu, pte, sizeof(*pte));
> > > + break;
> > > + } else if (!(pte->val & PTE_P)) {
> > > + /* Level N: Point to lower level table */
> > > + iommu_alloc_page(iommu, &paddr);
> > > + pte->val = paddr | PTE_P | PTE_R | PTE_W;
> > > + iommu_flush_cache(iommu, pte, sizeof(*pte));
> > > + }
> > > + npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
> > > + }
> > > +}
> > > +
> > > +/* Map a single paddr to IOMMU paddr: AMD
> > > + * physical address breakdown into levels:
> > > + * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
> > > + *        5.55555555.44444444.43333333.33222222.22211111.1111----.--------
> > > + * mode:
> > > + *  000 = none   shift
> > > + *  001 = 1 [21].12
> > > + *  010 = 2 [30].21
> > > + *  011 = 3 [39].30
> > > + *  100 = 4 [48].39
> > > + *  101 = 5 [57]
> > > + *  110 = 6
> > > + *  111 = reserved
> > > + */
> > > +struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
> > > + int shift, uint64_t flags)
> > > +{
> > > + paddr_t paddr;
> > > + int idx;
> > > +
> > > + idx = (va >> shift) & VTD_STRIDE_MASK;
> > > + if (!(pte[idx].val & PTE_P)) {
> > > + /* Page Table entry is not present... create a new page entry */
> > > + iommu_alloc_page(iommu, &paddr);
> > > + pte[idx].val = paddr | flags;
> > > + iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
> > > + }
> > > + return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
> > > +}
> > > +
> > > +void
> > > +domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
> > > +{
> > > + struct pte_entry *pte;
> > > + struct iommu_softc *iommu;
> > > + int idx;
> > > +
> > > + iommu = dom->iommu;
> > > + /* Insert physical address into virtual address map
> > > + * XXX: could we use private pmap here?
> > > + * essentially doing a pmap_enter(map, va, pa, prot);
> > > + */
> > > +
> > > + /* Always assume AMD levels=4                           */
> > > + /*        39        30        21        12              */
> > > + /* ---------|---------|---------|---------|------------ */
> > > + pte = dom->pte;
> > > + //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
> > > + pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
> > > + pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
> > > + //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
> > > +
> > > + if (flags)
> > > + flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
> > > +
> > > + /* Level 1: Page Table - add physical address */
> > > + idx = (va >> 12) & 0x1FF;
> > > + pte[idx].val = pa | flags;
> > > +
> > > + iommu_flush_cache(iommu, pte, sizeof(*pte));
> > > +}
> > > +
> > > +static void
> > > +dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
> > > +    const char *lbl)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > + int i;
> > > +
> > > + return;
> > > + if (!debugme(dom))
> > > + return;
> > > + printf("%s: %s\n", lbl, dom_bdf(dom));
> > > + for (i = 0; i < nseg; i++) {
> > > + printf("  %.16llx %.8x\n",
> > > +    (uint64_t)segs[i].ds_addr,
> > > +    (uint32_t)segs[i].ds_len);
> > > + }
> > > +}
> > > +
> > > +/* Unload mapping */
> > > +void
> > > +domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
> > > +{
> > > + bus_dma_segment_t *seg;
> > > + paddr_t base, end, idx;
> > > + psize_t alen;
> > > + int i;
> > > +
> > > + if (iommu_bad(dom->iommu)) {
> > > + printf("unload map no iommu\n");
> > > + return;
> > > + }
> > > +
> > > + //acpidmar_intr(dom->iommu);
> > > + for (i = 0; i < dmam->dm_nsegs; i++) {
> > > + seg  = &dmam->dm_segs[i];
> > > +
> > > + base = trunc_page(seg->ds_addr);
> > > + end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
> > > + alen = end - base;
> > > +
> > > + if (debugme(dom)) {
> > > + printf("  va:%.16llx len:%x\n",
> > > +    (uint64_t)base, (uint32_t)alen);
> > > + }
> > > +
> > > + /* Clear PTE */
> > > + for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
> > > + domain_map_page(dom, base + idx, 0, 0);
> > > +
> > > + if (dom->flag & DOM_NOMAP) {
> > > + //printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
> > > + continue;
> > > + }
> > > +
> > > + mtx_enter(&dom->exlck);
> > > + if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
> > > + panic("domain_unload_map: extent_free");
> > > + }
> > > + mtx_leave(&dom->exlck);
> > > + }
> > > +}
> > > +
> > > +/* map.segs[x].ds_addr is modified to IOMMU virtual PA */
> > > +void
> > > +domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
> > > +{
> > > + bus_dma_segment_t *seg;
> > > + struct iommu_softc *iommu;
> > > + paddr_t base, end, idx;
> > > + psize_t alen;
> > > + u_long res;
> > > + int i;
> > > +
> > > + iommu = dom->iommu;
> > > + if (!iommu_enabled(iommu)) {
> > > + /* Lazy enable translation when required */
> > > + if (iommu_enable_translation(iommu, 1)) {
> > > + return;
> > > + }
> > > + }
> > > + domain_map_check(dom);
> > > + //acpidmar_intr(iommu);
> > > + for (i = 0; i < map->dm_nsegs; i++) {
> > > + seg = &map->dm_segs[i];
> > > +
> > > + base = trunc_page(seg->ds_addr);
> > > + end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
> > > + alen = end - base;
> > > + res  = base;
> > > +
> > > + if (dom->flag & DOM_NOMAP) {
> > > + goto nomap;
> > > + }
> > > +
> > > + /* Allocate DMA Virtual Address */
> > > + mtx_enter(&dom->exlck);
> > > + if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
> > > +    map->_dm_boundary, EX_NOWAIT, &res)) {
> > > + panic("domain_load_map: extent_alloc");
> > > + }
> > > + if (res == -1) {
> > > + panic("got -1 address\n");
> > > + }
> > > + mtx_leave(&dom->exlck);
> > > +
> > > + /* Reassign DMA address */
> > > + seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
> > > +nomap:
> > > + if (debugme(dom)) {
> > > + printf("  LOADMAP: %.16llx %x => %.16llx\n",
> > > +    (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
> > > +    (uint64_t)res);
> > > + }
> > > + for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
> > > + domain_map_page(dom, res + idx, base + idx,
> > > +    PTE_P | pteflag);
> > > + }
> > > + }
> > > + if ((iommu->cap & CAP_CM) || force_cm) {
> > > + iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
> > > + } else {
> > > + iommu_flush_write_buffer(iommu);
> > > + }
> > > +}
> > > +
> > > +const char *
> > > +dom_bdf(struct domain *dom)
> > > +{
> > > + struct domain_dev *dd;
> > > + static char mmm[48];
> > > +
> > > + dd = TAILQ_FIRST(&dom->devices);
> > > + snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
> > > +    dmar_bdf(dd->sid), dom->iommu->id, dom->did,
> > > +    dom->did == DID_UNITY ? " [unity]" : "");
> > > + return (mmm);
> > > +}
> > > +
> > > +/* Bus DMA Map functions */
> > > +static int
> > > +dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
> > > +    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
> > > +{
> > > + int rc;
> > > +
> > > + rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
> > > +    flags, dmamp);
> > > + if (!rc) {
> > > + dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
> > > +    __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static void
> > > +dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
> > > +{
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > > + _bus_dmamap_destroy(tag, dmam);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
> > > +    bus_size_t buflen, struct proc *p, int flags)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > + int rc;
> > > +
> > > + rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
> > > + if (!rc) {
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +    __FUNCTION__);
> > > + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +    __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
> > > +    int flags)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > + int rc;
> > > +
> > > + rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
> > > + if (!rc) {
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +    __FUNCTION__);
> > > + domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +    __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
> > > +    int flags)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > + int rc;
> > > +
> > > + rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
> > > + if (!rc) {
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +    __FUNCTION__);
> > > + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +    __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
> > > +    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > + int rc;
> > > +
> > > + rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
> > > + if (!rc) {
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +    __FUNCTION__);
> > > + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +    __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static void
> > > +dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > +
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > > + domain_unload_map(dom, dmam);
> > > + _bus_dmamap_unload(tag, dmam);
> > > +}
> > > +
> > > +/*
> > > + * Sync pass-through: currently just forwards to the generic
> > > + * bus_dmamap_sync.  The disabled block below sketches a future
> > > + * permission-tightening scheme (map read-only for PREREAD,
> > > + * write-only for PREWRITE); fixed so that "flag" is actually
> > > + * declared, since the old "//int flag;" would not compile if the
> > > + * block were ever re-enabled.
> > > + */
> > > +static void
> > > +dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
> > > +    bus_size_t len, int ops)
> > > +{
> > > +#if 0
> > > + struct domain *dom = tag->_cookie;
> > > + int flag;
> > > +
> > > + flag = PTE_P;
> > > + //acpidmar_intr(dom->iommu);
> > > + if (ops == BUS_DMASYNC_PREREAD) {
> > > + /* make readable */
> > > + flag |= PTE_R;
> > > + }
> > > + else if (ops == BUS_DMASYNC_PREWRITE) {
> > > + /* make writeable */
> > > + flag |= PTE_W;
> > > + }
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > > +#endif
> > > + _bus_dmamap_sync(tag, dmam, offset, len, ops);
> > > +}
> > > +
> > > +/*
> > > + * Allocate DMA-safe memory via the generic allocator; on success,
> > > + * trace the returned segments.
> > > + */
> > > +static int
> > > +dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
> > > +    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
> > > +    int flags)
> > > +{
> > > + int error;
> > > +
> > > + error = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
> > > +    rsegs, flags);
> > > + if (error)
> > > + return (error);
> > > + dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
> > > + return (0);
> > > +}
> > > +
> > > +/* Trace the segments being released, then free them. */
> > > +static void
> > > +dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
> > > +{
> > > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > > + _bus_dmamem_free(tag, segs, nsegs);
> > > +}
> > > +
> > > +/* Trace the segments, then map them into kernel virtual space. */
> > > +static int
> > > +dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> > > +    size_t size, caddr_t *kvap, int flags)
> > > +{
> > > + int error;
> > > +
> > > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > > + error = _bus_dmamem_map(tag, segs, nsegs, size, kvap, flags);
> > > + return (error);
> > > +}
> > > +
> > > +/* Unmap DMA memory from kernel virtual space (with optional trace). */
> > > +static void
> > > +dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > +
> > > + if (debugme(dom))
> > > + printf("dmamap_unmap: %s\n", dom_bdf(dom));
> > > + _bus_dmamem_unmap(tag, kva, size);
> > > +}
> > > +
> > > +/* Trace the segments, then hand off to the generic mmap helper. */
> > > +static paddr_t
> > > +dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> > > +    off_t off, int prot, int flags)
> > > +{
> > > + paddr_t pa;
> > > +
> > > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > > + pa = _bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags);
> > > + return (pa);
> > > +}
> > > +
> > > +/*===================================
> > > + * IOMMU code
> > > + *===================================*/
> > > +
> > > +/* Intel: Set Context Root Address */
> > > +void
> > > +iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
> > > +{
> > > + int i, sts;
> > > +
> > > + /*
> > > +  * Program the root table physical address, then latch it with
> > > +  * GCMD_SRTP.  Hardware acknowledges by setting GSTS_RTPS.
> > > +  */
> > > + mtx_enter(&iommu->reg_lock);
> > > + iommu_writeq(iommu, DMAR_RTADDR_REG, paddr);
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
> > > + /* Bounded busy-wait for the ack; note there is no delay() here. */
> > > + for (i = 0; i < 5; i++) {
> > > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > + if (sts & GSTS_RTPS)
> > > + break;
> > > + }
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > + /* i == 5 means the loop ran out without seeing RTPS. */
> > > + if (i == 5) {
> > > + printf("set_rtaddr fails\n");
> > > + }
> > > +}
> > > +
> > > +/* COMMON: Allocate a new memory page */
> > > +/*
> > > + * Allocate one zeroed kernel page and return both its virtual
> > > + * address (return value) and physical address (*paddr).
> > > + * Panics on allocation failure.
> > > + */
> > > +void *
> > > +iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
> > > +{
> > > + void *va;
> > > +
> > > + va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
> > > + if (va == NULL)
> > > + panic("can't allocate page\n");
> > > + *paddr = 0;
> > > + pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
> > > + return (va);
> > > +}
> > > +
> > > +
> > > +/* Intel: Issue command via queued invalidation */
> > > +/*
> > > + * Stub: queued-invalidation submission is not implemented yet.
> > > + * The disabled block sketches the intended ring-buffer scheme
> > > + * (write descriptor + wait descriptor, then spin on completion).
> > > + * Currently a no-op, so the *_qi flush paths do nothing.
> > > + */
> > > +void
> > > +iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
> > > +{
> > > +#if 0
> > > + struct qi_entry *pi, *pw;
> > > +
> > > + idx = iommu->qi_head;
> > > + pi = &iommu->qi[idx];
> > > + pw = &iommu->qi[(idx+1) % MAXQ];
> > > + iommu->qi_head = (idx+2) % MAXQ;
> > > +
> > > + memcpy(pw, &qi, sizeof(qi));
> > > + issue command;
> > > + while (pw->xxx)
> > > + ;
> > > +#endif
> > > +}
> > > +
> > > +/* Intel: Flush TLB entries, Queued Invalidation mode */
> > > +/*
> > > + * Build an IOTLB invalidation descriptor for the requested scope
> > > + * (global, per-domain, or per-page) and submit it via the queued
> > > + * invalidation interface.
> > > + */
> > > +void
> > > +iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
> > > +{
> > > + struct qi_entry ent;
> > > +
> > > + ent.hi = 0;
> > > + switch (mode) {
> > > + case IOTLB_GLOBAL:
> > > + ent.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
> > > + break;
> > > + case IOTLB_DOMAIN:
> > > + ent.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN | QI_IOTLB_DID(did);
> > > + break;
> > > + case IOTLB_PAGE:
> > > + ent.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
> > > + break;
> > > + }
> > > + /* Request read/write drain when the hardware supports it. */
> > > + if (iommu->cap & CAP_DRD)
> > > + ent.lo |= QI_IOTLB_DR;
> > > + if (iommu->cap & CAP_DWD)
> > > + ent.lo |= QI_IOTLB_DW;
> > > + iommu_issue_qi(iommu, &ent);
> > > +}
> > > +
> > > +/* Intel: Flush Context entries, Queued Invalidation mode */
> > > +/*
> > > + * Build a context-cache invalidation descriptor for the requested
> > > + * scope (global, per-domain, or per-device) and submit it via the
> > > + * queued invalidation interface.
> > > + */
> > > +void
> > > +iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
> > > +    int sid, int fm)
> > > +{
> > > + struct qi_entry ent;
> > > +
> > > + ent.hi = 0;
> > > + switch (mode) {
> > > + case CTX_GLOBAL:
> > > + ent.lo = QI_CTX | QI_CTX_IG_GLOBAL;
> > > + break;
> > > + case CTX_DOMAIN:
> > > + ent.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
> > > + break;
> > > + case CTX_DEVICE:
> > > + ent.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
> > > +    QI_CTX_SID(sid) | QI_CTX_FM(fm);
> > > + break;
> > > + }
> > > + iommu_issue_qi(iommu, &ent);
> > > +}
> > > +
> > > +/* Intel: Flush write buffers */
> > > +/*
> > > + * Flush the IOMMU internal write buffers; only required when the
> > > + * hardware reports CAP_RWBF.  Per the VT-d spec, software sets
> > > + * GCMD_WBF and hardware *clears* GSTS_WBFS once the flush has
> > > + * completed, so poll for the bit to go clear.  (The previous loop
> > > + * broke as soon as WBFS was observed *set*, i.e. while the flush
> > > + * was still in progress, and reported failure otherwise.)
> > > + */
> > > +void
> > > +iommu_flush_write_buffer(struct iommu_softc *iommu)
> > > +{
> > > + int i, sts;
> > > +
> > > + if (iommu->dte)
> > > + return;
> > > + if (!(iommu->cap & CAP_RWBF))
> > > + return;
> > > + printf("writebuf\n");
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
> > > + for (i = 0; i < 5; i++) {
> > > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > + if (!(sts & GSTS_WBFS))
> > > + break;
> > > + delay(10000);
> > > + }
> > > + if (i == 5) {
> > > + printf("write buffer flush fails\n");
> > > + }
> > > +}
> > > +
> > > +/*
> > > + * Write back CPU cache lines covering an in-memory IOMMU structure.
> > > + * Needed on AMD (dte path) and on Intel units that are not cache
> > > + * coherent (ECAP_C clear); a no-op on coherent Intel hardware.
> > > + */
> > > +void
> > > +iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
> > > +{
> > > + if (iommu->dte || !(iommu->ecap & ECAP_C))
> > > + pmap_flush_cache((vaddr_t)addr, size);
> > > +}
> > > +
> > > +/*
> > > + * Intel: Flush IOMMU TLB Entries
> > > + * Flushing can occur globally, per domain or per page
> > > + */
> > > +void
> > > +iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
> > > +{
> > > + int n;
> > > + uint64_t val;
> > > +
> > > + /* Call AMD */
> > > + if (iommu->dte) {
> > > + ivhd_invalidate_domain(iommu, did);
> > > + //ivhd_poll_events(iommu);
> > > + return;
> > > + }
> > > + /* Intel: build the invalidation command in the IOTLB register. */
> > > + val = IOTLB_IVT;
> > > + switch (mode) {
> > > + case IOTLB_GLOBAL:
> > > + val |= IIG_GLOBAL;
> > > + break;
> > > + case IOTLB_DOMAIN:
> > > + val |= IIG_DOMAIN | IOTLB_DID(did);
> > > + break;
> > > + case IOTLB_PAGE:
> > > + /* NOTE(review): no address is written for the page case here;
> > > +  * this effectively invalidates at domain granularity — confirm. */
> > > + val |= IIG_PAGE | IOTLB_DID(did);
> > > + break;
> > > + }
> > > +
> > > + /* Check for Read/Write Drain */
> > > + if (iommu->cap & CAP_DRD)
> > > + val |= IOTLB_DR;
> > > + if (iommu->cap & CAP_DWD)
> > > + val |= IOTLB_DW;
> > > +
> > > + mtx_enter(&iommu->reg_lock);
> > > +
> > > + /* Hardware clears IOTLB_IVT when the invalidation completes;
> > > +  * bounded busy-wait (no delay between polls). */
> > > + iommu_writeq(iommu, DMAR_IOTLB_REG(iommu), val);
> > > + n = 0;
> > > + do {
> > > + val = iommu_readq(iommu, DMAR_IOTLB_REG(iommu));
> > > + } while (n++ < 5 && val & IOTLB_IVT);
> > > +
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > +#ifdef DEBUG
> > > + /* Warn (once) if hardware performed a different invalidation
> > > +  * granularity than the one requested. */
> > > + {
> > > + static int rg;
> > > + int a, r;
> > > +
> > > + if (!rg) {
> > > + a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
> > > + r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
> > > + if (a != r) {
> > > + printf("TLB Requested:%d Actual:%d\n", r, a);
> > > + rg = 1;
> > > + }
> > > + }
> > > + }
> > > +#endif
> > > +}
> > > +
> > > +/* Intel: Flush IOMMU settings
> > > + * Flushes can occur globally, per domain, or per device
> > > + */
> > > +void
> > > +iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
> > > +{
> > > + uint64_t val;
> > > + int n;
> > > +
> > > + /* AMD units use the device table path; nothing to do here. */
> > > + if (iommu->dte)
> > > + return;
> > > + /* Build the context-cache invalidation command. */
> > > + val = CCMD_ICC;
> > > + switch (mode) {
> > > + case CTX_GLOBAL:
> > > + val |= CIG_GLOBAL;
> > > + break;
> > > + case CTX_DOMAIN:
> > > + val |= CIG_DOMAIN | CCMD_DID(did);
> > > + break;
> > > + case CTX_DEVICE:
> > > + val |= CIG_DEVICE | CCMD_DID(did) |
> > > +    CCMD_SID(sid) | CCMD_FM(fm);
> > > + break;
> > > + }
> > > +
> > > + mtx_enter(&iommu->reg_lock);
> > > +
> > > + /* Hardware clears CCMD_ICC on completion; bounded busy-wait. */
> > > + n = 0;
> > > + iommu_writeq(iommu, DMAR_CCMD_REG, val);
> > > + do {
> > > + val = iommu_readq(iommu, DMAR_CCMD_REG);
> > > + } while (n++ < 5 && val & CCMD_ICC);
> > > +
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > +#ifdef DEBUG
> > > + /* Warn (once) if hardware used a different invalidation
> > > +  * granularity than requested. */
> > > + {
> > > + static int rg;
> > > + int a, r;
> > > +
> > > + if (!rg) {
> > > + a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
> > > + r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
> > > + if (a != r) {
> > > + printf("CTX Requested:%d Actual:%d\n", r, a);
> > > + rg = 1;
> > > + }
> > > + }
> > > + }
> > > +#endif
> > > +}
> > > +
> > > +/* Intel: Enable Queued Invalidation */
> > > +void
> > > +iommu_enable_qi(struct iommu_softc *iommu, int enable)
> > > +{
> > > + int n = 0;
> > > + int sts;
> > > +
> > > + /* Only meaningful when the unit advertises queued invalidation. */
> > > + if (!(iommu->ecap & ECAP_QI))
> > > + return;
> > > +
> > > + if (enable) {
> > > + /* Set GCMD_QIE and wait for GSTS_QIES to latch. */
> > > + iommu->gcmd |= GCMD_QIE;
> > > +
> > > + mtx_enter(&iommu->reg_lock);
> > > +
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > + do {
> > > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > + } while (n++ < 5 && !(sts & GSTS_QIES));
> > > +
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > + printf("set.qie: %d\n", n);
> > > + } else {
> > > + /* Clear GCMD_QIE and wait for GSTS_QIES to drop. */
> > > + iommu->gcmd &= ~GCMD_QIE;
> > > +
> > > + mtx_enter(&iommu->reg_lock);
> > > +
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > + do {
> > > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > + } while (n++ < 5 && sts & GSTS_QIES);
> > > +
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > + printf("clr.qie: %d\n", n);
> > > + }
> > > +}
> > > +
> > > +/* Intel: Enable IOMMU translation */
> > > +int
> > > +iommu_enable_translation(struct iommu_softc *iommu, int enable)
> > > +{
> > > + uint32_t sts;
> > > + uint64_t reg;
> > > + int n = 0;
> > > +
> > > + /* AMD units are enabled elsewhere; nothing to do here. */
> > > + if (iommu->dte)
> > > + return (0);
> > > + /* NOTE(review): "reg" is set but never used below — candidate
> > > +  * for removal. */
> > > + reg = 0;
> > > + if (enable) {
> > > + printf("enable iommu %d\n", iommu->id);
> > > + iommu_showcfg(iommu, -1);
> > > +
> > > + iommu->gcmd |= GCMD_TE;
> > > +
> > > + /* Enable translation */
> > > + printf(" pre tes: ");
> > > +
> > > + /* Set GCMD_TE and wait for GSTS_TES with increasing delay.
> > > +  * The "xxx"/"yyy" printfs look like leftover debug output. */
> > > + mtx_enter(&iommu->reg_lock);
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > + printf("xxx");
> > > + do {
> > > + printf("yyy");
> > > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > + delay(n * 10000);
> > > + } while (n++ < 5 && !(sts & GSTS_TES));
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > + printf(" set.tes: %d\n", n);
> > > +
> > > + if (n >= 5) {
> > > + /* Hardware never acknowledged: mark unit bad and
> > > +  * roll back the enable. */
> > > + printf("error.. unable to initialize iommu %d\n",
> > > +    iommu->id);
> > > + iommu->flags |= IOMMU_FLAGS_BAD;
> > > +
> > > + /* Disable IOMMU */
> > > + iommu->gcmd &= ~GCMD_TE;
> > > + mtx_enter(&iommu->reg_lock);
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > + return (1);
> > > + }
> > > +
> > > + /* Start from a clean slate: flush contexts and TLB. */
> > > + iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
> > > + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> > > + } else {
> > > + /* Clear GCMD_TE and wait for GSTS_TES to drop. */
> > > + iommu->gcmd &= ~GCMD_TE;
> > > +
> > > + mtx_enter(&iommu->reg_lock);
> > > +
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > + do {
> > > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > + } while (n++ < 5 && sts & GSTS_TES);
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > + printf(" clr.tes: %d\n", n);
> > > + }
> > > +
> > > + return (0);
> > > +}
> > > +
> > > +/* Intel: Initialize IOMMU */
> > > +int
> > > +iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
> > > +    struct acpidmar_drhd *dh)
> > > +{
> > > + static int niommu;
> > > + int len = VTD_PAGE_SIZE;
> > > + int i, gaw;
> > > + uint32_t sts;
> > > + paddr_t paddr;
> > > +
> > > + /* Map the unit's register window; -1 if the mapping fails. */
> > > + if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
> > > + return (-1);
> > > + }
> > > +
> > > + /* Per-unit state from the DRHD table entry. */
> > > + TAILQ_INIT(&iommu->domains);
> > > + iommu->id = ++niommu;
> > > + iommu->flags = dh->flags;
> > > + iommu->segment = dh->segment;
> > > + iommu->iot = sc->sc_memt;
> > > +
> > > + /* Read capability registers; ndoms = number of domain ids. */
> > > + iommu->cap = iommu_readq(iommu, DMAR_CAP_REG);
> > > + iommu->ecap = iommu_readq(iommu, DMAR_ECAP_REG);
> > > + iommu->ndoms = cap_nd(iommu->cap);
> > > +
> > > + printf("  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
> > > +    iommu->cap & CAP_AFL ? "afl " : "", // adv fault
> > > +    iommu->cap & CAP_RWBF ? "rwbf " : "", // write-buffer flush
> > > +    iommu->cap & CAP_PLMR ? "plmr " : "", // protected lo region
> > > +    iommu->cap & CAP_PHMR ? "phmr " : "", // protected hi region
> > > +    iommu->cap & CAP_CM ? "cm " : "", // caching mode
> > > +    iommu->cap & CAP_ZLR ? "zlr " : "", // zero-length read
> > > +    iommu->cap & CAP_PSI ? "psi " : "", // page invalidate
> > > +    iommu->cap & CAP_DWD ? "dwd " : "", // write drain
> > > +    iommu->cap & CAP_DRD ? "drd " : "", // read drain
> > > +    iommu->cap & CAP_FL1GP ? "Gb " : "", // 1Gb pages
> > > +    iommu->cap & CAP_PI ? "pi " : ""); // posted interrupts
> > > + printf("  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
> > > +    iommu->ecap & ECAP_C ? "c " : "", // coherent
> > > +    iommu->ecap & ECAP_QI ? "qi " : "", // queued invalidate
> > > +    iommu->ecap & ECAP_DT ? "dt " : "", // device iotlb
> > > +    iommu->ecap & ECAP_IR ? "ir " : "", // intr remap
> > > +    iommu->ecap & ECAP_EIM ? "eim " : "", // x2apic
> > > +    iommu->ecap & ECAP_PT ? "pt " : "", // passthrough
> > > +    iommu->ecap & ECAP_SC ? "sc " : "", // snoop control
> > > +    iommu->ecap & ECAP_ECS ? "ecs " : "", // extended context
> > > +    iommu->ecap & ECAP_MTS ? "mts " : "", // memory type
> > > +    iommu->ecap & ECAP_NEST ? "nest " : "", // nested translations
> > > +    iommu->ecap & ECAP_DIS ? "dis " : "", // deferred invalidation
> > > +    iommu->ecap & ECAP_PASID ? "pas " : "", // pasid
> > > +    iommu->ecap & ECAP_PRS ? "prs " : "", // page request
> > > +    iommu->ecap & ECAP_ERS ? "ers " : "", // execute request
> > > +    iommu->ecap & ECAP_SRS ? "srs " : "", // supervisor request
> > > +    iommu->ecap & ECAP_NWFS ? "nwfs " : "", // no write flag
> > > +    iommu->ecap & ECAP_EAFS ? "eafs " : ""); // extended accessed flag
> > > +
> > > + mtx_init(&iommu->reg_lock, IPL_HIGH);
> > > +
> > > + /* Clear Interrupt Masking */
> > > + iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
> > > +
> > > + /* Hook up the fault interrupt handler. */
> > > + iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
> > > +    acpidmar_intr, iommu, "dmarintr");
> > > +
> > > + /* Enable interrupts */
> > > + sts = iommu_readl(iommu, DMAR_FECTL_REG);
> > > + iommu_writel(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);
> > > +
> > > + /* Allocate root pointer */
> > > + iommu->root = iommu_alloc_page(iommu, &paddr);
> > > +#ifdef DEBUG
> > > + printf("Allocated root pointer: pa:%.16llx va:%p\n",
> > > +    (uint64_t)paddr, iommu->root);
> > > +#endif
> > > + iommu->rtaddr = paddr;
> > > + iommu_flush_write_buffer(iommu);
> > > + iommu_set_rtaddr(iommu, paddr);
> > > +
> > > +#if 0
> > > + if (iommu->ecap & ECAP_QI) {
> > > + /* Queued Invalidation support */
> > > + iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
> > > + iommu_writeq(iommu, DMAR_IQT_REG, 0);
> > > + iommu_writeq(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
> > > + }
> > > + if (iommu->ecap & ECAP_IR) {
> > > + /* Interrupt remapping support */
> > > + iommu_writeq(iommu, DMAR_IRTA_REG, 0);
> > > + }
> > > +#endif
> > > +
> > > + /* Calculate guest address width and supported guest widths */
> > > + gaw = -1;
> > > + iommu->mgaw = cap_mgaw(iommu->cap);
> > > + printf("gaw: %d { ", iommu->mgaw);
> > > + /* agaw ends up as the largest supported width in SAGAW. */
> > > + for (i = 0; i < 5; i++) {
> > > + if (cap_sagaw(iommu->cap) & (1L << i)) {
> > > + gaw = VTD_LEVELTOAW(i);
> > > + printf("%d ", gaw);
> > > + iommu->agaw = gaw;
> > > + }
> > > + }
> > > + printf("}\n");
> > > +
> > > + /* Cache current status register bits */
> > > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > + if (sts & GSTS_TES)
> > > + iommu->gcmd |= GCMD_TE;
> > > + if (sts & GSTS_QIES)
> > > + iommu->gcmd |= GCMD_QIE;
> > > + if (sts & GSTS_IRES)
> > > + iommu->gcmd |= GCMD_IRE;
> > > + if (iommu->gcmd) {
> > > + printf("gcmd: %x preset\n", iommu->gcmd);
> > > + }
> > > + /* Drain any pending faults before returning. */
> > > + acpidmar_intr(iommu);
> > > + return (0);
> > > +}
> > > +
> > > +const char *dmar_rn(int reg);
> > > +
> > > +/*
> > > + * Translate a register offset to a printable name for debug
> > > + * output.  The live cases are the AMD IOMMU registers; the Intel
> > > + * DMAR names are currently compiled out.
> > > + */
> > > +const char *
> > > +dmar_rn(int reg)
> > > +{
> > > + switch (reg) {
> > > + case EVT_HEAD_REG: return "evthead";
> > > + case EVT_TAIL_REG: return "evttail";
> > > + case CMD_HEAD_REG: return "cmdhead";
> > > + case CMD_TAIL_REG: return "cmdtail";
> > > + case CMD_BASE_REG: return "cmdbase";
> > > + case EVT_BASE_REG: return "evtbase";
> > > + case DEV_TAB_BASE_REG: return "devtblbase";
> > > + case IOMMUCTL_REG: return "iommuctl";
> > > +#if 0
> > > + case DMAR_VER_REG: return "ver";
> > > + case DMAR_CAP_REG: return "cap";
> > > + case DMAR_ECAP_REG: return "ecap";
> > > + case DMAR_GSTS_REG: return "gsts";
> > > + case DMAR_GCMD_REG: return "gcmd";
> > > + case DMAR_FSTS_REG: return "fsts";
> > > + case DMAR_FECTL_REG: return "fectl";
> > > + case DMAR_RTADDR_REG: return "rtaddr";
> > > + case DMAR_FEDATA_REG: return "fedata";
> > > + case DMAR_FEADDR_REG: return "feaddr";
> > > + case DMAR_FEUADDR_REG: return "feuaddr";
> > > + case DMAR_PMEN_REG: return "pmen";
> > > + case DMAR_IEDATA_REG: return "iedata";
> > > + case DMAR_IEADDR_REG: return "ieaddr";
> > > + case DMAR_IEUADDR_REG: return "ieuaddr";
> > > + case DMAR_IRTA_REG: return "irta";
> > > + case DMAR_CCMD_REG: return "ccmd";
> > > + case DMAR_IQH_REG: return "iqh";
> > > + case DMAR_IQT_REG: return "iqt";
> > > + case DMAR_IQA_REG: return "iqa";
> > > +#endif
> > > + }
> > > + return "unknown";
> > > +}
> > > +
> > > +/* Read/Write IOMMU register */
> > > +/*
> > > + * Read a 32-bit IOMMU register.  The previous per-read debug
> > > + * printf was gated on "reg < 00" (octal zero) which is never true
> > > + * for a register offset, so the dead branch has been removed.
> > > + */
> > > +uint32_t
> > > +iommu_readl(struct iommu_softc *iommu, int reg)
> > > +{
> > > + return (bus_space_read_4(iommu->iot, iommu->ioh, reg));
> > > +}
> > > +
> > > +
> > > +/* Debug trace for register writes; expands to nothing (compiled out). */
> > > +#define dbprintf(x...)
> > > +
> > > +/* Write a 32-bit IOMMU register. */
> > > +void
> > > +iommu_writel(struct iommu_softc *iommu, int reg, uint32_t v)
> > > +{
> > > + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> > > +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > > + bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
> > > +}
> > > +
> > > +/*
> > > + * Read a 64-bit IOMMU register.  The previous per-read debug
> > > + * printf was gated on "reg < 00" (octal zero) which is never true
> > > + * for a register offset, so the dead branch has been removed.
> > > + */
> > > +uint64_t
> > > +iommu_readq(struct iommu_softc *iommu, int reg)
> > > +{
> > > + return (bus_space_read_8(iommu->iot, iommu->ioh, reg));
> > > +}
> > > +
> > > +/* Write a 64-bit IOMMU register. */
> > > +void
> > > +iommu_writeq(struct iommu_softc *iommu, int reg, uint64_t v)
> > > +{
> > > + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> > > +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > > + bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
> > > +}
> > > +
> > > +/* Check if a device is within a device scope */
> > > +int
> > > +acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
> > > +    int sid)
> > > +{
> > > + struct dmar_devlist *ds;
> > > + int sub, sec, i;
> > > + int bus, dev, fun, sbus;
> > > + pcireg_t reg;
> > > + pcitag_t tag;
> > > +
> > > + /*
> > > +  * Returns DMAR_ENDPOINT on an exact bus:dev.fn match,
> > > +  * DMAR_BRIDGE when sid sits behind a listed bridge, 0 otherwise.
> > > +  */
> > > + sbus = sid_bus(sid);
> > > + TAILQ_FOREACH(ds, devlist, link) {
> > > + bus = ds->bus;
> > > + dev = ds->dp[0].device;
> > > + fun = ds->dp[0].function;
> > > + /* Walk PCI bridges in path */
> > > + for (i = 1; i < ds->ndp; i++) {
> > > + /* Resolve each hop to its secondary bus number. */
> > > + tag = pci_make_tag(pc, bus, dev, fun);
> > > + reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
> > > + bus = PPB_BUSINFO_SECONDARY(reg);
> > > + dev = ds->dp[i].device;
> > > + fun = ds->dp[i].function;
> > > + }
> > > +
> > > + /* Check for device exact match */
> > > + if (sid == mksid(bus, dev, fun)) {
> > > + return DMAR_ENDPOINT;
> > > + }
> > > +
> > > + /* Check for device subtree match */
> > > + if (ds->type == DMAR_BRIDGE) {
> > > + /* sid matches if its bus lies in the bridge's
> > > +  * secondary..subordinate range. */
> > > + tag = pci_make_tag(pc, bus, dev, fun);
> > > + reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
> > > + sec = PPB_BUSINFO_SECONDARY(reg);
> > > + sub = PPB_BUSINFO_SUBORDINATE(reg);
> > > + if (sec <= sbus && sbus <= sub) {
> > > + return DMAR_BRIDGE;
> > > + }
> > > + }
> > > + }
> > > +
> > > + return (0);
> > > +}
> > > +
> > > +/*
> > > + * Create a new remapping domain with id "did" on the given IOMMU:
> > > + * allocates the top-level page table, installs the intercepting
> > > + * bus_dma tag, and sets up the IOVA allocation extent.
> > > + */
> > > +struct domain *
> > > +domain_create(struct iommu_softc *iommu, int did)
> > > +{
> > > + struct domain *dom;
> > > + int gaw;
> > > +
> > > + printf("iommu%d: create domain: %.4x\n", iommu->id, did);
> > > + dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + dom->did = did;
> > > + dom->iommu = iommu;
> > > + dom->pte = iommu_alloc_page(iommu, &dom->ptep);
> > > + TAILQ_INIT(&dom->devices);
> > > +
> > > + /* Setup DMA */
> > > + dom->dmat._cookie = dom;
> > > + dom->dmat._dmamap_create    = dmar_dmamap_create; // nop
> > > + dom->dmat._dmamap_destroy   = dmar_dmamap_destroy; // nop
> > > + dom->dmat._dmamap_load      = dmar_dmamap_load; // lm
> > > + dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf; // lm
> > > + dom->dmat._dmamap_load_uio  = dmar_dmamap_load_uio; // lm
> > > + dom->dmat._dmamap_load_raw  = dmar_dmamap_load_raw; // lm
> > > + dom->dmat._dmamap_unload    = dmar_dmamap_unload; // um
> > > + dom->dmat._dmamap_sync      = dmar_dmamap_sync; // lm
> > > + dom->dmat._dmamem_alloc     = dmar_dmamem_alloc; // nop
> > > + dom->dmat._dmamem_free      = dmar_dmamem_free; // nop
> > > + dom->dmat._dmamem_map       = dmar_dmamem_map; // nop
> > > + dom->dmat._dmamem_unmap     = dmar_dmamem_unmap; // nop
> > > + dom->dmat._dmamem_mmap      = dmar_dmamem_mmap;
> > > +
> > > + snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
> > > +    iommu->id, dom->did);
> > > +
> > > + /* Setup IOMMU address map */
> > > + /* IOVA allocations start at 16MB and run to the smaller of the
> > > +  * supported and maximum guest address widths. */
> > > + gaw = min(iommu->agaw, iommu->mgaw);
> > > + dom->iovamap = extent_create(dom->exname, 1024*1024*16,
> > > +    (1LL << gaw)-1,
> > > +    M_DEVBUF, NULL, 0,
> > > +    EX_WAITOK|EX_NOCOALESCE);
> > > +
> > > + /* Zero out Interrupt region */
> > > + /* Reserve 0xFEE00000-0xFEEFFFFF so it is never handed out
> > > +  * (x86 message interrupt address window). */
> > > + extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
> > > +    EX_WAITOK);
> > > + mtx_init(&dom->exlck, IPL_HIGH);
> > > +
> > > + TAILQ_INSERT_TAIL(&iommu->domains, dom, link);
> > > +
> > > + return dom;
> > > +}
> > > +
> > > +/* Record a source id (bus.dev.fn) as belonging to this domain. */
> > > +void domain_add_device(struct domain *dom, int sid)
> > > +{
> > > + struct domain_dev *dd;
> > > +
> > > + printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
> > > + dd = malloc(sizeof(*dd), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + dd->sid = sid;
> > > + TAILQ_INSERT_TAIL(&dom->devices, dd, link);
> > > +
> > > + /* Should set context entry here?? */
> > > +}
> > > +
> > > +/* Remove (and free) every device entry matching this source id. */
> > > +void domain_remove_device(struct domain *dom, int sid)
> > > +{
> > > + struct domain_dev *dd, *next;
> > > +
> > > + TAILQ_FOREACH_SAFE(dd, &dom->devices, link, next) {
> > > + if (dd->sid != sid)
> > > + continue;
> > > + TAILQ_REMOVE(&dom->devices, dd, link);
> > > + free(dd, sizeof(*dd), M_DEVBUF);
> > > + }
> > > +}
> > > +
> > > +/* Lookup domain by segment & source id (bus.device.function) */
> > > +struct domain *
> > > +domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
> > > +{
> > > + struct iommu_softc *iommu;
> > > + struct domain_dev *ddev;
> > > + struct domain *dom;
> > > + int rc;
> > > +
> > > + if (sc == NULL) {
> > > + return NULL;
> > > + }
> > > +
> > > + /* Lookup IOMMU for this device */
> > > + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> > > + if (iommu->segment != segment)
> > > + continue;
> > > + /* Check for devscope match or catchall iommu */
> > > + rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
> > > + if (rc != 0 || iommu->flags) {
> > > + break;
> > > + }
> > > + }
> > > + /* iommu is NULL when the list was exhausted without a break. */
> > > + if (!iommu) {
> > > + printf("%s: no iommu found\n", dmar_bdf(sid));
> > > + return NULL;
> > > + }
> > > +
> > > + //acpidmar_intr(iommu);
> > > +
> > > + /* Search domain devices */
> > > + TAILQ_FOREACH(dom, &iommu->domains, link) {
> > > + TAILQ_FOREACH(ddev, &dom->devices, link) {
> > > + /* XXX: match all functions? */
> > > + if (ddev->sid == sid) {
> > > + return dom;
> > > + }
> > > + }
> > > + }
> > > + /* No existing domain: allocate one, falling back to a shared
> > > +  * "unity" domain when domain ids are nearly exhausted. */
> > > + if (iommu->ndoms <= 2) {
> > > + /* Running out of domains.. create catchall domain */
> > > + if (!iommu->unity) {
> > > + iommu->unity = domain_create(iommu, 1);
> > > + }
> > > + dom = iommu->unity;
> > > + } else {
> > > + dom = domain_create(iommu, --iommu->ndoms);
> > > + }
> > > + if (!dom) {
> > > + printf("no domain here\n");
> > > + return NULL;
> > > + }
> > > +
> > > + /* Add device to domain */
> > > + domain_add_device(dom, sid);
> > > +
> > > + return dom;
> > > +}
> > > +
> > > +/* Map Guest Pages into IOMMU */
> > > +/* Map Guest Pages into IOMMU */
> > > +/*
> > > + * Map len bytes starting at kernel VA "va" of the current process
> > > + * into the domain at guest physical address "gpa", page by page,
> > > + * with read+write permission.
> > > + */
> > > +void  _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
> > > +{
> > > + bus_size_t i;
> > > + paddr_t hpa;
> > > +
> > > + if (dom == NULL) {
> > > + return;
> > > + }
> > > + printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
> > > + for (i = 0; i < len; i += PAGE_SIZE) {
> > > + hpa = 0;
> > > + /* NOTE(review): pmap_extract's return value is ignored; an
> > > +  * unmapped va would map gpa to physical page 0 — confirm the
> > > +  * caller guarantees the range is resident. */
> > > + pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
> > > + domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
> > > + gpa += PAGE_SIZE;
> > > + va  += PAGE_SIZE;
> > > + }
> > > +}
> > > +
> > > +/* Find IOMMU for a given PCI device */
> > > +/*
> > > + * Find (or create) the remapping domain for a PCI device; on
> > > + * success *id receives the domain id.  Returns NULL if no IOMMU
> > > + * covers the device.
> > > + */
> > > +void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
> > > +{
> > > + struct domain *dom;
> > > +
> > > + dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
> > > + if (dom != NULL)
> > > + *id = dom->did;
> > > + return (dom);
> > > +}
> > > +
> > > +void domain_map_device(struct domain *dom, int sid);
> > > +
> > > +void
> > > +domain_map_device(struct domain *dom, int sid)
> > > +{
> > > + struct iommu_softc *iommu;
> > > + struct context_entry *ctx;
> > > + paddr_t paddr;
> > > + int bus, devfn;
> > > + int tt, lvl;
> > > +
> > > + iommu = dom->iommu;
> > > +
> > > + bus = sid_bus(sid);
> > > + devfn = sid_devfn(sid);
> > > + /* AMD attach device */
> > > + if (iommu->dte) {
> > > + struct ivhd_dte *dte = &iommu->dte[sid];
> > > + /* dw0 == 0 means this DTE has not been programmed yet. */
> > > + if (!dte->dw0) {
> > > + /* Setup Device Table Entry: bus.devfn */
> > > + printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
> > > + dte_set_host_page_table_root_ptr(dte, dom->ptep);
> > > + dte_set_domain(dte, dom->did);
> > > + dte_set_mode(dte, 3);  // Set 4 level PTE
> > > + dte_set_tv(dte);
> > > + dte_set_valid(dte);
> > > + ivhd_flush_devtab(iommu, dom->did);
> > > + //ivhd_showit(iommu);
> > > + ivhd_showdte();
> > > + }
> > > + //ivhd_poll_events(iommu);
> > > + return;
> > > + }
> > > +
> > > + /* Create Bus mapping */
> > > + /* Intel: the root entry for this bus points at a page of
> > > +  * context entries, allocated on first use. */
> > > + if (!root_entry_is_valid(&iommu->root[bus])) {
> > > + iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
> > > + iommu->root[bus].lo = paddr | ROOT_P;
> > > + iommu_flush_cache(iommu, &iommu->root[bus],
> > > +    sizeof(struct root_entry));
> > > + dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
> > > +    iommu->id, bus, (uint64_t)paddr,
> > > +    iommu->ctx[bus]);
> > > + }
> > > +
> > > + /* Create DevFn mapping */
> > > + ctx = iommu->ctx[bus] + devfn;
> > > + if (!context_entry_is_valid(ctx)) {
> > > + tt = CTX_T_MULTI;
> > > + lvl = VTD_AWTOLEVEL(iommu->agaw);
> > > +
> > > + /* Initialize context */
> > > + context_set_slpte(ctx, dom->ptep);
> > > + context_set_translation_type(ctx, tt);
> > > + context_set_domain_id(ctx, dom->did);
> > > + context_set_address_width(ctx, lvl);
> > > + context_set_present(ctx);
> > > +
> > > + /* Flush it */
> > > + /* Caching-mode hardware requires explicit invalidation;
> > > +  * otherwise a write-buffer flush suffices. */
> > > + iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
> > > + if ((iommu->cap & CAP_CM) || force_cm) {
> > > + iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
> > > + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> > > + } else {
> > > + iommu_flush_write_buffer(iommu);
> > > + }
> > > + dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
> > > +    iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
> > > +    dom->did, tt);
> > > + }
> > > +}
> > > +
> > > +/*
> > > + * Look up (or create) the remapping domain for a device and, when
> > > + * mapctx is set, program its context/device-table entry.  Returns
> > > + * NULL if no IOMMU covers the device.
> > > + *
> > > + * The domain pointer used to be declared "static" for no reason,
> > > + * silently sharing state across calls; it is a plain local now.
> > > + */
> > > +struct domain *
> > > +acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
> > > +{
> > > + struct domain *dom;
> > > +
> > > + dom = domain_lookup(sc, segment, sid);
> > > + if (!dom) {
> > > + printf("no domain: %s\n", dmar_bdf(sid));
> > > + return NULL;
> > > + }
> > > +
> > > + if (mapctx) {
> > > + domain_map_device(dom, sid);
> > > + }
> > > +
> > > + return dom;
> > > +}
> > > +
> > > +/* Placeholder policy hook: every device is eligible for remapping. */
> > > +int ismap(int bus, int dev, int fun) {
> > > + return (1);
> > > +}
> > > +
> > > +/*
> > > + * Per-device PCI attach hook: assigns the device to a remapping
> > > + * domain and swaps in the domain's intercepting bus_dma tag so all
> > > + * subsequent DMA goes through the IOMMU.
> > > + */
> > > +void
> > > +acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> > > +{
> > > + int bus, dev, fun, sid;
> > > + struct domain *dom;
> > > + pcireg_t reg;
> > > +
> > > + if (!acpidmar_sc) {
> > > + /* No DMAR, ignore */
> > > + return;
> > > + }
> > > +
> > > + /* Add device to our list */
> > > + pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
> > > + sid = mksid(bus, dev, fun);
> > > + /* Skip devices previously marked invalid for remapping. */
> > > + if (sid_flag[sid] & SID_INVALID)
> > > + return;
> > > +
> > > + reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
> > > +#if 0
> > > + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
> > > +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
> > > + printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
> > > +    pa->pa_domain, bus, dev, fun);
> > > + return;
> > > + }
> > > +#endif
> > > + /* Add device to domain */
> > > + dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain, sid, 0);
> > > + if (dom == NULL)
> > > + return;
> > > +
> > > + /* VGA devices keep their domain but are never page-mapped. */
> > > + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
> > > +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
> > > + dom->flag = DOM_NOMAP;
> > > + }
> > > + if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
> > > +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
> > > + /* For ISA Bridges, map 0-16Mb as 1:1 */
> > > + printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
> > > +    pa->pa_domain, bus, dev, fun);
> > > + domain_map_pthru(dom, 0x00, 16*1024*1024);
> > > + }
> > > +
> > > + /* Change DMA tag */
> > > + pa->pa_dmat = &dom->dmat;
> > > +}
> > > +
> > > +/* Create list of device scope entries from ACPI table */
> > > +void
> > > +acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
> > > +    struct devlist_head *devlist)
> > > +{
> > > + struct acpidmar_devscope *ds;
> > > + struct dmar_devlist *d;
> > > + int dplen, i;
> > > +
> > > + /* Walk variable-length device scope entries following the
> > > +  * fixed header at offset "off". */
> > > + TAILQ_INIT(devlist);
> > > + while (off < de->length) {
> > > + ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
> > > + off += ds->length;
> > > +
> > > + /* We only care about bridges and endpoints */
> > > + if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
> > > + continue;
> > > +
> > > + /* Copy the trailing path; each entry appears to be two
> > > +  * bytes (device, function) — hence dplen / 2. */
> > > + dplen = ds->length - sizeof(*ds);
> > > + d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
> > > + d->bus  = ds->bus;
> > > + d->type = ds->type;
> > > + d->ndp  = dplen / 2;
> > > + d->dp   = (void *)&d[1];
> > > + memcpy(d->dp, &ds[1], dplen);
> > > + TAILQ_INSERT_TAIL(devlist, d, link);
> > > +
> > > + printf("  %8s  %.4x:%.2x.%.2x.%x {",
> > > +    ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
> > > +    segment, ds->bus,
> > > +    d->dp[0].device,
> > > +    d->dp[0].function);
> > > +
> > > + for (i = 1; i < d->ndp; i++) {
> > > + printf(" %2x.%x ",
> > > +    d->dp[i].device,
> > > +    d->dp[i].function);
> > > + }
> > > + printf("}\n");
> > > + }
> > > +}
> > > +
> > > +/* DMA Remapping Hardware Unit */
> > > +void
> > > +acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
> > > +{
> > > + struct iommu_softc *iommu;
> > > +
> > > + printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
> > > +    de->drhd.segment,
> > > +    de->drhd.address,
> > > +    de->drhd.flags);
> > > + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
> > > +    &iommu->devices);
> > > + /* NOTE(review): iommu_init can fail (returns -1 when the register
> > > +  * window cannot be mapped) but the result is not checked — the
> > > +  * unit is queued regardless; confirm this is intended. */
> > > + iommu_init(sc, iommu, &de->drhd);
> > > +
> > > + /* Non-zero flags marks the INCLUDE-ALL (catchall) unit. */
> > > + if (de->drhd.flags) {
> > > + /* Catchall IOMMU goes at end of list */
> > > + TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
> > > + } else {
> > > + TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
> > > + }
> > > +}
> > > +
> > > +/* Reserved Memory Region Reporting */
> > > +void
> > > +acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
> > > +{
> > > + struct rmrr_softc *rmrr;
> > > + bios_memmap_t *im, *jm;
> > > + uint64_t start, end;
> > > +
> > > + printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
> > > +    de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
> > > + if (de->rmrr.limit <= de->rmrr.base) {
> > > + printf("  buggy BIOS\n");
> > > + return;
> > > + }
> > > +
> > > + rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + rmrr->start = trunc_page(de->rmrr.base);
> > > + rmrr->end = round_page(de->rmrr.limit);
> > > + rmrr->segment = de->rmrr.segment;
> > > + acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
> > > +    &rmrr->devices);
> > > +
> > > + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
> > > + if (im->type != BIOS_MAP_RES)
> > > + continue;
> > > + /* Search for adjacent reserved regions */
> > > + start = im->addr;
> > > + end   = im->addr+im->size;
> > > + for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
> > > +    jm++) {
> > > + end = jm->addr+jm->size;
> > > + }
> > > + printf("e820: %.16llx - %.16llx\n", start, end);
> > > + if (start <= rmrr->start && rmrr->end <= end) {
> > > + /* Bah.. some buggy BIOS stomp outside RMRR */
> > > + printf("  ** inside E820 Reserved %.16llx %.16llx\n",
> > > +    start, end);
> > > + rmrr->start = trunc_page(start);
> > > + rmrr->end   = round_page(end);
> > > + break;
> > > + }
> > > + }
> > > + TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
> > > +}
> > > +
> > > +/* Root Port ATS Reporting */
> > > +void
> > > +acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
> > > +{
> > > + struct atsr_softc *atsr;
> > > +
> > > + printf("ATSR: segment:%.4x flags:%x\n",
> > > +    de->atsr.segment,
> > > +    de->atsr.flags);
> > > +
> > > + atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + atsr->flags = de->atsr.flags;
> > > + atsr->segment = de->atsr.segment;
> > > + acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
> > > +    &atsr->devices);
> > > +
> > > + TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
> > > +}
> > > +
> > > +void
> > > +acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
> > > +{
> > > + struct rmrr_softc *rmrr;
> > > + struct iommu_softc *iommu;
> > > + struct domain *dom;
> > > + struct dmar_devlist *dl;
> > > + union acpidmar_entry *de;
> > > + int off, sid, rc;
> > > +
> > > + domain_map_page = domain_map_page_intel;
> > > + printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
> > > +    dmar->haw+1,
> > > +    !!(dmar->flags & 0x1),
> > > +    !!(dmar->flags & 0x2));
> > > + sc->sc_haw = dmar->haw+1;
> > > + sc->sc_flags = dmar->flags;
> > > +
> > > + TAILQ_INIT(&sc->sc_drhds);
> > > + TAILQ_INIT(&sc->sc_rmrrs);
> > > + TAILQ_INIT(&sc->sc_atsrs);
> > > +
> > > + off = sizeof(*dmar);
> > > + while (off < dmar->hdr.length) {
> > > + de = (union acpidmar_entry *)((unsigned char *)dmar + off);
> > > + switch (de->type) {
> > > + case DMAR_DRHD:
> > > + acpidmar_drhd(sc, de);
> > > + break;
> > > + case DMAR_RMRR:
> > > + acpidmar_rmrr(sc, de);
> > > + break;
> > > + case DMAR_ATSR:
> > > + acpidmar_atsr(sc, de);
> > > + break;
> > > + default:
> > > + printf("DMAR: unknown %x\n", de->type);
> > > + break;
> > > + }
> > > + off += de->length;
> > > + }
> > > +
> > > + /* Pre-create domains for iommu devices */
> > > + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> > > + TAILQ_FOREACH(dl, &iommu->devices, link) {
> > > + sid = mksid(dl->bus, dl->dp[0].device,
> > > +    dl->dp[0].function);
> > > + dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
> > > + if (dom != NULL) {
> > > + printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> > > +    iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
> > > +    iommu->id, dom->did);
> > > + }
> > > + }
> > > + }
> > > + /* Map passthrough pages for RMRR */
> > > + TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
> > > + TAILQ_FOREACH(dl, &rmrr->devices, link) {
> > > + sid = mksid(dl->bus, dl->dp[0].device,
> > > +    dl->dp[0].function);
> > > + dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
> > > + if (dom != NULL) {
> > > + printf("%s map ident: %.16llx %.16llx\n",
> > > +    dom_bdf(dom), rmrr->start, rmrr->end);
> > > + domain_map_pthru(dom, rmrr->start, rmrr->end);
> > > + rc = extent_alloc_region(dom->iovamap,
> > > +    rmrr->start, rmrr->end, EX_WAITOK);
> > > + }
> > > + }
> > > + }
> > > +}
> > > +
> > > +
> > > +/*=====================================================
> > > + * AMD Vi
> > > + *=====================================================*/
> > > +void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
> > > +int acpiivrs_iommu_match(struct pci_attach_args *);
> > > +int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> > > + struct acpi_ivhd *);
> > > +void iommu_ivhd_add(struct iommu_softc *, int, int, int);
> > > +int _ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *);
> > > +void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
> > > +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
> > > +int ivhd_invalidate_domain(struct iommu_softc *, int);
> > > +void ivhd_intr_map(struct iommu_softc *, int);
> > > +
> > > +int
> > > +acpiivhd_intr(void *ctx)
> > > +{
> > > + struct iommu_softc *iommu = ctx;
> > > +
> > > + if (!iommu->dte)
> > > + return (0);
> > > + ivhd_poll_events(iommu);
> > > + return (1);
> > > +}
> > > +
> > > +/* Setup interrupt for AMD */
> > > +void
> > > +ivhd_intr_map(struct iommu_softc *iommu, int devid) {
> > > + pci_intr_handle_t ih;
> > > +
> > > + if (iommu->intr)
> > > + return;
> > > + ih.tag = pci_make_tag(NULL, sid_bus(devid), sid_dev(devid), sid_fun(devid));
> > > + ih.line = APIC_INT_VIA_MSG;
> > > + ih.pin = 0;
> > > + iommu->intr = pci_intr_establish(NULL, ih, IPL_NET | IPL_MPSAFE,
> > > + acpiivhd_intr, iommu, "amd_iommu");
> > > + printf("amd iommu intr: %p\n", iommu->intr);
> > > +}
> > > +
> > > +void _dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
> > > +{
> > > + char *pfx[] = { "    ", "   ", "  ", " ", "" };
> > > + uint64_t i, sh;
> > > + struct pte_entry *npte;
> > > +  
> > > + for (i = 0; i < 512; i++) {
> > > + sh = (i << (((lvl-1) * 9) + 12));
> > > + if (pte[i].val & PTE_P) {
> > > + if (lvl > 1) {
> > > + npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
> > > + printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl,
> > > +    pte[i].val, (pte[i].val >> 9) & 7);
> > > + _dumppte(npte, lvl-1, va | sh);
> > > + }
> > > + else {
> > > + printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl,
> > > +    pte[i].val, va | sh);
> > > + }
> > > + }
> > > + }
> > > +}
> > > +
> > > +void showpage(int sid, paddr_t paddr)
> > > +{
> > > + struct domain *dom;
> > > + static int show = 0;
> > > +
> > > + if (show > 10)
> > > + return;
> > > + show++;
> > > + dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
> > > + if (!dom)
> > > + return;
> > > + printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
> > > + hwdte[sid].dw0,
> > > + hwdte[sid].dw1,
> > > + hwdte[sid].dw2,
> > > + hwdte[sid].dw3,
> > > + hwdte[sid].dw4,
> > > + hwdte[sid].dw5,
> > > + hwdte[sid].dw6,
> > > + hwdte[sid].dw7);
> > > + _dumppte(dom->pte, 3, 0);
> > > +}
> > > +
> > > +/* Display AMD IOMMU Error */
> > > +void
> > > +ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
> > > +{
> > > + int type, sid, did, flag;
> > > + uint64_t address;
> > > +
> > > + /* Get Device, Domain, Address and Type of event */
> > > + sid  = __EXTRACT(evt->dw0, EVT_SID);
> > > + type = __EXTRACT(evt->dw1, EVT_TYPE);
> > > + did  = __EXTRACT(evt->dw1, EVT_DID);
> > > + flag = __EXTRACT(evt->dw1, EVT_FLAG);
> > > + address = _get64(&evt->dw2);
> > > +
> > > + printf("=== IOMMU Error[%.4x]: ", head);
> > > + switch (type) {
> > > + case ILLEGAL_DEV_TABLE_ENTRY: // ok
> > > + printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
> > > +   dmar_bdf(sid), address,
> > > +   evt->dw1 & EVT_TR ? "translation" : "transaction",
> > > +   evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
> > > +   evt->dw1 & EVT_RW ? "write" : "read",
> > > +   evt->dw1 & EVT_I  ? "interrupt" : "memory");
> > > + ivhd_showdte();
> > > + break;
> > > + case IO_PAGE_FAULT: // ok
> > > + printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
> > > +   dmar_bdf(sid), did, address,
> > > +   evt->dw1 & EVT_TR ? "translation" : "transaction",
> > > +   evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
> > > +   evt->dw1 & EVT_PE ? "no perm" : "perm",
> > > +   evt->dw1 & EVT_RW ? "write" : "read",
> > > +   evt->dw1 & EVT_PR ? "present" : "not present",
> > > +   evt->dw1 & EVT_I  ? "interrupt" : "memory");
> > > + ivhd_showdte();
> > > + showpage(sid, address);
> > > + break;
> > > + case DEV_TAB_HARDWARE_ERROR: // ok
> > > + printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
> > > +    dmar_bdf(sid), address,
> > > +   evt->dw1 & EVT_TR ? "translation" : "transaction",
> > > +   evt->dw1 & EVT_RW ? "write" : "read",
> > > +   evt->dw1 & EVT_I  ? "interrupt" : "memory");
> > > + ivhd_showdte();
> > > + break;
> > > + case PAGE_TAB_HARDWARE_ERROR:
> > > + printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
> > > +   dmar_bdf(sid), address,
> > > +   evt->dw1 & EVT_TR ? "translation" : "transaction",
> > > +   evt->dw1 & EVT_RW ? "write" : "read",
> > > +   evt->dw1 & EVT_I  ? "interrupt" : "memory");
> > > + ivhd_showdte();
> > > + break;
> > > + case ILLEGAL_COMMAND_ERROR: // ok
> > > + printf("illegal command addr=0x%.16llx\n", address);
> > > + ivhd_showcmd(iommu);
> > > + break;
> > > + case COMMAND_HARDWARE_ERROR:
> > > + printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
> > > +   address, flag);
> > > + ivhd_showcmd(iommu);
> > > + break;
> > > + case IOTLB_INV_TIMEOUT:
> > > + printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
> > > +   dmar_bdf(sid), address);
> > > + break;
> > > + case INVALID_DEVICE_REQUEST:
> > > + printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
> > > +   dmar_bdf(sid), address, flag);
> > > + break;
> > > + default:
> > > + printf("unknown type=0x%.2x\n", type);
> > > + break;
> > > + }
> > > + //ivhd_showdte();
> > > + /* Clear old event */
> > > + evt->dw0 = 0;
> > > + evt->dw1 = 0;
> > > + evt->dw2 = 0;
> > > + evt->dw3 = 0;
> > > +}
> > > +
> > > +/* AMD: Process IOMMU error from hardware */
> > > +int
> > > +ivhd_poll_events(struct iommu_softc *iommu)
> > > +{
> > > + uint32_t head, tail;
> > > + int sz;
> > > +
> > > + sz = sizeof(struct ivhd_event);
> > > + head = iommu_readl(iommu, EVT_HEAD_REG);
> > > + tail = iommu_readl(iommu, EVT_TAIL_REG);
> > > + if (head == tail) {
> > > + /* No pending events */
> > > + return (0);
> > > + }
> > > + while (head != tail) {
> > > + ivhd_show_event(iommu, iommu->evt_tbl + head, head);
> > > + head = (head + sz) % EVT_TBL_SIZE;
> > > + }
> > > + iommu_writel(iommu, EVT_HEAD_REG, head);
> > > + return (0);
> > > +}
> > > +
> > > +/* AMD: Issue command to IOMMU queue */
> > > +int
> > > +_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
> > > +{
> > > + u_long rf;
> > > + uint32_t head, tail, next;
> > > + int sz;
> > > +
> > > + head = iommu_readl(iommu, CMD_HEAD_REG);
> > > + sz = sizeof(*cmd);
> > > + rf = intr_disable();
> > > + tail = iommu_readl(iommu, CMD_TAIL_REG);
> > > + next = (tail + sz) % CMD_TBL_SIZE;
> > > + if (next == head) {
> > > + printf("FULL\n");
> > > + /* Queue is full */
> > > + intr_restore(rf);
> > > + return -EBUSY;
> > > + }
> > > + memcpy(iommu->cmd_tbl + tail, cmd, sz);
> > > + iommu_writel(iommu, CMD_TAIL_REG, next);
> > > + intr_restore(rf);
> > > + return (tail / sz);
> > > +}
> > > +
> > > +#define IVHD_MAXDELAY 8
> > > +
> > > +int
> > > +ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
> > > +{
> > > + struct ivhd_command wq = { 0 };
> > > + volatile uint64_t wv __aligned(16) = 0LL;
> > > + paddr_t paddr;
> > > + int rc, i;
> > > + static int mi;
> > > +

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Jordan Hargrave
Made changes for the iommu_readq -> iommu_read_8 rename, and the hwdte table
for the AMD IOMMU is now allocated dynamically.

On Fri, Sep 04, 2020 at 09:17:18PM +0200, Mark Kettenis wrote:

> > Date: Fri, 4 Sep 2020 00:50:44 -0500
> > From: Jordan Hargrave <[hidden email]>
>
> A few hints below...
>
> > > > +
> > > > +/* Page Table Entry per domain */
> > > > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > > > +
> > > > +/* Alias mapping */
> > > > +#define SID_INVALID 0x80000000L
> > > > +static uint32_t sid_flag[65536];
> > >
> > > Can we avoid having these large arrays, or at least allocate them
> > > dynamically?  That would also avoid the explicit alignment which is
> > > somewhat nasty since it affects the entire kernel.
> >
> > OK. But the hwdte does need the 2M area to be all contiguous but it is not
> > needed for DMAR/Intel.  You *can* have up to 8 different device table entries
> > though to split up the area.
>
> The appropriate interface to use in this context is
> bus_dmamem_alloc(9).  You can specify alignment, and if you set nsegs
> to 1, you will get memory that is physicaly contiguous.
>
> To map the memory into kernel address space you'll need create a map
> using bus_dmamap_create(9) and map it using bus_dmamem_map(9).  Then
> instead of using pmap_extract(9) you use bus_dmamap_load_raw(9) which
> then populates the physical addresses.
>
> Many of the drivers written by dlg@ define convenience functions to do
> all these steps, although interestingly enough he tends to use
> bus_dmamap_load(9) instead of bus_dmamap_load_raw(9) which is
> sub-optimal.
>
> > > > +
> > > > +struct domain_dev {
> > > > + int sid;
> > > > + int sec;
> > > > + int sub;
> > > > + TAILQ_ENTRY(domain_dev) link;
> > > > +};
> > > > +
> > > > +struct domain {
> > > > + struct iommu_softc *iommu;
> > > > + int did;
> > > > + int gaw;
> > > > + struct pte_entry *pte;
> > > > + paddr_t ptep;
> > > > + struct bus_dma_tag dmat;
> > > > + int flag;
> > > > +
> > > > + struct mutex            exlck;
> > > > + char exname[32];
> > > > + struct extent *iovamap;
> > > > + TAILQ_HEAD(,domain_dev) devices;
> > > > + TAILQ_ENTRY(domain) link;
> > > > +};
> > > > +
> > > > +#define DOM_DEBUG 0x1
> > > > +#define DOM_NOMAP 0x2
> > > > +
> > > > +struct dmar_devlist {
> > > > + int type;
> > > > + int bus;
> > > > + int ndp;
> > > > + struct acpidmar_devpath *dp;
> > > > + TAILQ_ENTRY(dmar_devlist) link;
> > > > +};
> > > > +
> > > > +TAILQ_HEAD(devlist_head, dmar_devlist);
> > > > +
> > > > +struct ivhd_devlist {
> > > > + int start_id;
> > > > + int end_id;
> > > > + int cfg;
> > > > + TAILQ_ENTRY(ivhd_devlist) link;
> > > > +};
> > > > +
> > > > +struct rmrr_softc {
> > > > + TAILQ_ENTRY(rmrr_softc) link;
> > > > + struct devlist_head devices;
> > > > + int segment;
> > > > + uint64_t start;
> > > > + uint64_t end;
> > > > +};
> > > > +
> > > > +struct atsr_softc {
> > > > + TAILQ_ENTRY(atsr_softc) link;
> > > > + struct devlist_head devices;
> > > > + int segment;
> > > > + int flags;
> > > > +};
> > > > +
> > > > +struct iommu_pic {
> > > > + struct pic pic;
> > > > + struct iommu_softc *iommu;
> > > > +};
> > > > +
> > > > +#define IOMMU_FLAGS_CATCHALL 0x1
> > > > +#define IOMMU_FLAGS_BAD 0x2
> > > > +#define IOMMU_FLAGS_SUSPEND 0x4
> > > > +
> > > > +struct iommu_softc {
> > > > + TAILQ_ENTRY(iommu_softc)link;
> > > > + struct devlist_head devices;
> > > > + int id;
> > > > + int flags;
> > > > + int segment;
> > > > +
> > > > + struct mutex reg_lock;
> > > > +
> > > > + bus_space_tag_t iot;
> > > > + bus_space_handle_t ioh;
> > > > +
> > > > + uint64_t cap;
> > > > + uint64_t ecap;
> > > > + uint32_t gcmd;
> > > > +
> > > > + int mgaw;
> > > > + int agaw;
> > > > + int ndoms;
> > > > +
> > > > + struct root_entry *root;
> > > > + struct context_entry *ctx[256];
> > > > +
> > > > + void *intr;
> > > > + struct iommu_pic pic;
> > > > + int fedata;
> > > > + uint64_t feaddr;
> > > > + uint64_t rtaddr;
> > > > +
> > > > + // Queued Invalidation
> > > > + int qi_head;
> > > > + int qi_tail;
> > > > + paddr_t qip;
> > > > + struct qi_entry *qi;
> > > > +
> > > > + struct domain *unity;
> > > > + TAILQ_HEAD(,domain) domains;
> > > > +
> > > > + // AMD iommu
> > > > + struct ivhd_dte         *dte;
> > > > + void *cmd_tbl;
> > > > + void *evt_tbl;
> > > > + paddr_t cmd_tblp;
> > > > + paddr_t evt_tblp;
> > > > + uint64_t wv[128] __aligned(4096);
> > >
> > > This wv array isn't used as far as I can tell.
> >
> > Ah I was doing some testing on the commands.. I keep getting iommu
> > command timeouts
>
> Hmm, yes, using the wv variable on the stack as you do below is a bit
> suspect.  Using __aligned() for stack variables may not give you the
> proper alignment if the stack alignment is smaller.
>
diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index 2c49f91a1..1eda12bc9 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -45,6 +45,7 @@ acpibtn* at acpi?
 acpicpu* at acpi?
 acpicmos* at acpi?
 acpidock* at acpi?
+acpidmar0 at acpi?
 acpiec* at acpi?
 acpipci* at acpi?
 acpiprt* at acpi?
diff --git a/sys/arch/amd64/conf/RAMDISK b/sys/arch/amd64/conf/RAMDISK
index 10148add1..7d4134000 100644
--- a/sys/arch/amd64/conf/RAMDISK
+++ b/sys/arch/amd64/conf/RAMDISK
@@ -34,6 +34,7 @@ acpipci* at acpi?
 acpiprt* at acpi?
 acpimadt0 at acpi?
 #acpitz* at acpi?
+acpidmar0 at acpi? disable
 
 mpbios0 at bios0
 
diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD
index 91022751e..d043ff8dd 100644
--- a/sys/arch/amd64/conf/RAMDISK_CD
+++ b/sys/arch/amd64/conf/RAMDISK_CD
@@ -48,6 +48,7 @@ sdhc* at acpi?
 acpihve* at acpi?
 chvgpio*        at acpi?
 glkgpio* at acpi?
+acpidmar0 at acpi? disable
 
 mpbios0 at bios0
 
diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
index 7a5d40bf4..74c7fe5a9 100644
--- a/sys/arch/amd64/conf/files.amd64
+++ b/sys/arch/amd64/conf/files.amd64
@@ -132,6 +132,10 @@ device pchb: pcibus, agpbus
 attach pchb at pci
 file arch/amd64/pci/pchb.c pchb
 
+device vmmpci
+attach vmmpci at pci
+file   arch/amd64/pci/vmmpci.c vmmpci
+
 # AMAS AMD memory address switch
 device amas
 attach amas at pci
diff --git a/sys/arch/amd64/include/pci_machdep.h b/sys/arch/amd64/include/pci_machdep.h
index bc295cc22..c725bdc73 100644
--- a/sys/arch/amd64/include/pci_machdep.h
+++ b/sys/arch/amd64/include/pci_machdep.h
@@ -91,7 +91,8 @@ void *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
     int, struct cpu_info *,
     int (*)(void *), void *, const char *);
 void pci_intr_disestablish(pci_chipset_tag_t, void *);
-#define pci_probe_device_hook(c, a) (0)
+int pci_probe_device_hook(pci_chipset_tag_t,
+    struct pci_attach_args *);
 
 void pci_dev_postattach(struct device *, struct pci_attach_args *);
 
diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c
index cf4e835de..b700946a4 100644
--- a/sys/arch/amd64/pci/pci_machdep.c
+++ b/sys/arch/amd64/pci/pci_machdep.c
@@ -89,6 +89,11 @@
 #include <machine/mpbiosvar.h>
 #endif
 
+#include "acpi.h"
+#if NACPI > 0
+#include <dev/acpi/acpidmar.h>
+#endif
+
 /*
  * Memory Mapped Configuration space access.
  *
@@ -797,7 +802,15 @@ pci_init_extents(void)
  }
 }
 
-#include "acpi.h"
+int
+pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
+{
+#if NACPI > 0
+ acpidmar_pci_hook(pc, pa);
+#endif
+ return 0;
+}
+
 #if NACPI > 0
 void acpi_pci_match(struct device *, struct pci_attach_args *);
 pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t);
diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c
index a6239198e..ea11483ad 100644
--- a/sys/dev/acpi/acpi.c
+++ b/sys/dev/acpi/acpi.c
@@ -49,6 +49,7 @@
 #include <dev/acpi/amltypes.h>
 #include <dev/acpi/acpidev.h>
 #include <dev/acpi/dsdt.h>
+#include <dev/acpi/acpidmar.h>
 #include <dev/wscons/wsdisplayvar.h>
 
 #include <dev/pci/pcidevs.h>
@@ -2448,6 +2449,8 @@ acpi_sleep_pm(struct acpi_softc *sc, int state)
     sc->sc_fadt->pm2_cnt_blk && sc->sc_fadt->pm2_cnt_len)
  acpi_write_pmreg(sc, ACPIREG_PM2_CNT, 0, ACPI_PM2_ARB_DIS);
 
+ acpidmar_sw(DVACT_SUSPEND);
+
  /* Write SLP_TYPx values */
  rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
  regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
@@ -2483,6 +2486,8 @@ acpi_resume_pm(struct acpi_softc *sc, int fromstate)
 {
  uint16_t rega, regb, en;
 
+ acpidmar_sw(DVACT_RESUME);
+
  /* Write SLP_TYPx values */
  rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
  regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
diff --git a/sys/dev/acpi/acpidmar.c b/sys/dev/acpi/acpidmar.c
new file mode 100644
index 000000000..e7daddc43
--- /dev/null
+++ b/sys/dev/acpi/acpidmar.c
@@ -0,0 +1,3028 @@
+/*
+ * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+#include <sys/types.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/apicvar.h>
+#include <machine/biosvar.h>
+#include <machine/cpuvar.h>
+#include <machine/bus.h>
+
+#include <dev/acpi/acpireg.h>
+#include <dev/acpi/acpivar.h>
+#include <dev/acpi/acpidev.h>
+#include <dev/acpi/amltypes.h>
+#include <dev/acpi/dsdt.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/i8259.h>
+#include <machine/i82093reg.h>
+#include <machine/i82093var.h>
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+
+#include <machine/mpbiosvar.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcidevs.h>
+#include <dev/pci/ppbreg.h>
+
+#include "ioapic.h"
+
+#include "acpidmar.h"
+#include "amd_iommu.h"
+
+#define dprintf(x...)
+
+#ifdef DDB
+int acpidmar_ddb = 0;
+#endif
+
+int intel_iommu_gfx_mapped = 0;
+int force_cm = 1;
+
+void showahci(void *);
+
+/* Page Table Entry per domain */
+struct iommu_softc;
+
+static inline int
+mksid(int b, int d, int f)
+{
+ return (b << 8) + (d << 3) + f;
+}
+
+static inline int
+sid_devfn(int sid)
+{
+ return sid & 0xff;
+}
+
+static inline int
+sid_bus(int sid)
+{
+ return (sid >> 8) & 0xff;
+}
+
+static inline int
+sid_dev(int sid)
+{
+ return (sid >> 3) & 0x1f;
+}
+
+static inline int
+sid_fun(int sid)
+{
+ return (sid >> 0) & 0x7;
+}
+
+/* Alias mapping */
+#define SID_INVALID 0x80000000L
+static uint32_t sid_flag[65536];
+
+struct domain_dev {
+ int sid;
+ int sec;
+ int sub;
+ TAILQ_ENTRY(domain_dev) link;
+};
+
+struct domain {
+ struct iommu_softc *iommu;
+ int did;
+ int gaw;
+ struct pte_entry *pte;
+ paddr_t ptep;
+ struct bus_dma_tag dmat;
+ int flag;
+
+ struct mutex            exlck;
+ char exname[32];
+ struct extent *iovamap;
+ TAILQ_HEAD(,domain_dev) devices;
+ TAILQ_ENTRY(domain) link;
+};
+
+#define DOM_DEBUG 0x1
+#define DOM_NOMAP 0x2
+
+struct dmar_devlist {
+ int type;
+ int bus;
+ int ndp;
+ struct acpidmar_devpath *dp;
+ TAILQ_ENTRY(dmar_devlist) link;
+};
+
+TAILQ_HEAD(devlist_head, dmar_devlist);
+
+struct ivhd_devlist {
+ int start_id;
+ int end_id;
+ int cfg;
+ TAILQ_ENTRY(ivhd_devlist) link;
+};
+
+struct rmrr_softc {
+ TAILQ_ENTRY(rmrr_softc) link;
+ struct devlist_head devices;
+ int segment;
+ uint64_t start;
+ uint64_t end;
+};
+
+struct atsr_softc {
+ TAILQ_ENTRY(atsr_softc) link;
+ struct devlist_head devices;
+ int segment;
+ int flags;
+};
+
+struct iommu_pic {
+ struct pic pic;
+ struct iommu_softc *iommu;
+};
+
+#define IOMMU_FLAGS_CATCHALL 0x1
+#define IOMMU_FLAGS_BAD 0x2
+#define IOMMU_FLAGS_SUSPEND 0x4
+
+struct iommu_softc {
+ TAILQ_ENTRY(iommu_softc)link;
+ struct devlist_head devices;
+ int id;
+ int flags;
+ int segment;
+
+ struct mutex reg_lock;
+
+ bus_space_tag_t iot;
+ bus_space_handle_t ioh;
+
+ uint64_t cap;
+ uint64_t ecap;
+ uint32_t gcmd;
+
+ int mgaw;
+ int agaw;
+ int ndoms;
+
+ struct root_entry *root;
+ struct context_entry *ctx[256];
+
+ void *intr;
+ struct iommu_pic pic;
+ int fedata;
+ uint64_t feaddr;
+ uint64_t rtaddr;
+
+ // Queued Invalidation
+ int qi_head;
+ int qi_tail;
+ paddr_t qip;
+ struct qi_entry *qi;
+
+ struct domain *unity;
+ TAILQ_HEAD(,domain) domains;
+
+ // AMD iommu
+ struct ivhd_dte         *dte;
+ void *cmd_tbl;
+ void *evt_tbl;
+ paddr_t cmd_tblp;
+ paddr_t evt_tblp;
+};
+
+static inline int iommu_bad(struct iommu_softc *sc)
+{
+ return (sc->flags & IOMMU_FLAGS_BAD);
+}
+
+static inline int iommu_enabled(struct iommu_softc *sc)
+{
+ if (sc->dte) {
+ return 1;
+ }
+ return (sc->gcmd & GCMD_TE);
+}
+
+struct acpidmar_softc {
+ struct device sc_dev;
+
+ pci_chipset_tag_t sc_pc;
+ bus_space_tag_t sc_memt;
+ int sc_haw;
+ int sc_flags;
+ struct bus_dma_tag sc_dmat;
+
+ struct ivhd_dte *sc_hwdte;
+ paddr_t sc_hwdtep;
+
+ TAILQ_HEAD(,iommu_softc)sc_drhds;
+ TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
+ TAILQ_HEAD(,atsr_softc) sc_atsrs;
+};
+
+int acpidmar_activate(struct device *, int);
+int acpidmar_match(struct device *, void *, void *);
+void acpidmar_attach(struct device *, struct device *, void *);
+struct domain   *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
+
+struct cfattach acpidmar_ca = {
+ sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
+};
+
+struct cfdriver acpidmar_cd = {
+ NULL, "acpidmar", DV_DULL
+};
+
+struct acpidmar_softc *acpidmar_sc;
+int acpidmar_intr(void *);
+int acpiivhd_intr(void *);
+
+#define DID_UNITY 0x1
+
+void _dumppte(struct pte_entry *, int, vaddr_t);
+
+struct domain *domain_create(struct iommu_softc *, int);
+struct domain *domain_lookup(struct acpidmar_softc *, int, int);
+
+void domain_unload_map(struct domain *, bus_dmamap_t);
+void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
+
+void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_pthru(struct domain *, paddr_t, paddr_t);
+
+void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
+void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
+    struct devlist_head *);
+int  acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
+
+void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
+void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
+void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
+void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
+void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
+
+void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
+    const char *);
+
+void iommu_write_4(struct iommu_softc *, int, uint32_t);
+uint32_t iommu_read_4(struct iommu_softc *, int);
+void iommu_write_8(struct iommu_softc *, int, uint64_t);
+uint64_t iommu_read_8(struct iommu_softc *, int);
+void iommu_showfault(struct iommu_softc *, int,
+    struct fault_entry *);
+void iommu_showcfg(struct iommu_softc *, int);
+
+int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
+    struct acpidmar_drhd *);
+int iommu_enable_translation(struct iommu_softc *, int);
+void iommu_enable_qi(struct iommu_softc *, int);
+void iommu_flush_cache(struct iommu_softc *, void *, size_t);
+void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
+void iommu_flush_write_buffer(struct iommu_softc *);
+void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
+
+void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
+void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
+void iommu_flush_tlb(struct iommu_softc *, int, int);
+void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
+
+void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
+
+const char *dmar_bdf(int);
+
+void *iommu_alloc_contig(struct acpidmar_softc *sc, size_t size, paddr_t *paddr);
+
+const char *
+dmar_bdf(int sid)
+{
+ static char bdf[32];
+
+ snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
+    sid_bus(sid), sid_dev(sid), sid_fun(sid));
+
+ return (bdf);
+}
+
+/* busdma */
+static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
+    bus_size_t, int, bus_dmamap_t *);
+static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
+static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
+    struct proc *, int);
+static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
+    int);
+static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
+static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
+    bus_dma_segment_t *, int, bus_size_t, int);
+static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
+static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
+    bus_size_t, int);
+static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
+    bus_dma_segment_t *, int, int *, int);
+static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
+static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
+    caddr_t *, int);
+static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
+static paddr_t dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
+    int, int);
+
+static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
+const char *dom_bdf(struct domain *);
+void domain_map_check(struct domain *);
+
+struct pte_entry *pte_lvl(struct iommu_softc *, struct pte_entry *, vaddr_t, int, uint64_t);
+int  ivhd_poll_events(struct iommu_softc *);
+void ivhd_showit(struct iommu_softc *);
+void ivhd_showdte(struct iommu_softc *);
+void ivhd_showcmd(struct iommu_softc *);
+
+static inline int
+debugme(struct domain *dom)
+{
+ return 0;
+ return (dom->flag & DOM_DEBUG);
+}
+
+void
+domain_map_check(struct domain *dom)
+{
+ struct iommu_softc *iommu;
+ struct domain_dev *dd;
+ struct context_entry *ctx;
+ int v;
+
+ iommu = dom->iommu;
+ TAILQ_FOREACH(dd, &dom->devices, link) {
+ acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
+
+ if (iommu->dte)
+ continue;
+
+ /* Check if this is the first time we are mapped */
+ ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
+ v = context_user(ctx);
+ if (v != 0xA) {
+ printf("  map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
+    iommu->segment,
+    sid_bus(dd->sid),
+    sid_dev(dd->sid),
+    sid_fun(dd->sid),
+    iommu->id,
+    dom->did);
+ context_set_user(ctx, 0xA);
+ }
+ }
+}
+
+/* Map a single page as passthrough - used for DRM */
+void
+dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
+{
+ struct domain *dom = tag->_cookie;
+
+ if (!acpidmar_sc)
+ return;
+ domain_map_check(dom);
+ domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
+}
+
+/* Map a range of pages 1:1 */
+void
+domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
+{
+ domain_map_check(dom);
+ while (start < end) {
+ domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
+ start += VTD_PAGE_SIZE;
+ }
+}
+
+/* Map a single paddr to IOMMU paddr */
+void
+domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
+{
+ paddr_t paddr;
+ struct pte_entry *pte, *npte;
+ int lvl, idx;
+ struct iommu_softc *iommu;
+
+ iommu = dom->iommu;
+ /* Insert physical address into virtual address map
+ * XXX: could we use private pmap here?
+ * essentially doing a pmap_enter(map, va, pa, prot);
+ */
+
+ /* Only handle 4k pages for now */
+ npte = dom->pte;
+ for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl>= VTD_LEVEL0;
+    lvl -= VTD_STRIDE_SIZE) {
+ idx = (va >> lvl) & VTD_STRIDE_MASK;
+ pte = &npte[idx];
+ if (lvl == VTD_LEVEL0) {
+ /* Level 1: Page Table - add physical address */
+ pte->val = pa | flags;
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+ break;
+ } else if (!(pte->val & PTE_P)) {
+ /* Level N: Point to lower level table */
+ iommu_alloc_page(iommu, &paddr);
+ pte->val = paddr | PTE_P | PTE_R | PTE_W;
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+ }
+ npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
+ }
+}
+
+/* Map a single paddr to IOMMU paddr: AMD
+ * physical address breakdown into levels:
+ * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
+ *        5.55555555.44444444.43333333,33222222.22211111.1111----.--------
+ * mode:
+ *  000 = none   shift
+ *  001 = 1 [21].12
+ *  010 = 2 [30].21
+ *  011 = 3 [39].30
+ *  100 = 4 [48].39
+ *  101 = 5 [57]
+ *  110 = 6
+ *  111 = reserved
+ */
+struct pte_entry *
+pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
+ int shift, uint64_t flags)
+{
+ paddr_t paddr;
+ int idx;
+
+ idx = (va >> shift) & VTD_STRIDE_MASK;
+ if (!(pte[idx].val & PTE_P)) {
+ /* Page Table entry is not present... create a new page entry */
+ iommu_alloc_page(iommu, &paddr);
+ pte[idx].val = paddr | flags;
+ iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
+ }
+ return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
+}
+
+void
+domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
+{
+ struct pte_entry *pte;
+ struct iommu_softc *iommu;
+ int idx;
+
+ iommu = dom->iommu;
+ /* Insert physical address into virtual address map
+ * XXX: could we use private pmap here?
+ * essentially doing a pmap_enter(map, va, pa, prot);
+ */
+
+ /* Always assume AMD levels=4                           */
+ /*        39        30        21        12              */
+ /* ---------|---------|---------|---------|------------ */
+ pte = dom->pte;
+ //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
+ pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
+ pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
+ //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
+
+ if (flags)
+ flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
+
+ /* Level 1: Page Table - add physical address */
+ idx = (va >> 12) & 0x1FF;
+ pte[idx].val = pa | flags;
+
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+}
+
/*
 * Debug helper: print a DMA segment list with the owning domain's
 * BDF.  Currently disabled by the unconditional early return below.
 */
static void
dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
    const char *lbl)
{
	struct domain *dom = tag->_cookie;
	int i;

	return;		/* tracing disabled; delete this line to re-enable */
	if (!debugme(dom))
		return;
	printf("%s: %s\n", lbl, dom_bdf(dom));
	for (i = 0; i < nseg; i++) {
		printf("  %.16llx %.8x\n",
		    (uint64_t)segs[i].ds_addr,
		    (uint32_t)segs[i].ds_len);
	}
}
+
+/* Unload mapping */
+void
+domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
+{
+ bus_dma_segment_t *seg;
+ paddr_t base, end, idx;
+ psize_t alen;
+ int i;
+
+ if (iommu_bad(dom->iommu)) {
+ printf("unload map no iommu\n");
+ return;
+ }
+
+ //acpidmar_intr(dom->iommu);
+ for (i = 0; i < dmam->dm_nsegs; i++) {
+ seg  = &dmam->dm_segs[i];
+
+ base = trunc_page(seg->ds_addr);
+ end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
+ alen = end - base;
+
+ if (debugme(dom)) {
+ printf("  va:%.16llx len:%x\n",
+    (uint64_t)base, (uint32_t)alen);
+ }
+
+ /* Clear PTE */
+ for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
+ domain_map_page(dom, base + idx, 0, 0);
+
+ if (dom->flag & DOM_NOMAP) {
+ //printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
+ continue;
+ }
+
+ mtx_enter(&dom->exlck);
+ if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
+ panic("domain_unload_map: extent_free");
+ }
+ mtx_leave(&dom->exlck);
+ }
+}
+
+/* map.segs[x].ds_addr is modified to IOMMU virtual PA */
+void
+domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
+{
+ bus_dma_segment_t *seg;
+ struct iommu_softc *iommu;
+ paddr_t base, end, idx;
+ psize_t alen;
+ u_long res;
+ int i;
+
+ iommu = dom->iommu;
+ if (!iommu_enabled(iommu)) {
+ /* Lazy enable translation when required */
+ if (iommu_enable_translation(iommu, 1)) {
+ return;
+ }
+ }
+ domain_map_check(dom);
+ //acpidmar_intr(iommu);
+ for (i = 0; i < map->dm_nsegs; i++) {
+ seg = &map->dm_segs[i];
+
+ base = trunc_page(seg->ds_addr);
+ end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
+ alen = end - base;
+ res  = base;
+
+ if (dom->flag & DOM_NOMAP) {
+ goto nomap;
+ }
+
+ /* Allocate DMA Virtual Address */
+ mtx_enter(&dom->exlck);
+ if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
+    map->_dm_boundary, EX_NOWAIT, &res)) {
+ panic("domain_load_map: extent_alloc");
+ }
+ if (res == -1) {
+ panic("got -1 address\n");
+ }
+ mtx_leave(&dom->exlck);
+
+ /* Reassign DMA address */
+ seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
+nomap:
+ if (debugme(dom)) {
+ printf("  LOADMAP: %.16llx %x => %.16llx\n",
+    (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
+    (uint64_t)res);
+ }
+ for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
+ domain_map_page(dom, res + idx, base + idx,
+    PTE_P | pteflag);
+ }
+ }
+ if ((iommu->cap & CAP_CM) || force_cm) {
+ iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+}
+
/*
 * Format "bus.dev.fun iommu:id did" for the domain's first device.
 * NOTE: returns a static buffer, so the function is not reentrant and
 * the result is only valid until the next call (debug output only).
 */
const char *
dom_bdf(struct domain *dom)
{
	struct domain_dev *dd;
	static char mmm[48];

	dd = TAILQ_FIRST(&dom->devices);
	snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
	    dmar_bdf(dd->sid), dom->iommu->id, dom->did,
	    dom->did == DID_UNITY ? " [unity]" : "");
	return (mmm);
}
+
+/* Bus DMA Map functions */
+static int
+dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
+    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
+{
+ int rc;
+
+ rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
+    flags, dmamp);
+ if (!rc) {
+ dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static void
+dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
+{
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
+ _bus_dmamap_destroy(tag, dmam);
+}
+
+static int
+dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
+    bus_size_t buflen, struct proc *p, int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
+    int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
+    int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
+    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+    __FUNCTION__);
+ }
+ return (rc);
+}
+
+static void
+dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
+{
+ struct domain *dom = tag->_cookie;
+
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
+ domain_unload_map(dom, dmam);
+ _bus_dmamap_unload(tag, dmam);
+}
+
/*
 * DMA sync hook.  The disabled sketch below would tighten PTE
 * permissions per sync direction; note its 'flag' declaration is
 * commented out, so it would not compile if re-enabled as-is.  For
 * now this simply forwards to the native sync routine.
 */
static void
dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
    bus_size_t len, int ops)
{
#if 0
	struct domain *dom = tag->_cookie;
	//int flag;

	flag = PTE_P;
	//acpidmar_intr(dom->iommu);
	if (ops == BUS_DMASYNC_PREREAD) {
		/* make readable */
		flag |= PTE_R;
	}
	else if (ops == BUS_DMASYNC_PREWRITE) {
		/* make writeable */
		flag |= PTE_W;
	}
	dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
#endif
	_bus_dmamap_sync(tag, dmam, offset, len, ops);
}
+
+static int
+dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
+    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
+    int flags)
+{
+ int rc;
+
+ rc = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
+    rsegs, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
+ }
+ return (rc);
+}
+
+static void
+dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
+{
+ dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
+ _bus_dmamem_free(tag, segs, nsegs);
+}
+
+static int
+dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
+    size_t size, caddr_t *kvap, int flags)
+{
+ dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
+ return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
+}
+
+static void
+dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
+{
+ struct domain *dom = tag->_cookie;
+
+ if (debugme(dom)) {
+ printf("dmamap_unmap: %s\n", dom_bdf(dom));
+ }
+ _bus_dmamem_unmap(tag, kva, size);
+}
+
+static paddr_t
+dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
+    off_t off, int prot, int flags)
+{
+ dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
+ return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
+}
+
+/*===================================
+ * IOMMU code
+ *===================================*/
+
+/* Intel: Set Context Root Address */
+void
+iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
+{
+ int i, sts;
+
+ mtx_enter(&iommu->reg_lock);
+ iommu_write_8(iommu, DMAR_RTADDR_REG, paddr);
+ iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
+ for (i = 0; i < 5; i++) {
+ sts = iommu_read_4(iommu, DMAR_GSTS_REG);
+ if (sts & GSTS_RTPS)
+ break;
+ }
+ mtx_leave(&iommu->reg_lock);
+
+ if (i == 5) {
+ printf("set_rtaddr fails\n");
+ }
+}
+
+/* Allocate contiguous memory (1Mb) for the Device Table Entries */
+void *
+iommu_alloc_contig(struct acpidmar_softc *sc, size_t size, paddr_t *paddr)
+{
+ caddr_t vaddr;
+ bus_dmamap_t map;
+ bus_dma_segment_t seg;
+ bus_dma_tag_t dmat;
+ int rc, nsegs;
+
+ rc = _bus_dmamap_create(dmat, size, 1, size, 0,
+ BUS_DMA_NOWAIT, &map);
+ if (rc != 0) {
+ printf("hwdte_create fails\n");
+ return NULL;
+ }
+ rc = _bus_dmamem_alloc(dmat, size, 4, 0, &seg, 1,
+ &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO);
+ if (rc != 0) {
+ printf("hwdte alloc fails\n");
+ return NULL;
+ }
+ rc = _bus_dmamem_map(dmat, &seg, 1, size, &vaddr,
+ BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
+ if (rc != 0) {
+ printf("hwdte map fails\n");
+ return NULL;
+ }
+ rc = _bus_dmamap_load_raw(dmat, map, &seg, 1, size, BUS_DMA_NOWAIT);
+ if (rc != 0) {
+ printf("hwdte load raw fails\n");
+ return NULL;
+ }
+ *paddr = map->dm_segs[0].ds_addr;
+ printf("hwdte: Got P:%lx V:%p\n", *paddr, vaddr);
+ return vaddr;
+}
+
+/* COMMON: Allocate a new memory page */
+void *
+iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
+{
+ void *va;
+
+ *paddr = 0;
+ va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
+ if (va == NULL) {
+ panic("can't allocate page\n");
+ }
+ pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
+ return (va);
+}
+
+
/*
 * Intel: submit one descriptor to the invalidation queue.
 * NOT IMPLEMENTED -- the sketch below is compiled out, so every QI
 * command is currently dropped; callers fall back to register-based
 * invalidation elsewhere.
 */
void
iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
{
#if 0
	struct qi_entry *pi, *pw;

	idx = iommu->qi_head;
	pi = &iommu->qi[idx];
	pw = &iommu->qi[(idx+1) % MAXQ];
	iommu->qi_head = (idx+2) % MAXQ;

	memcpy(pw, &qi, sizeof(qi));
	issue command;
	while (pw->xxx)
		;
#endif
}
+
/*
 * Intel: flush IOTLB entries via the invalidation queue.
 * NOTE(review): the IOTLB_PAGE case never encodes the target address
 * into qi.hi, so a page request effectively degrades to a domain
 * flush -- confirm before enabling QI (iommu_issue_qi is a stub).
 */
void
iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
{
	struct qi_entry qi;

	/* Use queued invalidation */
	qi.hi = 0;
	switch (mode) {
	case IOTLB_GLOBAL:
		qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
		break;
	case IOTLB_DOMAIN:
		qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
		    QI_IOTLB_DID(did);
		break;
	case IOTLB_PAGE:
		qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
		qi.hi = 0;
		break;
	}
	/* Request read/write drain when the hardware supports it */
	if (iommu->cap & CAP_DRD)
		qi.lo |= QI_IOTLB_DR;
	if (iommu->cap & CAP_DWD)
		qi.lo |= QI_IOTLB_DW;
	iommu_issue_qi(iommu, &qi);
}
+
+/* Intel: Flush Context entries, Queued Invalidation mode */
+void
+iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
+    int sid, int fm)
+{
+ struct qi_entry qi;
+
+ /* Use queued invalidation */
+ qi.hi = 0;
+ switch (mode) {
+ case CTX_GLOBAL:
+ qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
+ break;
+ case CTX_DOMAIN:
+ qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
+ break;
+ case CTX_DEVICE:
+ qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
+    QI_CTX_SID(sid) | QI_CTX_FM(fm);
+ break;
+ }
+ iommu_issue_qi(iommu, &qi);
+}
+
+/* Intel: Flush write buffers */
+void
+iommu_flush_write_buffer(struct iommu_softc *iommu)
+{
+ int i, sts;
+
+ if (iommu->dte)
+ return;
+ if (!(iommu->cap & CAP_RWBF))
+ return;
+ printf("writebuf\n");
+ iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
+ for (i = 0; i < 5; i++) {
+ sts = iommu_read_4(iommu, DMAR_GSTS_REG);
+ if (sts & GSTS_WBFS)
+ break;
+ delay(10000);
+ }
+ if (i == 5) {
+ printf("write buffer flush fails\n");
+ }
+}
+
+void
+iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
+{
+ if (iommu->dte) {
+ pmap_flush_cache((vaddr_t)addr, size);
+ return;
+ }
+ if (!(iommu->ecap & ECAP_C))
+ pmap_flush_cache((vaddr_t)addr, size);
+}
+
+/*
+ * Intel: Flush IOMMU TLB Entries
+ * Flushing can occur globally, per domain or per page
+ */
/*
 * Flush IOMMU TLB entries at global, domain, or page granularity.
 * AMD units are dispatched to ivhd_invalidate_domain(); Intel units
 * use the register-based IOTLB invalidation interface with a bounded
 * completion poll.
 */
void
iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
{
	int n;
	uint64_t val;

	/* Call AMD */
	if (iommu->dte) {
		ivhd_invalidate_domain(iommu, did);
		return;
	}
	val = IOTLB_IVT;
	switch (mode) {
	case IOTLB_GLOBAL:
		val |= IIG_GLOBAL;
		break;
	case IOTLB_DOMAIN:
		val |= IIG_DOMAIN | IOTLB_DID(did);
		break;
	case IOTLB_PAGE:
		val |= IIG_PAGE | IOTLB_DID(did);
		break;
	}

	/* Check for Read/Write Drain */
	if (iommu->cap & CAP_DRD)
		val |= IOTLB_DR;
	if (iommu->cap & CAP_DWD)
		val |= IOTLB_DW;

	mtx_enter(&iommu->reg_lock);

	/* Kick off the invalidation and poll until IVT clears */
	iommu_write_8(iommu, DMAR_IOTLB_REG(iommu), val);
	n = 0;
	do {
		val = iommu_read_8(iommu, DMAR_IOTLB_REG(iommu));
	} while (n++ < 5 && val & IOTLB_IVT);

	mtx_leave(&iommu->reg_lock);

#ifdef DEBUG
	/* Warn once if hardware downgraded the requested granularity */
	{
		static int rg;
		int a, r;

		if (!rg) {
			a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
			r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
			if (a != r) {
				printf("TLB Requested:%d Actual:%d\n", r, a);
				rg = 1;
			}
		}
	}
#endif
}
+
+/* Intel: Flush IOMMU settings
+ * Flushes can occur globally, per domain, or per device
+ */
/*
 * Intel: invalidate context-cache entries at global, domain, or
 * device granularity via the CCMD register (no-op on AMD units).
 * Polls for ICC completion a bounded number of times.
 */
void
iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
{
	uint64_t val;
	int n;

	if (iommu->dte)
		return;
	val = CCMD_ICC;
	switch (mode) {
	case CTX_GLOBAL:
		val |= CIG_GLOBAL;
		break;
	case CTX_DOMAIN:
		val |= CIG_DOMAIN | CCMD_DID(did);
		break;
	case CTX_DEVICE:
		val |= CIG_DEVICE | CCMD_DID(did) |
		    CCMD_SID(sid) | CCMD_FM(fm);
		break;
	}

	mtx_enter(&iommu->reg_lock);

	/* Kick off the invalidation and poll until ICC clears */
	n = 0;
	iommu_write_8(iommu, DMAR_CCMD_REG, val);
	do {
		val = iommu_read_8(iommu, DMAR_CCMD_REG);
	} while (n++ < 5 && val & CCMD_ICC);

	mtx_leave(&iommu->reg_lock);

#ifdef DEBUG
	/* Warn once if hardware downgraded the requested granularity */
	{
		static int rg;
		int a, r;

		if (!rg) {
			a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
			r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
			if (a != r) {
				printf("CTX Requested:%d Actual:%d\n", r, a);
				rg = 1;
			}
		}
	}
#endif
}
+
/*
 * Intel: toggle Queued Invalidation (GCMD.QIE), polling GSTS.QIES
 * for confirmation.  Silently returns if the unit lacks ECAP_QI.
 */
void
iommu_enable_qi(struct iommu_softc *iommu, int enable)
{
	int n = 0;
	int sts;

	if (!(iommu->ecap & ECAP_QI))
		return;

	if (enable) {
		iommu->gcmd |= GCMD_QIE;

		mtx_enter(&iommu->reg_lock);

		iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
		do {
			sts = iommu_read_4(iommu, DMAR_GSTS_REG);
		} while (n++ < 5 && !(sts & GSTS_QIES));

		mtx_leave(&iommu->reg_lock);

		printf("set.qie: %d\n", n);
	} else {
		iommu->gcmd &= ~GCMD_QIE;

		mtx_enter(&iommu->reg_lock);

		iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
		do {
			sts = iommu_read_4(iommu, DMAR_GSTS_REG);
		} while (n++ < 5 && sts & GSTS_QIES);

		mtx_leave(&iommu->reg_lock);

		printf("clr.qie: %d\n", n);
	}
}
+
+/* Intel: Enable IOMMU translation */
+int
+iommu_enable_translation(struct iommu_softc *iommu, int enable)
+{
+ uint32_t sts;
+ uint64_t reg;
+ int n = 0;
+
+ if (iommu->dte)
+ return (0);
+ reg = 0;
+ if (enable) {
+ printf("enable iommu %d\n", iommu->id);
+ iommu_showcfg(iommu, -1);
+
+ iommu->gcmd |= GCMD_TE;
+
+ /* Enable translation */
+ printf(" pre tes: ");
+
+ mtx_enter(&iommu->reg_lock);
+ iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ printf("xxx");
+ do {
+ printf("yyy");
+ sts = iommu_read_4(iommu, DMAR_GSTS_REG);
+ delay(n * 10000);
+ } while (n++ < 5 && !(sts & GSTS_TES));
+ mtx_leave(&iommu->reg_lock);
+
+ printf(" set.tes: %d\n", n);
+
+ if (n >= 5) {
+ printf("error.. unable to initialize iommu %d\n",
+    iommu->id);
+ iommu->flags |= IOMMU_FLAGS_BAD;
+
+ /* Disable IOMMU */
+ iommu->gcmd &= ~GCMD_TE;
+ mtx_enter(&iommu->reg_lock);
+ iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ mtx_leave(&iommu->reg_lock);
+
+ return (1);
+ }
+
+ iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
+ iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
+ } else {
+ iommu->gcmd &= ~GCMD_TE;
+
+ mtx_enter(&iommu->reg_lock);
+
+ iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ do {
+ sts = iommu_read_4(iommu, DMAR_GSTS_REG);
+ } while (n++ < 5 && sts & GSTS_TES);
+ mtx_leave(&iommu->reg_lock);
+
+ printf(" clr.tes: %d\n", n);
+ }
+
+ return (0);
+}
+
/*
 * Intel: map and initialize one DRHD unit: read capabilities, set up
 * the fault interrupt, allocate and program the root table, compute
 * the supported guest address widths, and cache any enable bits the
 * firmware left set in GSTS.  Returns 0 on success, -1 if the
 * register window could not be mapped.
 */
int
iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
    struct acpidmar_drhd *dh)
{
	static int niommu;
	int len = VTD_PAGE_SIZE;
	int i, gaw;
	uint32_t sts;
	paddr_t paddr;

	if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
		return (-1);
	}

	TAILQ_INIT(&iommu->domains);
	iommu->id = ++niommu;
	iommu->flags = dh->flags;
	iommu->segment = dh->segment;
	iommu->iot = sc->sc_memt;

	iommu->cap = iommu_read_8(iommu, DMAR_CAP_REG);
	iommu->ecap = iommu_read_8(iommu, DMAR_ECAP_REG);
	iommu->ndoms = cap_nd(iommu->cap);

	/* Print decoded capability/extended-capability bits */
	printf("  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
	    iommu->cap & CAP_AFL ? "afl " : "",		// adv fault
	    iommu->cap & CAP_RWBF ? "rwbf " : "",	// write-buffer flush
	    iommu->cap & CAP_PLMR ? "plmr " : "",	// protected lo region
	    iommu->cap & CAP_PHMR ? "phmr " : "",	// protected hi region
	    iommu->cap & CAP_CM ? "cm " : "",		// caching mode
	    iommu->cap & CAP_ZLR ? "zlr " : "",		// zero-length read
	    iommu->cap & CAP_PSI ? "psi " : "",		// page invalidate
	    iommu->cap & CAP_DWD ? "dwd " : "",		// write drain
	    iommu->cap & CAP_DRD ? "drd " : "",		// read drain
	    iommu->cap & CAP_FL1GP ? "Gb " : "",	// 1Gb pages
	    iommu->cap & CAP_PI ? "pi " : "");		// posted interrupts
	printf("  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
	    iommu->ecap & ECAP_C ? "c " : "",		// coherent
	    iommu->ecap & ECAP_QI ? "qi " : "",		// queued invalidate
	    iommu->ecap & ECAP_DT ? "dt " : "",		// device iotlb
	    iommu->ecap & ECAP_IR ? "ir " : "",		// intr remap
	    iommu->ecap & ECAP_EIM ? "eim " : "",	// x2apic
	    iommu->ecap & ECAP_PT ? "pt " : "",		// passthrough
	    iommu->ecap & ECAP_SC ? "sc " : "",		// snoop control
	    iommu->ecap & ECAP_ECS ? "ecs " : "",	// extended context
	    iommu->ecap & ECAP_MTS ? "mts " : "",	// memory type
	    iommu->ecap & ECAP_NEST ? "nest " : "",	// nested translations
	    iommu->ecap & ECAP_DIS ? "dis " : "",	// deferred invalidation
	    iommu->ecap & ECAP_PASID ? "pas " : "",	// pasid
	    iommu->ecap & ECAP_PRS ? "prs " : "",	// page request
	    iommu->ecap & ECAP_ERS ? "ers " : "",	// execute request
	    iommu->ecap & ECAP_SRS ? "srs " : "",	// supervisor request
	    iommu->ecap & ECAP_NWFS ? "nwfs " : "",	// no write flag
	    iommu->ecap & ECAP_EAFS ? "eafs " : "");	// extended accessed flag

	mtx_init(&iommu->reg_lock, IPL_HIGH);

	/* Clear Interrupt Masking */
	iommu_write_4(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);

	iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
	    acpidmar_intr, iommu, "dmarintr");

	/* Enable interrupts */
	sts = iommu_read_4(iommu, DMAR_FECTL_REG);
	iommu_write_4(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);

	/* Allocate root pointer */
	iommu->root = iommu_alloc_page(iommu, &paddr);
#ifdef DEBUG
	printf("Allocated root pointer: pa:%.16llx va:%p\n",
	    (uint64_t)paddr, iommu->root);
#endif
	iommu->rtaddr = paddr;
	iommu_flush_write_buffer(iommu);
	iommu_set_rtaddr(iommu, paddr);

#if 0
	if (iommu->ecap & ECAP_QI) {
		/* Queued Invalidation support */
		iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
		iommu_write_8(iommu, DMAR_IQT_REG, 0);
		iommu_write_8(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
	}
	if (iommu->ecap & ECAP_IR) {
		/* Interrupt remapping support */
		iommu_write_8(iommu, DMAR_IRTA_REG, 0);
	}
#endif

	/* Calculate guest address width and supported guest widths;
	 * the loop leaves the LARGEST supported width in iommu->agaw */
	gaw = -1;
	iommu->mgaw = cap_mgaw(iommu->cap);
	printf("gaw: %d { ", iommu->mgaw);
	for (i = 0; i < 5; i++) {
		if (cap_sagaw(iommu->cap) & (1L << i)) {
			gaw = VTD_LEVELTOAW(i);
			printf("%d ", gaw);
			iommu->agaw = gaw;
		}
	}
	printf("}\n");

	/* Cache current status register bits */
	sts = iommu_read_4(iommu, DMAR_GSTS_REG);
	if (sts & GSTS_TES)
		iommu->gcmd |= GCMD_TE;
	if (sts & GSTS_QIES)
		iommu->gcmd |= GCMD_QIE;
	if (sts & GSTS_IRES)
		iommu->gcmd |= GCMD_IRE;
	if (iommu->gcmd) {
		printf("gcmd: %x preset\n", iommu->gcmd);
	}
	/* Drain any pending faults before returning */
	acpidmar_intr(iommu);
	return (0);
}
+
+const char *dmar_rn(int reg);
+
+const char *
+dmar_rn(int reg)
+{
+ switch (reg) {
+ case EVT_HEAD_REG: return "evthead";
+ case EVT_TAIL_REG: return "evttail";
+ case CMD_HEAD_REG: return "cmdhead";
+ case CMD_TAIL_REG: return "cmdtail";
+ case CMD_BASE_REG: return "cmdbase";
+ case EVT_BASE_REG: return "evtbase";
+ case DEV_TAB_BASE_REG: return "devtblbase";
+ case IOMMUCTL_REG: return "iommuctl";
+#if 0
+ case DMAR_VER_REG: return "ver";
+ case DMAR_CAP_REG: return "cap";
+ case DMAR_ECAP_REG: return "ecap";
+ case DMAR_GSTS_REG: return "gsts";
+ case DMAR_GCMD_REG: return "gcmd";
+ case DMAR_FSTS_REG: return "fsts";
+ case DMAR_FECTL_REG: return "fectl";
+ case DMAR_RTADDR_REG: return "rtaddr";
+ case DMAR_FEDATA_REG: return "fedata";
+ case DMAR_FEADDR_REG: return "feaddr";
+ case DMAR_FEUADDR_REG: return "feuaddr";
+ case DMAR_PMEN_REG: return "pmen";
+ case DMAR_IEDATA_REG: return "iedata";
+ case DMAR_IEADDR_REG: return "ieaddr";
+ case DMAR_IEUADDR_REG: return "ieuaddr";
+ case DMAR_IRTA_REG: return "irta";
+ case DMAR_CCMD_REG: return "ccmd";
+ case DMAR_IQH_REG: return "iqh";
+ case DMAR_IQT_REG: return "iqt";
+ case DMAR_IQA_REG: return "iqa";
+#endif
+ }
+ return "unknown";
+}
+
+/* Read/Write IOMMU register */
+uint32_t
+iommu_read_4(struct iommu_softc *iommu, int reg)
+{
+ uint32_t v;
+
+ v = bus_space_read_4(iommu->iot, iommu->ioh, reg);
+ if (reg < 00) {
+ printf("iommu%d: read %x %.8lx [%s]\n",
+    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
+ }
+
+ return (v);
+}
+
+
+#define dbprintf(x...)
+
+void
+iommu_write_4(struct iommu_softc *iommu, int reg, uint32_t v)
+{
+ dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
+    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
+ bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
+}
+
+uint64_t
+iommu_read_8(struct iommu_softc *iommu, int reg)
+{
+ uint64_t v;
+
+ v = bus_space_read_8(iommu->iot, iommu->ioh, reg);
+ if (reg < 00) {
+ printf("iommu%d: read %x %.8lx [%s]\n",
+    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
+ }
+
+ return (v);
+}
+
+void
+iommu_write_8(struct iommu_softc *iommu, int reg, uint64_t v)
+{
+ dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
+    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
+ bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
+}
+
+/* Check if a device is within a device scope */
+int
+acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
+    int sid)
+{
+ struct dmar_devlist *ds;
+ int sub, sec, i;
+ int bus, dev, fun, sbus;
+ pcireg_t reg;
+ pcitag_t tag;
+
+ sbus = sid_bus(sid);
+ TAILQ_FOREACH(ds, devlist, link) {
+ bus = ds->bus;
+ dev = ds->dp[0].device;
+ fun = ds->dp[0].function;
+ /* Walk PCI bridges in path */
+ for (i = 1; i < ds->ndp; i++) {
+ tag = pci_make_tag(pc, bus, dev, fun);
+ reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
+ bus = PPB_BUSINFO_SECONDARY(reg);
+ dev = ds->dp[i].device;
+ fun = ds->dp[i].function;
+ }
+
+ /* Check for device exact match */
+ if (sid == mksid(bus, dev, fun)) {
+ return DMAR_ENDPOINT;
+ }
+
+ /* Check for device subtree match */
+ if (ds->type == DMAR_BRIDGE) {
+ tag = pci_make_tag(pc, bus, dev, fun);
+ reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
+ sec = PPB_BUSINFO_SECONDARY(reg);
+ sub = PPB_BUSINFO_SUBORDINATE(reg);
+ if (sec <= sbus && sbus <= sub) {
+ return DMAR_BRIDGE;
+ }
+ }
+ }
+
+ return (0);
+}
+
/*
 * Create a translation domain with id 'did' on an IOMMU: allocate
 * the top-level page table, install the dmar_* bus_dma overrides
 * with the domain as cookie, and build the IOVA extent allocator
 * (starting at 16Mb, capped by the guest address width, with the
 * local-APIC interrupt range 0xFEE00000 reserved out).
 */
struct domain *
domain_create(struct iommu_softc *iommu, int did)
{
	struct domain *dom;
	int gaw;

	printf("iommu%d: create domain: %.4x\n", iommu->id, did);
	dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
	dom->did = did;
	dom->iommu = iommu;
	dom->pte = iommu_alloc_page(iommu, &dom->ptep);
	TAILQ_INIT(&dom->devices);

	/* Setup DMA */
	dom->dmat._cookie = dom;
	dom->dmat._dmamap_create    = dmar_dmamap_create;	// nop
	dom->dmat._dmamap_destroy   = dmar_dmamap_destroy;	// nop
	dom->dmat._dmamap_load      = dmar_dmamap_load;		// lm
	dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf;	// lm
	dom->dmat._dmamap_load_uio  = dmar_dmamap_load_uio;	// lm
	dom->dmat._dmamap_load_raw  = dmar_dmamap_load_raw;	// lm
	dom->dmat._dmamap_unload    = dmar_dmamap_unload;	// um
	dom->dmat._dmamap_sync      = dmar_dmamap_sync;		// lm
	dom->dmat._dmamem_alloc     = dmar_dmamem_alloc;	// nop
	dom->dmat._dmamem_free      = dmar_dmamem_free;		// nop
	dom->dmat._dmamem_map       = dmar_dmamem_map;		// nop
	dom->dmat._dmamem_unmap     = dmar_dmamem_unmap;	// nop
	dom->dmat._dmamem_mmap      = dmar_dmamem_mmap;

	snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
	    iommu->id, dom->did);

	/* Setup IOMMU address map */
	gaw = min(iommu->agaw, iommu->mgaw);
	dom->iovamap = extent_create(dom->exname, 1024*1024*16,
	    (1LL << gaw)-1,
	    M_DEVBUF, NULL, 0,
	    EX_WAITOK|EX_NOCOALESCE);

	/* Zero out Interrupt region */
	extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
	    EX_WAITOK);
	mtx_init(&dom->exlck, IPL_HIGH);

	TAILQ_INSERT_TAIL(&iommu->domains, dom, link);

	return dom;
}
+
+void domain_add_device(struct domain *dom, int sid)
+{
+ struct domain_dev *ddev;
+
+ printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
+ ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
+ ddev->sid = sid;
+ TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
+
+ /* Should set context entry here?? */
+}
+
+void domain_remove_device(struct domain *dom, int sid)
+{
+ struct domain_dev *ddev, *tmp;
+
+ TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
+ if (ddev->sid == sid) {
+ TAILQ_REMOVE(&dom->devices, ddev, link);
+ free(ddev, sizeof(*ddev), M_DEVBUF);
+ }
+ }
+}
+
+/* Lookup domain by segment & source id (bus.device.function) */
+struct domain *
+domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
+{
+ struct iommu_softc *iommu;
+ struct domain_dev *ddev;
+ struct domain *dom;
+ int rc;
+
+ if (sc == NULL) {
+ return NULL;
+ }
+
+ /* Lookup IOMMU for this device */
+ TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
+ if (iommu->segment != segment)
+ continue;
+ /* Check for devscope match or catchall iommu */
+ rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
+ if (rc != 0 || iommu->flags) {
+ break;
+ }
+ }
+ if (!iommu) {
+ printf("%s: no iommu found\n", dmar_bdf(sid));
+ return NULL;
+ }
+
+ //acpidmar_intr(iommu);
+
+ /* Search domain devices */
+ TAILQ_FOREACH(dom, &iommu->domains, link) {
+ TAILQ_FOREACH(ddev, &dom->devices, link) {
+ /* XXX: match all functions? */
+ if (ddev->sid == sid) {
+ return dom;
+ }
+ }
+ }
+ if (iommu->ndoms <= 2) {
+ /* Running out of domains.. create catchall domain */
+ if (!iommu->unity) {
+ iommu->unity = domain_create(iommu, 1);
+ }
+ dom = iommu->unity;
+ } else {
+ dom = domain_create(iommu, --iommu->ndoms);
+ }
+ if (!dom) {
+ printf("no domain here\n");
+ return NULL;
+ }
+
+ /* Add device to domain */
+ domain_add_device(dom, sid);
+
+ return dom;
+}
+
+/* Map Guest Pages into IOMMU */
+void  _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
+{
+ bus_size_t i;
+ paddr_t hpa;
+
+ if (dom == NULL) {
+ return;
+ }
+ printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
+ for (i = 0; i < len; i += PAGE_SIZE) {
+ hpa = 0;
+ pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
+ domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
+ gpa += PAGE_SIZE;
+ va  += PAGE_SIZE;
+ }
+}
+
+/* Find IOMMU for a given PCI device */
+void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
+{
+ struct domain *dom;
+
+ dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
+ if (dom) {
+ *id = dom->did;
+ }
+ return dom;
+}
+
+void domain_map_device(struct domain *dom, int sid);
+
/*
 * Program the hardware lookup entry that routes a device's DMA into
 * this domain: the device-table entry (DTE) on AMD, or the
 * root/context entry pair on Intel.  Idempotent -- entries already
 * marked valid/present are left untouched.
 */
void
domain_map_device(struct domain *dom, int sid)
{
	struct iommu_softc *iommu;
	struct context_entry *ctx;
	paddr_t paddr;
	int bus, devfn;
	int tt, lvl;

	iommu = dom->iommu;

	bus = sid_bus(sid);
	devfn = sid_devfn(sid);
	/* AMD attach device */
	if (iommu->dte) {
		struct ivhd_dte *dte = &iommu->dte[sid];
		if (!dte->dw0) {
			/* Setup Device Table Entry: bus.devfn */
			printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
			dte_set_host_page_table_root_ptr(dte, dom->ptep);
			dte_set_domain(dte, dom->did);
			dte_set_mode(dte, 3);	// Set 4 level PTE
			dte_set_tv(dte);
			dte_set_valid(dte);
			ivhd_flush_devtab(iommu, dom->did);
			ivhd_showdte(iommu);
		}
		return;
	}

	/* Create Bus mapping: one context table per bus, on demand */
	if (!root_entry_is_valid(&iommu->root[bus])) {
		iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
		iommu->root[bus].lo = paddr | ROOT_P;
		iommu_flush_cache(iommu, &iommu->root[bus],
		    sizeof(struct root_entry));
		dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
		    iommu->id, bus, (uint64_t)paddr,
		    iommu->ctx[bus]);
	}

	/* Create DevFn mapping */
	ctx = iommu->ctx[bus] + devfn;
	if (!context_entry_is_valid(ctx)) {
		tt = CTX_T_MULTI;
		lvl = VTD_AWTOLEVEL(iommu->agaw);

		/* Initialize context */
		context_set_slpte(ctx, dom->ptep);
		context_set_translation_type(ctx, tt);
		context_set_domain_id(ctx, dom->did);
		context_set_address_width(ctx, lvl);
		context_set_present(ctx);

		/* Flush it */
		iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
		if ((iommu->cap & CAP_CM) || force_cm) {
			iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
			iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
		} else {
			iommu_flush_write_buffer(iommu);
		}
		dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
		    iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
		    dom->did, tt);
	}
}
+
+struct domain *
+acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
+{
+ static struct domain *dom;
+
+ dom = domain_lookup(sc, segment, sid);
+ if (!dom) {
+ printf("no domain: %s\n", dmar_bdf(sid));
+ return NULL;
+ }
+
+ if (mapctx) {
+ domain_map_device(dom, sid);
+ }
+
+ return dom;
+}
+
/*
 * Predicate: should bus/dev/fun be remapped through the IOMMU?
 * Currently a placeholder that remaps every device.
 */
int
ismap(int bus, int dev, int fun)
{
    return (1);
}
+
/*
 * Hook invoked for every PCI device at attach time.  Assigns the
 * device to an IOMMU translation domain and swaps its DMA tag so that
 * bus_dmamap_* operations go through IOMMU remapping.
 */
void
acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
{
    int bus, dev, fun, sid;
    struct domain *dom;
    pcireg_t reg;

    if (!acpidmar_sc) {
        /* No DMAR, ignore */
        return;
    }

    /* Add device to our list */
    pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
    sid = mksid(bus, dev, fun);
    /* Devices flagged invalid (e.g. the IOMMU itself) are skipped */
    if (sid_flag[sid] & SID_INVALID)
        return;

    reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
#if 0
    if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
        PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
        printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
            pa->pa_domain, bus, dev, fun);
        return;
    }
#endif
    /* Add device to domain (context entry is not programmed yet) */
    dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain, sid, 0);
    if (dom == NULL)
        return;

    /* VGA devices: flag the domain DOM_NOMAP (excluded from
     * remapping) */
    if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
        PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
        dom->flag = DOM_NOMAP;
    }
    if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
        PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
        /* For ISA Bridges, map 0-16Mb as 1:1 */
        printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
            pa->pa_domain, bus, dev, fun);
        domain_map_pthru(dom, 0x00, 16*1024*1024);
    }

    /* Change DMA tag: route this device's DMA through the domain */
    pa->pa_dmat = &dom->dmat;
}
+
+/* Create list of device scope entries from ACPI table */
+void
+acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
+    struct devlist_head *devlist)
+{
+ struct acpidmar_devscope *ds;
+ struct dmar_devlist *d;
+ int dplen, i;
+
+ TAILQ_INIT(devlist);
+ while (off < de->length) {
+ ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
+ off += ds->length;
+
+ /* We only care about bridges and endpoints */
+ if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
+ continue;
+
+ dplen = ds->length - sizeof(*ds);
+ d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
+ d->bus  = ds->bus;
+ d->type = ds->type;
+ d->ndp  = dplen / 2;
+ d->dp   = (void *)&d[1];
+ memcpy(d->dp, &ds[1], dplen);
+ TAILQ_INSERT_TAIL(devlist, d, link);
+
+ printf("  %8s  %.4x:%.2x.%.2x.%x {",
+    ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
+    segment, ds->bus,
+    d->dp[0].device,
+    d->dp[0].function);
+
+ for (i = 1; i < d->ndp; i++) {
+ printf(" %2x.%x ",
+    d->dp[i].device,
+    d->dp[i].function);
+ }
+ printf("}\n");
+ }
+}
+
+/* DMA Remapping Hardware Unit */
+void
+acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
+{
+ struct iommu_softc *iommu;
+
+ printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
+    de->drhd.segment,
+    de->drhd.address,
+    de->drhd.flags);
+ iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
+ acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
+    &iommu->devices);
+ iommu_init(sc, iommu, &de->drhd);
+
+ if (de->drhd.flags) {
+ /* Catchall IOMMU goes at end of list */
+ TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
+ } else {
+ TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
+ }
+}
+
/*
 * Parse a Reserved Memory Region Reporting (RMRR) entry.  The region
 * must stay identity-mapped for the listed devices.  If the region
 * falls inside an E820 reserved range, widen it to the whole run of
 * adjacent reserved regions, since some BIOSes DMA outside the
 * reported RMRR.
 */
void
acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
{
    struct rmrr_softc *rmrr;
    bios_memmap_t *im, *jm;
    uint64_t start, end;

    printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
        de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
    /* Reject inverted/empty ranges from buggy firmware */
    if (de->rmrr.limit <= de->rmrr.base) {
        printf("  buggy BIOS\n");
        return;
    }

    rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
    rmrr->start = trunc_page(de->rmrr.base);
    rmrr->end = round_page(de->rmrr.limit);
    rmrr->segment = de->rmrr.segment;
    acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
        &rmrr->devices);

    for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
        if (im->type != BIOS_MAP_RES)
            continue;
        /* Search for adjacent reserved regions: coalesce the run of
         * back-to-back reserved entries starting at im */
        start = im->addr;
        end   = im->addr+im->size;
        for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
            jm++) {
            end = jm->addr+jm->size;
        }
        printf("e820: %.16llx - %.16llx\n", start, end);
        if (start <= rmrr->start && rmrr->end <= end) {
            /* Bah.. some buggy BIOS stomp outside RMRR:
             * widen the RMRR to the whole reserved run */
            printf("  ** inside E820 Reserved %.16llx %.16llx\n",
                start, end);
            rmrr->start = trunc_page(start);
            rmrr->end   = round_page(end);
            break;
        }
    }
    TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
}
+
+/* Root Port ATS Reporting */
+void
+acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
+{
+ struct atsr_softc *atsr;
+
+ printf("ATSR: segment:%.4x flags:%x\n",
+    de->atsr.segment,
+    de->atsr.flags);
+
+ atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
+ atsr->flags = de->atsr.flags;
+ atsr->segment = de->atsr.segment;
+ acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
+    &atsr->devices);
+
+ TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
+}
+
+void
+acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
+{
+ struct rmrr_softc *rmrr;
+ struct iommu_softc *iommu;
+ struct domain *dom;
+ struct dmar_devlist *dl;
+ union acpidmar_entry *de;
+ int off, sid, rc;
+
+ domain_map_page = domain_map_page_intel;
+ printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
+    dmar->haw+1,
+    !!(dmar->flags & 0x1),
+    !!(dmar->flags & 0x2));
+ sc->sc_haw = dmar->haw+1;
+ sc->sc_flags = dmar->flags;
+
+ TAILQ_INIT(&sc->sc_drhds);
+ TAILQ_INIT(&sc->sc_rmrrs);
+ TAILQ_INIT(&sc->sc_atsrs);
+
+ off = sizeof(*dmar);
+ while (off < dmar->hdr.length) {
+ de = (union acpidmar_entry *)((unsigned char *)dmar + off);
+ switch (de->type) {
+ case DMAR_DRHD:
+ acpidmar_drhd(sc, de);
+ break;
+ case DMAR_RMRR:
+ acpidmar_rmrr(sc, de);
+ break;
+ case DMAR_ATSR:
+ acpidmar_atsr(sc, de);
+ break;
+ default:
+ printf("DMAR: unknown %x\n", de->type);
+ break;
+ }
+ off += de->length;
+ }
+
+ /* Pre-create domains for iommu devices */
+ TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
+ TAILQ_FOREACH(dl, &iommu->devices, link) {
+ sid = mksid(dl->bus, dl->dp[0].device,
+    dl->dp[0].function);
+ dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
+ if (dom != NULL) {
+ printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
+    iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
+    iommu->id, dom->did);
+ }
+ }
+ }
+ /* Map passthrough pages for RMRR */
+ TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
+ TAILQ_FOREACH(dl, &rmrr->devices, link) {
+ sid = mksid(dl->bus, dl->dp[0].device,
+    dl->dp[0].function);
+ dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
+ if (dom != NULL) {
+ printf("%s map ident: %.16llx %.16llx\n",
+    dom_bdf(dom), rmrr->start, rmrr->end);
+ domain_map_pthru(dom, rmrr->start, rmrr->end);
+ rc = extent_alloc_region(dom->iovamap,
+    rmrr->start, rmrr->end, EX_WAITOK);
+ }
+ }
+ }
+}
+
+
+/*=====================================================
+ * AMD Vi
+ *=====================================================*/
+void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
+int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
+ struct acpi_ivhd *);
+void iommu_ivhd_add(struct iommu_softc *, int, int, int);
+int _ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *);
+void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
+int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
+int ivhd_invalidate_domain(struct iommu_softc *, int);
+void ivhd_intr_map(struct iommu_softc *, int);
+
+int
+acpiivhd_intr(void *ctx)
+{
+ struct iommu_softc *iommu = ctx;
+
+ if (!iommu->dte)
+ return (0);
+ ivhd_poll_events(iommu);
+ return (1);
+}
+
/*
 * Setup interrupt for AMD: establish an MSI interrupt for the IOMMU
 * using the IOMMU's own PCI device id.  Only done once per unit.
 */
void
ivhd_intr_map(struct iommu_softc *iommu, int devid) {
    pci_intr_handle_t ih;

    /* Already established */
    if (iommu->intr)
        return;
    /* Hand-build an MSI interrupt handle for the IOMMU's bus:dev.fn */
    ih.tag = pci_make_tag(NULL, sid_bus(devid), sid_dev(devid), sid_fun(devid));
    ih.line = APIC_INT_VIA_MSG;
    ih.pin = 0;
    iommu->intr = pci_intr_establish(NULL, ih, IPL_NET | IPL_MPSAFE,
        acpiivhd_intr, iommu, "amd_iommu");
    printf("amd iommu intr: %p\n", iommu->intr);
}
+
/*
 * Recursively dump an AMD page table (debug).  'lvl' is the page
 * table level (1 = leaf); 'va' accumulates the virtual address bits
 * decoded so far.  pfx[] indents output by depth.
 */
void _dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
{
    char *pfx[] = { "    ", "   ", "  ", " ", "" };
    uint64_t i, sh;
    struct pte_entry *npte;

    for (i = 0; i < 512; i++) {
        /* Virtual address bits contributed by index i at this level */
        sh = (i << (((lvl-1) * 9) + 12));
        if (pte[i].val & PTE_P) {
            if (lvl > 1) {
                /* Interior entry: recurse into the next-level table */
                npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
                printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl,
                    pte[i].val, (pte[i].val >> 9) & 7);
                _dumppte(npte, lvl-1, va | sh);
            }
            else {
                /* Leaf entry: print the final VA -> PTE mapping */
                printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl,
                    pte[i].val, va | sh);
            }
        }
    }
}
+
/*
 * Debug: dump the Device Table Entry and page tables for device
 * 'sid' after a fault.  Rate-limited to the first 10 calls.
 * NOTE(review): the 'paddr' (faulting address) parameter is unused.
 */
void ivhd_showpage(struct iommu_softc *iommu, int sid, paddr_t paddr)
{
    struct domain *dom;
    static int show = 0;

    if (show > 10)
        return;
    show++;
    dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
    if (!dom)
        return;
    printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
        iommu->dte[sid].dw0,
        iommu->dte[sid].dw1,
        iommu->dte[sid].dw2,
        iommu->dte[sid].dw3,
        iommu->dte[sid].dw4,
        iommu->dte[sid].dw5,
        iommu->dte[sid].dw6,
        iommu->dte[sid].dw7);
    /* Dump from the top level (4-level table; mode is set to 3) */
    _dumppte(dom->pte, 3, 0);
}
+
/*
 * Display AMD IOMMU Error: decode and print one entry from the event
 * log, then zero it.  'head' is the byte offset of the entry (used
 * only for display).  Field layout per the AMD IOMMU event log
 * format: sid in dw0, type/domain/flags in dw1, address in dw2/dw3.
 */
void
ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
{
    int type, sid, did, flag;
    uint64_t address;

    /* Get Device, Domain, Address and Type of event */
    sid  = __EXTRACT(evt->dw0, EVT_SID);
    type = __EXTRACT(evt->dw1, EVT_TYPE);
    did  = __EXTRACT(evt->dw1, EVT_DID);
    flag = __EXTRACT(evt->dw1, EVT_FLAG);
    address = _get64(&evt->dw2);

    printf("=== IOMMU Error[%.4x]: ", head);
    switch (type) {
    case ILLEGAL_DEV_TABLE_ENTRY: /* verified in testing */
        printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
           dmar_bdf(sid), address,
           evt->dw1 & EVT_TR ? "translation" : "transaction",
           evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
           evt->dw1 & EVT_RW ? "write" : "read",
           evt->dw1 & EVT_I  ? "interrupt" : "memory");
        ivhd_showdte(iommu);
        break;
    case IO_PAGE_FAULT: /* verified in testing */
        printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
           dmar_bdf(sid), did, address,
           evt->dw1 & EVT_TR ? "translation" : "transaction",
           evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
           evt->dw1 & EVT_PE ? "no perm" : "perm",
           evt->dw1 & EVT_RW ? "write" : "read",
           evt->dw1 & EVT_PR ? "present" : "not present",
           evt->dw1 & EVT_I  ? "interrupt" : "memory");
        ivhd_showdte(iommu);
        ivhd_showpage(iommu, sid, address);
        break;
    case DEV_TAB_HARDWARE_ERROR: /* verified in testing */
        printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
            dmar_bdf(sid), address,
           evt->dw1 & EVT_TR ? "translation" : "transaction",
           evt->dw1 & EVT_RW ? "write" : "read",
           evt->dw1 & EVT_I  ? "interrupt" : "memory");
        ivhd_showdte(iommu);
        break;
    case PAGE_TAB_HARDWARE_ERROR:
        printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
           dmar_bdf(sid), address,
           evt->dw1 & EVT_TR ? "translation" : "transaction",
           evt->dw1 & EVT_RW ? "write" : "read",
           evt->dw1 & EVT_I  ? "interrupt" : "memory");
        ivhd_showdte(iommu);
        break;
    case ILLEGAL_COMMAND_ERROR: /* verified in testing */
        printf("illegal command addr=0x%.16llx\n", address);
        ivhd_showcmd(iommu);
        break;
    case COMMAND_HARDWARE_ERROR:
        printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
           address, flag);
        ivhd_showcmd(iommu);
        break;
    case IOTLB_INV_TIMEOUT:
        printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
           dmar_bdf(sid), address);
        break;
    case INVALID_DEVICE_REQUEST:
        printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
           dmar_bdf(sid), address, flag);
        break;
    default:
        printf("unknown type=0x%.2x\n", type);
        break;
    }
    //ivhd_showdte(iommu);
    /* Clear old event so it is not re-reported */
    evt->dw0 = 0;
    evt->dw1 = 0;
    evt->dw2 = 0;
    evt->dw3 = 0;
}
+
/*
 * AMD: Process IOMMU error events from hardware.  Consumes all
 * entries between the head and tail registers of the circular event
 * log, then advances the head register past them.
 */
int
ivhd_poll_events(struct iommu_softc *iommu)
{
    uint32_t head, tail;
    int sz;

    sz = sizeof(struct ivhd_event);
    head = iommu_read_4(iommu, EVT_HEAD_REG);
    tail = iommu_read_4(iommu, EVT_TAIL_REG);
    if (head == tail) {
        /* No pending events */
        return (0);
    }
    while (head != tail) {
        /* NOTE(review): 'head' is a byte offset; this pointer
         * arithmetic assumes evt_tbl is byte-granular (void/char *)
         * -- confirm the field's declared type. */
        ivhd_show_event(iommu, iommu->evt_tbl + head, head);
        head = (head + sz) % EVT_TBL_SIZE;
    }
    /* Acknowledge everything consumed */
    iommu_write_4(iommu, EVT_HEAD_REG, head);
    return (0);
}
+
/*
 * AMD: Issue command to IOMMU queue.  Copies 'cmd' into the circular
 * command buffer at the tail and bumps the tail register.  Returns
 * the slot index of the queued command, or -EBUSY when the ring is
 * full.  Interrupts are disabled around the tail read-modify-write
 * so the update is atomic with respect to this CPU.
 */
int
_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
{
    u_long rf;
    uint32_t head, tail, next;
    int sz;

    head = iommu_read_4(iommu, CMD_HEAD_REG);
    sz = sizeof(*cmd);
    rf = intr_disable();
    tail = iommu_read_4(iommu, CMD_TAIL_REG);
    next = (tail + sz) % CMD_TBL_SIZE;
    if (next == head) {
        printf("FULL\n");
        /* Queue is full */
        intr_restore(rf);
        return -EBUSY;
    }
    /* NOTE(review): 'tail' is a byte offset; this assumes cmd_tbl is
     * byte-granular -- confirm the field's declared type. */
    memcpy(iommu->cmd_tbl + tail, cmd, sz);
    iommu_write_4(iommu, CMD_TAIL_REG, next);
    intr_restore(rf);
    return (tail / sz);
}
+
#define IVHD_MAXDELAY 8

/*
 * AMD: queue a command, optionally waiting for completion.
 * When 'wait' is set, a COMPLETION_WAIT command carrying the physical
 * address of the on-stack variable 'wv' is queued after 'cmd'; the
 * IOMMU stores dw2/dw3 to that address once all prior commands have
 * completed.  We poll wv with exponential backoff (DELAY(10 << i),
 * up to IVHD_MAXDELAY attempts).  Returns the slot index of the
 * issued command, or negative on error.
 */
int
ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
{
    struct ivhd_command wq = { 0 };
    volatile uint64_t wv __aligned(16) = 0LL;
    paddr_t paddr;
    int rc, i;
    static int mi;    /* high-water mark of observed wait loops (debug) */

    rc = _ivhd_issue_command(iommu, cmd);
    if (rc >= 0 && wait) {
        /* Wait for previous commands to complete.
         * Store address of completion variable to command */
        pmap_extract(pmap_kernel(), (vaddr_t)&wv, &paddr);
        /* dw0 = low address bits | store-flag; dw1 = opcode + high
         * address bits */
        wq.dw0 = (paddr & ~0xF) | 0x1;
        wq.dw1 = (COMPLETION_WAIT << CMD_SHIFT) | ((paddr >> 32) & 0xFFFFF);
        wq.dw2 = 0xDEADBEEF;
        wq.dw3 = 0xFEEDC0DE;

        rc = _ivhd_issue_command(iommu, &wq);
        /* wv will change to value in dw2/dw3 when command is complete */
        for (i = 0; i < IVHD_MAXDELAY && !wv; i++) {
            DELAY(10 << i);
        }
        if (mi < i && mi != IVHD_MAXDELAY) {
            printf("maxdel: %d\n", i);
            mi = i;
        }
        if (i == IVHD_MAXDELAY) {
            printf("ivhd command timeout: %.8x %.8x %.8x %.8x wv:%llx idx:%x\n",
                cmd->dw0, cmd->dw1, cmd->dw2, cmd->dw3, wv, rc);
        }
    }
    return rc;
}
+
/* AMD: Flush changes to Device Table Entry for a specific domain.
 * Issues INVALIDATE_DEVTAB_ENTRY and waits for completion.
 * NOTE(review): per the AMD IOMMU command format, dw0 of
 * INVALIDATE_DEVTAB_ENTRY holds a DeviceID, yet callers pass a
 * domain id ('did') -- confirm against the spec. */
int
ivhd_flush_devtab(struct iommu_softc *iommu, int did)
{
    struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_DEVTAB_ENTRY << CMD_SHIFT };
    return ivhd_issue_command(iommu, &cmd, 1);
}
+
+/* AMD: Invalidate all IOMMU device and page tables */
+int
+ivhd_invalidate_iommu_all(struct iommu_softc *iommu)
+{
+ struct ivhd_command cmd = { .dw1 = INVALIDATE_IOMMU_ALL << CMD_SHIFT };
+ return ivhd_issue_command(iommu, &cmd, 0);
+}
+
+/* AMD: Invalidate interrupt remapping */
+int
+ivhd_invalidate_interrupt_table(struct iommu_softc *iommu, int did)
+{
+ struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_INTERRUPT_TABLE << CMD_SHIFT };
+ return ivhd_issue_command(iommu, &cmd, 0);
+}
+
/* AMD: Invalidate all page tables in a domain, waiting for
 * completion. */
int
ivhd_invalidate_domain(struct iommu_softc *iommu, int did)
{
    struct ivhd_command cmd = { .dw1 = did | (INVALIDATE_IOMMU_PAGES << CMD_SHIFT) };

    /* dw2/dw3 form the 64-bit address operand 0x7FFFFFFF'FFFFF003:
     * presumably the "all pages" wildcard with the S and PDE bits
     * set (low bits 0x3) -- confirm against the AMD IOMMU spec. */
    cmd.dw2 = 0xFFFFF000 | 0x3;
    cmd.dw3 = 0x7FFFFFFF;
    return ivhd_issue_command(iommu, &cmd, 1);
}
+
+/* AMD: Display Registers */
+void
+ivhd_showit(struct iommu_softc *iommu)
+{
+ printf("---- dt:%.16llx cmd:%.16llx evt:%.16llx ctl:%.16llx sts:%.16llx\n",
+ iommu_read_8(iommu, DEV_TAB_BASE_REG),
+ iommu_read_8(iommu, CMD_BASE_REG),
+ iommu_read_8(iommu, EVT_BASE_REG),
+ iommu_read_8(iommu, IOMMUCTL_REG),
+ iommu_read_8(iommu, IOMMUSTS_REG));
+ printf("---- cmd queue:%.16llx %.16llx evt queue:%.16llx %.16llx\n",
+ iommu_read_8(iommu, CMD_HEAD_REG),
+ iommu_read_8(iommu, CMD_TAIL_REG),
+ iommu_read_8(iommu, EVT_HEAD_REG),
+ iommu_read_8(iommu, EVT_TAIL_REG));
+}
+
/* AMD: Generate Fake Errors to test event handler (debug-only fault
 * injection; not called in normal operation). */
void ivhd_checkerr(struct iommu_softc *iommu);
void ivhd_checkerr(struct iommu_softc *iommu)
{
    struct ivhd_command cmd = { -1, -1, -1, -1 };

    /* Generate ILLEGAL DEV TAB entry?
     * NOTE(review): sid 0x2303 / domain 0x1234 are arbitrary debug
     * values; this deliberately corrupts a live DTE slot. */
    iommu->dte[0x2303].dw0 = -1;      /* invalid */
    iommu->dte[0x2303].dw2 = 0x1234;  /* domain */
    iommu->dte[0x2303].dw7 = -1;      /* reserved */
    ivhd_flush_devtab(iommu, 0x1234);
    ivhd_poll_events(iommu);

    /* Generate ILLEGAL_COMMAND_ERROR : ok (all-ones is not a valid
     * command encoding) */
    ivhd_issue_command(iommu, &cmd, 0);
    ivhd_poll_events(iommu);

    /* Generate page hardware error */
}
+
+/* AMD: Show Device Table Entry */
+void ivhd_showdte(struct iommu_softc *iommu)
+{
+ int i;
+
+ for (i = 0; i < 65536; i++) {
+ if (iommu->dte[i].dw0) {
+ printf("%.2x:%.2x.%x: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ i >> 8, (i >> 3) & 0x1F, i & 0x7,
+ iommu->dte[i].dw0, iommu->dte[i].dw1,
+ iommu->dte[i].dw2, iommu->dte[i].dw3,
+ iommu->dte[i].dw4, iommu->dte[i].dw5,
+ iommu->dte[i].dw6, iommu->dte[i].dw7);
+ }
+ }
+}
+
/* AMD: Show command buffer entries with their physical addresses
 * (debug aid). */
void ivhd_showcmd(struct iommu_softc *iommu)
{
    struct ivhd_command *ihd;
    paddr_t phd;
    int i;

    ihd = iommu->cmd_tbl;
    phd = iommu_read_8(iommu, CMD_BASE_REG) & CMD_BASE_MASK;
    /* NOTE(review): 4096/128 = 32 entries are shown, but a 4K buffer
     * of 16-byte commands holds 256 -- confirm the intended count. */
    for (i = 0; i < 4096 / 128; i++) {
        printf("%.2x: %.16llx %.8x %.8x %.8x %.8x\n", i,
            (uint64_t)phd + i * sizeof(*ihd),
            ihd[i].dw0,ihd[i].dw1,ihd[i].dw2,ihd[i].dw3);
    }
}
+
/* Extract bit field 'x' from the AMD extended feature register
 * (iommu->ecap) using the x_SHIFT/x_MASK constant pair. */
#define _c(x) (int)((iommu->ecap >> x ##_SHIFT) & x ## _MASK)
+
/*
 * AMD: Initialize one IOMMU unit described by an IVHD entry.
 * Maps the unit's MMIO registers, disables translation, establishes
 * the MSI interrupt, allocates the command and event rings, points
 * the hardware at the shared device table, then re-enables the unit
 * with the IVHD-advertised control flags.  Returns 0 on success,
 * -1 on bad arguments or mapping failure.
 */
int
ivhd_iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
    struct acpi_ivhd *ivhd)
{
    static int niommu;    /* running unit number for iommu->id */
    paddr_t paddr;
    uint64_t ov;

    if (sc == NULL || iommu == NULL || ivhd == NULL) {
        printf("Bad pointer to iommu_init!\n");
        return -1;
    }
    /* Map the unit's 512K MMIO register window */
    if (_bus_space_map(sc->sc_memt, ivhd->address, 0x80000, 0, &iommu->ioh) != 0) {
        printf("Bus Space Map fails\n");
        return -1;
    }
    TAILQ_INIT(&iommu->domains);
    TAILQ_INIT(&iommu->devices);

    /* Setup address width and number of domains */
    iommu->id = ++niommu;
    iommu->iot = sc->sc_memt;
    iommu->mgaw = 48;
    iommu->agaw = 48;
    iommu->flags = 1;
    iommu->segment = 0;
    iommu->ndoms = 256;

    /* Dump the extended feature register bits */
    iommu->ecap = iommu_read_8(iommu, EXTFEAT_REG);
    printf("ecap = %.16llx\n", iommu->ecap);
    printf("%s%s%s%s%s%s%s%s\n",
        iommu->ecap & EFR_PREFSUP ? "pref " : "",
        iommu->ecap & EFR_PPRSUP  ? "ppr " : "",
        iommu->ecap & EFR_NXSUP   ? "nx " : "",
        iommu->ecap & EFR_GTSUP   ? "gt " : "",
        iommu->ecap & EFR_IASUP   ? "ia " : "",
        iommu->ecap & EFR_GASUP   ? "ga " : "",
        iommu->ecap & EFR_HESUP   ? "he " : "",
        iommu->ecap & EFR_PCSUP   ? "pc " : "");
    printf("hats:%x gats:%x glxsup:%x smif:%x smifrc:%x gam:%x\n",
        _c(EFR_HATS), _c(EFR_GATS), _c(EFR_GLXSUP), _c(EFR_SMIFSUP),
        _c(EFR_SMIFRC), _c(EFR_GAMSUP));

    /* Turn off iommu while reprogramming it */
    ov = iommu_read_8(iommu, IOMMUCTL_REG);
    iommu_write_8(iommu, IOMMUCTL_REG, ov & ~(CTL_IOMMUEN | CTL_COHERENT |
        CTL_HTTUNEN | CTL_RESPASSPW | CTL_PASSPW | CTL_ISOC));

    /* Enable intr; mark the IOMMU's own device so it is never
     * remapped through itself */
    sid_flag[ivhd->devid] |= SID_INVALID;
    ivhd_intr_map(iommu, ivhd->devid);

    /* Setup command buffer with 4k buffer (128 entries) */
    iommu->cmd_tbl = iommu_alloc_page(iommu, &paddr);
    iommu_write_8(iommu, CMD_BASE_REG, (paddr & CMD_BASE_MASK) | CMD_TBL_LEN_4K);
    iommu_write_4(iommu, CMD_HEAD_REG, 0x00);
    iommu_write_4(iommu, CMD_TAIL_REG, 0x00);
    iommu->cmd_tblp = paddr;

    /* Setup event log with 4k buffer (128 entries) */
    iommu->evt_tbl = iommu_alloc_page(iommu, &paddr);
    iommu_write_8(iommu, EVT_BASE_REG, (paddr & EVT_BASE_MASK) | EVT_TBL_LEN_4K);
    iommu_write_4(iommu, EVT_HEAD_REG, 0x00);
    iommu_write_4(iommu, EVT_TAIL_REG, 0x00);
    iommu->evt_tblp = paddr;

    /* Setup device table
     * 1 entry per source ID (bus:device:function - 64k entries);
     * shared across units via sc->sc_hwdte/sc_hwdtep.
     */
    iommu->dte = sc->sc_hwdte;
    //pmap_extract(pmap_kernel(), (vaddr_t)iommu->dte, &paddr);
    iommu_write_8(iommu, DEV_TAB_BASE_REG, (sc->sc_hwdtep & DEV_TAB_MASK) | DEV_TAB_LEN);

    /* Enable IOMMU, mirroring the tuning flags advertised in the
     * IVHD entry, and set a 1ms invalidation timeout */
    ov |= (CTL_IOMMUEN | CTL_EVENTLOGEN | CTL_CMDBUFEN | CTL_EVENTINTEN | CTL_COMWAITINTEN);
    if (ivhd->flags & IVHD_COHERENT)
        ov |= CTL_COHERENT;
    if (ivhd->flags & IVHD_HTTUNEN)
        ov |= CTL_HTTUNEN;
    if (ivhd->flags & IVHD_RESPASSPW)
        ov |= CTL_RESPASSPW;
    if (ivhd->flags & IVHD_PASSPW)
        ov |= CTL_PASSPW;
    if (ivhd->flags & IVHD_ISOC)
        ov |= CTL_ISOC;
    ov &= ~(CTL_INVTIMEOUT_MASK << CTL_INVTIMEOUT_SHIFT);
    ov |=  (CTL_INVTIMEOUT_1MS  << CTL_INVTIMEOUT_SHIFT);
    iommu_write_8(iommu, IOMMUCTL_REG, ov);

    ivhd_invalidate_iommu_all(iommu);
    //ivhd_checkerr(iommu);

    TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
    return 0;
}
+
/*
 * Record a device-id range and its IVHD config byte.
 * NOTE(review): the allocated entry is never inserted into any list
 * and the pointer is dropped on return -- as written this leaks
 * memory and has no effect.  Either link it somewhere or remove the
 * function.
 */
void
iommu_ivhd_add(struct iommu_softc *iommu, int start, int end, int cfg)
{
    struct ivhd_devlist *idev;

    idev = malloc(sizeof(*idev), M_DEVBUF, M_ZERO | M_WAITOK);
    idev->start_id = start;
    idev->end_id = end;
    idev->cfg = cfg;
}
+
+void
+acpiivrs_ivhd(struct acpidmar_softc *sc, struct acpi_ivhd *ivhd)
+{
+ struct iommu_softc *iommu;
+ struct acpi_ivhd_ext *ext;
+ union acpi_ivhd_entry *ie;
+ int start, off, dte, all_dte = 0;
+
+ if (ivhd->type == IVRS_IVHD_EXT) {
+ ext = (struct acpi_ivhd_ext *)ivhd;
+ printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x %.16llx\n",
+       ext->type, ext->flags, ext->length,
+             ext->segment, dmar_bdf(ext->devid), ext->cap,
+             ext->address, ext->info,
+       ext->attrib, ext->efr);
+ if (ext->flags & IVHD_PPRSUP)
+ printf(" PPRSup");
+ if (ext->flags & IVHD_PREFSUP)
+ printf(" PreFSup");
+ if (ext->flags & IVHD_COHERENT)
+ printf(" Coherent");
+ if (ext->flags & IVHD_IOTLB)
+ printf(" Iotlb");
+ if (ext->flags & IVHD_ISOC)
+ printf(" ISoc");
+ if (ext->flags & IVHD_RESPASSPW)
+ printf(" ResPassPW");
+ if (ext->flags & IVHD_PASSPW)
+ printf(" PassPW");
+ if (ext->flags & IVHD_HTTUNEN)
+ printf( " HtTunEn");
+ if (ext->flags)
+ printf("\n");
+ off = sizeof(*ext);
+ iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO|M_WAITOK);
+ ivhd_iommu_init(sc, iommu, ivhd);
+ } else {
+ printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x\n",
+       ivhd->type, ivhd->flags, ivhd->length,
+             ivhd->segment, dmar_bdf(ivhd->devid), ivhd->cap,
+             ivhd->address, ivhd->info,
+       ivhd->feature);
+ if (ivhd->flags & IVHD_PPRSUP)
+ printf(" PPRSup");
+ if (ivhd->flags & IVHD_PREFSUP)
+ printf(" PreFSup");
+ if (ivhd->flags & IVHD_COHERENT)
+ printf(" Coherent");
+ if (ivhd->flags & IVHD_IOTLB)
+ printf(" Iotlb");
+ if (ivhd->flags & IVHD_ISOC)
+ printf(" ISoc");
+ if (ivhd->flags & IVHD_RESPASSPW)
+ printf(" ResPassPW");
+ if (ivhd->flags & IVHD_PASSPW)
+ printf(" PassPW");
+ if (ivhd->flags & IVHD_HTTUNEN)
+ printf( " HtTunEn");
+ if (ivhd->flags)
+ printf("\n");
+ off = sizeof(*ivhd);
+ }
+ while (off < ivhd->length) {
+ ie = (void *)ivhd + off;
+ switch (ie->type) {
+ case IVHD_ALL:
+ all_dte = ie->all.data;
+ printf(" ALL %.4x\n", dte);
+ off += sizeof(ie->all);
+ break;
+ case IVHD_SEL:
+ dte = ie->sel.data;
+ printf(" SELECT: %s %.4x\n", dmar_bdf(ie->sel.devid), dte);
+ off += sizeof(ie->sel);
+ break;
+ case IVHD_SOR:
+ dte = ie->sor.data;
+ start = ie->sor.devid;
+ printf(" SOR: %s %.4x\n", dmar_bdf(start), dte);
+ off += sizeof(ie->sor);
+ break;
+ case IVHD_EOR:
+ printf(" EOR: %s\n", dmar_bdf(ie->eor.devid));
+ off += sizeof(ie->eor);
+ break;
+ case IVHD_ALIAS_SEL:
+ dte = ie->alias.data;
+ printf(" ALIAS: src=%s: ", dmar_bdf(ie->alias.srcid));
+ printf(" %s %.4x\n", dmar_bdf(ie->alias.devid), dte);
+ off += sizeof(ie->alias);
+ break;
+ case IVHD_ALIAS_SOR:
+ dte = ie->alias.data;
+ printf(" ALIAS_SOR: %s %.4x ", dmar_bdf(ie->alias.devid), dte);
+ printf(" src=%s\n", dmar_bdf(ie->alias.srcid));
+ off += sizeof(ie->alias);
+ break;
+ case IVHD_EXT_SEL:
+ dte = ie->ext.data;
+ printf(" EXT SEL: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
+ dte, ie->ext.extdata);
+ off += sizeof(ie->ext);
+ break;
+ case IVHD_EXT_SOR:
+ dte = ie->ext.data;
+ printf(" EXT SOR: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
+       dte, ie->ext.extdata);
+ off += sizeof(ie->ext);
+ break;
+ case IVHD_SPECIAL:
+ printf(" SPECIAL\n");
+ off += sizeof(ie->special);
+ break;
+ default:
+ printf(" 2:unknown %x\n", ie->type);
+ off = ivhd->length;
+ break;
+ }
+ }
+}
+
/*
 * Parse the AMD IVRS ACPI table: allocate the shared hardware device
 * table (64k DTEs) once, select the AMD page-map routine, and walk
 * the IVHD/IVMD entries.
 */
void
acpiivrs_init(struct acpidmar_softc *sc, struct acpi_ivrs *ivrs)
{
    union acpi_ivrs_entry *ie;
    int off;

    /* One device table is shared by all units */
    if (!sc->sc_hwdte) {
        sc->sc_hwdte = iommu_alloc_contig(sc, HWDTE_SIZE, &sc->sc_hwdtep);
        if (sc->sc_hwdte == NULL)
            panic("Can't allocate HWDTE!\n");
    }

    domain_map_page = domain_map_page_amd;
    printf("IVRS Version: %d\n", ivrs->hdr.revision);
    printf(" VA Size: %d\n", (ivrs->ivinfo >> IVRS_VASIZE_SHIFT) & IVRS_VASIZE_MASK);
    printf(" PA Size: %d\n", (ivrs->ivinfo >> IVRS_PASIZE_SHIFT) & IVRS_PASIZE_MASK);

    TAILQ_INIT(&sc->sc_drhds);
    TAILQ_INIT(&sc->sc_rmrrs);
    TAILQ_INIT(&sc->sc_atsrs);

    /* Walk the variable-length entries after the fixed header.
     * NOTE(review): a zero ie->length from malformed firmware would
     * loop forever -- consider a guard. */
    printf("======== IVRS\n");
    off = sizeof(*ivrs);
    while (off < ivrs->hdr.length) {
        ie = (void *)ivrs + off;
        switch (ie->type) {
        case IVRS_IVHD:
        case IVRS_IVHD_EXT:
            acpiivrs_ivhd(sc, &ie->ivhd);
            break;
        case IVRS_IVMD_ALL:
        case IVRS_IVMD_SPECIFIED:
        case IVRS_IVMD_RANGE:
            /* Memory definition entries: not yet handled */
            printf("ivmd\n");
            break;
        default:
            printf("1:unknown: %x\n", ie->type);
            break;
        }
        off += ie->length;
    }
    printf("======== End IVRS\n");
}
+
+static int
+acpiivhd_activate(struct iommu_softc *iommu, int act)
+{
+ switch (act) {
+ case DVACT_SUSPEND:
+ iommu->flags |= IOMMU_FLAGS_SUSPEND;
+ break;
+ case DVACT_RESUME:
+ iommu->flags &= ~IOMMU_FLAGS_SUSPEND;
+ break;
+ }
+ return (0);
+}
+
/*
 * Suspend/resume handler.  On resume, Intel units get their root
 * table address, fault-event registers, and (if they were enabled at
 * suspend) translation re-enabled; AMD units only have their flag
 * state updated here.  On suspend, Intel translation is disabled.
 */
int
acpidmar_activate(struct device *self, int act)
{
    struct acpidmar_softc *sc = (struct acpidmar_softc *)self;
    struct iommu_softc *iommu;

    printf("called acpidmar_activate %d %p\n", act, sc);

    if (sc == NULL) {
        return (0);
    }

    switch (act) {
    case DVACT_RESUME:
        TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
            printf("iommu%d resume\n", iommu->id);
            /* AMD units (dte set) are handled separately */
            if (iommu->dte) {
                acpiivhd_activate(iommu, act);
                continue;
            }
            iommu_flush_write_buffer(iommu);
            iommu_set_rtaddr(iommu, iommu->rtaddr);
            iommu_write_4(iommu, DMAR_FEDATA_REG, iommu->fedata);
            iommu_write_4(iommu, DMAR_FEADDR_REG, iommu->feaddr);
            iommu_write_4(iommu, DMAR_FEUADDR_REG,
                iommu->feaddr >> 32);
            /* Re-enable translation only for units that were
             * suspended in a good state */
            if ((iommu->flags & (IOMMU_FLAGS_BAD|IOMMU_FLAGS_SUSPEND)) ==
                IOMMU_FLAGS_SUSPEND) {
                printf("enable wakeup translation\n");
                iommu_enable_translation(iommu, 1);
            }
            iommu_showcfg(iommu, -1);
        }
        break;
    case DVACT_SUSPEND:
        TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
            printf("iommu%d suspend\n", iommu->id);
            if (iommu->flags & IOMMU_FLAGS_BAD)
                continue;
            /* AMD units (dte set) are handled separately */
            if (iommu->dte) {
                acpiivhd_activate(iommu, act);
                continue;
            }
            iommu->flags |= IOMMU_FLAGS_SUSPEND;
            iommu_enable_translation(iommu, 0);
            iommu_showcfg(iommu, -1);
        }
        break;
    }
    return (0);
}
+
+void
+acpidmar_sw(int act)
+{
+ acpidmar_activate((struct device *)acpidmar_sc, act);
+}
+
+int
+acpidmar_match(struct device *parent, void *match, void *aux)
+{
+ struct acpi_attach_args *aaa = aux;
+ struct acpi_table_header *hdr;
+
+ /* If we do not have a table, it is not us */
+ if (aaa->aaa_table == NULL)
+ return (0);
+
+ /* If it is an DMAR table, we can attach */
+ hdr = (struct acpi_table_header *)aaa->aaa_table;
+ if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0)
+ return (1);
+ if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0)
+ return (1);
+
+ return (0);
+}
+
+void
+acpidmar_attach(struct device *parent, struct device *self, void *aux)
+{
+ struct acpidmar_softc *sc = (void *)self;
+ struct acpi_attach_args *aaa = aux;
+ struct acpi_dmar *dmar = (struct acpi_dmar *)aaa->aaa_table;
+ struct acpi_ivrs        *ivrs = (struct acpi_ivrs *)aaa->aaa_table;
+ struct acpi_table_header *hdr;
+
+ hdr = (struct acpi_table_header *)aaa->aaa_table;
+ sc->sc_memt = aaa->aaa_memt;
+ if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0) {
+ acpidmar_sc = sc;
+ acpidmar_init(sc, dmar);
+ }
+ if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0) {
+ acpidmar_sc = sc;
+ acpiivrs_init(sc, ivrs);
+ }
+}
+
+ /* Interrupt handling: MSI PIC glue for the DMAR fault interrupt */
+void acpidmar_msi_hwmask(struct pic *, int);
+void acpidmar_msi_hwunmask(struct pic *, int);
+void acpidmar_msi_addroute(struct pic *, struct cpu_info *, int, int, int);
+void acpidmar_msi_delroute(struct pic *, struct cpu_info *, int, int, int);
+
+void
+acpidmar_msi_hwmask(struct pic *pic, int pin)
+{
+ struct iommu_pic *ip = (void *)pic;
+ struct iommu_softc *iommu = ip->iommu;
+
+ printf("msi_hwmask\n");
+
+ mtx_enter(&iommu->reg_lock);
+
+ iommu_write_4(iommu, DMAR_FECTL_REG, FECTL_IM);
+ iommu_read_4(iommu, DMAR_FECTL_REG);
+
+ mtx_leave(&iommu->reg_lock);
+}
+
+void
+acpidmar_msi_hwunmask(struct pic *pic, int pin)
+{
+ struct iommu_pic *ip = (void *)pic;
+ struct iommu_softc *iommu = ip->iommu;
+
+ printf("msi_hwunmask\n");
+
+ mtx_enter(&iommu->reg_lock);
+
+ iommu_write_4(iommu, DMAR_FECTL_REG, 0);
+ iommu_read_4(iommu, DMAR_FECTL_REG);
+
+ mtx_leave(&iommu->reg_lock);
+}
+
/*
 * Route the DMAR fault interrupt: program the fault-event data and
 * address registers with the MSI vector and the target CPU's APIC id,
 * caching the values for restore on resume.
 */
void
acpidmar_msi_addroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
    int type)
{
    struct iommu_pic *ip = (void *)pic;
    struct iommu_softc *iommu = ip->iommu;

    mtx_enter(&iommu->reg_lock);

    iommu->fedata = vec;
    /* MSI address: 0xFEE00000 | (destination APIC id << 12) */
    iommu->feaddr = 0xfee00000L | (ci->ci_apicid << 12);
    iommu_write_4(iommu, DMAR_FEDATA_REG, vec);
    iommu_write_4(iommu, DMAR_FEADDR_REG, iommu->feaddr);
    iommu_write_4(iommu, DMAR_FEUADDR_REG, iommu->feaddr >> 32);

    mtx_leave(&iommu->reg_lock);
}
+
/* Stub: interrupt route removal is not implemented. */
void
acpidmar_msi_delroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
    int type)
{
    printf("msi_delroute\n");
}
+
/*
 * Establish the DMAR fault interrupt: build a private MSI PIC whose
 * mask/route hooks program the IOMMU's fault-event registers, then
 * hook 'func' into it at the requested level.  Returns the interrupt
 * handle from intr_establish().
 */
void *
acpidmar_intr_establish(void *ctx, int level, int (*func)(void *),
    void *arg, const char *what)
{
    struct iommu_softc *iommu = ctx;
    struct pic *pic;

    pic = &iommu->pic.pic;
    iommu->pic.iommu = iommu;

    strlcpy(pic->pic_dev.dv_xname, "dmarpic",
        sizeof(pic->pic_dev.dv_xname));
    pic->pic_type = PIC_MSI;
    pic->pic_hwmask = acpidmar_msi_hwmask;
    pic->pic_hwunmask = acpidmar_msi_hwunmask;
    pic->pic_addroute = acpidmar_msi_addroute;
    pic->pic_delroute = acpidmar_msi_delroute;
    pic->pic_edge_stubs = ioapic_edge_stubs;
#ifdef MULTIPROCESSOR
    mtx_init(&pic->pic_mutex, level);
#endif

    /* pin -1: let the interrupt code pick; IST_PULSE for MSI */
    return intr_establish(-1, pic, 0, IST_PULSE, level, NULL, func, arg, what);
}
+
/*
 * Intel: Handle DMAR fault interrupt.  Walks the fault recording
 * registers, printing each new fault (deduplicated against the last
 * one seen), then clears the overflow/pending bits.
 */
int
acpidmar_intr(void *ctx)
{
    struct iommu_softc *iommu = ctx;
    struct fault_entry fe;
    static struct fault_entry ofe;    /* last fault shown (dedup) */
    int fro, nfr, fri, i;
    uint32_t sts;

    //splassert(IPL_HIGH);

    /* Translation disabled: nothing to report */
    if (!(iommu->gcmd & GCMD_TE)) {
        return (1);
    }
    mtx_enter(&iommu->reg_lock);
    /* NOTE(review): the FECTL read result is immediately overwritten
     * -- presumably just a register read to ack/flush; confirm. */
    sts = iommu_read_4(iommu, DMAR_FECTL_REG);
    sts = iommu_read_4(iommu, DMAR_FSTS_REG);

    if (!(sts & FSTS_PPF)) {
        mtx_leave(&iommu->reg_lock);
        return (1);
    }

    /* Scan the nfr fault records starting at the reported index */
    nfr = cap_nfr(iommu->cap);
    fro = cap_fro(iommu->cap);
    fri = (sts >> FSTS_FRI_SHIFT) & FSTS_FRI_MASK;
    for (i = 0; i < nfr; i++) {
        fe.hi = iommu_read_8(iommu, fro + (fri*16) + 8);
        if (!(fe.hi & FRCD_HI_F))
            break;

        fe.lo = iommu_read_8(iommu, fro + (fri*16));
        if (ofe.hi != fe.hi || ofe.lo != fe.lo) {
            iommu_showfault(iommu, fri, &fe);
            ofe.hi = fe.hi;
            ofe.lo = fe.lo;
        }
        fri = (fri + 1) % nfr;
    }

    /* Clear fault overflow and pending bits */
    iommu_write_4(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);

    mtx_leave(&iommu->reg_lock);

    return (1);
}
+
+/*
+ * Intel: VT-d fault reason strings, indexed by the FRCD fault reason code
+ * (see the VTD_FAULT_* enum in acpidmar.h).  iommu_showfault() indexes
+ * this array for reasons 0..13.
+ */
+const char *vtd_faults[] = {
+ "Software",
+ "Root Entry Not Present", /* ok (rtaddr + 4096) */
+ "Context Entry Not Present", /* ok (no CTX_P) */
+ "Context Entry Invalid", /* ok (tt = 3) */
+ "Address Beyond MGAW",
+ "Write", /* ok */
+ "Read", /* ok */
+ "Paging Entry Invalid", /* ok */
+ "Root Table Invalid",
+ "Context Table Invalid",
+ "Root Entry Reserved",          /* ok (root.lo |= 0x4) */
+ "Context Entry Reserved",
+ "Paging Entry Reserved",
+ "Context Entry TT",
+ "Reserved",
+};
+
+void iommu_showpte(uint64_t, int, uint64_t);
+
+/* Intel: Show IOMMU page table entry */
+/*
+ * Recursively dump the second-level page table rooted at physical address
+ * `ptep`.  `lvl` is the address bit position covered by this level
+ * (VTD_LEVEL0 == 12 for the 4K leaf level) and `base` is the virtual DMA
+ * address prefix accumulated so far.  Stops early on an identity mapping.
+ */
+void
+iommu_showpte(uint64_t ptep, int lvl, uint64_t base)
+{
+ uint64_t nb, pb, i;
+ struct pte_entry *pte;
+
+ pte = (void *)PMAP_DIRECT_MAP(ptep);
+ for (i = 0; i < 512; i++) {
+ if (!(pte[i].val & PTE_P))
+ continue;
+ nb = base + (i << lvl);
+ pb = pte[i].val & ~VTD_PAGE_MASK;
+ if(lvl == VTD_LEVEL0) {
+ /*
+ * NOTE(review): PTE_R is 0x00, so `val == PTE_R` can never be
+ * true for a present entry (PTE_P is set), i.e. the 'r' column
+ * is always blank.  Probably a read-permission test was
+ * intended — confirm before relying on this output.
+ */
+ printf("   %3llx %.16llx = %.16llx %c%c %s\n",
+    i, nb, pb,
+    pte[i].val == PTE_R ? 'r' : ' ',
+    pte[i].val & PTE_W ? 'w' : ' ',
+    (nb == pb) ? " ident" : "");
+ if (nb == pb)
+ return;
+ } else {
+ iommu_showpte(pb, lvl - VTD_STRIDE_SIZE, nb);
+ }
+ }
+}
+
+/* Intel: Show IOMMU configuration */
+/*
+ * Dump the global command/status registers and every valid root/context
+ * entry of this IOMMU (bus i, devfn j).  `sid` is currently unused.
+ */
+void
+iommu_showcfg(struct iommu_softc *iommu, int sid)
+{
+ int i, j, sts, cmd;
+ struct context_entry *ctx;
+ pcitag_t tag;
+ pcireg_t clc;
+
+ cmd = iommu_read_4(iommu, DMAR_GCMD_REG);
+ sts = iommu_read_4(iommu, DMAR_GSTS_REG);
+ printf("iommu%d: flags:%d root pa:%.16llx %s %s %s %.8x %.8x\n",
+    iommu->id, iommu->flags, iommu_read_8(iommu, DMAR_RTADDR_REG),
+    sts & GSTS_TES ? "enabled" : "disabled",
+    sts & GSTS_QIES ? "qi" : "ccmd",
+    sts & GSTS_IRES ? "ir" : "",
+    cmd, sts);
+ for (i = 0; i < 256; i++) {
+ /* One root entry per bus; skip buses never initialized. */
+ if (!root_entry_is_valid(&iommu->root[i])) {
+ continue;
+ }
+ for (j = 0; j < 256; j++) {
+ /* One context entry per devfn on this bus. */
+ ctx = iommu->ctx[i] + j;
+ if (!context_entry_is_valid(ctx)) {
+ continue;
+ }
+ /* Read class code (config offset 0x08 >> 8) for display. */
+ tag = pci_make_tag(NULL, i, (j >> 3), j & 0x7);
+ clc = pci_conf_read(NULL, tag, 0x08) >> 8;
+ printf("  %.2x:%.2x.%x lvl:%d did:%.4x tt:%d ptep:%.16llx flag:%x cc:%.6x\n",
+    i, (j >> 3), j & 7,
+    context_address_width(ctx),
+    context_domain_id(ctx),
+    context_translation_type(ctx),
+    context_pte(ctx),
+    context_user(ctx),
+    clc);
+#if 0
+ /* dump pagetables */
+ iommu_showpte(ctx->lo & ~VTD_PAGE_MASK, iommu->agaw -
+    VTD_STRIDE_SIZE, 0);
+#endif
+ }
+ }
+}
+
+/* Intel: Show IOMMU fault */
+/*
+ * Decode and print one fault record `fe` (fault slot `fri`): source
+ * bus/dev/fn, access direction, faulting address and fault reason, plus
+ * whether the offending devfn was ever set up in this IOMMU's context
+ * tables.  Optionally drops into ddb when acpidmar_ddb is set.
+ */
+void
+iommu_showfault(struct iommu_softc *iommu, int fri, struct fault_entry *fe)
+{
+ int bus, dev, fun, type, fr, df;
+ bios_memmap_t *im;
+ const char *mapped;
+
+ /* Ignore slots without the Fault (valid) bit set. */
+ if (!(fe->hi & FRCD_HI_F))
+ return;
+ type = (fe->hi & FRCD_HI_T) ? 'r' : 'w';
+ fr = (fe->hi >> FRCD_HI_FR_SHIFT) & FRCD_HI_FR_MASK;
+ bus = (fe->hi >> FRCD_HI_BUS_SHIFT) & FRCD_HI_BUS_MASK;
+ dev = (fe->hi >> FRCD_HI_DEV_SHIFT) & FRCD_HI_DEV_MASK;
+ fun = (fe->hi >> FRCD_HI_FUN_SHIFT) & FRCD_HI_FUN_MASK;
+ /* df = full 8-bit devfn (dev:fn), used to index the context table. */
+ df  = (fe->hi >> FRCD_HI_FUN_SHIFT) & 0xFF;
+ iommu_showcfg(iommu, mksid(bus,dev,fun));
+ if (!iommu->ctx[bus]) {
+ /* Bus is not initialized */
+ mapped = "nobus";
+ } else if (!context_entry_is_valid(&iommu->ctx[bus][df])) {
+ /* DevFn not initialized */
+ mapped = "nodevfn";
+ } else if (context_user(&iommu->ctx[bus][df]) != 0xA) {
+ /* no bus_space_map */
+ /* NOTE(review): 0xA appears to be the user-marker written when a
+  * mapping is created — confirm against the context_set_user() callers. */
+ mapped = "nomap";
+ } else {
+ /* bus_space_map */
+ mapped = "mapped";
+ }
+ printf("fri%d: dmar: %.2x:%.2x.%x %s error at %llx fr:%d [%s] iommu:%d [%s]\n",
+    fri, bus, dev, fun,
+    type == 'r' ? "read" : "write",
+    fe->lo,
+    fr, fr <= 13 ? vtd_faults[fr] : "unknown",
+    iommu->id,
+    mapped);
+ /* Flag faulting addresses that fall inside e820 reserved ranges. */
+ for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
+ if ((im->type == BIOS_MAP_RES) &&
+    (im->addr <= fe->lo) &&
+    (fe->lo <= im->addr+im->size)) {
+ printf("mem in e820.reserved\n");
+ }
+ }
+#ifdef DDB
+ if (acpidmar_ddb)
+ db_enter();
+#endif
+}
+
diff --git a/sys/dev/acpi/acpidmar.h b/sys/dev/acpi/acpidmar.h
new file mode 100644
index 000000000..33659ecaf
--- /dev/null
+++ b/sys/dev/acpi/acpidmar.h
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _DEV_ACPI_DMARREG_H_
+#define _DEV_ACPI_DMARREG_H_
+
+#define VTD_STRIDE_MASK 0x1FF
+#define VTD_STRIDE_SIZE 9
+#define VTD_PAGE_SIZE   4096
+#define VTD_PAGE_MASK   0xFFF
+#define VTD_PTE_MASK    0x0000FFFFFFFFF000LL
+
+#define VTD_LEVEL0 12
+#define VTD_LEVEL1 21
+#define VTD_LEVEL2 30 /* Minimum level supported */
+#define VTD_LEVEL3 39 /* Also supported */
+#define VTD_LEVEL4 48
+#define VTD_LEVEL5 57
+
+#define _xbit(x,y) (((x)>> (y)) & 1)
+#define _xfld(x,y) (uint32_t)(((x)>> y##_SHIFT) & y##_MASK)
+
+#define VTD_AWTOLEVEL(x)    (((x) - 30) / VTD_STRIDE_SIZE)
+#define VTD_LEVELTOAW(x)    (((x) * VTD_STRIDE_SIZE) + 30)
+
+#define DMAR_VER_REG 0x00    /* 32:Arch version supported by this IOMMU */
+#define DMAR_RTADDR_REG 0x20    /* 64:Root entry table */
+#define DMAR_FEDATA_REG 0x3c    /* 32:Fault event interrupt data register */
+#define DMAR_FEADDR_REG 0x40    /* 32:Fault event interrupt addr register */
+#define DMAR_FEUADDR_REG 0x44    /* 32:Upper address register */
+#define DMAR_AFLOG_REG 0x58    /* 64:Advanced Fault control */
+#define DMAR_PMEN_REG 0x64    /* 32:Enable Protected Memory Region */
+#define DMAR_PLMBASE_REG 0x68    /* 32:PMRR Low addr */
+#define DMAR_PLMLIMIT_REG 0x6c    /* 32:PMRR low limit */
+#define DMAR_PHMBASE_REG 0x70    /* 64:pmrr high base addr */
+#define DMAR_PHMLIMIT_REG 0x78    /* 64:pmrr high limit */
+#define DMAR_ICS_REG 0x9C    /* 32:Invalidation complete status register */
+#define DMAR_IECTL_REG 0xa0    /* 32:Invalidation event control register */
+#define DMAR_IEDATA_REG 0xa4    /* 32:Invalidation event data register */
+#define DMAR_IEADDR_REG 0xa8    /* 32:Invalidation event address register */
+#define DMAR_IEUADDR_REG 0xac    /* 32:Invalidation event upper address register */
+#define DMAR_IRTA_REG 0xb8    /* 64:Interrupt remapping table addr register */
+#define DMAR_CAP_REG 0x08    /* 64:Hardware supported capabilities */
+#define   CAP_PI (1LL << 59)
+#define   CAP_FL1GP (1LL << 56)
+#define   CAP_DRD (1LL << 55)
+#define   CAP_DWD (1LL << 54)
+#define   CAP_MAMV_MASK 0x3F
+#define   CAP_MAMV_SHIFT 48LL
+#define   cap_mamv(x) _xfld(x,CAP_MAMV)
+#define   CAP_NFR_MASK 0xFF
+#define   CAP_NFR_SHIFT 40LL
+#define   cap_nfr(x) (_xfld(x,CAP_NFR) + 1)
+#define   CAP_PSI (1LL << 39)
+#define   CAP_SLLPS_MASK 0xF
+#define   CAP_SLLPS_SHIFT 34LL
+#define   cap_sllps(x) _xfld(x,CAP_SLLPS)
+#define   CAP_FRO_MASK 0x3FF
+#define   CAP_FRO_SHIFT 24LL
+#define   cap_fro(x) (_xfld(x,CAP_FRO) * 16)
+#define   CAP_ZLR (1LL << 22)
+#define   CAP_MGAW_MASK 0x3F
+#define   CAP_MGAW_SHIFT 16LL
+#define   cap_mgaw(x) (_xfld(x,CAP_MGAW) + 1)
+#define   CAP_SAGAW_MASK 0x1F
+#define   CAP_SAGAW_SHIFT 8LL
+#define   cap_sagaw(x) _xfld(x,CAP_SAGAW)
+#define   CAP_CM (1LL << 7)
+#define   CAP_PHMR (1LL << 6)
+#define   CAP_PLMR (1LL << 5)
+#define   CAP_RWBF (1LL << 4)
+#define   CAP_AFL (1LL << 3)
+#define   CAP_ND_MASK 0x7
+#define   CAP_ND_SHIFT 0x00
+#define   cap_nd(x) (16 << (((x) & CAP_ND_MASK) << 1))
+
+#define DMAR_ECAP_REG 0x10 /* 64:Extended capabilities supported */
+#define   ECAP_PSS_MASK 0x1F
+#define   ECAP_PSS_SHIFT 35
+#define   ECAP_EAFS (1LL << 34)
+#define   ECAP_NWFS (1LL << 33)
+#define   ECAP_SRS (1LL << 31)
+#define   ECAP_ERS (1LL << 30)
+#define   ECAP_PRS (1LL << 29)
+#define   ECAP_PASID (1LL << 28)
+#define   ECAP_DIS (1LL << 27)
+#define   ECAP_NEST (1LL << 26)
+#define   ECAP_MTS (1LL << 25)
+#define   ECAP_ECS (1LL << 24)
+#define   ECAP_MHMV_MASK 0xF
+#define   ECAP_MHMV_SHIFT 0x20
+#define   ecap_mhmv(x) _xfld(x,ECAP_MHMV)
+#define   ECAP_IRO_MASK 0x3FF /* IOTLB Register */
+#define   ECAP_IRO_SHIFT 0x8
+#define   ecap_iro(x) (_xfld(x,ECAP_IRO) * 16)
+#define   ECAP_SC (1LL << 7) /* Snoop Control */
+#define   ECAP_PT (1LL << 6) /* HW Passthru */
+#define   ECAP_EIM (1LL << 4)
+#define   ECAP_IR (1LL << 3) /* Interrupt remap */
+#define   ECAP_DT (1LL << 2) /* Device IOTLB */
+#define   ECAP_QI (1LL << 1) /* Queued Invalidation */
+#define   ECAP_C (1LL << 0) /* Coherent cache */
+
+#define DMAR_GCMD_REG 0x18 /* 32:Global command register */
+#define   GCMD_TE (1LL << 31)
+#define   GCMD_SRTP (1LL << 30)
+#define   GCMD_SFL (1LL << 29)
+#define   GCMD_EAFL (1LL << 28)
+#define   GCMD_WBF (1LL << 27)
+#define   GCMD_QIE (1LL << 26)
+#define   GCMD_IRE (1LL << 25)
+#define   GCMD_SIRTP (1LL << 24)
+#define   GCMD_CFI (1LL << 23)
+
+#define DMAR_GSTS_REG 0x1c /* 32:Global status register */
+#define   GSTS_TES (1LL << 31)
+#define   GSTS_RTPS (1LL << 30)
+#define   GSTS_FLS (1LL << 29)
+#define   GSTS_AFLS (1LL << 28)
+#define   GSTS_WBFS (1LL << 27)
+#define   GSTS_QIES (1LL << 26)
+#define   GSTS_IRES (1LL << 25)
+#define   GSTS_IRTPS (1LL << 24)
+#define   GSTS_CFIS (1LL << 23)
+
+#define DMAR_CCMD_REG 0x28 /* 64:Context command reg */
+#define   CCMD_ICC (1LL << 63)
+#define   CCMD_CIRG_MASK 0x3
+#define   CCMD_CIRG_SHIFT 61
+#define   CCMD_CIRG(x) ((uint64_t)(x) << CCMD_CIRG_SHIFT)
+#define   CCMD_CAIG_MASK 0x3
+#define   CCMD_CAIG_SHIFT 59
+#define   CCMD_FM_MASK 0x3
+#define   CCMD_FM_SHIFT 32
+#define   CCMD_FM(x) (((uint64_t)(x) << CCMD_FM_SHIFT))
+#define   CCMD_SID_MASK 0xFFFF
+#define   CCMD_SID_SHIFT 8
+#define   CCMD_SID(x) (((x) << CCMD_SID_SHIFT))
+#define   CCMD_DID_MASK 0xFFFF
+#define   CCMD_DID_SHIFT 0
+#define   CCMD_DID(x) (((x) << CCMD_DID_SHIFT))
+
+#define CIG_GLOBAL CCMD_CIRG(CTX_GLOBAL)
+#define CIG_DOMAIN CCMD_CIRG(CTX_DOMAIN)
+#define CIG_DEVICE CCMD_CIRG(CTX_DEVICE)
+
+
+#define DMAR_FSTS_REG 0x34 /* 32:Fault Status register */
+#define   FSTS_FRI_MASK 0xFF
+#define   FSTS_FRI_SHIFT 8
+#define   FSTS_PRO (1LL << 7)
+#define   FSTS_ITE (1LL << 6)
+#define   FSTS_ICE (1LL << 5)
+#define   FSTS_IQE (1LL << 4)
+#define   FSTS_APF (1LL << 3)
+#define   FSTS_APO (1LL << 2)
+#define   FSTS_PPF (1LL << 1)
+#define   FSTS_PFO (1LL << 0)
+
+#define DMAR_FECTL_REG 0x38 /* 32:Fault control register */
+#define   FECTL_IM (1LL << 31)
+#define   FECTL_IP (1LL << 30)
+
+#define FRCD_HI_F (1LL << (127-64))
+#define FRCD_HI_T (1LL << (126-64))
+#define FRCD_HI_AT_MASK 0x3
+#define FRCD_HI_AT_SHIFT (124-64)
+#define FRCD_HI_PV_MASK 0xFFFFF
+#define FRCD_HI_PV_SHIFT (104-64)
+#define FRCD_HI_FR_MASK 0xFF
+#define FRCD_HI_FR_SHIFT (96-64)
+#define FRCD_HI_PP (1LL << (95-64))
+
+#define FRCD_HI_SID_MASK 0xFF
+#define FRCD_HI_SID_SHIFT 0
+#define FRCD_HI_BUS_SHIFT 8
+#define FRCD_HI_BUS_MASK 0xFF
+#define FRCD_HI_DEV_SHIFT 3
+#define FRCD_HI_DEV_MASK 0x1F
+#define FRCD_HI_FUN_SHIFT 0
+#define FRCD_HI_FUN_MASK 0x7
+
+#define DMAR_IOTLB_REG(x) (ecap_iro((x)->ecap) + 8)
+#define DMAR_IVA_REG(x) (ecap_iro((x)->ecap) + 0)
+
+#define DMAR_FRIH_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 8)
+#define DMAR_FRIL_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 0)
+
+#define IOTLB_IVT (1LL << 63)
+#define IOTLB_IIRG_MASK 0x3
+#define IOTLB_IIRG_SHIFT 60
+#define IOTLB_IIRG(x) ((uint64_t)(x) << IOTLB_IIRG_SHIFT)
+#define IOTLB_IAIG_MASK 0x3
+#define IOTLB_IAIG_SHIFT 57
+#define IOTLB_DR (1LL << 49)
+#define IOTLB_DW (1LL << 48)
+#define IOTLB_DID_MASK 0xFFFF
+#define IOTLB_DID_SHIFT 32
+#define IOTLB_DID(x) ((uint64_t)(x) << IOTLB_DID_SHIFT)
+
+#define IIG_GLOBAL IOTLB_IIRG(IOTLB_GLOBAL)
+#define IIG_DOMAIN IOTLB_IIRG(IOTLB_DOMAIN)
+#define IIG_PAGE IOTLB_IIRG(IOTLB_PAGE)
+
+#define DMAR_IQH_REG 0x80 /* 64:Invalidation queue head register */
+#define DMAR_IQT_REG 0x88 /* 64:Invalidation queue tail register */
+#define DMAR_IQA_REG 0x90 /* 64:Invalidation queue addr register */
+#define IQA_QS_256 0 /* 256 entries */
+#define IQA_QS_512 1 /* 512 */
+#define IQA_QS_1K 2 /* 1024 */
+#define IQA_QS_2K 3 /* 2048 */
+#define IQA_QS_4K 4 /* 4096 */
+#define IQA_QS_8K 5 /* 8192 */
+#define IQA_QS_16K 6 /* 16384 */
+#define IQA_QS_32K 7 /* 32768 */
+
+/* Read-Modify-Write helpers */
+/* Replace the `mask`-wide field at bit `shift` of the 32-bit word at `ov` with `nv`. */
+static inline void iommu_rmw32(void *ov, uint32_t mask, uint32_t shift, uint32_t nv)
+{
+ *(uint32_t *)ov &= ~(mask << shift);
+ *(uint32_t *)ov |= (nv & mask) << shift;
+}
+/*
+ * Replace the `mask`-wide field at bit `shift` of the 64-bit word at `ov`
+ * with `nv`.  The mask must be promoted to 64 bits before shifting: a
+ * 32-bit `mask << shift` is undefined behavior for shift >= 32, and even
+ * for smaller shifts the 32-bit complement zero-extends and would clear
+ * the upper 32 bits of the target word.
+ */
+static inline void iommu_rmw64(void *ov, uint32_t mask, uint32_t shift, uint64_t nv)
+{
+ *(uint64_t *)ov &= ~((uint64_t)mask << shift);
+ *(uint64_t *)ov |= (nv & mask) << shift;
+}
+
+/*
+ * Root Entry: one per bus (256 x 128 bit = 4k)
+ *   0        = Present
+ *   1:11     = Reserved
+ *   12:HAW-1 = Context Table Pointer
+ *   HAW:63   = Reserved
+ *   64:127   = Reserved
+ */
+#define ROOT_P (1L << 0)
+struct root_entry {
+ uint64_t lo;
+ uint64_t hi;
+};
+
+/* A root entry is in use iff its Present bit (ROOT_P) is set. */
+static inline bool
+root_entry_is_valid(struct root_entry *re)
+{
+ return ((re->lo & ROOT_P) != 0);
+}
+
+/*
+ * Context Entry: one per devfn (256 x 128 bit = 4k)
+ *   0      = Present
+ *   1      = Fault Processing Disable
+ *   2:3    = Translation Type
+ *   4:11   = Reserved
+ *   12:63  = Second Level Page Translation
+ *   64:66  = Address Width (# PTE levels)
+ *   67:70  = Ignore
+ *   71     = Reserved
+ *   72:87  = Domain ID
+ *   88:127 = Reserved
+ */
+#define CTX_P (1L << 0)
+#define CTX_FPD (1L << 1)
+#define CTX_T_MASK 0x3
+#define CTX_T_SHIFT 2
+enum {
+ CTX_T_MULTI,
+ CTX_T_IOTLB,
+ CTX_T_PASSTHRU
+};
+
+#define CTX_H_AW_MASK 0x7
+#define CTX_H_AW_SHIFT 0
+#define CTX_H_USER_MASK 0xF
+#define CTX_H_USER_SHIFT 3
+#define CTX_H_DID_MASK 0xFFFF
+#define CTX_H_DID_SHIFT 8
+
+struct context_entry {
+ uint64_t lo;
+ uint64_t hi;
+};
+
+/* Set fault processing enable/disable */
+static inline void
+context_set_fpd(struct context_entry *ce, int enable)
+{
+ ce->lo &= ~CTX_FPD;
+ if (enable)
+ ce->lo |= CTX_FPD;
+}
+
+/* Set context entry present */
+static inline void
+context_set_present(struct context_entry *ce)
+{
+ ce->lo |= CTX_P;
+}
+
+/* Set Second Level Page Table Entry PA */
+static inline void
+context_set_slpte(struct context_entry *ce, paddr_t slpte)
+{
+ ce->lo &= VTD_PAGE_MASK;
+ ce->lo |= (slpte & ~VTD_PAGE_MASK);
+}
+
+/* Set translation type */
+static inline void
+context_set_translation_type(struct context_entry *ce, int tt)
+{
+ ce->lo &= ~(CTX_T_MASK << CTX_T_SHIFT);
+ ce->lo |= ((tt & CTX_T_MASK) << CTX_T_SHIFT);
+}
+
+/* Set Address Width (# of Page Table levels) */
+static inline void
+context_set_address_width(struct context_entry *ce, int lvl)
+{
+ ce->hi &= ~(CTX_H_AW_MASK << CTX_H_AW_SHIFT);
+ ce->hi |= ((lvl & CTX_H_AW_MASK) << CTX_H_AW_SHIFT);
+}
+
+/* Set domain ID */
+static inline void
+context_set_domain_id(struct context_entry *ce, int did)
+{
+ ce->hi &= ~(CTX_H_DID_MASK << CTX_H_DID_SHIFT);
+ ce->hi |= ((did & CTX_H_DID_MASK) << CTX_H_DID_SHIFT);
+}
+
+/* Get Second Level Page Table PA */
+static inline uint64_t
+context_pte(struct context_entry *ce)
+{
+ return (ce->lo & ~VTD_PAGE_MASK);
+}
+
+/* Get translation type */
+static inline int
+context_translation_type(struct context_entry *ce)
+{
+ return (ce->lo >> CTX_T_SHIFT) & CTX_T_MASK;
+}
+
+/* Get domain ID */
+static inline int
+context_domain_id(struct context_entry *ce)
+{
+ return (ce->hi >> CTX_H_DID_SHIFT) & CTX_H_DID_MASK;
+}
+
+/* Get Address Width */
+static inline int
+context_address_width(struct context_entry *ce)
+{
+ return VTD_LEVELTOAW((ce->hi >> CTX_H_AW_SHIFT) & CTX_H_AW_MASK);
+}
+
+/* A context entry is in use iff its Present bit (CTX_P) is set. */
+static inline bool
+context_entry_is_valid(struct context_entry *ce)
+{
+ return ((ce->lo & CTX_P) != 0);
+}
+
+/* User-available bits in context entry */
+static inline int
+context_user(struct context_entry *ce)
+{
+ return (ce->hi >> CTX_H_USER_SHIFT) & CTX_H_USER_MASK;
+}
+
+static inline void
+context_set_user(struct context_entry *ce, int v)
+{
+ ce->hi &= ~(CTX_H_USER_MASK << CTX_H_USER_SHIFT);
+ ce->hi |=  ((v & CTX_H_USER_MASK) << CTX_H_USER_SHIFT);
+}
+
+/*
+ * Fault entry
+ *   0..HAW-1 = Fault address
+ *   HAW:63   = Reserved
+ *   64:71    = Source ID
+ *   96:103   = Fault Reason
+ *   104:123  = PV
+ *   124:125  = Address Translation type
+ *   126      = Type (0 = Read, 1 = Write)
+ *   127      = Fault bit
+ */
+struct fault_entry
+{
+ uint64_t lo;
+ uint64_t hi;
+};
+
+/* PTE Entry: 512 x 64-bit = 4k */
+#define PTE_P (1L << 0)
+#define PTE_R 0x00
+#define PTE_W (1L << 1)
+#define PTE_US  (1L << 2)
+#define PTE_PWT (1L << 3)
+#define PTE_PCD (1L << 4)
+#define PTE_A   (1L << 5)
+#define PTE_D   (1L << 6)
+#define PTE_PAT (1L << 7)
+#define PTE_G   (1L << 8)
+#define PTE_EA  (1L << 10)
+#define PTE_XD  (1LL << 63)
+
+/* PDE Level entry */
+#define PTE_PS  (1L << 7)
+
+/* PDPE Level entry */
+
+/* ----------------------------------------------------------------
+ * 5555555444444444333333333222222222111111111000000000------------
+ * [PML4 ->] PDPE.1GB
+ * [PML4 ->] PDPE.PDE -> PDE.2MB
+ * [PML4 ->] PDPE.PDE -> PDE -> PTE
+ * GAW0 = (12.20) (PTE)
+ * GAW1 = (21.29) (PDE)
+ * GAW2 = (30.38) (PDPE)
+ * GAW3 = (39.47) (PML4)
+ * GAW4 = (48.57) (n/a)
+ * GAW5 = (58.63) (n/a)
+ */
+struct pte_entry {
+ uint64_t val;
+};
+
+/*
+ * Queued Invalidation entry
+ *  0:3   = 01h
+ *  4:5   = Granularity
+ *  6:15  = Reserved
+ *  16:31 = Domain ID
+ *  32:47 = Source ID
+ *  48:49 = FM
+ */
+
+/* Invalidate Context Entry */
+#define QI_CTX_DID_MASK 0xFFFF
+#define QI_CTX_DID_SHIFT 16
+#define QI_CTX_SID_MASK 0xFFFF
+#define QI_CTX_SID_SHIFT 32
+#define QI_CTX_FM_MASK 0x3
+#define QI_CTX_FM_SHIFT 48
+#define QI_CTX_IG_MASK 0x3
+#define QI_CTX_IG_SHIFT 4
+#define QI_CTX_DID(x) (((uint64_t)(x) << QI_CTX_DID_SHIFT))
+#define QI_CTX_SID(x) (((uint64_t)(x) << QI_CTX_SID_SHIFT))
+#define QI_CTX_FM(x) (((uint64_t)(x) << QI_CTX_FM_SHIFT))
+
+#define QI_CTX_IG_GLOBAL (CTX_GLOBAL << QI_CTX_IG_SHIFT)
+#define QI_CTX_IG_DOMAIN (CTX_DOMAIN << QI_CTX_IG_SHIFT)
+#define QI_CTX_IG_DEVICE (CTX_DEVICE << QI_CTX_IG_SHIFT)
+
+/* Invalidate IOTLB Entry */
+#define QI_IOTLB_DID_MASK 0xFFFF
+#define QI_IOTLB_DID_SHIFT 16
+#define QI_IOTLB_IG_MASK 0x3
+#define QI_IOTLB_IG_SHIFT 4
+#define QI_IOTLB_DR (1LL << 6)
+#define QI_IOTLB_DW (1LL << 5)
+#define QI_IOTLB_DID(x) (((uint64_t)(x) << QI_IOTLB_DID_SHIFT))
+
+#define QI_IOTLB_IG_GLOBAL (1 << QI_IOTLB_IG_SHIFT)
+#define QI_IOTLB_IG_DOMAIN (2 << QI_IOTLB_IG_SHIFT)
+#define QI_IOTLB_IG_PAGE (3 << QI_IOTLB_IG_SHIFT)
+
+/* QI Commands */
+#define QI_CTX 0x1
+#define QI_IOTLB 0x2
+#define QI_DEVTLB 0x3
+#define QI_INTR 0x4
+#define QI_WAIT 0x5
+#define QI_EXTTLB 0x6
+#define QI_PAS 0x7
+#define QI_EXTDEV 0x8
+
+struct qi_entry {
+ uint64_t lo;
+ uint64_t hi;
+};
+
+enum {
+ CTX_GLOBAL = 1,
+ CTX_DOMAIN,
+ CTX_DEVICE,
+
+ IOTLB_GLOBAL = 1,
+ IOTLB_DOMAIN,
+ IOTLB_PAGE,
+};
+
+enum {
+ VTD_FAULT_ROOT_P = 0x1,         /* P field in root entry is 0 */
+ VTD_FAULT_CTX_P = 0x2,          /* P field in context entry is 0 */
+ VTD_FAULT_CTX_INVAL = 0x3,      /* context AW/TT/SLPPTR invalid */
+ VTD_FAULT_LIMIT = 0x4,          /* Address is outside of MGAW */
+ VTD_FAULT_WRITE = 0x5,          /* Address-translation fault, non-writable */
+ VTD_FAULT_READ = 0x6,           /* Address-translation fault, non-readable */
+ VTD_FAULT_PTE_INVAL = 0x7,      /* page table hw access error */
+ VTD_FAULT_ROOT_INVAL = 0x8,     /* root table hw access error */
+ VTD_FAULT_CTX_TBL_INVAL = 0x9,  /* context entry hw access error */
+ VTD_FAULT_ROOT_RESERVED = 0xa,  /* non-zero reserved field in root entry */
+ VTD_FAULT_CTX_RESERVED = 0xb,   /* non-zero reserved field in context entry */
+ VTD_FAULT_PTE_RESERVED = 0xc,   /* non-zero reserved field in paging entry */
+ VTD_FAULT_CTX_TT = 0xd,         /* invalid translation type */
+};
+
+#endif
+
+void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
+void dmar_ptmap(bus_dma_tag_t, bus_addr_t);
+void acpidmar_sw(int);
+
+#define __EXTRACT(v,m) (((v) >> m##_SHIFT) & m##_MASK)
diff --git a/sys/dev/acpi/acpireg.h b/sys/dev/acpi/acpireg.h
index bfbb73ce2..62d7990e2 100644
--- a/sys/dev/acpi/acpireg.h
+++ b/sys/dev/acpi/acpireg.h
@@ -623,6 +623,9 @@ struct acpi_ivmd {
 struct acpi_ivhd {
  uint8_t type;
  uint8_t flags;
+#define IVHD_PPRSUP (1L << 7)
+#define IVHD_PREFSUP (1L << 6)
+#define IVHD_COHERENT (1L << 5)
 #define IVHD_IOTLB (1L << 4)
 #define IVHD_ISOC (1L << 3)
 #define IVHD_RESPASSPW (1L << 2)
@@ -638,13 +641,28 @@ struct acpi_ivhd {
 #define IVHD_UNITID_MASK 0x1F
 #define IVHD_MSINUM_SHIFT 0
 #define IVHD_MSINUM_MASK 0x1F
- uint32_t reserved;
+ uint32_t feature;
+} __packed;
+
+struct acpi_ivhd_ext {
+ uint8_t type;
+ uint8_t flags;
+ uint16_t length;
+ uint16_t devid;
+ uint16_t cap;
+ uint64_t address;
+ uint16_t segment;
+ uint16_t info;
+ uint32_t attrib;
+ uint64_t efr;
+ uint8_t reserved[8];
 } __packed;
 
 union acpi_ivrs_entry {
  struct {
  uint8_t type;
 #define IVRS_IVHD 0x10
+#define IVRS_IVHD_EXT 0x11
 #define IVRS_IVMD_ALL 0x20
 #define IVRS_IVMD_SPECIFIED 0x21
 #define IVRS_IVMD_RANGE 0x22
@@ -652,6 +670,7 @@ union acpi_ivrs_entry {
  uint16_t length;
  } __packed;
  struct acpi_ivhd ivhd;
+ struct acpi_ivhd_ext ivhd_ext;
  struct acpi_ivmd ivmd;
 } __packed;
 
diff --git a/sys/dev/acpi/amd_iommu.h b/sys/dev/acpi/amd_iommu.h
new file mode 100644
index 000000000..c7652011e
--- /dev/null
+++ b/sys/dev/acpi/amd_iommu.h
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2019 Jordan Hargrave <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef __amd_iommu_h__
+#define __amd_iommu_h__
+
+#define DEV_TAB_BASE_REG 0x0000
+#define CMD_BASE_REG 0x0008
+#define EVT_BASE_REG 0x0010
+
+#define EXCL_BASE_REG 0x0020
+#define EXCL_LIMIT_REG 0x0028
+
+/* Extended Feature Register */
+#define EXTFEAT_REG 0x0030
+#define  EFR_PREFSUP (1L << 0)
+#define  EFR_PPRSUP (1L << 1)
+#define  EFR_NXSUP (1L << 3)
+#define  EFR_GTSUP (1L << 4)
+#define  EFR_IASUP (1L << 6)
+#define  EFR_GASUP (1L << 7)
+#define  EFR_HESUP (1L << 8)
+#define  EFR_PCSUP (1L << 9)
+#define  EFR_HATS_SHIFT 10
+#define  EFR_HATS_MASK 0x3
+#define  EFR_GATS_SHIFT 12
+#define  EFR_GATS_MASK 0x3
+#define  EFR_GLXSUP_SHIFT 14
+#define  EFR_GLXSUP_MASK 0x3
+#define  EFR_SMIFSUP_SHIFT 16
+#define  EFR_SMIFSUP_MASK 0x3        
+#define  EFR_SMIFRC_SHIFT 18
+#define  EFR_SMIFRC_MASK 0x7
+#define  EFR_GAMSUP_SHIFT 21
+#define  EFR_GAMSUP_MASK 0x7
+
+#define CMD_HEAD_REG 0x2000
+#define CMD_TAIL_REG 0x2008
+#define EVT_HEAD_REG 0x2010
+#define EVT_TAIL_REG 0x2018
+
+#define IOMMUSTS_REG 0x2020
+
+#define DEV_TAB_MASK 0x000FFFFFFFFFF000LL
+#define DEV_TAB_LEN 0x1FF
+
+/* IOMMU Control */
+#define IOMMUCTL_REG 0x0018
+#define  CTL_IOMMUEN (1L << 0)
+#define  CTL_HTTUNEN (1L << 1)
+#define  CTL_EVENTLOGEN (1L << 2)
+#define  CTL_EVENTINTEN (1L << 3)
+#define  CTL_COMWAITINTEN (1L << 4)
+#define  CTL_INVTIMEOUT_SHIFT 5
+#define  CTL_INVTIMEOUT_MASK   0x7
+#define  CTL_INVTIMEOUT_NONE 0
+#define  CTL_INVTIMEOUT_1MS     1
+#define  CTL_INVTIMEOUT_10MS    2
+#define  CTL_INVTIMEOUT_100MS   3
+#define  CTL_INVTIMEOUT_1S      4
+#define  CTL_INVTIMEOUT_10S     5
+#define  CTL_INVTIMEOUT_100S    6
+#define  CTL_PASSPW (1L << 8)
+#define  CTL_RESPASSPW (1L << 9)
+#define  CTL_COHERENT (1L << 10)
+#define  CTL_ISOC (1L << 11)
+#define  CTL_CMDBUFEN (1L << 12)
+#define  CTL_PPRLOGEN (1L << 13)
+#define  CTL_PPRINTEN (1L << 14)
+#define  CTL_PPREN (1L << 15)
+#define  CTL_GTEN (1L << 16)
+#define  CTL_GAEN (1L << 17)
+#define  CTL_CRW_SHIFT 18
+#define  CTL_CRW_MASK 0xF
+#define  CTL_SMIFEN (1L << 22)
+#define  CTL_SLFWBDIS (1L << 23)
+#define  CTL_SMIFLOGEN (1L << 24)
+#define  CTL_GAMEN_SHIFT 25
+#define  CTL_GAMEN_MASK 0x7
+#define  CTL_GALOGEN (1L << 28)
+#define  CTL_GAINTEN (1L << 29)
+#define  CTL_DUALPPRLOGEN_SHIFT 30
+#define  CTL_DUALPPRLOGEN_MASK 0x3
+#define  CTL_DUALEVTLOGEN_SHIFT 32
+#define  CTL_DUALEVTLOGEN_MASK 0x3
+#define  CTL_DEVTBLSEGEN_SHIFT 34
+#define  CTL_DEVTBLSEGEN_MASK 0x7
+#define  CTL_PRIVABRTEN_SHIFT 37
+#define  CTL_PRIVABRTEN_MASK 0x3
+#define  CTL_PPRAUTORSPEN (1LL << 39)
+#define  CTL_MARCEN (1LL << 40)
+#define  CTL_BLKSTOPMRKEN (1LL << 41)
+#define  CTL_PPRAUTOSPAON (1LL << 42)
+#define  CTL_DOMAINIDPNE (1LL << 43)
+
+#define CMD_BASE_MASK 0x000FFFFFFFFFF000LL
+#define CMD_TBL_SIZE 4096
+#define CMD_TBL_LEN_4K (8LL << 56)
+/* was "9lL": mixed-case "lL" is not a valid integer suffix in C (only ll/LL) */
+#define CMD_TBL_LEN_8K (9LL << 56)
+
+#define EVT_BASE_MASK 0x000FFFFFFFFFF000LL
+#define EVT_TBL_SIZE 4096
+#define EVT_TBL_LEN_4K (8LL << 56)
+#define EVT_TBL_LEN_8K (9LL << 56)
+
+/*========================
+ * DEVICE TABLE ENTRY
+ * Contains mapping of bus-device-function
+ *
+ *  0       Valid (V)
+ *  1       Translation Valid (TV)
+ *  7:8     Host Address Dirty (HAD)
+ *  9:11    Page Table Depth (usually 4)
+ *  12:51   Page Table Physical Address
+ *  52      PPR Enable
+ *  53      GPRP
+ *  54      Guest I/O Protection Valid (GIoV)
+ *  55      Guest Translation Valid (GV)
+ *  56:57   Guest Levels translated (GLX)
+ *  58:60   Guest CR3 bits 12:14 (GCR3TRP)
+ *  61      I/O Read Permission (IR)
+ *  62      I/O Write Permission (IW)
+ *  64:79   Domain ID
+ *  80:95   Guest CR3 bits 15:30 (GCR3TRP)
+ *  96      IOTLB Enable (I)
+ *  97      Suppress multiple I/O page faults (SE)
+ *  98      Suppress all I/O page faults (SA)
+ *  99:100  Port I/O Control (IoCTL)
+ *  101     Cache IOTLB Hint
+ *  102     Snoop Disable (SD)
+ *  103     Allow Exclusion (EX)
+ *  104:105 System Management Message (SysMgt)
+ *  107:127 Guest CR3 bits 31:51 (GCR3TRP)
+ *  128     Interrupt Map Valid (IV)
+ *  129:132 Interrupt Table Length (IntTabLen)
+ *========================*/
+/* 256-bit DTE stored as eight little-endian 32-bit words (dw0 = bits 0:31). */
+struct ivhd_dte {
+ uint32_t dw0;
+ uint32_t dw1;
+ uint32_t dw2;
+ uint32_t dw3;
+ uint32_t dw4;
+ uint32_t dw5;
+ uint32_t dw6;
+ uint32_t dw7;
+} __packed;
+
+#define HWDTE_SIZE (65536 * sizeof(struct ivhd_dte))
+
+#define DTE_V (1L << 0) // dw0
+#define DTE_TV (1L << 1) // dw0
+#define DTE_LEVEL_SHIFT 9 // dw0
+#define DTE_LEVEL_MASK 0x7 // dw0
+#define DTE_HPTRP_MASK 0x000FFFFFFFFFF000LL // dw0,1
+
+#define DTE_PPR (1L << 20) // dw1
+#define DTE_GPRP (1L << 21) // dw1
+#define DTE_GIOV (1L << 22) // dw1
+#define DTE_GV (1L << 23) // dw1
+#define DTE_IR (1L << 29) // dw1
+#define DTE_IW (1L << 30) // dw1
+
+#define DTE_DID_MASK 0xFFFF // dw2
+
+#define DTE_IV (1L << 0) // dw3
+#define DTE_SE (1L << 1)
+#define DTE_SA (1L << 2)
+#define DTE_INTTABLEN_SHIFT 1
+#define DTE_INTTABLEN_MASK 0xF
+#define DTE_IRTP_MASK 0x000FFFFFFFFFFFC0LL
+
+#define PTE_LVL5                48
+#define PTE_LVL4                39
+#define PTE_LVL3                30
+#define PTE_LVL2                21
+#define PTE_LVL1                12
+
+#define PTE_NXTLVL(x)           (((x) & 0x7) << 9)
+#define PTE_PADDR_MASK 0x000FFFFFFFFFF000LL
+#define PTE_IR                  (1LL << 61)
+#define PTE_IW                  (1LL << 62)
+
+#define DTE_GCR312_MASK 0x3
+#define DTE_GCR312_SHIFT 24
+
+#define DTE_GCR315_MASK 0xFFFF
+#define DTE_GCR315_SHIFT 16
+
+#define DTE_GCR331_MASK 0xFFFFF
+#define DTE_GCR331_SHIFT 12
+
+#define _get64(x)   *(uint64_t *)(x)
+#define _put64(x,v) *(uint64_t *)(x) = (v)
+
+/* Set Guest CR3 address */
+/*
+ * NOTE(review): the DTE layout comment documents GCR3[14:12] as a 3-bit
+ * field, but DTE_GCR312_MASK is only 0x3 (2 bits) and the `>> 15` below
+ * skips bit 14 entirely; the dw-relative shifts also look off by a couple
+ * of bits.  Confirm masks/shifts against the AMD IOMMU specification
+ * before enabling guest translation (GV).
+ */
+static inline void
+dte_set_guest_cr3(struct ivhd_dte *dte, paddr_t paddr)
+{
+ iommu_rmw32(&dte->dw1, DTE_GCR312_MASK, DTE_GCR312_SHIFT, paddr >> 12);
+ iommu_rmw32(&dte->dw2, DTE_GCR315_MASK, DTE_GCR315_SHIFT, paddr >> 15);
+ iommu_rmw32(&dte->dw3, DTE_GCR331_MASK, DTE_GCR331_SHIFT, paddr >> 31);
+}
+
+/* Set Interrupt Remapping Root Pointer */
+static inline void
+dte_set_interrupt_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
+{
+ uint64_t ov = _get64(&dte->dw4);
+ _put64(&dte->dw4, (ov & ~DTE_IRTP_MASK) | (paddr & DTE_IRTP_MASK));
+}
+
+/* Set Interrupt Remapping Table length */
+static inline void
+dte_set_interrupt_table_length(struct ivhd_dte *dte, int nEnt)
+{
+ iommu_rmw32(&dte->dw4, DTE_INTTABLEN_MASK, DTE_INTTABLEN_SHIFT, nEnt);
+}
+
+/* Set Interrupt Remapping Valid */
+static inline void
+dte_set_interrupt_valid(struct ivhd_dte *dte)
+{
+ dte->dw4 |= DTE_IV;
+}
+
+/* Store the 16-bit domain ID in the low half of dw2 (DTE bits 64:79). */
+static inline void
+dte_set_domain(struct ivhd_dte *dte, uint16_t did)
+{
+ uint32_t w;
+
+ w = dte->dw2 & ~DTE_DID_MASK;
+ w |= (did & DTE_DID_MASK);
+ dte->dw2 = w;
+}
+
+/* Set Page Table Pointer for device */
+/*
+ * Install the host page table root physical address into dw0/dw1 and
+ * grant I/O read and write permission (PTE_IR/PTE_IW land on DTE bits
+ * 61/62, the IR/IW bits of the entry).
+ */
+static inline void
+dte_set_host_page_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
+{
+ uint64_t ov;
+
+ ov = _get64(&dte->dw0) & ~DTE_HPTRP_MASK;
+ ov |= (paddr & DTE_HPTRP_MASK) | PTE_IW | PTE_IR;
+
+ _put64(&dte->dw0, ov);
+}
+
+/* Set Page Table Levels Mask */
+static inline void
+dte_set_mode(struct ivhd_dte *dte, int mode)
+{
+ iommu_rmw32(&dte->dw0, DTE_LEVEL_MASK, DTE_LEVEL_SHIFT, mode);
+}
+
+static inline void
+dte_set_tv(struct ivhd_dte *dte)
+{
+ dte->dw0 |= DTE_TV;
+}
+
+/* Set Device Table Entry valid.
+ * Domain/Level/Mode/PageTable should already be set
+ */
+static inline void
+dte_set_valid(struct ivhd_dte *dte)
+{
+ dte->dw0 |= DTE_V;
+}
+
+/* Check if Device Table Entry is valid */
+static inline int
+dte_is_valid(struct ivhd_dte *dte)
+{
+ return (dte->dw0 & DTE_V);
+}
+
+/*=========================================
+ * COMMAND
+ *=========================================*/
+struct ivhd_command {
+ uint32_t dw0;
+ uint32_t dw1;
+ uint32_t dw2;
+ uint32_t dw3;
+} __packed;
+
+#define CMD_SHIFT 28
+
+enum {
+ COMPLETION_WAIT = 0x01,
+ INVALIDATE_DEVTAB_ENTRY = 0x02,
+ INVALIDATE_IOMMU_PAGES = 0x03,
+ INVALIDATE_IOTLB_PAGES = 0x04,
+ INVALIDATE_INTERRUPT_TABLE = 0x05,
+ PREFETCH_IOMMU_PAGES = 0x06,
+ COMPLETE_PPR_REQUEST = 0x07,
+ INVALIDATE_IOMMU_ALL = 0x08,
+};
+
+/*=========================================
+ * EVENT
+ *=========================================*/
+struct ivhd_event {
+ uint32_t dw0;
+ uint32_t dw1;
+ uint32_t dw2;   // address.lo
+ uint32_t dw3; // address.hi
+} __packed;
+
+#define EVT_TYPE_SHIFT 28       // dw1.0xF0000000
+#define EVT_TYPE_MASK 0xF
+#define EVT_SID_SHIFT 0        // dw0.0x0000FFFF
+#define EVT_SID_MASK 0xFFFF
+#define EVT_DID_SHIFT 0
+#define EVT_DID_MASK 0xFFFF   // dw1.0x0000FFFF
+#define EVT_FLAG_SHIFT   16
+#define EVT_FLAG_MASK   0xFFF    // dw1.0x0FFF0000
+
+/* IOMMU Fault reasons */
+enum {
+ ILLEGAL_DEV_TABLE_ENTRY = 0x1,
+ IO_PAGE_FAULT = 0x2,
+ DEV_TAB_HARDWARE_ERROR = 0x3,
+ PAGE_TAB_HARDWARE_ERROR = 0x4,
+ ILLEGAL_COMMAND_ERROR = 0x5,
+ COMMAND_HARDWARE_ERROR = 0x6,
+ IOTLB_INV_TIMEOUT = 0x7,
+ INVALID_DEVICE_REQUEST = 0x8,
+};
+
+#define EVT_GN (1L << 16)
+#define EVT_NX (1L << 17)
+#define EVT_US (1L << 18)
+#define EVT_I (1L << 19)
+#define EVT_PR (1L << 20)
+#define EVT_RW (1L << 21)
+#define EVT_PE (1L << 22)
+#define EVT_RZ (1L << 23)
+#define EVT_TR (1L << 24)
+
+struct iommu_softc;
+
+int ivhd_flush_devtab(struct iommu_softc *, int);
+int ivhd_invalidate_iommu_all(struct iommu_softc *);
+int ivhd_invalidate_interrupt_table(struct iommu_softc *, int);
+int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
+int ivhd_invalidate_domain(struct iommu_softc *, int);
+
+void _dumppte(struct pte_entry *, int, vaddr_t);
+
+#endif
diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi
index e57c39938..1cf6f2fbb 100644
--- a/sys/dev/acpi/files.acpi
+++ b/sys/dev/acpi/files.acpi
@@ -70,6 +70,11 @@ device acpiprt
 attach acpiprt at acpi
 file dev/acpi/acpiprt.c acpiprt needs-flag
 
+# DMAR device
+device acpidmar
+attach acpidmar at acpi
+file dev/acpi/acpidmar.c acpidmar
+
 # Docking station
 device acpidock
 attach acpidock at acpi

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Mark Kettenis
> Date: Tue, 8 Sep 2020 21:43:39 -0500
> From: Jordan Hargrave <[hidden email]>
>
> Made changes for the iommu_readq -> iommu_read_8 and also now
> dynamically allocate the hwdte for AMD IOMMU.

Some more bits...

> On Fri, Sep 04, 2020 at 09:17:18PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 4 Sep 2020 00:50:44 -0500
> > > From: Jordan Hargrave <[hidden email]>
> >
> > A few hints below...
> >
> > > > > +
> > > > > +/* Page Table Entry per domain */
> > > > > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > > > > +
> > > > > +/* Alias mapping */
> > > > > +#define SID_INVALID 0x80000000L
> > > > > +static uint32_t sid_flag[65536];
> > > >
> > > > Can we avoid having these large arrays, or at least allocate them
> > > > dynamically?  That would also avoid the explicit alignment which is
> > > > somewhat nasty since it affects the entire kernel.
> > >
> > > OK. But the hwdte does need the 2M area to be all contiguous but it is not
> > > needed for DMAR/Intel.  You *can* have up to 8 different device table entries
> > > though to split up the area.
> >
> > The appropriate interface to use in this context is
> > bus_dmamem_alloc(9).  You can specify alignment, and if you set nsegs
> > to 1, you will get memory that is physically contiguous.
> >
> > To map the memory into kernel address space you'll need create a map
> > using bus_dmamap_create(9) and map it using bus_dmamem_map(9).  Then
> > instead of using pmap_extract(9) you use bus_dmamap_load_raw(9) which
> > then populates the physical addresses.
> >
> > Many of the drivers written by dlg@ define convenience functions to do
> > all these steps, although interestingly enough he tends to use
> > bus_dmamap_load(9) instead of bus_dmamap_load_raw(9) which is
> > sub-optimal.
> >
> > > > > +
> > > > > +struct domain_dev {
> > > > > + int sid;
> > > > > + int sec;
> > > > > + int sub;
> > > > > + TAILQ_ENTRY(domain_dev) link;
> > > > > +};
> > > > > +
> > > > > +struct domain {
> > > > > + struct iommu_softc *iommu;
> > > > > + int did;
> > > > > + int gaw;
> > > > > + struct pte_entry *pte;
> > > > > + paddr_t ptep;
> > > > > + struct bus_dma_tag dmat;
> > > > > + int flag;
> > > > > +
> > > > > + struct mutex            exlck;
> > > > > + char exname[32];
> > > > > + struct extent *iovamap;
> > > > > + TAILQ_HEAD(,domain_dev) devices;
> > > > > + TAILQ_ENTRY(domain) link;
> > > > > +};
> > > > > +
> > > > > +#define DOM_DEBUG 0x1
> > > > > +#define DOM_NOMAP 0x2
> > > > > +
> > > > > +struct dmar_devlist {
> > > > > + int type;
> > > > > + int bus;
> > > > > + int ndp;
> > > > > + struct acpidmar_devpath *dp;
> > > > > + TAILQ_ENTRY(dmar_devlist) link;
> > > > > +};
> > > > > +
> > > > > +TAILQ_HEAD(devlist_head, dmar_devlist);
> > > > > +
> > > > > +struct ivhd_devlist {
> > > > > + int start_id;
> > > > > + int end_id;
> > > > > + int cfg;
> > > > > + TAILQ_ENTRY(ivhd_devlist) link;
> > > > > +};
> > > > > +
> > > > > +struct rmrr_softc {
> > > > > + TAILQ_ENTRY(rmrr_softc) link;
> > > > > + struct devlist_head devices;
> > > > > + int segment;
> > > > > + uint64_t start;
> > > > > + uint64_t end;
> > > > > +};
> > > > > +
> > > > > +struct atsr_softc {
> > > > > + TAILQ_ENTRY(atsr_softc) link;
> > > > > + struct devlist_head devices;
> > > > > + int segment;
> > > > > + int flags;
> > > > > +};
> > > > > +
> > > > > +struct iommu_pic {
> > > > > + struct pic pic;
> > > > > + struct iommu_softc *iommu;
> > > > > +};
> > > > > +
> > > > > +#define IOMMU_FLAGS_CATCHALL 0x1
> > > > > +#define IOMMU_FLAGS_BAD 0x2
> > > > > +#define IOMMU_FLAGS_SUSPEND 0x4
> > > > > +
> > > > > +struct iommu_softc {
> > > > > + TAILQ_ENTRY(iommu_softc)link;
> > > > > + struct devlist_head devices;
> > > > > + int id;
> > > > > + int flags;
> > > > > + int segment;
> > > > > +
> > > > > + struct mutex reg_lock;
> > > > > +
> > > > > + bus_space_tag_t iot;
> > > > > + bus_space_handle_t ioh;
> > > > > +
> > > > > + uint64_t cap;
> > > > > + uint64_t ecap;
> > > > > + uint32_t gcmd;
> > > > > +
> > > > > + int mgaw;
> > > > > + int agaw;
> > > > > + int ndoms;
> > > > > +
> > > > > + struct root_entry *root;
> > > > > + struct context_entry *ctx[256];
> > > > > +
> > > > > + void *intr;
> > > > > + struct iommu_pic pic;
> > > > > + int fedata;
> > > > > + uint64_t feaddr;
> > > > > + uint64_t rtaddr;
> > > > > +
> > > > > + // Queued Invalidation
> > > > > + int qi_head;
> > > > > + int qi_tail;
> > > > > + paddr_t qip;
> > > > > + struct qi_entry *qi;
> > > > > +
> > > > > + struct domain *unity;
> > > > > + TAILQ_HEAD(,domain) domains;
> > > > > +
> > > > > + // AMD iommu
> > > > > + struct ivhd_dte         *dte;
> > > > > + void *cmd_tbl;
> > > > > + void *evt_tbl;
> > > > > + paddr_t cmd_tblp;
> > > > > + paddr_t evt_tblp;
> > > > > + uint64_t wv[128] __aligned(4096);
> > > >
> > > > This wv array isn't used as far as I can tell.
> > >
> > > Ah I was doing some testing on the commands.. I keep getting iommu
> > > command timeouts
> >
> > Hmm, yes, using the wv variable on the stack as you do below is a bit
> > suspect.  Using __aligned() for stack variables may not give you the
> > proper alignment if the stack alignment is smaller.
> >
> diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
> index 2c49f91a1..1eda12bc9 100644
> --- a/sys/arch/amd64/conf/GENERIC
> +++ b/sys/arch/amd64/conf/GENERIC
> @@ -45,6 +45,7 @@ acpibtn* at acpi?
>  acpicpu* at acpi?
>  acpicmos* at acpi?
>  acpidock* at acpi?
> +acpidmar0 at acpi?
>  acpiec* at acpi?
>  acpipci* at acpi?
>  acpiprt* at acpi?
> diff --git a/sys/arch/amd64/conf/RAMDISK b/sys/arch/amd64/conf/RAMDISK
> index 10148add1..7d4134000 100644
> --- a/sys/arch/amd64/conf/RAMDISK
> +++ b/sys/arch/amd64/conf/RAMDISK
> @@ -34,6 +34,7 @@ acpipci* at acpi?
>  acpiprt* at acpi?
>  acpimadt0 at acpi?
>  #acpitz* at acpi?
> +acpidmar0 at acpi? disable

That shouldn't be here.

What you need to do is use the needs-flag attribute in files.acpi.  See below.

>  mpbios0 at bios0
>  
> diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD
> index 91022751e..d043ff8dd 100644
> --- a/sys/arch/amd64/conf/RAMDISK_CD
> +++ b/sys/arch/amd64/conf/RAMDISK_CD
> @@ -48,6 +48,7 @@ sdhc* at acpi?
>  acpihve* at acpi?
>  chvgpio*        at acpi?
>  glkgpio* at acpi?
> +acpidmar0 at acpi? disable

Shouldn't be here either.

>  mpbios0 at bios0
>  
> diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
> index 7a5d40bf4..74c7fe5a9 100644
> --- a/sys/arch/amd64/conf/files.amd64
> +++ b/sys/arch/amd64/conf/files.amd64
> @@ -132,6 +132,10 @@ device pchb: pcibus, agpbus
>  attach pchb at pci
>  file arch/amd64/pci/pchb.c pchb
>  
> +device vmmpci
> +attach vmmpci at pci
> +file   arch/amd64/pci/vmmpci.c vmmpci
> +

Shouldn't be part of this diff.

>  # AMAS AMD memory address switch
>  device amas
>  attach amas at pci
> diff --git a/sys/arch/amd64/include/pci_machdep.h b/sys/arch/amd64/include/pci_machdep.h
> index bc295cc22..c725bdc73 100644
> --- a/sys/arch/amd64/include/pci_machdep.h
> +++ b/sys/arch/amd64/include/pci_machdep.h
> @@ -91,7 +91,8 @@ void *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
>      int, struct cpu_info *,
>      int (*)(void *), void *, const char *);
>  void pci_intr_disestablish(pci_chipset_tag_t, void *);
> -#define pci_probe_device_hook(c, a) (0)
> +int pci_probe_device_hook(pci_chipset_tag_t,
> +    struct pci_attach_args *);
>  
>  void pci_dev_postattach(struct device *, struct pci_attach_args *);
>  
> diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c
> index cf4e835de..b700946a4 100644
> --- a/sys/arch/amd64/pci/pci_machdep.c
> +++ b/sys/arch/amd64/pci/pci_machdep.c
> @@ -89,6 +89,11 @@
>  #include <machine/mpbiosvar.h>
>  #endif
>  
> +#include "acpi.h"
> +#if NACPI > 0
> +#include <dev/acpi/acpidmar.h>
> +#endif
> +

With the "needs-flag" fro acpidmar, you can do:

#include "acpidmar.h"
#if NACPIDMAR > 0
#include <dev/acpi/acpidmar.h>
#endif

>  /*
>   * Memory Mapped Configuration space access.
>   *
> @@ -797,7 +802,15 @@ pci_init_extents(void)
>   }
>  }
>  
> -#include "acpi.h"
> +int
> +pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> +{
> +#if NACPI > 0

#if NACPIDMAR > 0

> + acpidmar_pci_hook(pc, pa);
> +#endif
> + return 0;
> +}
> +
>  #if NACPI > 0
>  void acpi_pci_match(struct device *, struct pci_attach_args *);
>  pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t);
> diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c
> index a6239198e..ea11483ad 100644
> --- a/sys/dev/acpi/acpi.c
> +++ b/sys/dev/acpi/acpi.c
> @@ -49,6 +49,7 @@
>  #include <dev/acpi/amltypes.h>
>  #include <dev/acpi/acpidev.h>
>  #include <dev/acpi/dsdt.h>
> +#include <dev/acpi/acpidmar.h>

Must be made conditional.  This file is used on arm64 as well, and
there won't be acpidmar there.  Actually, you may not need this header
at all.

>  #include <dev/wscons/wsdisplayvar.h>
>  
>  #include <dev/pci/pcidevs.h>
> @@ -2448,6 +2449,8 @@ acpi_sleep_pm(struct acpi_softc *sc, int state)
>      sc->sc_fadt->pm2_cnt_blk && sc->sc_fadt->pm2_cnt_len)
>   acpi_write_pmreg(sc, ACPIREG_PM2_CNT, 0, ACPI_PM2_ARB_DIS);
>  
> + acpidmar_sw(DVACT_SUSPEND);
> +

This should be handled by having an activate function in acpidmar(4).

>   /* Write SLP_TYPx values */
>   rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
>   regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
> @@ -2483,6 +2486,8 @@ acpi_resume_pm(struct acpi_softc *sc, int fromstate)
>  {
>   uint16_t rega, regb, en;
>  
> + acpidmar_sw(DVACT_RESUME);
> +

Should be handled by that activate function as well.

>   /* Write SLP_TYPx values */
>   rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
>   regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
> diff --git a/sys/dev/acpi/acpidmar.c b/sys/dev/acpi/acpidmar.c
> new file mode 100644
> index 000000000..e7daddc43
> --- /dev/null
> +++ b/sys/dev/acpi/acpidmar.c
> @@ -0,0 +1,3028 @@
> +/*
> + * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +#include <sys/kernel.h>
> +#include <sys/device.h>
> +#include <sys/malloc.h>
> +#include <sys/queue.h>
> +#include <sys/types.h>
> +#include <sys/mbuf.h>
> +#include <sys/proc.h>
> +
> +#include <uvm/uvm_extern.h>
> +
> +#include <machine/apicvar.h>
> +#include <machine/biosvar.h>
> +#include <machine/cpuvar.h>
> +#include <machine/bus.h>
> +
> +#include <dev/acpi/acpireg.h>
> +#include <dev/acpi/acpivar.h>
> +#include <dev/acpi/acpidev.h>
> +#include <dev/acpi/amltypes.h>
> +#include <dev/acpi/dsdt.h>
> +
> +#include <uvm/uvm_extern.h>
> +
> +#include <machine/i8259.h>
> +#include <machine/i82093reg.h>
> +#include <machine/i82093var.h>
> +#include <machine/i82489reg.h>
> +#include <machine/i82489var.h>
> +
> +#include <machine/mpbiosvar.h>
> +
> +#include <dev/pci/pcireg.h>
> +#include <dev/pci/pcivar.h>
> +#include <dev/pci/pcidevs.h>
> +#include <dev/pci/ppbreg.h>
> +
> +#include "ioapic.h"
> +
> +#include "acpidmar.h"
> +#include "amd_iommu.h"
> +
> +#define dprintf(x...)
> +
> +#ifdef DDB
> +int acpidmar_ddb = 0;
> +#endif
> +
> +int intel_iommu_gfx_mapped = 0;
> +int force_cm = 1;
> +
> +void showahci(void *);
> +
> +/* Page Table Entry per domain */
> +struct iommu_softc;
> +
> +static inline int
> +mksid(int b, int d, int f)
> +{
> + return (b << 8) + (d << 3) + f;
> +}
> +
> +static inline int
> +sid_devfn(int sid)
> +{
> + return sid & 0xff;
> +}
> +
> +static inline int
> +sid_bus(int sid)
> +{
> + return (sid >> 8) & 0xff;
> +}
> +
> +static inline int
> +sid_dev(int sid)
> +{
> + return (sid >> 3) & 0x1f;
> +}
> +
> +static inline int
> +sid_fun(int sid)
> +{
> + return (sid >> 0) & 0x7;
> +}
> +
> +/* Alias mapping */
> +#define SID_INVALID 0x80000000L
> +static uint32_t sid_flag[65536];
> +
> +struct domain_dev {
> + int sid;
> + int sec;
> + int sub;
> + TAILQ_ENTRY(domain_dev) link;
> +};
> +
> +struct domain {
> + struct iommu_softc *iommu;
> + int did;
> + int gaw;
> + struct pte_entry *pte;
> + paddr_t ptep;
> + struct bus_dma_tag dmat;
> + int flag;
> +
> + struct mutex            exlck;
> + char exname[32];
> + struct extent *iovamap;
> + TAILQ_HEAD(,domain_dev) devices;
> + TAILQ_ENTRY(domain) link;
> +};
> +
> +#define DOM_DEBUG 0x1
> +#define DOM_NOMAP 0x2
> +
> +struct dmar_devlist {
> + int type;
> + int bus;
> + int ndp;
> + struct acpidmar_devpath *dp;
> + TAILQ_ENTRY(dmar_devlist) link;
> +};
> +
> +TAILQ_HEAD(devlist_head, dmar_devlist);
> +
> +struct ivhd_devlist {
> + int start_id;
> + int end_id;
> + int cfg;
> + TAILQ_ENTRY(ivhd_devlist) link;
> +};
> +
> +struct rmrr_softc {
> + TAILQ_ENTRY(rmrr_softc) link;
> + struct devlist_head devices;
> + int segment;
> + uint64_t start;
> + uint64_t end;
> +};
> +
> +struct atsr_softc {
> + TAILQ_ENTRY(atsr_softc) link;
> + struct devlist_head devices;
> + int segment;
> + int flags;
> +};
> +
> +struct iommu_pic {
> + struct pic pic;
> + struct iommu_softc *iommu;
> +};
> +
> +#define IOMMU_FLAGS_CATCHALL 0x1
> +#define IOMMU_FLAGS_BAD 0x2
> +#define IOMMU_FLAGS_SUSPEND 0x4
> +
> +struct iommu_softc {
> + TAILQ_ENTRY(iommu_softc)link;
> + struct devlist_head devices;
> + int id;
> + int flags;
> + int segment;
> +
> + struct mutex reg_lock;
> +
> + bus_space_tag_t iot;
> + bus_space_handle_t ioh;
> +
> + uint64_t cap;
> + uint64_t ecap;
> + uint32_t gcmd;
> +
> + int mgaw;
> + int agaw;
> + int ndoms;
> +
> + struct root_entry *root;
> + struct context_entry *ctx[256];
> +
> + void *intr;
> + struct iommu_pic pic;
> + int fedata;
> + uint64_t feaddr;
> + uint64_t rtaddr;
> +
> + // Queued Invalidation
> + int qi_head;
> + int qi_tail;
> + paddr_t qip;
> + struct qi_entry *qi;
> +
> + struct domain *unity;
> + TAILQ_HEAD(,domain) domains;
> +
> + // AMD iommu
> + struct ivhd_dte         *dte;
> + void *cmd_tbl;
> + void *evt_tbl;
> + paddr_t cmd_tblp;
> + paddr_t evt_tblp;
> +};
> +
> +static inline int iommu_bad(struct iommu_softc *sc)
> +{
> + return (sc->flags & IOMMU_FLAGS_BAD);
> +}
> +
> +static inline int iommu_enabled(struct iommu_softc *sc)
> +{
> + if (sc->dte) {
> + return 1;
> + }
> + return (sc->gcmd & GCMD_TE);
> +}
> +
> +struct acpidmar_softc {
> + struct device sc_dev;
> +
> + pci_chipset_tag_t sc_pc;
> + bus_space_tag_t sc_memt;
> + int sc_haw;
> + int sc_flags;
> + struct bus_dma_tag sc_dmat;
> +
> + struct ivhd_dte *sc_hwdte;
> + paddr_t sc_hwdtep;
> +
> + TAILQ_HEAD(,iommu_softc)sc_drhds;
> + TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
> + TAILQ_HEAD(,atsr_softc) sc_atsrs;
> +};
> +
> +int acpidmar_activate(struct device *, int);
> +int acpidmar_match(struct device *, void *, void *);
> +void acpidmar_attach(struct device *, struct device *, void *);
> +struct domain   *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
> +
> +struct cfattach acpidmar_ca = {
> + sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
> +};
> +
> +struct cfdriver acpidmar_cd = {
> + NULL, "acpidmar", DV_DULL
> +};
> +
> +struct acpidmar_softc *acpidmar_sc;
> +int acpidmar_intr(void *);
> +int acpiivhd_intr(void *);
> +
> +#define DID_UNITY 0x1
> +
> +void _dumppte(struct pte_entry *, int, vaddr_t);
> +
> +struct domain *domain_create(struct iommu_softc *, int);
> +struct domain *domain_lookup(struct acpidmar_softc *, int, int);
> +
> +void domain_unload_map(struct domain *, bus_dmamap_t);
> +void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
> +
> +void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
> +void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
> +void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
> +void domain_map_pthru(struct domain *, paddr_t, paddr_t);
> +
> +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
> +void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
> +    struct devlist_head *);
> +int  acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
> +
> +void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
> +void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
> +void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
> +void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
> +void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
> +
> +void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
> +    const char *);
> +
> +void iommu_write_4(struct iommu_softc *, int, uint32_t);
> +uint32_t iommu_read_4(struct iommu_softc *, int);
> +void iommu_write_8(struct iommu_softc *, int, uint64_t);
> +uint64_t iommu_read_8(struct iommu_softc *, int);
> +void iommu_showfault(struct iommu_softc *, int,
> +    struct fault_entry *);
> +void iommu_showcfg(struct iommu_softc *, int);
> +
> +int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> +    struct acpidmar_drhd *);
> +int iommu_enable_translation(struct iommu_softc *, int);
> +void iommu_enable_qi(struct iommu_softc *, int);
> +void iommu_flush_cache(struct iommu_softc *, void *, size_t);
> +void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
> +void iommu_flush_write_buffer(struct iommu_softc *);
> +void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
> +
> +void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
> +void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
> +void iommu_flush_tlb(struct iommu_softc *, int, int);
> +void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
> +
> +void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
> +
> +const char *dmar_bdf(int);
> +
> +void *iommu_alloc_contig(struct acpidmar_softc *sc, size_t size, paddr_t *paddr);
> +
> +const char *
> +dmar_bdf(int sid)
> +{
> + static char bdf[32];
> +
> + snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
> +    sid_bus(sid), sid_dev(sid), sid_fun(sid));
> +
> + return (bdf);
> +}
> +
> +/* busdma */
> +static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
> +    bus_size_t, int, bus_dmamap_t *);
> +static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
> +static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
> +    struct proc *, int);
> +static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
> +    int);
> +static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
> +static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
> +    bus_dma_segment_t *, int, bus_size_t, int);
> +static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
> +static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
> +    bus_size_t, int);
> +static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
> +    bus_dma_segment_t *, int, int *, int);
> +static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
> +static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
> +    caddr_t *, int);
> +static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
> +static paddr_t dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
> +    int, int);
> +
> +static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
> +const char *dom_bdf(struct domain *);
> +void domain_map_check(struct domain *);
> +
> +struct pte_entry *pte_lvl(struct iommu_softc *, struct pte_entry *, vaddr_t, int, uint64_t);
> +int  ivhd_poll_events(struct iommu_softc *);
> +void ivhd_showit(struct iommu_softc *);
> +void ivhd_showdte(struct iommu_softc *);
> +void ivhd_showcmd(struct iommu_softc *);
> +
> +static inline int
> +debugme(struct domain *dom)
> +{
> + return 0;
> + return (dom->flag & DOM_DEBUG);
> +}
> +
> +void
> +domain_map_check(struct domain *dom)
> +{
> + struct iommu_softc *iommu;
> + struct domain_dev *dd;
> + struct context_entry *ctx;
> + int v;
> +
> + iommu = dom->iommu;
> + TAILQ_FOREACH(dd, &dom->devices, link) {
> + acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
> +
> + if (iommu->dte)
> + continue;
> +
> + /* Check if this is the first time we are mapped */
> + ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
> + v = context_user(ctx);
> + if (v != 0xA) {
> + printf("  map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> +    iommu->segment,
> +    sid_bus(dd->sid),
> +    sid_dev(dd->sid),
> +    sid_fun(dd->sid),
> +    iommu->id,
> +    dom->did);
> + context_set_user(ctx, 0xA);
> + }
> + }
> +}
> +
> +/* Map a single page as passthrough - used for DRM */
> +void
> +dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
> +{
> + struct domain *dom = tag->_cookie;
> +
> + if (!acpidmar_sc)
> + return;
> + domain_map_check(dom);
> + domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
> +}
> +
> +/* Map a range of pages 1:1 */
> +void
> +domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
> +{
> + domain_map_check(dom);
> + while (start < end) {
> + domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
> + start += VTD_PAGE_SIZE;
> + }
> +}
> +
> +/* Map a single paddr to IOMMU paddr */
> +void
> +domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
> +{
> + paddr_t paddr;
> + struct pte_entry *pte, *npte;
> + int lvl, idx;
> + struct iommu_softc *iommu;
> +
> + iommu = dom->iommu;
> + /* Insert physical address into virtual address map
> + * XXX: could we use private pmap here?
> + * essentially doing a pmap_enter(map, va, pa, prot);
> + */
> +
> + /* Only handle 4k pages for now */
> + npte = dom->pte;
> + for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl>= VTD_LEVEL0;
> +    lvl -= VTD_STRIDE_SIZE) {
> + idx = (va >> lvl) & VTD_STRIDE_MASK;
> + pte = &npte[idx];
> + if (lvl == VTD_LEVEL0) {
> + /* Level 1: Page Table - add physical address */
> + pte->val = pa | flags;
> + iommu_flush_cache(iommu, pte, sizeof(*pte));
> + break;
> + } else if (!(pte->val & PTE_P)) {
> + /* Level N: Point to lower level table */
> + iommu_alloc_page(iommu, &paddr);
> + pte->val = paddr | PTE_P | PTE_R | PTE_W;
> + iommu_flush_cache(iommu, pte, sizeof(*pte));
> + }
> + npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
> + }
> +}
> +
> +/* Map a single paddr to IOMMU paddr: AMD
> + * physical address breakdown into levels:
> + * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
> + *        5.55555555.44444444.43333333,33222222.22211111.1111----.--------
> + * mode:
> + *  000 = none   shift
> + *  001 = 1 [21].12
> + *  010 = 2 [30].21
> + *  011 = 3 [39].30
> + *  100 = 4 [48].39
> + *  101 = 5 [57]
> + *  110 = 6
> + *  111 = reserved
> + */
> +struct pte_entry *
> +pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
> + int shift, uint64_t flags)
> +{
> + paddr_t paddr;
> + int idx;
> +
> + idx = (va >> shift) & VTD_STRIDE_MASK;
> + if (!(pte[idx].val & PTE_P)) {
> + /* Page Table entry is not present... create a new page entry */
> + iommu_alloc_page(iommu, &paddr);
> + pte[idx].val = paddr | flags;
> + iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
> + }
> + return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
> +}
> +
> +void
> +domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
> +{
> + struct pte_entry *pte;
> + struct iommu_softc *iommu;
> + int idx;
> +
> + iommu = dom->iommu;
> + /* Insert physical address into virtual address map
> + * XXX: could we use private pmap here?
> + * essentially doing a pmap_enter(map, va, pa, prot);
> + */
> +
> + /* Always assume AMD levels=4                           */
> + /*        39        30        21        12              */
> + /* ---------|---------|---------|---------|------------ */
> + pte = dom->pte;
> + //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
> + pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
> + pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
> + //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
> +
> + if (flags)
> + flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
> +
> + /* Level 1: Page Table - add physical address */
> + idx = (va >> 12) & 0x1FF;
> + pte[idx].val = pa | flags;
> +
> + iommu_flush_cache(iommu, pte, sizeof(*pte));
> +}
> +
> +static void
> +dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
> +    const char *lbl)
> +{
> + struct domain *dom = tag->_cookie;
> + int i;
> +
> + return;
> + if (!debugme(dom))
> + return;
> + printf("%s: %s\n", lbl, dom_bdf(dom));
> + for (i = 0; i < nseg; i++) {
> + printf("  %.16llx %.8x\n",
> +    (uint64_t)segs[i].ds_addr,
> +    (uint32_t)segs[i].ds_len);
> + }
> +}
> +
> +/* Unload mapping */
> +void
> +domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
> +{
> + bus_dma_segment_t *seg;
> + paddr_t base, end, idx;
> + psize_t alen;
> + int i;
> +
> + if (iommu_bad(dom->iommu)) {
> + printf("unload map no iommu\n");
> + return;
> + }
> +
> + //acpidmar_intr(dom->iommu);
> + for (i = 0; i < dmam->dm_nsegs; i++) {
> + seg  = &dmam->dm_segs[i];
> +
> + base = trunc_page(seg->ds_addr);
> + end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
> + alen = end - base;
> +
> + if (debugme(dom)) {
> + printf("  va:%.16llx len:%x\n",
> +    (uint64_t)base, (uint32_t)alen);
> + }
> +
> + /* Clear PTE */
> + for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
> + domain_map_page(dom, base + idx, 0, 0);
> +
> + if (dom->flag & DOM_NOMAP) {
> + //printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
> + continue;
> + }
> +
> + mtx_enter(&dom->exlck);
> + if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
> + panic("domain_unload_map: extent_free");
> + }
> + mtx_leave(&dom->exlck);
> + }
> +}
> +
> +/* map.segs[x].ds_addr is modified to IOMMU virtual PA */
> +void
> +domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
> +{
> + bus_dma_segment_t *seg;
> + struct iommu_softc *iommu;
> + paddr_t base, end, idx;
> + psize_t alen;
> + u_long res;
> + int i;
> +
> + iommu = dom->iommu;
> + if (!iommu_enabled(iommu)) {
> + /* Lazy enable translation when required */
> + if (iommu_enable_translation(iommu, 1)) {
> + return;
> + }
> + }
> + domain_map_check(dom);
> + //acpidmar_intr(iommu);
> + for (i = 0; i < map->dm_nsegs; i++) {
> + seg = &map->dm_segs[i];
> +
> + base = trunc_page(seg->ds_addr);
> + end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
> + alen = end - base;
> + res  = base;
> +
> + if (dom->flag & DOM_NOMAP) {
> + goto nomap;
> + }
> +
> + /* Allocate DMA Virtual Address */
> + mtx_enter(&dom->exlck);
> + if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
> +    map->_dm_boundary, EX_NOWAIT, &res)) {
> + panic("domain_load_map: extent_alloc");
> + }
> + if (res == -1) {
> + panic("got -1 address\n");
> + }
> + mtx_leave(&dom->exlck);
> +
> + /* Reassign DMA address */
> + seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
> +nomap:
> + if (debugme(dom)) {
> + printf("  LOADMAP: %.16llx %x => %.16llx\n",
> +    (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
> +    (uint64_t)res);
> + }
> + for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
> + domain_map_page(dom, res + idx, base + idx,
> +    PTE_P | pteflag);
> + }
> + }
> + if ((iommu->cap & CAP_CM) || force_cm) {
> + iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
> + } else {
> + iommu_flush_write_buffer(iommu);
> + }
> +}
> +
> +const char *
> +dom_bdf(struct domain *dom)
> +{
> + struct domain_dev *dd;
> + static char mmm[48];
> +
> + dd = TAILQ_FIRST(&dom->devices);
> + snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
> +    dmar_bdf(dd->sid), dom->iommu->id, dom->did,
> +    dom->did == DID_UNITY ? " [unity]" : "");
> + return (mmm);
> +}
> +
> +/* Bus DMA Map functions */
> +static int
> +dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
> +    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
> +{
> + int rc;
> +
> + rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
> +    flags, dmamp);
> + if (!rc) {
> + dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static void
> +dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
> +{
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> + _bus_dmamap_destroy(tag, dmam);
> +}
> +
> +static int
> +dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
> +    bus_size_t buflen, struct proc *p, int flags)
> +{
> + struct domain *dom = tag->_cookie;
> + int rc;
> +
> + rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static int
> +dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
> +    int flags)
> +{
> + struct domain *dom = tag->_cookie;
> + int rc;
> +
> + rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static int
> +dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
> +    int flags)
> +{
> + struct domain *dom = tag->_cookie;
> + int rc;
> +
> + rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static int
> +dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
> +    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
> +{
> + struct domain *dom = tag->_cookie;
> + int rc;
> +
> + rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
> + if (!rc) {
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> +    __FUNCTION__);
> + }
> + return (rc);
> +}
> +
> +static void
> +dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
> +{
> + struct domain *dom = tag->_cookie;
> +
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> + domain_unload_map(dom, dmam);
> + _bus_dmamap_unload(tag, dmam);
> +}
> +
> +/*
> + * Sync passes straight through to the generic implementation.
> + * The disabled block sketches per-direction PTE permission updates;
> + * NOTE(review): it references 'flag' whose declaration is commented
> + * out, so it would not compile if re-enabled as-is.
> + */
> +static void
> +dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
> +    bus_size_t len, int ops)
> +{
> +#if 0
> + struct domain *dom = tag->_cookie;
> + //int flag;
> +
> + flag = PTE_P;
> + //acpidmar_intr(dom->iommu);
> + if (ops == BUS_DMASYNC_PREREAD) {
> + /* make readable */
> + flag |= PTE_R;
> + }
> + else if (ops == BUS_DMASYNC_PREWRITE) {
> + /* make writeable */
> + flag |= PTE_W;
> + }
> + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> +#endif
> + _bus_dmamap_sync(tag, dmam, offset, len, ops);
> +}
> +
> +/* Allocate DMA-safe memory; on success dump the resulting segments. */
> +static int
> +dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
> +    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
> +    int flags)
> +{
> + int error;
> +
> + error = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
> +    rsegs, flags);
> + if (error != 0)
> + return (error);
> + dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
> + return (0);
> +}
> +
> +/* Free DMA-safe memory; dump the segments being released first. */
> +static void
> +dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
> +{
> + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> + _bus_dmamem_free(tag, segs, nsegs);
> +}
> +
> +/* Map allocated DMA segments into kernel va; pure pass-through. */
> +static int
> +dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> +    size_t size, caddr_t *kvap, int flags)
> +{
> + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> + return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
> +}
> +
> +/* Unmap DMA memory from kernel va; pass-through with optional trace. */
> +static void
> +dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
> +{
> + struct domain *dom = tag->_cookie;
> +
> + if (debugme(dom)) {
> + printf("dmamap_unmap: %s\n", dom_bdf(dom));
> + }
> + _bus_dmamem_unmap(tag, kva, size);
> +}
> +
> +/* mmap() support for DMA memory; pure pass-through. */
> +static paddr_t
> +dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> +    off_t off, int prot, int flags)
> +{
> + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> + return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
> +}
> +
> +/*===================================
> + * IOMMU code
> + *===================================*/
> +
> +/*
> + * Intel: Set Context Root Address.
> + * Programs RTADDR, issues the Set-Root-Table-Pointer command, then
> + * polls GSTS for the RTPS status bit.
> + * NOTE(review): unlike the other poll loops here there is no delay()
> + * between reads, so the hardware only gets ~5 back-to-back chances
> + * to latch -- confirm whether a delay is needed.
> + */
> +void
> +iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
> +{
> + int i, sts;
> +
> + mtx_enter(&iommu->reg_lock);
> + iommu_write_8(iommu, DMAR_RTADDR_REG, paddr);
> + iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
> + for (i = 0; i < 5; i++) {
> + sts = iommu_read_4(iommu, DMAR_GSTS_REG);
> + if (sts & GSTS_RTPS)
> + break;
> + }
> + mtx_leave(&iommu->reg_lock);
> +
> + if (i == 5) {
> + printf("set_rtaddr fails\n");
> + }
> +}
> +
> +/*
> + * Allocate wired, contiguous DMA-safe memory (1Mb, used for the AMD
> + * Device Table Entries).  Returns the kernel va and stores the
> + * physical address through *paddr; returns NULL on failure.
> + *
> + * Fixes: the DMA tag was previously used uninitialized, and the
> + * failure paths leaked the map/memory/mapping created so far.
> + * NOTE(review): assumes the softc carries the parent DMA tag as
> + * sc->sc_dmat -- confirm the field name.
> + */
> +void *
> +iommu_alloc_contig(struct acpidmar_softc *sc, size_t size, paddr_t *paddr)
> +{
> + caddr_t vaddr;
> + bus_dmamap_t map;
> + bus_dma_segment_t seg;
> + bus_dma_tag_t dmat;
> + int rc, nsegs;
> +
> + dmat = sc->sc_dmat;
> + rc = _bus_dmamap_create(dmat, size, 1, size, 0,
> + BUS_DMA_NOWAIT, &map);
> + if (rc != 0) {
> + printf("hwdte_create fails\n");
> + return NULL;
> + }
> + rc = _bus_dmamem_alloc(dmat, size, 4, 0, &seg, 1,
> + &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO);
> + if (rc != 0) {
> + printf("hwdte alloc fails\n");
> + goto fail_destroy;
> + }
> + rc = _bus_dmamem_map(dmat, &seg, 1, size, &vaddr,
> + BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
> + if (rc != 0) {
> + printf("hwdte map fails\n");
> + goto fail_free;
> + }
> + rc = _bus_dmamap_load_raw(dmat, map, &seg, 1, size, BUS_DMA_NOWAIT);
> + if (rc != 0) {
> + printf("hwdte load raw fails\n");
> + goto fail_unmap;
> + }
> + *paddr = map->dm_segs[0].ds_addr;
> + printf("hwdte: Got P:%lx V:%p\n", *paddr, vaddr);
> + return vaddr;
> +
> +fail_unmap:
> + _bus_dmamem_unmap(dmat, vaddr, size);
> +fail_free:
> + _bus_dmamem_free(dmat, &seg, nsegs);
> +fail_destroy:
> + _bus_dmamap_destroy(dmat, map);
> + return NULL;
> +}
> +
> +/*
> + * COMMON: Allocate a single zeroed, wired kernel page for IOMMU
> + * structures.  Returns the va; physical address through *paddr.
> + * Panics on failure (kd_nowait allocation can return NULL).
> + */
> +void *
> +iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
> +{
> + void *va;
> +
> + *paddr = 0;
> + va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
> + if (va == NULL) {
> + panic("can't allocate page\n");
> + }
> + pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
> + return (va);
> +}
> +
> +
> +/*
> + * Intel: Issue command via queued invalidation.
> + * Currently a stub (body is compiled out), so the *_qi flush helpers
> + * below are effectively no-ops until this is implemented.
> + */
> +void
> +iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
> +{
> +#if 0
> + struct qi_entry *pi, *pw;
> +
> + idx = iommu->qi_head;
> + pi = &iommu->qi[idx];
> + pw = &iommu->qi[(idx+1) % MAXQ];
> + iommu->qi_head = (idx+2) % MAXQ;
> +
> + memcpy(pw, &qi, sizeof(qi));
> + issue command;
> + while (pw->xxx)
> + ;
> +#endif
> +}
> +
> +/*
> + * Intel: Flush TLB entries using Queued Invalidation.
> + * Builds an IOTLB invalidation descriptor for a global, per-domain
> + * or per-page flush and hands it to iommu_issue_qi() (still a stub).
> + * NOTE(review): the IOTLB_PAGE case does not encode an address in
> + * qi.hi, so it degenerates to a domain-selective flush -- confirm.
> + */
> +void
> +iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
> +{
> + struct qi_entry qi;
> +
> + /* Use queued invalidation */
> + qi.hi = 0;
> + switch (mode) {
> + case IOTLB_GLOBAL:
> + qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
> + break;
> + case IOTLB_DOMAIN:
> + qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
> +    QI_IOTLB_DID(did);
> + break;
> + case IOTLB_PAGE:
> + qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
> + qi.hi = 0;
> + break;
> + }
> + /* Request read/write drain when the hardware supports it. */
> + if (iommu->cap & CAP_DRD)
> + qi.lo |= QI_IOTLB_DR;
> + if (iommu->cap & CAP_DWD)
> + qi.lo |= QI_IOTLB_DW;
> + iommu_issue_qi(iommu, &qi);
> +}
> +
> +/*
> + * Intel: Flush context-cache entries using Queued Invalidation.
> + * Builds a context invalidation descriptor for a global, per-domain
> + * or per-device flush and hands it to iommu_issue_qi() (still a stub).
> + */
> +void
> +iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
> +    int sid, int fm)
> +{
> + struct qi_entry qi;
> +
> + /* Use queued invalidation */
> + qi.hi = 0;
> + switch (mode) {
> + case CTX_GLOBAL:
> + qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
> + break;
> + case CTX_DOMAIN:
> + qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
> + break;
> + case CTX_DEVICE:
> + qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
> +    QI_CTX_SID(sid) | QI_CTX_FM(fm);
> + break;
> + }
> + iommu_issue_qi(iommu, &qi);
> +}
> +
> +/*
> + * Intel: Flush internal write buffers (GCMD_WBF), required only when
> + * the hardware advertises CAP_RWBF.  No-op on AMD (dte set).
> + * Polls GSTS up to 5 times with a 10ms delay for WBFS.
> + */
> +void
> +iommu_flush_write_buffer(struct iommu_softc *iommu)
> +{
> + int i, sts;
> +
> + if (iommu->dte)
> + return;
> + if (!(iommu->cap & CAP_RWBF))
> + return;
> + printf("writebuf\n");
> + iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
> + for (i = 0; i < 5; i++) {
> + sts = iommu_read_4(iommu, DMAR_GSTS_REG);
> + if (sts & GSTS_WBFS)
> + break;
> + delay(10000);
> + }
> + if (i == 5) {
> + printf("write buffer flush fails\n");
> + }
> +}
> +
> +/*
> + * Flush CPU cache lines covering an IOMMU structure.
> + * AMD units (dte set) always need the flush; Intel units need it
> + * only when they are not coherent (ECAP_C clear).
> + */
> +void
> +iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
> +{
> + int coherent;
> +
> + coherent = (iommu->dte == NULL && (iommu->ecap & ECAP_C) != 0);
> + if (!coherent)
> + pmap_flush_cache((vaddr_t)addr, size);
> +}
> +
> +/*
> + * Intel: Flush IOMMU TLB Entries
> + * Flushing can occur globally, per domain or per page.
> + * On AMD hardware (dte set) this is delegated to the IVHD
> + * domain-invalidate command instead.
> + * NOTE(review): the IOTLB_PAGE case does not program an address
> + * register, so it behaves like a domain-selective flush -- confirm.
> + */
> +void
> +iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
> +{
> + int n;
> + uint64_t val;
> +
> + /* Call AMD */
> + if (iommu->dte) {
> + ivhd_invalidate_domain(iommu, did);
> + //ivhd_poll_events(iommu);
> + return;
> + }
> + val = IOTLB_IVT;
> + switch (mode) {
> + case IOTLB_GLOBAL:
> + val |= IIG_GLOBAL;
> + break;
> + case IOTLB_DOMAIN:
> + val |= IIG_DOMAIN | IOTLB_DID(did);
> + break;
> + case IOTLB_PAGE:
> + val |= IIG_PAGE | IOTLB_DID(did);
> + break;
> + }
> +
> + /* Check for Read/Write Drain */
> + if (iommu->cap & CAP_DRD)
> + val |= IOTLB_DR;
> + if (iommu->cap & CAP_DWD)
> + val |= IOTLB_DW;
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + /* Issue the invalidate and poll until hardware clears IVT. */
> + iommu_write_8(iommu, DMAR_IOTLB_REG(iommu), val);
> + n = 0;
> + do {
> + val = iommu_read_8(iommu, DMAR_IOTLB_REG(iommu));
> + } while (n++ < 5 && val & IOTLB_IVT);
> +
> + mtx_leave(&iommu->reg_lock);
> +
> +#ifdef DEBUG
> + {
> + static int rg;
> + int a, r;
> +
> + /* Report (once) if hardware downgraded the flush granularity. */
> + if (!rg) {
> + a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
> + r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
> + if (a != r) {
> + printf("TLB Requested:%d Actual:%d\n", r, a);
> + rg = 1;
> + }
> + }
> + }
> +#endif
> +}
> +
> +/* Intel: Flush IOMMU context-cache entries
> + * Flushes can occur globally, per domain, or per device.
> + * No-op on AMD hardware (dte set); AMD uses device-table flushes.
> + */
> +void
> +iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
> +{
> + uint64_t val;
> + int n;
> +
> + if (iommu->dte)
> + return;
> + val = CCMD_ICC;
> + switch (mode) {
> + case CTX_GLOBAL:
> + val |= CIG_GLOBAL;
> + break;
> + case CTX_DOMAIN:
> + val |= CIG_DOMAIN | CCMD_DID(did);
> + break;
> + case CTX_DEVICE:
> + val |= CIG_DEVICE | CCMD_DID(did) |
> +    CCMD_SID(sid) | CCMD_FM(fm);
> + break;
> + }
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + /* Issue the invalidate and poll until hardware clears ICC. */
> + n = 0;
> + iommu_write_8(iommu, DMAR_CCMD_REG, val);
> + do {
> + val = iommu_read_8(iommu, DMAR_CCMD_REG);
> + } while (n++ < 5 && val & CCMD_ICC);
> +
> + mtx_leave(&iommu->reg_lock);
> +
> +#ifdef DEBUG
> + {
> + static int rg;
> + int a, r;
> +
> + /* Report (once) if hardware downgraded the flush granularity. */
> + if (!rg) {
> + a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
> + r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
> + if (a != r) {
> + printf("CTX Requested:%d Actual:%d\n", r, a);
> + rg = 1;
> + }
> + }
> + }
> +#endif
> +}
> +
> +/*
> + * Intel: Enable or disable Queued Invalidation (GCMD_QIE).
> + * Only acts when the hardware advertises ECAP_QI; polls GSTS for the
> + * QIES status bit to follow the request.
> + */
> +void
> +iommu_enable_qi(struct iommu_softc *iommu, int enable)
> +{
> + int n = 0;
> + int sts;
> +
> + if (!(iommu->ecap & ECAP_QI))
> + return;
> +
> + if (enable) {
> + iommu->gcmd |= GCMD_QIE;
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
> + do {
> + sts = iommu_read_4(iommu, DMAR_GSTS_REG);
> + } while (n++ < 5 && !(sts & GSTS_QIES));
> +
> + mtx_leave(&iommu->reg_lock);
> +
> + printf("set.qie: %d\n", n);
> + } else {
> + iommu->gcmd &= ~GCMD_QIE;
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
> + do {
> + sts = iommu_read_4(iommu, DMAR_GSTS_REG);
> + } while (n++ < 5 && sts & GSTS_QIES);
> +
> + mtx_leave(&iommu->reg_lock);
> +
> + printf("clr.qie: %d\n", n);
> + }
> +}
> +
> +/*
> + * Intel: Enable or disable IOMMU translation (GCMD_TE).
> + * Polls GSTS until the translation-enable status matches the request;
> + * if the enable never latches the unit is marked bad and turned back
> + * off.  No-op on AMD hardware (dte set).
> + * Fixes: removed stray "xxx"/"yyy" debug printfs left inside the
> + * polling loop, and the unused local 'reg'.
> + */
> +int
> +iommu_enable_translation(struct iommu_softc *iommu, int enable)
> +{
> + uint32_t sts;
> + int n = 0;
> +
> + if (iommu->dte)
> + return (0);
> + if (enable) {
> + printf("enable iommu %d\n", iommu->id);
> + iommu_showcfg(iommu, -1);
> +
> + iommu->gcmd |= GCMD_TE;
> +
> + /* Enable translation */
> + printf(" pre tes: ");
> +
> + mtx_enter(&iommu->reg_lock);
> + iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
> + do {
> + sts = iommu_read_4(iommu, DMAR_GSTS_REG);
> + delay(n * 10000);
> + } while (n++ < 5 && !(sts & GSTS_TES));
> + mtx_leave(&iommu->reg_lock);
> +
> + printf(" set.tes: %d\n", n);
> +
> + if (n >= 5) {
> + printf("error.. unable to initialize iommu %d\n",
> +    iommu->id);
> + iommu->flags |= IOMMU_FLAGS_BAD;
> +
> + /* Disable IOMMU */
> + iommu->gcmd &= ~GCMD_TE;
> + mtx_enter(&iommu->reg_lock);
> + iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
> + mtx_leave(&iommu->reg_lock);
> +
> + return (1);
> + }
> +
> + /* Drop any stale context/TLB state now that translation is on. */
> + iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
> + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> + } else {
> + iommu->gcmd &= ~GCMD_TE;
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu_write_4(iommu, DMAR_GCMD_REG, iommu->gcmd);
> + do {
> + sts = iommu_read_4(iommu, DMAR_GSTS_REG);
> + } while (n++ < 5 && sts & GSTS_TES);
> + mtx_leave(&iommu->reg_lock);
> +
> + printf(" clr.tes: %d\n", n);
> + }
> +
> + return (0);
> +}
> +
> +/*
> + * Intel: Initialize one IOMMU (DRHD unit): map its registers, read
> + * and report capabilities, hook its fault interrupt, allocate and
> + * program the context root table, and compute supported guest
> + * address widths.  Returns 0 on success, -1 if registers cannot
> + * be mapped.
> + */
> +int
> +iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
> +    struct acpidmar_drhd *dh)
> +{
> + static int niommu;
> + int len = VTD_PAGE_SIZE;
> + int i, gaw;
> + uint32_t sts;
> + paddr_t paddr;
> +
> + if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
> + return (-1);
> + }
> +
> + TAILQ_INIT(&iommu->domains);
> + iommu->id = ++niommu;
> + iommu->flags = dh->flags;
> + iommu->segment = dh->segment;
> + iommu->iot = sc->sc_memt;
> +
> + iommu->cap = iommu_read_8(iommu, DMAR_CAP_REG);
> + iommu->ecap = iommu_read_8(iommu, DMAR_ECAP_REG);
> + iommu->ndoms = cap_nd(iommu->cap);
> +
> + /* Pretty-print the capability bits for dmesg. */
> + printf("  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
> +    iommu->cap & CAP_AFL ? "afl " : "", // adv fault
> +    iommu->cap & CAP_RWBF ? "rwbf " : "", // write-buffer flush
> +    iommu->cap & CAP_PLMR ? "plmr " : "", // protected lo region
> +    iommu->cap & CAP_PHMR ? "phmr " : "", // protected hi region
> +    iommu->cap & CAP_CM ? "cm " : "", // caching mode
> +    iommu->cap & CAP_ZLR ? "zlr " : "", // zero-length read
> +    iommu->cap & CAP_PSI ? "psi " : "", // page invalidate
> +    iommu->cap & CAP_DWD ? "dwd " : "", // write drain
> +    iommu->cap & CAP_DRD ? "drd " : "", // read drain
> +    iommu->cap & CAP_FL1GP ? "Gb " : "", // 1Gb pages
> +    iommu->cap & CAP_PI ? "pi " : ""); // posted interrupts
> + printf("  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
> +    iommu->ecap & ECAP_C ? "c " : "", // coherent
> +    iommu->ecap & ECAP_QI ? "qi " : "", // queued invalidate
> +    iommu->ecap & ECAP_DT ? "dt " : "", // device iotlb
> +    iommu->ecap & ECAP_IR ? "ir " : "", // intr remap
> +    iommu->ecap & ECAP_EIM ? "eim " : "", // x2apic
> +    iommu->ecap & ECAP_PT ? "pt " : "", // passthrough
> +    iommu->ecap & ECAP_SC ? "sc " : "", // snoop control
> +    iommu->ecap & ECAP_ECS ? "ecs " : "", // extended context
> +    iommu->ecap & ECAP_MTS ? "mts " : "", // memory type
> +    iommu->ecap & ECAP_NEST ? "nest " : "", // nested translations
> +    iommu->ecap & ECAP_DIS ? "dis " : "", // deferred invalidation
> +    iommu->ecap & ECAP_PASID ? "pas " : "", // pasid
> +    iommu->ecap & ECAP_PRS ? "prs " : "", // page request
> +    iommu->ecap & ECAP_ERS ? "ers " : "", // execute request
> +    iommu->ecap & ECAP_SRS ? "srs " : "", // supervisor request
> +    iommu->ecap & ECAP_NWFS ? "nwfs " : "", // no write flag
> +    iommu->ecap & ECAP_EAFS ? "eafs " : ""); // extended accessed flag
> +
> + mtx_init(&iommu->reg_lock, IPL_HIGH);
> +
> + /* Clear Interrupt Masking */
> + iommu_write_4(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
> +
> + iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
> +    acpidmar_intr, iommu, "dmarintr");
> +
> + /* Enable interrupts */
> + sts = iommu_read_4(iommu, DMAR_FECTL_REG);
> + iommu_write_4(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);
> +
> + /* Allocate root pointer */
> + iommu->root = iommu_alloc_page(iommu, &paddr);
> +#ifdef DEBUG
> + printf("Allocated root pointer: pa:%.16llx va:%p\n",
> +    (uint64_t)paddr, iommu->root);
> +#endif
> + iommu->rtaddr = paddr;
> + iommu_flush_write_buffer(iommu);
> + iommu_set_rtaddr(iommu, paddr);
> +
> +#if 0
> + if (iommu->ecap & ECAP_QI) {
> + /* Queued Invalidation support */
> + iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
> + iommu_write_8(iommu, DMAR_IQT_REG, 0);
> + iommu_write_8(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
> + }
> + if (iommu->ecap & ECAP_IR) {
> + /* Interrupt remapping support */
> + iommu_write_8(iommu, DMAR_IRTA_REG, 0);
> + }
> +#endif
> +
> + /* Calculate guest address width and supported guest widths */
> + gaw = -1;
> + iommu->mgaw = cap_mgaw(iommu->cap);
> + printf("gaw: %d { ", iommu->mgaw);
> + for (i = 0; i < 5; i++) {
> + if (cap_sagaw(iommu->cap) & (1L << i)) {
> + gaw = VTD_LEVELTOAW(i);
> + printf("%d ", gaw);
> + iommu->agaw = gaw;
> + }
> + }
> + printf("}\n");
> +
> + /* Cache current status register bits */
> + sts = iommu_read_4(iommu, DMAR_GSTS_REG);
> + if (sts & GSTS_TES)
> + iommu->gcmd |= GCMD_TE;
> + if (sts & GSTS_QIES)
> + iommu->gcmd |= GCMD_QIE;
> + if (sts & GSTS_IRES)
> + iommu->gcmd |= GCMD_IRE;
> + if (iommu->gcmd) {
> + printf("gcmd: %x preset\n", iommu->gcmd);
> + }
> + /* Drain any pending fault records before use. */
> + acpidmar_intr(iommu);
> + return (0);
> +}
> +
> +const char *dmar_rn(int reg);
> +
> +/*
> + * Map a register offset to a human-readable name for debug traces.
> + * Only the AMD IVHD registers are live; the Intel DMAR names are
> + * compiled out.
> + */
> +const char *
> +dmar_rn(int reg)
> +{
> + switch (reg) {
> + case EVT_HEAD_REG: return "evthead";
> + case EVT_TAIL_REG: return "evttail";
> + case CMD_HEAD_REG: return "cmdhead";
> + case CMD_TAIL_REG: return "cmdtail";
> + case CMD_BASE_REG: return "cmdbase";
> + case EVT_BASE_REG: return "evtbase";
> + case DEV_TAB_BASE_REG: return "devtblbase";
> + case IOMMUCTL_REG: return "iommuctl";
> +#if 0
> + case DMAR_VER_REG: return "ver";
> + case DMAR_CAP_REG: return "cap";
> + case DMAR_ECAP_REG: return "ecap";
> + case DMAR_GSTS_REG: return "gsts";
> + case DMAR_GCMD_REG: return "gcmd";
> + case DMAR_FSTS_REG: return "fsts";
> + case DMAR_FECTL_REG: return "fectl";
> + case DMAR_RTADDR_REG: return "rtaddr";
> + case DMAR_FEDATA_REG: return "fedata";
> + case DMAR_FEADDR_REG: return "feaddr";
> + case DMAR_FEUADDR_REG: return "feuaddr";
> + case DMAR_PMEN_REG: return "pmen";
> + case DMAR_IEDATA_REG: return "iedata";
> + case DMAR_IEADDR_REG: return "ieaddr";
> + case DMAR_IEUADDR_REG: return "ieuaddr";
> + case DMAR_IRTA_REG: return "irta";
> + case DMAR_CCMD_REG: return "ccmd";
> + case DMAR_IQH_REG: return "iqh";
> + case DMAR_IQT_REG: return "iqt";
> + case DMAR_IQA_REG: return "iqa";
> +#endif
> + }
> + return "unknown";
> +}
> +
> +/*
> + * Read a 32-bit IOMMU register.
> + * Fix: the old "if (reg < 00)" trace branch could never fire
> + * (register offsets are non-negative), so the dead debug code
> + * was removed.
> + */
> +uint32_t
> +iommu_read_4(struct iommu_softc *iommu, int reg)
> +{
> + return bus_space_read_4(iommu->iot, iommu->ioh, reg);
> +}
> +
> +
> +#define dbprintf(x...)
> +
> +/* Write a 32-bit IOMMU register (trace compiled out via dbprintf). */
> +void
> +iommu_write_4(struct iommu_softc *iommu, int reg, uint32_t v)
> +{
> + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> + bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
> +}
> +
> +/*
> + * Read a 64-bit IOMMU register.
> + * Fix: the old "if (reg < 00)" trace branch could never fire
> + * (register offsets are non-negative), so the dead debug code
> + * was removed.
> + */
> +uint64_t
> +iommu_read_8(struct iommu_softc *iommu, int reg)
> +{
> + return bus_space_read_8(iommu->iot, iommu->ioh, reg);
> +}
> +
> +/* Write a 64-bit IOMMU register (trace compiled out via dbprintf). */
> +void
> +iommu_write_8(struct iommu_softc *iommu, int reg, uint64_t v)
> +{
> + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> +    iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> + bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
> +}
> +
> +/*
> + * Check if a device is within a DMAR device scope.
> + * Walks each devscope entry's bridge path via PCI config space to
> + * resolve the real bus number, then checks for an exact endpoint
> + * match or (for bridge entries) whether the target bus falls inside
> + * the bridge's secondary..subordinate range.
> + * Returns DMAR_ENDPOINT, DMAR_BRIDGE, or 0 for no match.
> + */
> +int
> +acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
> +    int sid)
> +{
> + struct dmar_devlist *ds;
> + int sub, sec, i;
> + int bus, dev, fun, sbus;
> + pcireg_t reg;
> + pcitag_t tag;
> +
> + sbus = sid_bus(sid);
> + TAILQ_FOREACH(ds, devlist, link) {
> + bus = ds->bus;
> + dev = ds->dp[0].device;
> + fun = ds->dp[0].function;
> + /* Walk PCI bridges in path */
> + for (i = 1; i < ds->ndp; i++) {
> + tag = pci_make_tag(pc, bus, dev, fun);
> + reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
> + bus = PPB_BUSINFO_SECONDARY(reg);
> + dev = ds->dp[i].device;
> + fun = ds->dp[i].function;
> + }
> +
> + /* Check for device exact match */
> + if (sid == mksid(bus, dev, fun)) {
> + return DMAR_ENDPOINT;
> + }
> +
> + /* Check for device subtree match */
> + if (ds->type == DMAR_BRIDGE) {
> + tag = pci_make_tag(pc, bus, dev, fun);
> + reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
> + sec = PPB_BUSINFO_SECONDARY(reg);
> + sub = PPB_BUSINFO_SUBORDINATE(reg);
> + if (sec <= sbus && sbus <= sub) {
> + return DMAR_BRIDGE;
> + }
> + }
> + }
> +
> + return (0);
> +}
> +
> +/*
> + * Create a new translation domain on an IOMMU: allocates the root
> + * page table, installs the remapping bus_dma tag, and builds the
> + * iova extent map (starting at 16Mb, excluding the 0xFEE00000
> + * MSI/interrupt window).
> + */
> +struct domain *
> +domain_create(struct iommu_softc *iommu, int did)
> +{
> + struct domain *dom;
> + int gaw;
> +
> + printf("iommu%d: create domain: %.4x\n", iommu->id, did);
> + dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
> + dom->did = did;
> + dom->iommu = iommu;
> + dom->pte = iommu_alloc_page(iommu, &dom->ptep);
> + TAILQ_INIT(&dom->devices);
> +
> + /* Setup DMA */
> + dom->dmat._cookie = dom;
> + dom->dmat._dmamap_create    = dmar_dmamap_create; // nop
> + dom->dmat._dmamap_destroy   = dmar_dmamap_destroy; // nop
> + dom->dmat._dmamap_load      = dmar_dmamap_load; // lm
> + dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf; // lm
> + dom->dmat._dmamap_load_uio  = dmar_dmamap_load_uio; // lm
> + dom->dmat._dmamap_load_raw  = dmar_dmamap_load_raw; // lm
> + dom->dmat._dmamap_unload    = dmar_dmamap_unload; // um
> + dom->dmat._dmamap_sync      = dmar_dmamap_sync; // lm
> + dom->dmat._dmamem_alloc     = dmar_dmamem_alloc; // nop
> + dom->dmat._dmamem_free      = dmar_dmamem_free; // nop
> + dom->dmat._dmamem_map       = dmar_dmamem_map; // nop
> + dom->dmat._dmamem_unmap     = dmar_dmamem_unmap; // nop
> + dom->dmat._dmamem_mmap      = dmar_dmamem_mmap;
> +
> + snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
> +    iommu->id, dom->did);
> +
> + /* Setup IOMMU address map */
> + gaw = min(iommu->agaw, iommu->mgaw);
> + dom->iovamap = extent_create(dom->exname, 1024*1024*16,
> +    (1LL << gaw)-1,
> +    M_DEVBUF, NULL, 0,
> +    EX_WAITOK|EX_NOCOALESCE);
> +
> + /* Zero out Interrupt region */
> + extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
> +    EX_WAITOK);
> + mtx_init(&dom->exlck, IPL_HIGH);
> +
> + TAILQ_INSERT_TAIL(&iommu->domains, dom, link);
> +
> + return dom;
> +}
> +
> +/* Record that device 'sid' now belongs to domain 'dom'. */
> +void domain_add_device(struct domain *dom, int sid)
> +{
> + struct domain_dev *ddev;
> +
> + printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
> + ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
> + ddev->sid = sid;
> + TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
> +
> + /* Should set context entry here?? */
> +}
> +
> +/*
> + * Remove device 'sid' from the domain's device list.
> + * Fix: free(9) takes (addr, type, size); the original call swapped
> + * the type and size arguments.
> + */
> +void domain_remove_device(struct domain *dom, int sid)
> +{
> + struct domain_dev *ddev, *tmp;
> +
> + TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
> + if (ddev->sid == sid) {
> + TAILQ_REMOVE(&dom->devices, ddev, link);
> + free(ddev, M_DEVBUF, sizeof(*ddev));
> + }
> + }
> +}
> +
> +/*
> + * Lookup domain by segment & source id (bus.device.function).
> + * Finds the owning IOMMU (devscope match or the catch-all unit),
> + * returns the existing domain for the device, or creates one:
> + * a shared "unity" domain when domain ids are nearly exhausted,
> + * otherwise a fresh domain with a newly allocated id.
> + */
> +struct domain *
> +domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
> +{
> + struct iommu_softc *iommu;
> + struct domain_dev *ddev;
> + struct domain *dom;
> + int rc;
> +
> + if (sc == NULL) {
> + return NULL;
> + }
> +
> + /* Lookup IOMMU for this device */
> + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> + if (iommu->segment != segment)
> + continue;
> + /* Check for devscope match or catchall iommu */
> + rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
> + if (rc != 0 || iommu->flags) {
> + break;
> + }
> + }
> + /* iommu is NULL here only if the loop ran to completion. */
> + if (!iommu) {
> + printf("%s: no iommu found\n", dmar_bdf(sid));
> + return NULL;
> + }
> +
> + //acpidmar_intr(iommu);
> +
> + /* Search domain devices */
> + TAILQ_FOREACH(dom, &iommu->domains, link) {
> + TAILQ_FOREACH(ddev, &dom->devices, link) {
> + /* XXX: match all functions? */
> + if (ddev->sid == sid) {
> + return dom;
> + }
> + }
> + }
> + if (iommu->ndoms <= 2) {
> + /* Running out of domains.. create catchall domain */
> + if (!iommu->unity) {
> + iommu->unity = domain_create(iommu, 1);
> + }
> + dom = iommu->unity;
> + } else {
> + dom = domain_create(iommu, --iommu->ndoms);
> + }
> + if (!dom) {
> + printf("no domain here\n");
> + return NULL;
> + }
> +
> + /* Add device to domain */
> + domain_add_device(dom, sid);
> +
> + return dom;
> +}
> +
> +/*
> + * Map Guest Pages into IOMMU: walks [va, va+len) a page at a time,
> + * resolves the host physical address from the current process's
> + * pmap, and installs a read/write mapping at guest physical gpa.
> + * NOTE(review): assumes va is wired so pmap_extract succeeds; an
> + * unresolved page maps hpa 0 -- confirm callers guarantee residency.
> + */
> +void  _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
> +{
> + bus_size_t i;
> + paddr_t hpa;
> +
> + if (dom == NULL) {
> + return;
> + }
> + printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
> + for (i = 0; i < len; i += PAGE_SIZE) {
> + hpa = 0;
> + pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
> + domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
> + gpa += PAGE_SIZE;
> + va  += PAGE_SIZE;
> + }
> +}
> +
> +/*
> + * Resolve the IOMMU domain for a PCI function.  On success the
> + * domain id is stored through *id; returns NULL when no domain
> + * can be found or created.
> + */
> +void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
> +{
> + struct domain *dom;
> +
> + dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
> + if (dom != NULL)
> + *id = dom->did;
> + return dom;
> +}
> +
> +void domain_map_device(struct domain *dom, int sid);
> +
> +/*
> + * Program the per-device translation entry for 'sid' in its domain.
> + * AMD: fills in the Device Table Entry (4-level paging) and flushes
> + * the device table.  Intel: allocates the per-bus context table on
> + * first use, then fills in the context entry and flushes it
> + * (full invalidate when caching mode requires it, write-buffer
> + * flush otherwise).  Idempotent: existing valid entries are kept.
> + */
> +void
> +domain_map_device(struct domain *dom, int sid)
> +{
> + struct iommu_softc *iommu;
> + struct context_entry *ctx;
> + paddr_t paddr;
> + int bus, devfn;
> + int tt, lvl;
> +
> + iommu = dom->iommu;
> +
> + bus = sid_bus(sid);
> + devfn = sid_devfn(sid);
> + /* AMD attach device */
> + if (iommu->dte) {
> + struct ivhd_dte *dte = &iommu->dte[sid];
> + if (!dte->dw0) {
> + /* Setup Device Table Entry: bus.devfn */
> + printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
> + dte_set_host_page_table_root_ptr(dte, dom->ptep);
> + dte_set_domain(dte, dom->did);
> + dte_set_mode(dte, 3);  // Set 4 level PTE
> + dte_set_tv(dte);
> + dte_set_valid(dte);
> + ivhd_flush_devtab(iommu, dom->did);
> + //ivhd_showit(iommu);
> + ivhd_showdte(iommu);
> + }
> + //ivhd_poll_events(iommu);
> + return;
> + }
> +
> + /* Create Bus mapping */
> + if (!root_entry_is_valid(&iommu->root[bus])) {
> + iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
> + iommu->root[bus].lo = paddr | ROOT_P;
> + iommu_flush_cache(iommu, &iommu->root[bus],
> +    sizeof(struct root_entry));
> + dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
> +    iommu->id, bus, (uint64_t)paddr,
> +    iommu->ctx[bus]);
> + }
> +
> + /* Create DevFn mapping */
> + ctx = iommu->ctx[bus] + devfn;
> + if (!context_entry_is_valid(ctx)) {
> + tt = CTX_T_MULTI;
> + lvl = VTD_AWTOLEVEL(iommu->agaw);
> +
> + /* Initialize context */
> + context_set_slpte(ctx, dom->ptep);
> + context_set_translation_type(ctx, tt);
> + context_set_domain_id(ctx, dom->did);
> + context_set_address_width(ctx, lvl);
> + context_set_present(ctx);
> +
> + /* Flush it */
> + iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
> + if ((iommu->cap & CAP_CM) || force_cm) {
> + iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
> + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> + } else {
> + iommu_flush_write_buffer(iommu);
> + }
> + dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
> +    iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
> +    dom->did, tt);
> + }
> +}
> +
> +/*
> + * Look up (or create) the domain for a device; when mapctx is set,
> + * also program the device's context/device-table entry.
> + * Fix: 'dom' was needlessly declared static, keeping stale state
> + * across calls for no benefit; it is a plain local now.
> + */
> +struct domain *
> +acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
> +{
> + struct domain *dom;
> +
> + dom = domain_lookup(sc, segment, sid);
> + if (!dom) {
> + printf("no domain: %s\n", dmar_bdf(sid));
> + return NULL;
> + }
> +
> + if (mapctx) {
> + domain_map_device(dom, sid);
> + }
> +
> + return dom;
> +}
> +
> +/* Placeholder policy hook: remap every bus/dev/fun for now. */
> +int ismap(int bus, int dev, int fun) {
> + return 1;
> +}
> +
> +/*
> + * Per-device PCI attach hook: places the device in an IOMMU domain
> + * and swaps its DMA tag for the remapping one.  VGA devices are
> + * flagged DOM_NOMAP; ISA bridges get a 1:1 mapping of low 16Mb.
> + */
> +void
> +acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> +{
> + int bus, dev, fun, sid;
> + struct domain *dom;
> + pcireg_t reg;
> +
> + if (!acpidmar_sc) {
> + /* No DMAR, ignore */
> + return;
> + }
> +
> + /* Add device to our list */
> + pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
> + sid = mksid(bus, dev, fun);
> + if (sid_flag[sid] & SID_INVALID)
> + return;
> +
> + reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
> +#if 0
> + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
> +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
> + printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
> +    pa->pa_domain, bus, dev, fun);
> + return;
> + }
> +#endif
> + /* Add device to domain */
> + dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain, sid, 0);
> + if (dom == NULL)
> + return;
> +
> + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
> +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
> + dom->flag = DOM_NOMAP;
> + }
> + if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
> +    PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
> + /* For ISA Bridges, map 0-16Mb as 1:1 */
> + printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
> +    pa->pa_domain, bus, dev, fun);
> + domain_map_pthru(dom, 0x00, 16*1024*1024);
> + }
> +
> + /* Change DMA tag */
> + pa->pa_dmat = &dom->dmat;
> +}
> +
> +/*
> + * Create list of device scope entries from ACPI table.
> + * Walks the variable-length devscope records following a DRHD/RMRR/
> + * ATSR header, keeping only bridge and endpoint entries; each entry's
> + * (device,function) path is copied after the dmar_devlist allocation.
> + */
> +void
> +acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
> +    struct devlist_head *devlist)
> +{
> + struct acpidmar_devscope *ds;
> + struct dmar_devlist *d;
> + int dplen, i;
> +
> + TAILQ_INIT(devlist);
> + while (off < de->length) {
> + ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
> + off += ds->length;
> +
> + /* We only care about bridges and endpoints */
> + if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
> + continue;
> +
> + /* Path entries are 2 bytes each (device, function). */
> + dplen = ds->length - sizeof(*ds);
> + d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
> + d->bus  = ds->bus;
> + d->type = ds->type;
> + d->ndp  = dplen / 2;
> + d->dp   = (void *)&d[1];
> + memcpy(d->dp, &ds[1], dplen);
> + TAILQ_INSERT_TAIL(devlist, d, link);
> +
> + printf("  %8s  %.4x:%.2x.%.2x.%x {",
> +    ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
> +    segment, ds->bus,
> +    d->dp[0].device,
> +    d->dp[0].function);
> +
> + for (i = 1; i < d->ndp; i++) {
> + printf(" %2x.%x ",
> +    d->dp[i].device,
> +    d->dp[i].function);
> + }
> + printf("}\n");
> + }
> +}
> +
> +/*
> + * DMA Remapping Hardware Unit: parse a DRHD entry, initialize the
> + * unit, and insert it in the softc list.  The INCLUDE_PCI_ALL
> + * (catch-all) unit is kept last so devscope matches win.
> + */
> +void
> +acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
> +{
> + struct iommu_softc *iommu;
> +
> + printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
> +    de->drhd.segment,
> +    de->drhd.address,
> +    de->drhd.flags);
> + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
> + acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
> +    &iommu->devices);
> + iommu_init(sc, iommu, &de->drhd);
> +
> + if (de->drhd.flags) {
> + /* Catchall IOMMU goes at end of list */
> + TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
> + } else {
> + TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
> + }
> +}
> +
> +/*
> + * Reserved Memory Region Reporting: parse an RMRR entry and record
> + * the page-aligned range plus its device scope.  If the range falls
> + * inside an E820 reserved region the whole (coalesced) reserved
> + * region is used instead, to cover BIOSes that DMA outside the
> + * reported RMRR.
> + */
> +void
> +acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
> +{
> + struct rmrr_softc *rmrr;
> + bios_memmap_t *im, *jm;
> + uint64_t start, end;
> +
> + printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
> +    de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
> + if (de->rmrr.limit <= de->rmrr.base) {
> + printf("  buggy BIOS\n");
> + return;
> + }
> +
> + rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
> + rmrr->start = trunc_page(de->rmrr.base);
> + rmrr->end = round_page(de->rmrr.limit);
> + rmrr->segment = de->rmrr.segment;
> + acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
> +    &rmrr->devices);
> +
> + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
> + if (im->type != BIOS_MAP_RES)
> + continue;
> + /* Search for adjacent reserved regions */
> + start = im->addr;
> + end   = im->addr+im->size;
> + for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
> +    jm++) {
> + end = jm->addr+jm->size;
> + }
> + printf("e820: %.16llx - %.16llx\n", start, end);
> + if (start <= rmrr->start && rmrr->end <= end) {
> + /* Bah.. some buggy BIOS stomp outside RMRR */
> + printf("  ** inside E820 Reserved %.16llx %.16llx\n",
> +    start, end);
> + rmrr->start = trunc_page(start);
> + rmrr->end   = round_page(end);
> + break;
> + }
> + }
> + TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
> +}
> +
> +/*
> + * Root Port ATS Reporting: parse an ATSR entry and record its flags
> + * and device scope.  Currently only collected, not acted upon.
> + */
> +void
> +acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
> +{
> + struct atsr_softc *atsr;
> +
> + printf("ATSR: segment:%.4x flags:%x\n",
> +    de->atsr.segment,
> +    de->atsr.flags);
> +
> + atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
> + atsr->flags = de->atsr.flags;
> + atsr->segment = de->atsr.segment;
> + acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
> +    &atsr->devices);
> +
> + TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
> +}
> +
> +/*
> + * Intel path: walk the ACPI DMAR table, instantiating DRHD/RMRR/ATSR
> + * entries, pre-creating domains for devices named in devscopes, and
> + * identity-mapping + reserving the RMRR windows.
> + * Fix: extent_alloc_region(9) takes (start, size, flags); the
> + * original passed the RMRR end address as the size, reserving far
> + * too much (or failing); the result is also checked now.
> + */
> +void
> +acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
> +{
> + struct rmrr_softc *rmrr;
> + struct iommu_softc *iommu;
> + struct domain *dom;
> + struct dmar_devlist *dl;
> + union acpidmar_entry *de;
> + int off, sid, rc;
> +
> + domain_map_page = domain_map_page_intel;
> + printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
> +    dmar->haw+1,
> +    !!(dmar->flags & 0x1),
> +    !!(dmar->flags & 0x2));
> + sc->sc_haw = dmar->haw+1;
> + sc->sc_flags = dmar->flags;
> +
> + TAILQ_INIT(&sc->sc_drhds);
> + TAILQ_INIT(&sc->sc_rmrrs);
> + TAILQ_INIT(&sc->sc_atsrs);
> +
> + off = sizeof(*dmar);
> + while (off < dmar->hdr.length) {
> + de = (union acpidmar_entry *)((unsigned char *)dmar + off);
> + switch (de->type) {
> + case DMAR_DRHD:
> + acpidmar_drhd(sc, de);
> + break;
> + case DMAR_RMRR:
> + acpidmar_rmrr(sc, de);
> + break;
> + case DMAR_ATSR:
> + acpidmar_atsr(sc, de);
> + break;
> + default:
> + printf("DMAR: unknown %x\n", de->type);
> + break;
> + }
> + off += de->length;
> + }
> +
> + /* Pre-create domains for iommu devices */
> + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> + TAILQ_FOREACH(dl, &iommu->devices, link) {
> + sid = mksid(dl->bus, dl->dp[0].device,
> +    dl->dp[0].function);
> + dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
> + if (dom != NULL) {
> + printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> +    iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
> +    iommu->id, dom->did);
> + }
> + }
> + }
> + /* Map passthrough pages for RMRR */
> + TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
> + TAILQ_FOREACH(dl, &rmrr->devices, link) {
> + sid = mksid(dl->bus, dl->dp[0].device,
> +    dl->dp[0].function);
> + dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
> + if (dom != NULL) {
> + printf("%s map ident: %.16llx %.16llx\n",
> +    dom_bdf(dom), rmrr->start, rmrr->end);
> + domain_map_pthru(dom, rmrr->start, rmrr->end);
> + /* Reserve the window; extent takes (start, size). */
> + rc = extent_alloc_region(dom->iovamap,
> +    rmrr->start, rmrr->end - rmrr->start,
> +    EX_WAITOK);
> + if (rc != 0) {
> + printf("  rmrr reserve fails: %d\n", rc);
> + }
> + }
> + }
> + }
> +}
> +
> +
> +/*=====================================================
> + * AMD Vi
> + *=====================================================*/
> +void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
> +int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> + struct acpi_ivhd *);
> +void iommu_ivhd_add(struct iommu_softc *, int, int, int);
> +int _ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *);
> +void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
> +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
> +int ivhd_invalidate_domain(struct iommu_softc *, int);
> +void ivhd_intr_map(struct iommu_softc *, int);
> +
> + /*
> +  * AMD: interrupt handler.  Drains the IOMMU event log; does nothing
> +  * until the device table has been set up.
> +  */
> + int
> + acpiivhd_intr(void *ctx)
> + {
> + 	struct iommu_softc *iommu = ctx;
> +
> + 	if (iommu->dte == NULL)
> + 		return (0);
> + 	ivhd_poll_events(iommu);
> + 	return (1);
> + }
> +
> + /* Setup interrupt for AMD */
> + /*
> +  * Establish the MSI interrupt for the AMD IOMMU itself, identified
> +  * by its PCI source id.  Idempotent: a second call is a no-op once
> +  * iommu->intr has been set.
> +  */
> + void
> + ivhd_intr_map(struct iommu_softc *iommu, int devid) {
> + pci_intr_handle_t ih;
> +
> + if (iommu->intr)
> + return;
> + /* Build a synthetic pci_intr_handle for the IOMMU's own bus:dev.fn */
> + ih.tag = pci_make_tag(NULL, sid_bus(devid), sid_dev(devid), sid_fun(devid));
> + ih.line = APIC_INT_VIA_MSG;
> + ih.pin = 0;
> + iommu->intr = pci_intr_establish(NULL, ih, IPL_NET | IPL_MPSAFE,
> + acpiivhd_intr, iommu, "amd_iommu");
> + printf("amd iommu intr: %p\n", iommu->intr);
> + }
> +
> + /*
> +  * Debug helper: recursively dump a page-table subtree.  lvl counts
> +  * down from the top level; the shift (lvl-1)*9+12 rebuilds the
> +  * virtual address covered by each entry (9 index bits per level,
> +  * 4K pages).  Leaf entries print the final va -> pte mapping.
> +  */
> + void _dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
> + {
> + char *pfx[] = { "    ", "   ", "  ", " ", "" };
> + uint64_t i, sh;
> + struct pte_entry *npte;
> +
> + for (i = 0; i < 512; i++) {
> + sh = (i << (((lvl-1) * 9) + 12));
> + if (pte[i].val & PTE_P) {
> + if (lvl > 1) {
> + /* Follow the next-level table through the direct map */
> + npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
> + printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl,
> +    pte[i].val, (pte[i].val >> 9) & 7);
> + _dumppte(npte, lvl-1, va | sh);
> + }
> + else {
> + printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl,
> +    pte[i].val, va | sh);
> + }
> + }
> + }
> + }
> +
> + /*
> +  * Debug helper: print the device table entry and page tables for the
> +  * domain owning 'sid'.  Rate-limited to the first 10 calls so a
> +  * fault storm does not flood the console.  'paddr' is currently
> +  * unused beyond identifying the faulting access to the reader.
> +  */
> + void ivhd_showpage(struct iommu_softc *iommu, int sid, paddr_t paddr)
> + {
> + struct domain *dom;
> + static int show = 0;
> +
> + if (show > 10)
> + return;
> + show++;
> + dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
> + if (!dom)
> + return;
> + printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
> + iommu->dte[sid].dw0,
> + iommu->dte[sid].dw1,
> + iommu->dte[sid].dw2,
> + iommu->dte[sid].dw3,
> + iommu->dte[sid].dw4,
> + iommu->dte[sid].dw5,
> + iommu->dte[sid].dw6,
> + iommu->dte[sid].dw7);
> + /* assumes a 4-level (mode 3) page table -- TODO confirm */
> + _dumppte(dom->pte, 3, 0);
> + }
> +
> + /* Display AMD IOMMU Error */
> + /*
> +  * Decode and print one entry from the AMD IOMMU event log, then
> +  * zero it so repeated polls do not re-report it.  'head' is the
> +  * byte offset of the entry in the log (used only for display).
> +  */
> + void
> + ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
> + {
> + int type, sid, did, flag;
> + uint64_t address;
> +
> + /* Get Device, Domain, Address and Type of event */
> + sid  = __EXTRACT(evt->dw0, EVT_SID);
> + type = __EXTRACT(evt->dw1, EVT_TYPE);
> + did  = __EXTRACT(evt->dw1, EVT_DID);
> + flag = __EXTRACT(evt->dw1, EVT_FLAG);
> + address = _get64(&evt->dw2);
> +
> + printf("=== IOMMU Error[%.4x]: ", head);
> + switch (type) {
> + case ILLEGAL_DEV_TABLE_ENTRY: // ok
> + printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
> +   dmar_bdf(sid), address,
> +   evt->dw1 & EVT_TR ? "translation" : "transaction",
> +   evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
> +   evt->dw1 & EVT_RW ? "write" : "read",
> +   evt->dw1 & EVT_I  ? "interrupt" : "memory");
> + ivhd_showdte(iommu);
> + break;
> + case IO_PAGE_FAULT: // ok
> + printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
> +   dmar_bdf(sid), did, address,
> +   evt->dw1 & EVT_TR ? "translation" : "transaction",
> +   evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
> +   evt->dw1 & EVT_PE ? "no perm" : "perm",
> +   evt->dw1 & EVT_RW ? "write" : "read",
> +   evt->dw1 & EVT_PR ? "present" : "not present",
> +   evt->dw1 & EVT_I  ? "interrupt" : "memory");
> + ivhd_showdte(iommu);
> + ivhd_showpage(iommu, sid, address);
> + break;
> + case DEV_TAB_HARDWARE_ERROR: // ok
> + printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
> +    dmar_bdf(sid), address,
> +   evt->dw1 & EVT_TR ? "translation" : "transaction",
> +   evt->dw1 & EVT_RW ? "write" : "read",
> +   evt->dw1 & EVT_I  ? "interrupt" : "memory");
> + ivhd_showdte(iommu);
> + break;
> + case PAGE_TAB_HARDWARE_ERROR:
> + printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
> +   dmar_bdf(sid), address,
> +   evt->dw1 & EVT_TR ? "translation" : "transaction",
> +   evt->dw1 & EVT_RW ? "write" : "read",
> +   evt->dw1 & EVT_I  ? "interrupt" : "memory");
> + ivhd_showdte(iommu);
> + break;
> + case ILLEGAL_COMMAND_ERROR: // ok
> + printf("illegal command addr=0x%.16llx\n", address);
> + ivhd_showcmd(iommu);
> + break;
> + case COMMAND_HARDWARE_ERROR:
> + printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
> +   address, flag);
> + ivhd_showcmd(iommu);
> + break;
> + case IOTLB_INV_TIMEOUT:
> + printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
> +   dmar_bdf(sid), address);
> + break;
> + case INVALID_DEVICE_REQUEST:
> + printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
> +   dmar_bdf(sid), address, flag);
> + break;
> + default:
> + printf("unknown type=0x%.2x\n", type);
> + break;
> + }
> + //ivhd_showdte(iommu);
> + /* Clear old event */
> + evt->dw0 = 0;
> + evt->dw1 = 0;
> + evt->dw2 = 0;
> + evt->dw3 = 0;
> + }
> +
> + /* AMD: Process IOMMU error from hardware */
> + /*
> +  * Drain the event log ring: walk from head to tail printing each
> +  * entry, then write the new head back so hardware can reuse the
> +  * slots.  head/tail are byte offsets into the 4K event table.
> +  * NOTE(review): "iommu->evt_tbl + head" adds a byte offset to the
> +  * table pointer -- correct only if evt_tbl is byte-sized (void*/
> +  * char*); if it is struct ivhd_event *, this indexes 16x too far.
> +  * Verify evt_tbl's declared type.
> +  */
> + int
> + ivhd_poll_events(struct iommu_softc *iommu)
> + {
> + uint32_t head, tail;
> + int sz;
> +
> + sz = sizeof(struct ivhd_event);
> + head = iommu_read_4(iommu, EVT_HEAD_REG);
> + tail = iommu_read_4(iommu, EVT_TAIL_REG);
> + if (head == tail) {
> + /* No pending events */
> + return (0);
> + }
> + while (head != tail) {
> + ivhd_show_event(iommu, iommu->evt_tbl + head, head);
> + head = (head + sz) % EVT_TBL_SIZE;
> + }
> + iommu_write_4(iommu, EVT_HEAD_REG, head);
> + return (0);
> + }
> +
> + /* AMD: Issue command to IOMMU queue */
> + /*
> +  * Append one 16-byte command to the circular command buffer and
> +  * bump the tail register.  Interrupts are disabled around the
> +  * tail read/copy/write so the slot claim is not interleaved.
> +  * Returns the slot index on success or -EBUSY if the ring is full.
> +  * NOTE(review): "iommu->cmd_tbl + tail" uses tail as a byte offset;
> +  * as with evt_tbl, this is only correct if cmd_tbl is byte-sized.
> +  */
> + int
> + _ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
> + {
> + u_long rf;
> + uint32_t head, tail, next;
> + int sz;
> +
> + head = iommu_read_4(iommu, CMD_HEAD_REG);
> + sz = sizeof(*cmd);
> + rf = intr_disable();
> + tail = iommu_read_4(iommu, CMD_TAIL_REG);
> + next = (tail + sz) % CMD_TBL_SIZE;
> + if (next == head) {
> + printf("FULL\n");
> + /* Queue is full */
> + intr_restore(rf);
> + return -EBUSY;
> + }
> + memcpy(iommu->cmd_tbl + tail, cmd, sz);
> + iommu_write_4(iommu, CMD_TAIL_REG, next);
> + intr_restore(rf);
> + return (tail / sz);
> + }
> +
> +#define IVHD_MAXDELAY 8
> +
> + /*
> +  * Issue a command, optionally followed by a COMPLETION_WAIT that
> +  * stores a sentinel (0xFEEDC0DEDEADBEEF) to a 16-byte-aligned stack
> +  * variable.  We spin with exponential backoff (up to IVHD_MAXDELAY
> +  * rounds) until the IOMMU performs that store, which proves all
> +  * earlier commands have completed.  Returns the slot index or a
> +  * negative errno from _ivhd_issue_command.
> +  */
> + int
> + ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
> + {
> + struct ivhd_command wq = { 0 };
> + volatile uint64_t wv __aligned(16) = 0LL;
> + paddr_t paddr;
> + int rc, i;
> + static int mi;
> +
> + rc = _ivhd_issue_command(iommu, cmd);
> + if (rc >= 0 && wait) {
> + /* Wait for previous commands to complete.
> + * Store address of completion variable to command */
> + pmap_extract(pmap_kernel(), (vaddr_t)&wv, &paddr);
> + /* dw0 low bit = "store data" flag; dw1 carries paddr bits 32+ */
> + wq.dw0 = (paddr & ~0xF) | 0x1;
> + wq.dw1 = (COMPLETION_WAIT << CMD_SHIFT) | ((paddr >> 32) & 0xFFFFF);
> + wq.dw2 = 0xDEADBEEF;
> + wq.dw3 = 0xFEEDC0DE;
> +
> + rc = _ivhd_issue_command(iommu, &wq);
> + /* wv will change to value in dw2/dw3 when command is complete */
> + for (i = 0; i < IVHD_MAXDELAY && !wv; i++) {
> + DELAY(10 << i);
> + }
> + /* mi tracks the worst-case delay seen, for tuning */
> + if (mi < i && mi != IVHD_MAXDELAY) {
> + printf("maxdel: %d\n", i);
> + mi = i;
> + }
> + if (i == IVHD_MAXDELAY) {
> + printf("ivhd command timeout: %.8x %.8x %.8x %.8x wv:%llx idx:%x\n",
> + cmd->dw0, cmd->dw1, cmd->dw2, cmd->dw3, wv, rc);
> + }
> + }
> + return rc;
> +
> + }
> +
> + /* AMD: Flush changes to Device Table Entry for a specific domain */
> + int
> + ivhd_flush_devtab(struct iommu_softc *iommu, int did)
> + {
> + 	struct ivhd_command cmd = {
> + 		.dw0 = did,
> + 		.dw1 = INVALIDATE_DEVTAB_ENTRY << CMD_SHIFT,
> + 	};
> +
> + 	/* Synchronous: wait for the invalidation to complete. */
> + 	return ivhd_issue_command(iommu, &cmd, 1);
> + }
> +
> + /* AMD: Invalidate all IOMMU device and page tables */
> + int
> + ivhd_invalidate_iommu_all(struct iommu_softc *iommu)
> + {
> + 	struct ivhd_command cmd = {
> + 		.dw1 = INVALIDATE_IOMMU_ALL << CMD_SHIFT,
> + 	};
> +
> + 	/* Posted; the caller does not wait for completion. */
> + 	return ivhd_issue_command(iommu, &cmd, 0);
> + }
> +
> + /* AMD: Invalidate interrupt remapping */
> + int
> + ivhd_invalidate_interrupt_table(struct iommu_softc *iommu, int did)
> + {
> + 	struct ivhd_command cmd = {
> + 		.dw0 = did,
> + 		.dw1 = INVALIDATE_INTERRUPT_TABLE << CMD_SHIFT,
> + 	};
> +
> + 	/* Posted; the caller does not wait for completion. */
> + 	return ivhd_issue_command(iommu, &cmd, 0);
> + }
> +
> + /* AMD: Invalidate all page tables in a domain */
> + /*
> +  * Issues INVALIDATE_IOMMU_PAGES with the "all pages" address
> +  * encoding: dw2/dw3 form the 64-bit address with the S (size) and
> +  * PDE bits set (0x3) and all address bits 1, which the hardware
> +  * interprets as "entire address range".  Waits for completion.
> +  */
> + int
> + ivhd_invalidate_domain(struct iommu_softc *iommu, int did)
> + {
> + struct ivhd_command cmd = { .dw1 = did | (INVALIDATE_IOMMU_PAGES << CMD_SHIFT) };
> +
> + cmd.dw2 = 0xFFFFF000 | 0x3;
> + cmd.dw3 = 0x7FFFFFFF;
> + return ivhd_issue_command(iommu, &cmd, 1);
> + }
> +
> + /* AMD: Display Registers */
> + /* Debug dump of the main control/status registers and both rings. */
> + void
> + ivhd_showit(struct iommu_softc *iommu)
> + {
> + printf("---- dt:%.16llx cmd:%.16llx evt:%.16llx ctl:%.16llx sts:%.16llx\n",
> + iommu_read_8(iommu, DEV_TAB_BASE_REG),
> + iommu_read_8(iommu, CMD_BASE_REG),
> + iommu_read_8(iommu, EVT_BASE_REG),
> + iommu_read_8(iommu, IOMMUCTL_REG),
> + iommu_read_8(iommu, IOMMUSTS_REG));
> + printf("---- cmd queue:%.16llx %.16llx evt queue:%.16llx %.16llx\n",
> + iommu_read_8(iommu, CMD_HEAD_REG),
> + iommu_read_8(iommu, CMD_TAIL_REG),
> + iommu_read_8(iommu, EVT_HEAD_REG),
> + iommu_read_8(iommu, EVT_TAIL_REG));
> + }
> +
> + /* AMD: Generate Fake Errors to test event handler */
> + /*
> +  * Test-only: deliberately corrupt a device table entry and issue an
> +  * all-ones (illegal) command to verify the event log path reports
> +  * both classes of error.  Never call this in normal operation.
> +  */
> + void ivhd_checkerr(struct iommu_softc *iommu);
> + void ivhd_checkerr(struct iommu_softc *iommu)
> + {
> + struct ivhd_command cmd = { -1, -1, -1, -1 };
> +
> + /* Generate ILLEGAL DEV TAB entry? */
> + iommu->dte[0x2303].dw0 = -1;      // invalid
> + iommu->dte[0x2303].dw2 = 0x1234;  // domain
> + iommu->dte[0x2303].dw7 = -1;      // reserved
> + ivhd_flush_devtab(iommu, 0x1234);
> + ivhd_poll_events(iommu);
> +
> + /* Generate ILLEGAL_COMMAND_ERROR : ok */
> + ivhd_issue_command(iommu, &cmd, 0);
> + ivhd_poll_events(iommu);
> +
> + /* Generate page hardware error */
> + }
> +
> + /* AMD: Show Device Table Entry */
> + /* Walk all 64K source ids and print only the populated entries. */
> + void ivhd_showdte(struct iommu_softc *iommu)
> + {
> + 	int sid;
> +
> + 	for (sid = 0; sid < 65536; sid++) {
> + 		if (iommu->dte[sid].dw0 == 0)
> + 			continue;
> + 		printf("%.2x:%.2x.%x: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
> + 		    sid >> 8, (sid >> 3) & 0x1F, sid & 0x7,
> + 		    iommu->dte[sid].dw0, iommu->dte[sid].dw1,
> + 		    iommu->dte[sid].dw2, iommu->dte[sid].dw3,
> + 		    iommu->dte[sid].dw4, iommu->dte[sid].dw5,
> + 		    iommu->dte[sid].dw6, iommu->dte[sid].dw7);
> + 	}
> + }
> +
> + /* AMD: Show command entries */
> + /*
> +  * Debug dump of the 4K command ring.
> +  * NOTE(review): the loop bound 4096/128 prints 32 entries, but a 4K
> +  * ring of 16-byte commands holds 128 -- confirm which was intended.
> +  */
> + void ivhd_showcmd(struct iommu_softc *iommu)
> + {
> + struct ivhd_command *ihd;
> + paddr_t phd;
> + int i;
> +
> + ihd = iommu->cmd_tbl;
> + phd = iommu_read_8(iommu, CMD_BASE_REG) & CMD_BASE_MASK;
> + for (i = 0; i < 4096 / 128; i++) {
> + printf("%.2x: %.16llx %.8x %.8x %.8x %.8x\n", i,
> + (uint64_t)phd + i * sizeof(*ihd),
> + ihd[i].dw0,ihd[i].dw1,ihd[i].dw2,ihd[i].dw3);
> + }
> + }
> +
> +#define _c(x) (int)((iommu->ecap >> x ##_SHIFT) & x ## _MASK)
> +
> + /* AMD: Initialize IOMMU */
> + /*
> +  * Bring up one AMD IOMMU described by an IVHD entry: map its MMIO
> +  * window, read capabilities, disable translation, hook its MSI,
> +  * program the command ring, event log and shared device table, then
> +  * re-enable with flags carried over from the IVHD.  Returns 0 on
> +  * success, -1 on bad arguments or MMIO map failure.  The register
> +  * programming order below is significant; do not reorder casually.
> +  */
> + int
> + ivhd_iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
> + struct acpi_ivhd *ivhd)
> + {
> + static int niommu;
> + paddr_t paddr;
> + uint64_t ov;
> +
> + if (sc == NULL || iommu == NULL || ivhd == NULL) {
> + printf("Bad pointer to iommu_init!\n");
> + return -1;
> + }
> + if (_bus_space_map(sc->sc_memt, ivhd->address, 0x80000, 0, &iommu->ioh) != 0) {
> + printf("Bus Space Map fails\n");
> + return -1;
> + }
> + TAILQ_INIT(&iommu->domains);
> + TAILQ_INIT(&iommu->devices);
> +
> + /* Setup address width and number of domains */
> + iommu->id = ++niommu;
> + iommu->iot = sc->sc_memt;
> + iommu->mgaw = 48;
> + iommu->agaw = 48;
> + iommu->flags = 1;
> + iommu->segment = 0;
> + iommu->ndoms = 256;
> +
> + iommu->ecap = iommu_read_8(iommu, EXTFEAT_REG);
> + printf("ecap = %.16llx\n", iommu->ecap);
> + printf("%s%s%s%s%s%s%s%s\n",
> + iommu->ecap & EFR_PREFSUP ? "pref " : "",
> + iommu->ecap & EFR_PPRSUP  ? "ppr " : "",
> + iommu->ecap & EFR_NXSUP   ? "nx " : "",
> + iommu->ecap & EFR_GTSUP   ? "gt " : "",
> + iommu->ecap & EFR_IASUP   ? "ia " : "",
> + iommu->ecap & EFR_GASUP   ? "ga " : "",
> + iommu->ecap & EFR_HESUP   ? "he " : "",
> + iommu->ecap & EFR_PCSUP   ? "pc " : "");
> + printf("hats:%x gats:%x glxsup:%x smif:%x smifrc:%x gam:%x\n",
> + _c(EFR_HATS), _c(EFR_GATS), _c(EFR_GLXSUP), _c(EFR_SMIFSUP),
> + _c(EFR_SMIFRC), _c(EFR_GAMSUP));
> +
> + /* Turn off iommu */
> + ov = iommu_read_8(iommu, IOMMUCTL_REG);
> + iommu_write_8(iommu, IOMMUCTL_REG, ov & ~(CTL_IOMMUEN | CTL_COHERENT |
> + CTL_HTTUNEN | CTL_RESPASSPW | CTL_PASSPW | CTL_ISOC));
> +
> + /* Enable intr */
> + sid_flag[ivhd->devid] |= SID_INVALID;
> + ivhd_intr_map(iommu, ivhd->devid);
> +
> + /* Setup command buffer with 4k buffer (128 entries) */
> + iommu->cmd_tbl = iommu_alloc_page(iommu, &paddr);
> + iommu_write_8(iommu, CMD_BASE_REG, (paddr & CMD_BASE_MASK) | CMD_TBL_LEN_4K);
> + iommu_write_4(iommu, CMD_HEAD_REG, 0x00);
> + iommu_write_4(iommu, CMD_TAIL_REG, 0x00);
> + iommu->cmd_tblp = paddr;
> +
> + /* Setup event log with 4k buffer (128 entries) */
> + iommu->evt_tbl = iommu_alloc_page(iommu, &paddr);
> + iommu_write_8(iommu, EVT_BASE_REG, (paddr & EVT_BASE_MASK) | EVT_TBL_LEN_4K);
> + iommu_write_4(iommu, EVT_HEAD_REG, 0x00);
> + iommu_write_4(iommu, EVT_TAIL_REG, 0x00);
> + iommu->evt_tblp = paddr;
> +
> + /* Setup device table
> + * 1 entry per source ID (bus:device:function - 64k entries)
> + */
> + iommu->dte = sc->sc_hwdte;
> + //pmap_extract(pmap_kernel(), (vaddr_t)iommu->dte, &paddr);
> + iommu_write_8(iommu, DEV_TAB_BASE_REG, (sc->sc_hwdtep & DEV_TAB_MASK) | DEV_TAB_LEN);
> +
> + /* Enable IOMMU */
> + ov |= (CTL_IOMMUEN | CTL_EVENTLOGEN | CTL_CMDBUFEN | CTL_EVENTINTEN | CTL_COMWAITINTEN);
> + if (ivhd->flags & IVHD_COHERENT)
> + ov |= CTL_COHERENT;
> + if (ivhd->flags & IVHD_HTTUNEN)
> + ov |= CTL_HTTUNEN;
> + if (ivhd->flags & IVHD_RESPASSPW)
> + ov |= CTL_RESPASSPW;
> + if (ivhd->flags & IVHD_PASSPW)
> + ov |= CTL_PASSPW;
> + if (ivhd->flags & IVHD_ISOC)
> + ov |= CTL_ISOC;
> + /* Bound invalidation waits at 1ms so a hung queue cannot stall us */
> + ov &= ~(CTL_INVTIMEOUT_MASK << CTL_INVTIMEOUT_SHIFT);
> + ov |=  (CTL_INVTIMEOUT_1MS  << CTL_INVTIMEOUT_SHIFT);
> + iommu_write_8(iommu, IOMMUCTL_REG, ov);
> +
> + ivhd_invalidate_iommu_all(iommu);
> + //ivhd_checkerr(iommu);
> +
> + TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
> + return 0;
> + }
> +
> + /*
> +  * Record an IVHD device id range with its DTE config bits.
> +  * NOTE(review): the allocated entry is never linked onto any list
> +  * and never freed, so as written this function both leaks memory
> +  * and has no observable effect -- it likely needs a TAILQ_INSERT
> +  * onto a per-iommu device list.  Confirm intent before relying on it.
> +  */
> + void
> + iommu_ivhd_add(struct iommu_softc *iommu, int start, int end, int cfg)
> + {
> + struct ivhd_devlist *idev;
> +
> + idev = malloc(sizeof(*idev), M_DEVBUF, M_ZERO | M_WAITOK);
> + idev->start_id = start;
> + idev->end_id = end;
> + idev->cfg = cfg;
> + }
> +
> + /*
> +  * Parse one IVHD entry from the AMD IVRS table: print its header
> +  * and flags, initialize the described IOMMU (extended type 0x11
> +  * entries only), then decode the variable-length device entries
> +  * that follow, tracking the current DTE config bits.
> +  * NOTE(review): the legacy (type 0x10) branch never allocates or
> +  * initializes an iommu_softc -- confirm that is intentional.
> +  */
> + void
> + acpiivrs_ivhd(struct acpidmar_softc *sc, struct acpi_ivhd *ivhd)
> + {
> + struct iommu_softc *iommu;
> + struct acpi_ivhd_ext *ext;
> + union acpi_ivhd_entry *ie;
> + int start, off, dte, all_dte = 0;
> +
> + if (ivhd->type == IVRS_IVHD_EXT) {
> + ext = (struct acpi_ivhd_ext *)ivhd;
> + printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x %.16llx\n",
> +       ext->type, ext->flags, ext->length,
> +             ext->segment, dmar_bdf(ext->devid), ext->cap,
> +             ext->address, ext->info,
> +       ext->attrib, ext->efr);
> + if (ext->flags & IVHD_PPRSUP)
> + printf(" PPRSup");
> + if (ext->flags & IVHD_PREFSUP)
> + printf(" PreFSup");
> + if (ext->flags & IVHD_COHERENT)
> + printf(" Coherent");
> + if (ext->flags & IVHD_IOTLB)
> + printf(" Iotlb");
> + if (ext->flags & IVHD_ISOC)
> + printf(" ISoc");
> + if (ext->flags & IVHD_RESPASSPW)
> + printf(" ResPassPW");
> + if (ext->flags & IVHD_PASSPW)
> + printf(" PassPW");
> + if (ext->flags & IVHD_HTTUNEN)
> + printf( " HtTunEn");
> + if (ext->flags)
> + printf("\n");
> + off = sizeof(*ext);
> + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO|M_WAITOK);
> + ivhd_iommu_init(sc, iommu, ivhd);
> + } else {
> + printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x\n",
> +       ivhd->type, ivhd->flags, ivhd->length,
> +             ivhd->segment, dmar_bdf(ivhd->devid), ivhd->cap,
> +             ivhd->address, ivhd->info,
> +       ivhd->feature);
> + if (ivhd->flags & IVHD_PPRSUP)
> + printf(" PPRSup");
> + if (ivhd->flags & IVHD_PREFSUP)
> + printf(" PreFSup");
> + if (ivhd->flags & IVHD_COHERENT)
> + printf(" Coherent");
> + if (ivhd->flags & IVHD_IOTLB)
> + printf(" Iotlb");
> + if (ivhd->flags & IVHD_ISOC)
> + printf(" ISoc");
> + if (ivhd->flags & IVHD_RESPASSPW)
> + printf(" ResPassPW");
> + if (ivhd->flags & IVHD_PASSPW)
> + printf(" PassPW");
> + if (ivhd->flags & IVHD_HTTUNEN)
> + printf( " HtTunEn");
> + if (ivhd->flags)
> + printf("\n");
> + off = sizeof(*ivhd);
> + }
> + while (off < ivhd->length) {
> + ie = (void *)ivhd + off;
> + switch (ie->type) {
> + case IVHD_ALL:
> + all_dte = ie->all.data;
> + /* Fix: print the value just stored; 'dte' was uninitialized here */
> + printf(" ALL %.4x\n", all_dte);
> + off += sizeof(ie->all);
> + break;
> + case IVHD_SEL:
> + dte = ie->sel.data;
> + printf(" SELECT: %s %.4x\n", dmar_bdf(ie->sel.devid), dte);
> + off += sizeof(ie->sel);
> + break;
> + case IVHD_SOR:
> + dte = ie->sor.data;
> + start = ie->sor.devid;
> + printf(" SOR: %s %.4x\n", dmar_bdf(start), dte);
> + off += sizeof(ie->sor);
> + break;
> + case IVHD_EOR:
> + printf(" EOR: %s\n", dmar_bdf(ie->eor.devid));
> + off += sizeof(ie->eor);
> + break;
> + case IVHD_ALIAS_SEL:
> + dte = ie->alias.data;
> + printf(" ALIAS: src=%s: ", dmar_bdf(ie->alias.srcid));
> + printf(" %s %.4x\n", dmar_bdf(ie->alias.devid), dte);
> + off += sizeof(ie->alias);
> + break;
> + case IVHD_ALIAS_SOR:
> + dte = ie->alias.data;
> + printf(" ALIAS_SOR: %s %.4x ", dmar_bdf(ie->alias.devid), dte);
> + printf(" src=%s\n", dmar_bdf(ie->alias.srcid));
> + off += sizeof(ie->alias);
> + break;
> + case IVHD_EXT_SEL:
> + dte = ie->ext.data;
> + printf(" EXT SEL: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
> + dte, ie->ext.extdata);
> + off += sizeof(ie->ext);
> + break;
> + case IVHD_EXT_SOR:
> + dte = ie->ext.data;
> + printf(" EXT SOR: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
> +       dte, ie->ext.extdata);
> + off += sizeof(ie->ext);
> + break;
> + case IVHD_SPECIAL:
> + printf(" SPECIAL\n");
> + off += sizeof(ie->special);
> + break;
> + default:
> + /* Unknown entry: length is unknown, so stop parsing this IVHD */
> + printf(" 2:unknown %x\n", ie->type);
> + off = ivhd->length;
> + break;
> + }
> + }
> + }
> +
> + /*
> +  * AMD: parse the ACPI IVRS table.  Allocates the single shared
> +  * hardware device table (one 256-bit DTE per source id), selects
> +  * the AMD page-mapping routine, then walks each IVHD/IVMD entry.
> +  */
> + void
> + acpiivrs_init(struct acpidmar_softc *sc, struct acpi_ivrs *ivrs)
> + {
> + union acpi_ivrs_entry *ie;
> + int off;
> +
> + if (!sc->sc_hwdte) {
> + sc->sc_hwdte = iommu_alloc_contig(sc, HWDTE_SIZE, &sc->sc_hwdtep);
> + if (sc->sc_hwdte == NULL)
> + panic("Can't allocate HWDTE!\n");
> + }
> +
> + domain_map_page = domain_map_page_amd;
> + printf("IVRS Version: %d\n", ivrs->hdr.revision);
> + printf(" VA Size: %d\n", (ivrs->ivinfo >> IVRS_VASIZE_SHIFT) & IVRS_VASIZE_MASK);
> + printf(" PA Size: %d\n", (ivrs->ivinfo >> IVRS_PASIZE_SHIFT) & IVRS_PASIZE_MASK);
> +
> + TAILQ_INIT(&sc->sc_drhds);
> + TAILQ_INIT(&sc->sc_rmrrs);
> + TAILQ_INIT(&sc->sc_atsrs);
> +
> + printf("======== IVRS\n");
> + off = sizeof(*ivrs);
> + while (off < ivrs->hdr.length) {
> + ie = (void *)ivrs + off;
> + switch (ie->type) {
> + case IVRS_IVHD:
> + case IVRS_IVHD_EXT:
> + acpiivrs_ivhd(sc, &ie->ivhd);
> + break;
> + case IVRS_IVMD_ALL:
> + case IVRS_IVMD_SPECIFIED:
> + case IVRS_IVMD_RANGE:
> + /* Memory definition blocks are recognized but not yet handled */
> + printf("ivmd\n");
> + break;
> + default:
> + printf("1:unknown: %x\n", ie->type);
> + break;
> + }
> + off += ie->length;
> + }
> + printf("======== End IVRS\n");
> + }
> +
> + /*
> +  * AMD: suspend/resume bookkeeping.  Only toggles the softc suspend
> +  * flag; no hardware registers are touched here.
> +  */
> + static int
> + acpiivhd_activate(struct iommu_softc *iommu, int act)
> + {
> + 	if (act == DVACT_SUSPEND)
> + 		iommu->flags |= IOMMU_FLAGS_SUSPEND;
> + 	else if (act == DVACT_RESUME)
> + 		iommu->flags &= ~IOMMU_FLAGS_SUSPEND;
> + 	return (0);
> + }
> +
> + /*
> +  * Suspend/resume all IOMMUs.  AMD units (iommu->dte set) only flip
> +  * a flag; Intel units reprogram the root table and fault-event MSI
> +  * on resume and disable translation on suspend.  IOMMU_FLAGS_BAD
> +  * units are skipped.
> +  */
> + int
> + acpidmar_activate(struct device *self, int act)
> + {
> + struct acpidmar_softc *sc = (struct acpidmar_softc *)self;
> + struct iommu_softc *iommu;
> +
> + printf("called acpidmar_activate %d %p\n", act, sc);
> +
> + if (sc == NULL) {
> + return (0);
> + }
> +
> + switch (act) {
> + case DVACT_RESUME:
> + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> + printf("iommu%d resume\n", iommu->id);
> + if (iommu->dte) {
> + acpiivhd_activate(iommu, act);
> + continue;
> + }
> + iommu_flush_write_buffer(iommu);
> + iommu_set_rtaddr(iommu, iommu->rtaddr);
> + iommu_write_4(iommu, DMAR_FEDATA_REG, iommu->fedata);
> + iommu_write_4(iommu, DMAR_FEADDR_REG, iommu->feaddr);
> + iommu_write_4(iommu, DMAR_FEUADDR_REG,
> +    iommu->feaddr >> 32);
> + /* Only re-enable translation if it was cleanly suspended */
> + if ((iommu->flags & (IOMMU_FLAGS_BAD|IOMMU_FLAGS_SUSPEND)) ==
> +    IOMMU_FLAGS_SUSPEND) {
> + printf("enable wakeup translation\n");
> + iommu_enable_translation(iommu, 1);
> + }
> + iommu_showcfg(iommu, -1);
> + }
> + break;
> + case DVACT_SUSPEND:
> + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> + printf("iommu%d suspend\n", iommu->id);
> + if (iommu->flags & IOMMU_FLAGS_BAD)
> + continue;
> + if (iommu->dte) {
> + acpiivhd_activate(iommu, act);
> + continue;
> + }
> + iommu->flags |= IOMMU_FLAGS_SUSPEND;
> + iommu_enable_translation(iommu, 0);
> + iommu_showcfg(iommu, -1);
> + }
> + break;
> + }
> + return (0);
> + }
> +
> + /* Entry point for ACPI sleep code: suspend/resume every IOMMU. */
> + void
> + acpidmar_sw(int act)
> + {
> + acpidmar_activate((struct device *)acpidmar_sc, act);
> + }
> +
> + /*
> +  * Autoconf match: attach for either an Intel DMAR or an AMD IVRS
> +  * ACPI table; anything else is not ours.
> +  */
> + int
> + acpidmar_match(struct device *parent, void *match, void *aux)
> + {
> + 	struct acpi_attach_args *aaa = aux;
> + 	struct acpi_table_header *hdr;
> +
> + 	/* If we do not have a table, it is not us */
> + 	if (aaa->aaa_table == NULL)
> + 		return (0);
> +
> + 	hdr = (struct acpi_table_header *)aaa->aaa_table;
> + 	if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0 ||
> + 	    memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0)
> + 		return (1);
> +
> + 	return (0);
> + }
> +
> + /*
> +  * Autoconf attach: dispatch to the Intel (DMAR) or AMD (IVRS)
> +  * parser based on the ACPI table signature, and publish the
> +  * softc through the global acpidmar_sc.
> +  */
> + void
> + acpidmar_attach(struct device *parent, struct device *self, void *aux)
> + {
> + struct acpidmar_softc *sc = (void *)self;
> + struct acpi_attach_args *aaa = aux;
> + struct acpi_dmar *dmar = (struct acpi_dmar *)aaa->aaa_table;
> + struct acpi_ivrs        *ivrs = (struct acpi_ivrs *)aaa->aaa_table;
> + struct acpi_table_header *hdr;
> +
> + hdr = (struct acpi_table_header *)aaa->aaa_table;
> + sc->sc_memt = aaa->aaa_memt;
> + if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0) {
> + acpidmar_sc = sc;
> + acpidmar_init(sc, dmar);
> + }
> + if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0) {
> + acpidmar_sc = sc;
> + acpiivrs_init(sc, ivrs);
> + }
> + }
> +
> +/* Interrupt shiz */
> +void acpidmar_msi_hwmask(struct pic *, int);
> +void acpidmar_msi_hwunmask(struct pic *, int);
> +void acpidmar_msi_addroute(struct pic *, struct cpu_info *, int, int, int);
> +void acpidmar_msi_delroute(struct pic *, struct cpu_info *, int, int, int);
> +
> + /*
> +  * Intel: mask the fault-event MSI.  The read-back of FECTL after
> +  * the write flushes the posted write to the hardware.
> +  */
> + void
> + acpidmar_msi_hwmask(struct pic *pic, int pin)
> + {
> + struct iommu_pic *ip = (void *)pic;
> + struct iommu_softc *iommu = ip->iommu;
> +
> + printf("msi_hwmask\n");
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu_write_4(iommu, DMAR_FECTL_REG, FECTL_IM);
> + iommu_read_4(iommu, DMAR_FECTL_REG);
> +
> + mtx_leave(&iommu->reg_lock);
> + }
> +
> + /*
> +  * Intel: unmask the fault-event MSI (clear the IM bit); read-back
> +  * flushes the posted write.
> +  */
> + void
> + acpidmar_msi_hwunmask(struct pic *pic, int pin)
> + {
> + struct iommu_pic *ip = (void *)pic;
> + struct iommu_softc *iommu = ip->iommu;
> +
> + printf("msi_hwunmask\n");
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu_write_4(iommu, DMAR_FECTL_REG, 0);
> + iommu_read_4(iommu, DMAR_FECTL_REG);
> +
> + mtx_leave(&iommu->reg_lock);
> + }
> +
> + /*
> +  * Intel: program the fault-event MSI destination: vector in FEDATA,
> +  * APIC address (0xfee00000 | apicid<<12) split across FEADDR and
> +  * FEUADDR.  Saved in the softc so resume can reprogram them.
> +  */
> + void
> + acpidmar_msi_addroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
> +    int type)
> + {
> + struct iommu_pic *ip = (void *)pic;
> + struct iommu_softc *iommu = ip->iommu;
> +
> + mtx_enter(&iommu->reg_lock);
> +
> + iommu->fedata = vec;
> + iommu->feaddr = 0xfee00000L | (ci->ci_apicid << 12);
> + iommu_write_4(iommu, DMAR_FEDATA_REG, vec);
> + iommu_write_4(iommu, DMAR_FEADDR_REG, iommu->feaddr);
> + iommu_write_4(iommu, DMAR_FEUADDR_REG, iommu->feaddr >> 32);
> +
> + mtx_leave(&iommu->reg_lock);
> + }
> +
> + /* Route removal is not supported for the IOMMU MSI; log only. */
> + void
> + acpidmar_msi_delroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
> +    int type)
> + {
> + printf("msi_delroute\n");
> + }
> +
> + /*
> +  * Intel: create a private MSI "pic" for the IOMMU's fault event
> +  * interrupt and establish 'func' as its handler.  Returns the
> +  * interrupt cookie from intr_establish.
> +  */
> + void *
> + acpidmar_intr_establish(void *ctx, int level, int (*func)(void *),
> +    void *arg, const char *what)
> + {
> + struct iommu_softc *iommu = ctx;
> + struct pic *pic;
> +
> + pic = &iommu->pic.pic;
> + iommu->pic.iommu = iommu;
> +
> + strlcpy(pic->pic_dev.dv_xname, "dmarpic",
> + sizeof(pic->pic_dev.dv_xname));
> + pic->pic_type = PIC_MSI;
> + pic->pic_hwmask = acpidmar_msi_hwmask;
> + pic->pic_hwunmask = acpidmar_msi_hwunmask;
> + pic->pic_addroute = acpidmar_msi_addroute;
> + pic->pic_delroute = acpidmar_msi_delroute;
> + pic->pic_edge_stubs = ioapic_edge_stubs;
> + #ifdef MULTIPROCESSOR
> + mtx_init(&pic->pic_mutex, level);
> + #endif
> +
> + return intr_establish(-1, pic, 0, IST_PULSE, level, NULL, func, arg, what);
> + }
> +
> + /* Intel: Handle DMAR interrupt */
> + /*
> +  * Walk the fault recording ring (nfr entries of 16 bytes at offset
> +  * fro) starting at the index hardware reported in FSTS.FRI, print
> +  * each valid record once (deduplicated against the last one seen),
> +  * then acknowledge by clearing PFO/PPF in FSTS.
> +  */
> + int
> + acpidmar_intr(void *ctx)
> + {
> + struct iommu_softc *iommu = ctx;
> + struct fault_entry fe;
> + static struct fault_entry ofe;
> + int fro, nfr, fri, i;
> + uint32_t sts;
> +
> + //splassert(IPL_HIGH);
> +
> + if (!(iommu->gcmd & GCMD_TE)) {
> + return (1);
> + }
> + mtx_enter(&iommu->reg_lock);
> + /* NOTE(review): the FECTL read result is discarded before sts is
> +  * overwritten by FSTS -- presumably an ack/flush read; confirm. */
> + sts = iommu_read_4(iommu, DMAR_FECTL_REG);
> + sts = iommu_read_4(iommu, DMAR_FSTS_REG);
> +
> + if (!(sts & FSTS_PPF)) {
> + mtx_leave(&iommu->reg_lock);
> + return (1);
> + }
> +
> + nfr = cap_nfr(iommu->cap);
> + fro = cap_fro(iommu->cap);
> + fri = (sts >> FSTS_FRI_SHIFT) & FSTS_FRI_MASK;
> + for (i = 0; i < nfr; i++) {
> + fe.hi = iommu_read_8(iommu, fro + (fri*16) + 8);
> + if (!(fe.hi & FRCD_HI_F))
> + break;
> +
> + fe.lo = iommu_read_8(iommu, fro + (fri*16));
> + if (ofe.hi != fe.hi || ofe.lo != fe.lo) {
> + iommu_showfault(iommu, fri, &fe);
> + ofe.hi = fe.hi;
> + ofe.lo = fe.lo;
> + }
> + fri = (fri + 1) % nfr;
> + }
> +
> + iommu_write_4(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
> +
> + mtx_leave(&iommu->reg_lock);
> +
> + return (1);
> + }
> +
> + /* Human-readable names for VT-d fault reason codes, indexed by the
> +  * FR field of a fault record (see iommu_showfault()). */
> + const char *vtd_faults[] = {
> + "Software",
> + "Root Entry Not Present", /* ok (rtaddr + 4096) */
> + "Context Entry Not Present", /* ok (no CTX_P) */
> + "Context Entry Invalid", /* ok (tt = 3) */
> + "Address Beyond MGAW",
> + "Write", /* ok */
> + "Read", /* ok */
> + "Paging Entry Invalid", /* ok */
> + "Root Table Invalid",
> + "Context Table Invalid",
> + "Root Entry Reserved",          /* ok (root.lo |= 0x4) */
> + "Context Entry Reserved",
> + "Paging Entry Reserved",
> + "Context Entry TT",
> + "Reserved",
> + };
> +
> +void iommu_showpte(uint64_t, int, uint64_t);
> +
> + /* Intel: Show IOMMU page table entry */
> + /*
> +  * Recursively dump a VT-d page table.  'lvl' is the bit position of
> +  * the level (VTD_LEVELx), so (i << lvl) rebuilds the IOVA covered
> +  * by entry i.  At the leaf level, identity mappings (iova == pa)
> +  * are flagged and terminate the dump for that table.
> +  */
> + void
> + iommu_showpte(uint64_t ptep, int lvl, uint64_t base)
> + {
> + uint64_t nb, pb, i;
> + struct pte_entry *pte;
> +
> + pte = (void *)PMAP_DIRECT_MAP(ptep);
> + for (i = 0; i < 512; i++) {
> + if (!(pte[i].val & PTE_P))
> + continue;
> + nb = base + (i << lvl);
> + pb = pte[i].val & ~VTD_PAGE_MASK;
> + if(lvl == VTD_LEVEL0) {
> + /* Fix: test the R permission bit, matching the W test below;
> +  * the original compared the whole PTE for equality with PTE_R */
> + printf("   %3llx %.16llx = %.16llx %c%c %s\n",
> +    i, nb, pb,
> +    pte[i].val & PTE_R ? 'r' : ' ',
> +    pte[i].val & PTE_W ? 'w' : ' ',
> +    (nb == pb) ? " ident" : "");
> + if (nb == pb)
> + return;
> + } else {
> + iommu_showpte(pb, lvl - VTD_STRIDE_SIZE, nb);
> + }
> + }
> + }
> +
> + /* Intel: Show IOMMU configuration */
> + /*
> +  * Dump the global command/status state and every valid root/context
> +  * entry (256 buses x 256 devfns), including each device's PCI class
> +  * code read from config space.  'sid' is currently unused.
> +  */
> + void
> + iommu_showcfg(struct iommu_softc *iommu, int sid)
> + {
> + int i, j, sts, cmd;
> + struct context_entry *ctx;
> + pcitag_t tag;
> + pcireg_t clc;
> +
> + cmd = iommu_read_4(iommu, DMAR_GCMD_REG);
> + sts = iommu_read_4(iommu, DMAR_GSTS_REG);
> + printf("iommu%d: flags:%d root pa:%.16llx %s %s %s %.8x %.8x\n",
> +    iommu->id, iommu->flags, iommu_read_8(iommu, DMAR_RTADDR_REG),
> +    sts & GSTS_TES ? "enabled" : "disabled",
> +    sts & GSTS_QIES ? "qi" : "ccmd",
> +    sts & GSTS_IRES ? "ir" : "",
> +    cmd, sts);
> + for (i = 0; i < 256; i++) {
> + if (!root_entry_is_valid(&iommu->root[i])) {
> + continue;
> + }
> + for (j = 0; j < 256; j++) {
> + ctx = iommu->ctx[i] + j;
> + if (!context_entry_is_valid(ctx)) {
> + continue;
> + }
> + tag = pci_make_tag(NULL, i, (j >> 3), j & 0x7);
> + clc = pci_conf_read(NULL, tag, 0x08) >> 8;
> + printf("  %.2x:%.2x.%x lvl:%d did:%.4x tt:%d ptep:%.16llx flag:%x cc:%.6x\n",
> +    i, (j >> 3), j & 7,
> +    context_address_width(ctx),
> +    context_domain_id(ctx),
> +    context_translation_type(ctx),
> +    context_pte(ctx),
> +    context_user(ctx),
> +    clc);
> + #if 0
> + /* dump pagetables */
> + iommu_showpte(ctx->lo & ~VTD_PAGE_MASK, iommu->agaw -
> +    VTD_STRIDE_SIZE, 0);
> + #endif
> + }
> + }
> + }
> +
> + /* Intel: Show IOMMU fault */
> + /*
> +  * Decode and print one fault record: source bus/dev/fn, read/write,
> +  * faulting address, reason string, and whether the source device's
> +  * context was ever initialized/mapped.  Also reports if the fault
> +  * address lies in an E820-reserved range, and optionally drops to
> +  * ddb when acpidmar_ddb is set.
> +  */
> + void
> + iommu_showfault(struct iommu_softc *iommu, int fri, struct fault_entry *fe)
> + {
> + int bus, dev, fun, type, fr, df;
> + bios_memmap_t *im;
> + const char *mapped;
> +
> + if (!(fe->hi & FRCD_HI_F))
> + return;
> + type = (fe->hi & FRCD_HI_T) ? 'r' : 'w';
> + fr = (fe->hi >> FRCD_HI_FR_SHIFT) & FRCD_HI_FR_MASK;
> + bus = (fe->hi >> FRCD_HI_BUS_SHIFT) & FRCD_HI_BUS_MASK;
> + dev = (fe->hi >> FRCD_HI_DEV_SHIFT) & FRCD_HI_DEV_MASK;
> + fun = (fe->hi >> FRCD_HI_FUN_SHIFT) & FRCD_HI_FUN_MASK;
> + /* df = 8-bit devfn (dev<<3|fun), used to index the context table */
> + df  = (fe->hi >> FRCD_HI_FUN_SHIFT) & 0xFF;
> + iommu_showcfg(iommu, mksid(bus,dev,fun));
> + if (!iommu->ctx[bus]) {
> + /* Bus is not initialized */
> + mapped = "nobus";
> + } else if (!context_entry_is_valid(&iommu->ctx[bus][df])) {
> + /* DevFn not initialized */
> + mapped = "nodevfn";
> + } else if (context_user(&iommu->ctx[bus][df]) != 0xA) {
> + /* no bus_space_map */
> + mapped = "nomap";
> + } else {
> + /* bus_space_map */
> + mapped = "mapped";
> + }
> + printf("fri%d: dmar: %.2x:%.2x.%x %s error at %llx fr:%d [%s] iommu:%d [%s]\n",
> +    fri, bus, dev, fun,
> +    type == 'r' ? "read" : "write",
> +    fe->lo,
> +    fr, fr <= 13 ? vtd_faults[fr] : "unknown",
> +    iommu->id,
> +    mapped);
> + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
> + if ((im->type == BIOS_MAP_RES) &&
> +    (im->addr <= fe->lo) &&
> +    (fe->lo <= im->addr+im->size)) {
> + printf("mem in e820.reserved\n");
> + }
> + }
> + #ifdef DDB
> + if (acpidmar_ddb)
> + db_enter();
> + #endif
> + }
> +
> diff --git a/sys/dev/acpi/acpidmar.h b/sys/dev/acpi/acpidmar.h
> new file mode 100644
> index 000000000..33659ecaf
> --- /dev/null
> +++ b/sys/dev/acpi/acpidmar.h
> @@ -0,0 +1,534 @@
> +/*
> + * Copyright (c) 2015 Jordan Hargrave <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#ifndef _DEV_ACPI_DMARREG_H_
> +#define _DEV_ACPI_DMARREG_H_
> +
> +#define VTD_STRIDE_MASK 0x1FF
> +#define VTD_STRIDE_SIZE 9
> +#define VTD_PAGE_SIZE   4096
> +#define VTD_PAGE_MASK   0xFFF
> +#define VTD_PTE_MASK    0x0000FFFFFFFFF000LL
> +
> +#define VTD_LEVEL0 12
> +#define VTD_LEVEL1 21
> +#define VTD_LEVEL2 30 /* Minimum level supported */
> +#define VTD_LEVEL3 39 /* Also supported */
> +#define VTD_LEVEL4 48
> +#define VTD_LEVEL5 57
> +
> +#define _xbit(x,y) (((x)>> (y)) & 1)
> +#define _xfld(x,y) (uint32_t)(((x)>> y##_SHIFT) & y##_MASK)
> +
> +#define VTD_AWTOLEVEL(x)    (((x) - 30) / VTD_STRIDE_SIZE)
> +#define VTD_LEVELTOAW(x)    (((x) * VTD_STRIDE_SIZE) + 30)
> +
> +#define DMAR_VER_REG 0x00    /* 32:Arch version supported by this IOMMU */
> +#define DMAR_RTADDR_REG 0x20    /* 64:Root entry table */
> +#define DMAR_FEDATA_REG 0x3c    /* 32:Fault event interrupt data register */
> +#define DMAR_FEADDR_REG 0x40    /* 32:Fault event interrupt addr register */
> +#define DMAR_FEUADDR_REG 0x44    /* 32:Upper address register */
> +#define DMAR_AFLOG_REG 0x58    /* 64:Advanced Fault control */
> +#define DMAR_PMEN_REG 0x64    /* 32:Enable Protected Memory Region */
> +#define DMAR_PLMBASE_REG 0x68    /* 32:PMRR Low addr */
> +#define DMAR_PLMLIMIT_REG 0x6c    /* 32:PMRR low limit */
> +#define DMAR_PHMBASE_REG 0x70    /* 64:pmrr high base addr */
> +#define DMAR_PHMLIMIT_REG 0x78    /* 64:pmrr high limit */
> +#define DMAR_ICS_REG 0x9C    /* 32:Invalidation complete status register */
> +#define DMAR_IECTL_REG 0xa0    /* 32:Invalidation event control register */
> +#define DMAR_IEDATA_REG 0xa4    /* 32:Invalidation event data register */
> +#define DMAR_IEADDR_REG 0xa8    /* 32:Invalidation event address register */
> +#define DMAR_IEUADDR_REG 0xac    /* 32:Invalidation event upper address register */
> +#define DMAR_IRTA_REG 0xb8    /* 64:Interrupt remapping table addr register */
> +#define DMAR_CAP_REG 0x08    /* 64:Hardware supported capabilities */
> +#define   CAP_PI (1LL << 59)
> +#define   CAP_FL1GP (1LL << 56)
> +#define   CAP_DRD (1LL << 55)
> +#define   CAP_DWD (1LL << 54)
> +#define   CAP_MAMV_MASK 0x3F
> +#define   CAP_MAMV_SHIFT 48LL
> +#define   cap_mamv(x) _xfld(x,CAP_MAMV)
> +#define   CAP_NFR_MASK 0xFF
> +#define   CAP_NFR_SHIFT 40LL
> +#define   cap_nfr(x) (_xfld(x,CAP_NFR) + 1)
> +#define   CAP_PSI (1LL << 39)
> +#define   CAP_SLLPS_MASK 0xF
> +#define   CAP_SLLPS_SHIFT 34LL
> +#define   cap_sllps(x) _xfld(x,CAP_SLLPS)
> +#define   CAP_FRO_MASK 0x3FF
> +#define   CAP_FRO_SHIFT 24LL
> +#define   cap_fro(x) (_xfld(x,CAP_FRO) * 16)
> +#define   CAP_ZLR (1LL << 22)
> +#define   CAP_MGAW_MASK 0x3F
> +#define   CAP_MGAW_SHIFT 16LL
> +#define   cap_mgaw(x) (_xfld(x,CAP_MGAW) + 1)
> +#define   CAP_SAGAW_MASK 0x1F
> +#define   CAP_SAGAW_SHIFT 8LL
> +#define   cap_sagaw(x) _xfld(x,CAP_SAGAW)
> +#define   CAP_CM (1LL << 7)
> +#define   CAP_PHMR (1LL << 6)
> +#define   CAP_PLMR (1LL << 5)
> +#define   CAP_RWBF (1LL << 4)
> +#define   CAP_AFL (1LL << 3)
> +#define   CAP_ND_MASK 0x7
> +#define   CAP_ND_SHIFT 0x00
> +#define   cap_nd(x) (16 << (((x) & CAP_ND_MASK) << 1))
> +
> +#define DMAR_ECAP_REG 0x10 /* 64:Extended capabilities supported */
> +#define   ECAP_PSS_MASK 0x1F
> +#define   ECAP_PSS_SHIFT 35
> +#define   ECAP_EAFS (1LL << 34)
> +#define   ECAP_NWFS (1LL << 33)
> +#define   ECAP_SRS (1LL << 31)
> +#define   ECAP_ERS (1LL << 30)
> +#define   ECAP_PRS (1LL << 29)
> +#define   ECAP_PASID (1LL << 28)
> +#define   ECAP_DIS (1LL << 27)
> +#define   ECAP_NEST (1LL << 26)
> +#define   ECAP_MTS (1LL << 25)
> +#define   ECAP_ECS (1LL << 24)
> +#define   ECAP_MHMV_MASK 0xF
> +#define   ECAP_MHMV_SHIFT 0x20
> +#define   ecap_mhmv(x) _xfld(x,ECAP_MHMV)
> +#define   ECAP_IRO_MASK 0x3FF /* IOTLB Register */
> +#define   ECAP_IRO_SHIFT 0x8
> +#define   ecap_iro(x) (_xfld(x,ECAP_IRO) * 16)
> +#define   ECAP_SC (1LL << 7) /* Snoop Control */
> +#define   ECAP_PT (1LL << 6) /* HW Passthru */
> +#define   ECAP_EIM (1LL << 4)
> +#define   ECAP_IR (1LL << 3) /* Interrupt remap */
> +#define   ECAP_DT (1LL << 2) /* Device IOTLB */
> +#define   ECAP_QI (1LL << 1) /* Queued Invalidation */
> +#define   ECAP_C (1LL << 0) /* Coherent cache */
> +
> +#define DMAR_GCMD_REG 0x18 /* 32:Global command register */
> +#define   GCMD_TE (1LL << 31)
> +#define   GCMD_SRTP (1LL << 30)
> +#define   GCMD_SFL (1LL << 29)
> +#define   GCMD_EAFL (1LL << 28)
> +#define   GCMD_WBF (1LL << 27)
> +#define   GCMD_QIE (1LL << 26)
> +#define   GCMD_IRE (1LL << 25)
> +#define   GCMD_SIRTP (1LL << 24)
> +#define   GCMD_CFI (1LL << 23)
> +
> +#define DMAR_GSTS_REG 0x1c /* 32:Global status register */
> +#define   GSTS_TES (1LL << 31)
> +#define   GSTS_RTPS (1LL << 30)
> +#define   GSTS_FLS (1LL << 29)
> +#define   GSTS_AFLS (1LL << 28)
> +#define   GSTS_WBFS (1LL << 27)
> +#define   GSTS_QIES (1LL << 26)
> +#define   GSTS_IRES (1LL << 25)
> +#define   GSTS_IRTPS (1LL << 24)
> +#define   GSTS_CFIS (1LL << 23)
> +
> +#define DMAR_CCMD_REG 0x28 /* 64:Context command reg */
> +#define   CCMD_ICC (1LL << 63)
> +#define   CCMD_CIRG_MASK 0x3
> +#define   CCMD_CIRG_SHIFT 61
> +#define   CCMD_CIRG(x) ((uint64_t)(x) << CCMD_CIRG_SHIFT)
> +#define   CCMD_CAIG_MASK 0x3
> +#define   CCMD_CAIG_SHIFT 59
> +#define   CCMD_FM_MASK 0x3
> +#define   CCMD_FM_SHIFT 32
> +#define   CCMD_FM(x) (((uint64_t)(x) << CCMD_FM_SHIFT))
> +#define   CCMD_SID_MASK 0xFFFF
> +#define   CCMD_SID_SHIFT 8
> +#define   CCMD_SID(x) (((x) << CCMD_SID_SHIFT))
> +#define   CCMD_DID_MASK 0xFFFF
> +#define   CCMD_DID_SHIFT 0
> +#define   CCMD_DID(x) (((x) << CCMD_DID_SHIFT))
> +
> +#define CIG_GLOBAL CCMD_CIRG(CTX_GLOBAL)
> +#define CIG_DOMAIN CCMD_CIRG(CTX_DOMAIN)
> +#define CIG_DEVICE CCMD_CIRG(CTX_DEVICE)
> +
> +
> +#define DMAR_FSTS_REG 0x34 /* 32:Fault Status register */
> +#define   FSTS_FRI_MASK 0xFF
> +#define   FSTS_FRI_SHIFT 8
> +#define   FSTS_PRO (1LL << 7)
> +#define   FSTS_ITE (1LL << 6)
> +#define   FSTS_ICE (1LL << 5)
> +#define   FSTS_IQE (1LL << 4)
> +#define   FSTS_APF (1LL << 3)
> +#define   FSTS_APO (1LL << 2)
> +#define   FSTS_PPF (1LL << 1)
> +#define   FSTS_PFO (1LL << 0)
> +
> +#define DMAR_FECTL_REG 0x38 /* 32:Fault control register */
> +#define   FECTL_IM (1LL << 31)
> +#define   FECTL_IP (1LL << 30)
> +
> +#define FRCD_HI_F (1LL << (127-64))
> +#define FRCD_HI_T (1LL << (126-64))
> +#define FRCD_HI_AT_MASK 0x3
> +#define FRCD_HI_AT_SHIFT (124-64)
> +#define FRCD_HI_PV_MASK 0xFFFFF
> +#define FRCD_HI_PV_SHIFT (104-64)
> +#define FRCD_HI_FR_MASK 0xFF
> +#define FRCD_HI_FR_SHIFT (96-64)
> +#define FRCD_HI_PP (1LL << (95-64))
> +
> +#define FRCD_HI_SID_MASK 0xFF
> +#define FRCD_HI_SID_SHIFT 0
> +#define FRCD_HI_BUS_SHIFT 8
> +#define FRCD_HI_BUS_MASK 0xFF
> +#define FRCD_HI_DEV_SHIFT 3
> +#define FRCD_HI_DEV_MASK 0x1F
> +#define FRCD_HI_FUN_SHIFT 0
> +#define FRCD_HI_FUN_MASK 0x7
> +
> +#define DMAR_IOTLB_REG(x) (ecap_iro((x)->ecap) + 8)
> +#define DMAR_IVA_REG(x) (ecap_iro((x)->ecap) + 0)
> +
> +#define DMAR_FRIH_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 8)
> +#define DMAR_FRIL_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 0)
> +
> +#define IOTLB_IVT (1LL << 63)
> +#define IOTLB_IIRG_MASK 0x3
> +#define IOTLB_IIRG_SHIFT 60
> +#define IOTLB_IIRG(x) ((uint64_t)(x) << IOTLB_IIRG_SHIFT)
> +#define IOTLB_IAIG_MASK 0x3
> +#define IOTLB_IAIG_SHIFT 57
> +#define IOTLB_DR (1LL << 49)
> +#define IOTLB_DW (1LL << 48)
> +#define IOTLB_DID_MASK 0xFFFF
> +#define IOTLB_DID_SHIFT 32
> +#define IOTLB_DID(x) ((uint64_t)(x) << IOTLB_DID_SHIFT)
> +
> +#define IIG_GLOBAL IOTLB_IIRG(IOTLB_GLOBAL)
> +#define IIG_DOMAIN IOTLB_IIRG(IOTLB_DOMAIN)
> +#define IIG_PAGE IOTLB_IIRG(IOTLB_PAGE)
> +
> +#define DMAR_IQH_REG 0x80 /* 64:Invalidation queue head register */
> +#define DMAR_IQT_REG 0x88 /* 64:Invalidation queue tail register */
> +#define DMAR_IQA_REG 0x90 /* 64:Invalidation queue addr register */
> +#define IQA_QS_256 0 /* 256 entries */
> +#define IQA_QS_512 1 /* 512 */
> +#define IQA_QS_1K 2 /* 1024 */
> +#define IQA_QS_2K 3 /* 2048 */
> +#define IQA_QS_4K 4 /* 4096 */
> +#define IQA_QS_8K 5 /* 8192 */
> +#define IQA_QS_16K 6 /* 16384 */
> +#define IQA_QS_32K 7 /* 32768 */
> +
/*
 * Read-modify-write helpers: replace the field of width `mask` at bit
 * offset `shift` in the 32- or 64-bit word pointed to by `ov` with `nv`.
 */
static inline void iommu_rmw32(void *ov, uint32_t mask, uint32_t shift, uint32_t nv)
{
	*(uint32_t *)ov &= ~(mask << shift);
	*(uint32_t *)ov |= (nv & mask) << shift;
}
static inline void iommu_rmw64(void *ov, uint32_t mask, uint32_t shift, uint64_t nv)
{
	/*
	 * Widen the mask before shifting: `mask` is 32-bit, so a bare
	 * `mask << shift` is undefined for shift >= 32 and would compute
	 * the clear-mask in 32 bits, clobbering nothing in the upper
	 * dword (fields such as the DTE GCR3 bits live above bit 31).
	 */
	*(uint64_t *)ov &= ~((uint64_t)mask << shift);
	*(uint64_t *)ov |= (nv & mask) << shift;
}
> +
> +/*
> + * Root Entry: one per bus (256 x 128 bit = 4k)
> + *   0        = Present
> + *   1:11     = Reserved
> + *   12:HAW-1 = Context Table Pointer
> + *   HAW:63   = Reserved
> + *   64:127   = Reserved
> + */
> +#define ROOT_P (1L << 0)
> +struct root_entry {
> + uint64_t lo;
> + uint64_t hi;
> +};
> +
> +/* Check if root entry is valid */
> +static inline bool
> +root_entry_is_valid(struct root_entry *re)
> +{
> + return (re->lo & ROOT_P);
> +}
> +
> +/*
> + * Context Entry: one per devfn (256 x 128 bit = 4k)
> + *   0      = Present
> + *   1      = Fault Processing Disable
> + *   2:3    = Translation Type
> + *   4:11   = Reserved
> + *   12:63  = Second Level Page Translation
> + *   64:66  = Address Width (# PTE levels)
> + *   67:70  = Ignore
> + *   71     = Reserved
> + *   72:87  = Domain ID
> + *   88:127 = Reserved
> + */
> +#define CTX_P (1L << 0)
> +#define CTX_FPD (1L << 1)
> +#define CTX_T_MASK 0x3
> +#define CTX_T_SHIFT 2
> +enum {
> + CTX_T_MULTI,
> + CTX_T_IOTLB,
> + CTX_T_PASSTHRU
> +};
> +
> +#define CTX_H_AW_MASK 0x7
> +#define CTX_H_AW_SHIFT 0
> +#define CTX_H_USER_MASK 0xF
> +#define CTX_H_USER_SHIFT 3
> +#define CTX_H_DID_MASK 0xFFFF
> +#define CTX_H_DID_SHIFT 8
> +
> +struct context_entry {
> + uint64_t lo;
> + uint64_t hi;
> +};
> +
> +/* Set fault processing enable/disable */
> +static inline void
> +context_set_fpd(struct context_entry *ce, int enable)
> +{
> + ce->lo &= ~CTX_FPD;
> + if (enable)
> + ce->lo |= CTX_FPD;
> +}
> +
> +/* Set context entry present */
> +static inline void
> +context_set_present(struct context_entry *ce)
> +{
> + ce->lo |= CTX_P;
> +}
> +
> +/* Set Second Level Page Table Entry PA */
> +static inline void
> +context_set_slpte(struct context_entry *ce, paddr_t slpte)
> +{
> + ce->lo &= VTD_PAGE_MASK;
> + ce->lo |= (slpte & ~VTD_PAGE_MASK);
> +}
> +
> +/* Set translation type */
> +static inline void
> +context_set_translation_type(struct context_entry *ce, int tt)
> +{
> + ce->lo &= ~(CTX_T_MASK << CTX_T_SHIFT);
> + ce->lo |= ((tt & CTX_T_MASK) << CTX_T_SHIFT);
> +}
> +
> +/* Set Address Width (# of Page Table levels) */
> +static inline void
> +context_set_address_width(struct context_entry *ce, int lvl)
> +{
> + ce->hi &= ~(CTX_H_AW_MASK << CTX_H_AW_SHIFT);
> + ce->hi |= ((lvl & CTX_H_AW_MASK) << CTX_H_AW_SHIFT);
> +}
> +
> +/* Set domain ID */
> +static inline void
> +context_set_domain_id(struct context_entry *ce, int did)
> +{
> + ce->hi &= ~(CTX_H_DID_MASK << CTX_H_DID_SHIFT);
> + ce->hi |= ((did & CTX_H_DID_MASK) << CTX_H_DID_SHIFT);
> +}
> +
> +/* Get Second Level Page Table PA */
> +static inline uint64_t
> +context_pte(struct context_entry *ce)
> +{
> + return (ce->lo & ~VTD_PAGE_MASK);
> +}
> +
> +/* Get translation type */
> +static inline int
> +context_translation_type(struct context_entry *ce)
> +{
> + return (ce->lo >> CTX_T_SHIFT) & CTX_T_MASK;
> +}
> +
> +/* Get domain ID */
> +static inline int
> +context_domain_id(struct context_entry *ce)
> +{
> + return (ce->hi >> CTX_H_DID_SHIFT) & CTX_H_DID_MASK;
> +}
> +
> +/* Get Address Width */
> +static inline int
> +context_address_width(struct context_entry *ce)
> +{
> + return VTD_LEVELTOAW((ce->hi >> CTX_H_AW_SHIFT) & CTX_H_AW_MASK);
> +}
> +
> +/* Check if context entry is valid */
> +static inline bool
> +context_entry_is_valid(struct context_entry *ce)
> +{
> + return (ce->lo & CTX_P);
> +}
> +
> +/* User-available bits in context entry */
> +static inline int
> +context_user(struct context_entry *ce)
> +{
> + return (ce->hi >> CTX_H_USER_SHIFT) & CTX_H_USER_MASK;
> +}
> +
> +static inline void
> +context_set_user(struct context_entry *ce, int v)
> +{
> + ce->hi &= ~(CTX_H_USER_MASK << CTX_H_USER_SHIFT);
> + ce->hi |=  ((v & CTX_H_USER_MASK) << CTX_H_USER_SHIFT);
> +}
> +
> +/*
> + * Fault entry
> + *   0..HAW-1 = Fault address
> + *   HAW:63   = Reserved
> + *   64:71    = Source ID
> + *   96:103   = Fault Reason
> + *   104:123  = PV
> + *   124:125  = Address Translation type
> + *   126      = Type (0 = Read, 1 = Write)
> + *   127      = Fault bit
> + */
> +struct fault_entry
> +{
> + uint64_t lo;
> + uint64_t hi;
> +};
> +
> +/* PTE Entry: 512 x 64-bit = 4k */
> +#define PTE_P (1L << 0)
> +#define PTE_R 0x00
> +#define PTE_W (1L << 1)
> +#define PTE_US  (1L << 2)
> +#define PTE_PWT (1L << 3)
> +#define PTE_PCD (1L << 4)
> +#define PTE_A   (1L << 5)
> +#define PTE_D   (1L << 6)
> +#define PTE_PAT (1L << 7)
> +#define PTE_G   (1L << 8)
> +#define PTE_EA  (1L << 10)
> +#define PTE_XD  (1LL << 63)
> +
> +/* PDE Level entry */
> +#define PTE_PS  (1L << 7)
> +
> +/* PDPE Level entry */
> +
> +/* ----------------------------------------------------------------
> + * 5555555444444444333333333222222222111111111000000000------------
> + * [PML4 ->] PDPE.1GB
> + * [PML4 ->] PDPE.PDE -> PDE.2MB
> + * [PML4 ->] PDPE.PDE -> PDE -> PTE
> + * GAW0 = (12.20) (PTE)
> + * GAW1 = (21.29) (PDE)
> + * GAW2 = (30.38) (PDPE)
> + * GAW3 = (39.47) (PML4)
> + * GAW4 = (48.57) (n/a)
> + * GAW5 = (58.63) (n/a)
> + */
> +struct pte_entry {
> + uint64_t val;
> +};
> +
> +/*
> + * Queued Invalidation entry
> + *  0:3   = 01h
> + *  4:5   = Granularity
> + *  6:15  = Reserved
> + *  16:31 = Domain ID
> + *  32:47 = Source ID
> + *  48:49 = FM
> + */
> +
> +/* Invalidate Context Entry */
> +#define QI_CTX_DID_MASK 0xFFFF
> +#define QI_CTX_DID_SHIFT 16
> +#define QI_CTX_SID_MASK 0xFFFF
> +#define QI_CTX_SID_SHIFT 32
> +#define QI_CTX_FM_MASK 0x3
> +#define QI_CTX_FM_SHIFT 48
> +#define QI_CTX_IG_MASK 0x3
> +#define QI_CTX_IG_SHIFT 4
> +#define QI_CTX_DID(x) (((uint64_t)(x) << QI_CTX_DID_SHIFT))
> +#define QI_CTX_SID(x) (((uint64_t)(x) << QI_CTX_SID_SHIFT))
> +#define QI_CTX_FM(x) (((uint64_t)(x) << QI_CTX_FM_SHIFT))
> +
> +#define QI_CTX_IG_GLOBAL (CTX_GLOBAL << QI_CTX_IG_SHIFT)
> +#define QI_CTX_IG_DOMAIN (CTX_DOMAIN << QI_CTX_IG_SHIFT)
> +#define QI_CTX_IG_DEVICE (CTX_DEVICE << QI_CTX_IG_SHIFT)
> +
> +/* Invalidate IOTLB Entry */
> +#define QI_IOTLB_DID_MASK 0xFFFF
> +#define QI_IOTLB_DID_SHIFT 16
> +#define QI_IOTLB_IG_MASK 0x3
> +#define QI_IOTLB_IG_SHIFT 4
> +#define QI_IOTLB_DR (1LL << 6)
> +#define QI_IOTLB_DW (1LL << 5)
> +#define QI_IOTLB_DID(x) (((uint64_t)(x) << QI_IOTLB_DID_SHIFT))
> +
> +#define QI_IOTLB_IG_GLOBAL (1 << QI_IOTLB_IG_SHIFT)
> +#define QI_IOTLB_IG_DOMAIN (2 << QI_IOTLB_IG_SHIFT)
> +#define QI_IOTLB_IG_PAGE (3 << QI_IOTLB_IG_SHIFT)
> +
> +/* QI Commands */
> +#define QI_CTX 0x1
> +#define QI_IOTLB 0x2
> +#define QI_DEVTLB 0x3
> +#define QI_INTR 0x4
> +#define QI_WAIT 0x5
> +#define QI_EXTTLB 0x6
> +#define QI_PAS 0x7
> +#define QI_EXTDEV 0x8
> +
> +struct qi_entry {
> + uint64_t lo;
> + uint64_t hi;
> +};
> +
> +enum {
> + CTX_GLOBAL = 1,
> + CTX_DOMAIN,
> + CTX_DEVICE,
> +
> + IOTLB_GLOBAL = 1,
> + IOTLB_DOMAIN,
> + IOTLB_PAGE,
> +};
> +
> +enum {
> + VTD_FAULT_ROOT_P = 0x1,         /* P field in root entry is 0 */
> + VTD_FAULT_CTX_P = 0x2,          /* P field in context entry is 0 */
> + VTD_FAULT_CTX_INVAL = 0x3,      /* context AW/TT/SLPPTR invalid */
> + VTD_FAULT_LIMIT = 0x4,          /* Address is outside of MGAW */
> + VTD_FAULT_WRITE = 0x5,          /* Address-translation fault, non-writable */
> + VTD_FAULT_READ = 0x6,           /* Address-translation fault, non-readable */
> + VTD_FAULT_PTE_INVAL = 0x7,      /* page table hw access error */
> + VTD_FAULT_ROOT_INVAL = 0x8,     /* root table hw access error */
> + VTD_FAULT_CTX_TBL_INVAL = 0x9,  /* context entry hw access error */
> + VTD_FAULT_ROOT_RESERVED = 0xa,  /* non-zero reserved field in root entry */
> + VTD_FAULT_CTX_RESERVED = 0xb,   /* non-zero reserved field in context entry */
> + VTD_FAULT_PTE_RESERVED = 0xc,   /* non-zero reserved field in paging entry */
> + VTD_FAULT_CTX_TT = 0xd,         /* invalid translation type */
> +};
> +
> +#endif
> +
> +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
> +void dmar_ptmap(bus_dma_tag_t, bus_addr_t);
> +void acpidmar_sw(int);
> +
> +#define __EXTRACT(v,m) (((v) >> m##_SHIFT) & m##_MASK)
> diff --git a/sys/dev/acpi/acpireg.h b/sys/dev/acpi/acpireg.h
> index bfbb73ce2..62d7990e2 100644
> --- a/sys/dev/acpi/acpireg.h
> +++ b/sys/dev/acpi/acpireg.h
> @@ -623,6 +623,9 @@ struct acpi_ivmd {
>  struct acpi_ivhd {
>   uint8_t type;
>   uint8_t flags;
> +#define IVHD_PPRSUP (1L << 7)
> +#define IVHD_PREFSUP (1L << 6)
> +#define IVHD_COHERENT (1L << 5)
>  #define IVHD_IOTLB (1L << 4)
>  #define IVHD_ISOC (1L << 3)
>  #define IVHD_RESPASSPW (1L << 2)
> @@ -638,13 +641,28 @@ struct acpi_ivhd {
>  #define IVHD_UNITID_MASK 0x1F
>  #define IVHD_MSINUM_SHIFT 0
>  #define IVHD_MSINUM_MASK 0x1F
> - uint32_t reserved;
> + uint32_t feature;
> +} __packed;
> +
> +struct acpi_ivhd_ext {
> + uint8_t type;
> + uint8_t flags;
> + uint16_t length;
> + uint16_t devid;
> + uint16_t cap;
> + uint64_t address;
> + uint16_t segment;
> + uint16_t info;
> + uint32_t attrib;
> + uint64_t efr;
> + uint8_t reserved[8];
>  } __packed;
>  
>  union acpi_ivrs_entry {
>   struct {
>   uint8_t type;
>  #define IVRS_IVHD 0x10
> +#define IVRS_IVHD_EXT 0x11
>  #define IVRS_IVMD_ALL 0x20
>  #define IVRS_IVMD_SPECIFIED 0x21
>  #define IVRS_IVMD_RANGE 0x22
> @@ -652,6 +670,7 @@ union acpi_ivrs_entry {
>   uint16_t length;
>   } __packed;
>   struct acpi_ivhd ivhd;
> + struct acpi_ivhd_ext ivhd_ext;
>   struct acpi_ivmd ivmd;
>  } __packed;
>  
> diff --git a/sys/dev/acpi/amd_iommu.h b/sys/dev/acpi/amd_iommu.h
> new file mode 100644
> index 000000000..c7652011e
> --- /dev/null
> +++ b/sys/dev/acpi/amd_iommu.h
> @@ -0,0 +1,360 @@
> +/*
> + * Copyright (c) 2019 Jordan Hargrave <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +#ifndef __amd_iommu_h__
> +#define __amd_iommu_h__
> +
> +#define DEV_TAB_BASE_REG 0x0000
> +#define CMD_BASE_REG 0x0008
> +#define EVT_BASE_REG 0x0010
> +
> +#define EXCL_BASE_REG 0x0020
> +#define EXCL_LIMIT_REG 0x0028
> +
> +/* Extended Feature Register */
> +#define EXTFEAT_REG 0x0030
> +#define  EFR_PREFSUP (1L << 0)
> +#define  EFR_PPRSUP (1L << 1)
> +#define  EFR_NXSUP (1L << 3)
> +#define  EFR_GTSUP (1L << 4)
> +#define  EFR_IASUP (1L << 6)
> +#define  EFR_GASUP (1L << 7)
> +#define  EFR_HESUP (1L << 8)
> +#define  EFR_PCSUP (1L << 9)
> +#define  EFR_HATS_SHIFT 10
> +#define  EFR_HATS_MASK 0x3
> +#define  EFR_GATS_SHIFT 12
> +#define  EFR_GATS_MASK 0x3
> +#define  EFR_GLXSUP_SHIFT 14
> +#define  EFR_GLXSUP_MASK 0x3
> +#define  EFR_SMIFSUP_SHIFT 16
> +#define  EFR_SMIFSUP_MASK 0x3        
> +#define  EFR_SMIFRC_SHIFT 18
> +#define  EFR_SMIFRC_MASK 0x7
> +#define  EFR_GAMSUP_SHIFT 21
> +#define  EFR_GAMSUP_MASK 0x7
> +
> +#define CMD_HEAD_REG 0x2000
> +#define CMD_TAIL_REG 0x2008
> +#define EVT_HEAD_REG 0x2010
> +#define EVT_TAIL_REG 0x2018
> +
> +#define IOMMUSTS_REG 0x2020
> +
> +#define DEV_TAB_MASK 0x000FFFFFFFFFF000LL
> +#define DEV_TAB_LEN 0x1FF
> +
> +/* IOMMU Control */
> +#define IOMMUCTL_REG 0x0018
> +#define  CTL_IOMMUEN (1L << 0)
> +#define  CTL_HTTUNEN (1L << 1)
> +#define  CTL_EVENTLOGEN (1L << 2)
> +#define  CTL_EVENTINTEN (1L << 3)
> +#define  CTL_COMWAITINTEN (1L << 4)
> +#define  CTL_INVTIMEOUT_SHIFT 5
> +#define  CTL_INVTIMEOUT_MASK   0x7
> +#define  CTL_INVTIMEOUT_NONE 0
> +#define  CTL_INVTIMEOUT_1MS     1
> +#define  CTL_INVTIMEOUT_10MS    2
> +#define  CTL_INVTIMEOUT_100MS   3
> +#define  CTL_INVTIMEOUT_1S      4
> +#define  CTL_INVTIMEOUT_10S     5
> +#define  CTL_INVTIMEOUT_100S    6
> +#define  CTL_PASSPW (1L << 8)
> +#define  CTL_RESPASSPW (1L << 9)
> +#define  CTL_COHERENT (1L << 10)
> +#define  CTL_ISOC (1L << 11)
> +#define  CTL_CMDBUFEN (1L << 12)
> +#define  CTL_PPRLOGEN (1L << 13)
> +#define  CTL_PPRINTEN (1L << 14)
> +#define  CTL_PPREN (1L << 15)
> +#define  CTL_GTEN (1L << 16)
> +#define  CTL_GAEN (1L << 17)
> +#define  CTL_CRW_SHIFT 18
> +#define  CTL_CRW_MASK 0xF
> +#define  CTL_SMIFEN (1L << 22)
> +#define  CTL_SLFWBDIS (1L << 23)
> +#define  CTL_SMIFLOGEN (1L << 24)
> +#define  CTL_GAMEN_SHIFT 25
> +#define  CTL_GAMEN_MASK 0x7
> +#define  CTL_GALOGEN (1L << 28)
> +#define  CTL_GAINTEN (1L << 29)
> +#define  CTL_DUALPPRLOGEN_SHIFT 30
> +#define  CTL_DUALPPRLOGEN_MASK 0x3
> +#define  CTL_DUALEVTLOGEN_SHIFT 32
> +#define  CTL_DUALEVTLOGEN_MASK 0x3
> +#define  CTL_DEVTBLSEGEN_SHIFT 34
> +#define  CTL_DEVTBLSEGEN_MASK 0x7
> +#define  CTL_PRIVABRTEN_SHIFT 37
> +#define  CTL_PRIVABRTEN_MASK 0x3
> +#define  CTL_PPRAUTORSPEN (1LL << 39)
> +#define  CTL_MARCEN (1LL << 40)
> +#define  CTL_BLKSTOPMRKEN (1LL << 41)
> +#define  CTL_PPRAUTOSPAON (1LL << 42)
> +#define  CTL_DOMAINIDPNE (1LL << 43)
> +
/* Command/event ring base registers: 4K-aligned PA plus encoded length. */
#define CMD_BASE_MASK 0x000FFFFFFFFFF000LL
#define CMD_TBL_SIZE 4096
#define CMD_TBL_LEN_4K (8LL << 56)
#define CMD_TBL_LEN_8K (9LL << 56)	/* was "9lL": mixed-case suffix is invalid C */

#define EVT_BASE_MASK 0x000FFFFFFFFFF000LL
#define EVT_TBL_SIZE 4096
#define EVT_TBL_LEN_4K (8LL << 56)
#define EVT_TBL_LEN_8K (9LL << 56)
> +
> +/*========================
> + * DEVICE TABLE ENTRY
> + * Contains mapping of bus-device-function
> + *
> + *  0       Valid (V)
> + *  1       Translation Valid (TV)
> + *  7:8     Host Address Dirty (HAD)
> + *  9:11    Page Table Depth (usually 4)
> + *  12:51   Page Table Physical Address
> + *  52      PPR Enable
> + *  53      GPRP
> + *  54      Guest I/O Protection Valid (GIoV)
> + *  55      Guest Translation Valid (GV)
> + *  56:57   Guest Levels translated (GLX)
> + *  58:60   Guest CR3 bits 12:14 (GCR3TRP)
> + *  61      I/O Read Permission (IR)
> + *  62      I/O Write Permission (IW)
> + *  64:79   Domain ID
> + *  80:95   Guest CR3 bits 15:30 (GCR3TRP)
> + *  96      IOTLB Enable (I)
> + *  97      Suppress multiple I/O page faults (I)
> + *  98      Suppress all I/O page faults (SA)
> + *  99:100  Port I/O Control (IoCTL)
> + *  101     Cache IOTLB Hint
> + *  102     Snoop Disable (SD)
> + *  103     Allow Exclusion (EX)
> + *  104:105 System Management Message (SysMgt)
> + *  107:127 Guest CR3 bits 31:51 (GCR3TRP)
> + *  128     Interrupt Map Valid (IV)
> + *  129:132 Interrupt Table Length (IntTabLen)
> + *========================*/
> +struct ivhd_dte {
> + uint32_t dw0;
> + uint32_t dw1;
> + uint32_t dw2;
> + uint32_t dw3;
> + uint32_t dw4;
> + uint32_t dw5;
> + uint32_t dw6;
> + uint32_t dw7;
> +} __packed;
> +
> +#define HWDTE_SIZE (65536 * sizeof(struct ivhd_dte))
> +
> +#define DTE_V (1L << 0) // dw0
> +#define DTE_TV (1L << 1) // dw0
> +#define DTE_LEVEL_SHIFT 9 // dw0
> +#define DTE_LEVEL_MASK 0x7 // dw0
> +#define DTE_HPTRP_MASK 0x000FFFFFFFFFF000LL // dw0,1
> +
> +#define DTE_PPR (1L << 20) // dw1
> +#define DTE_GPRP (1L << 21) // dw1
> +#define DTE_GIOV (1L << 22) // dw1
> +#define DTE_GV (1L << 23) // dw1
> +#define DTE_IR (1L << 29) // dw1
> +#define DTE_IW (1L << 30) // dw1
> +
> +#define DTE_DID_MASK 0xFFFF // dw2
> +
> +#define DTE_IV (1L << 0) // dw3
> +#define DTE_SE (1L << 1)
> +#define DTE_SA (1L << 2)
> +#define DTE_INTTABLEN_SHIFT 1
> +#define DTE_INTTABLEN_MASK 0xF
> +#define DTE_IRTP_MASK 0x000FFFFFFFFFFFC0LL
> +
> +#define PTE_LVL5                48
> +#define PTE_LVL4                39
> +#define PTE_LVL3                30
> +#define PTE_LVL2                21
> +#define PTE_LVL1                12
> +
> +#define PTE_NXTLVL(x)           (((x) & 0x7) << 9)
> +#define PTE_PADDR_MASK 0x000FFFFFFFFFF000LL
> +#define PTE_IR                  (1LL << 61)
> +#define PTE_IW                  (1LL << 62)
> +
> +#define DTE_GCR312_MASK 0x3
> +#define DTE_GCR312_SHIFT 24
> +
> +#define DTE_GCR315_MASK 0xFFFF
> +#define DTE_GCR315_SHIFT 16
> +
> +#define DTE_GCR331_MASK 0xFFFFF
> +#define DTE_GCR331_SHIFT 12
> +
> +#define _get64(x)   *(uint64_t *)(x)
> +#define _put64(x,v) *(uint64_t *)(x) = (v)
> +
> +/* Set Guest CR3 address */
> +static inline void
> +dte_set_guest_cr3(struct ivhd_dte *dte, paddr_t paddr)
> +{
> + iommu_rmw32(&dte->dw1, DTE_GCR312_MASK, DTE_GCR312_SHIFT, paddr >> 12);
> + iommu_rmw32(&dte->dw2, DTE_GCR315_MASK, DTE_GCR315_SHIFT, paddr >> 15);
> + iommu_rmw32(&dte->dw3, DTE_GCR331_MASK, DTE_GCR331_SHIFT, paddr >> 31);
> +}
> +
> +/* Set Interrupt Remapping Root Pointer */
> +static inline void
> +dte_set_interrupt_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
> +{
> + uint64_t ov = _get64(&dte->dw4);
> + _put64(&dte->dw4, (ov & ~DTE_IRTP_MASK) | (paddr & DTE_IRTP_MASK));
> +}
> +
> +/* Set Interrupt Remapping Table length */
> +static inline void
> +dte_set_interrupt_table_length(struct ivhd_dte *dte, int nEnt)
> +{
> + iommu_rmw32(&dte->dw4, DTE_INTTABLEN_MASK, DTE_INTTABLEN_SHIFT, nEnt);
> +}
> +
> +/* Set Interrupt Remapping Valid */
> +static inline void
> +dte_set_interrupt_valid(struct ivhd_dte *dte)
> +{
> + dte->dw4 |= DTE_IV;
> +}
> +
> +/* Set Domain ID in Device Table Entry */
> +static inline void
> +dte_set_domain(struct ivhd_dte *dte, uint16_t did)
> +{
> + dte->dw2 = (dte->dw2 & ~DTE_DID_MASK) | (did & DTE_DID_MASK);
> +}
> +
> +/* Set Page Table Pointer for device.
> + * Note: besides installing the host page-table root PA, this also sets
> + * the I/O read (PTE_IR, bit 61) and write (PTE_IW, bit 62) permission
> + * bits, so installing a page table implicitly grants r/w DMA access. */
> +static inline void
> +dte_set_host_page_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
> +{
> + uint64_t ov;
> +
> + /* dw0/dw1 are accessed as one 64-bit word via _get64/_put64 */
> + ov = _get64(&dte->dw0) & ~DTE_HPTRP_MASK;
> + ov |= (paddr & DTE_HPTRP_MASK) | PTE_IW | PTE_IR;
> +
> + _put64(&dte->dw0, ov);
> +}
> +
> +/* Set Page Table Levels Mask */
> +static inline void
> +dte_set_mode(struct ivhd_dte *dte, int mode)
> +{
> + iommu_rmw32(&dte->dw0, DTE_LEVEL_MASK, DTE_LEVEL_SHIFT, mode);
> +}
> +
> +static inline void
> +dte_set_tv(struct ivhd_dte *dte)
> +{
> + dte->dw0 |= DTE_TV;
> +}
> +
> +/* Set Device Table Entry valid.
> + * Domain/Level/Mode/PageTable should already be set
> + */
> +static inline void
> +dte_set_valid(struct ivhd_dte *dte)
> +{
> + dte->dw0 |= DTE_V;
> +}
> +
> +/* Check if Device Table Entry is valid */
> +static inline int
> +dte_is_valid(struct ivhd_dte *dte)
> +{
> + return (dte->dw0 & DTE_V);
> +}
> +
> +/*=========================================
> + * COMMAND
> + *=========================================*/
> +struct ivhd_command {
> + uint32_t dw0;
> + uint32_t dw1;
> + uint32_t dw2;
> + uint32_t dw3;
> +} __packed;
> +
> +#define CMD_SHIFT 28
> +
> +enum {
> + COMPLETION_WAIT = 0x01,
> + INVALIDATE_DEVTAB_ENTRY = 0x02,
> + INVALIDATE_IOMMU_PAGES = 0x03,
> + INVALIDATE_IOTLB_PAGES = 0x04,
> + INVALIDATE_INTERRUPT_TABLE = 0x05,
> + PREFETCH_IOMMU_PAGES = 0x06,
> + COMPLETE_PPR_REQUEST = 0x07,
> + INVALIDATE_IOMMU_ALL = 0x08,
> +};
> +
> +/*=========================================
> + * EVENT
> + *=========================================*/
> +struct ivhd_event {
> + uint32_t dw0;
> + uint32_t dw1;
> + uint32_t dw2;   // address.lo
> + uint32_t dw3; // address.hi
> +} __packed;
> +
> +#define EVT_TYPE_SHIFT 28       // dw1.0xF0000000
> +#define EVT_TYPE_MASK 0xF
> +#define EVT_SID_SHIFT 0        // dw0.0x0000FFFF
> +#define EVT_SID_MASK 0xFFFF
> +#define EVT_DID_SHIFT 0
> +#define EVT_DID_MASK 0xFFFF   // dw1.0x0000FFFF
> +#define EVT_FLAG_SHIFT   16
> +#define EVT_FLAG_MASK   0xFFF    // dw1.0x0FFF0000
> +
> +/* IOMMU Fault reasons */
> +enum {
> + ILLEGAL_DEV_TABLE_ENTRY = 0x1,
> + IO_PAGE_FAULT = 0x2,
> + DEV_TAB_HARDWARE_ERROR = 0x3,
> + PAGE_TAB_HARDWARE_ERROR = 0x4,
> + ILLEGAL_COMMAND_ERROR = 0x5,
> + COMMAND_HARDWARE_ERROR = 0x6,
> + IOTLB_INV_TIMEOUT = 0x7,
> + INVALID_DEVICE_REQUEST = 0x8,
> +};
> +
> +#define EVT_GN (1L << 16)
> +#define EVT_NX (1L << 17)
> +#define EVT_US (1L << 18)
> +#define EVT_I (1L << 19)
> +#define EVT_PR (1L << 20)
> +#define EVT_RW (1L << 21)
> +#define EVT_PE (1L << 22)
> +#define EVT_RZ (1L << 23)
> +#define EVT_TR (1L << 24)
> +
> +struct iommu_softc;
> +
> +int ivhd_flush_devtab(struct iommu_softc *, int);
> +int ivhd_invalidate_iommu_all(struct iommu_softc *);
> +int ivhd_invalidate_interrupt_table(struct iommu_softc *, int);
> +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
> +int ivhd_invalidate_domain(struct iommu_softc *, int);
> +
> +void _dumppte(struct pte_entry *, int, vaddr_t);
> +
> +#endif
> diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi
> index e57c39938..1cf6f2fbb 100644
> --- a/sys/dev/acpi/files.acpi
> +++ b/sys/dev/acpi/files.acpi
> @@ -70,6 +70,11 @@ device acpiprt
>  attach acpiprt at acpi
>  file dev/acpi/acpiprt.c acpiprt needs-flag
>  
> +# DMAR device
> +device acpidmar
> +attach acpidmar at acpi
> +file dev/acpi/acpidmar.c acpidmar
> +
>  # Docking station
>  device acpidock
>  attach acpidock at acpi
>
>

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

Jordan Hargrave
Ok made more changes....

On Mon, Sep 14, 2020 at 08:19:18PM +0200, Mark Kettenis wrote:

> > Date: Tue, 8 Sep 2020 21:43:39 -0500
> > From: Jordan Hargrave <[hidden email]>
> >
> > Made changes for the iommu_readq -> iommu_read_8 and also now
> > dynamically allocate the hwdte for AMD IOMMU.
>
> Some more bits...
>
> > On Fri, Sep 04, 2020 at 09:17:18PM +0200, Mark Kettenis wrote:
> > > > Date: Fri, 4 Sep 2020 00:50:44 -0500
> > > > From: Jordan Hargrave <[hidden email]>
> > >
> > > A few hints below...
> > >
> > > > > > +
> > > > > > +/* Page Table Entry per domain */
> > > > > > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > > > > > +
> > > > > > +/* Alias mapping */
> > > > > > +#define SID_INVALID 0x80000000L
> > > > > > +static uint32_t sid_flag[65536];
> > > > >
> > > > > Can we avoid having these large arrays, or at least allocate them
> > > > > dynamically?  That would also avoid the explicit alignment which is
> > > > > somewhat nasty since it affects the entire kernel.
> > > >
> > > > OK. But the hwdte does need the 2M area to be all contiguous but it is not
> > > > needed for DMAR/Intel.  You *can* have up to 8 different device table entries
> > > > though to split up the area.
> > >
> > > The appropriate interface to use in this context is
> > > bus_dmamem_alloc(9).  You can specify alignment, and if you set nsegs
> > > to 1, you will get memory that is physically contiguous.
> > >
> > > To map the memory into kernel address space you'll need create a map
> > > using bus_dmamap_create(9) and map it using bus_dmamem_map(9).  Then
> > > instead of using pmap_extract(9) you use bus_dmamap_load_raw(9) which
> > > then populates the physical addresses.
> > >
> > > Many of the drivers written by dlg@ define convenience functions to do
> > > all these steps, although interestingly enough he tends to use
> > > bus_dmamap_load(9) instead of bus_dmamap_load_raw(9) which is
> > > sub-optimal.
> > >
> > > > > > +
> > > > > > +struct domain_dev {
> > > > > > + int sid;
> > > > > > + int sec;
> > > > > > + int sub;
> > > > > > + TAILQ_ENTRY(domain_dev) link;
> > > > > > +};
> > > > > > +
> > > > > > +struct domain {
> > > > > > + struct iommu_softc *iommu;
> > > > > > + int did;
> > > > > > + int gaw;
> > > > > > + struct pte_entry *pte;
> > > > > > + paddr_t ptep;
> > > > > > + struct bus_dma_tag dmat;
> > > > > > + int flag;
> > > > > > +
> > > > > > + struct mutex            exlck;
> > > > > > + char exname[32];
> > > > > > + struct extent *iovamap;
> > > > > > + TAILQ_HEAD(,domain_dev) devices;
> > > > > > + TAILQ_ENTRY(domain) link;
> > > > > > +};
> > > > > > +
> > > > > > +#define DOM_DEBUG 0x1
> > > > > > +#define DOM_NOMAP 0x2
> > > > > > +
> > > > > > +struct dmar_devlist {
> > > > > > + int type;
> > > > > > + int bus;
> > > > > > + int ndp;
> > > > > > + struct acpidmar_devpath *dp;
> > > > > > + TAILQ_ENTRY(dmar_devlist) link;
> > > > > > +};
> > > > > > +
> > > > > > +TAILQ_HEAD(devlist_head, dmar_devlist);
> > > > > > +
> > > > > > +struct ivhd_devlist {
> > > > > > + int start_id;
> > > > > > + int end_id;
> > > > > > + int cfg;
> > > > > > + TAILQ_ENTRY(ivhd_devlist) link;
> > > > > > +};
> > > > > > +
> > > > > > +struct rmrr_softc {
> > > > > > + TAILQ_ENTRY(rmrr_softc) link;
> > > > > > + struct devlist_head devices;
> > > > > > + int segment;
> > > > > > + uint64_t start;
> > > > > > + uint64_t end;
> > > > > > +};
> > > > > > +
> > > > > > +struct atsr_softc {
> > > > > > + TAILQ_ENTRY(atsr_softc) link;
> > > > > > + struct devlist_head devices;
> > > > > > + int segment;
> > > > > > + int flags;
> > > > > > +};
> > > > > > +
> > > > > > +struct iommu_pic {
> > > > > > + struct pic pic;
> > > > > > + struct iommu_softc *iommu;
> > > > > > +};
> > > > > > +
> > > > > > +#define IOMMU_FLAGS_CATCHALL 0x1
> > > > > > +#define IOMMU_FLAGS_BAD 0x2
> > > > > > +#define IOMMU_FLAGS_SUSPEND 0x4
> > > > > > +
> > > > > > +struct iommu_softc {
> > > > > > + TAILQ_ENTRY(iommu_softc)link;
> > > > > > + struct devlist_head devices;
> > > > > > + int id;
> > > > > > + int flags;
> > > > > > + int segment;
> > > > > > +
> > > > > > + struct mutex reg_lock;
> > > > > > +
> > > > > &