another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

classic Classic list List threaded Threaded
8 messages Options
Reply | Threaded
Open this post in threaded view
|

another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

David Gwynne-5
this is a reposting of the diff i sent out a while back. it lets sparc64
enable iommu bypass, and then uses that bypass support for BUS_DMA_64BIT
dmamaps.

the main benefit is around performance. without this diff on an M4000,
tcpbench can do about 70Mbps before the system is CPU bound. all that
cpu time is in the iommu tte management. with this diff and an
appropriate network driver, tcpbench does 940 Mbps and the box is mostly
idle. that is a 1300 percent improvement in network transfer speed. if
you factor the cpu time benefits it would be another order of magnitude
improvement.

kernel build times are slightly improved with this diff on mpii. system
time is about 5 percent less, but there's also a lot less I/O when
you're mostly running a compiler.

i hit an edge case with my previous diff that i believe is fixed with
this one. this one properly checks boundaries, segment sizes, and number
of segments for things like mbufs. i got lucky previously with the
drivers i was using.

Index: dev/iommu.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/dev/iommu.c,v
retrieving revision 1.75
diff -u -p -r1.75 iommu.c
--- dev/iommu.c 25 May 2017 03:19:39 -0000 1.75
+++ dev/iommu.c 11 Jun 2019 01:37:25 -0000
@@ -87,6 +87,8 @@ void iommu_dvmamap_print_map(bus_dma_tag
     bus_dmamap_t);
 int iommu_dvmamap_append_range(bus_dma_tag_t, bus_dmamap_t, paddr_t,
     bus_size_t, int, bus_size_t);
+int iommu_dvmamap_insert(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
+    bus_size_t, int, bus_size_t);
 int64_t iommu_tsb_entry(struct iommu_state *, bus_addr_t);
 void strbuf_reset(struct strbuf_ctl *);
 int iommu_iomap_insert_page(struct iommu_map_state *, paddr_t);
@@ -100,6 +102,25 @@ void iommu_iomap_clear_pages(struct iomm
 void _iommu_dvmamap_sync(bus_dma_tag_t, bus_dma_tag_t, bus_dmamap_t,
     bus_addr_t, bus_size_t, int);
 
+void iommu_hw_enable(struct iommu_state *);
+
+const struct iommu_hw iommu_hw_default = {
+ .ihw_enable = iommu_hw_enable,
+
+ .ihw_dvma_pa = IOTTE_PAMASK,
+
+ .ihw_bypass = 0x3fffUL << 50,
+ .ihw_bypass_nc = 0,
+ .ihw_bypass_ro = 0,
+};
+
+void
+iommu_hw_enable(struct iommu_state *is)
+{
+ IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
+ IOMMUREG_WRITE(is, iommu_cr, IOMMUCR_EN | (is->is_tsbsize << 16));
+}
+
 /*
  * Initiate an STC entry flush.
  */
@@ -125,7 +146,8 @@ iommu_strbuf_flush(struct strbuf_ctl *sb
  * - create a private DVMA map.
  */
 void
-iommu_init(char *name, struct iommu_state *is, int tsbsize, u_int32_t iovabase)
+iommu_init(char *name, const struct iommu_hw *ihw, struct iommu_state *is,
+    int tsbsize, u_int32_t iovabase)
 {
  psize_t size;
  vaddr_t va;
@@ -149,13 +171,9 @@ iommu_init(char *name, struct iommu_stat
  * be hard-wired, so we read the start and size from the PROM and
  * just use those values.
  */
- if (strncmp(name, "pyro", 4) == 0) {
- is->is_cr = IOMMUREG_READ(is, iommu_cr);
- is->is_cr &= ~IOMMUCR_FIRE_BE;
- is->is_cr |= (IOMMUCR_FIRE_SE | IOMMUCR_FIRE_CM_EN |
-    IOMMUCR_FIRE_TE);
- } else
- is->is_cr = IOMMUCR_EN;
+
+ is->is_hw = ihw;
+
  is->is_tsbsize = tsbsize;
  if (iovabase == (u_int32_t)-1) {
  is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
@@ -237,15 +255,6 @@ iommu_init(char *name, struct iommu_stat
  mtx_init(&is->is_mtx, IPL_HIGH);
 
  /*
- * Set the TSB size.  The relevant bits were moved to the TSB
- * base register in the PCIe host bridges.
- */
- if (strncmp(name, "pyro", 4) == 0)
- is->is_ptsb |= is->is_tsbsize;
- else
- is->is_cr |= (is->is_tsbsize << 16);
-
- /*
  * Now actually start up the IOMMU.
  */
  iommu_reset(is);
@@ -262,10 +271,7 @@ iommu_reset(struct iommu_state *is)
 {
  int i;
 
- IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
-
- /* Enable IOMMU */
- IOMMUREG_WRITE(is, iommu_cr, is->is_cr);
+ (*is->is_hw->ihw_enable)(is);
 
  for (i = 0; i < 2; ++i) {
  struct strbuf_ctl *sb = is->is_sb[i];
@@ -280,7 +286,7 @@ iommu_reset(struct iommu_state *is)
  printf(", STC%d enabled", i);
  }
 
- if (is->is_flags & IOMMU_FLUSH_CACHE)
+ if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE))
  IOMMUREG_WRITE(is, iommu_cache_invalidate, -1ULL);
 }
 
@@ -433,7 +439,7 @@ iommu_extract(struct iommu_state *is, bu
  if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
  tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
 
- return (tte & IOTTE_PAMASK);
+ return (tte & is->is_hw->ihw_dvma_pa);
 }
 
 /*
@@ -601,6 +607,7 @@ iommu_dvmamap_create(bus_dma_tag_t t, bu
 {
  int ret;
  bus_dmamap_t map;
+ struct iommu_state *is = sb->sb_iommu;
  struct iommu_map_state *ims;
 
  BUS_DMA_FIND_PARENT(t, _dmamap_create);
@@ -610,6 +617,12 @@ iommu_dvmamap_create(bus_dma_tag_t t, bu
  if (ret)
  return (ret);
 
+ if (flags & BUS_DMA_64BIT) {
+ map->_dm_cookie = is;
+ *dmamap = map;
+ return (0);
+ }
+
  ims = iommu_iomap_create(atop(round_page(size)));
 
  if (ims == NULL) {
@@ -641,8 +654,10 @@ iommu_dvmamap_destroy(bus_dma_tag_t t, b
  if (map->dm_nsegs)
  bus_dmamap_unload(t0, map);
 
-        if (map->_dm_cookie)
-                iommu_iomap_destroy(map->_dm_cookie);
+ if (!ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
+        if (map->_dm_cookie)
+ iommu_iomap_destroy(map->_dm_cookie);
+ }
  map->_dm_cookie = NULL;
 
  BUS_DMA_FIND_PARENT(t, _dmamap_destroy);
@@ -667,36 +682,36 @@ iommu_dvmamap_load(bus_dma_tag_t t, bus_
  u_long dvmaddr, sgstart, sgend;
  bus_size_t align, boundary;
  struct iommu_state *is;
- struct iommu_map_state *ims = map->_dm_cookie;
+ struct iommu_map_state *ims;
  pmap_t pmap;
 
-#ifdef DIAGNOSTIC
- if (ims == NULL)
- panic("iommu_dvmamap_load: null map state");
-#endif
-#ifdef DEBUG
- if (ims->ims_sb == NULL)
- panic("iommu_dvmamap_load: null sb");
- if (ims->ims_sb->sb_iommu == NULL)
- panic("iommu_dvmamap_load: null iommu");
-#endif /* DEBUG */
- is = ims->ims_sb->sb_iommu;
-
- if (map->dm_nsegs) {
- /*
- * Is it still in use? _bus_dmamap_load should have taken care
- * of this.
- */
-#ifdef DIAGNOSTIC
- panic("iommu_dvmamap_load: map still in use");
-#endif
- bus_dmamap_unload(t0, map);
- }
-
  /*
  * Make sure that on error condition we return "no valid mappings".
  */
- map->dm_nsegs = 0;
+ KASSERTMSG(map->dm_nsegs == 0, "map still in use");
+
+ if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
+ unsigned long bypass;
+ int i;
+
+ is = map->_dm_cookie;
+ bypass = is->is_hw->ihw_bypass;
+
+ /* Bypass translation by the IOMMU. */
+
+ BUS_DMA_FIND_PARENT(t, _dmamap_load);
+ err = (*t->_dmamap_load)(t, t0, map, buf, buflen, p, flags);
+ if (err != 0)
+ return (err);
+
+ for (i = 0; i < map->dm_nsegs; i++)
+ map->dm_segs[i].ds_addr |= bypass;
+
+ return (0);
+ }
+
+ ims = map->_dm_cookie;
+ is = ims->ims_sb->sb_iommu;
 
  if (buflen < 1 || buflen > map->_dm_size) {
  DPRINTF(IDB_BUSDMA,
@@ -876,27 +891,9 @@ iommu_dvmamap_load_raw(bus_dma_tag_t t,
  bus_size_t boundary, align;
  u_long dvmaddr, sgstart, sgend;
  struct iommu_state *is;
- struct iommu_map_state *ims = map->_dm_cookie;
-
-#ifdef DIAGNOSTIC
- if (ims == NULL)
- panic("iommu_dvmamap_load_raw: null map state");
-#endif
-#ifdef DEBUG
- if (ims->ims_sb == NULL)
- panic("iommu_dvmamap_load_raw: null sb");
- if (ims->ims_sb->sb_iommu == NULL)
- panic("iommu_dvmamap_load_raw: null iommu");
-#endif /* DEBUG */
- is = ims->ims_sb->sb_iommu;
+ struct iommu_map_state *ims;
 
- if (map->dm_nsegs) {
- /* Already in use?? */
-#ifdef DIAGNOSTIC
- panic("iommu_dvmamap_load_raw: map still in use");
-#endif
- bus_dmamap_unload(t0, map);
- }
+ KASSERTMSG(map->dm_nsegs == 0, "map stil in use");
 
  /*
  * A boundary presented to bus_dmamem_alloc() takes precedence
@@ -905,6 +902,31 @@ iommu_dvmamap_load_raw(bus_dma_tag_t t,
  if ((boundary = segs[0]._ds_boundary) == 0)
  boundary = map->_dm_boundary;
 
+ if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
+ unsigned long bypass;
+
+ is = map->_dm_cookie;
+ bypass = is->is_hw->ihw_bypass;
+
+ /* Bypass translation by the IOMMU. */
+ for (i = 0; i < nsegs; i++) {
+ err = iommu_dvmamap_insert(t, map,
+    bypass | segs[i].ds_addr, segs[i].ds_len,
+    0, boundary);
+ if (err != 0) {
+ map->dm_nsegs = 0;
+ return (err);
+ }
+ }
+
+ map->dm_mapsize = size;
+
+ return (0);
+ }
+
+ ims = map->_dm_cookie;
+ is = ims->ims_sb->sb_iommu;
+
  align = MAX(segs[0]._ds_align, PAGE_SIZE);
 
  /*
@@ -1084,22 +1106,24 @@ iommu_dvmamap_append_range(bus_dma_tag_t
     bus_size_t length, int flags, bus_size_t boundary)
 {
  struct iommu_map_state *ims = map->_dm_cookie;
- bus_addr_t sgstart, sgend, bd_mask;
- bus_dma_segment_t *seg = NULL;
- int i = map->dm_nsegs;
+ bus_addr_t sgstart = iommu_iomap_translate(ims, pa);
 
-#ifdef DEBUG
- if (ims == NULL)
- panic("iommu_dvmamap_append_range: null map state");
-#endif
+ return (iommu_dvmamap_insert(t, map, sgstart, length, flags, boundary));
+}
 
- sgstart = iommu_iomap_translate(ims, pa);
- sgend = sgstart + length - 1;
+int
+iommu_dvmamap_insert(bus_dma_tag_t t, bus_dmamap_t map,
+    bus_addr_t sgstart, bus_size_t length, int flags, bus_size_t boundary)
+{
+ bus_addr_t sgend = sgstart + length - 1;
+ bus_addr_t bd_mask;
+ bus_dma_segment_t *seg = NULL;
+ int i = map->dm_nsegs;
 
 #ifdef DIAGNOSTIC
  if (sgstart == 0 || sgstart > sgend) {
- printf("append range invalid mapping for %lx "
-    "(0x%lx - 0x%lx)\n", pa, sgstart, sgend);
+ printf("append range invalid mapping for "
+    "0x%lx - 0x%lx\n", sgstart, sgend);
  map->dm_nsegs = 0;
  return (EINVAL);
  }
@@ -1298,20 +1322,17 @@ void
 iommu_dvmamap_unload(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
 {
  struct iommu_state *is;
- struct iommu_map_state *ims = map->_dm_cookie;
+ struct iommu_map_state *ims;
  bus_addr_t dvmaddr = map->_dm_dvmastart;
  bus_size_t sgsize = map->_dm_dvmasize;
  int error;
 
-#ifdef DEBUG
- if (ims == NULL)
- panic("iommu_dvmamap_unload: null map state");
- if (ims->ims_sb == NULL)
- panic("iommu_dvmamap_unload: null sb");
- if (ims->ims_sb->sb_iommu == NULL)
- panic("iommu_dvmamap_unload: null iommu");
-#endif /* DEBUG */
+ if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
+ bus_dmamap_unload(t->_parent, map);
+ return;
+ }
 
+ ims = map->_dm_cookie;
  is = ims->ims_sb->sb_iommu;
 
  /* Flush the iommu */
@@ -1488,7 +1509,7 @@ iommu_dvmamap_print_map(bus_dma_tag_t t,
  break;
  }
 
- if (map->_dm_cookie) {
+ if (!ISSET(map->_dm_flags, BUS_DMA_64BIT) && map->_dm_cookie != NULL) {
  struct iommu_map_state *ims = map->_dm_cookie;
  struct iommu_page_map *ipm = &ims->ims_map;
 
@@ -1546,19 +1567,19 @@ void
 iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
     bus_addr_t offset, bus_size_t len, int ops)
 {
- struct iommu_map_state *ims = map->_dm_cookie;
+ struct iommu_map_state *ims;
 
-#ifdef DIAGNOSTIC
- if (ims == NULL)
- panic("iommu_dvmamap_sync: null map state");
- if (ims->ims_sb == NULL)
- panic("iommu_dvmamap_sync: null sb");
- if (ims->ims_sb->sb_iommu == NULL)
- panic("iommu_dvmamap_sync: null iommu");
-#endif
  if (len == 0)
  return;
 
+ if (map->_dm_flags & BUS_DMA_64BIT) {
+ if (ops & (BUS_DMASYNC_PREWRITE | BUS_DMASYNC_POSTREAD))
+ __membar("#MemIssue");
+ return;
+ }
+
+ ims = map->_dm_cookie;
+
  if (ops & BUS_DMASYNC_PREWRITE)
  __membar("#MemIssue");
 
@@ -1622,9 +1643,13 @@ iommu_dvmamem_alloc(bus_dma_tag_t t, bus
     "bound %llx segp %p flags %d\n", (unsigned long long)size,
     (unsigned long long)alignment, (unsigned long long)boundary,
     segs, flags));
+
+ if ((flags & BUS_DMA_64BIT) == 0)
+ flags |= BUS_DMA_DVMA;
+
  BUS_DMA_FIND_PARENT(t, _dmamem_alloc);
  return ((*t->_dmamem_alloc)(t, t0, size, alignment, boundary,
-    segs, nsegs, rsegs, flags | BUS_DMA_DVMA));
+    segs, nsegs, rsegs, flags));
 }
 
 void
@@ -1763,7 +1788,7 @@ iommu_iomap_load_map(struct iommu_state
 
  /* Flush cache if necessary. */
  slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
- if (is->is_flags & IOMMU_FLUSH_CACHE &&
+ if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE) &&
     (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
  IOMMUREG_WRITE(is, iommu_cache_flush,
     is->is_ptsb + slot * 8);
@@ -1788,7 +1813,7 @@ iommu_iomap_unload_map(struct iommu_stat
 
  /* Flush cache if necessary. */
  slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
- if (is->is_flags & IOMMU_FLUSH_CACHE &&
+ if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE) &&
     (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
  IOMMUREG_WRITE(is, iommu_cache_flush,
     is->is_ptsb + slot * 8);
Index: dev/iommureg.h
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/dev/iommureg.h,v
retrieving revision 1.17
diff -u -p -r1.17 iommureg.h
--- dev/iommureg.h 17 Aug 2012 20:46:50 -0000 1.17
+++ dev/iommureg.h 11 Jun 2019 01:37:25 -0000
@@ -90,10 +90,11 @@ struct iommu_strbuf {
 #define IOMMUCR_DE 0x000000000000000002LL /* Diag enable */
 #define IOMMUCR_EN 0x000000000000000001LL /* Enable IOMMU */
 
-#define IOMMUCR_FIRE_SE 0x000000000000000400LL /* Snoop enable */
-#define IOMMUCR_FIRE_CM_EN 0x000000000000000300LL  /* Cache mode enable */
-#define IOMMUCR_FIRE_BE 0x000000000000000002LL /* Bypass enable */
-#define IOMMUCR_FIRE_TE 0x000000000000000001LL /* Translation enabled */
+#define IOMMUCR_FIRE_PD 0x000000000000001000UL /* Process disable */
+#define IOMMUCR_FIRE_SE 0x000000000000000400UL /* Snoop enable */
+#define IOMMUCR_FIRE_CM_EN 0x000000000000000300UL  /* Cache mode enable */
+#define IOMMUCR_FIRE_BE 0x000000000000000002UL /* Bypass enable */
+#define IOMMUCR_FIRE_TE 0x000000000000000001UL /* Translation enabled */
 
 /*
  * IOMMU stuff
Index: dev/iommuvar.h
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/dev/iommuvar.h,v
retrieving revision 1.17
diff -u -p -r1.17 iommuvar.h
--- dev/iommuvar.h 4 May 2016 18:26:12 -0000 1.17
+++ dev/iommuvar.h 11 Jun 2019 01:37:25 -0000
@@ -100,6 +100,21 @@ struct iommu_map_state {
 };
 #define IOMMU_MAP_STREAM 1
 
+struct iommu_hw {
+ void (*ihw_enable)(struct iommu_state *);
+
+ unsigned long ihw_dvma_pa;
+
+ unsigned long ihw_bypass;
+ unsigned long ihw_bypass_nc; /* non-cached */
+ unsigned long ihw_bypass_ro; /* relaxed ordering */
+
+ unsigned int ihw_flags;
+#define IOMMU_HW_FLUSH_CACHE (1 << 0)
+};
+
+extern const struct iommu_hw iommu_hw_default;
+
 /*
  * per-IOMMU state
  */
@@ -112,8 +127,7 @@ struct iommu_state {
  int64_t is_cr; /* Control register value */
  struct mutex is_mtx;
  struct extent *is_dvmamap; /* DVMA map for this instance */
- int is_flags;
-#define IOMMU_FLUSH_CACHE 0x00000001
+ const struct iommu_hw *is_hw;
 
  struct strbuf_ctl *is_sb[2]; /* Streaming buffers if any */
 
@@ -126,7 +140,8 @@ struct iommu_state {
 };
 
 /* interfaces for PCI/SBus code */
-void iommu_init(char *, struct iommu_state *, int, u_int32_t);
+void iommu_init(char *, const struct iommu_hw *, struct iommu_state *,
+    int, u_int32_t);
 void iommu_reset(struct iommu_state *);
 paddr_t iommu_extract(struct iommu_state *, bus_addr_t);
 int64_t iommu_lookup_tte(struct iommu_state *, bus_addr_t);
@@ -146,6 +161,7 @@ int iommu_dvmamem_alloc(bus_dma_tag_t, b
     bus_size_t, bus_size_t, bus_dma_segment_t *, int, int *, int);
 void iommu_dvmamem_free(bus_dma_tag_t, bus_dma_tag_t, bus_dma_segment_t *,
     int);
+
 
 #define IOMMUREG_READ(is, reg) \
  bus_space_read_8((is)->is_bustag, \
Index: dev/pci_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/dev/pci_machdep.c,v
retrieving revision 1.47
diff -u -p -r1.47 pci_machdep.c
--- dev/pci_machdep.c 11 Jun 2019 00:45:31 -0000 1.47
+++ dev/pci_machdep.c 11 Jun 2019 01:37:25 -0000
@@ -57,6 +57,7 @@ int sparc_pci_debug = 0x0;
 #include <machine/openfirm.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
+#include <dev/pci/pcidevs.h>
 
 #include <dev/ofw/ofw_pci.h>
 
@@ -84,6 +85,46 @@ pci_attach_hook(parent, self, pba)
  struct pcibus_attach_args *pba;
 {
  /* Don't do anything */
+}
+
+int
+pci_bcm_dmamap_create(bus_dma_tag_t dt, bus_dma_tag_t t0, bus_size_t size,
+    int nsegments, bus_size_t maxsegsz, bus_size_t boundary, int flags,
+    bus_dmamap_t *dmamp)
+{
+ bus_dma_tag_t pdt = dt->_parent;
+
+ CLR(flags, BUS_DMA_64BIT);
+
+ return ((*pdt->_dmamap_create)(pdt, t0, size, nsegments, maxsegsz,
+    boundary, flags, dmamp));
+}
+
+int
+pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
+{
+ bus_dma_tag_t dt, pdt;
+
+ if (pa->pa_id ==
+    PCI_ID_CODE(PCI_VENDOR_RCC, PCI_PRODUCT_RCC_PCIE_PCIX)) {
+ /*
+ * These PCI bridges only support 40bit DVA, so intercept
+ * bus_dmamap_create so we can clear BUS_DMA_64BIT.
+ */
+
+ dt = malloc(sizeof(*dt), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (dt == NULL)
+ panic("%s: could not alloc dma tag", __func__);
+
+ pdt = pa->pa_dmat;
+
+ dt->_parent = pdt;
+ dt->_dmamap_create = pci_bcm_dmamap_create;
+
+ pa->pa_dmat = dt;
+ }
+
+ return (0);
 }
 
 int
Index: dev/psycho.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/dev/psycho.c,v
retrieving revision 1.75
diff -u -p -r1.75 psycho.c
--- dev/psycho.c 25 May 2017 03:19:39 -0000 1.75
+++ dev/psycho.c 11 Jun 2019 01:37:25 -0000
@@ -902,7 +902,7 @@ psycho_iommu_init(struct psycho_softc *s
  panic("couldn't malloc iommu name");
  snprintf(name, 32, "%s dvma", sc->sc_dev.dv_xname);
 
- iommu_init(name, is, tsbsize, iobase);
+ iommu_init(name, &iommu_hw_default, is, tsbsize, iobase);
 }
 
 /*
Index: dev/pyro.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/dev/pyro.c,v
retrieving revision 1.32
diff -u -p -r1.32 pyro.c
--- dev/pyro.c 11 Jun 2019 00:45:31 -0000 1.32
+++ dev/pyro.c 11 Jun 2019 01:37:25 -0000
@@ -131,6 +131,30 @@ int pyro_msi_eq_intr(void *);
 int pyro_dmamap_create(bus_dma_tag_t, bus_dma_tag_t, bus_size_t, int,
     bus_size_t, bus_size_t, int, bus_dmamap_t *);
 
+void pyro_iommu_enable(struct iommu_state *);
+
+const struct iommu_hw iommu_hw_fire = {
+ .ihw_enable = pyro_iommu_enable,
+
+ .ihw_dvma_pa = 0x000007ffffffffffUL,
+
+ .ihw_bypass = 0xfffc000000000000UL,
+ .ihw_bypass_nc = 0x0000080000000000UL,
+ .ihw_bypass_ro = 0,
+};
+
+const struct iommu_hw iommu_hw_oberon = {
+ .ihw_enable = pyro_iommu_enable,
+
+ .ihw_dvma_pa = 0x00007fffffffffffUL,
+
+ .ihw_bypass = 0x7ffc000000000000UL,
+ .ihw_bypass_nc = 0x0000800000000000UL,
+ .ihw_bypass_ro = 0x8000000000000000UL,
+
+ .ihw_flags = IOMMU_HW_FLUSH_CACHE,
+};
+
 #ifdef DDB
 void pyro_xir(void *, int);
 #endif
@@ -266,6 +290,7 @@ pyro_init_iommu(struct pyro_softc *sc, s
  int tsbsize = 7;
  u_int32_t iobase = -1;
  char *name;
+ const struct iommu_hw *ihw = &iommu_hw_fire;
 
  is->is_bustag = sc->sc_bust;
 
@@ -282,11 +307,23 @@ pyro_init_iommu(struct pyro_softc *sc, s
  panic("couldn't malloc iommu name");
  snprintf(name, 32, "%s dvma", sc->sc_dv.dv_xname);
 
- /* On Oberon, we need to flush the cache. */
  if (sc->sc_oberon)
- is->is_flags |= IOMMU_FLUSH_CACHE;
+ ihw = &iommu_hw_oberon;
+
+ iommu_init(name, ihw, is, tsbsize, iobase);
+}
+
+void
+pyro_iommu_enable(struct iommu_state *is)
+{
+ unsigned long cr;
+
+ cr = IOMMUREG_READ(is, iommu_cr);
+ cr |= IOMMUCR_FIRE_BE | IOMMUCR_FIRE_SE | IOMMUCR_FIRE_CM_EN |
+    IOMMUCR_FIRE_TE;
 
- iommu_init(name, is, tsbsize, iobase);
+ IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb | is->is_tsbsize);
+ IOMMUREG_WRITE(is, iommu_cr, cr);
 }
 
 void
Index: dev/sbus.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/dev/sbus.c,v
retrieving revision 1.44
diff -u -p -r1.44 sbus.c
--- dev/sbus.c 19 Sep 2015 21:07:04 -0000 1.44
+++ dev/sbus.c 11 Jun 2019 01:37:25 -0000
@@ -349,7 +349,7 @@ sbus_mb_attach(struct device *parent, st
  snprintf(name, 32, "%s dvma", sc->sc_dev.dv_xname);
 
  printf("%s: ", sc->sc_dev.dv_xname);
- iommu_init(name, &sc->sc_is, 0, -1);
+ iommu_init(name, &iommu_hw_default, &sc->sc_is, 0, -1);
 
  /* Initialize Starfire PC interrupt translation. */
  if (OF_getprop(findroot(), "name", buf, sizeof(buf)) > 0 &&
Index: dev/schizo.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/dev/schizo.c,v
retrieving revision 1.68
diff -u -p -r1.68 schizo.c
--- dev/schizo.c 25 May 2017 03:19:39 -0000 1.68
+++ dev/schizo.c 11 Jun 2019 01:37:25 -0000
@@ -451,7 +451,7 @@ schizo_init_iommu(struct schizo_softc *s
     "using iobase=0x%x, tsbsize=%d\n", iobase, tsbsize));
  }
 
- iommu_init(name, is, tsbsize, iobase);
+ iommu_init(name, &iommu_hw_default, is, tsbsize, iobase);
 }
 
 int
Index: include/pci_machdep.h
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/include/pci_machdep.h,v
retrieving revision 1.34
diff -u -p -r1.34 pci_machdep.h
--- include/pci_machdep.h 11 Jun 2019 00:45:31 -0000 1.34
+++ include/pci_machdep.h 11 Jun 2019 01:37:25 -0000
@@ -84,10 +84,13 @@ struct sparc_pci_chipset {
  pcireg_t (*conf_read)(pci_chipset_tag_t, pcitag_t, int);
  void (*conf_write)(pci_chipset_tag_t, pcitag_t, int, pcireg_t);
  int (*intr_map)(struct pci_attach_args *, pci_intr_handle_t *);
+ int (*probe_device_hook)(void *, struct pci_attach_args *);
 };
 
 void pci_attach_hook(struct device *, struct device *,
      struct pcibus_attach_args *);
+int pci_probe_device_hook(pci_chipset_tag_t,
+    struct pci_attach_args *);
 int pci_bus_maxdevs(pci_chipset_tag_t, int);
 pcitag_t pci_make_tag(pci_chipset_tag_t, int, int, int);
 void pci_decompose_tag(pci_chipset_tag_t, pcitag_t, int *, int *,
@@ -115,8 +118,6 @@ int sparc64_pci_enumerate_bus(struct pc
     struct pci_attach_args *);
 
 #define PCI_MACHDEP_ENUMERATE_BUS sparc64_pci_enumerate_bus
-
-#define pci_probe_device_hook(c, a) (0)
 
 #define pci_min_powerstate(c, t) (PCI_PMCSR_STATE_D3)
 #define pci_set_powerstate_md(c, t, s, p)

Reply | Threaded
Open this post in threaded view
|

Re: another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

Theo de Raadt-2
David Gwynne <[hidden email]> wrote:

> this is a reposting of the diff i sent out a while back. it lets sparc64
> enable iommu bypass, and then uses that bypass support for BUS_DMA_64BIT
> dmamaps.

BTW, this is in snapshots and I'd urge everyone running sparc64 to give it
a try.

Reply | Threaded
Open this post in threaded view
|

Re: another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

Andrew Grillet
I have the snapshot running on a guest domain on a T1000 - see attached.

I will test on the primary in a few days, and may be able to test on a
T5220 primary towards the end of next week.

Let me know if you want any specific test procedure run.

On Tue, 18 Jun 2019 at 02:37, Theo de Raadt <[hidden email]> wrote:

> David Gwynne <[hidden email]> wrote:
>
> > this is a reposting of the diff i sent out a while back. it lets sparc64
> > enable iommu bypass, and then uses that bypass support for BUS_DMA_64BIT
> > dmamaps.
>
> BTW, this is in snapshots and I'd urge everyone running sparc64 to give it
> a try.
>
>

snapshots-sparc64-dmesg.txt (1K) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

Kaashif Hymabaccus-2
In reply to this post by David Gwynne-5
This is great, thanks for the work. I have the new snapshot running on
a T5220. I also have an Ultra 45 with some GPUs I use mainly for
playing games and sending people bug reports about how their game
doesn't work on sparc64, I'll try it there also. If there are any
specific tests you want done, I am happy to do them. Here is the dmesg
of the T5220 if there is anything that interests you:

console is /virtual-devices@100/console@1
Copyright (c) 1982, 1986, 1989, 1991, 1993
        The Regents of the University of California.  All rights reserved.
Copyright (c) 1995-2019 OpenBSD. All rights reserved.  https://www.OpenBSD.org

OpenBSD 6.5-current (GENERIC.MP) #207: Tue Jun 18 13:15:53 MDT 2019
    [hidden email]:/usr/src/sys/arch/sparc64/compile/GENERIC.MP
real mem = 17045651456 (16256MB)
avail mem = 16721928192 (15947MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root: SPARC Enterprise T5220
cpu0 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu1 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu2 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu3 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu4 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu5 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu6 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu7 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu8 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu9 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu10 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu11 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu12 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu13 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu14 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu15 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu16 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu17 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu18 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu19 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu20 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu21 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu22 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu23 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu24 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu25 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu26 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu27 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu28 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu29 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu30 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu31 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu32 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu33 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu34 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu35 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu36 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu37 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu38 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu39 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu40 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu41 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu42 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu43 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu44 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu45 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu46 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu47 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu48 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu49 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu50 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu51 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu52 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu53 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu54 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu55 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu56 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu57 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu58 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu59 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu60 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu61 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu62 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
cpu63 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
vbus0 at mainbus0
"flashprom" at vbus0 not configured
"tpm" at vbus0 not configured
cbus0 at vbus0
vldc0 at cbus0
vldcp0 at vldc0 chan 0x14: ivec 0x28, 0x29 channel "spds"
"sunvts" at vldc0 chan 0x6 not configured
"sunmc" at vldc0 chan 0x7 not configured
"explorer" at vldc0 chan 0x8 not configured
"led" at vldc0 chan 0x9 not configured
"flashupdate" at vldc0 chan 0xa not configured
"ipmi" at vldc0 chan 0xc not configured
"system-management" at vldc0 chan 0xd not configured
vldc1 at cbus0
"spfma" at vldc1 chan 0x5 not configured
vldc2 at cbus0
vldcp1 at vldc2 chan 0x0: ivec 0x0, 0x1 channel "hvctl"
"ldom-primary" at vldc2 chan 0x1 not configured
"fmactl" at vldc2 chan 0x3 not configured
vldc3 at cbus0
"ldmfma" at vldc3 chan 0x4 not configured
"n2cp" at vbus0 not configured
"ncp" at vbus0 not configured
vrng0 at vbus0
vcons0 at vbus0: ivec 0x111, console
vrtc0 at vbus0
"niu" at mainbus0 not configured
vpci0 at mainbus0: bus 2 to 18, dvma map 80000000-ffffffff
pci0 at vpci0
ppb0 at pci0 dev 0 function 0 "PLX PEX 8533" rev 0xaa
pci1 at ppb0 bus 3
ppb1 at pci1 dev 1 function 0 "PLX PEX 8533" rev 0xaa
pci2 at ppb1 bus 4
ppb2 at pci2 dev 0 function 0 "PLX PEX 8517" rev 0xad
pci3 at ppb2 bus 5
ppb3 at pci3 dev 1 function 0 "PLX PEX 8517" rev 0xad
pci4 at ppb3 bus 6
ppb4 at pci4 dev 0 function 0 "PLX PEX 8111" rev 0x21
pci5 at ppb4 bus 7
ohci0 at pci5 dev 0 function 0 "NEC USB" rev 0x43: ivec 0x16, version 1.0
ohci1 at pci5 dev 0 function 1 "NEC USB" rev 0x43: ivec 0x17, version 1.0
ehci0 at pci5 dev 0 function 2 "NEC USB" rev 0x04: ivec 0x14
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 configuration 1 interface 0 "NEC EHCI root hub" rev 2.00/1.00 addr 1
usb1 at ohci0: USB revision 1.0
uhub1 at usb1 configuration 1 interface 0 "NEC OHCI root hub" rev 1.00/1.00 addr 1
usb2 at ohci1: USB revision 1.0
uhub2 at usb2 configuration 1 interface 0 "NEC OHCI root hub" rev 1.00/1.00 addr 1
ppb5 at pci3 dev 2 function 0 "PLX PEX 8517" rev 0xad
pci6 at ppb5 bus 8
em0 at pci6 dev 0 function 0 "Intel 82571EB" rev 0x06: ivec 0x17, address 00:14:4f:97:8a:ba
em1 at pci6 dev 0 function 1 "Intel 82571EB" rev 0x06: ivec 0x14, address 00:14:4f:97:8a:bb
ppb6 at pci3 dev 3 function 0 "PLX PEX 8517" rev 0xad
pci7 at ppb6 bus 9
em2 at pci7 dev 0 function 0 "Intel 82571EB" rev 0x06: ivec 0x14, address 00:14:4f:97:8a:bc
em3 at pci7 dev 0 function 1 "Intel 82571EB" rev 0x06: ivec 0x15, address 00:14:4f:97:8a:bd
ppb7 at pci1 dev 2 function 0 "PLX PEX 8533" rev 0xaa
pci8 at ppb7 bus 10
mpi0 at pci8 dev 0 function 0 "Symbios Logic SAS1068E" rev 0x02: msi
mpi0: UNUSED, firmware 1.22.0.0
scsibus1 at mpi0: 112 targets
sd0 at scsibus1 targ 1 lun 0: <ATA, ST9500325AS, BSM1> SCSI3 0/direct fixed naa.5000c500189da8a2
sd0: 476940MB, 512 bytes/sector, 976773168 sectors
ppb8 at pci1 dev 8 function 0 "PLX PEX 8533" rev 0xaa
pci9 at ppb8 bus 11
ppb9 at pci9 dev 0 function 0 "PLX PEX 8533" rev 0xaa
pci10 at ppb9 bus 12
ppb10 at pci10 dev 1 function 0 "PLX PEX 8533" rev 0xaa: msi
pci11 at ppb10 bus 13
ppb11 at pci10 dev 2 function 0 "PLX PEX 8533" rev 0xaa: msi
pci12 at ppb11 bus 14
ppb12 at pci10 dev 8 function 0 "PLX PEX 8533" rev 0xaa: msi
pci13 at ppb12 bus 15
ppb13 at pci10 dev 9 function 0 "PLX PEX 8533" rev 0xaa: msi
pci14 at ppb13 bus 16
ppb14 at pci10 dev 10 function 0 "PLX PEX 8533" rev 0xaa: msi
pci15 at ppb14 bus 17
ppb15 at pci1 dev 9 function 0 "PLX PEX 8533" rev 0xaa: msi
pci16 at ppb15 bus 18
"pci-performance-counters" at mainbus0 not configured
ebus0 at mainbus0
com0 at ebus0 addr ca0000-ca0007 ivec 0x13: ns16550a, 16 byte fifo
umass0 at uhub0 port 2 configuration 1 interface 0 "Texas Instruments Incorporated 6250 FW: 03.03.02.22.562AD423" rev 2.00/2.00 addr 2
umass0: using SCSI over Bulk-Only
scsibus2 at umass0: 2 targets, initiator 0
cd0 at scsibus2 targ 1 lun 0: <TSSTcorp, CD/DVDW TS-T632A, SR03> SCSI0 5/cdrom removable serial.0451625fAAC50E0D301C
uhub3 at uhub0 port 4 configuration 1 interface 0 "Cypress Semiconductor USB2 Hub" rev 2.00/0.0b addr 3
vscsi0 at root
scsibus3 at vscsi0: 256 targets
softraid0 at root
scsibus4 at softraid0: 256 targets
bootpath: /pci@0,0/pci@0,0/pci@2,0/scsi@0,0/disk@1,0
root on sd0a (b82bc96addc58f31.a) swap on sd0b dump on sd0b

Reply | Threaded
Open this post in threaded view
|

Re: another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

David Gwynne-5
I'm mostly concerned that nothing that currently works breaks. This changes a fairly fundamental chunk of code in the guts of the platform, so having machines still work afterward would be nice.

Glad your T5220 works, hopefully the u45 will be ok too.

dlg

> On 20 Jun 2019, at 08:52, Kaashif Hymabaccus <[hidden email]> wrote:
>
> This is great, thanks for the work. I have the new snapshot running on
> a T5220. I also have an Ultra 45 with some GPUs I use mainly for
> playing games and sending people bug reports about how their game
> doesn't work on sparc64, I'll try it there also. If there are any
> specific tests you want done, I am happy to do them. Here is the dmesg
> of the T5220 if there is anything that interests you:
>
> console is /virtual-devices@100/console@1
> Copyright (c) 1982, 1986, 1989, 1991, 1993
> The Regents of the University of California.  All rights reserved.
> Copyright (c) 1995-2019 OpenBSD. All rights reserved.  https://www.OpenBSD.org
>
> OpenBSD 6.5-current (GENERIC.MP) #207: Tue Jun 18 13:15:53 MDT 2019
>    [hidden email]:/usr/src/sys/arch/sparc64/compile/GENERIC.MP
> real mem = 17045651456 (16256MB)
> avail mem = 16721928192 (15947MB)
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root: SPARC Enterprise T5220
> cpu0 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu1 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu2 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu3 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu4 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu5 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu6 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu7 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu8 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu9 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu10 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu11 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu12 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu13 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu14 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu15 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu16 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu17 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu18 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu19 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu20 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu21 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu22 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu23 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu24 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu25 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu26 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu27 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu28 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu29 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu30 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu31 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu32 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu33 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu34 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu35 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu36 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu37 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu38 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu39 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu40 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu41 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu42 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu43 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu44 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu45 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu46 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu47 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu48 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu49 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu50 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu51 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu52 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu53 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu54 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu55 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu56 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu57 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu58 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu59 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu60 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu61 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu62 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> cpu63 at mainbus0: SUNW,UltraSPARC-T2 (rev 0.0) @ 1165.379 MHz
> vbus0 at mainbus0
> "flashprom" at vbus0 not configured
> "tpm" at vbus0 not configured
> cbus0 at vbus0
> vldc0 at cbus0
> vldcp0 at vldc0 chan 0x14: ivec 0x28, 0x29 channel "spds"
> "sunvts" at vldc0 chan 0x6 not configured
> "sunmc" at vldc0 chan 0x7 not configured
> "explorer" at vldc0 chan 0x8 not configured
> "led" at vldc0 chan 0x9 not configured
> "flashupdate" at vldc0 chan 0xa not configured
> "ipmi" at vldc0 chan 0xc not configured
> "system-management" at vldc0 chan 0xd not configured
> vldc1 at cbus0
> "spfma" at vldc1 chan 0x5 not configured
> vldc2 at cbus0
> vldcp1 at vldc2 chan 0x0: ivec 0x0, 0x1 channel "hvctl"
> "ldom-primary" at vldc2 chan 0x1 not configured
> "fmactl" at vldc2 chan 0x3 not configured
> vldc3 at cbus0
> "ldmfma" at vldc3 chan 0x4 not configured
> "n2cp" at vbus0 not configured
> "ncp" at vbus0 not configured
> vrng0 at vbus0
> vcons0 at vbus0: ivec 0x111, console
> vrtc0 at vbus0
> "niu" at mainbus0 not configured
> vpci0 at mainbus0: bus 2 to 18, dvma map 80000000-ffffffff
> pci0 at vpci0
> ppb0 at pci0 dev 0 function 0 "PLX PEX 8533" rev 0xaa
> pci1 at ppb0 bus 3
> ppb1 at pci1 dev 1 function 0 "PLX PEX 8533" rev 0xaa
> pci2 at ppb1 bus 4
> ppb2 at pci2 dev 0 function 0 "PLX PEX 8517" rev 0xad
> pci3 at ppb2 bus 5
> ppb3 at pci3 dev 1 function 0 "PLX PEX 8517" rev 0xad
> pci4 at ppb3 bus 6
> ppb4 at pci4 dev 0 function 0 "PLX PEX 8111" rev 0x21
> pci5 at ppb4 bus 7
> ohci0 at pci5 dev 0 function 0 "NEC USB" rev 0x43: ivec 0x16, version 1.0
> ohci1 at pci5 dev 0 function 1 "NEC USB" rev 0x43: ivec 0x17, version 1.0
> ehci0 at pci5 dev 0 function 2 "NEC USB" rev 0x04: ivec 0x14
> usb0 at ehci0: USB revision 2.0
> uhub0 at usb0 configuration 1 interface 0 "NEC EHCI root hub" rev 2.00/1.00 addr 1
> usb1 at ohci0: USB revision 1.0
> uhub1 at usb1 configuration 1 interface 0 "NEC OHCI root hub" rev 1.00/1.00 addr 1
> usb2 at ohci1: USB revision 1.0
> uhub2 at usb2 configuration 1 interface 0 "NEC OHCI root hub" rev 1.00/1.00 addr 1
> ppb5 at pci3 dev 2 function 0 "PLX PEX 8517" rev 0xad
> pci6 at ppb5 bus 8
> em0 at pci6 dev 0 function 0 "Intel 82571EB" rev 0x06: ivec 0x17, address 00:14:4f:97:8a:ba
> em1 at pci6 dev 0 function 1 "Intel 82571EB" rev 0x06: ivec 0x14, address 00:14:4f:97:8a:bb
> ppb6 at pci3 dev 3 function 0 "PLX PEX 8517" rev 0xad
> pci7 at ppb6 bus 9
> em2 at pci7 dev 0 function 0 "Intel 82571EB" rev 0x06: ivec 0x14, address 00:14:4f:97:8a:bc
> em3 at pci7 dev 0 function 1 "Intel 82571EB" rev 0x06: ivec 0x15, address 00:14:4f:97:8a:bd
> ppb7 at pci1 dev 2 function 0 "PLX PEX 8533" rev 0xaa
> pci8 at ppb7 bus 10
> mpi0 at pci8 dev 0 function 0 "Symbios Logic SAS1068E" rev 0x02: msi
> mpi0: UNUSED, firmware 1.22.0.0
> scsibus1 at mpi0: 112 targets
> sd0 at scsibus1 targ 1 lun 0: <ATA, ST9500325AS, BSM1> SCSI3 0/direct fixed naa.5000c500189da8a2
> sd0: 476940MB, 512 bytes/sector, 976773168 sectors
> ppb8 at pci1 dev 8 function 0 "PLX PEX 8533" rev 0xaa
> pci9 at ppb8 bus 11
> ppb9 at pci9 dev 0 function 0 "PLX PEX 8533" rev 0xaa
> pci10 at ppb9 bus 12
> ppb10 at pci10 dev 1 function 0 "PLX PEX 8533" rev 0xaa: msi
> pci11 at ppb10 bus 13
> ppb11 at pci10 dev 2 function 0 "PLX PEX 8533" rev 0xaa: msi
> pci12 at ppb11 bus 14
> ppb12 at pci10 dev 8 function 0 "PLX PEX 8533" rev 0xaa: msi
> pci13 at ppb12 bus 15
> ppb13 at pci10 dev 9 function 0 "PLX PEX 8533" rev 0xaa: msi
> pci14 at ppb13 bus 16
> ppb14 at pci10 dev 10 function 0 "PLX PEX 8533" rev 0xaa: msi
> pci15 at ppb14 bus 17
> ppb15 at pci1 dev 9 function 0 "PLX PEX 8533" rev 0xaa: msi
> pci16 at ppb15 bus 18
> "pci-performance-counters" at mainbus0 not configured
> ebus0 at mainbus0
> com0 at ebus0 addr ca0000-ca0007 ivec 0x13: ns16550a, 16 byte fifo
> umass0 at uhub0 port 2 configuration 1 interface 0 "Texas Instruments Incorporated 6250 FW: 03.03.02.22.562AD423" rev 2.00/2.00 addr 2
> umass0: using SCSI over Bulk-Only
> scsibus2 at umass0: 2 targets, initiator 0
> cd0 at scsibus2 targ 1 lun 0: <TSSTcorp, CD/DVDW TS-T632A, SR03> SCSI0 5/cdrom removable serial.0451625fAAC50E0D301C
> uhub3 at uhub0 port 4 configuration 1 interface 0 "Cypress Semiconductor USB2 Hub" rev 2.00/0.0b addr 3
> vscsi0 at root
> scsibus3 at vscsi0: 256 targets
> softraid0 at root
> scsibus4 at softraid0: 256 targets
> bootpath: /pci@0,0/pci@0,0/pci@2,0/scsi@0,0/disk@1,0
> root on sd0a (b82bc96addc58f31.a) swap on sd0b dump on sd0b
>

Reply | Threaded
Open this post in threaded view
|

Re: another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

Klemens Nanni-2
In reply to this post by David Gwynne-5
Upgraded my T5240 using the latest bsd.rd, all is fine - no noticable
regression in production.

Reply | Threaded
Open this post in threaded view
|

Re: another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

Kurt Mosiejczuk-9
In reply to this post by Theo de Raadt-2
On Mon, Jun 17, 2019 at 07:26:13PM -0600, Theo de Raadt wrote:
> David Gwynne <[hidden email]> wrote:

> > this is a reposting of the diff i sent out a while back. it lets sparc64
> > enable iommu bypass, and then uses that bypass support for BUS_DMA_64BIT
> > dmamaps.

> BTW, this is in snapshots and I'd urge everyone running sparc64 to give it
> a try.

I've tried this now on an Ultra 2 and a v210. Disks seem faster than a couple
of weeks ago (before this was in snaps).

--Kurt

Reply | Threaded
Open this post in threaded view
|

Re: another go at bypass support for sparc64 iommu and BUS_DMA_64BIT

Mark Kettenis
In reply to this post by David Gwynne-5
> Date: Tue, 11 Jun 2019 11:51:51 +1000
> From: David Gwynne <[hidden email]>
>
> this is a reposting of the diff i sent out a while back. it lets sparc64
> enable iommu bypass, and then uses that bypass support for BUS_DMA_64BIT
> dmamaps.
>
> the main benefit is around performance. without this diff on an M4000,
> tcpbench can do about 70Mbps before the system is CPU bound. all that
> cpu time is in the iommu tte management. with this diff and an
> appropriate network driver, tcpbench does 940 Mbps and the box is mostly
> idle. that is a 1300 percent improvement in network transfer speed. if
> you factor the cpu time benefits it would be another order of magnitude
> improvement.
>
> kernel build times are slightly improved with this diff on mpii. system
> time is about 5 percent less, but there's also a lot less I/O when
> you're mostly running a compiler.
>
> i hit an edge case with my previous diff that i believe is fixed with
> this one. this one properly checks boundaries, segment sizes, and number
> of segments for things like mbufs. i got lucky previously with the
> drivers i was using.

Looks mostly ok, one request below.  With that fixed, ok kettenis@

> Index: dev/iommu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/iommu.c,v
> retrieving revision 1.75
> diff -u -p -r1.75 iommu.c
> --- dev/iommu.c 25 May 2017 03:19:39 -0000 1.75
> +++ dev/iommu.c 11 Jun 2019 01:37:25 -0000
> @@ -87,6 +87,8 @@ void iommu_dvmamap_print_map(bus_dma_tag
>      bus_dmamap_t);
>  int iommu_dvmamap_append_range(bus_dma_tag_t, bus_dmamap_t, paddr_t,
>      bus_size_t, int, bus_size_t);
> +int iommu_dvmamap_insert(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
> +    bus_size_t, int, bus_size_t);
>  int64_t iommu_tsb_entry(struct iommu_state *, bus_addr_t);
>  void strbuf_reset(struct strbuf_ctl *);
>  int iommu_iomap_insert_page(struct iommu_map_state *, paddr_t);
> @@ -100,6 +102,25 @@ void iommu_iomap_clear_pages(struct iomm
>  void _iommu_dvmamap_sync(bus_dma_tag_t, bus_dma_tag_t, bus_dmamap_t,
>      bus_addr_t, bus_size_t, int);
>  
> +void iommu_hw_enable(struct iommu_state *);
> +
> +const struct iommu_hw iommu_hw_default = {
> + .ihw_enable = iommu_hw_enable,
> +
> + .ihw_dvma_pa = IOTTE_PAMASK,
> +
> + .ihw_bypass = 0x3fffUL << 50,
> + .ihw_bypass_nc = 0,
> + .ihw_bypass_ro = 0,
> +};
> +
> +void
> +iommu_hw_enable(struct iommu_state *is)
> +{
> + IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
> + IOMMUREG_WRITE(is, iommu_cr, IOMMUCR_EN | (is->is_tsbsize << 16));
> +}
> +
>  /*
>   * Initiate an STC entry flush.
>   */
> @@ -125,7 +146,8 @@ iommu_strbuf_flush(struct strbuf_ctl *sb
>   * - create a private DVMA map.
>   */
>  void
> -iommu_init(char *name, struct iommu_state *is, int tsbsize, u_int32_t iovabase)
> +iommu_init(char *name, const struct iommu_hw *ihw, struct iommu_state *is,
> +    int tsbsize, u_int32_t iovabase)
>  {
>   psize_t size;
>   vaddr_t va;
> @@ -149,13 +171,9 @@ iommu_init(char *name, struct iommu_stat
>   * be hard-wired, so we read the start and size from the PROM and
>   * just use those values.
>   */
> - if (strncmp(name, "pyro", 4) == 0) {
> - is->is_cr = IOMMUREG_READ(is, iommu_cr);
> - is->is_cr &= ~IOMMUCR_FIRE_BE;
> - is->is_cr |= (IOMMUCR_FIRE_SE | IOMMUCR_FIRE_CM_EN |
> -    IOMMUCR_FIRE_TE);
> - } else
> - is->is_cr = IOMMUCR_EN;
> +
> + is->is_hw = ihw;
> +
>   is->is_tsbsize = tsbsize;
>   if (iovabase == (u_int32_t)-1) {
>   is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
> @@ -237,15 +255,6 @@ iommu_init(char *name, struct iommu_stat
>   mtx_init(&is->is_mtx, IPL_HIGH);
>  
>   /*
> - * Set the TSB size.  The relevant bits were moved to the TSB
> - * base register in the PCIe host bridges.
> - */
> - if (strncmp(name, "pyro", 4) == 0)
> - is->is_ptsb |= is->is_tsbsize;
> - else
> - is->is_cr |= (is->is_tsbsize << 16);
> -
> - /*
>   * Now actually start up the IOMMU.
>   */
>   iommu_reset(is);
> @@ -262,10 +271,7 @@ iommu_reset(struct iommu_state *is)
>  {
>   int i;
>  
> - IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
> -
> - /* Enable IOMMU */
> - IOMMUREG_WRITE(is, iommu_cr, is->is_cr);
> + (*is->is_hw->ihw_enable)(is);
>  
>   for (i = 0; i < 2; ++i) {
>   struct strbuf_ctl *sb = is->is_sb[i];
> @@ -280,7 +286,7 @@ iommu_reset(struct iommu_state *is)
>   printf(", STC%d enabled", i);
>   }
>  
> - if (is->is_flags & IOMMU_FLUSH_CACHE)
> + if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE))
>   IOMMUREG_WRITE(is, iommu_cache_invalidate, -1ULL);
>  }
>  
> @@ -433,7 +439,7 @@ iommu_extract(struct iommu_state *is, bu
>   if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
>   tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
>  
> - return (tte & IOTTE_PAMASK);
> + return (tte & is->is_hw->ihw_dvma_pa);
>  }
>  
>  /*
> @@ -601,6 +607,7 @@ iommu_dvmamap_create(bus_dma_tag_t t, bu
>  {
>   int ret;
>   bus_dmamap_t map;
> + struct iommu_state *is = sb->sb_iommu;
>   struct iommu_map_state *ims;
>  
>   BUS_DMA_FIND_PARENT(t, _dmamap_create);
> @@ -610,6 +617,12 @@ iommu_dvmamap_create(bus_dma_tag_t t, bu
>   if (ret)
>   return (ret);
>  
> + if (flags & BUS_DMA_64BIT) {
> + map->_dm_cookie = is;
> + *dmamap = map;
> + return (0);
> + }
> +
>   ims = iommu_iomap_create(atop(round_page(size)));
>  
>   if (ims == NULL) {
> @@ -641,8 +654,10 @@ iommu_dvmamap_destroy(bus_dma_tag_t t, b
>   if (map->dm_nsegs)
>   bus_dmamap_unload(t0, map);
>  
> -        if (map->_dm_cookie)
> -                iommu_iomap_destroy(map->_dm_cookie);
> + if (!ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
> +        if (map->_dm_cookie)
> + iommu_iomap_destroy(map->_dm_cookie);
> + }
>   map->_dm_cookie = NULL;
>  
>   BUS_DMA_FIND_PARENT(t, _dmamap_destroy);
> @@ -667,36 +682,36 @@ iommu_dvmamap_load(bus_dma_tag_t t, bus_
>   u_long dvmaddr, sgstart, sgend;
>   bus_size_t align, boundary;
>   struct iommu_state *is;
> - struct iommu_map_state *ims = map->_dm_cookie;
> + struct iommu_map_state *ims;
>   pmap_t pmap;
>  
> -#ifdef DIAGNOSTIC
> - if (ims == NULL)
> - panic("iommu_dvmamap_load: null map state");
> -#endif
> -#ifdef DEBUG
> - if (ims->ims_sb == NULL)
> - panic("iommu_dvmamap_load: null sb");
> - if (ims->ims_sb->sb_iommu == NULL)
> - panic("iommu_dvmamap_load: null iommu");
> -#endif /* DEBUG */
> - is = ims->ims_sb->sb_iommu;
> -
> - if (map->dm_nsegs) {
> - /*
> - * Is it still in use? _bus_dmamap_load should have taken care
> - * of this.
> - */
> -#ifdef DIAGNOSTIC
> - panic("iommu_dvmamap_load: map still in use");
> -#endif
> - bus_dmamap_unload(t0, map);
> - }
> -
>   /*
>   * Make sure that on error condition we return "no valid mappings".
>   */
> - map->dm_nsegs = 0;
> + KASSERTMSG(map->dm_nsegs == 0, "map still in use");
> +
> + if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
> + unsigned long bypass;
> + int i;
> +
> + is = map->_dm_cookie;
> + bypass = is->is_hw->ihw_bypass;
> +
> + /* Bypass translation by the IOMMU. */
> +
> + BUS_DMA_FIND_PARENT(t, _dmamap_load);
> + err = (*t->_dmamap_load)(t, t0, map, buf, buflen, p, flags);
> + if (err != 0)
> + return (err);
> +
> + for (i = 0; i < map->dm_nsegs; i++)
> + map->dm_segs[i].ds_addr |= bypass;
> +
> + return (0);
> + }
> +
> + ims = map->_dm_cookie;
> + is = ims->ims_sb->sb_iommu;
>  
>   if (buflen < 1 || buflen > map->_dm_size) {
>   DPRINTF(IDB_BUSDMA,
> @@ -876,27 +891,9 @@ iommu_dvmamap_load_raw(bus_dma_tag_t t,
>   bus_size_t boundary, align;
>   u_long dvmaddr, sgstart, sgend;
>   struct iommu_state *is;
> - struct iommu_map_state *ims = map->_dm_cookie;
> -
> -#ifdef DIAGNOSTIC
> - if (ims == NULL)
> - panic("iommu_dvmamap_load_raw: null map state");
> -#endif
> -#ifdef DEBUG
> - if (ims->ims_sb == NULL)
> - panic("iommu_dvmamap_load_raw: null sb");
> - if (ims->ims_sb->sb_iommu == NULL)
> - panic("iommu_dvmamap_load_raw: null iommu");
> -#endif /* DEBUG */
> - is = ims->ims_sb->sb_iommu;
> + struct iommu_map_state *ims;
>  
> - if (map->dm_nsegs) {
> - /* Already in use?? */
> -#ifdef DIAGNOSTIC
> - panic("iommu_dvmamap_load_raw: map still in use");
> -#endif
> - bus_dmamap_unload(t0, map);
> - }
> + KASSERTMSG(map->dm_nsegs == 0, "map stil in use");
>  
>   /*
>   * A boundary presented to bus_dmamem_alloc() takes precedence
> @@ -905,6 +902,31 @@ iommu_dvmamap_load_raw(bus_dma_tag_t t,
>   if ((boundary = segs[0]._ds_boundary) == 0)
>   boundary = map->_dm_boundary;
>  
> + if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
> + unsigned long bypass;
> +
> + is = map->_dm_cookie;
> + bypass = is->is_hw->ihw_bypass;
> +
> + /* Bypass translation by the IOMMU. */
> + for (i = 0; i < nsegs; i++) {
> + err = iommu_dvmamap_insert(t, map,
> +    bypass | segs[i].ds_addr, segs[i].ds_len,
> +    0, boundary);
> + if (err != 0) {
> + map->dm_nsegs = 0;
> + return (err);
> + }
> + }
> +
> + map->dm_mapsize = size;
> +
> + return (0);
> + }
> +
> + ims = map->_dm_cookie;
> + is = ims->ims_sb->sb_iommu;
> +
>   align = MAX(segs[0]._ds_align, PAGE_SIZE);
>  
>   /*
> @@ -1084,22 +1106,24 @@ iommu_dvmamap_append_range(bus_dma_tag_t
>      bus_size_t length, int flags, bus_size_t boundary)
>  {
>   struct iommu_map_state *ims = map->_dm_cookie;
> - bus_addr_t sgstart, sgend, bd_mask;
> - bus_dma_segment_t *seg = NULL;
> - int i = map->dm_nsegs;
> + bus_addr_t sgstart = iommu_iomap_translate(ims, pa);
>  
> -#ifdef DEBUG
> - if (ims == NULL)
> - panic("iommu_dvmamap_append_range: null map state");
> -#endif
> + return (iommu_dvmamap_insert(t, map, sgstart, length, flags, boundary));
> +}
>  
> - sgstart = iommu_iomap_translate(ims, pa);
> - sgend = sgstart + length - 1;
> +int
> +iommu_dvmamap_insert(bus_dma_tag_t t, bus_dmamap_t map,
> +    bus_addr_t sgstart, bus_size_t length, int flags, bus_size_t boundary)
> +{
> + bus_addr_t sgend = sgstart + length - 1;
> + bus_addr_t bd_mask;
> + bus_dma_segment_t *seg = NULL;
> + int i = map->dm_nsegs;
>  
>  #ifdef DIAGNOSTIC
>   if (sgstart == 0 || sgstart > sgend) {
> - printf("append range invalid mapping for %lx "
> -    "(0x%lx - 0x%lx)\n", pa, sgstart, sgend);
> + printf("append range invalid mapping for "
> +    "0x%lx - 0x%lx\n", sgstart, sgend);
>   map->dm_nsegs = 0;
>   return (EINVAL);
>   }
> @@ -1298,20 +1322,17 @@ void
>  iommu_dvmamap_unload(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
>  {
>   struct iommu_state *is;
> - struct iommu_map_state *ims = map->_dm_cookie;
> + struct iommu_map_state *ims;
>   bus_addr_t dvmaddr = map->_dm_dvmastart;
>   bus_size_t sgsize = map->_dm_dvmasize;
>   int error;
>  
> -#ifdef DEBUG
> - if (ims == NULL)
> - panic("iommu_dvmamap_unload: null map state");
> - if (ims->ims_sb == NULL)
> - panic("iommu_dvmamap_unload: null sb");
> - if (ims->ims_sb->sb_iommu == NULL)
> - panic("iommu_dvmamap_unload: null iommu");
> -#endif /* DEBUG */
> + if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
> + bus_dmamap_unload(t->_parent, map);
> + return;
> + }
>  
> + ims = map->_dm_cookie;
>   is = ims->ims_sb->sb_iommu;
>  
>   /* Flush the iommu */
> @@ -1488,7 +1509,7 @@ iommu_dvmamap_print_map(bus_dma_tag_t t,
>   break;
>   }
>  
> - if (map->_dm_cookie) {
> + if (!ISSET(map->_dm_flags, BUS_DMA_64BIT) && map->_dm_cookie != NULL) {
>   struct iommu_map_state *ims = map->_dm_cookie;
>   struct iommu_page_map *ipm = &ims->ims_map;
>  
> @@ -1546,19 +1567,19 @@ void
>  iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
>      bus_addr_t offset, bus_size_t len, int ops)
>  {
> - struct iommu_map_state *ims = map->_dm_cookie;
> + struct iommu_map_state *ims;
>  
> -#ifdef DIAGNOSTIC
> - if (ims == NULL)
> - panic("iommu_dvmamap_sync: null map state");
> - if (ims->ims_sb == NULL)
> - panic("iommu_dvmamap_sync: null sb");
> - if (ims->ims_sb->sb_iommu == NULL)
> - panic("iommu_dvmamap_sync: null iommu");
> -#endif
>   if (len == 0)
>   return;
>  
> + if (map->_dm_flags & BUS_DMA_64BIT) {
> + if (ops & (BUS_DMASYNC_PREWRITE | BUS_DMASYNC_POSTREAD))
> + __membar("#MemIssue");
> + return;
> + }
> +
> + ims = map->_dm_cookie;
> +
>   if (ops & BUS_DMASYNC_PREWRITE)
>   __membar("#MemIssue");
>  
> @@ -1622,9 +1643,13 @@ iommu_dvmamem_alloc(bus_dma_tag_t t, bus
>      "bound %llx segp %p flags %d\n", (unsigned long long)size,
>      (unsigned long long)alignment, (unsigned long long)boundary,
>      segs, flags));
> +
> + if ((flags & BUS_DMA_64BIT) == 0)
> + flags |= BUS_DMA_DVMA;
> +
>   BUS_DMA_FIND_PARENT(t, _dmamem_alloc);
>   return ((*t->_dmamem_alloc)(t, t0, size, alignment, boundary,
> -    segs, nsegs, rsegs, flags | BUS_DMA_DVMA));
> +    segs, nsegs, rsegs, flags));
>  }
>  
>  void
> @@ -1763,7 +1788,7 @@ iommu_iomap_load_map(struct iommu_state
>  
>   /* Flush cache if necessary. */
>   slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
> - if (is->is_flags & IOMMU_FLUSH_CACHE &&
> + if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE) &&
>      (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
>   IOMMUREG_WRITE(is, iommu_cache_flush,
>      is->is_ptsb + slot * 8);
> @@ -1788,7 +1813,7 @@ iommu_iomap_unload_map(struct iommu_stat
>  
>   /* Flush cache if necessary. */
>   slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
> - if (is->is_flags & IOMMU_FLUSH_CACHE &&
> + if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE) &&
>      (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
>   IOMMUREG_WRITE(is, iommu_cache_flush,
>      is->is_ptsb + slot * 8);
> Index: dev/iommureg.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/iommureg.h,v
> retrieving revision 1.17
> diff -u -p -r1.17 iommureg.h
> --- dev/iommureg.h 17 Aug 2012 20:46:50 -0000 1.17
> +++ dev/iommureg.h 11 Jun 2019 01:37:25 -0000
> @@ -90,10 +90,11 @@ struct iommu_strbuf {
>  #define IOMMUCR_DE 0x000000000000000002LL /* Diag enable */
>  #define IOMMUCR_EN 0x000000000000000001LL /* Enable IOMMU */
>  
> -#define IOMMUCR_FIRE_SE 0x000000000000000400LL /* Snoop enable */
> -#define IOMMUCR_FIRE_CM_EN 0x000000000000000300LL  /* Cache mode enable */
> -#define IOMMUCR_FIRE_BE 0x000000000000000002LL /* Bypass enable */
> -#define IOMMUCR_FIRE_TE 0x000000000000000001LL /* Translation enabled */
> +#define IOMMUCR_FIRE_PD 0x000000000000001000UL /* Process disable */
> +#define IOMMUCR_FIRE_SE 0x000000000000000400UL /* Snoop enable */
> +#define IOMMUCR_FIRE_CM_EN 0x000000000000000300UL  /* Cache mode enable */
> +#define IOMMUCR_FIRE_BE 0x000000000000000002UL /* Bypass enable */
> +#define IOMMUCR_FIRE_TE 0x000000000000000001UL /* Translation enabled */
>  
>  /*
>   * IOMMU stuff
> Index: dev/iommuvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/iommuvar.h,v
> retrieving revision 1.17
> diff -u -p -r1.17 iommuvar.h
> --- dev/iommuvar.h 4 May 2016 18:26:12 -0000 1.17
> +++ dev/iommuvar.h 11 Jun 2019 01:37:25 -0000
> @@ -100,6 +100,21 @@ struct iommu_map_state {
>  };
>  #define IOMMU_MAP_STREAM 1
>  
> +struct iommu_hw {
> + void (*ihw_enable)(struct iommu_state *);
> +
> + unsigned long ihw_dvma_pa;
> +
> + unsigned long ihw_bypass;
> + unsigned long ihw_bypass_nc; /* non-cached */
> + unsigned long ihw_bypass_ro; /* relaxed ordering */
> +
> + unsigned int ihw_flags;
> +#define IOMMU_HW_FLUSH_CACHE (1 << 0)
> +};
> +
> +extern const struct iommu_hw iommu_hw_default;
> +
>  /*
>   * per-IOMMU state
>   */
> @@ -112,8 +127,7 @@ struct iommu_state {
>   int64_t is_cr; /* Control register value */
>   struct mutex is_mtx;
>   struct extent *is_dvmamap; /* DVMA map for this instance */
> - int is_flags;
> -#define IOMMU_FLUSH_CACHE 0x00000001
> + const struct iommu_hw *is_hw;
>  
>   struct strbuf_ctl *is_sb[2]; /* Streaming buffers if any */
>  
> @@ -126,7 +140,8 @@ struct iommu_state {
>  };
>  
>  /* interfaces for PCI/SBus code */
> -void iommu_init(char *, struct iommu_state *, int, u_int32_t);
> +void iommu_init(char *, const struct iommu_hw *, struct iommu_state *,
> +    int, u_int32_t);
>  void iommu_reset(struct iommu_state *);
>  paddr_t iommu_extract(struct iommu_state *, bus_addr_t);
>  int64_t iommu_lookup_tte(struct iommu_state *, bus_addr_t);
> @@ -146,6 +161,7 @@ int iommu_dvmamem_alloc(bus_dma_tag_t, b
>      bus_size_t, bus_size_t, bus_dma_segment_t *, int, int *, int);
>  void iommu_dvmamem_free(bus_dma_tag_t, bus_dma_tag_t, bus_dma_segment_t *,
>      int);
> +
>  
>  #define IOMMUREG_READ(is, reg) \
>   bus_space_read_8((is)->is_bustag, \
> Index: dev/pci_machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/pci_machdep.c,v
> retrieving revision 1.47
> diff -u -p -r1.47 pci_machdep.c
> --- dev/pci_machdep.c 11 Jun 2019 00:45:31 -0000 1.47
> +++ dev/pci_machdep.c 11 Jun 2019 01:37:25 -0000
> @@ -57,6 +57,7 @@ int sparc_pci_debug = 0x0;
>  #include <machine/openfirm.h>
>  #include <dev/pci/pcivar.h>
>  #include <dev/pci/pcireg.h>
> +#include <dev/pci/pcidevs.h>
>  
>  #include <dev/ofw/ofw_pci.h>
>  
> @@ -84,6 +85,46 @@ pci_attach_hook(parent, self, pba)
>   struct pcibus_attach_args *pba;
>  {
>   /* Don't do anything */
> +}
> +
> +int
> +pci_bcm_dmamap_create(bus_dma_tag_t dt, bus_dma_tag_t t0, bus_size_t size,
> +    int nsegments, bus_size_t maxsegsz, bus_size_t boundary, int flags,
> +    bus_dmamap_t *dmamp)
> +{
> + bus_dma_tag_t pdt = dt->_parent;
> +
> + CLR(flags, BUS_DMA_64BIT);
> +
> + return ((*pdt->_dmamap_create)(pdt, t0, size, nsegments, maxsegsz,
> +    boundary, flags, dmamp));
> +}

Maybe rename this function to pci_32bit_dmamap_create().

> +
> +int
> +pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> +{
> + bus_dma_tag_t dt, pdt;
> +
> + if (pa->pa_id ==
> +    PCI_ID_CODE(PCI_VENDOR_RCC, PCI_PRODUCT_RCC_PCIE_PCIX)) {

I'd really prefer to write this as

    if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_RCC &&
        PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_RCC_PCIE_PCIX) {

since that is how most other drivers handle this.

> + /*
> + * These PCI bridges only support 40bit DVA, so intercept
> + * bus_dmamap_create so we can clear BUS_DMA_64BIT.
> + */
> +
> + dt = malloc(sizeof(*dt), M_DEVBUF, M_NOWAIT | M_ZERO);
> + if (dt == NULL)
> + panic("%s: could not alloc dma tag", __func__);
> +
> + pdt = pa->pa_dmat;
> +
> + dt->_parent = pdt;
> + dt->_dmamap_create = pci_bcm_dmamap_create;
> +
> + pa->pa_dmat = dt;
> + }
> +
> + return (0);
>  }
>  
>  int
> Index: dev/psycho.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/psycho.c,v
> retrieving revision 1.75
> diff -u -p -r1.75 psycho.c
> --- dev/psycho.c 25 May 2017 03:19:39 -0000 1.75
> +++ dev/psycho.c 11 Jun 2019 01:37:25 -0000
> @@ -902,7 +902,7 @@ psycho_iommu_init(struct psycho_softc *s
>   panic("couldn't malloc iommu name");
>   snprintf(name, 32, "%s dvma", sc->sc_dev.dv_xname);
>  
> - iommu_init(name, is, tsbsize, iobase);
> + iommu_init(name, &iommu_hw_default, is, tsbsize, iobase);
>  }
>  
>  /*
> Index: dev/pyro.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/pyro.c,v
> retrieving revision 1.32
> diff -u -p -r1.32 pyro.c
> --- dev/pyro.c 11 Jun 2019 00:45:31 -0000 1.32
> +++ dev/pyro.c 11 Jun 2019 01:37:25 -0000
> @@ -131,6 +131,30 @@ int pyro_msi_eq_intr(void *);
>  int pyro_dmamap_create(bus_dma_tag_t, bus_dma_tag_t, bus_size_t, int,
>      bus_size_t, bus_size_t, int, bus_dmamap_t *);
>  
> +void pyro_iommu_enable(struct iommu_state *);
> +
> +const struct iommu_hw iommu_hw_fire = {
> + .ihw_enable = pyro_iommu_enable,
> +
> + .ihw_dvma_pa = 0x000007ffffffffffUL,
> +
> + .ihw_bypass = 0xfffc000000000000UL,
> + .ihw_bypass_nc = 0x0000080000000000UL,
> + .ihw_bypass_ro = 0,
> +};
> +
> +const struct iommu_hw iommu_hw_oberon = {
> + .ihw_enable = pyro_iommu_enable,
> +
> + .ihw_dvma_pa = 0x00007fffffffffffUL,
> +
> + .ihw_bypass = 0x7ffc000000000000UL,
> + .ihw_bypass_nc = 0x0000800000000000UL,
> + .ihw_bypass_ro = 0x8000000000000000UL,
> +
> + .ihw_flags = IOMMU_HW_FLUSH_CACHE,
> +};
> +
>  #ifdef DDB
>  void pyro_xir(void *, int);
>  #endif
> @@ -266,6 +290,7 @@ pyro_init_iommu(struct pyro_softc *sc, s
>   int tsbsize = 7;
>   u_int32_t iobase = -1;
>   char *name;
> + const struct iommu_hw *ihw = &iommu_hw_fire;
>  
>   is->is_bustag = sc->sc_bust;
>  
> @@ -282,11 +307,23 @@ pyro_init_iommu(struct pyro_softc *sc, s
>   panic("couldn't malloc iommu name");
>   snprintf(name, 32, "%s dvma", sc->sc_dv.dv_xname);
>  
> - /* On Oberon, we need to flush the cache. */
>   if (sc->sc_oberon)
> - is->is_flags |= IOMMU_FLUSH_CACHE;
> + ihw = &iommu_hw_oberon;
> +
> + iommu_init(name, ihw, is, tsbsize, iobase);
> +}
> +
> +void
> +pyro_iommu_enable(struct iommu_state *is)
> +{
> + unsigned long cr;
> +
> + cr = IOMMUREG_READ(is, iommu_cr);
> + cr |= IOMMUCR_FIRE_BE | IOMMUCR_FIRE_SE | IOMMUCR_FIRE_CM_EN |
> +    IOMMUCR_FIRE_TE;
>  
> - iommu_init(name, is, tsbsize, iobase);
> + IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb | is->is_tsbsize);
> + IOMMUREG_WRITE(is, iommu_cr, cr);
>  }
>  
>  void
> Index: dev/sbus.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/sbus.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 sbus.c
> --- dev/sbus.c 19 Sep 2015 21:07:04 -0000 1.44
> +++ dev/sbus.c 11 Jun 2019 01:37:25 -0000
> @@ -349,7 +349,7 @@ sbus_mb_attach(struct device *parent, st
>   snprintf(name, 32, "%s dvma", sc->sc_dev.dv_xname);
>  
>   printf("%s: ", sc->sc_dev.dv_xname);
> - iommu_init(name, &sc->sc_is, 0, -1);
> + iommu_init(name, &iommu_hw_default, &sc->sc_is, 0, -1);
>  
>   /* Initialize Starfire PC interrupt translation. */
>   if (OF_getprop(findroot(), "name", buf, sizeof(buf)) > 0 &&
> Index: dev/schizo.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/schizo.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 schizo.c
> --- dev/schizo.c 25 May 2017 03:19:39 -0000 1.68
> +++ dev/schizo.c 11 Jun 2019 01:37:25 -0000
> @@ -451,7 +451,7 @@ schizo_init_iommu(struct schizo_softc *s
>      "using iobase=0x%x, tsbsize=%d\n", iobase, tsbsize));
>   }
>  
> - iommu_init(name, is, tsbsize, iobase);
> + iommu_init(name, &iommu_hw_default, is, tsbsize, iobase);
>  }
>  
>  int
> Index: include/pci_machdep.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/include/pci_machdep.h,v
> retrieving revision 1.34
> diff -u -p -r1.34 pci_machdep.h
> --- include/pci_machdep.h 11 Jun 2019 00:45:31 -0000 1.34
> +++ include/pci_machdep.h 11 Jun 2019 01:37:25 -0000
> @@ -84,10 +84,13 @@ struct sparc_pci_chipset {
>   pcireg_t (*conf_read)(pci_chipset_tag_t, pcitag_t, int);
>   void (*conf_write)(pci_chipset_tag_t, pcitag_t, int, pcireg_t);
>   int (*intr_map)(struct pci_attach_args *, pci_intr_handle_t *);
> + int (*probe_device_hook)(void *, struct pci_attach_args *);
>  };
>  
>  void pci_attach_hook(struct device *, struct device *,
>       struct pcibus_attach_args *);
> +int pci_probe_device_hook(pci_chipset_tag_t,
> +    struct pci_attach_args *);
>  int pci_bus_maxdevs(pci_chipset_tag_t, int);
>  pcitag_t pci_make_tag(pci_chipset_tag_t, int, int, int);
>  void pci_decompose_tag(pci_chipset_tag_t, pcitag_t, int *, int *,
> @@ -115,8 +118,6 @@ int sparc64_pci_enumerate_bus(struct pc
>      struct pci_attach_args *);
>  
>  #define PCI_MACHDEP_ENUMERATE_BUS sparc64_pci_enumerate_bus
> -
> -#define pci_probe_device_hook(c, a) (0)
>  
>  #define pci_min_powerstate(c, t) (PCI_PMCSR_STATE_D3)
>  #define pci_set_powerstate_md(c, t, s, p)
>
>