random malloc junk

Ted Unangst-6
Instead of always using a fixed byte pattern, I think malloc should use a
random pattern. Now, this sometimes means it's harder to identify exactly
what's used after free, so we should provide a means to get the old 0xdf
pattern back.

Since we already have two junk modes, I thought I'd carry on along those
lines. The default junk behavior, for free chunks only, is more of a security
measure. I think this means we want random junk. The second level 'J' junk is
more of a debugging tool, so that retains 0xdf.

There's some overlap here with canaries, but nothing wrong with that. :)

Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.195
diff -u -p -r1.195 malloc.c
--- malloc.c 1 Sep 2016 10:41:02 -0000 1.195
+++ malloc.c 7 Sep 2016 22:21:37 -0000
@@ -186,6 +186,7 @@ struct malloc_readonly {
 #endif
  u_int32_t malloc_canary; /* Matched against ones in malloc_pool */
  uintptr_t malloc_chunk_canary;
+ u_char malloc_junkbytes[256];
 };
 
 /* This object is mapped PROT_READ after initialisation to prevent tampering */
@@ -597,6 +598,8 @@ omalloc_init(void)
  mopts.malloc_move = 1;
  mopts.malloc_cache = MALLOC_DEFAULT_CACHE;
 
+ arc4random_buf(mopts.malloc_junkbytes, sizeof(mopts.malloc_junkbytes));
+
  for (i = 0; i < 3; i++) {
  switch (i) {
  case 0:
@@ -1260,7 +1263,31 @@ malloc(size_t size)
 /*DEF_STRONG(malloc);*/
 
 static void
-validate_junk(struct dir_info *pool, void *p) {
+random_junk(void *p, size_t amt)
+{
+ u_long offset = ((u_long)p) & (sizeof(mopts.malloc_junkbytes) - 1);
+
+ if (amt < sizeof(mopts.malloc_junkbytes) - offset) {
+ memcpy(p, mopts.malloc_junkbytes + offset, amt);
+ } else {
+ memcpy(p, mopts.malloc_junkbytes + offset,
+    sizeof(mopts.malloc_junkbytes) - offset);
+ p = (u_char *)p + sizeof(mopts.malloc_junkbytes) - offset;
+ amt -= sizeof(mopts.malloc_junkbytes) - offset;
+ while (amt > 0) {
+ size_t x = amt > sizeof(mopts.malloc_junkbytes) ?
+ sizeof(mopts.malloc_junkbytes) : amt;
+ memcpy(p, mopts.malloc_junkbytes, x);
+ p = (u_char *)p + x;
+ amt -= x;
+ }
+ }
+}
+
+
+static void
+validate_junk(struct dir_info *pool, void *p)
+{
  struct region_info *r;
  size_t byte, sz;
 
@@ -1274,9 +1301,17 @@ validate_junk(struct dir_info *pool, voi
  sz -= mopts.malloc_canaries;
  if (sz > 32)
  sz = 32;
- for (byte = 0; byte < sz; byte++) {
- if (((unsigned char *)p)[byte] != SOME_FREEJUNK)
+ if (mopts.malloc_junk == 1) {
+ u_long offset = ((u_long)p) & (sizeof(mopts.malloc_junkbytes) - 1);
+ if (sz > sizeof(mopts.malloc_junkbytes) - offset)
+ sz = sizeof(mopts.malloc_junkbytes) - offset;
+ if (memcmp(p, mopts.malloc_junkbytes + offset, sz) != 0)
  wrterror(pool, "use after free", p);
+ } else {
+ for (byte = 0; byte < sz; byte++) {
+ if (((unsigned char *)p)[byte] != SOME_FREEJUNK)
+ wrterror(pool, "use after free", p);
+ }
  }
 }
 
@@ -1336,10 +1371,11 @@ ofree(struct dir_info *argpool, void *p)
  }
  STATS_SUB(pool->malloc_guarded, mopts.malloc_guard);
  }
- if (mopts.malloc_junk && !mopts.malloc_freeunmap) {
- size_t amt = mopts.malloc_junk == 1 ? MALLOC_MAXCHUNK :
-    PAGEROUND(sz) - mopts.malloc_guard;
- memset(p, SOME_FREEJUNK, amt);
+ if (mopts.malloc_junk == 2 && !mopts.malloc_freeunmap) {
+ memset(p, SOME_FREEJUNK,
+    PAGEROUND(sz) - mopts.malloc_guard);
+ } else if (mopts.malloc_junk == 1 && !mopts.malloc_freeunmap) {
+ random_junk(p, MALLOC_MAXCHUNK);
  }
  unmap(pool, p, PAGEROUND(sz));
  delete(pool, r);
@@ -1347,8 +1383,10 @@ ofree(struct dir_info *argpool, void *p)
  void *tmp;
  int i;
 
- if (mopts.malloc_junk && sz > 0)
+ if (mopts.malloc_junk == 2 && sz > 0)
  memset(p, SOME_FREEJUNK, sz - mopts.malloc_canaries);
+ else if (mopts.malloc_junk == 1 && sz > 0)
+ random_junk(p, sz);
  if (!mopts.malloc_freenow) {
  if (find_chunknum(pool, r, p) == -1)
  goto done;
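
A minimal standalone sketch of the same wrap-around fill, outside libc
(JUNKLEN, junkbytes and random_junk_fill are stand-in names, not the ones
in the diff; a power-of-two table size and arc4random_buf(3) are assumed):

#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define JUNKLEN	256	/* must be a power of two */

static unsigned char junkbytes[JUNKLEN];

/* Fill p[0..amt) so the byte at address a holds junkbytes[a & (JUNKLEN - 1)]. */
static void
random_junk_fill(void *p, size_t amt)
{
	unsigned char *dst = p;

	while (amt > 0) {
		size_t offset = (uintptr_t)dst & (JUNKLEN - 1);
		size_t n = JUNKLEN - offset;

		if (n > amt)
			n = amt;
		memcpy(dst, junkbytes + offset, n);
		dst += n;
		amt -= n;
	}
}

int
main(void)
{
	unsigned char buf[1000];
	size_t i;

	arc4random_buf(junkbytes, sizeof(junkbytes));
	random_junk_fill(buf, sizeof(buf));
	for (i = 0; i < sizeof(buf); i++)
		if (buf[i] != junkbytes[(uintptr_t)&buf[i] & (JUNKLEN - 1)])
			errx(1, "mismatch at offset %zu", i);
	printf("fill matches pattern everywhere\n");
	return 0;
}

The invariant checked by the loop in main() is the one validate_junk()
depends on: a freed byte at address a must still equal junkbytes[a & 255].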

Re: random malloc junk

Otto Moerbeek
On Wed, Sep 07, 2016 at 06:29:07PM -0400, Ted Unangst wrote:

> Instead of always using a fixed byte pattern, I think malloc should use a
> random pattern. Now, this sometimes means it's harder to identify exactly
> what's used after free, so we should provide a means to get the old 0xdf
> pattern back.
>
> Since we already have two junk modes, I thought I'd carry on along those
> lines. The default junk behavior, for free chunks only, is more of a security
> measure. I think this means we want random junk. The second level 'J' junk is
> more of a debugging tool, so that retains 0xdf.
>
> There's some overlap here with canaries, but nothing wrong with that. :)

Like it, though I am a bit worried about the costs. Any measurements?

Should be able to look more closely in the coming days.

BTW, we should revisit canaries and work further on moving them
closer to requested size. There's a chance this diff will complicate
that.

        -Otto

Re: random malloc junk

Daniel Micay
In reply to this post by Ted Unangst-6
On Wed, 2016-09-07 at 18:29 -0400, Ted Unangst wrote:

> [..]

A bit off-topic: 'J' enables junk-on-init which is for debugging, but it
also currently has security improvements for large allocations. There's
only partial junk-on-free by default (half a page), and 'U' disables
large allocation junk-on-free without 'J'. I think it would make sense
to remove those optimizations since it's fine if the cost scales up with
larger allocations and losing the guarantee of not leaking data via
uninitialized memory with 'U' is not great. Using 'U' is quite expensive
regardless, and adds some pathological performance cases for small size
allocations, which matters more. I ended up removing both of those
optimizations for the CopperheadOS port.

Re: random malloc junk

Daniel Micay
In reply to this post by Otto Moerbeek
> BTW, we should revisit canaries and work further on moving them
> closer to requested size. There's a chance this diff will complicate
> that.

Can switch the canary code to memcpy/memcmp (to handle unaligned
canaries) and could then use the last byte as an index to the start of
the canary. For larger movement, it could have a special value (like
255) meaning that there's a 4 byte length at an 8 byte offset. If you
really wanted you could micro-optimize to lose only 1 canary bit, but
that seems too complicated to save 7 bits of the canary. It could also
sanity check the offsets based on the known minimum size of a chunk in
that size class.
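
A toy sketch of that layout (hypothetical names, not OpenBSD code): the
canary is memcpy'd to the possibly unaligned end of the requested size,
and the last byte of the chunk stores the distance back to its start.
The tight-fit case, where the special value and separate length field
would be needed, is ignored here:

#include <stdint.h>
#include <string.h>

#define CANARY_SIZE	4

static uint8_t canary_bytes[CANARY_SIZE];	/* randomized at startup */

/* Chunk of chunk_sz bytes holding a request of req_sz bytes. */
static void
place_canary(uint8_t *chunk, size_t chunk_sz, size_t req_sz)
{
	size_t back = chunk_sz - req_sz;	/* assumed to fit in a byte */

	memcpy(chunk + req_sz, canary_bytes, CANARY_SIZE);
	chunk[chunk_sz - 1] = (uint8_t)back;	/* index back to the canary */
}

static int
canary_ok(const uint8_t *chunk, size_t chunk_sz)
{
	size_t back = chunk[chunk_sz - 1];

	/* Sanity check the offset against what the size class permits. */
	if (back < CANARY_SIZE + 1 || back > chunk_sz)
		return 0;
	return memcmp(chunk + chunk_sz - back, canary_bytes,
	    CANARY_SIZE) == 0;
}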

Re: random malloc junk

Daniel Micay
In reply to this post by Ted Unangst-6
A nice security property of 0xdf filling is that a use-after-free of a
pointer is guaranteed to fault in a typical environment since it ends up
pointing outside userspace (I assume that's the case on OpenBSD). A heap
spray could potentially allow exploiting a random pointer. Perhaps it
would be better if only the byte range guaranteeing faults for pointers
was used? Less random, but strictly better than the current situation
rather than losing a nice guarantee.
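
One way to keep faults likely while staying random (an illustration of
the suggestion, not committed code): force the top bit of every junk
byte. On amd64, a pointer assembled from such bytes has bits 63 and 47
set with random bits in between, so it is either non-canonical or points
into the kernel half, and faults either way when dereferenced from
userland, while each byte keeps 7 bits of randomness:

#include <stddef.h>
#include <stdlib.h>

/* Fill buf with random junk whose bytes all have the high bit set. */
static void
fill_faulting_junk(unsigned char *buf, size_t len)
{
	size_t i;

	arc4random_buf(buf, len);	/* assumes arc4random_buf(3) */
	for (i = 0; i < len; i++)
		buf[i] |= 0x80;
}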

Re: random malloc junk

Otto Moerbeek
On Thu, Sep 08, 2016 at 07:47:58PM -0400, Daniel Micay wrote:

> A nice security property of 0xdf filling is that a use-after-free of a
> pointer is guaranteed to fault in a typical environment since it ends up
> pointing outside userspace (I assume that's the case on OpenBSD). A heap
> spray could potentially allow exploiting a random pointer. Perhaps it
> would be better if only the byte range guaranteeing faults for pointers
> was used? Less random, but strictly better than the current situation
> rather than losing a nice guarantee.

AFAIK 0xdf...df is not guaranteed, just often outside the address
space.

I selected 0xdf a long time ago as an alternative to the 0xd0 (Duh)
byte used for new chunks. Both as a mnemonic for "free" and because it
is likely to cause segfaults. A pointer ending in 0xdf often will be
unaligned. Of course that won't work on all archs or all pointers.

Random patterns are also likely to produce segfaults, using them as a
pointer has a big chance of being unaligned or pointing to an unmapped
page.

        -Otto

Re: random malloc junk

Theo de Raadt-2
> On Thu, Sep 08, 2016 at 07:47:58PM -0400, Daniel Micay wrote:
>
> > [..]
>
> AFAIK 0xdf...df is not guaranteed, just often outside the address
> space.
>
> I selected 0xdf a long time ago as an alternative to the 0xd0 (Duh)
> byte used for new chunks. Both as a mnemonic for "free" and because it
> is likely to cause segfaults. A pointer ending in 0xdf often will be
> unaligned. Of course that won't work on all archs or all pointers.
>
> Random patterns are also likely to produce segfaults, using them as a
> pointer has a big chance of being unaligned or pointing to an unmapped
> page.

There is only one benefit from full-random: it creates a little bit
more register damage as the code goes fully astray.

On non-shared address spaces, no byte-repeat address we choose is
guaranteed to be outside the address space.  Some of our architectures
in that family do have full address spaces.  On any such system, if the
attacker can place something at that address before things go wrong,
then he probably has substantial control already.

A machine-dependent value could be chosen to land within the VA hole
that some 64-bit architectures have, but shrug, I don't see the point.

I think 0xdf is still the best of all worlds.

Re: random malloc junk

Otto Moerbeek
In reply to this post by Daniel Micay
On Thu, Sep 08, 2016 at 06:42:33PM -0400, Daniel Micay wrote:

> On Wed, 2016-09-07 at 18:29 -0400, Ted Unangst wrote:
> > [..]
>
> A bit off-topic: 'J' enables junk-on-init which is for debugging, but it
> also currently has security improvements for large allocations. There's
> only partial junk-on-free by default (half a page), and 'U' disables
> large allocation junk-on-free without 'J'. I think it would make sense
> to remove those optimizations since it's fine if the cost scales up with
> larger allocations and losing the guarantee of not leaking data via
> uninitialized memory with 'U' is not great. Using 'U' is quite expensive
> regardless, and adds some pathological performance cases for small size
> allocations, which matters more. I ended up removing both of those
> optimizations for the CopperheadOS port.

I would prefer to see a diff with this. For me, that should be easier
to understand than your description.

        -Otto

Re: random malloc junk

Otto Moerbeek
In reply to this post by Otto Moerbeek
On Thu, Sep 08, 2016 at 08:15:42AM +0200, Otto Moerbeek wrote:

> On Wed, Sep 07, 2016 at 06:29:07PM -0400, Ted Unangst wrote:
>
> > [..]
>
> Like it, though I am a bit worried about the costs. Any measurements?
>
> Should be able to look more closely in the coming days.

As often, real life came in between. Did anybody do measurements? I
really would like to see hard data.

        -Otto

Re: random malloc junk

Theo Buehler
In reply to this post by Ted Unangst-6
On Wed, Sep 07, 2016 at 06:29:07PM -0400, Ted Unangst wrote:
> There's some overlap here with canaries, but nothing wrong with that. :)

The diff breaks canaries since random_junk() overwrites them before they
are validated.  The following straightforward modification fixes that:

> Index: malloc.c
> ===================================================================

[..]

> @@ -1336,10 +1371,11 @@ ofree(struct dir_info *argpool, void *p)
>   }
>   STATS_SUB(pool->malloc_guarded, mopts.malloc_guard);
>   }
> - if (mopts.malloc_junk && !mopts.malloc_freeunmap) {
> - size_t amt = mopts.malloc_junk == 1 ? MALLOC_MAXCHUNK :
> -    PAGEROUND(sz) - mopts.malloc_guard;
> - memset(p, SOME_FREEJUNK, amt);
> + if (mopts.malloc_junk == 2 && !mopts.malloc_freeunmap) {
> + memset(p, SOME_FREEJUNK,
> +    PAGEROUND(sz) - mopts.malloc_guard);
> + } else if (mopts.malloc_junk == 1 && !mopts.malloc_freeunmap) {
> + random_junk(p, MALLOC_MAXCHUNK);

should be:
                        random_junk(p, MALLOC_MAXCHUNK - mopts.malloc_canaries);

>   }
>   unmap(pool, p, PAGEROUND(sz));
>   delete(pool, r);
> @@ -1347,8 +1383,10 @@ ofree(struct dir_info *argpool, void *p)
>   void *tmp;
>   int i;
>  
> - if (mopts.malloc_junk && sz > 0)
> + if (mopts.malloc_junk == 2 && sz > 0)
>   memset(p, SOME_FREEJUNK, sz - mopts.malloc_canaries);
> + else if (mopts.malloc_junk == 1 && sz > 0)
> + random_junk(p, sz);

should be:
                        random_junk(p, sz - mopts.malloc_canaries);

>   if (!mopts.malloc_freenow) {
>   if (find_chunknum(pool, r, p) == -1)
>   goto done;
>

Re: random malloc junk

Theo Buehler
In reply to this post by Otto Moerbeek
On Tue, Sep 13, 2016 at 01:29:17PM +0200, Otto Moerbeek wrote:
> As often, real life came in between. Did anybody do measurements? I
> really would like to to see hard data.

It seems that the price is relatively modest.

I ran several 'make build's:

3rd gen X1, i7 5500U, 2.4GHz (amd64):
        no malloc.conf, malloc.c r1.195:
             1525.04 real      2505.98 user      1362.47 sys
        no malloc.conf, malloc.c + diff:
             1532.15 real      2540.63 user      1356.98 sys

        for comparison:
        malloc.conf -> J, malloc.c + diff:
             1554.76 real      2596.40 user      1353.26 sys

Acer Aspire 5633WLMi, Core 2 Duo 1.66 GHz (i386):
        no malloc.conf, malloc.c, r1.195:
             5020.77 real      5725.21 user      1609.28 sys
        no malloc.conf, malloc.c + diff:
             5088.07 real      5865.80 user      1572.12 sys

Re: random malloc junk

Daniel Micay
In reply to this post by Otto Moerbeek
On Tue, 2016-09-13 at 13:27 +0200, Otto Moerbeek wrote:

> On Thu, Sep 08, 2016 at 06:42:33PM -0400, Daniel Micay wrote:
>
> > [..]
>
> I would prefer to see a diff with this. For me, that should be easier
> to understand than your description.

This is the diff from the CopperheadOS port, which won't apply directly
to malloc.c in OpenBSD, but should explain what I mean since it's just a
few lines. Just ignore the part where it removes malloc_junk=2, which is
because junk-on-init is split out (so this obsoleted the extra mode).

The current OpenBSD code only wipes up to MALLOC_MAXCHUNK with junk @ 1,
and it similarly doesn't wipe at all with 'U' (even though junk-on-free
also serves the purpose of preventing information leaks, not just
mitigating use-after-free). IMO, optimizing large allocation perf like
this isn't worthwhile.

diff --git a/libc/bionic/omalloc.c b/libc/bionic/omalloc.c
index e451d79..9277ee7 100644
--- a/libc/bionic/omalloc.c
+++ b/libc/bionic/omalloc.c
@@ -504,7 +504,7 @@ map(struct dir_info *d, void *hint, size_t sz, int zero_fill)
  madvise(p, sz, MADV_NORMAL);
  if (zero_fill)
  memset(p, 0, sz);
- else if (mopts.malloc_junk == 2 &&
+ else if (mopts.malloc_junk &&
      mopts.malloc_freeunmap)
  memset(p, SOME_FREEJUNK, sz);
  return p;
@@ -524,7 +524,7 @@ map(struct dir_info *d, void *hint, size_t sz, int zero_fill)
  d->free_regions_size -= psz;
  if (zero_fill)
  memset(p, 0, sz);
- else if (mopts.malloc_junk == 2 && mopts.malloc_freeunmap)
+ else if (mopts.malloc_junk && mopts.malloc_freeunmap)
  memset(p, SOME_FREEJUNK, sz);
  return p;
  }
@@ -603,7 +603,7 @@ omalloc_parseopt(char opt)
  mopts.malloc_junk = 0;
  break;
  case 'J':
- mopts.malloc_junk = 2;
+ mopts.malloc_junk = 1;
  break;
  case 'i':
  mopts.malloc_junk_init = 0;
@@ -1517,8 +1517,7 @@ ofree(struct dir_info *pool, void *p)
  STATS_SUB(pool->malloc_guarded, mopts.malloc_guard);
  }
  if (mopts.malloc_junk && !mopts.malloc_freeunmap) {
- size_t amt = mopts.malloc_junk == 1 ? MALLOC_MAXCHUNK :
-     PAGEROUND(sz) - mopts.malloc_guard;
+ size_t amt = PAGEROUND(sz) - mopts.malloc_guard;
  memset(p, SOME_FREEJUNK, amt);
  }
  unmap(pool, p, PAGEROUND(sz));

Re: random malloc junk

Ted Unangst-6
Daniel Micay wrote:
>
> The current OpenBSD code only wipes up to MALLOC_MAXCHUNK with junk @ 1,
> and it similarly doesn't wipe at all with 'U' (even though junk-on-free
> also serves the purpose of preventing information leaks, not just
> mitigating use-after-free). IMO, optimizing large allocation perf like
> this isn't worthwhile.

this requires some analysis of what programs do in the wild. some programs
preemptively malloc large buffers, but don't touch them. it would be a serious
regression for free to fault in new pages, just to dirty them, then turn
around and unmap them. some of this is because i believe the code is doing
things at the wrong time. if you want to dirty whole pages, it should be when
they go on the freelist, not immediately.

Re: random malloc junk

Theo de Raadt-2
> Daniel Micay wrote:
> > [..]
>
> this requires some analysis of what programs do in the wild. some programs
> preemptively malloc large buffers, but don't touch them. it would be a serious
> regression for free to fault in new pages, just to dirty them, then turn
> around and unmap them. some of this is because i believe the code is doing
> things at the wrong time. if you want to dirty whole pages, it should be when
> they go on the freelist, not immediately.
>

Exactly.

Daniel, the giant-allocation situation may not be normal in your
ecosystem, but it is common in general purpose code.  That is why an
upper bound was chosen.

I would also argue that gigantic allocations have far fewer
security risks requiring them to be smashed in this way.  We defend
against those problems by unmapping them, so that the address space
becomes unavailable -> SIGSEGV.

Re: random malloc junk

Otto Moerbeek
In reply to this post by Ted Unangst-6
On Wed, Sep 14, 2016 at 12:53:05PM -0400, Ted Unangst wrote:

> Daniel Micay wrote:
> > [..]
>
> this requires some analysis of what programs do in the wild. some programs
> preemptively malloc large buffers, but don't touch them. it would be a serious
> regression for free to fault in new pages, just to dirty them, then turn
> around and unmap them. some of this is because i believe the code is doing
> things at the wrong time. if you want to dirty whole pages, it should be when
> they go on the freelist, not immediately.

Something like this?

        -Otto

Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.195
diff -u -p -r1.195 malloc.c
--- malloc.c 1 Sep 2016 10:41:02 -0000 1.195
+++ malloc.c 15 Sep 2016 06:09:28 -0000
@@ -376,6 +376,12 @@ unmap(struct dir_info *d, void *p, size_
  for (i = 0; i < mopts.malloc_cache; i++) {
  r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)];
  if (r->p == NULL) {
+ if (mopts.malloc_junk && !mopts.malloc_freeunmap) {
+ size_t amt = mopts.malloc_junk == 1 ?
+    MALLOC_MAXCHUNK :
+    PAGEROUND(sz) - mopts.malloc_guard;
+ memset(p, SOME_FREEJUNK, amt);
+ }
  if (mopts.malloc_hint)
  madvise(p, sz, MADV_FREE);
  if (mopts.malloc_freeunmap)
@@ -1335,11 +1341,6 @@ ofree(struct dir_info *argpool, void *p)
  wrterror(pool, "mprotect", NULL);
  }
  STATS_SUB(pool->malloc_guarded, mopts.malloc_guard);
- }
- if (mopts.malloc_junk && !mopts.malloc_freeunmap) {
- size_t amt = mopts.malloc_junk == 1 ? MALLOC_MAXCHUNK :
-    PAGEROUND(sz) - mopts.malloc_guard;
- memset(p, SOME_FREEJUNK, amt);
  }
  unmap(pool, p, PAGEROUND(sz));
  delete(pool, r);


Re: random malloc junk

Ted Unangst-6
Otto Moerbeek wrote:

> On Wed, Sep 14, 2016 at 12:53:05PM -0400, Ted Unangst wrote:
>
> [..]
>
> Something like this?

didn't look too closely, but looks good from a distance. :)

Re: random malloc junk

Otto Moerbeek
On Thu, Sep 15, 2016 at 10:08:26AM -0400, Ted Unangst wrote:

> Otto Moerbeek wrote:
> > On Wed, Sep 14, 2016 at 12:53:05PM -0400, Ted Unangst wrote:
> >
> > [..]
> >
> > Something like this?
>
> didn't look too closely, but looks good from a distance. :)
>

Slightly better diff: it gets rid of a PAGEROUND(sz) call, since sz ==
PAGEROUND(sz) in unmap().

        -Otto

Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.195
diff -u -p -r1.195 malloc.c
--- malloc.c 1 Sep 2016 10:41:02 -0000 1.195
+++ malloc.c 16 Sep 2016 19:26:34 -0000
@@ -376,6 +376,11 @@ unmap(struct dir_info *d, void *p, size_
  for (i = 0; i < mopts.malloc_cache; i++) {
  r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)];
  if (r->p == NULL) {
+ if (mopts.malloc_junk && !mopts.malloc_freeunmap) {
+ size_t amt = mopts.malloc_junk == 1 ?
+    MALLOC_MAXCHUNK : sz - mopts.malloc_guard;
+ memset(p, SOME_FREEJUNK, amt);
+ }
  if (mopts.malloc_hint)
  madvise(p, sz, MADV_FREE);
  if (mopts.malloc_freeunmap)
@@ -1335,11 +1340,6 @@ ofree(struct dir_info *argpool, void *p)
  wrterror(pool, "mprotect", NULL);
  }
  STATS_SUB(pool->malloc_guarded, mopts.malloc_guard);
- }
- if (mopts.malloc_junk && !mopts.malloc_freeunmap) {
- size_t amt = mopts.malloc_junk == 1 ? MALLOC_MAXCHUNK :
-    PAGEROUND(sz) - mopts.malloc_guard;
- memset(p, SOME_FREEJUNK, amt);
  }
  unmap(pool, p, PAGEROUND(sz));
  delete(pool, r);

Re: random malloc junk

Otto Moerbeek
On Fri, Sep 16, 2016 at 09:30:15PM +0200, Otto Moerbeek wrote:

> On Thu, Sep 15, 2016 at 10:08:26AM -0400, Ted Unangst wrote:
>
> > Otto Moerbeek wrote:
> > > On Wed, Sep 14, 2016 at 12:53:05PM -0400, Ted Unangst wrote:
> > >
> > > [..]
> > >
> > > Something like this?
> >
> > didn't look too closely, but looks good from a distance. :)
> >
>
> Slightly better diff: it gets rid of a PAGEROUND(sz) call, since sz ==
> PAGEROUND(sz) in unmap().

Actually, I think the '- mopts.malloc_guard' can go as well. We're
dealing with unguarded regions at this spot.

Re: random malloc junk

Otto Moerbeek
In reply to this post by Theo Buehler
On Wed, Sep 14, 2016 at 05:41:55AM +0200, Theo Buehler wrote:

> On Tue, Sep 13, 2016 at 01:29:17PM +0200, Otto Moerbeek wrote:
> > As often, real life came in between. Did anybody do measurements? I
> > really would like to see hard data.
>
> It seems that the price is relatively modest.
>
> [..]

Did some more thinking about this. Since this diff interferes with the
effort to move canary bytes closer to the end of the requested size
(see tedu's diff from December,
https://marc.info/?l=openbsd-tech&m=144966528402282&w=2), I'd like to
revisit that diff and fix it, and then see if and how we can do random
junking.

tb@ pointed out that at least realloc is broken with that diff. I have to
go and see if I can spot the actual problem.

        -Otto