sort(1) without arguments segfaults and sorts wrong

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

sort(1) without arguments segfaults and sorts wrong

Paul Stoeber-2
Run
  perl -e '$x="x"x530000 ."\n";print $x,$x' | sort
or
  perl -e '$x="x"x530000 ."\n";print $x,$x' | sort -H
and collect the core file.  This happens on the
  -rw-r--r--  1 0  122  238186496 Nov 26 04:16 install44.iso
snapshot that includes the
  4.4 GENERIC#1511 i386
kernel.

Here's a replacement for sort(1) without arguments that doesn't
have this problem.

The goo outside of main() is for avoiding iterated use of realloc().
Memory is greedily allocated with mmap().  setrlimit() is used to
avoid an uncomfortably large core file.  err() is not used because
it would use malloc().

On i386, the pages returned by mmap(...MAP_ANON...) aren't provided
by the system until they're accessed.  If this isn't true on all
architectures, then this program is no good.

-- BEGIN rsort.c --

/*
 * Copyright (c) 2008 Paul Stoeber
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * rsort - sort lines with radixsort(3)
 *
 * usage: rsort
 *
 * Reads from stdin.  Writes to stdout.  Data must fit in memory.
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/uio.h>

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

extern char *__progname;

#define CK_(x, e) do { if (!(x)) \
        fail(": " __FILE__ ":", __LINE__, ":!(" #x "): ", e); } while (0)
#define CK(x) CK_(x, NULL)

void
fail(char *s1, unsigned int n, char *s2, char *e)
{
        char buf[11], *p;
        struct iovec v[6];
        int i = 0;

        p = &buf[sizeof(buf) - 1];
        *p-- = 0;
        *p = '0';
        if (n != 0) {
                p++;
                while (n != 0 && p > buf) {
                        *--p = '0' + n % 10;
                        n /= 10;
                }
        }
#define X(s) v[i].iov_base = s; v[i].iov_len = strlen(v[i].iov_base); i++
        X(__progname);
        X(s1);
        X(p);
        X(s2);
        if (e == NULL) {
                X(strerror(errno));
        } else {
                X(e);
        }
        X("\n");
#undef X
        writev(STDERR_FILENO, v, i);
        exit(1);
}

#define MMAP(n) mmap(NULL, n, PROT_READ | PROT_WRITE, \
        MAP_ANON | MAP_PRIVATE, -1, 0)

size_t pagesize;
struct rlimit rlimit_core;

struct mem {
        void *addr;
        size_t len;
};

void
alloc_max(struct mem *m)
{
        static int init = 1;
        void *p;
        size_t a, b, c;
        struct rlimit r;

        if (init) {
                init = 0;
                pagesize = getpagesize();
                CK(getrlimit(RLIMIT_CORE, &rlimit_core) == 0);
        }
        r = rlimit_core;
        r.rlim_cur = 0;
        CK(setrlimit(RLIMIT_CORE, &r) == 0);
        a = SIZE_MAX/pagesize;
        b = a + 1;
        for (;;) {
                p = MMAP(a*pagesize);
                if (p != MAP_FAILED)
                        break;
                b = a;
                a /= 2;
                CK_(a != 0, "Can't mmap anything");
        }
        while (a + 1 < b) {
                c = a + (b - a)/2;
                if (p != MAP_FAILED)
                        CK(munmap(p, a*pagesize) == 0);
                p = MMAP(c*pagesize);
                if (p != MAP_FAILED)
                        a = c;
                else
                        b = c;
        }
        if (p == MAP_FAILED)
                CK((p = MMAP(a*pagesize)) != MAP_FAILED);
        m->addr = p;
        m->len = a*pagesize;
}

void
free_tail(struct mem *m, void *p)
{
        size_t d, r;

        d = (char *)p - (char *)m->addr;
        r = d % pagesize;
        if (r != 0)
                d += pagesize - r;
        CK(munmap((char *)m->addr + d, m->len - d) == 0);
        m->len = d;
        CK(setrlimit(RLIMIT_CORE, &rlimit_core) == 0);
}

void
slurp(int fd, struct mem *m)
{
        char *buf;
        size_t len, rest;
        ssize_t r;
        off_t o, len_;
        struct stat st;

        CK(fstat(fd, &st) == 0);
        if (S_ISREG(st.st_mode)) {
                CK((o = lseek(fd, 0, SEEK_CUR)) != -1);
                len_ = st.st_size - o;
                len = len_;
                CK_((off_t)len == len_, "File too large");
                CK((buf = mmap(NULL, len, PROT_READ, 0, fd, o)) != MAP_FAILED);
                m->addr = buf;
        } else {
                alloc_max(m);
                buf = m->addr;
                len = 0;
                rest = m->len;
                for (;;) {
                        CK((r = read(fd, buf + len, rest)) != -1);
                        if (r == 0)
                                break;
                        len += (size_t)r;
                        rest -= (size_t)r;
                        CK_(rest != 0, "Not enough memory");
                }
                free_tail(m, buf + len);
        }
        m->len = len;
}

int
main(int argc, char **argv)
{
        u_char nl = '\n';
        const u_char **base, **ptr, *buf, *p, *q;
        size_t len, rest;
        struct mem m;

        CK_(argc == 1, "No args please");

        slurp(STDIN_FILENO, &m);
        buf = m.addr;
        len = m.len;
        CK_(len == 0 || buf[len - 1] == nl, "Missing final newline");

        alloc_max(&m);
        base = ptr = m.addr;
        rest = m.len/sizeof(u_char *);
        for (q = buf, p = q; q < buf + len; q++) {
                if (*q == nl) {
                        CK_(rest != 0, "Not enough memory");
                        *ptr = p;
                        p = q + 1;
                        ptr++;
                        rest--;
                }
        }
        free_tail(&m, ptr);

        CK_(ptr - base <= (size_t)INT_MAX, "Fix radixsort(3)");
        CK(radixsort(base, (int)(ptr - base), NULL, nl) == 0);

        while (base < ptr) {
                p = *base;
                do
                        putchar(*p);
                while (*p++ != nl);
                base++;
        }

        return 0;
}

-- END rsort.c --

Here's an example of sort(1) without arguments silently sorting wrong.
Pipe it into
  b64decode -p | gunzip | sort | sort -c

begin-base64 644 -
H4sIAAAAAAACA+3dzXLaMBQG0DV6Cmay66bdZJVtX6YzfQOP3r2hAXMlm58AA5J9zrQEmq7w/eQr
IZuUAC775S1wpNgZDk/y/nkO/7Z/kcNLtQJGzdWPmtm7gAi+uG8JXQvI5ctTudk9jvOG2cd6WqEC
MD6gprijG5xZuYrP8/gCKUApApK4iu4oTVqjz59/wnQ8xbl5+KG4mCupfHKhp2y0NdxKHeQIVcC+
fyj7zKppqJoILYSQgHgwmYcW4qQz/O1+KQMBbMdx6efz4WN8lmzGkxpQlCCS3D+1GRfIcuy8Ulga
C5ObsSsbNGGSBGLB5TNMPJkcP4LZ7v4UJ5iwy8MZRgJBLHi0XL0YZh6UHCh07h1jQ4cfF5uGcgIQ
93uf2PGNwIKi507Fhq3q1pTXnaSOy1jOT0g1CERbZ7g0Xeo6nLsWtcylkjBEcp0fZwfMekbwcRhF
tfnKutMmYDOp6uqGqenCHVXjJo3ipRVaWmt0xw8a4g7WbbUF6VDOHd6LyOiJbgGQyzX1Nrlq4rcp
di7uQ4CYgkg4fHzPdqbxiK1E3BwTLhCuv3HJaqBAgbInzd5kItzCKxdT23Vee4U4oiqeyhiLvKEw
UB4gBX0Z6n62/IrmMxtZZ/57+N0QP2W3ni0JvUWh/k6KIZ3/KqTToQn/KU9iVqSmzIm8CFbfJvut
6iJ31b8MAhLWZcPok1b12dHkJs9MQg5N2cx1qi1UtgJ5vJ+XiqSavk4v6de4G7EB0er0ZOfw9T3v
KruRsWXZ3aNlk+ZX7htpadUhCB43jPzmnYhoR8qLXKuHokVDhJzhyq9Q624DnepAyVB49xY4Whg4
QToBYUTxAGKJUgXF9BJfu0TtL0HAQFJXrrzPdKquF1UlBFkxgFJe1AkwnxvvytNiXsLIqK6e4++p
0rD0gNA2OROauTuO61aQz0ZsvAVK7+nnBYO/3AB3ccGRA4WTFggmIIK9+O0tEBJAIkFEFuj4HTDZ
kQYjKbfIjvcaj3Ze3dXFCguF1q2tt0BauMqbtwDpbJ/LBJT1ggzVJFMZQL/sr3aYcKIBsQTEr2l2
vyNoII/QBQtYDhDOJiCRgNA1wGoiMgaiyOpY9nGcMFqDXAIC2DqLdqICCCQIB99hNdGBwjAIggmI
YCcs/goLIJEgHTyflUlHCWMoSCUgfM2yaCwhgDiCkIBMLF7++s7C/z+OD8dfInGg3Lnl5OKgLsKg
I1DHy6vpHB73tZ1DtZdNstpX2T2N1eurWIX1eG/eAgQU1DU3s9Xc4cHYCuIIiNtL2OyvuAE5RDGB
+m7AZJPNZnxm8w0AALTpHziJx1BSzAEA
====

Reply | Threaded
Open this post in threaded view
|

Re: sort(1) without arguments segfaults and sorts wrong

Paul Stoeber-2
A simpler wrong-sorting example:
  perl -e 'print "\n"x117000,"x\n"' | sort | sort -c