grep(1) -m support

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
12 messages Options
Reply | Threaded
Open this post in threaded view
|

grep(1) -m support

Paul Irofti-4
Hi,

The -m, or --max-count, option allows the user to stop grep(1)
processing after a given number of matches are found.
If you use constructs such as

  $ grep foo foo.txt | head -n1

you will want to use the much faster version

  $ grep -m1 foo foo.txt

This option already exists in at least Linux, FreeBSD and NetBSD.

For example, I need grep -m for vimtex (see Issue #1018 on GitHub[0]).
But had I known that this option existed, I would have used it in
other scripts in the past as well.

The following diff is adapted from FreeBSD and I tested it with vimtex
and a few hand-crafted examples. We could probably do some tricks with
mcount and mlimit, but I do not think it is worth it. OK?

[0] -- https://github.com/lervag/vimtex/issues/1018

Index: grep.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.c,v
retrieving revision 1.55
diff -u -p -u -p -r1.55 grep.c
--- grep.c 28 Nov 2015 01:17:12 -0000 1.55
+++ grep.c 7 Dec 2017 15:23:00 -0000
@@ -71,6 +71,9 @@ int cflag; /* -c: only show a count of
 int hflag; /* -h: don't print filename headers */
 int iflag; /* -i: ignore case */
 int lflag; /* -l: only show names of files with matches */
+int mflag; /* -m x: stop reading the files after x matches */
+long long mcount; /* count for -m */
+long long mlimit; /* requested value for -m */
 int nflag; /* -n: show line numbers in front of matching lines */
 int oflag; /* -o: print each match */
 int qflag; /* -q: quiet mode (don't output anything) */
@@ -107,9 +110,9 @@ usage(void)
 {
  fprintf(stderr,
 #ifdef NOZ
-    "usage: %s [-abcEFGHhIiLlnoqRsUVvwx] [-A num] [-B num] [-C[num]]\n"
+    "usage: %s [-abcEFGHhIiLlmnoqRsUVvwx] [-A num] [-B num] [-C[num]]\n"
 #else
-    "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
+    "usage: %s [-abcEFGHhIiLlmnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
 #endif
     "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
     "\t[--line-buffered] [pattern] [file ...]\n", __progname);
@@ -117,9 +120,9 @@ usage(void)
 }
 
 #ifdef NOZ
-static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy";
 #else
-static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy";
 #endif
 
 static const struct option long_options[] =
@@ -147,6 +150,7 @@ static const struct option long_options[
  {"ignore-case", no_argument, NULL, 'i'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"files-with-matches", no_argument, NULL, 'l'},
+ {"max-count", required_argument, NULL, 'm'},
  {"line-number", no_argument, NULL, 'n'},
  {"quiet", no_argument, NULL, 'q'},
  {"silent", no_argument, NULL, 'q'},
@@ -375,6 +379,14 @@ main(int argc, char *argv[])
  case 'l':
  Lflag = 0;
  lflag = qflag = 1;
+ break;
+ case 'm':
+ mflag = 1;
+ errno = 0;
+ mlimit = mcount = strtonum(optarg, 1, LLONG_MAX,
+   &errstr);
+ if (errstr != NULL)
+ errx(2, "mcount %s", errstr);
  break;
  case 'n':
  nflag = 1;
Index: grep.h
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.h,v
retrieving revision 1.24
diff -u -p -u -p -r1.24 grep.h
--- grep.h 14 Dec 2015 20:02:07 -0000 1.24
+++ grep.h 7 Dec 2017 15:23:00 -0000
@@ -66,14 +66,17 @@ extern int cflags, eflags;
 /* Command line flags */
 extern int Aflag, Bflag, Eflag, Fflag, Hflag, Lflag,
  Rflag, Zflag,
- bflag, cflag, hflag, iflag, lflag, nflag, oflag, qflag, sflag,
- vflag, wflag, xflag;
+ bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag,
+ sflag, vflag, wflag, xflag;
 extern int binbehave;
 
 extern int first, matchall, patterns, tail, file_err;
 extern char    **pattern;
 extern fastgrep_t *fg_pattern;
 extern regex_t *r_pattern;
+
+/* For -m max-count */
+extern long long mcount, mlimit;
 
 /* For regex errors  */
 #define RE_ERROR_BUF 512
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/util.c,v
retrieving revision 1.57
diff -u -p -u -p -r1.57 util.c
--- util.c 3 Apr 2017 16:18:35 -0000 1.57
+++ util.c 7 Dec 2017 15:23:00 -0000
@@ -97,6 +97,8 @@ procfile(char *fn)
  file_t *f;
  int c, t, z, nottext;
 
+ mcount = mlimit;
+
  if (fn == NULL) {
  fn = "(standard input)";
  f = grep_fdopen(STDIN_FILENO, "r");
@@ -140,6 +142,8 @@ procfile(char *fn)
  linesqueued++;
  }
  c += t;
+ if (mflag && mcount <= 0)
+ break;
  }
  if (Bflag > 0)
  clearqueue();
@@ -223,6 +227,10 @@ redo:
 print:
  if (vflag)
  c = !c;
+
+ /* Count the matches if we have a match limit */
+ if (mflag)
+ mcount -= c;
 
  if (c && binbehave == BIN_FILE_BIN && nottext)
  return c; /* Binary file */

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Paul Irofti-4
On Thu, Dec 07, 2017 at 05:36:59PM +0200, Paul Irofti wrote:

> Hi,
>
> The -m, or --max-count, option allows the user to stop grep(1)
> processing after a given number of matches are found.
> If you use constructs such as
>
>   $ grep foo foo.txt | head -n1
>
> you will want to use the much faster version
>
>   $ grep -m1 foo foo.txt
>
> This option already exists in at least Linux, FreeBSD and NetBSD.
>
> For example, I need grep -m for vimtex (see Issue #1018 on GitHub[0]).
> But, if I had known that this exists, I would have used it in the
> past with other scripts.
>
> The following diff is adapted from FreeBSD and I tested it with vimtex
> and a few hand-crafted examples. We could probably do some tricks with
> mcount and mlimit, but I do not think it is worth it. OK?
>
> [0] -- https://github.com/lervag/vimtex/issues/1018

Here is a new diff that corrects usage to show that -m expects an
argument (as suggested by deraadt@) and also adds the manpage bits.

Index: grep.1
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.1,v
retrieving revision 1.43
diff -u -p -u -p -r1.43 grep.1
--- grep.1 13 Jan 2015 04:45:34 -0000 1.43
+++ grep.1 7 Dec 2017 18:00:37 -0000
@@ -44,6 +44,7 @@
 .Op Fl C Ns Op Ar num
 .Op Fl e Ar pattern
 .Op Fl f Ar file
+.Op Fl m Ar num
 .Op Fl -binary-files Ns = Ns Ar value
 .Op Fl -context Ns Op = Ns Ar num
 .Op Fl -line-buffered
@@ -216,6 +217,10 @@ Pathnames are listed once per file searc
 If the standard input is searched, the string
 .Dq (standard input)
 is written.
+.It Fl m Ar num
+Stop after
+.Ar num
+matches.
 .It Fl n
 Each output line is preceded by its relative line number in the file,
 starting at line 1.
Index: grep.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.c,v
retrieving revision 1.55
diff -u -p -u -p -r1.55 grep.c
--- grep.c 28 Nov 2015 01:17:12 -0000 1.55
+++ grep.c 7 Dec 2017 18:00:38 -0000
@@ -71,6 +71,9 @@ int cflag; /* -c: only show a count of
 int hflag; /* -h: don't print filename headers */
 int iflag; /* -i: ignore case */
 int lflag; /* -l: only show names of files with matches */
+int mflag; /* -m x: stop reading the files after x matches */
+long long mcount; /* count for -m */
+long long mlimit; /* requested value for -m */
 int nflag; /* -n: show line numbers in front of matching lines */
 int oflag; /* -o: print each match */
 int qflag; /* -q: quiet mode (don't output anything) */
@@ -111,15 +114,16 @@ usage(void)
 #else
     "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
 #endif
-    "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
-    "\t[--line-buffered] [pattern] [file ...]\n", __progname);
+    "\t[-e pattern] [-f file] [-m num] [--binary-files=value]\n"
+    "\t[--context[=num]] [--line-buffered] [pattern] [file ...]\n",
+    __progname);
  exit(2);
 }
 
 #ifdef NOZ
-static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy";
 #else
-static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy";
 #endif
 
 static const struct option long_options[] =
@@ -147,6 +151,7 @@ static const struct option long_options[
  {"ignore-case", no_argument, NULL, 'i'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"files-with-matches", no_argument, NULL, 'l'},
+ {"max-count", required_argument, NULL, 'm'},
  {"line-number", no_argument, NULL, 'n'},
  {"quiet", no_argument, NULL, 'q'},
  {"silent", no_argument, NULL, 'q'},
@@ -375,6 +380,14 @@ main(int argc, char *argv[])
  case 'l':
  Lflag = 0;
  lflag = qflag = 1;
+ break;
+ case 'm':
+ mflag = 1;
+ errno = 0;
+ mlimit = mcount = strtonum(optarg, 1, LLONG_MAX,
+   &errstr);
+ if (errstr != NULL)
+ errx(2, "mcount %s", errstr);
  break;
  case 'n':
  nflag = 1;
Index: grep.h
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.h,v
retrieving revision 1.24
diff -u -p -u -p -r1.24 grep.h
--- grep.h 14 Dec 2015 20:02:07 -0000 1.24
+++ grep.h 7 Dec 2017 18:00:38 -0000
@@ -66,14 +66,17 @@ extern int cflags, eflags;
 /* Command line flags */
 extern int Aflag, Bflag, Eflag, Fflag, Hflag, Lflag,
  Rflag, Zflag,
- bflag, cflag, hflag, iflag, lflag, nflag, oflag, qflag, sflag,
- vflag, wflag, xflag;
+ bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag,
+ sflag, vflag, wflag, xflag;
 extern int binbehave;
 
 extern int first, matchall, patterns, tail, file_err;
 extern char    **pattern;
 extern fastgrep_t *fg_pattern;
 extern regex_t *r_pattern;
+
+/* For -m max-count */
+extern long long mcount, mlimit;
 
 /* For regex errors  */
 #define RE_ERROR_BUF 512
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/util.c,v
retrieving revision 1.57
diff -u -p -u -p -r1.57 util.c
--- util.c 3 Apr 2017 16:18:35 -0000 1.57
+++ util.c 7 Dec 2017 18:00:38 -0000
@@ -97,6 +97,8 @@ procfile(char *fn)
  file_t *f;
  int c, t, z, nottext;
 
+ mcount = mlimit;
+
  if (fn == NULL) {
  fn = "(standard input)";
  f = grep_fdopen(STDIN_FILENO, "r");
@@ -140,6 +142,8 @@ procfile(char *fn)
  linesqueued++;
  }
  c += t;
+ if (mflag && mcount <= 0)
+ break;
  }
  if (Bflag > 0)
  clearqueue();
@@ -223,6 +227,10 @@ redo:
 print:
  if (vflag)
  c = !c;
+
+ /* Count the matches if we have a match limit */
+ if (mflag)
+ mcount -= c;
 
  if (c && binbehave == BIN_FILE_BIN && nottext)
  return c; /* Binary file */

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Theo de Raadt-2
seems fairly simple to me, but a few comments

- Could mflag be removed and replaced by overloading mcount = -1
  as the no -m condition?

- What do other systems do with "-m 0"?

> +                     errno = 0;
> +                     mlimit = mcount = strtonum(optarg, 1, LLONG_MAX,
- you should not touch errno like that, this isn't the insane strtol
  interface

> +                             errx(2, "mcount %s", errstr);
-  that is a poor error message


> On Thu, Dec 07, 2017 at 05:36:59PM +0200, Paul Irofti wrote:
> > Hi,
> >
> > The -m, or --max-count, option allows the user to stop grep(1)
> > processing after a given number of matches are found.
> > If you use constructs such as
> >
> >   $ grep foo foo.txt | head -n1
> >
> > you will want to use the much faster version
> >
> >   $ grep -m1 foo foo.txt
> >
> > This option already exists in at least Linux, FreeBSD and NetBSD.
> >
> > For example, I need grep -m for vimtex (see Issue #1018 on GitHub[0]).
> > But, if I had known that this exists, I would have used it in the
> > past with other scripts.
> >
> > The following diff is adapted from FreeBSD and I tested it with vimtex
> > and a few hand-crafted examples. We could probably do some tricks with
> > mcount and mlimit, but I do not think it is worth it. OK?
> >
> > [0] -- https://github.com/lervag/vimtex/issues/1018
>
> Here is a new diff that corrects usage to show that -m expects an
> argument (as suggested by deraadt@) and also adds the manpage bits.
>
> Index: grep.1
> ===================================================================
> RCS file: /cvs/src/usr.bin/grep/grep.1,v
> retrieving revision 1.43
> diff -u -p -u -p -r1.43 grep.1
> --- grep.1 13 Jan 2015 04:45:34 -0000 1.43
> +++ grep.1 7 Dec 2017 18:00:37 -0000
> @@ -44,6 +44,7 @@
>  .Op Fl C Ns Op Ar num
>  .Op Fl e Ar pattern
>  .Op Fl f Ar file
> +.Op Fl m Ar num
>  .Op Fl -binary-files Ns = Ns Ar value
>  .Op Fl -context Ns Op = Ns Ar num
>  .Op Fl -line-buffered
> @@ -216,6 +217,10 @@ Pathnames are listed once per file searc
>  If the standard input is searched, the string
>  .Dq (standard input)
>  is written.
> +.It Fl m Ar num
> +Stop after
> +.Ar num
> +matches.
>  .It Fl n
>  Each output line is preceded by its relative line number in the file,
>  starting at line 1.
> Index: grep.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/grep/grep.c,v
> retrieving revision 1.55
> diff -u -p -u -p -r1.55 grep.c
> --- grep.c 28 Nov 2015 01:17:12 -0000 1.55
> +++ grep.c 7 Dec 2017 18:00:38 -0000
> @@ -71,6 +71,9 @@ int cflag; /* -c: only show a count of
>  int hflag; /* -h: don't print filename headers */
>  int iflag; /* -i: ignore case */
>  int lflag; /* -l: only show names of files with matches */
> +int mflag; /* -m x: stop reading the files after x matches */
> +long long mcount; /* count for -m */
> +long long mlimit; /* requested value for -m */
>  int nflag; /* -n: show line numbers in front of matching lines */
>  int oflag; /* -o: print each match */
>  int qflag; /* -q: quiet mode (don't output anything) */
> @@ -111,15 +114,16 @@ usage(void)
>  #else
>      "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
>  #endif
> -    "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
> -    "\t[--line-buffered] [pattern] [file ...]\n", __progname);
> +    "\t[-e pattern] [-f file] [-m num] [--binary-files=value]\n"
> +    "\t[--context[=num]] [--line-buffered] [pattern] [file ...]\n",
> +    __progname);
>   exit(2);
>  }
>  
>  #ifdef NOZ
> -static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy";
> +static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy";
>  #else
> -static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy";
> +static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy";
>  #endif
>  
>  static const struct option long_options[] =
> @@ -147,6 +151,7 @@ static const struct option long_options[
>   {"ignore-case", no_argument, NULL, 'i'},
>   {"files-without-match", no_argument, NULL, 'L'},
>   {"files-with-matches", no_argument, NULL, 'l'},
> + {"max-count", required_argument, NULL, 'm'},
>   {"line-number", no_argument, NULL, 'n'},
>   {"quiet", no_argument, NULL, 'q'},
>   {"silent", no_argument, NULL, 'q'},
> @@ -375,6 +380,14 @@ main(int argc, char *argv[])
>   case 'l':
>   Lflag = 0;
>   lflag = qflag = 1;
> + break;
> + case 'm':
> + mflag = 1;
> + errno = 0;
> + mlimit = mcount = strtonum(optarg, 1, LLONG_MAX,
> +   &errstr);
> + if (errstr != NULL)
> + errx(2, "mcount %s", errstr);
>   break;
>   case 'n':
>   nflag = 1;
> Index: grep.h
> ===================================================================
> RCS file: /cvs/src/usr.bin/grep/grep.h,v
> retrieving revision 1.24
> diff -u -p -u -p -r1.24 grep.h
> --- grep.h 14 Dec 2015 20:02:07 -0000 1.24
> +++ grep.h 7 Dec 2017 18:00:38 -0000
> @@ -66,14 +66,17 @@ extern int cflags, eflags;
>  /* Command line flags */
>  extern int Aflag, Bflag, Eflag, Fflag, Hflag, Lflag,
>   Rflag, Zflag,
> - bflag, cflag, hflag, iflag, lflag, nflag, oflag, qflag, sflag,
> - vflag, wflag, xflag;
> + bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag,
> + sflag, vflag, wflag, xflag;
>  extern int binbehave;
>  
>  extern int first, matchall, patterns, tail, file_err;
>  extern char    **pattern;
>  extern fastgrep_t *fg_pattern;
>  extern regex_t *r_pattern;
> +
> +/* For -m max-count */
> +extern long long mcount, mlimit;
>  
>  /* For regex errors  */
>  #define RE_ERROR_BUF 512
> Index: util.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/grep/util.c,v
> retrieving revision 1.57
> diff -u -p -u -p -r1.57 util.c
> --- util.c 3 Apr 2017 16:18:35 -0000 1.57
> +++ util.c 7 Dec 2017 18:00:38 -0000
> @@ -97,6 +97,8 @@ procfile(char *fn)
>   file_t *f;
>   int c, t, z, nottext;
>  
> + mcount = mlimit;
> +
>   if (fn == NULL) {
>   fn = "(standard input)";
>   f = grep_fdopen(STDIN_FILENO, "r");
> @@ -140,6 +142,8 @@ procfile(char *fn)
>   linesqueued++;
>   }
>   c += t;
> + if (mflag && mcount <= 0)
> + break;
>   }
>   if (Bflag > 0)
>   clearqueue();
> @@ -223,6 +227,10 @@ redo:
>  print:
>   if (vflag)
>   c = !c;
> +
> + /* Count the matches if we have a match limit */
> + if (mflag)
> + mcount -= c;
>  
>   if (c && binbehave == BIN_FILE_BIN && nottext)
>   return c; /* Binary file */

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Paul Irofti-4
On Thu, Dec 07, 2017 at 11:23:51AM -0700, Theo de Raadt wrote:
> seems fairly simple to me, but a few comments
>
> - Could mflag be removed and replaced by overloading mcount = -1
>   as the no -m condition
>
> - What do other systems do with "-m 0"

GNU grep is kind of random
%-----------------------------------------------------------
root@roslunar:~# echo foo > foo
root@roslunar:~# grep -m-1 foo foo
foo
root@roslunar:~# grep -m0 foo foo                                              
root@roslunar:~# grep -m5 foo foo                                              
foo
%-----------------------------------------------------------
although it uses signed ints
%-----------------------------------------------------------
switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
  {
  case LONGINT_OK:
  case LONGINT_OVERFLOW:
    break;

  default:
    die (EXIT_TROUBLE, 0, _("invalid max count"));
  }
...
if ((max_count == 0
     || (keycc == 0 && out_invert && !match_lines && !match_words))
    && list_files != LISTFILES_NONMATCHING)
  return EXIT_FAILURE;
%-----------------------------------------------------------

FreeBSD handles 0 with error,
%-----------------------------------------------------------
mlimit = mcount = strtoll(optarg, &ep, 10);
if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
    ((errno == EINVAL) && (mcount == 0)))
        err(2, NULL);
else if (ep[0] != '\0') {
        errno = EINVAL;
        err(2, NULL);
}
%-----------------------------------------------------------
but I think it accepts negative numbers and treats them as -m1
%-----------------------------------------------------------
/* Count the matches if we have a match limit */
if (t == 0 && mflag) {
        --mcount;
        if (mflag && mcount <= 0)
                break;
}
%-----------------------------------------------------------

NetBSD is the only implementation that I have found that uses unsigned
ints and it also errors out on -m0.
%-----------------------------------------------------------
mcount = strtoull(optarg, &ep, 10);
if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
    ((errno == EINVAL) && (mcount == 0)))
        err(2, NULL);
else if (ep[0] != '\0') {
        errno = EINVAL;
        err(2, NULL);
}
%-----------------------------------------------------------

So I think we can do whatever, but I would prefer the current
implementation.

>
> > +                     errno = 0;
> > +                     mlimit = mcount = strtonum(optarg, 1, LLONG_MAX,
> - you should not touch errno like that, this isn't the insane strtol
>   interface
>
> > +                             errx(2, "mcount %s", errstr);
> -  that is a poor error message


Updated diff with, I hope, a proper error message:

%-----------------------------------------------------------
grep: number of matches too small
$ obj/grep -m3.1                                          
grep: number of matches invalid
$ obj/grep -m999999999999999999999999999999999999999999999
grep: number of matches too large
%-----------------------------------------------------------

I also added -m in the non-standard man page section as suggested by jmc@.


Index: grep.1
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.1,v
retrieving revision 1.43
diff -u -p -u -p -r1.43 grep.1
--- grep.1 13 Jan 2015 04:45:34 -0000 1.43
+++ grep.1 8 Dec 2017 21:34:24 -0000
@@ -44,6 +44,7 @@
 .Op Fl C Ns Op Ar num
 .Op Fl e Ar pattern
 .Op Fl f Ar file
+.Op Fl m Ar num
 .Op Fl -binary-files Ns = Ns Ar value
 .Op Fl -context Ns Op = Ns Ar num
 .Op Fl -line-buffered
@@ -216,6 +217,10 @@ Pathnames are listed once per file searc
 If the standard input is searched, the string
 .Dq (standard input)
 is written.
+.It Fl m Ar num
+Stop after
+.Ar num
+matches.
 .It Fl n
 Each output line is preceded by its relative line number in the file,
 starting at line 1.
@@ -354,7 +359,7 @@ utility is compliant with the
 specification.
 .Pp
 The flags
-.Op Fl AaBbCGHhILoRUVwZ
+.Op Fl AaBbCGHhILmoRUVwZ
 are extensions to that specification, and the behaviour of the
 .Fl f
 flag when used with an empty pattern file is left undefined.
Index: grep.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.c,v
retrieving revision 1.55
diff -u -p -u -p -r1.55 grep.c
--- grep.c 28 Nov 2015 01:17:12 -0000 1.55
+++ grep.c 8 Dec 2017 21:34:24 -0000
@@ -71,6 +71,9 @@ int cflag; /* -c: only show a count of
 int hflag; /* -h: don't print filename headers */
 int iflag; /* -i: ignore case */
 int lflag; /* -l: only show names of files with matches */
+int mflag; /* -m x: stop reading the files after x matches */
+long long mcount; /* count for -m */
+long long mlimit; /* requested value for -m */
 int nflag; /* -n: show line numbers in front of matching lines */
 int oflag; /* -o: print each match */
 int qflag; /* -q: quiet mode (don't output anything) */
@@ -111,15 +114,16 @@ usage(void)
 #else
     "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
 #endif
-    "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
-    "\t[--line-buffered] [pattern] [file ...]\n", __progname);
+    "\t[-e pattern] [-f file] [-m num] [--binary-files=value]\n"
+    "\t[--context[=num]] [--line-buffered] [pattern] [file ...]\n",
+    __progname);
  exit(2);
 }
 
 #ifdef NOZ
-static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy";
 #else
-static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy";
 #endif
 
 static const struct option long_options[] =
@@ -147,6 +151,7 @@ static const struct option long_options[
  {"ignore-case", no_argument, NULL, 'i'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"files-with-matches", no_argument, NULL, 'l'},
+ {"max-count", required_argument, NULL, 'm'},
  {"line-number", no_argument, NULL, 'n'},
  {"quiet", no_argument, NULL, 'q'},
  {"silent", no_argument, NULL, 'q'},
@@ -375,6 +380,14 @@ main(int argc, char *argv[])
  case 'l':
  Lflag = 0;
  lflag = qflag = 1;
+ break;
+ case 'm':
+ mflag = 1;
+ errno = 0;
+ mlimit = mcount = strtonum(optarg, 1, LLONG_MAX,
+   &errstr);
+ if (errstr != NULL)
+ errx(2, "number of matches %s", errstr);
  break;
  case 'n':
  nflag = 1;
Index: grep.h
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.h,v
retrieving revision 1.24
diff -u -p -u -p -r1.24 grep.h
--- grep.h 14 Dec 2015 20:02:07 -0000 1.24
+++ grep.h 8 Dec 2017 21:34:24 -0000
@@ -66,14 +66,17 @@ extern int cflags, eflags;
 /* Command line flags */
 extern int Aflag, Bflag, Eflag, Fflag, Hflag, Lflag,
  Rflag, Zflag,
- bflag, cflag, hflag, iflag, lflag, nflag, oflag, qflag, sflag,
- vflag, wflag, xflag;
+ bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag,
+ sflag, vflag, wflag, xflag;
 extern int binbehave;
 
 extern int first, matchall, patterns, tail, file_err;
 extern char    **pattern;
 extern fastgrep_t *fg_pattern;
 extern regex_t *r_pattern;
+
+/* For -m max-count */
+extern long long mcount, mlimit;
 
 /* For regex errors  */
 #define RE_ERROR_BUF 512
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/util.c,v
retrieving revision 1.57
diff -u -p -u -p -r1.57 util.c
--- util.c 3 Apr 2017 16:18:35 -0000 1.57
+++ util.c 8 Dec 2017 21:34:24 -0000
@@ -97,6 +97,8 @@ procfile(char *fn)
  file_t *f;
  int c, t, z, nottext;
 
+ mcount = mlimit;
+
  if (fn == NULL) {
  fn = "(standard input)";
  f = grep_fdopen(STDIN_FILENO, "r");
@@ -140,6 +142,8 @@ procfile(char *fn)
  linesqueued++;
  }
  c += t;
+ if (mflag && mcount <= 0)
+ break;
  }
  if (Bflag > 0)
  clearqueue();
@@ -223,6 +227,10 @@ redo:
 print:
  if (vflag)
  c = !c;
+
+ /* Count the matches if we have a match limit */
+ if (mflag)
+ mcount -= c;
 
  if (c && binbehave == BIN_FILE_BIN && nottext)
  return c; /* Binary file */

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Theo de Raadt-2
> root@roslunar:~# grep -m0 foo foo

It looks like it returns 0 lines of the output.

I believe -m0 should work right.  It is the kind of situation
I expect would occur in scripts.

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Paul Irofti-4
On Fri, Dec 08, 2017 at 02:36:47PM -0700, Theo de Raadt wrote:
> > root@roslunar:~# grep -m0 foo foo
>
> It looks like it returns 0 lines of the output.
>
> I believe -m0 should work right.  It is the kind of situation
> I expect would occur in scripts.

But it still sets the return code (which I think is wrong)
%------------------------------------------
root@roslunar:~# echo foo > foo
root@roslunar:~# grep -m0 foo foo
root@roslunar:~# echo $?
1
root@roslunar:~# grep -m5 foo foo
foo
root@roslunar:~# echo $?                                                        
0
%------------------------------------------

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Theo de Raadt-2
> But it still sets the return code (which I think is wrong)
> %------------------------------------------
> root@roslunar:~# echo foo > foo
> root@roslunar:~# grep -m0 foo foo
> root@roslunar:~# echo $?
> 1
> root@roslunar:~# grep -m5 foo foo
> foo
> root@roslunar:~# echo $?                                                        
> 0
> %------------------------------------------

Why do you think it is wrong?

EXIT STATUS
     The grep utility exits with one of the following values:

           0       One or more lines were selected.
           1       No lines were selected.

With -m 0, no lines were selected.  So error 1.

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Paul Irofti-4
On Fri, Dec 08, 2017 at 02:40:54PM -0700, Theo de Raadt wrote:

> > But it still sets the return code (which I think is wrong)
> > %------------------------------------------
> > root@roslunar:~# echo foo > foo
> > root@roslunar:~# grep -m0 foo foo
> > root@roslunar:~# echo $?
> > 1
> > root@roslunar:~# grep -m5 foo foo
> > foo
> > root@roslunar:~# echo $?                                                        
> > 0
> > %------------------------------------------
>
> Why do you think it is wrong?
>
> EXIT STATUS
>      The grep utility exits with one of the following values:
>
>            0       One or more lines were selected.
>            1       No lines were selected.
>
> With -m 0, no lines were selected.  So error 1.

Ugh, sorry. My bad. It is correct.

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Paul Irofti-4
On Fri, Dec 08, 2017 at 11:44:04PM +0200, Paul Irofti wrote:

> On Fri, Dec 08, 2017 at 02:40:54PM -0700, Theo de Raadt wrote:
> > > But it still sets the return code (which I think is wrong)
> > > %------------------------------------------
> > > root@roslunar:~# echo foo > foo
> > > root@roslunar:~# grep -m0 foo foo
> > > root@roslunar:~# echo $?
> > > 1
> > > root@roslunar:~# grep -m5 foo foo
> > > foo
> > > root@roslunar:~# echo $?                                                        
> > > 0
> > > %------------------------------------------
> >
> > Why do you think it is wrong?
> >
> > EXIT STATUS
> >      The grep utility exits with one of the following values:
> >
> >            0       One or more lines were selected.
> >            1       No lines were selected.
> >
> > With -m 0, no lines were selected.  So error 1.
>
> Ugh, sorry. My bad. It is correct.

Here is a new diff that supports zero and negative values for -m.
A negative value clears mflag again, disabling the limit, for constructs such as

  $ grep -m7 -m-1 pattern file

We continue processing in the case of -m0 to maintain the expected behaviour
when it is used with other flags such as -L.

This should make us GNU grep compatible according to all the tests and
option combinations that I could think of at this hour.


Index: grep.1
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.1,v
retrieving revision 1.43
diff -u -p -u -p -r1.43 grep.1
--- grep.1 13 Jan 2015 04:45:34 -0000 1.43
+++ grep.1 8 Dec 2017 22:17:45 -0000
@@ -44,6 +44,7 @@
 .Op Fl C Ns Op Ar num
 .Op Fl e Ar pattern
 .Op Fl f Ar file
+.Op Fl m Ar num
 .Op Fl -binary-files Ns = Ns Ar value
 .Op Fl -context Ns Op = Ns Ar num
 .Op Fl -line-buffered
@@ -216,6 +217,10 @@ Pathnames are listed once per file searc
 If the standard input is searched, the string
 .Dq (standard input)
 is written.
+.It Fl m Ar num
+Stop after
+.Ar num
+matches.
 .It Fl n
 Each output line is preceded by its relative line number in the file,
 starting at line 1.
@@ -354,7 +359,7 @@ utility is compliant with the
 specification.
 .Pp
 The flags
-.Op Fl AaBbCGHhILoRUVwZ
+.Op Fl AaBbCGHhILmoRUVwZ
 are extensions to that specification, and the behaviour of the
 .Fl f
 flag when used with an empty pattern file is left undefined.
Index: grep.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.c,v
retrieving revision 1.55
diff -u -p -u -p -r1.55 grep.c
--- grep.c 28 Nov 2015 01:17:12 -0000 1.55
+++ grep.c 8 Dec 2017 22:17:45 -0000
@@ -71,6 +71,9 @@ int cflag; /* -c: only show a count of
 int hflag; /* -h: don't print filename headers */
 int iflag; /* -i: ignore case */
 int lflag; /* -l: only show names of files with matches */
+int mflag; /* -m x: stop reading the files after x matches */
+long long mcount; /* count for -m */
+long long mlimit; /* requested value for -m */
 int nflag; /* -n: show line numbers in front of matching lines */
 int oflag; /* -o: print each match */
 int qflag; /* -q: quiet mode (don't output anything) */
@@ -111,15 +114,16 @@ usage(void)
 #else
     "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
 #endif
-    "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
-    "\t[--line-buffered] [pattern] [file ...]\n", __progname);
+    "\t[-e pattern] [-f file] [-m num] [--binary-files=value]\n"
+    "\t[--context[=num]] [--line-buffered] [pattern] [file ...]\n",
+    __progname);
  exit(2);
 }
 
 #ifdef NOZ
-static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy";
 #else
-static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy";
 #endif
 
 static const struct option long_options[] =
@@ -147,6 +151,7 @@ static const struct option long_options[
  {"ignore-case", no_argument, NULL, 'i'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"files-with-matches", no_argument, NULL, 'l'},
+ {"max-count", required_argument, NULL, 'm'},
  {"line-number", no_argument, NULL, 'n'},
  {"quiet", no_argument, NULL, 'q'},
  {"silent", no_argument, NULL, 'q'},
@@ -375,6 +380,16 @@ main(int argc, char *argv[])
  case 'l':
  Lflag = 0;
  lflag = qflag = 1;
+ break;
+ case 'm':
+ mflag = 1;
+ errno = 0;
+ mlimit = mcount = strtonum(optarg, LLONG_MIN, LLONG_MAX,
+   &errstr);
+ if (errstr != NULL)
+ errx(2, "number of matches %s", errstr);
+ if (mcount < 0) /* disable max-count */
+ mflag = 0; /* for -m7 -m-1 constructs */
  break;
  case 'n':
  nflag = 1;
Index: grep.h
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.h,v
retrieving revision 1.24
diff -u -p -u -p -r1.24 grep.h
--- grep.h 14 Dec 2015 20:02:07 -0000 1.24
+++ grep.h 8 Dec 2017 22:17:45 -0000
@@ -66,14 +66,17 @@ extern int cflags, eflags;
 /* Command line flags */
 extern int Aflag, Bflag, Eflag, Fflag, Hflag, Lflag,
  Rflag, Zflag,
- bflag, cflag, hflag, iflag, lflag, nflag, oflag, qflag, sflag,
- vflag, wflag, xflag;
+ bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag,
+ sflag, vflag, wflag, xflag;
 extern int binbehave;
 
 extern int first, matchall, patterns, tail, file_err;
 extern char    **pattern;
 extern fastgrep_t *fg_pattern;
 extern regex_t *r_pattern;
+
+/* For -m max-count */
+extern long long mcount, mlimit;
 
 /* For regex errors  */
 #define RE_ERROR_BUF 512
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/util.c,v
retrieving revision 1.57
diff -u -p -u -p -r1.57 util.c
--- util.c 3 Apr 2017 16:18:35 -0000 1.57
+++ util.c 8 Dec 2017 22:17:45 -0000
@@ -97,6 +97,8 @@ procfile(char *fn)
  file_t *f;
  int c, t, z, nottext;
 
+ mcount = mlimit;
+
  if (fn == NULL) {
  fn = "(standard input)";
  f = grep_fdopen(STDIN_FILENO, "r");
@@ -126,6 +128,8 @@ procfile(char *fn)
  if (Bflag > 0)
  initqueue();
  for (c = 0;  c == 0 || !(lflag || qflag); ) {
+ if (mflag && mlimit == 0)
+ break;
  ln.off += ln.len + 1;
  if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
  break;
@@ -140,6 +144,8 @@ procfile(char *fn)
  linesqueued++;
  }
  c += t;
+ if (mflag && mlimit > 0 && mcount <= 0)
+ break;
  }
  if (Bflag > 0)
  clearqueue();
@@ -223,6 +229,10 @@ redo:
 print:
  if (vflag)
  c = !c;
+
+ /* Count the matches if we have a match limit */
+ if (mflag)
+ mcount -= c;
 
  if (c && binbehave == BIN_FILE_BIN && nottext)
  return c; /* Binary file */

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Theo de Raadt-2
> Here is a new diff that supports 0 and negative m values.

I am very unsure about negative values.

In POSIX you have

   -m-1
and
   -m -1

And you'll observe the latter doesn't work like you think it should.

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Paul Irofti-4
On Fri, Dec 08, 2017 at 03:23:02PM -0700, Theo de Raadt wrote:

> > Here is a new diff that supports 0 and negative m values.
>
> I am very unsure about negative values.
>
> In POSIX you have
>
>    -m-1
> and
>    -m -1
>
> And you'll observe the latter doesn't work like you think it should.

Hmm, I am getting the same output with and without spaces. Maybe I am
missing something... Can you show me a possible scenario where these
might differ?

Reply | Threaded
Open this post in threaded view
|

Re: grep(1) -m support

Paul Irofti-4
In reply to this post by Theo de Raadt-2
On Fri, Dec 08, 2017 at 03:23:02PM -0700, Theo de Raadt wrote:

> > Here is a new diff that supports 0 and negative m values.
>
> I am very unsure about negative values.
>
> In POSIX you have
>
>    -m-1
> and
>    -m -1
>
> And you'll observe the latter doesn't work like you think it should.

Here is a version that rejects negative values but still supports -m 0
(which stops before reading any lines).


Index: grep.1
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.1,v
retrieving revision 1.43
diff -u -p -u -p -r1.43 grep.1
--- grep.1 13 Jan 2015 04:45:34 -0000 1.43
+++ grep.1 9 Dec 2017 17:29:08 -0000
@@ -44,6 +44,7 @@
 .Op Fl C Ns Op Ar num
 .Op Fl e Ar pattern
 .Op Fl f Ar file
+.Op Fl m Ar num
 .Op Fl -binary-files Ns = Ns Ar value
 .Op Fl -context Ns Op = Ns Ar num
 .Op Fl -line-buffered
@@ -216,6 +217,10 @@ Pathnames are listed once per file searc
 If the standard input is searched, the string
 .Dq (standard input)
 is written.
+.It Fl m Ar num
+Stop after
+.Ar num
+matches.
 .It Fl n
 Each output line is preceded by its relative line number in the file,
 starting at line 1.
@@ -354,7 +359,7 @@ utility is compliant with the
 specification.
 .Pp
 The flags
-.Op Fl AaBbCGHhILoRUVwZ
+.Op Fl AaBbCGHhILmoRUVwZ
 are extensions to that specification, and the behaviour of the
 .Fl f
 flag when used with an empty pattern file is left undefined.
Index: grep.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.c,v
retrieving revision 1.55
diff -u -p -u -p -r1.55 grep.c
--- grep.c 28 Nov 2015 01:17:12 -0000 1.55
+++ grep.c 9 Dec 2017 17:29:08 -0000
@@ -71,6 +71,9 @@ int cflag; /* -c: only show a count of
 int hflag; /* -h: don't print filename headers */
 int iflag; /* -i: ignore case */
 int lflag; /* -l: only show names of files with matches */
+int mflag; /* -m x: stop reading the files after x matches */
+long long mcount; /* count for -m */
+long long mlimit; /* requested value for -m */
 int nflag; /* -n: show line numbers in front of matching lines */
 int oflag; /* -o: print each match */
 int qflag; /* -q: quiet mode (don't output anything) */
@@ -111,15 +114,16 @@ usage(void)
 #else
     "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
 #endif
-    "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
-    "\t[--line-buffered] [pattern] [file ...]\n", __progname);
+    "\t[-e pattern] [-f file] [-m num] [--binary-files=value]\n"
+    "\t[--context[=num]] [--line-buffered] [pattern] [file ...]\n",
+    __progname);
  exit(2);
 }
 
 #ifdef NOZ
-static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy";
 #else
-static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy";
+static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy";
 #endif
 
 static const struct option long_options[] =
@@ -147,6 +151,7 @@ static const struct option long_options[
  {"ignore-case", no_argument, NULL, 'i'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"files-with-matches", no_argument, NULL, 'l'},
+ {"max-count", required_argument, NULL, 'm'},
  {"line-number", no_argument, NULL, 'n'},
  {"quiet", no_argument, NULL, 'q'},
  {"silent", no_argument, NULL, 'q'},
@@ -375,6 +380,13 @@ main(int argc, char *argv[])
  case 'l':
  Lflag = 0;
  lflag = qflag = 1;
+ break;
+ case 'm':
+ mflag = 1;
+ mlimit = mcount = strtonum(optarg, 0, LLONG_MAX,
+   &errstr);
+ if (errstr != NULL)
+ errx(2, "number of matches %s", errstr);
  break;
  case 'n':
  nflag = 1;
Index: grep.h
===================================================================
RCS file: /cvs/src/usr.bin/grep/grep.h,v
retrieving revision 1.24
diff -u -p -u -p -r1.24 grep.h
--- grep.h 14 Dec 2015 20:02:07 -0000 1.24
+++ grep.h 9 Dec 2017 17:29:08 -0000
@@ -66,14 +66,17 @@ extern int cflags, eflags;
 /* Command line flags */
 extern int Aflag, Bflag, Eflag, Fflag, Hflag, Lflag,
  Rflag, Zflag,
- bflag, cflag, hflag, iflag, lflag, nflag, oflag, qflag, sflag,
- vflag, wflag, xflag;
+ bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag,
+ sflag, vflag, wflag, xflag;
 extern int binbehave;
 
 extern int first, matchall, patterns, tail, file_err;
 extern char    **pattern;
 extern fastgrep_t *fg_pattern;
 extern regex_t *r_pattern;
+
+/* For -m max-count */
+extern long long mcount, mlimit;
 
 /* For regex errors  */
 #define RE_ERROR_BUF 512
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/util.c,v
retrieving revision 1.57
diff -u -p -u -p -r1.57 util.c
--- util.c 3 Apr 2017 16:18:35 -0000 1.57
+++ util.c 9 Dec 2017 17:29:08 -0000
@@ -97,6 +97,8 @@ procfile(char *fn)
  file_t *f;
  int c, t, z, nottext;
 
+ mcount = mlimit;
+
  if (fn == NULL) {
  fn = "(standard input)";
  f = grep_fdopen(STDIN_FILENO, "r");
@@ -126,6 +128,8 @@ procfile(char *fn)
  if (Bflag > 0)
  initqueue();
  for (c = 0;  c == 0 || !(lflag || qflag); ) {
+ if (mflag && mlimit == 0)
+ break;
  ln.off += ln.len + 1;
  if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
  break;
@@ -140,6 +144,8 @@ procfile(char *fn)
  linesqueued++;
  }
  c += t;
+ if (mflag && mcount <= 0)
+ break;
  }
  if (Bflag > 0)
  clearqueue();
@@ -223,6 +229,10 @@ redo:
 print:
  if (vflag)
  c = !c;
+
+ /* Count the matches if we have a match limit */
+ if (mflag)
+ mcount -= c;
 
  if (c && binbehave == BIN_FILE_BIN && nottext)
  return c; /* Binary file */