[patch] improve strptime(3) %z timezone parsing

classic Classic list List threaded Threaded
19 messages Options
Reply | Threaded
Open this post in threaded view
|

[patch] improve strptime(3) %z timezone parsing

Hiltjo Posthuma
Hi,

I noticed some things in the strptime(3) implementing when parsing timezone
strings using the %z format string.

1. I noticed the tm_gmtoff value is not correctly set in some cases. It should
set tm_gmtoff to the offset from UTC in seconds (as described in mktime(3)).

2. The military/nautical UTC offsets are also reversed. This was also actually
a bug in RFC822:

RFC5322 (referenced in strptime(3) man page):
https://tools.ietf.org/html/rfc5322
Section 4.3, page 34 says:
"
   The 1 character military time zones were defined in a non-standard
   way in [RFC0822] and are therefore unpredictable in their meaning.
   The original definitions of the military zones "A" through "I" are
   equivalent to "+0100" through "+0900", respectively; "K", "L", and
   "M" are equivalent to "+1000", "+1100", and "+1200", respectively;
   "N" through "Y" are equivalent to "-0100" through "-1200".
   respectively; and "Z" is equivalent to "+0000".  However, because of
   the error in [RFC0822], they SHOULD all be considered equivalent to
   "-0000" unless there is out-of-band information confirming their
   meaning.
"

While comparing the other BSD and Linux code-bases I noticed NetBSD has fixed
this on 2017-08-24. It is not in NetBSD 8 stable yet, but in NetBSD-current:
http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/time/strptime.c

3. The military zone J is also ambiguous. Some standards define it as unused,
while others define it as "local observed time". NetBSD handles it as local
observed time, but OpenBSD returns NULL in strptime(3). I left this as is.

4. While at it I also fixed some trailing whitespaces.

Thanks for not falling asleep and reading the long text :)

Patch and test program below:

diff --git lib/libc/time/strptime.c lib/libc/time/strptime.c
index eaf182dc773..86a0d5353ee 100644
--- lib/libc/time/strptime.c
+++ lib/libc/time/strptime.c
@@ -116,7 +116,7 @@ _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
  fmt++;
  continue;
  }
-
+
  if ((c = *fmt++) != '%')
  goto literal;
 
@@ -142,7 +142,7 @@ literal:
  _LEGAL_ALT(0);
  alt_format |= _ALT_O;
  goto again;
-
+
  /*
  * "Complex" conversion rules, implemented through recursion.
  */
@@ -380,7 +380,7 @@ literal:
  * number but without the century.
  */
  if (!(_conv_num(&bp, &i, 0, 99)))
- return (NULL);
+ return (NULL);
  continue;
 
  case 'G': /* The year corresponding to the ISO week
@@ -500,7 +500,7 @@ literal:
  ep = _find_string(bp, &i, nast, NULL, 4);
  if (ep != NULL) {
 #ifdef TM_GMTOFF
- tm->TM_GMTOFF = -5 - i;
+ tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
 #endif
 #ifdef TM_ZONE
  tm->TM_ZONE = (char *)nast[i];
@@ -512,7 +512,7 @@ literal:
  if (ep != NULL) {
  tm->tm_isdst = 1;
 #ifdef TM_GMTOFF
- tm->TM_GMTOFF = -4 - i;
+ tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
 #endif
 #ifdef TM_ZONE
  tm->TM_ZONE = (char *)nadt[i];
@@ -527,11 +527,12 @@ literal:
  /* Argh! No 'J'! */
  if (*bp >= 'A' && *bp <= 'I')
  tm->TM_GMTOFF =
-    ('A' - 1) - (int)*bp;
+    (int)*bp - ('A' - 1);
  else if (*bp >= 'L' && *bp <= 'M')
- tm->TM_GMTOFF = 'A' - (int)*bp;
+ tm->TM_GMTOFF = (int)*bp - 'A';
  else if (*bp >= 'N' && *bp <= 'Y')
- tm->TM_GMTOFF = (int)*bp - 'M';
+ tm->TM_GMTOFF = 'M' - (int)*bp;
+ tm->TM_GMTOFF *= SECSPERHOUR;
 #endif
 #ifdef TM_ZONE
  tm->TM_ZONE = NULL; /* XXX */
@@ -556,14 +557,15 @@ literal:
  }
  switch (i) {
  case 2:
- offs *= 100;
+ offs *= SECSPERHOUR;
  break;
  case 4:
  i = offs % 100;
- if (i >= 60)
+ offs /= 100;
+ if (i >= SECSPERMIN)
  return NULL;
  /* Convert minutes into decimal */
- offs = (offs / 100) * 100 + (i * 50) / 30;
+ offs = offs * SECSPERHOUR + i * SECSPERMIN;
  break;
  default:
  return NULL;
@@ -719,7 +721,7 @@ _find_string(const u_char *bp, int *tgt, const char * const *n1,
  return NULL;
 }
 
-static int              
+static int
 leaps_thru_end_of(const int y)
 {
  return (y >= 0) ? (y / 4 - y / 100 + y / 400) :


Quick & dirty test program below:


#include <sys/types.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

void
testtime(const char *s)
{
        struct tm tm;

        memset(&tm, 0, sizeof(tm));
        if (strptime(s, "%Y-%m-%d %H:%M:%S %z", &tm) == NULL)
                errx(1, "strptime: s=%s", s);

        printf("s=%s, gmtoff=%ld, zone=%s\n", s, tm.tm_gmtoff,
               tm.tm_zone ? tm.tm_zone : "(null)");
}

int
main(void)
{
        testtime("2000-01-01 00:00:01 Z");
        testtime("2000-01-01 00:00:01 UT");
        testtime("2000-01-01 00:00:01 GMT");
        testtime("2000-01-01 00:00:01 -04:00");
        testtime("2000-01-01 00:00:01 -0400");
        testtime("2000-01-01 00:00:01 +04:00");
        testtime("2000-01-01 00:00:01 +0400");
        testtime("2000-01-01 00:00:01 EST");
        testtime("2000-01-01 00:00:01 EDT");
        testtime("2000-01-01 00:00:01 A");
        testtime("2000-01-01 00:00:01 B");
        testtime("2000-01-01 00:00:01 C");
        //testtime("2000-01-01 00:00:01 J"); // strptime returns NULL.
        testtime("2000-01-01 00:00:01 Q");
        testtime("2000-01-01 00:00:01 R");

        return 0;
}

--
Kind regards,
Hiltjo

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Hiltjo Posthuma
On Sun, Feb 24, 2019 at 01:11:39PM +0100, Hiltjo Posthuma wrote:

> Hi,
>
> I noticed some things in the strptime(3) implementing when parsing timezone
> strings using the %z format string.
>
> 1. I noticed the tm_gmtoff value is not correctly set in some cases. It should
> set tm_gmtoff to the offset from UTC in seconds (as described in mktime(3)).
>
> 2. The military/nautical UTC offsets are also reversed. This was also actually
> a bug in RFC822:
>
> RFC5322 (referenced in strptime(3) man page):
> https://tools.ietf.org/html/rfc5322
> Section 4.3, page 34 says:
> "
>    The 1 character military time zones were defined in a non-standard
>    way in [RFC0822] and are therefore unpredictable in their meaning.
>    The original definitions of the military zones "A" through "I" are
>    equivalent to "+0100" through "+0900", respectively; "K", "L", and
>    "M" are equivalent to "+1000", "+1100", and "+1200", respectively;
>    "N" through "Y" are equivalent to "-0100" through "-1200".
>    respectively; and "Z" is equivalent to "+0000".  However, because of
>    the error in [RFC0822], they SHOULD all be considered equivalent to
>    "-0000" unless there is out-of-band information confirming their
>    meaning.
> "
>
> While comparing the other BSD and Linux code-bases I noticed NetBSD has fixed
> this on 2017-08-24. It is not in NetBSD 8 stable yet, but in NetBSD-current:
> http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/time/strptime.c
>
> 3. The military zone J is also ambiguous. Some standards define it as unused,
> while others define it as "local observed time". NetBSD handles it as local
> observed time, but OpenBSD returns NULL in strptime(3). I left this as is.
>
> 4. While at it I also fixed some trailing whitespaces.
>
> Thanks for not falling asleep and reading the long text :)
>
> Patch and test program below:
>
> diff --git lib/libc/time/strptime.c lib/libc/time/strptime.c
> index eaf182dc773..86a0d5353ee 100644
> --- lib/libc/time/strptime.c
> +++ lib/libc/time/strptime.c
> @@ -116,7 +116,7 @@ _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
>   fmt++;
>   continue;
>   }
> -
> +
>   if ((c = *fmt++) != '%')
>   goto literal;
>  
> @@ -142,7 +142,7 @@ literal:
>   _LEGAL_ALT(0);
>   alt_format |= _ALT_O;
>   goto again;
> -
> +
>   /*
>   * "Complex" conversion rules, implemented through recursion.
>   */
> @@ -380,7 +380,7 @@ literal:
>   * number but without the century.
>   */
>   if (!(_conv_num(&bp, &i, 0, 99)))
> - return (NULL);
> + return (NULL);
>   continue;
>  
>   case 'G': /* The year corresponding to the ISO week
> @@ -500,7 +500,7 @@ literal:
>   ep = _find_string(bp, &i, nast, NULL, 4);
>   if (ep != NULL) {
>  #ifdef TM_GMTOFF
> - tm->TM_GMTOFF = -5 - i;
> + tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
>  #endif
>  #ifdef TM_ZONE
>   tm->TM_ZONE = (char *)nast[i];
> @@ -512,7 +512,7 @@ literal:
>   if (ep != NULL) {
>   tm->tm_isdst = 1;
>  #ifdef TM_GMTOFF
> - tm->TM_GMTOFF = -4 - i;
> + tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
>  #endif
>  #ifdef TM_ZONE
>   tm->TM_ZONE = (char *)nadt[i];
> @@ -527,11 +527,12 @@ literal:
>   /* Argh! No 'J'! */
>   if (*bp >= 'A' && *bp <= 'I')
>   tm->TM_GMTOFF =
> -    ('A' - 1) - (int)*bp;
> +    (int)*bp - ('A' - 1);
>   else if (*bp >= 'L' && *bp <= 'M')
> - tm->TM_GMTOFF = 'A' - (int)*bp;
> + tm->TM_GMTOFF = (int)*bp - 'A';
>   else if (*bp >= 'N' && *bp <= 'Y')
> - tm->TM_GMTOFF = (int)*bp - 'M';
> + tm->TM_GMTOFF = 'M' - (int)*bp;
> + tm->TM_GMTOFF *= SECSPERHOUR;
>  #endif
>  #ifdef TM_ZONE
>   tm->TM_ZONE = NULL; /* XXX */
> @@ -556,14 +557,15 @@ literal:
>   }
>   switch (i) {
>   case 2:
> - offs *= 100;
> + offs *= SECSPERHOUR;
>   break;
>   case 4:
>   i = offs % 100;
> - if (i >= 60)
> + offs /= 100;
> + if (i >= SECSPERMIN)
>   return NULL;
>   /* Convert minutes into decimal */
> - offs = (offs / 100) * 100 + (i * 50) / 30;
> + offs = offs * SECSPERHOUR + i * SECSPERMIN;
>   break;
>   default:
>   return NULL;
> @@ -719,7 +721,7 @@ _find_string(const u_char *bp, int *tgt, const char * const *n1,
>   return NULL;
>  }
>  
> -static int              
> +static int
>  leaps_thru_end_of(const int y)
>  {
>   return (y >= 0) ? (y / 4 - y / 100 + y / 400) :
>
>
> Quick & dirty test program below:
>
>
> #include <sys/types.h>
>
> #include <err.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <time.h>
>
> void
> testtime(const char *s)
> {
> struct tm tm;
>
> memset(&tm, 0, sizeof(tm));
> if (strptime(s, "%Y-%m-%d %H:%M:%S %z", &tm) == NULL)
> errx(1, "strptime: s=%s", s);
>
> printf("s=%s, gmtoff=%ld, zone=%s\n", s, tm.tm_gmtoff,
>       tm.tm_zone ? tm.tm_zone : "(null)");
> }
>
> int
> main(void)
> {
> testtime("2000-01-01 00:00:01 Z");
> testtime("2000-01-01 00:00:01 UT");
> testtime("2000-01-01 00:00:01 GMT");
> testtime("2000-01-01 00:00:01 -04:00");
> testtime("2000-01-01 00:00:01 -0400");
> testtime("2000-01-01 00:00:01 +04:00");
> testtime("2000-01-01 00:00:01 +0400");
> testtime("2000-01-01 00:00:01 EST");
> testtime("2000-01-01 00:00:01 EDT");
> testtime("2000-01-01 00:00:01 A");
> testtime("2000-01-01 00:00:01 B");
> testtime("2000-01-01 00:00:01 C");
> //testtime("2000-01-01 00:00:01 J"); // strptime returns NULL.
> testtime("2000-01-01 00:00:01 Q");
> testtime("2000-01-01 00:00:01 R");
>
> return 0;
> }
>

Hi,

Any thoughts on the above comments and patch?

--
Kind regards,
Hiltjo

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Hiltjo Posthuma
On Sun, Mar 03, 2019 at 09:01:58PM +0100, Hiltjo Posthuma wrote:

> On Sun, Feb 24, 2019 at 01:11:39PM +0100, Hiltjo Posthuma wrote:
> > Hi,
> >
> > I noticed some things in the strptime(3) implementing when parsing timezone
> > strings using the %z format string.
> >
> > 1. I noticed the tm_gmtoff value is not correctly set in some cases. It should
> > set tm_gmtoff to the offset from UTC in seconds (as described in mktime(3)).
> >
> > 2. The military/nautical UTC offsets are also reversed. This was also actually
> > a bug in RFC822:
> >
> > RFC5322 (referenced in strptime(3) man page):
> > https://tools.ietf.org/html/rfc5322
> > Section 4.3, page 34 says:
> > "
> >    The 1 character military time zones were defined in a non-standard
> >    way in [RFC0822] and are therefore unpredictable in their meaning.
> >    The original definitions of the military zones "A" through "I" are
> >    equivalent to "+0100" through "+0900", respectively; "K", "L", and
> >    "M" are equivalent to "+1000", "+1100", and "+1200", respectively;
> >    "N" through "Y" are equivalent to "-0100" through "-1200".
> >    respectively; and "Z" is equivalent to "+0000".  However, because of
> >    the error in [RFC0822], they SHOULD all be considered equivalent to
> >    "-0000" unless there is out-of-band information confirming their
> >    meaning.
> > "
> >
> > While comparing the other BSD and Linux code-bases I noticed NetBSD has fixed
> > this on 2017-08-24. It is not in NetBSD 8 stable yet, but in NetBSD-current:
> > http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/time/strptime.c
> >
> > 3. The military zone J is also ambiguous. Some standards define it as unused,
> > while others define it as "local observed time". NetBSD handles it as local
> > observed time, but OpenBSD returns NULL in strptime(3). I left this as is.
> >
> > 4. While at it I also fixed some trailing whitespaces.
> >
> > Thanks for not falling asleep and reading the long text :)
> >
> > Patch and test program below:
> >
> > diff --git lib/libc/time/strptime.c lib/libc/time/strptime.c
> > index eaf182dc773..86a0d5353ee 100644
> > --- lib/libc/time/strptime.c
> > +++ lib/libc/time/strptime.c
> > @@ -116,7 +116,7 @@ _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
> >   fmt++;
> >   continue;
> >   }
> > -
> > +
> >   if ((c = *fmt++) != '%')
> >   goto literal;
> >  
> > @@ -142,7 +142,7 @@ literal:
> >   _LEGAL_ALT(0);
> >   alt_format |= _ALT_O;
> >   goto again;
> > -
> > +
> >   /*
> >   * "Complex" conversion rules, implemented through recursion.
> >   */
> > @@ -380,7 +380,7 @@ literal:
> >   * number but without the century.
> >   */
> >   if (!(_conv_num(&bp, &i, 0, 99)))
> > - return (NULL);
> > + return (NULL);
> >   continue;
> >  
> >   case 'G': /* The year corresponding to the ISO week
> > @@ -500,7 +500,7 @@ literal:
> >   ep = _find_string(bp, &i, nast, NULL, 4);
> >   if (ep != NULL) {
> >  #ifdef TM_GMTOFF
> > - tm->TM_GMTOFF = -5 - i;
> > + tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
> >  #endif
> >  #ifdef TM_ZONE
> >   tm->TM_ZONE = (char *)nast[i];
> > @@ -512,7 +512,7 @@ literal:
> >   if (ep != NULL) {
> >   tm->tm_isdst = 1;
> >  #ifdef TM_GMTOFF
> > - tm->TM_GMTOFF = -4 - i;
> > + tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
> >  #endif
> >  #ifdef TM_ZONE
> >   tm->TM_ZONE = (char *)nadt[i];
> > @@ -527,11 +527,12 @@ literal:
> >   /* Argh! No 'J'! */
> >   if (*bp >= 'A' && *bp <= 'I')
> >   tm->TM_GMTOFF =
> > -    ('A' - 1) - (int)*bp;
> > +    (int)*bp - ('A' - 1);
> >   else if (*bp >= 'L' && *bp <= 'M')
> > - tm->TM_GMTOFF = 'A' - (int)*bp;
> > + tm->TM_GMTOFF = (int)*bp - 'A';
> >   else if (*bp >= 'N' && *bp <= 'Y')
> > - tm->TM_GMTOFF = (int)*bp - 'M';
> > + tm->TM_GMTOFF = 'M' - (int)*bp;
> > + tm->TM_GMTOFF *= SECSPERHOUR;
> >  #endif
> >  #ifdef TM_ZONE
> >   tm->TM_ZONE = NULL; /* XXX */
> > @@ -556,14 +557,15 @@ literal:
> >   }
> >   switch (i) {
> >   case 2:
> > - offs *= 100;
> > + offs *= SECSPERHOUR;
> >   break;
> >   case 4:
> >   i = offs % 100;
> > - if (i >= 60)
> > + offs /= 100;
> > + if (i >= SECSPERMIN)
> >   return NULL;
> >   /* Convert minutes into decimal */
> > - offs = (offs / 100) * 100 + (i * 50) / 30;
> > + offs = offs * SECSPERHOUR + i * SECSPERMIN;
> >   break;
> >   default:
> >   return NULL;
> > @@ -719,7 +721,7 @@ _find_string(const u_char *bp, int *tgt, const char * const *n1,
> >   return NULL;
> >  }
> >  
> > -static int              
> > +static int
> >  leaps_thru_end_of(const int y)
> >  {
> >   return (y >= 0) ? (y / 4 - y / 100 + y / 400) :
> >
> >
> > Quick & dirty test program below:
> >
> >
> > #include <sys/types.h>
> >
> > #include <err.h>
> > #include <stdio.h>
> > #include <stdlib.h>
> > #include <string.h>
> > #include <time.h>
> >
> > void
> > testtime(const char *s)
> > {
> > struct tm tm;
> >
> > memset(&tm, 0, sizeof(tm));
> > if (strptime(s, "%Y-%m-%d %H:%M:%S %z", &tm) == NULL)
> > errx(1, "strptime: s=%s", s);
> >
> > printf("s=%s, gmtoff=%ld, zone=%s\n", s, tm.tm_gmtoff,
> >       tm.tm_zone ? tm.tm_zone : "(null)");
> > }
> >
> > int
> > main(void)
> > {
> > testtime("2000-01-01 00:00:01 Z");
> > testtime("2000-01-01 00:00:01 UT");
> > testtime("2000-01-01 00:00:01 GMT");
> > testtime("2000-01-01 00:00:01 -04:00");
> > testtime("2000-01-01 00:00:01 -0400");
> > testtime("2000-01-01 00:00:01 +04:00");
> > testtime("2000-01-01 00:00:01 +0400");
> > testtime("2000-01-01 00:00:01 EST");
> > testtime("2000-01-01 00:00:01 EDT");
> > testtime("2000-01-01 00:00:01 A");
> > testtime("2000-01-01 00:00:01 B");
> > testtime("2000-01-01 00:00:01 C");
> > //testtime("2000-01-01 00:00:01 J"); // strptime returns NULL.
> > testtime("2000-01-01 00:00:01 Q");
> > testtime("2000-01-01 00:00:01 R");
> >
> > return 0;
> > }
> >
>

Another bump, any thoughts on the above comments and patch?

--
Kind regards,
Hiltjo

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Hiltjo Posthuma
In reply to this post by Hiltjo Posthuma
On Sun, Mar 10, 2019 at 04:16:37PM -0400, Ted Unangst wrote:

> Hiltjo Posthuma wrote:
> > 2. The military/nautical UTC offsets are also reversed. This was also actually
> > a bug in RFC822:
> >
> > RFC5322 (referenced in strptime(3) man page):
> > https://tools.ietf.org/html/rfc5322
> > Section 4.3, page 34 says:
> > "
> >    The 1 character military time zones were defined in a non-standard
> >    way in [RFC0822] and are therefore unpredictable in their meaning.
> >    The original definitions of the military zones "A" through "I" are
> >    equivalent to "+0100" through "+0900", respectively; "K", "L", and
> >    "M" are equivalent to "+1000", "+1100", and "+1200", respectively;
> >    "N" through "Y" are equivalent to "-0100" through "-1200".
> >    respectively; and "Z" is equivalent to "+0000".  However, because of
> >    the error in [RFC0822], they SHOULD all be considered equivalent to
> >    "-0000" unless there is out-of-band information confirming their
> >    meaning.
> > "
> >
> > While comparing the other BSD and Linux code-bases I noticed NetBSD has fixed
> > this on 2017-08-24. It is not in NetBSD 8 stable yet, but in NetBSD-current:
> > http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/time/strptime.c
>
> So the fix doesn't follow either RFC? Does anyone implement the 5322 suggested
> behavior of all zero offsets?

Afaik no implementation uses zero offsets for military zones, but most
implementations do return NULL when parsing a military zone with strptime(3)
%z.

Not all BSD man pages reference the same RFCs, like RFC5322. The OpenBSD
strptime(3) man page (%z) describes military/nautical zones are supported.

- NetBSD supports it (correctly since only a few months ago and it is not in
  stable or backported).
- FreeBSD and DragonFlyBSD do not support them (strptime returns NULL).
- Linux glibc does not support them (strptime returns NULL).
- Linux musl libc does not support %z.

Any comments/OKs for the other points (1, 3, 4) ?

Thanks for the review,

--
Kind regards,
Hiltjo

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Ted Unangst-6
Hiltjo Posthuma wrote:
> Not all BSD man pages reference the same RFCs, like RFC5322. The OpenBSD
> strptime(3) man page (%z) describes military/nautical zones are supported.
>
> - NetBSD supports it (correctly since only a few months ago and it is not in
>   stable or backported).
> - FreeBSD and DragonFlyBSD do not support them (strptime returns NULL).
> - Linux glibc does not support them (strptime returns NULL).
> - Linux musl libc does not support %z.

This sounds like we should just drop support as well. I think that's better
than supporting something that's clearly unreliable.

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Marc Espie-2
On Sun, Mar 17, 2019 at 03:10:04PM -0400, Ted Unangst wrote:

> Hiltjo Posthuma wrote:
> > Not all BSD man pages reference the same RFCs, like RFC5322. The OpenBSD
> > strptime(3) man page (%z) describes military/nautical zones are supported.
> >
> > - NetBSD supports it (correctly since only a few months ago and it is not in
> >   stable or backported).
> > - FreeBSD and DragonFlyBSD do not support them (strptime returns NULL).
> > - Linux glibc does not support them (strptime returns NULL).
> > - Linux musl libc does not support %z.
>
> This sounds like we should just drop support as well. I think that's better
> than supporting something that's clearly unreliable.

By that reckoning, you would never implement anything new... :(

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Hiltjo Posthuma
In reply to this post by Hiltjo Posthuma
On Sun, Feb 24, 2019 at 01:11:39PM +0100, Hiltjo Posthuma wrote:

> Hi,
>
> I noticed some things in the strptime(3) implementing when parsing timezone
> strings using the %z format string.
>
> 1. I noticed the tm_gmtoff value is not correctly set in some cases. It should
> set tm_gmtoff to the offset from UTC in seconds (as described in mktime(3)).
>
> 2. The military/nautical UTC offsets are also reversed. This was also actually
> a bug in RFC822:
>
> RFC5322 (referenced in strptime(3) man page):
> https://tools.ietf.org/html/rfc5322
> Section 4.3, page 34 says:
> "
>    The 1 character military time zones were defined in a non-standard
>    way in [RFC0822] and are therefore unpredictable in their meaning.
>    The original definitions of the military zones "A" through "I" are
>    equivalent to "+0100" through "+0900", respectively; "K", "L", and
>    "M" are equivalent to "+1000", "+1100", and "+1200", respectively;
>    "N" through "Y" are equivalent to "-0100" through "-1200".
>    respectively; and "Z" is equivalent to "+0000".  However, because of
>    the error in [RFC0822], they SHOULD all be considered equivalent to
>    "-0000" unless there is out-of-band information confirming their
>    meaning.
> "
>
> While comparing the other BSD and Linux code-bases I noticed NetBSD has fixed
> this on 2017-08-24. It is not in NetBSD 8 stable yet, but in NetBSD-current:
> http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/time/strptime.c
>
> 3. The military zone J is also ambiguous. Some standards define it as unused,
> while others define it as "local observed time". NetBSD handles it as local
> observed time, but OpenBSD returns NULL in strptime(3). I left this as is.
>
> 4. While at it I also fixed some trailing whitespaces.
>
> Thanks for not falling asleep and reading the long text :)
>
> Patch and test program below:
>
> diff --git lib/libc/time/strptime.c lib/libc/time/strptime.c
> index eaf182dc773..86a0d5353ee 100644
> --- lib/libc/time/strptime.c
> +++ lib/libc/time/strptime.c
> @@ -116,7 +116,7 @@ _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
>   fmt++;
>   continue;
>   }
> -
> +
>   if ((c = *fmt++) != '%')
>   goto literal;
>  
> @@ -142,7 +142,7 @@ literal:
>   _LEGAL_ALT(0);
>   alt_format |= _ALT_O;
>   goto again;
> -
> +
>   /*
>   * "Complex" conversion rules, implemented through recursion.
>   */
> @@ -380,7 +380,7 @@ literal:
>   * number but without the century.
>   */
>   if (!(_conv_num(&bp, &i, 0, 99)))
> - return (NULL);
> + return (NULL);
>   continue;
>  
>   case 'G': /* The year corresponding to the ISO week
> @@ -500,7 +500,7 @@ literal:
>   ep = _find_string(bp, &i, nast, NULL, 4);
>   if (ep != NULL) {
>  #ifdef TM_GMTOFF
> - tm->TM_GMTOFF = -5 - i;
> + tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
>  #endif
>  #ifdef TM_ZONE
>   tm->TM_ZONE = (char *)nast[i];
> @@ -512,7 +512,7 @@ literal:
>   if (ep != NULL) {
>   tm->tm_isdst = 1;
>  #ifdef TM_GMTOFF
> - tm->TM_GMTOFF = -4 - i;
> + tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
>  #endif
>  #ifdef TM_ZONE
>   tm->TM_ZONE = (char *)nadt[i];
> @@ -527,11 +527,12 @@ literal:
>   /* Argh! No 'J'! */
>   if (*bp >= 'A' && *bp <= 'I')
>   tm->TM_GMTOFF =
> -    ('A' - 1) - (int)*bp;
> +    (int)*bp - ('A' - 1);
>   else if (*bp >= 'L' && *bp <= 'M')
> - tm->TM_GMTOFF = 'A' - (int)*bp;
> + tm->TM_GMTOFF = (int)*bp - 'A';
>   else if (*bp >= 'N' && *bp <= 'Y')
> - tm->TM_GMTOFF = (int)*bp - 'M';
> + tm->TM_GMTOFF = 'M' - (int)*bp;
> + tm->TM_GMTOFF *= SECSPERHOUR;
>  #endif
>  #ifdef TM_ZONE
>   tm->TM_ZONE = NULL; /* XXX */
> @@ -556,14 +557,15 @@ literal:
>   }
>   switch (i) {
>   case 2:
> - offs *= 100;
> + offs *= SECSPERHOUR;
>   break;
>   case 4:
>   i = offs % 100;
> - if (i >= 60)
> + offs /= 100;
> + if (i >= SECSPERMIN)
>   return NULL;
>   /* Convert minutes into decimal */
> - offs = (offs / 100) * 100 + (i * 50) / 30;
> + offs = offs * SECSPERHOUR + i * SECSPERMIN;
>   break;
>   default:
>   return NULL;
> @@ -719,7 +721,7 @@ _find_string(const u_char *bp, int *tgt, const char * const *n1,
>   return NULL;
>  }
>  
> -static int              
> +static int
>  leaps_thru_end_of(const int y)
>  {
>   return (y >= 0) ? (y / 4 - y / 100 + y / 400) :
>
>
> Quick & dirty test program below:
>
>
> #include <sys/types.h>
>
> #include <err.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <time.h>
>
> void
> testtime(const char *s)
> {
> struct tm tm;
>
> memset(&tm, 0, sizeof(tm));
> if (strptime(s, "%Y-%m-%d %H:%M:%S %z", &tm) == NULL)
> errx(1, "strptime: s=%s", s);
>
> printf("s=%s, gmtoff=%ld, zone=%s\n", s, tm.tm_gmtoff,
>       tm.tm_zone ? tm.tm_zone : "(null)");
> }
>
> int
> main(void)
> {
> testtime("2000-01-01 00:00:01 Z");
> testtime("2000-01-01 00:00:01 UT");
> testtime("2000-01-01 00:00:01 GMT");
> testtime("2000-01-01 00:00:01 -04:00");
> testtime("2000-01-01 00:00:01 -0400");
> testtime("2000-01-01 00:00:01 +04:00");
> testtime("2000-01-01 00:00:01 +0400");
> testtime("2000-01-01 00:00:01 EST");
> testtime("2000-01-01 00:00:01 EDT");
> testtime("2000-01-01 00:00:01 A");
> testtime("2000-01-01 00:00:01 B");
> testtime("2000-01-01 00:00:01 C");
> //testtime("2000-01-01 00:00:01 J"); // strptime returns NULL.
> testtime("2000-01-01 00:00:01 Q");
> testtime("2000-01-01 00:00:01 R");
>
> return 0;
> }
>

Last weekly bump,

Any further comments or OK's to get this in or reject it?

As discussed previously for point 2 I think it is fine to either return NULL
and not handle military zones like most libc's do or handle them properly like
NetBSD (reversed offsets).

Thanks,

--
Kind regards,
Hiltjo

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Theo de Raadt-2
My position is we should delete support.

Hiltjo Posthuma <[hidden email]> wrote:

> On Sun, Feb 24, 2019 at 01:11:39PM +0100, Hiltjo Posthuma wrote:
> > Hi,
> >
> > I noticed some things in the strptime(3) implementing when parsing timezone
> > strings using the %z format string.
> >
> > 1. I noticed the tm_gmtoff value is not correctly set in some cases. It should
> > set tm_gmtoff to the offset from UTC in seconds (as described in mktime(3)).
> >
> > 2. The military/nautical UTC offsets are also reversed. This was also actually
> > a bug in RFC822:
> >
> > RFC5322 (referenced in strptime(3) man page):
> > https://tools.ietf.org/html/rfc5322
> > Section 4.3, page 34 says:
> > "
> >    The 1 character military time zones were defined in a non-standard
> >    way in [RFC0822] and are therefore unpredictable in their meaning.
> >    The original definitions of the military zones "A" through "I" are
> >    equivalent to "+0100" through "+0900", respectively; "K", "L", and
> >    "M" are equivalent to "+1000", "+1100", and "+1200", respectively;
> >    "N" through "Y" are equivalent to "-0100" through "-1200".
> >    respectively; and "Z" is equivalent to "+0000".  However, because of
> >    the error in [RFC0822], they SHOULD all be considered equivalent to
> >    "-0000" unless there is out-of-band information confirming their
> >    meaning.
> > "
> >
> > While comparing the other BSD and Linux code-bases I noticed NetBSD has fixed
> > this on 2017-08-24. It is not in NetBSD 8 stable yet, but in NetBSD-current:
> > http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/time/strptime.c
> >
> > 3. The military zone J is also ambiguous. Some standards define it as unused,
> > while others define it as "local observed time". NetBSD handles it as local
> > observed time, but OpenBSD returns NULL in strptime(3). I left this as is.
> >
> > 4. While at it I also fixed some trailing whitespaces.
> >
> > Thanks for not falling asleep and reading the long text :)
> >
> > Patch and test program below:
> >
> > diff --git lib/libc/time/strptime.c lib/libc/time/strptime.c
> > index eaf182dc773..86a0d5353ee 100644
> > --- lib/libc/time/strptime.c
> > +++ lib/libc/time/strptime.c
> > @@ -116,7 +116,7 @@ _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
> >   fmt++;
> >   continue;
> >   }
> > -
> > +
> >   if ((c = *fmt++) != '%')
> >   goto literal;
> >  
> > @@ -142,7 +142,7 @@ literal:
> >   _LEGAL_ALT(0);
> >   alt_format |= _ALT_O;
> >   goto again;
> > -
> > +
> >   /*
> >   * "Complex" conversion rules, implemented through recursion.
> >   */
> > @@ -380,7 +380,7 @@ literal:
> >   * number but without the century.
> >   */
> >   if (!(_conv_num(&bp, &i, 0, 99)))
> > - return (NULL);
> > + return (NULL);
> >   continue;
> >  
> >   case 'G': /* The year corresponding to the ISO week
> > @@ -500,7 +500,7 @@ literal:
> >   ep = _find_string(bp, &i, nast, NULL, 4);
> >   if (ep != NULL) {
> >  #ifdef TM_GMTOFF
> > - tm->TM_GMTOFF = -5 - i;
> > + tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
> >  #endif
> >  #ifdef TM_ZONE
> >   tm->TM_ZONE = (char *)nast[i];
> > @@ -512,7 +512,7 @@ literal:
> >   if (ep != NULL) {
> >   tm->tm_isdst = 1;
> >  #ifdef TM_GMTOFF
> > - tm->TM_GMTOFF = -4 - i;
> > + tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
> >  #endif
> >  #ifdef TM_ZONE
> >   tm->TM_ZONE = (char *)nadt[i];
> > @@ -527,11 +527,12 @@ literal:
> >   /* Argh! No 'J'! */
> >   if (*bp >= 'A' && *bp <= 'I')
> >   tm->TM_GMTOFF =
> > -    ('A' - 1) - (int)*bp;
> > +    (int)*bp - ('A' - 1);
> >   else if (*bp >= 'L' && *bp <= 'M')
> > - tm->TM_GMTOFF = 'A' - (int)*bp;
> > + tm->TM_GMTOFF = (int)*bp - 'A';
> >   else if (*bp >= 'N' && *bp <= 'Y')
> > - tm->TM_GMTOFF = (int)*bp - 'M';
> > + tm->TM_GMTOFF = 'M' - (int)*bp;
> > + tm->TM_GMTOFF *= SECSPERHOUR;
> >  #endif
> >  #ifdef TM_ZONE
> >   tm->TM_ZONE = NULL; /* XXX */
> > @@ -556,14 +557,15 @@ literal:
> >   }
> >   switch (i) {
> >   case 2:
> > - offs *= 100;
> > + offs *= SECSPERHOUR;
> >   break;
> >   case 4:
> >   i = offs % 100;
> > - if (i >= 60)
> > + offs /= 100;
> > + if (i >= SECSPERMIN)
> >   return NULL;
> >   /* Convert minutes into decimal */
> > - offs = (offs / 100) * 100 + (i * 50) / 30;
> > + offs = offs * SECSPERHOUR + i * SECSPERMIN;
> >   break;
> >   default:
> >   return NULL;
> > @@ -719,7 +721,7 @@ _find_string(const u_char *bp, int *tgt, const char * const *n1,
> >   return NULL;
> >  }
> >  
> > -static int              
> > +static int
> >  leaps_thru_end_of(const int y)
> >  {
> >   return (y >= 0) ? (y / 4 - y / 100 + y / 400) :
> >
> >
> > Quick & dirty test program below:
> >
> >
> > #include <sys/types.h>
> >
> > #include <err.h>
> > #include <stdio.h>
> > #include <stdlib.h>
> > #include <string.h>
> > #include <time.h>
> >
> > void
> > testtime(const char *s)
> > {
> > struct tm tm;
> >
> > memset(&tm, 0, sizeof(tm));
> > if (strptime(s, "%Y-%m-%d %H:%M:%S %z", &tm) == NULL)
> > errx(1, "strptime: s=%s", s);
> >
> > printf("s=%s, gmtoff=%ld, zone=%s\n", s, tm.tm_gmtoff,
> >       tm.tm_zone ? tm.tm_zone : "(null)");
> > }
> >
> > int
> > main(void)
> > {
> > testtime("2000-01-01 00:00:01 Z");
> > testtime("2000-01-01 00:00:01 UT");
> > testtime("2000-01-01 00:00:01 GMT");
> > testtime("2000-01-01 00:00:01 -04:00");
> > testtime("2000-01-01 00:00:01 -0400");
> > testtime("2000-01-01 00:00:01 +04:00");
> > testtime("2000-01-01 00:00:01 +0400");
> > testtime("2000-01-01 00:00:01 EST");
> > testtime("2000-01-01 00:00:01 EDT");
> > testtime("2000-01-01 00:00:01 A");
> > testtime("2000-01-01 00:00:01 B");
> > testtime("2000-01-01 00:00:01 C");
> > //testtime("2000-01-01 00:00:01 J"); // strptime returns NULL.
> > testtime("2000-01-01 00:00:01 Q");
> > testtime("2000-01-01 00:00:01 R");
> >
> > return 0;
> > }
> >
>
> Last weekly bump,
>
> Any further comments or OK's to get this in or reject it?
>
> As discussed previously for point 2 I think it is fine to either return NULL
> and not handle military zones like most libc's do or handle them properly like
> NetBSD (reversed offsets).
>
> Thanks,
>
> --
> Kind regards,
> Hiltjo
>

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Ingo Schwarze
Hi,

Theo de Raadt wrote on Sun, Mar 24, 2019 at 12:48:03PM -0600:
> Hiltjo Posthuma <[hidden email]> wrote:
>> On Sun, Feb 24, 2019 at 01:11:39PM +0100, Hiltjo Posthuma wrote:

>>> 2. The military/nautical UTC offsets are [...]

broken by design.

>> As discussed previously for point 2 I think it is fine to either
>> return NULL and not handle military zones like most libc's do

> My position is we should delete support.

So here is a diff to delete support for the military time zones.
Just error out when encountering them.  That's the best thing we
can do because their meaning is ill-defined in the first place.

I'm not mixing anything else into this diff.  The other bugs should
be handled separately.

OK?
  Ingo


Index: strptime.3
===================================================================
RCS file: /cvs/src/lib/libc/time/strptime.3,v
retrieving revision 1.27
diff -u -p -r1.27 strptime.3
--- strptime.3 21 Feb 2019 19:10:32 -0000 1.27
+++ strptime.3 9 May 2019 18:29:19 -0000
@@ -242,15 +242,7 @@ time
 or
 .Ql Standard
 .Pq Dq S
-time;
-a single letter military timezone specified as:
-.Dq A
-through
-.Dq I
-and
-.Dq K
-through
-.Dq Y .
+time.
 .It Cm \&%Z
 timezone name or no characters when timezone information is unavailable.
 .It Cm \&%%
Index: strptime.c
===================================================================
RCS file: /cvs/src/lib/libc/time/strptime.c,v
retrieving revision 1.25
diff -u -p -r1.25 strptime.c
--- strptime.c 21 Feb 2019 19:10:32 -0000 1.25
+++ strptime.c 9 May 2019 18:29:19 -0000
@@ -520,25 +520,6 @@ literal:
  bp = ep;
  continue;
  }
-
- if ((*bp >= 'A' && *bp <= 'I') ||
-    (*bp >= 'L' && *bp <= 'Y')) {
-#ifdef TM_GMTOFF
- /* Argh! No 'J'! */
- if (*bp >= 'A' && *bp <= 'I')
- tm->TM_GMTOFF =
-    ('A' - 1) - (int)*bp;
- else if (*bp >= 'L' && *bp <= 'M')
- tm->TM_GMTOFF = 'A' - (int)*bp;
- else if (*bp >= 'N' && *bp <= 'Y')
- tm->TM_GMTOFF = (int)*bp - 'M';
-#endif
-#ifdef TM_ZONE
- tm->TM_ZONE = NULL; /* XXX */
-#endif
- bp++;
- continue;
- }
  return NULL;
  }
  offs = 0;

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Hiltjo Posthuma
On Thu, May 09, 2019 at 08:38:43PM +0200, Ingo Schwarze wrote:

> Hi,
>
> Theo de Raadt wrote on Sun, Mar 24, 2019 at 12:48:03PM -0600:
> > Hiltjo Posthuma <[hidden email]> wrote:
> >> On Sun, Feb 24, 2019 at 01:11:39PM +0100, Hiltjo Posthuma wrote:
>
> >>> 2. The military/nautical UTC offsets are [...]
>
> broken by design.
>
> >> As discussed previously for point 2 I think it is fine to either
> >> return NULL and not handle military zones like most libc's do
>
> > My position is we should delete support.
>
> So here is a diff to delete support for the military time zones.
> Just error out when encountering them.  That's the best thing we
> can do because their meaning is ill-defined in the first place.
>
> I'm not mixing anything else into this diff.  The other bugs should
> be handled separately.
>
> OK?
>   Ingo
>
>
> Index: strptime.3
> ===================================================================
> RCS file: /cvs/src/lib/libc/time/strptime.3,v
> retrieving revision 1.27
> diff -u -p -r1.27 strptime.3
> --- strptime.3 21 Feb 2019 19:10:32 -0000 1.27
> +++ strptime.3 9 May 2019 18:29:19 -0000
> @@ -242,15 +242,7 @@ time
>  or
>  .Ql Standard
>  .Pq Dq S
> -time;
> -a single letter military timezone specified as:
> -.Dq A
> -through
> -.Dq I
> -and
> -.Dq K
> -through
> -.Dq Y .
> +time.
>  .It Cm \&%Z
>  timezone name or no characters when timezone information is unavailable.
>  .It Cm \&%%
> Index: strptime.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/time/strptime.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 strptime.c
> --- strptime.c 21 Feb 2019 19:10:32 -0000 1.25
> +++ strptime.c 9 May 2019 18:29:19 -0000
> @@ -520,25 +520,6 @@ literal:
>   bp = ep;
>   continue;
>   }
> -
> - if ((*bp >= 'A' && *bp <= 'I') ||
> -    (*bp >= 'L' && *bp <= 'Y')) {
> -#ifdef TM_GMTOFF
> - /* Argh! No 'J'! */
> - if (*bp >= 'A' && *bp <= 'I')
> - tm->TM_GMTOFF =
> -    ('A' - 1) - (int)*bp;
> - else if (*bp >= 'L' && *bp <= 'M')
> - tm->TM_GMTOFF = 'A' - (int)*bp;
> - else if (*bp >= 'N' && *bp <= 'Y')
> - tm->TM_GMTOFF = (int)*bp - 'M';
> -#endif
> -#ifdef TM_ZONE
> - tm->TM_ZONE = NULL; /* XXX */
> -#endif
> - bp++;
> - continue;
> - }
>   return NULL;
>   }
>   offs = 0;
>

Hi,

This looks good to me. There is also a comment which could be removed or
changed:


@@ -464,9 +464,6 @@ literal:
                         * C[DS]T = Central : -5 | -6
                         * M[DS]T = Mountain: -6 | -7
                         * P[DS]T = Pacific : -7 | -8
-                        *          Military
-                        * [A-IL-M] = -1 ... -9 (J not used)
-                        * [N-Y]  = +1 ... +12
                         */
                        while (isspace(*bp))
                                bp++;

--
Kind regards,
Hiltjo

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Ted Unangst-6
In reply to this post by Ingo Schwarze
Ingo Schwarze wrote:
> I'm not mixing anything else into this diff.  The other bugs should
> be handled separately.
>
> OK?

Works for me. (with additional comment removal)

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Ingo Schwarze
Hi Ted,

Ted Unangst wrote on Thu, May 09, 2019 at 04:16:40PM -0400:
> Ingo Schwarze wrote:

>> I'm not mixing anything else into this diff.  The other bugs should
>> be handled separately.
 
> Works for me. (with additional comment removal)

Thanks for checking, committed.

Now let's get to the more serious part.
Hiltjo observed that %Z and %z produce wrong results.

First of all, neither POSIX nor XPG define tm_gmtoff nor %Z nor %z:

  https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/time.h.html
  https://pubs.opengroup.org/onlinepubs/9699919799/functions/strptime.html

So i think the best way to find out what tm_gmtoff should be is to
understand how programs in our own tree use it.

Here is a (hopefully) complete list of the users in OpenBSD base.
The following files expect it to contain seconds:
 - lib/libc/time/strftime.c
 - lib/libc/time/wcsftime.c
 - usr.sbin/smtpd/to.c
 - usr.sbin/snmpd/mib.c
 - usr.sbin/cron/cron.c
 - usr.bin/ftp/util.c
 - usr.bin/cvs/entries.c
 - usr.bin/rcs/rcstime.c
 - gnu/usr.bin/perl/time64.c

I failed to find any users that do *not* expect seconds.
So my conclusion is that the documentation is right and
what the code in strptime.c does is wrong.

Here is a patch to fix the code.

OK?


The change to %Z is exactly what Hiltjo sent.
The current code for %z is unnecessarily complicated.
Rather than fixing it, i simply rewrote it from scratch.
I like it when a bugfix results in -28 +11 LOC and better readability.

Yours,
  Ingo


Index: strptime.c
===================================================================
RCS file: /cvs/src/lib/libc/time/strptime.c,v
retrieving revision 1.26
diff -u -p -r1.26 strptime.c
--- strptime.c 10 May 2019 12:49:16 -0000 1.26
+++ strptime.c 10 May 2019 14:40:49 -0000
@@ -497,7 +497,7 @@ literal:
  ep = _find_string(bp, &i, nast, NULL, 4);
  if (ep != NULL) {
 #ifdef TM_GMTOFF
- tm->TM_GMTOFF = -5 - i;
+ tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
 #endif
 #ifdef TM_ZONE
  tm->TM_ZONE = (char *)nast[i];
@@ -509,7 +509,7 @@ literal:
  if (ep != NULL) {
  tm->tm_isdst = 1;
 #ifdef TM_GMTOFF
- tm->TM_GMTOFF = -4 - i;
+ tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
 #endif
 #ifdef TM_ZONE
  tm->TM_ZONE = (char *)nadt[i];
@@ -519,33 +519,16 @@ literal:
  }
  return NULL;
  }
- offs = 0;
- for (i = 0; i < 4; ) {
- if (isdigit(*bp)) {
- offs = offs * 10 + (*bp++ - '0');
- i++;
- continue;
- }
- if (i == 2 && *bp == ':') {
- bp++;
- continue;
- }
- break;
- }
- switch (i) {
- case 2:
- offs *= 100;
- break;
- case 4:
- i = offs % 100;
- if (i >= 60)
- return NULL;
- /* Convert minutes into decimal */
- offs = (offs / 100) * 100 + (i * 50) / 30;
- break;
- default:
+ if (!isdigit(bp[0]) || !isdigit(bp[1]))
  return NULL;
- }
+ offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
+ bp += 2;
+ if (*bp == ':')
+ bp++;
+ if (!isdigit(bp[0]) || !isdigit(bp[1]))
+ return NULL;
+ offs += ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERMIN;
+ bp += 2;
  if (neg)
  offs = -offs;
  tm->tm_isdst = 0; /* XXX */

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Ted Unangst-6
Ingo Schwarze wrote:

> Now let's get to the more serious part.
> Hiltjo observed that %Z and %z produce wrong results.
>
> First of all, neither POSIX nor XPG define tm_gmtoff nor %Z nor %z:
>
>   https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/time.h.html
>   https://pubs.opengroup.org/onlinepubs/9699919799/functions/strptime.html
>
> So i think the best way to find out what tm_gmtoff should be is to
> understand how programs in our own tree use it.
>
> Here is a (hopefully) complete list of the users in OpenBSD base.
> The following files expect it to contain seconds:
>  - lib/libc/time/strftime.c
>  - lib/libc/time/wcsftime.c
>  - usr.sbin/smtpd/to.c
>  - usr.sbin/snmpd/mib.c
>  - usr.sbin/cron/cron.c
>  - usr.bin/ftp/util.c
>  - usr.bin/cvs/entries.c
>  - usr.bin/rcs/rcstime.c
>  - gnu/usr.bin/perl/time64.c
>
> I failed to find any users that do *not* expect seconds.
> So my conclusion is that the documentation is right and
> what the code in strptime.c does is wrong.
>
> Here is a patch to fix the code.

this looks good to me. ok.

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Todd C. Miller-3
In reply to this post by Ingo Schwarze
On Fri, 10 May 2019 16:52:35 +0200, Ingo Schwarze wrote:

> I failed to find any users that do *not* expect seconds.
> So my conclusion is that the documentation is right and
> what the code in strptime.c does is wrong.

Yes, tm_gmtoff is in seconds.

> Here is a patch to fix the code.

OK millert@ for that part.

> The change to %Z is exactly what Hiltjo sent.
> The current code for %z is unnecessarily complicated.
> Rather than fixing it, i simply rewrote it from scratch.
> I like it when a bugfix results in -28 +11 LOC and better readability.

I don't believe this handles the "[+-]hh" form.

 - todd

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Ingo Schwarze
Hi Todd,

Todd C. Miller wrote on Fri, May 10, 2019 at 02:08:45PM -0600:
> On Fri, 10 May 2019 16:52:35 +0200, Ingo Schwarze wrote:

>> Here is a patch to fix the code.

> OK millert@ for that part.

Thanks, committed.

>> The change to %Z is exactly what Hiltjo sent.
>> The current code for %z is unnecessarily complicated.
>> Rather than fixing it, i simply rewrote it from scratch.
>> I like it when a bugfix results in -28 +11 LOC and better readability.

> I don't believe this handles the "[+-]hh" form.

Ouch.  No, it does not.  Thanks for spotting the regression.

The following patch preserves the parsing behaviour
and correctly stores the number of seconds into tm_gmtoff.

OK?
  Ingo


 $ ./z +1
NULL
 $ ./z +02
7200
 $ ./z +02x
7200 (rest "x")
 $ ./z +02:
7200
 $ ./z +02:x
7200 (rest "x")
 $ ./z +02:1
NULL
 $ ./z +02:01
7260
 $ ./z +02:013
7260 (rest "3")


Index: strptime.c
===================================================================
RCS file: /cvs/src/lib/libc/time/strptime.c,v
retrieving revision 1.27
diff -u -p -r1.27 strptime.c
--- strptime.c 10 May 2019 20:24:58 -0000 1.27
+++ strptime.c 10 May 2019 20:43:35 -0000
@@ -519,32 +519,17 @@ literal:
  }
  return NULL;
  }
- offs = 0;
- for (i = 0; i < 4; ) {
- if (isdigit(*bp)) {
- offs = offs * 10 + (*bp++ - '0');
- i++;
- continue;
- }
- if (i == 2 && *bp == ':') {
- bp++;
- continue;
- }
- break;
- }
- switch (i) {
- case 2:
- offs *= 100;
- break;
- case 4:
- i = offs % 100;
- if (i >= 60)
- return NULL;
- /* Convert minutes into decimal */
- offs = (offs / 100) * 100 + (i * 50) / 30;
- break;
- default:
+ if (!isdigit(bp[0]) || !isdigit(bp[1]))
  return NULL;
+ offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
+ bp += 2;
+ if (*bp == ':')
+ bp++;
+ if (isdigit(*bp)) {
+ offs += (*bp++ - '0') * 10 * SECSPERMIN;
+ if (!isdigit(*bp))
+ return NULL;
+ offs += (*bp++ - '0') * SECSPERMIN;
  }
  if (neg)
  offs = -offs;

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Ted Unangst-6
Ingo Schwarze wrote:
> Ouch.  No, it does not.  Thanks for spotting the regression.
>
> The following patch preserves the parsing behaviour
> and correctly stores the number of seconds into tm_gmtoff.

oops, missed that case too. this looks correct.

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Todd C. Miller-3
In reply to this post by Ingo Schwarze
On Fri, 10 May 2019 23:06:21 +0200, Ingo Schwarze wrote:

> The following patch preserves the parsing behaviour
> and correctly stores the number of seconds into tm_gmtoff.

That one looks correct.  OK millert@

 - todd

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Ingo Schwarze
Hi,

> That one looks correct.  OK millert@

Committed, thanks for checking!


While here, i noticed ugly preprocessor macros.
Let's make our future life easier by unifdefing a bit.
When compiling with -g0, there is no object change.

Note that if TM_ZONE is not defined, wcsftime.c doesn't
currently even compile.  Talk about code in the tree that
is never tested.

OK?
  Ingo


Index: localtime.c
===================================================================
RCS file: /cvs/src/lib/libc/time/localtime.c,v
retrieving revision 1.59
diff -u -p -r1.59 localtime.c
--- localtime.c 19 Sep 2016 12:48:21 -0000 1.59
+++ localtime.c 10 May 2019 22:17:14 -0000
@@ -46,7 +46,7 @@
 ** in which Daylight Saving Time is never observed.
 ** 4. They might reference tzname[0] after setting to a time zone
 ** in which Standard Time is never observed.
-** 5. They might reference tm.TM_ZONE after calling offtime.
+** 5. They might reference tm.tm_zone after calling offtime.
 ** What's best to do in the above cases is open to debate;
 ** for now, we just set things up so that in any of the five cases
 ** WILDABBR is used. Another possibility: initialize tzname[0] to the
@@ -214,14 +214,8 @@ DEF_WEAK(tzname);
 
 static struct tm tm;
 
-#ifdef USG_COMPAT
 long timezone = 0;
 int daylight = 0;
-#endif /* defined USG_COMPAT */
-
-#ifdef ALTZONE
-time_t altzone = 0;
-#endif /* defined ALTZONE */
 
 static long
 detzcode(const char *codep)
@@ -255,13 +249,8 @@ settzname(void)
 
  tzname[0] = wildabbr;
  tzname[1] = wildabbr;
-#ifdef USG_COMPAT
  daylight = 0;
  timezone = 0;
-#endif /* defined USG_COMPAT */
-#ifdef ALTZONE
- altzone = 0;
-#endif /* defined ALTZONE */
  if (sp == NULL) {
  tzname[0] = tzname[1] = (char *)gmt;
  return;
@@ -273,16 +262,10 @@ settzname(void)
  const struct ttinfo *ttisp = &sp->ttis[sp->types[i]];
 
  tzname[ttisp->tt_isdst] = &sp->chars[ttisp->tt_abbrind];
-#ifdef USG_COMPAT
  if (ttisp->tt_isdst)
  daylight = 1;
  if (!ttisp->tt_isdst)
  timezone = -(ttisp->tt_gmtoff);
-#endif /* defined USG_COMPAT */
-#ifdef ALTZONE
- if (ttisp->tt_isdst)
- altzone = -(ttisp->tt_gmtoff);
-#endif /* defined ALTZONE */
  }
  /*
  ** Finally, scrub the abbreviations.
@@ -1274,9 +1257,7 @@ localsub(const time_t *timep, long offse
  result = timesub(&t, ttisp->tt_gmtoff, sp, tmp);
  tmp->tm_isdst = ttisp->tt_isdst;
  tzname[tmp->tm_isdst] = &sp->chars[ttisp->tt_abbrind];
-#ifdef TM_ZONE
- tmp->TM_ZONE = &sp->chars[ttisp->tt_abbrind];
-#endif /* defined TM_ZONE */
+ tmp->tm_zone = &sp->chars[ttisp->tt_abbrind];
  return result;
 }
 
@@ -1325,21 +1306,19 @@ gmtsub(const time_t *timep, long offset,
  }
  _THREAD_PRIVATE_MUTEX_UNLOCK(gmt);
  result = timesub(timep, offset, gmtptr, tmp);
-#ifdef TM_ZONE
  /*
  ** Could get fancy here and deliver something such as
  ** "UTC+xxxx" or "UTC-xxxx" if offset is non-zero,
  ** but this is no time for a treasure hunt.
  */
  if (offset != 0)
- tmp->TM_ZONE = wildabbr;
+ tmp->tm_zone = wildabbr;
  else {
  if (gmtptr == NULL)
- tmp->TM_ZONE = (char *)gmt;
+ tmp->tm_zone = (char *)gmt;
  else
- tmp->TM_ZONE = gmtptr->chars;
+ tmp->tm_zone = gmtptr->chars;
  }
-#endif /* defined TM_ZONE */
  return result;
 }
 
@@ -1508,9 +1487,7 @@ timesub(const time_t *timep, long offset
  idays -= ip[tmp->tm_mon];
  tmp->tm_mday = (int) (idays + 1);
  tmp->tm_isdst = 0;
-#ifdef TM_GMTOFF
- tmp->TM_GMTOFF = offset;
-#endif /* defined TM_GMTOFF */
+ tmp->tm_gmtoff = offset;
  return tmp;
 }
 
Index: private.h
===================================================================
RCS file: /cvs/src/lib/libc/time/private.h,v
retrieving revision 1.38
diff -u -p -r1.38 private.h
--- private.h 24 Oct 2015 18:13:18 -0000 1.38
+++ private.h 10 May 2019 22:17:14 -0000
@@ -9,12 +9,9 @@
 */
 
 /* OpenBSD defaults */
-#define TM_GMTOFF tm_gmtoff
-#define TM_ZONE tm_zone
 #define PCTS 1
 #define ALL_STATE 1
 #define STD_INSPIRED 1
-#define USG_COMPAT 1
 
 /*
 ** This header is for use ONLY with the time conversion code.
Index: strftime.c
===================================================================
RCS file: /cvs/src/lib/libc/time/strftime.c,v
retrieving revision 1.30
diff -u -p -r1.30 strftime.c
--- strftime.c 21 Sep 2016 04:38:57 -0000 1.30
+++ strftime.c 10 May 2019 22:17:14 -0000
@@ -456,12 +456,9 @@ label:
  pt, ptlim);
  continue;
  case 'Z':
-#ifdef TM_ZONE
- if (t->TM_ZONE != NULL)
- pt = _add(t->TM_ZONE, pt, ptlim);
- else
-#endif /* defined TM_ZONE */
- if (t->tm_isdst >= 0)
+ if (t->tm_zone != NULL)
+ pt = _add(t->tm_zone, pt, ptlim);
+ else if (t->tm_isdst >= 0)
  pt = _add(tzname[t->tm_isdst != 0],
  pt, ptlim);
  /*
@@ -477,41 +474,7 @@ label:
 
  if (t->tm_isdst < 0)
  continue;
-#ifdef TM_GMTOFF
- diff = t->TM_GMTOFF;
-#else /* !defined TM_GMTOFF */
- /*
- ** C99 says that the UTC offset must
- ** be computed by looking only at
- ** tm_isdst. This requirement is
- ** incorrect, since it means the code
- ** must rely on magic (in this case
- ** altzone and timezone), and the
- ** magic might not have the correct
- ** offset. Doing things correctly is
- ** tricky and requires disobeying C99;
- ** see GNU C strftime for details.
- ** For now, punt and conform to the
- ** standard, even though it's incorrect.
- **
- ** C99 says that %z must be replaced by the
- ** empty string if the time zone is not
- ** determinable, so output nothing if the
- ** appropriate variables are not available.
- */
- if (t->tm_isdst == 0)
-#ifdef USG_COMPAT
- diff = -timezone;
-#else /* !defined USG_COMPAT */
- continue;
-#endif /* !defined USG_COMPAT */
- else
-#ifdef ALTZONE
- diff = -altzone;
-#else /* !defined ALTZONE */
- continue;
-#endif /* !defined ALTZONE */
-#endif /* !defined TM_GMTOFF */
+ diff = t->tm_gmtoff;
  if (diff < 0) {
  sign = "-";
  diff = -diff;
Index: strptime.c
===================================================================
RCS file: /cvs/src/lib/libc/time/strptime.c,v
retrieving revision 1.29
diff -u -p -r1.29 strptime.c
--- strptime.c 10 May 2019 21:39:05 -0000 1.29
+++ strptime.c 10 May 2019 22:17:14 -0000
@@ -416,21 +416,13 @@ literal:
  tzset();
  if (strncmp((const char *)bp, gmt, 3) == 0) {
  tm->tm_isdst = 0;
-#ifdef TM_GMTOFF
- tm->TM_GMTOFF = 0;
-#endif
-#ifdef TM_ZONE
- tm->TM_ZONE = gmt;
-#endif
+ tm->tm_gmtoff = 0;
+ tm->tm_zone = gmt;
  bp += 3;
  } else if (strncmp((const char *)bp, utc, 3) == 0) {
  tm->tm_isdst = 0;
-#ifdef TM_GMTOFF
- tm->TM_GMTOFF = 0;
-#endif
-#ifdef TM_ZONE
- tm->TM_ZONE = utc;
-#endif
+ tm->tm_gmtoff = 0;
+ tm->tm_zone = utc;
  bp += 3;
  } else {
  ep = _find_string(bp, &i,
@@ -440,12 +432,8 @@ literal:
  return (NULL);
 
  tm->tm_isdst = i;
-#ifdef TM_GMTOFF
- tm->TM_GMTOFF = -(timezone);
-#endif
-#ifdef TM_ZONE
- tm->TM_ZONE = tzname[i];
-#endif
+ tm->tm_gmtoff = -(timezone);
+ tm->tm_zone = tzname[i];
  bp = ep;
  }
  continue;
@@ -479,12 +467,8 @@ literal:
  /*FALLTHROUGH*/
  case 'Z':
  tm->tm_isdst = 0;
-#ifdef TM_GMTOFF
- tm->TM_GMTOFF = 0;
-#endif
-#ifdef TM_ZONE
- tm->TM_ZONE = utc;
-#endif
+ tm->tm_gmtoff = 0;
+ tm->tm_zone = utc;
  continue;
  case '+':
  neg = 0;
@@ -496,24 +480,16 @@ literal:
  --bp;
  ep = _find_string(bp, &i, nast, NULL, 4);
  if (ep != NULL) {
-#ifdef TM_GMTOFF
- tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
-#endif
-#ifdef TM_ZONE
- tm->TM_ZONE = (char *)nast[i];
-#endif
+ tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
+ tm->tm_zone = (char *)nast[i];
  bp = ep;
  continue;
  }
  ep = _find_string(bp, &i, nadt, NULL, 4);
  if (ep != NULL) {
  tm->tm_isdst = 1;
-#ifdef TM_GMTOFF
- tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
-#endif
-#ifdef TM_ZONE
- tm->TM_ZONE = (char *)nadt[i];
-#endif
+ tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
+ tm->tm_zone = (char *)nadt[i];
  bp = ep;
  continue;
  }
@@ -534,12 +510,8 @@ literal:
  if (neg)
  offs = -offs;
  tm->tm_isdst = 0; /* XXX */
-#ifdef TM_GMTOFF
- tm->TM_GMTOFF = offs;
-#endif
-#ifdef TM_ZONE
- tm->TM_ZONE = NULL; /* XXX */
-#endif
+ tm->tm_gmtoff = offs;
+ tm->tm_zone = NULL; /* XXX */
  continue;
 
  /*
Index: wcsftime.c
===================================================================
RCS file: /cvs/src/lib/libc/time/wcsftime.c,v
retrieving revision 1.6
diff -u -p -r1.6 wcsftime.c
--- wcsftime.c 9 Feb 2015 14:52:28 -0000 1.6
+++ wcsftime.c 10 May 2019 22:17:14 -0000
@@ -439,7 +439,7 @@ label:
  continue;
  case 'Z':
  if (t->tm_zone != NULL)
- pt = _sadd(t->TM_ZONE, pt, ptlim);
+ pt = _sadd(t->tm_zone, pt, ptlim);
  else
  if (t->tm_isdst >= 0)
  pt = _sadd(tzname[t->tm_isdst != 0],

Reply | Threaded
Open this post in threaded view
|

Re: [patch] improve strptime(3) %z timezone parsing

Todd C. Miller-3
On Sat, 11 May 2019 00:30:35 +0200, Ingo Schwarze wrote:

> While here, i noticed ugly preprocessor macros.
> Let's make our future life easier by unifdefing a bit.
> When compiling with -g0, there is no object change.

No objection, OK millert@

 - todd