fsck_ffs: faster with lots of cylinder groups

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|

fsck_ffs: faster with lots of cylinder groups

Otto Moerbeek
Hi,

both phase 1 and phase 5 need cylinder group metadata.  This diff
keeps the cg data read in phase 1 in memory to be used by phase 5 if
possible. From FreeBSD.

        -Otto

On an empty 30T fileystem:

$ time obj/fsck_ffs -f /dev/sd3a
    2m44.10s real     0m13.21s user     0m07.38s system

$ time doas obj/fsck_ffs -f /dev/sd3a
    1m32.81s real     0m12.86s user     0m05.25s system

The difference will be less if a fileystem is filled up, but still nice.

        -Otto

Index: fsck.h
===================================================================
RCS file: /cvs/src/sbin/fsck_ffs/fsck.h,v
retrieving revision 1.32
diff -u -p -r1.32 fsck.h
--- fsck.h 5 Jan 2018 09:33:47 -0000 1.32
+++ fsck.h 21 Jun 2020 12:48:50 -0000
@@ -136,7 +136,6 @@ struct bufarea {
 struct bufarea bufhead; /* head of list of other blks in filesys */
 struct bufarea sblk; /* file system superblock */
 struct bufarea asblk; /* alternate file system superblock */
-struct bufarea cgblk; /* cylinder group blocks */
 struct bufarea *pdirbp; /* current directory contents */
 struct bufarea *pbp; /* current inode block */
 struct bufarea *getdatablk(daddr_t, long);
@@ -148,9 +147,7 @@ struct bufarea *getdatablk(daddr_t, long
  (bp)->b_flags = 0;
 
 #define sbdirty() sblk.b_dirty = 1
-#define cgdirty() cgblk.b_dirty = 1
 #define sblock (*sblk.b_un.b_fs)
-#define cgrp (*cgblk.b_un.b_cg)
 
 enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE};
 
@@ -275,9 +272,13 @@ struct ufs2_dinode ufs2_zino;
 #define FOUND 0x10
 
 union dinode *ginode(ino_t);
+struct bufarea *cglookup(u_int cg);
 struct inoinfo *getinoinfo(ino_t);
 void getblk(struct bufarea *, daddr_t, long);
 ino_t allocino(ino_t, int);
+void *Malloc(size_t);
+void *Calloc(size_t, size_t);
+void *Reallocarray(void *, size_t, size_t);
 
 int (*info_fn)(char *, size_t);
 char *info_filesys;
Index: inode.c
===================================================================
RCS file: /cvs/src/sbin/fsck_ffs/inode.c,v
retrieving revision 1.49
diff -u -p -r1.49 inode.c
--- inode.c 16 Sep 2018 02:43:11 -0000 1.49
+++ inode.c 21 Jun 2020 12:48:50 -0000
@@ -370,7 +370,7 @@ setinodebuf(ino_t inum)
  partialsize = inobufsize;
  }
  if (inodebuf == NULL &&
-    (inodebuf = malloc((unsigned)inobufsize)) == NULL)
+    (inodebuf = Malloc((unsigned)inobufsize)) == NULL)
  errexit("Cannot allocate space for inode buffer\n");
 }
 
@@ -401,7 +401,7 @@ cacheino(union dinode *dp, ino_t inumber
  blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
  if (blks > NDADDR)
  blks = NDADDR + NIADDR;
- inp = malloc(sizeof(*inp) + (blks ? blks - 1 : 0) * sizeof(daddr_t));
+ inp = Malloc(sizeof(*inp) + (blks ? blks - 1 : 0) * sizeof(daddr_t));
  if (inp == NULL)
  errexit("cannot allocate memory for inode cache\n");
  inpp = &inphead[inumber % numdirs];
@@ -423,10 +423,10 @@ cacheino(union dinode *dp, ino_t inumber
  inp->i_blks[NDADDR + i] = DIP(dp, di_ib[i]);
  if (inplast == listmax) {
  newlistmax = listmax + 100;
- newinpsort = reallocarray(inpsort,
+ newinpsort = Reallocarray(inpsort,
     (unsigned)newlistmax, sizeof(struct inoinfo *));
  if (newinpsort == NULL)
- errexit("cannot increase directory list");
+ errexit("cannot increase directory list\n");
  inpsort = newinpsort;
  listmax = newlistmax;
  }
@@ -582,7 +582,8 @@ allocino(ino_t request, int type)
 {
  ino_t ino;
  union dinode *dp;
- struct cg *cgp = &cgrp;
+ struct bufarea *cgbp;
+ struct cg *cgp;
  int cg;
  time_t t;
  struct inostat *info;
@@ -602,7 +603,7 @@ allocino(ino_t request, int type)
  unsigned long newalloced, i;
  newalloced = MINIMUM(sblock.fs_ipg,
  MAXIMUM(2 * inostathead[cg].il_numalloced, 10));
- info = calloc(newalloced, sizeof(struct inostat));
+ info = Calloc(newalloced, sizeof(struct inostat));
  if (info == NULL) {
  pwarn("cannot alloc %zu bytes to extend inoinfo\n",
  sizeof(struct inostat) * newalloced);
@@ -619,7 +620,8 @@ allocino(ino_t request, int type)
  inostathead[cg].il_numalloced = newalloced;
  info = inoinfo(ino);
  }
- getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
+ cgbp = cglookup(cg);
+ cgp = cgbp->b_un.b_cg;
  if (!cg_chkmagic(cgp))
  pfatal("CG %d: BAD MAGIC NUMBER\n", cg);
  setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
@@ -637,7 +639,7 @@ allocino(ino_t request, int type)
  default:
  return (0);
  }
- cgdirty();
+ dirty(cgbp);
  dp = ginode(ino);
  DIP_SET(dp, di_db[0],  allocblk(1));
  if (DIP(dp, di_db[0]) == 0) {
Index: pass1.c
===================================================================
RCS file: /cvs/src/sbin/fsck_ffs/pass1.c,v
retrieving revision 1.46
diff -u -p -r1.46 pass1.c
--- pass1.c 20 Jun 2020 07:49:04 -0000 1.46
+++ pass1.c 21 Jun 2020 12:48:50 -0000
@@ -66,6 +66,8 @@ pass1(void)
  ino_t inumber, inosused, ninosused;
  size_t inospace;
  struct inostat *info;
+ struct bufarea *cgbp;
+ struct cg *cgp;
  u_int c;
  struct inodesc idesc;
  daddr_t i, cgd;
@@ -99,9 +101,10 @@ pass1(void)
  for (c = 0; c < sblock.fs_ncg; c++) {
  inumber = c * sblock.fs_ipg;
  setinodebuf(inumber);
- getblk(&cgblk, cgtod(&sblock, c), sblock.fs_cgsize);
+ cgbp = cglookup(c);
+ cgp = cgbp->b_un.b_cg;
  if (sblock.fs_magic == FS_UFS2_MAGIC) {
- inosused = cgrp.cg_initediblk;
+ inosused = cgp->cg_initediblk;
  if (inosused > sblock.fs_ipg)
  inosused = sblock.fs_ipg;
  } else
@@ -115,7 +118,7 @@ pass1(void)
  * read only those inodes in from disk.
  */
  if (preen && usedsoftdep) {
- cp = &cg_inosused(&cgrp)[(inosused - 1) / CHAR_BIT];
+ cp = &cg_inosused(cgp)[(inosused - 1) / CHAR_BIT];
  for ( ; inosused != 0; cp--) {
  if (*cp == 0) {
  if (inosused > CHAR_BIT)
@@ -140,10 +143,10 @@ pass1(void)
  inostathead[c].il_stat = 0;
  continue;
  }
- info = calloc((unsigned)inosused, sizeof(struct inostat));
+ info = Calloc((unsigned)inosused, sizeof(struct inostat));
  inospace = (unsigned)inosused * sizeof(struct inostat);
  if (info == NULL)
- errexit("cannot alloc %zu bytes for inoinfo", inospace);
+ errexit("cannot alloc %zu bytes for inoinfo\n", inospace);
  inostathead[c].il_stat = info;
  /*
  * Scan the allocated inodes.
@@ -179,7 +182,7 @@ pass1(void)
  struct inostat *ninfo;
  size_t ninospace;
 
- ninfo = reallocarray(info, ninosused, sizeof(*ninfo));
+ ninfo = Reallocarray(info, ninosused, sizeof(*ninfo));
  if (ninfo == NULL) {
  pfatal("too many inodes %llu, or out of memory\n",
     (unsigned long long)ninosused);
@@ -300,7 +303,7 @@ checkinode(ino_t inumber, struct inodesc
  n_files++;
  ILNCOUNT(inumber) = DIP(dp, di_nlink);
  if (DIP(dp, di_nlink) <= 0) {
- zlnp = malloc(sizeof *zlnp);
+ zlnp = Malloc(sizeof *zlnp);
  if (zlnp == NULL) {
  pfatal("LINK COUNT TABLE OVERFLOW");
  if (reply("CONTINUE") == 0) {
@@ -392,7 +395,7 @@ pass1check(struct inodesc *idesc)
  }
  return (STOP);
  }
- new = malloc(sizeof(struct dups));
+ new = Malloc(sizeof(struct dups));
  if (new == NULL) {
  pfatal("DUP TABLE OVERFLOW.");
  if (reply("CONTINUE") == 0) {
Index: pass5.c
===================================================================
RCS file: /cvs/src/sbin/fsck_ffs/pass5.c,v
retrieving revision 1.49
diff -u -p -r1.49 pass5.c
--- pass5.c 20 Jun 2020 07:49:04 -0000 1.49
+++ pass5.c 21 Jun 2020 12:48:50 -0000
@@ -65,7 +65,6 @@ pass5(void)
  u_int c;
  int inomapsize, blkmapsize;
  struct fs *fs = &sblock;
- struct cg *cg = &cgrp;
  daddr_t dbase, dmax;
  daddr_t d;
  long i, k, rewritecg = 0;
@@ -76,6 +75,8 @@ pass5(void)
  char buf[MAXBSIZE];
  struct cg *newcg = (struct cg *)buf;
  struct ocg *ocg = (struct ocg *)buf;
+ struct cg *cg;
+ struct bufarea *cgbp;
 
  memset(newcg, 0, (size_t)fs->fs_cgsize);
  if (cvtlevel >= 3) {
@@ -179,7 +180,8 @@ pass5(void)
  info_fn = pass5_info;
  for (c = 0; c < fs->fs_ncg; c++) {
  info_cg = c;
- getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize);
+ cgbp = cglookup(c);
+ cg = cgbp->b_un.b_cg;
  if (!cg_chkmagic(cg))
  pfatal("CG %u: BAD MAGIC NUMBER\n", c);
  dbase = cgbase(fs, c);
@@ -323,13 +325,13 @@ pass5(void)
  }
  if (rewritecg) {
  memcpy(cg, newcg, (size_t)fs->fs_cgsize);
- cgdirty();
+ dirty(cgbp);
  continue;
  }
  if (memcmp(newcg, cg, basesize) &&
     dofix(&idesc[2], "SUMMARY INFORMATION BAD")) {
  memcpy(cg, newcg, (size_t)basesize);
- cgdirty();
+ dirty(cgbp);
  }
  if (usedsoftdep) {
  for (i = 0; i < inomapsize; i++) {
@@ -364,7 +366,7 @@ pass5(void)
     dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) {
  memmove(cg_inosused(cg), cg_inosused(newcg),
  (size_t)mapsize);
- cgdirty();
+ dirty(cgbp);
  }
  }
  info_fn = NULL;
Index: utilities.c
===================================================================
RCS file: /cvs/src/sbin/fsck_ffs/utilities.c,v
retrieving revision 1.53
diff -u -p -r1.53 utilities.c
--- utilities.c 20 Jun 2020 07:49:04 -0000 1.53
+++ utilities.c 21 Jun 2020 12:48:50 -0000
@@ -51,7 +51,8 @@
 #include "fsck.h"
 #include "extern.h"
 
-long diskreads, totalreads; /* Disk cache statistics */
+long diskreads, totalreads; /* Disk cache statistics */
+static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
 
 static void rwerror(char *, daddr_t);
 
@@ -176,6 +177,39 @@ bufinit(void)
 }
 
 /*
+ * Manage cylinder group buffers.
+ */
+static struct bufarea *cgbufs; /* header for cylinder group cache */
+static int flushtries; /* number of tries to reclaim memory */
+struct bufarea *
+cglookup(u_int cg)
+{
+ struct bufarea *cgbp;
+ struct cg *cgp;
+
+ if (cgbufs == NULL) {
+ cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
+ if (cgbufs == NULL)
+ errexit("cannot allocate cylinder group buffers");
+ }
+ cgbp = &cgbufs[cg];
+ if (cgbp->b_un.b_cg != NULL)
+ return (cgbp);
+ cgp = NULL;
+ if (flushtries == 0)
+ cgp = malloc((unsigned int)sblock.fs_cgsize);
+ if (cgp == NULL) {
+ getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
+ return (&cgblk);
+ }
+ cgbp->b_un.b_cg = cgp;
+ initbarea(cgbp);
+ getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
+ return (cgbp);
+}
+
+
+/*
  * Manage a cache of directory blocks.
  */
 struct bufarea *
@@ -305,6 +339,15 @@ ckfini(int markclean)
  }
  if (bufhead.b_size != cnt)
  errexit("Panic: lost %d buffers\n", bufhead.b_size - cnt);
+ if (cgbufs != NULL) {
+ for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
+ if (cgbufs[cnt].b_un.b_cg == NULL)
+ continue;
+ flush(fswritefd, &cgbufs[cnt]);
+ free(cgbufs[cnt].b_un.b_cg);
+ }
+ free(cgbufs);
+ }
  pbp = pdirbp = NULL;
  if (markclean && (sblock.fs_clean & FS_ISCLEAN) == 0) {
  /*
@@ -400,7 +443,8 @@ allocblk(int frags)
 {
  daddr_t i, baseblk;
  int j, k, cg;
- struct cg *cgp = &cgrp;
+ struct bufarea *cgbp;
+ struct cg *cgp;
 
  if (frags <= 0 || frags > sblock.fs_frag)
  return (0);
@@ -416,7 +460,8 @@ allocblk(int frags)
  continue;
  }
  cg = dtog(&sblock, i + j);
- getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
+ cgbp = cglookup(cg);
+ cgp = cgbp->b_un.b_cg;
  if (!cg_chkmagic(cgp))
  pfatal("CG %d: BAD MAGIC NUMBER\n", cg);
  baseblk = dtogd(&sblock, i + j);
@@ -614,4 +659,68 @@ catchinfo(int signo)
  writev(info_fd, iov, 4);
  }
  errno = save_errno;
+}
+/*
+ * Attempt to flush a cylinder group cache entry.
+ * Return whether the flush was successful.
+ */
+static int
+flushentry(void)
+{
+ struct bufarea *cgbp;
+
+ if (flushtries == sblock.fs_ncg || cgbufs == NULL)
+ return (0);
+ cgbp = &cgbufs[flushtries++];
+ if (cgbp->b_un.b_cg == NULL)
+ return (0);
+ flush(fswritefd, cgbp);
+ free(cgbp->b_un.b_buf);
+ cgbp->b_un.b_buf = NULL;
+ return (1);
+}
+
+/*
+ * Wrapper for malloc() that flushes the cylinder group cache to try
+ * to get space.
+ */
+void *
+Malloc(size_t size)
+{
+ void *retval;
+
+ while ((retval = malloc(size)) == NULL)
+ if (flushentry() == 0)
+ break;
+ return (retval);
+}
+
+/*
+ * Wrapper for calloc() that flushes the cylinder group cache to try
+ * to get space.
+ */
+void*
+Calloc(size_t cnt, size_t size)
+{
+ void *retval;
+
+ while ((retval = calloc(cnt, size)) == NULL)
+ if (flushentry() == 0)
+ break;
+ return (retval);
+}
+
+/*
+ * Wrapper for reallocarray() that flushes the cylinder group cache to try
+ * to get space.
+ */
+void*
+Reallocarray(void *p, size_t cnt, size_t size)
+{
+ void *retval;
+
+ while ((retval = reallocarray(p, cnt, size)) == NULL)
+ if (flushentry() == 0)
+ break;
+ return (retval);
 }


Reply | Threaded
Open this post in threaded view
|

Re: fsck_ffs: faster with lots of cylinder groups

Otto Moerbeek
On Sun, Jun 21, 2020 at 03:35:21PM +0200, Otto Moerbeek wrote:

> Hi,
>
> both phase 1 and phase 5 need cylinder group metadata.  This diff
> keeps the cg data read in phase 1 in memory to be used by phase 5 if
> possible. From FreeBSD.
>
> -Otto
>
> On an empty 30T fileystem:
>
> $ time obj/fsck_ffs -f /dev/sd3a
>     2m44.10s real     0m13.21s user     0m07.38s system
>
> $ time doas obj/fsck_ffs -f /dev/sd3a
>     1m32.81s real     0m12.86s user     0m05.25s system
>
> The difference will be less if a fileystem is filled up, but still nice.

Any takers?

        -Otto

>
> Index: fsck.h
> ===================================================================
> RCS file: /cvs/src/sbin/fsck_ffs/fsck.h,v
> retrieving revision 1.32
> diff -u -p -r1.32 fsck.h
> --- fsck.h 5 Jan 2018 09:33:47 -0000 1.32
> +++ fsck.h 21 Jun 2020 12:48:50 -0000
> @@ -136,7 +136,6 @@ struct bufarea {
>  struct bufarea bufhead; /* head of list of other blks in filesys */
>  struct bufarea sblk; /* file system superblock */
>  struct bufarea asblk; /* alternate file system superblock */
> -struct bufarea cgblk; /* cylinder group blocks */
>  struct bufarea *pdirbp; /* current directory contents */
>  struct bufarea *pbp; /* current inode block */
>  struct bufarea *getdatablk(daddr_t, long);
> @@ -148,9 +147,7 @@ struct bufarea *getdatablk(daddr_t, long
>   (bp)->b_flags = 0;
>  
>  #define sbdirty() sblk.b_dirty = 1
> -#define cgdirty() cgblk.b_dirty = 1
>  #define sblock (*sblk.b_un.b_fs)
> -#define cgrp (*cgblk.b_un.b_cg)
>  
>  enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE};
>  
> @@ -275,9 +272,13 @@ struct ufs2_dinode ufs2_zino;
>  #define FOUND 0x10
>  
>  union dinode *ginode(ino_t);
> +struct bufarea *cglookup(u_int cg);
>  struct inoinfo *getinoinfo(ino_t);
>  void getblk(struct bufarea *, daddr_t, long);
>  ino_t allocino(ino_t, int);
> +void *Malloc(size_t);
> +void *Calloc(size_t, size_t);
> +void *Reallocarray(void *, size_t, size_t);
>  
>  int (*info_fn)(char *, size_t);
>  char *info_filesys;
> Index: inode.c
> ===================================================================
> RCS file: /cvs/src/sbin/fsck_ffs/inode.c,v
> retrieving revision 1.49
> diff -u -p -r1.49 inode.c
> --- inode.c 16 Sep 2018 02:43:11 -0000 1.49
> +++ inode.c 21 Jun 2020 12:48:50 -0000
> @@ -370,7 +370,7 @@ setinodebuf(ino_t inum)
>   partialsize = inobufsize;
>   }
>   if (inodebuf == NULL &&
> -    (inodebuf = malloc((unsigned)inobufsize)) == NULL)
> +    (inodebuf = Malloc((unsigned)inobufsize)) == NULL)
>   errexit("Cannot allocate space for inode buffer\n");
>  }
>  
> @@ -401,7 +401,7 @@ cacheino(union dinode *dp, ino_t inumber
>   blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
>   if (blks > NDADDR)
>   blks = NDADDR + NIADDR;
> - inp = malloc(sizeof(*inp) + (blks ? blks - 1 : 0) * sizeof(daddr_t));
> + inp = Malloc(sizeof(*inp) + (blks ? blks - 1 : 0) * sizeof(daddr_t));
>   if (inp == NULL)
>   errexit("cannot allocate memory for inode cache\n");
>   inpp = &inphead[inumber % numdirs];
> @@ -423,10 +423,10 @@ cacheino(union dinode *dp, ino_t inumber
>   inp->i_blks[NDADDR + i] = DIP(dp, di_ib[i]);
>   if (inplast == listmax) {
>   newlistmax = listmax + 100;
> - newinpsort = reallocarray(inpsort,
> + newinpsort = Reallocarray(inpsort,
>      (unsigned)newlistmax, sizeof(struct inoinfo *));
>   if (newinpsort == NULL)
> - errexit("cannot increase directory list");
> + errexit("cannot increase directory list\n");
>   inpsort = newinpsort;
>   listmax = newlistmax;
>   }
> @@ -582,7 +582,8 @@ allocino(ino_t request, int type)
>  {
>   ino_t ino;
>   union dinode *dp;
> - struct cg *cgp = &cgrp;
> + struct bufarea *cgbp;
> + struct cg *cgp;
>   int cg;
>   time_t t;
>   struct inostat *info;
> @@ -602,7 +603,7 @@ allocino(ino_t request, int type)
>   unsigned long newalloced, i;
>   newalloced = MINIMUM(sblock.fs_ipg,
>   MAXIMUM(2 * inostathead[cg].il_numalloced, 10));
> - info = calloc(newalloced, sizeof(struct inostat));
> + info = Calloc(newalloced, sizeof(struct inostat));
>   if (info == NULL) {
>   pwarn("cannot alloc %zu bytes to extend inoinfo\n",
>   sizeof(struct inostat) * newalloced);
> @@ -619,7 +620,8 @@ allocino(ino_t request, int type)
>   inostathead[cg].il_numalloced = newalloced;
>   info = inoinfo(ino);
>   }
> - getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
> + cgbp = cglookup(cg);
> + cgp = cgbp->b_un.b_cg;
>   if (!cg_chkmagic(cgp))
>   pfatal("CG %d: BAD MAGIC NUMBER\n", cg);
>   setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
> @@ -637,7 +639,7 @@ allocino(ino_t request, int type)
>   default:
>   return (0);
>   }
> - cgdirty();
> + dirty(cgbp);
>   dp = ginode(ino);
>   DIP_SET(dp, di_db[0],  allocblk(1));
>   if (DIP(dp, di_db[0]) == 0) {
> Index: pass1.c
> ===================================================================
> RCS file: /cvs/src/sbin/fsck_ffs/pass1.c,v
> retrieving revision 1.46
> diff -u -p -r1.46 pass1.c
> --- pass1.c 20 Jun 2020 07:49:04 -0000 1.46
> +++ pass1.c 21 Jun 2020 12:48:50 -0000
> @@ -66,6 +66,8 @@ pass1(void)
>   ino_t inumber, inosused, ninosused;
>   size_t inospace;
>   struct inostat *info;
> + struct bufarea *cgbp;
> + struct cg *cgp;
>   u_int c;
>   struct inodesc idesc;
>   daddr_t i, cgd;
> @@ -99,9 +101,10 @@ pass1(void)
>   for (c = 0; c < sblock.fs_ncg; c++) {
>   inumber = c * sblock.fs_ipg;
>   setinodebuf(inumber);
> - getblk(&cgblk, cgtod(&sblock, c), sblock.fs_cgsize);
> + cgbp = cglookup(c);
> + cgp = cgbp->b_un.b_cg;
>   if (sblock.fs_magic == FS_UFS2_MAGIC) {
> - inosused = cgrp.cg_initediblk;
> + inosused = cgp->cg_initediblk;
>   if (inosused > sblock.fs_ipg)
>   inosused = sblock.fs_ipg;
>   } else
> @@ -115,7 +118,7 @@ pass1(void)
>   * read only those inodes in from disk.
>   */
>   if (preen && usedsoftdep) {
> - cp = &cg_inosused(&cgrp)[(inosused - 1) / CHAR_BIT];
> + cp = &cg_inosused(cgp)[(inosused - 1) / CHAR_BIT];
>   for ( ; inosused != 0; cp--) {
>   if (*cp == 0) {
>   if (inosused > CHAR_BIT)
> @@ -140,10 +143,10 @@ pass1(void)
>   inostathead[c].il_stat = 0;
>   continue;
>   }
> - info = calloc((unsigned)inosused, sizeof(struct inostat));
> + info = Calloc((unsigned)inosused, sizeof(struct inostat));
>   inospace = (unsigned)inosused * sizeof(struct inostat);
>   if (info == NULL)
> - errexit("cannot alloc %zu bytes for inoinfo", inospace);
> + errexit("cannot alloc %zu bytes for inoinfo\n", inospace);
>   inostathead[c].il_stat = info;
>   /*
>   * Scan the allocated inodes.
> @@ -179,7 +182,7 @@ pass1(void)
>   struct inostat *ninfo;
>   size_t ninospace;
>  
> - ninfo = reallocarray(info, ninosused, sizeof(*ninfo));
> + ninfo = Reallocarray(info, ninosused, sizeof(*ninfo));
>   if (ninfo == NULL) {
>   pfatal("too many inodes %llu, or out of memory\n",
>      (unsigned long long)ninosused);
> @@ -300,7 +303,7 @@ checkinode(ino_t inumber, struct inodesc
>   n_files++;
>   ILNCOUNT(inumber) = DIP(dp, di_nlink);
>   if (DIP(dp, di_nlink) <= 0) {
> - zlnp = malloc(sizeof *zlnp);
> + zlnp = Malloc(sizeof *zlnp);
>   if (zlnp == NULL) {
>   pfatal("LINK COUNT TABLE OVERFLOW");
>   if (reply("CONTINUE") == 0) {
> @@ -392,7 +395,7 @@ pass1check(struct inodesc *idesc)
>   }
>   return (STOP);
>   }
> - new = malloc(sizeof(struct dups));
> + new = Malloc(sizeof(struct dups));
>   if (new == NULL) {
>   pfatal("DUP TABLE OVERFLOW.");
>   if (reply("CONTINUE") == 0) {
> Index: pass5.c
> ===================================================================
> RCS file: /cvs/src/sbin/fsck_ffs/pass5.c,v
> retrieving revision 1.49
> diff -u -p -r1.49 pass5.c
> --- pass5.c 20 Jun 2020 07:49:04 -0000 1.49
> +++ pass5.c 21 Jun 2020 12:48:50 -0000
> @@ -65,7 +65,6 @@ pass5(void)
>   u_int c;
>   int inomapsize, blkmapsize;
>   struct fs *fs = &sblock;
> - struct cg *cg = &cgrp;
>   daddr_t dbase, dmax;
>   daddr_t d;
>   long i, k, rewritecg = 0;
> @@ -76,6 +75,8 @@ pass5(void)
>   char buf[MAXBSIZE];
>   struct cg *newcg = (struct cg *)buf;
>   struct ocg *ocg = (struct ocg *)buf;
> + struct cg *cg;
> + struct bufarea *cgbp;
>  
>   memset(newcg, 0, (size_t)fs->fs_cgsize);
>   if (cvtlevel >= 3) {
> @@ -179,7 +180,8 @@ pass5(void)
>   info_fn = pass5_info;
>   for (c = 0; c < fs->fs_ncg; c++) {
>   info_cg = c;
> - getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize);
> + cgbp = cglookup(c);
> + cg = cgbp->b_un.b_cg;
>   if (!cg_chkmagic(cg))
>   pfatal("CG %u: BAD MAGIC NUMBER\n", c);
>   dbase = cgbase(fs, c);
> @@ -323,13 +325,13 @@ pass5(void)
>   }
>   if (rewritecg) {
>   memcpy(cg, newcg, (size_t)fs->fs_cgsize);
> - cgdirty();
> + dirty(cgbp);
>   continue;
>   }
>   if (memcmp(newcg, cg, basesize) &&
>      dofix(&idesc[2], "SUMMARY INFORMATION BAD")) {
>   memcpy(cg, newcg, (size_t)basesize);
> - cgdirty();
> + dirty(cgbp);
>   }
>   if (usedsoftdep) {
>   for (i = 0; i < inomapsize; i++) {
> @@ -364,7 +366,7 @@ pass5(void)
>      dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) {
>   memmove(cg_inosused(cg), cg_inosused(newcg),
>   (size_t)mapsize);
> - cgdirty();
> + dirty(cgbp);
>   }
>   }
>   info_fn = NULL;
> Index: utilities.c
> ===================================================================
> RCS file: /cvs/src/sbin/fsck_ffs/utilities.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 utilities.c
> --- utilities.c 20 Jun 2020 07:49:04 -0000 1.53
> +++ utilities.c 21 Jun 2020 12:48:50 -0000
> @@ -51,7 +51,8 @@
>  #include "fsck.h"
>  #include "extern.h"
>  
> -long diskreads, totalreads; /* Disk cache statistics */
> +long diskreads, totalreads; /* Disk cache statistics */
> +static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
>  
>  static void rwerror(char *, daddr_t);
>  
> @@ -176,6 +177,39 @@ bufinit(void)
>  }
>  
>  /*
> + * Manage cylinder group buffers.
> + */
> +static struct bufarea *cgbufs; /* header for cylinder group cache */
> +static int flushtries; /* number of tries to reclaim memory */
> +struct bufarea *
> +cglookup(u_int cg)
> +{
> + struct bufarea *cgbp;
> + struct cg *cgp;
> +
> + if (cgbufs == NULL) {
> + cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
> + if (cgbufs == NULL)
> + errexit("cannot allocate cylinder group buffers");
> + }
> + cgbp = &cgbufs[cg];
> + if (cgbp->b_un.b_cg != NULL)
> + return (cgbp);
> + cgp = NULL;
> + if (flushtries == 0)
> + cgp = malloc((unsigned int)sblock.fs_cgsize);
> + if (cgp == NULL) {
> + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
> + return (&cgblk);
> + }
> + cgbp->b_un.b_cg = cgp;
> + initbarea(cgbp);
> + getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
> + return (cgbp);
> +}
> +
> +
> +/*
>   * Manage a cache of directory blocks.
>   */
>  struct bufarea *
> @@ -305,6 +339,15 @@ ckfini(int markclean)
>   }
>   if (bufhead.b_size != cnt)
>   errexit("Panic: lost %d buffers\n", bufhead.b_size - cnt);
> + if (cgbufs != NULL) {
> + for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
> + if (cgbufs[cnt].b_un.b_cg == NULL)
> + continue;
> + flush(fswritefd, &cgbufs[cnt]);
> + free(cgbufs[cnt].b_un.b_cg);
> + }
> + free(cgbufs);
> + }
>   pbp = pdirbp = NULL;
>   if (markclean && (sblock.fs_clean & FS_ISCLEAN) == 0) {
>   /*
> @@ -400,7 +443,8 @@ allocblk(int frags)
>  {
>   daddr_t i, baseblk;
>   int j, k, cg;
> - struct cg *cgp = &cgrp;
> + struct bufarea *cgbp;
> + struct cg *cgp;
>  
>   if (frags <= 0 || frags > sblock.fs_frag)
>   return (0);
> @@ -416,7 +460,8 @@ allocblk(int frags)
>   continue;
>   }
>   cg = dtog(&sblock, i + j);
> - getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
> + cgbp = cglookup(cg);
> + cgp = cgbp->b_un.b_cg;
>   if (!cg_chkmagic(cgp))
>   pfatal("CG %d: BAD MAGIC NUMBER\n", cg);
>   baseblk = dtogd(&sblock, i + j);
> @@ -614,4 +659,68 @@ catchinfo(int signo)
>   writev(info_fd, iov, 4);
>   }
>   errno = save_errno;
> +}
> +/*
> + * Attempt to flush a cylinder group cache entry.
> + * Return whether the flush was successful.
> + */
> +static int
> +flushentry(void)
> +{
> + struct bufarea *cgbp;
> +
> + if (flushtries == sblock.fs_ncg || cgbufs == NULL)
> + return (0);
> + cgbp = &cgbufs[flushtries++];
> + if (cgbp->b_un.b_cg == NULL)
> + return (0);
> + flush(fswritefd, cgbp);
> + free(cgbp->b_un.b_buf);
> + cgbp->b_un.b_buf = NULL;
> + return (1);
> +}
> +
> +/*
> + * Wrapper for malloc() that flushes the cylinder group cache to try
> + * to get space.
> + */
> +void *
> +Malloc(size_t size)
> +{
> + void *retval;
> +
> + while ((retval = malloc(size)) == NULL)
> + if (flushentry() == 0)
> + break;
> + return (retval);
> +}
> +
> +/*
> + * Wrapper for calloc() that flushes the cylinder group cache to try
> + * to get space.
> + */
> +void*
> +Calloc(size_t cnt, size_t size)
> +{
> + void *retval;
> +
> + while ((retval = calloc(cnt, size)) == NULL)
> + if (flushentry() == 0)
> + break;
> + return (retval);
> +}
> +
> +/*
> + * Wrapper for reallocarray() that flushes the cylinder group cache to try
> + * to get space.
> + */
> +void*
> +Reallocarray(void *p, size_t cnt, size_t size)
> +{
> + void *retval;
> +
> + while ((retval = reallocarray(p, cnt, size)) == NULL)
> + if (flushentry() == 0)
> + break;
> + return (retval);
>  }
>
>

Reply | Threaded
Open this post in threaded view
|

Re: fsck_ffs: faster with lots of cylinder groups

Otto Moerbeek
On Mon, Jun 29, 2020 at 02:30:41PM +0200, Otto Moerbeek wrote:

> On Sun, Jun 21, 2020 at 03:35:21PM +0200, Otto Moerbeek wrote:
>
> > Hi,
> >
> > both phase 1 and phase 5 need cylinder group metadata.  This diff
> > keeps the cg data read in phase 1 in memory to be used by phase 5 if
> > possible. From FreeBSD.
> >
> > -Otto
> >
> > On an empty 30T fileystem:
> >
> > $ time obj/fsck_ffs -f /dev/sd3a
> >     2m44.10s real     0m13.21s user     0m07.38s system
> >
> > $ time doas obj/fsck_ffs -f /dev/sd3a
> >     1m32.81s real     0m12.86s user     0m05.25s system
> >
> > The difference will be less if a fileystem is filled up, but still nice.
>
> Any takers?

No feedback. I'm getting discouraged in doing more filesystem work...

What to do?

1) Abondon the diff
2) Commit without ok

I did quite extensive testing, but both options are unsatisfactory.

        -Otto

>
> >
> > Index: fsck.h
> > ===================================================================
> > RCS file: /cvs/src/sbin/fsck_ffs/fsck.h,v
> > retrieving revision 1.32
> > diff -u -p -r1.32 fsck.h
> > --- fsck.h 5 Jan 2018 09:33:47 -0000 1.32
> > +++ fsck.h 21 Jun 2020 12:48:50 -0000
> > @@ -136,7 +136,6 @@ struct bufarea {
> >  struct bufarea bufhead; /* head of list of other blks in filesys */
> >  struct bufarea sblk; /* file system superblock */
> >  struct bufarea asblk; /* alternate file system superblock */
> > -struct bufarea cgblk; /* cylinder group blocks */
> >  struct bufarea *pdirbp; /* current directory contents */
> >  struct bufarea *pbp; /* current inode block */
> >  struct bufarea *getdatablk(daddr_t, long);
> > @@ -148,9 +147,7 @@ struct bufarea *getdatablk(daddr_t, long
> >   (bp)->b_flags = 0;
> >  
> >  #define sbdirty() sblk.b_dirty = 1
> > -#define cgdirty() cgblk.b_dirty = 1
> >  #define sblock (*sblk.b_un.b_fs)
> > -#define cgrp (*cgblk.b_un.b_cg)
> >  
> >  enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE};
> >  
> > @@ -275,9 +272,13 @@ struct ufs2_dinode ufs2_zino;
> >  #define FOUND 0x10
> >  
> >  union dinode *ginode(ino_t);
> > +struct bufarea *cglookup(u_int cg);
> >  struct inoinfo *getinoinfo(ino_t);
> >  void getblk(struct bufarea *, daddr_t, long);
> >  ino_t allocino(ino_t, int);
> > +void *Malloc(size_t);
> > +void *Calloc(size_t, size_t);
> > +void *Reallocarray(void *, size_t, size_t);
> >  
> >  int (*info_fn)(char *, size_t);
> >  char *info_filesys;
> > Index: inode.c
> > ===================================================================
> > RCS file: /cvs/src/sbin/fsck_ffs/inode.c,v
> > retrieving revision 1.49
> > diff -u -p -r1.49 inode.c
> > --- inode.c 16 Sep 2018 02:43:11 -0000 1.49
> > +++ inode.c 21 Jun 2020 12:48:50 -0000
> > @@ -370,7 +370,7 @@ setinodebuf(ino_t inum)
> >   partialsize = inobufsize;
> >   }
> >   if (inodebuf == NULL &&
> > -    (inodebuf = malloc((unsigned)inobufsize)) == NULL)
> > +    (inodebuf = Malloc((unsigned)inobufsize)) == NULL)
> >   errexit("Cannot allocate space for inode buffer\n");
> >  }
> >  
> > @@ -401,7 +401,7 @@ cacheino(union dinode *dp, ino_t inumber
> >   blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
> >   if (blks > NDADDR)
> >   blks = NDADDR + NIADDR;
> > - inp = malloc(sizeof(*inp) + (blks ? blks - 1 : 0) * sizeof(daddr_t));
> > + inp = Malloc(sizeof(*inp) + (blks ? blks - 1 : 0) * sizeof(daddr_t));
> >   if (inp == NULL)
> >   errexit("cannot allocate memory for inode cache\n");
> >   inpp = &inphead[inumber % numdirs];
> > @@ -423,10 +423,10 @@ cacheino(union dinode *dp, ino_t inumber
> >   inp->i_blks[NDADDR + i] = DIP(dp, di_ib[i]);
> >   if (inplast == listmax) {
> >   newlistmax = listmax + 100;
> > - newinpsort = reallocarray(inpsort,
> > + newinpsort = Reallocarray(inpsort,
> >      (unsigned)newlistmax, sizeof(struct inoinfo *));
> >   if (newinpsort == NULL)
> > - errexit("cannot increase directory list");
> > + errexit("cannot increase directory list\n");
> >   inpsort = newinpsort;
> >   listmax = newlistmax;
> >   }
> > @@ -582,7 +582,8 @@ allocino(ino_t request, int type)
> >  {
> >   ino_t ino;
> >   union dinode *dp;
> > - struct cg *cgp = &cgrp;
> > + struct bufarea *cgbp;
> > + struct cg *cgp;
> >   int cg;
> >   time_t t;
> >   struct inostat *info;
> > @@ -602,7 +603,7 @@ allocino(ino_t request, int type)
> >   unsigned long newalloced, i;
> >   newalloced = MINIMUM(sblock.fs_ipg,
> >   MAXIMUM(2 * inostathead[cg].il_numalloced, 10));
> > - info = calloc(newalloced, sizeof(struct inostat));
> > + info = Calloc(newalloced, sizeof(struct inostat));
> >   if (info == NULL) {
> >   pwarn("cannot alloc %zu bytes to extend inoinfo\n",
> >   sizeof(struct inostat) * newalloced);
> > @@ -619,7 +620,8 @@ allocino(ino_t request, int type)
> >   inostathead[cg].il_numalloced = newalloced;
> >   info = inoinfo(ino);
> >   }
> > - getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
> > + cgbp = cglookup(cg);
> > + cgp = cgbp->b_un.b_cg;
> >   if (!cg_chkmagic(cgp))
> >   pfatal("CG %d: BAD MAGIC NUMBER\n", cg);
> >   setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
> > @@ -637,7 +639,7 @@ allocino(ino_t request, int type)
> >   default:
> >   return (0);
> >   }
> > - cgdirty();
> > + dirty(cgbp);
> >   dp = ginode(ino);
> >   DIP_SET(dp, di_db[0],  allocblk(1));
> >   if (DIP(dp, di_db[0]) == 0) {
> > Index: pass1.c
> > ===================================================================
> > RCS file: /cvs/src/sbin/fsck_ffs/pass1.c,v
> > retrieving revision 1.46
> > diff -u -p -r1.46 pass1.c
> > --- pass1.c 20 Jun 2020 07:49:04 -0000 1.46
> > +++ pass1.c 21 Jun 2020 12:48:50 -0000
> > @@ -66,6 +66,8 @@ pass1(void)
> >   ino_t inumber, inosused, ninosused;
> >   size_t inospace;
> >   struct inostat *info;
> > + struct bufarea *cgbp;
> > + struct cg *cgp;
> >   u_int c;
> >   struct inodesc idesc;
> >   daddr_t i, cgd;
> > @@ -99,9 +101,10 @@ pass1(void)
> >   for (c = 0; c < sblock.fs_ncg; c++) {
> >   inumber = c * sblock.fs_ipg;
> >   setinodebuf(inumber);
> > - getblk(&cgblk, cgtod(&sblock, c), sblock.fs_cgsize);
> > + cgbp = cglookup(c);
> > + cgp = cgbp->b_un.b_cg;
> >   if (sblock.fs_magic == FS_UFS2_MAGIC) {
> > - inosused = cgrp.cg_initediblk;
> > + inosused = cgp->cg_initediblk;
> >   if (inosused > sblock.fs_ipg)
> >   inosused = sblock.fs_ipg;
> >   } else
> > @@ -115,7 +118,7 @@ pass1(void)
> >   * read only those inodes in from disk.
> >   */
> >   if (preen && usedsoftdep) {
> > - cp = &cg_inosused(&cgrp)[(inosused - 1) / CHAR_BIT];
> > + cp = &cg_inosused(cgp)[(inosused - 1) / CHAR_BIT];
> >   for ( ; inosused != 0; cp--) {
> >   if (*cp == 0) {
> >   if (inosused > CHAR_BIT)
> > @@ -140,10 +143,10 @@ pass1(void)
> >   inostathead[c].il_stat = 0;
> >   continue;
> >   }
> > - info = calloc((unsigned)inosused, sizeof(struct inostat));
> > + info = Calloc((unsigned)inosused, sizeof(struct inostat));
> >   inospace = (unsigned)inosused * sizeof(struct inostat);
> >   if (info == NULL)
> > - errexit("cannot alloc %zu bytes for inoinfo", inospace);
> > + errexit("cannot alloc %zu bytes for inoinfo\n", inospace);
> >   inostathead[c].il_stat = info;
> >   /*
> >   * Scan the allocated inodes.
> > @@ -179,7 +182,7 @@ pass1(void)
> >   struct inostat *ninfo;
> >   size_t ninospace;
> >  
> > - ninfo = reallocarray(info, ninosused, sizeof(*ninfo));
> > + ninfo = Reallocarray(info, ninosused, sizeof(*ninfo));
> >   if (ninfo == NULL) {
> >   pfatal("too many inodes %llu, or out of memory\n",
> >      (unsigned long long)ninosused);
> > @@ -300,7 +303,7 @@ checkinode(ino_t inumber, struct inodesc
> >   n_files++;
> >   ILNCOUNT(inumber) = DIP(dp, di_nlink);
> >   if (DIP(dp, di_nlink) <= 0) {
> > - zlnp = malloc(sizeof *zlnp);
> > + zlnp = Malloc(sizeof *zlnp);
> >   if (zlnp == NULL) {
> >   pfatal("LINK COUNT TABLE OVERFLOW");
> >   if (reply("CONTINUE") == 0) {
> > @@ -392,7 +395,7 @@ pass1check(struct inodesc *idesc)
> >   }
> >   return (STOP);
> >   }
> > - new = malloc(sizeof(struct dups));
> > + new = Malloc(sizeof(struct dups));
> >   if (new == NULL) {
> >   pfatal("DUP TABLE OVERFLOW.");
> >   if (reply("CONTINUE") == 0) {
> > Index: pass5.c
> > ===================================================================
> > RCS file: /cvs/src/sbin/fsck_ffs/pass5.c,v
> > retrieving revision 1.49
> > diff -u -p -r1.49 pass5.c
> > --- pass5.c 20 Jun 2020 07:49:04 -0000 1.49
> > +++ pass5.c 21 Jun 2020 12:48:50 -0000
> > @@ -65,7 +65,6 @@ pass5(void)
> >   u_int c;
> >   int inomapsize, blkmapsize;
> >   struct fs *fs = &sblock;
> > - struct cg *cg = &cgrp;
> >   daddr_t dbase, dmax;
> >   daddr_t d;
> >   long i, k, rewritecg = 0;
> > @@ -76,6 +75,8 @@ pass5(void)
> >   char buf[MAXBSIZE];
> >   struct cg *newcg = (struct cg *)buf;
> >   struct ocg *ocg = (struct ocg *)buf;
> > + struct cg *cg;
> > + struct bufarea *cgbp;
> >  
> >   memset(newcg, 0, (size_t)fs->fs_cgsize);
> >   if (cvtlevel >= 3) {
> > @@ -179,7 +180,8 @@ pass5(void)
> >   info_fn = pass5_info;
> >   for (c = 0; c < fs->fs_ncg; c++) {
> >   info_cg = c;
> > - getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize);
> > + cgbp = cglookup(c);
> > + cg = cgbp->b_un.b_cg;
> >   if (!cg_chkmagic(cg))
> >   pfatal("CG %u: BAD MAGIC NUMBER\n", c);
> >   dbase = cgbase(fs, c);
> > @@ -323,13 +325,13 @@ pass5(void)
> >   }
> >   if (rewritecg) {
> >   memcpy(cg, newcg, (size_t)fs->fs_cgsize);
> > - cgdirty();
> > + dirty(cgbp);
> >   continue;
> >   }
> >   if (memcmp(newcg, cg, basesize) &&
> >      dofix(&idesc[2], "SUMMARY INFORMATION BAD")) {
> >   memcpy(cg, newcg, (size_t)basesize);
> > - cgdirty();
> > + dirty(cgbp);
> >   }
> >   if (usedsoftdep) {
> >   for (i = 0; i < inomapsize; i++) {
> > @@ -364,7 +366,7 @@ pass5(void)
> >      dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) {
> >   memmove(cg_inosused(cg), cg_inosused(newcg),
> >   (size_t)mapsize);
> > - cgdirty();
> > + dirty(cgbp);
> >   }
> >   }
> >   info_fn = NULL;
> > Index: utilities.c
> > ===================================================================
> > RCS file: /cvs/src/sbin/fsck_ffs/utilities.c,v
> > retrieving revision 1.53
> > diff -u -p -r1.53 utilities.c
> > --- utilities.c 20 Jun 2020 07:49:04 -0000 1.53
> > +++ utilities.c 21 Jun 2020 12:48:50 -0000
> > @@ -51,7 +51,8 @@
> >  #include "fsck.h"
> >  #include "extern.h"
> >  
> > -long diskreads, totalreads; /* Disk cache statistics */
> > +long diskreads, totalreads; /* Disk cache statistics */
> > +static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
> >  
> >  static void rwerror(char *, daddr_t);
> >  
> > @@ -176,6 +177,39 @@ bufinit(void)
> >  }
> >  
> >  /*
> > + * Manage cylinder group buffers.
> > + */
> > +static struct bufarea *cgbufs; /* header for cylinder group cache */
> > +static int flushtries; /* number of tries to reclaim memory */
> > +struct bufarea *
> > +cglookup(u_int cg)
> > +{
> > + struct bufarea *cgbp;
> > + struct cg *cgp;
> > +
> > + if (cgbufs == NULL) {
> > + cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
> > + if (cgbufs == NULL)
> > + errexit("cannot allocate cylinder group buffers");
> > + }
> > + cgbp = &cgbufs[cg];
> > + if (cgbp->b_un.b_cg != NULL)
> > + return (cgbp);
> > + cgp = NULL;
> > + if (flushtries == 0)
> > + cgp = malloc((unsigned int)sblock.fs_cgsize);
> > + if (cgp == NULL) {
> > + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
> > + return (&cgblk);
> > + }
> > + cgbp->b_un.b_cg = cgp;
> > + initbarea(cgbp);
> > + getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
> > + return (cgbp);
> > +}
> > +
> > +
> > +/*
> >   * Manage a cache of directory blocks.
> >   */
> >  struct bufarea *
> > @@ -305,6 +339,15 @@ ckfini(int markclean)
> >   }
> >   if (bufhead.b_size != cnt)
> >   errexit("Panic: lost %d buffers\n", bufhead.b_size - cnt);
> > + if (cgbufs != NULL) {
> > + for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
> > + if (cgbufs[cnt].b_un.b_cg == NULL)
> > + continue;
> > + flush(fswritefd, &cgbufs[cnt]);
> > + free(cgbufs[cnt].b_un.b_cg);
> > + }
> > + free(cgbufs);
> > + }
> >   pbp = pdirbp = NULL;
> >   if (markclean && (sblock.fs_clean & FS_ISCLEAN) == 0) {
> >   /*
> > @@ -400,7 +443,8 @@ allocblk(int frags)
> >  {
> >   daddr_t i, baseblk;
> >   int j, k, cg;
> > - struct cg *cgp = &cgrp;
> > + struct bufarea *cgbp;
> > + struct cg *cgp;
> >  
> >   if (frags <= 0 || frags > sblock.fs_frag)
> >   return (0);
> > @@ -416,7 +460,8 @@ allocblk(int frags)
> >   continue;
> >   }
> >   cg = dtog(&sblock, i + j);
> > - getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
> > + cgbp = cglookup(cg);
> > + cgp = cgbp->b_un.b_cg;
> >   if (!cg_chkmagic(cgp))
> >   pfatal("CG %d: BAD MAGIC NUMBER\n", cg);
> >   baseblk = dtogd(&sblock, i + j);
> > @@ -614,4 +659,68 @@ catchinfo(int signo)
> >   writev(info_fd, iov, 4);
> >   }
> >   errno = save_errno;
> > +}
> > +/*
> > + * Attempt to flush a cylinder group cache entry.
> > + * Return whether the flush was successful.
> > + */
> > +static int
> > +flushentry(void)
> > +{
> > + struct bufarea *cgbp;
> > +
> > + if (flushtries == sblock.fs_ncg || cgbufs == NULL)
> > + return (0);
> > + cgbp = &cgbufs[flushtries++];
> > + if (cgbp->b_un.b_cg == NULL)
> > + return (0);
> > + flush(fswritefd, cgbp);
> > + free(cgbp->b_un.b_buf);
> > + cgbp->b_un.b_buf = NULL;
> > + return (1);
> > +}
> > +
> > +/*
> > + * Wrapper for malloc() that flushes the cylinder group cache to try
> > + * to get space.
> > + */
> > +void *
> > +Malloc(size_t size)
> > +{
> > + void *retval;
> > +
> > + while ((retval = malloc(size)) == NULL)
> > + if (flushentry() == 0)
> > + break;
> > + return (retval);
> > +}
> > +
> > +/*
> > + * Wrapper for calloc() that flushes the cylinder group cache to try
> > + * to get space.
> > + */
> > +void*
> > +Calloc(size_t cnt, size_t size)
> > +{
> > + void *retval;
> > +
> > + while ((retval = calloc(cnt, size)) == NULL)
> > + if (flushentry() == 0)
> > + break;
> > + return (retval);
> > +}
> > +
> > +/*
> > + * Wrapper for reallocarray() that flushes the cylinder group cache to try
> > + * to get space.
> > + */
> > +void*
> > +Reallocarray(void *p, size_t cnt, size_t size)
> > +{
> > + void *retval;
> > +
> > + while ((retval = reallocarray(p, cnt, size)) == NULL)
> > + if (flushentry() == 0)
> > + break;
> > + return (retval);
> >  }
> >
> >
>

Reply | Threaded
Open this post in threaded view
|

Re: fsck_ffs: faster with lots of cylinder groups

Solene Rapenne
On Sun, 12 Jul 2020 09:13:47 +0200
Otto Moerbeek <[hidden email]>:

> On Mon, Jun 29, 2020 at 02:30:41PM +0200, Otto Moerbeek wrote:
>
> > On Sun, Jun 21, 2020 at 03:35:21PM +0200, Otto Moerbeek wrote:
> >  
> > > Hi,
> > >
> > > both phase 1 and phase 5 need cylinder group metadata.  This diff
> > > keeps the cg data read in phase 1 in memory to be used by phase 5 if
> > > possible. From FreeBSD.
> > >
> > > -Otto
> > >
> > > On an empty 30T fileystem:
> > >
> > > $ time obj/fsck_ffs -f /dev/sd3a
> > >     2m44.10s real     0m13.21s user     0m07.38s system
> > >
> > > $ time doas obj/fsck_ffs -f /dev/sd3a
> > >     1m32.81s real     0m12.86s user     0m05.25s system
> > >
> > > The difference will be less if a fileystem is filled up, but still nice.  
> >
> > Any takers?  
>
> No feedback. I'm getting discouraged in doing more filesystem work...
>
> What to do?
>
> 1) Abondon the diff
> 2) Commit without ok
>
> I did quite extensive testing, but both options are unsatisfactory.
>
> -Otto

I'm not sure how to test your diff.
Would running fsck on a sane filesystem enough?

Are you using Vms that you halt to force a
fsck on them? Would this be a good test too?

Reply | Threaded
Open this post in threaded view
|

Re: fsck_ffs: faster with lots of cylinder groups

Otto Moerbeek
On Sun, Jul 12, 2020 at 11:07:05AM +0200, Solene Rapenne wrote:

> On Sun, 12 Jul 2020 09:13:47 +0200
> Otto Moerbeek <[hidden email]>:
>
> > On Mon, Jun 29, 2020 at 02:30:41PM +0200, Otto Moerbeek wrote:
> >
> > > On Sun, Jun 21, 2020 at 03:35:21PM +0200, Otto Moerbeek wrote:
> > >  
> > > > Hi,
> > > >
> > > > both phase 1 and phase 5 need cylinder group metadata.  This diff
> > > > keeps the cg data read in phase 1 in memory to be used by phase 5 if
> > > > possible. From FreeBSD.
> > > >
> > > > -Otto
> > > >
> > > > On an empty 30T fileystem:
> > > >
> > > > $ time obj/fsck_ffs -f /dev/sd3a
> > > >     2m44.10s real     0m13.21s user     0m07.38s system
> > > >
> > > > $ time doas obj/fsck_ffs -f /dev/sd3a
> > > >     1m32.81s real     0m12.86s user     0m05.25s system
> > > >
> > > > The difference will be less if a fileystem is filled up, but still nice.  
> > >
> > > Any takers?  
> >
> > No feedback. I'm getting discouraged in doing more filesystem work...
> >
> > What to do?
> >
> > 1) Abondon the diff
> > 2) Commit without ok
> >
> > I did quite extensive testing, but both options are unsatisfactory.
> >
> > -Otto
>
> I'm not sure how to test your diff.
> Would running fsck on a sane filesystem enough?
>
> Are you using Vms that you halt to force a
> fsck on them? Would this be a good test too?

I have used both large and small fieysystems, clean and with
inconsistencies, both ffs1 and ffs2. Sometimes I create
inconsistencies by power cycling a machine, buut I have created faulty
filesystems by carefully overwriting meta data with dd in the past as
well.

In this case running with a restricted ulimit -d to force the fallback
code to kick in is also an good idea.

        -Otto