pf: percpu anchor stacks

pf: percpu anchor stacks

Patrick Wildt
Hi,

To step into and out of pf(4) anchors, pf(4) uses a temporary stack
that is a global variable.  Once we run pf_test_rule() in parallel and
an anchor is evaluated on several CPUs at once, the global stack is
used concurrently, which can lead to panics.  To solve this issue, this
diff allocates a per-CPU stack using the cpumem(9) API.
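
For context, this follows the usual cpumem(9) pattern: allocate one
instance of the structure per CPU once at attach time, then bracket
each use with cpumem_enter()/cpumem_leave() to reach the local CPU's
copy.  A minimal sketch, condensed from the diff below:

    struct cpumem *pf_anchor_stacks;

    /* at pfattach() time: one struct pf_anchor_stack per CPU */
    pf_anchor_stacks = cpumem_malloc(sizeof(struct pf_anchor_stack),
        M_TEMP);

    /* per evaluation: operate on this CPU's private frames */
    struct pf_anchor_stack *s = cpumem_enter(pf_anchor_stacks);
    /* ... read and write s->frame[] without racing other CPUs ... */
    cpumem_leave(pf_anchor_stacks, s);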

Opinions?

Patrick

diff --git a/sys/net/pf.c b/sys/net/pf.c
index 3ddcd7726d2..78316ae0009 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -119,12 +119,7 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
- struct pf_ruleset *rs;
- struct pf_rule *r;
- struct pf_anchor_node *parent;
- struct pf_anchor *child;
-} pf_anchor_stack[64];
+struct cpumem *pf_anchor_stacks;
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -3003,22 +2998,26 @@ void
 pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
     struct pf_rule **r, struct pf_rule **a)
 {
+ struct pf_anchor_stack *s;
  struct pf_anchor_stackframe *f;
 
- if (*depth >= sizeof(pf_anchor_stack) /
-    sizeof(pf_anchor_stack[0])) {
+ s = cpumem_enter(pf_anchor_stacks);
+
+ if (*depth >= sizeof(*s) / sizeof(s->frame[0])) {
  log(LOG_ERR, "pf: anchor stack overflow\n");
  *r = TAILQ_NEXT(*r, entries);
+ cpumem_leave(pf_anchor_stacks, s);
  return;
  } else if (a != NULL)
  *a = *r;
- f = pf_anchor_stack + (*depth)++;
+ f = s->frame + (*depth)++;
  f->rs = *rs;
  f->r = *r;
  if ((*r)->anchor_wildcard) {
  f->parent = &(*r)->anchor->children;
  if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
  *r = NULL;
+ cpumem_leave(pf_anchor_stacks, s);
  return;
  }
  *rs = &f->child->ruleset;
@@ -3028,19 +3027,23 @@ pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
  *rs = &(*r)->anchor->ruleset;
  }
  *r = TAILQ_FIRST((*rs)->rules.active.ptr);
+ cpumem_leave(pf_anchor_stacks, s);
 }
 
 int
 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
     struct pf_rule **r, struct pf_rule **a, int *match)
 {
+ struct pf_anchor_stack *s;
  struct pf_anchor_stackframe *f;
  int quick = 0;
 
+ s = cpumem_enter(pf_anchor_stacks);
+
  do {
  if (*depth <= 0)
  break;
- f = pf_anchor_stack + *depth - 1;
+ f = s->frame + *depth - 1;
  if (f->parent != NULL && f->child != NULL) {
  f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
  if (f->child != NULL) {
@@ -3066,6 +3069,7 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
  *r = TAILQ_NEXT(f->r, entries);
  } while (*r == NULL);
 
+ cpumem_leave(pf_anchor_stacks, s);
  return (quick);
 }
 
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index 56a43a55ab8..869ca3eaa1d 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -161,6 +161,10 @@ pfattach(int num)
     IPL_SOFTNET, 0, "pfruleitem", NULL);
  pool_init(&pf_queue_pl, sizeof(struct pf_queuespec), 0,
     IPL_SOFTNET, 0, "pfqueue", NULL);
+
+ pf_anchor_stacks = cpumem_malloc(sizeof(struct pf_anchor_stack),
+    M_TEMP);
+
  hfsc_initialize();
  pfr_initialize();
  pfi_initialize();
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 00e9b790a91..62a183727b3 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1872,6 +1872,19 @@ void pf_send_tcp(const struct pf_rule *, sa_family_t,
     u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
     u_int16_t, u_int);
 
+struct pf_anchor_stackframe {
+ struct pf_ruleset *rs;
+ struct pf_rule *r;
+ struct pf_anchor_node *parent;
+ struct pf_anchor *child;
+};
+
+struct pf_anchor_stack {
+ struct pf_anchor_stackframe frame[64];
+};
+
+struct cpumem;
+extern struct cpumem *pf_anchor_stacks;
 #endif /* _KERNEL */
 
 #endif /* _NET_PFVAR_H_ */


Re: pf: percpu anchor stacks

Alexandr Nedvedicky
Hello,

Some time ago I sent a different patch [1], which touched the same functions.
The old patch [1] basically splits pf_test_rule() into two functions:

    pf_test_rule()

    pf_match_rule(), which walks anchors recursively; the recursion depth
    is limited to 64.

The memory footprint of true recursion is the same as that of the array of
stack frames.  The only difference is where the memory gets allocated: on
the kernel stack instead of in a global array.
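
A minimal self-contained sketch of that control flow, with simplified
placeholder types (illustrative only, not the real pf structures):

    #define PF_ANCHOR_STACK_MAX    64    /* same limit as the patch */

    struct ruleset;

    struct rule {
        struct ruleset  *anchor;    /* NULL for a plain rule */
        struct rule     *next;
    };

    struct ruleset {
        struct rule     *rules;
    };

    /* walk a ruleset; recurse into anchors, bounded in depth */
    int
    match_rules(struct ruleset *rs, int depth)
    {
        struct rule *r;

        if (depth >= PF_ANCHOR_STACK_MAX)
            return (-1);        /* too deep: fail */
        for (r = rs->rules; r != NULL; r = r->next) {
            if (r->anchor != NULL &&
                match_rules(r->anchor, depth + 1) == -1)
                return (-1);
            /* evaluate r itself here */
        }
        return (0);
    }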

regards
sasha

[1] https://marc.info/?l=openbsd-tech&m=143902905917671&w=4


Re: pf: percpu anchor stacks

Alexandr Nedvedicky
In reply to this post by Patrick Wildt
Hello,

I'm attaching a patch which removes the stack-as-a-global variable.
It's the old patch [1] updated to the current tree.

Sorry for being pushy and advocating my old, rusty patch.

thanks and
regards
sasha

[1] http://openbsd-archive.7691.n7.nabble.com/Re-PF-SMP-making-anchor-stack-multithreaded-tt275915.html#none

--------8<---------------8<---------------8<------------------8<--------
diff -r d6e3a6338889 src/sys/net/pf.c
--- a/src/sys/net/pf.c Mon Mar 20 01:10:40 2017 +0100
+++ b/src/sys/net/pf.c Fri Mar 24 11:28:18 2017 +0100
@@ -119,12 +119,37 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
- struct pf_ruleset *rs;
- struct pf_rule *r;
- struct pf_anchor_node *parent;
- struct pf_anchor *child;
-} pf_anchor_stack[64];
+struct pf_test_ctx {
+ int test_status;
+ struct pf_pdesc *pd;
+ struct pf_rule_actions act;
+ u_int8_t icmpcode;
+ u_int8_t icmptype;
+ int icmp_dir;
+ int state_icmp;
+ int tag;
+ u_short reason;
+ struct pf_rule_item *ri;
+ struct pf_src_node *sns[PF_SN_MAX];
+ struct pf_rule_slist rules;
+ struct pf_rule *nr;
+ struct pf_rule **rm;
+ struct pf_rule *a;
+ struct pf_rule **am;
+ struct pf_ruleset **rsm;
+ struct pf_ruleset *arsm;
+ struct pf_ruleset *aruleset;
+ struct tcphdr *th;
+ int depth;
+};
+
+#define PF_ANCHOR_STACK_MAX 64
+
+enum {
+ PF_TEST_FAIL = -1,
+ PF_TEST_OK,
+ PF_TEST_QUICK
+};
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -211,11 +236,8 @@ struct pf_state *pf_find_state(struct p
     struct pf_state_key_cmp *, u_int, struct mbuf *);
 int pf_src_connlimit(struct pf_state **);
 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
-void pf_step_into_anchor(int *, struct pf_ruleset **,
-    struct pf_rule **, struct pf_rule **);
-int pf_step_out_of_anchor(int *, struct pf_ruleset **,
-     struct pf_rule **, struct pf_rule **,
-     int *);
+int pf_step_into_anchor(struct pf_test_ctx *, struct pf_rule *);
+int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset *);
 void pf_counters_inc(int, struct pf_pdesc *,
     struct pf_state *, struct pf_rule *,
     struct pf_rule *);
@@ -3019,74 +3041,37 @@ pf_tag_packet(struct mbuf *m, int tag, i
  m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 }
 
-void
-pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a)
+int
+pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r)
 {
- struct pf_anchor_stackframe *f;
-
- if (*depth >= sizeof(pf_anchor_stack) /
-    sizeof(pf_anchor_stack[0])) {
- log(LOG_ERR, "pf: anchor stack overflow\n");
- *r = TAILQ_NEXT(*r, entries);
- return;
- } else if (a != NULL)
- *a = *r;
- f = pf_anchor_stack + (*depth)++;
- f->rs = *rs;
- f->r = *r;
- if ((*r)->anchor_wildcard) {
- f->parent = &(*r)->anchor->children;
- if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
- *r = NULL;
- return;
- }
- *rs = &f->child->ruleset;
- } else {
- f->parent = NULL;
- f->child = NULL;
- *rs = &(*r)->anchor->ruleset;
- }
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
-}
-
-int
-pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a, int *match)
-{
- struct pf_anchor_stackframe *f;
- int quick = 0;
-
- do {
- if (*depth <= 0)
- break;
- f = pf_anchor_stack + *depth - 1;
- if (f->parent != NULL && f->child != NULL) {
- f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
- if (f->child != NULL) {
- *rs = &f->child->ruleset;
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
- if (*r == NULL)
- continue;
- else
- break;
+ int rv;
+
+ if (cx->depth >= PF_ANCHOR_STACK_MAX) {
+ log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
+ return (PF_TEST_FAIL);
+ }
+
+ cx->depth++;
+
+ if (r->anchor_wildcard) {
+ struct pf_anchor *child;
+ rv = PF_TEST_OK;
+ RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
+ rv = pf_match_rule(cx, &child->ruleset);
+ if (rv != 0) {
+ /*
+ * break on quick rule or failure
+ */
+ break;
  }
  }
- (*depth)--;
- if (*depth == 0 && a != NULL)
- *a = NULL;
- else if (a != NULL)
- *a = f->r;
- *rs = f->rs;
- if (*match > *depth) {
- *match = *depth;
- if (f->r->quick)
- quick = 1;
- }
- *r = TAILQ_NEXT(f->r, entries);
- } while (*r == NULL);
-
- return (quick);
+ } else {
+ rv = pf_match_rule(cx, &r->anchor->ruleset);
+ }
+
+ cx->depth--;
+
+ return (rv);
 }
 
 void
@@ -3458,46 +3443,230 @@ pf_rule_to_actions(struct pf_rule *r, st
  } while (0)
 
 int
+pf_match_rule(struct pf_test_ctx *cx, struct pf_ruleset *ruleset)
+{
+ struct pf_rule *r;
+
+ r = TAILQ_FIRST(ruleset->rules.active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ PF_TEST_ATTRIB((pfi_kif_match(r->kif, cx->pd->kif) == r->ifnot),
+ r->skip[PF_SKIP_IFP].ptr);
+ PF_TEST_ATTRIB((r->direction && r->direction != cx->pd->dir),
+ r->skip[PF_SKIP_DIR].ptr);
+ PF_TEST_ATTRIB((r->onrdomain >= 0  &&
+    (r->onrdomain == cx->pd->rdomain) == r->ifnot),
+ r->skip[PF_SKIP_RDOM].ptr);
+ PF_TEST_ATTRIB((r->af && r->af != cx->pd->af),
+ r->skip[PF_SKIP_AF].ptr);
+ PF_TEST_ATTRIB((r->proto && r->proto != cx->pd->proto),
+ r->skip[PF_SKIP_PROTO].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &cx->pd->nsaddr,
+    cx->pd->naf, r->src.neg, cx->pd->kif, cx->act.rtableid)),
+ r->skip[PF_SKIP_SRC_ADDR].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &cx->pd->ndaddr,
+    cx->pd->af, r->dst.neg, NULL, cx->act.rtableid)),
+ r->skip[PF_SKIP_DST_ADDR].ptr);
+
+ switch (cx->pd->virtual_proto) {
+ case PF_VPROTO_FRAGMENT:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((cx->pd->proto == IPPROTO_TCP &&
+    r->flagset),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type/code always 0 in other cases */
+ PF_TEST_ATTRIB((r->type || r->code),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. {uid|gid}.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op || r->uid.op),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_TCP:
+ PF_TEST_ATTRIB(((r->flagset & cx->th->th_flags) !=
+    r->flags),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
+    !pf_osfp_match(pf_osfp_fingerprint(cx->pd),
+    r->os_fingerprint)),
+ TAILQ_NEXT(r, entries));
+ /* FALLTHROUGH */
+
+ case IPPROTO_UDP:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op &&
+    !pf_match_port(r->src.port_op, r->src.port[0],
+    r->src.port[1], cx->pd->nsport)),
+ r->skip[PF_SKIP_SRC_PORT].ptr);
+ PF_TEST_ATTRIB((r->dst.port_op &&
+    !pf_match_port(r->dst.port_op, r->dst.port[0],
+    r->dst.port[1], cx->pd->ndport)),
+ r->skip[PF_SKIP_DST_PORT].ptr);
+ /* tcp/udp only. uid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->uid.op && (cx->pd->lookup.done ||
+    (cx->pd->lookup.done =
+    pf_socket_lookup(cx->pd), 1)) &&
+    !pf_match_uid(r->uid.op, r->uid.uid[0],
+    r->uid.uid[1], cx->pd->lookup.uid)),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. gid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op && (cx->pd->lookup.done ||
+    (cx->pd->lookup.done =
+    pf_socket_lookup(cx->pd), 1)) &&
+    !pf_match_gid(r->gid.op, r->gid.gid[0],
+    r->gid.gid[1], cx->pd->lookup.gid)),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->type && r->type != cx->icmptype + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->code && r->code != cx->icmpcode + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. don't create states on replies */
+ PF_TEST_ATTRIB((r->keep_state && !cx->state_icmp &&
+    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
+    cx->icmp_dir != PF_IN),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ default:
+ break;
+ }
+
+ PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
+    cx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->tos && !(r->tos == cx->pd->tos)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prob &&
+    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->match_tag &&
+    !pf_match_tag(cx->pd->m, r, &cx->tag)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(cx->pd->m, r) ==
+    r->rcvifnot),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prio &&
+    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
+    cx->pd->m->m_pkthdr.pf.prio),
+ TAILQ_NEXT(r, entries));
+
+ /* FALLTHROUGH */
+ if (r->tag)
+ cx->tag = r->tag;
+ if (r->anchor == NULL) {
+ if (r->action == PF_MATCH) {
+ if ((cx->ri = pool_get(&pf_rule_item_pl,
+    PR_NOWAIT)) == NULL) {
+ REASON_SET(&cx->reason, PFRES_MEMORY);
+ cx->test_status = PF_TEST_FAIL;
+ break;
+ }
+ cx->ri->r = r;
+ /* order is irrelevant */
+ SLIST_INSERT_HEAD(&cx->rules, cx->ri, entry);
+ cx->ri = NULL;
+ pf_rule_to_actions(r, &cx->act);
+ if (r->rule_flag & PFRULE_AFTO)
+ cx->pd->naf = r->naf;
+ if (pf_get_transaddr(r, cx->pd, cx->sns,
+    &cx->nr) == -1) {
+ REASON_SET(&cx->reason, PFRES_TRANSLATE);
+ cx->test_status = PF_TEST_FAIL;
+ break;
+ }
+#if NPFLOG > 0
+ if (r->log) {
+ REASON_SET(&cx->reason, PFRES_MATCH);
+ PFLOG_PACKET(cx->pd, cx->reason, r,
+    cx->a, ruleset, NULL);
+ }
+#endif /* NPFLOG > 0 */
+ } else {
+ /*
+ * found matching r
+ */
+ *cx->rm = r;
+ /*
+ * anchor, with ruleset, where r belongs to
+ */
+ *cx->am = cx->a;
+ /*
+ * ruleset where r belongs to
+ */
+ *cx->rsm = ruleset;
+ /*
+ * ruleset, where anchor belongs to.
+ */
+ cx->arsm = cx->aruleset;
+ }
+
+#if NPFLOG > 0
+ if (cx->act.log & PF_LOG_MATCHES)
+ pf_log_matches(cx->pd, r, cx->a, ruleset,
+    &cx->rules);
+#endif /* NPFLOG > 0 */
+
+ if (r->quick) {
+ cx->test_status = PF_TEST_QUICK;
+ break;
+ }
+ } else {
+ cx->a = r; /* remember anchor */
+ cx->aruleset = ruleset; /* and its ruleset */
+ if (pf_step_into_anchor(cx, r) != PF_TEST_OK) {
+ break;
+ }
+ }
+ r = TAILQ_NEXT(r, entries);
+nextrule:;
+ }
+
+ return (cx->test_status);
+}
+
+int
 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
     struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
 {
- struct pf_rule *r;
- struct pf_rule *nr = NULL;
+ struct pf_rule *r = NULL;
  struct pf_rule *a = NULL;
- struct pf_ruleset *arsm = NULL;
- struct pf_ruleset *aruleset = NULL;
  struct pf_ruleset *ruleset = NULL;
- struct pf_rule_slist rules;
- struct pf_rule_item *ri;
- struct pf_src_node *sns[PF_SN_MAX];
  struct pf_state_key *skw = NULL, *sks = NULL;
- struct pf_rule_actions act;
  int rewrite = 0;
- int tag = -1;
- int asd = 0;
- int match = 0;
- int state_icmp = 0, icmp_dir = 0;
  u_int16_t virtual_type, virtual_id;
- u_int8_t icmptype = 0, icmpcode = 0;
  int action = PF_DROP;
-
- bzero(&act, sizeof(act));
- bzero(sns, sizeof(sns));
- act.rtableid = pd->rdomain;
- SLIST_INIT(&rules);
+ struct pf_test_ctx cx;
+ int rv;
+
+ bzero(&cx, sizeof(cx));
+ cx.pd = pd;
+ cx.rm = rm;
+ cx.am = am;
+ cx.rsm = rsm;
+ cx.th = &pd->hdr.tcp;
+ cx.act.rtableid = pd->rdomain;
+ SLIST_INIT(&cx.rules);
 
  if (pd->dir == PF_IN && if_congested()) {
- REASON_SET(reason, PFRES_CONGEST);
+ REASON_SET(&cx.reason, PFRES_CONGEST);
  return (PF_DROP);
  }
 
  switch (pd->virtual_proto) {
  case IPPROTO_ICMP:
- icmptype = pd->hdr.icmp.icmp_type;
- icmpcode = pd->hdr.icmp.icmp_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ cx.icmptype = pd->hdr.icmp.icmp_type;
+ cx.icmpcode = pd->hdr.icmp.icmp_code;
+ cx.state_icmp = pf_icmp_mapping(pd, cx.icmptype,
+    &cx.icmp_dir, &virtual_id, &virtual_type);
+ if (cx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3507,11 +3676,11 @@ pf_test_rule(struct pf_pdesc *pd, struct
  break;
 #ifdef INET6
  case IPPROTO_ICMPV6:
- icmptype = pd->hdr.icmp6.icmp6_type;
- icmpcode = pd->hdr.icmp6.icmp6_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ cx.icmptype = pd->hdr.icmp6.icmp6_type;
+ cx.icmpcode = pd->hdr.icmp6.icmp6_code;
+ cx.state_icmp = pf_icmp_mapping(pd, cx.icmptype,
+    &cx.icmp_dir, &virtual_id, &virtual_type);
+ if (cx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3523,191 +3692,36 @@ pf_test_rule(struct pf_pdesc *pd, struct
  }
 
  ruleset = &pf_main_ruleset;
- r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
- while (r != NULL) {
- if (r->rule_flag & PFRULE_EXPIRED) {
- r = TAILQ_NEXT(r, entries);
- goto nextrule;
- }
- r->evaluations++;
- PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
- r->skip[PF_SKIP_IFP].ptr);
- PF_TEST_ATTRIB((r->direction && r->direction != pd->dir),
- r->skip[PF_SKIP_DIR].ptr);
- PF_TEST_ATTRIB((r->onrdomain >= 0  &&
-    (r->onrdomain == pd->rdomain) == r->ifnot),
- r->skip[PF_SKIP_RDOM].ptr);
- PF_TEST_ATTRIB((r->af && r->af != pd->af),
- r->skip[PF_SKIP_AF].ptr);
- PF_TEST_ATTRIB((r->proto && r->proto != pd->proto),
- r->skip[PF_SKIP_PROTO].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr,
-    pd->naf, r->src.neg, pd->kif, act.rtableid)),
- r->skip[PF_SKIP_SRC_ADDR].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
-    r->dst.neg, NULL, act.rtableid)),
- r->skip[PF_SKIP_DST_ADDR].ptr);
-
- switch (pd->virtual_proto) {
- case PF_VPROTO_FRAGMENT:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
- TAILQ_NEXT(r, entries));
- /* icmp only. type/code always 0 in other cases */
- PF_TEST_ATTRIB((r->type || r->code),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. {uid|gid}.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op || r->uid.op),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_TCP:
- PF_TEST_ATTRIB(((r->flagset & pd->hdr.tcp.th_flags) !=
-    r->flags),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
-    !pf_osfp_match(pf_osfp_fingerprint(pd),
-    r->os_fingerprint)),
- TAILQ_NEXT(r, entries));
- /* FALLTHROUGH */
-
- case IPPROTO_UDP:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op &&
-    !pf_match_port(r->src.port_op, r->src.port[0],
-    r->src.port[1], pd->nsport)),
- r->skip[PF_SKIP_SRC_PORT].ptr);
- PF_TEST_ATTRIB((r->dst.port_op &&
-    !pf_match_port(r->dst.port_op, r->dst.port[0],
-    r->dst.port[1], pd->ndport)),
- r->skip[PF_SKIP_DST_PORT].ptr);
- /* tcp/udp only. uid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_uid(r->uid.op, r->uid.uid[0],
-    r->uid.uid[1], pd->lookup.uid)),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. gid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_gid(r->gid.op, r->gid.gid[0],
-    r->gid.gid[1], pd->lookup.gid)),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6:
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->type && r->type != icmptype + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. don't create states on replies */
- PF_TEST_ATTRIB((r->keep_state && !state_icmp &&
-    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
-    icmp_dir != PF_IN),
- TAILQ_NEXT(r, entries));
- break;
-
- default:
- break;
- }
-
- PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
-    pd->virtual_proto != PF_VPROTO_FRAGMENT),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prob &&
-    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
-    r->rcvifnot),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prio && (r->prio == PF_PRIO_ZERO ?
-    0 : r->prio) != pd->m->m_pkthdr.pf.prio),
- TAILQ_NEXT(r, entries));
-
- /* FALLTHROUGH */
- if (r->tag)
- tag = r->tag;
- if (r->anchor == NULL) {
- if (r->action == PF_MATCH) {
- if ((ri = pool_get(&pf_rule_item_pl,
-    PR_NOWAIT)) == NULL) {
- REASON_SET(reason, PFRES_MEMORY);
- goto cleanup;
- }
- ri->r = r;
- /* order is irrelevant */
- SLIST_INSERT_HEAD(&rules, ri, entry);
- pf_rule_to_actions(r, &act);
- if (r->rule_flag & PFRULE_AFTO)
- pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
- goto cleanup;
- }
-#if NPFLOG > 0
- if (r->log) {
- REASON_SET(reason, PFRES_MATCH);
- PFLOG_PACKET(pd, *reason, r, a, ruleset,
-    NULL);
- }
-#endif /* NPFLOG > 0 */
- } else {
- match = asd;
- *rm = r;
- *am = a;
- *rsm = ruleset;
- arsm = aruleset;
- }
-
-#if NPFLOG > 0
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
-#endif /* NPFLOG > 0 */
-
- if (r->quick)
- break;
- r = TAILQ_NEXT(r, entries);
- } else {
- aruleset = ruleset;
- pf_step_into_anchor(&asd, &ruleset, &r, &a);
- }
-
- nextrule:
- if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
-    &r, &a, &match))
- break;
- }
- r = *rm; /* matching rule */
- a = *am; /* rule that defines an anchor containing 'r' */
- ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */
- aruleset = arsm;/* ruleset of the 'a' rule itself */
+ rv = pf_match_rule(&cx, ruleset);
+ if (rv == PF_TEST_FAIL) {
+ /*
+ * Reason has been set in pf_match_rule() already.
+ */
+ goto cleanup;
+ }
+
+ r = *cx.rm; /* matching rule */
+ a = *cx.am; /* rule that defines an anchor containing 'r' */
+ ruleset = *cx.rsm;/* ruleset of the anchor defined by the rule 'a' */
+ cx.aruleset = cx.arsm;/* ruleset of the 'a' rule itself */
+
+
 
  /* apply actions for last matching pass/block rule */
- pf_rule_to_actions(r, &act);
+ pf_rule_to_actions(r, &cx.act);
  if (r->rule_flag & PFRULE_AFTO)
  pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
+ if (pf_get_transaddr(r, pd, cx.sns, &cx.nr) == -1) {
+ REASON_SET(&cx.reason, PFRES_TRANSLATE);
  goto cleanup;
  }
- REASON_SET(reason, PFRES_MATCH);
+ REASON_SET(&cx.reason, PFRES_MATCH);
 
 #if NPFLOG > 0
  if (r->log)
- PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL);
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
+ PFLOG_PACKET(pd, cx.reason, r, cx.a, ruleset, NULL);
+ if (cx.act.log & PF_LOG_MATCHES)
+ pf_log_matches(pd, r, cx.a, ruleset, &cx.rules);
 #endif /* NPFLOG > 0 */
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
@@ -3718,31 +3732,30 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (pd->proto == IPPROTO_TCP &&
     ((r->rule_flag & PFRULE_RETURNRST) ||
     (r->rule_flag & PFRULE_RETURN)) &&
-    !(pd->hdr.tcp.th_flags & TH_RST)) {
- struct tcphdr *th = &pd->hdr.tcp;
- u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+    !(cx.th->th_flags & TH_RST)) {
+ u_int32_t ack = ntohl(cx.th->th_seq) + pd->p_len;
 
  if (pf_check_tcp_cksum(pd->m, pd->off,
     pd->tot_len - pd->off, pd->af))
- REASON_SET(reason, PFRES_PROTCKSUM);
+ REASON_SET(&cx.reason, PFRES_PROTCKSUM);
  else {
- if (th->th_flags & TH_SYN)
+ if (cx.th->th_flags & TH_SYN)
  ack++;
- if (th->th_flags & TH_FIN)
+ if (cx.th->th_flags & TH_FIN)
  ack++;
  pf_send_tcp(r, pd->af, pd->dst,
-    pd->src, th->th_dport, th->th_sport,
-    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
-    r->return_ttl, 1, 0, pd->rdomain);
+    pd->src, cx.th->th_dport, cx.th->th_sport,
+    ntohl(cx.th->th_ack), ack, TH_RST|TH_ACK,
+    0, 0, r->return_ttl, 1, 0, pd->rdomain);
  }
  } else if ((pd->proto != IPPROTO_ICMP ||
-    ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET &&
+    ICMP_INFOTYPE(cx.icmptype)) && pd->af == AF_INET &&
     r->return_icmp)
  pf_send_icmp(pd->m, r->return_icmp >> 8,
     r->return_icmp & 255, pd->af, r, pd->rdomain);
  else if ((pd->proto != IPPROTO_ICMPV6 ||
-    (icmptype >= ICMP6_ECHO_REQUEST &&
-    icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
+    (cx.icmptype >= ICMP6_ECHO_REQUEST &&
+    cx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
     r->return_icmp6)
  pf_send_icmp(pd->m, r->return_icmp6 >> 8,
     r->return_icmp6 & 255, pd->af, r, pd->rdomain);
@@ -3751,13 +3764,13 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (r->action == PF_DROP)
  goto cleanup;
 
- pf_tag_packet(pd->m, tag, act.rtableid);
- if (act.rtableid >= 0 &&
-    rtable_l2(act.rtableid) != pd->rdomain)
+ pf_tag_packet(pd->m, cx.tag, cx.act.rtableid);
+ if (cx.act.rtableid >= 0 &&
+    rtable_l2(cx.act.rtableid) != pd->rdomain)
  pd->destchg = 1;
 
  if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
- REASON_SET(reason, PFRES_IPOPTIONS);
+ REASON_SET(&cx.reason, PFRES_IPOPTIONS);
 #if NPFLOG > 0
  pd->pflog |= PF_LOG_FORCE;
 #endif /* NPFLOG > 0 */
@@ -3769,23 +3782,23 @@ pf_test_rule(struct pf_pdesc *pd, struct
  action = PF_PASS;
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT
-    && !state_icmp && r->keep_state) {
+    && !cx.state_icmp && r->keep_state) {
 
  if (r->rule_flag & PFRULE_SRCTRACK &&
-    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
+    pf_insert_src_node(&cx.sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
     pd->src, NULL) != 0) {
- REASON_SET(reason, PFRES_SRCLIMIT);
+ REASON_SET(&cx.reason, PFRES_SRCLIMIT);
  goto cleanup;
  }
 
  if (r->max_states && (r->states_cur >= r->max_states)) {
  pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(reason, PFRES_MAXSTATES);
+ REASON_SET(&cx.reason, PFRES_MAXSTATES);
  goto cleanup;
  }
 
- action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
-    sm, tag, &rules, &act, sns);
+ action = pf_create_state(pd, r, a, cx.nr, &skw, &sks, &rewrite,
+    sm, cx.tag, &cx.rules, &cx.act, cx.sns);
 
  if (action != PF_PASS)
  goto cleanup;
@@ -3801,7 +3814,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
     sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
     &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
     sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
-    virtual_type, icmp_dir);
+    virtual_type, cx.icmp_dir);
  }
 
 #ifdef INET6
@@ -3810,9 +3823,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
 #endif /* INET6 */
 
  } else {
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((cx.ri = SLIST_FIRST(&cx.rules))) {
+ SLIST_REMOVE_HEAD(&cx.rules, entry);
+ pool_put(&pf_rule_item_pl, cx.ri);
  }
  }
 
@@ -3844,9 +3857,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
  return (action);
 
 cleanup:
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((cx.ri = SLIST_FIRST(&cx.rules))) {
+ SLIST_REMOVE_HEAD(&cx.rules, entry);
+ pool_put(&pf_rule_item_pl, cx.ri);
  }
 
  return (action);
--------8<---------------8<---------------8<------------------8<--------


Re: pf: percpu anchor stacks

Mike Belopuhov
On Fri, Mar 24, 2017 at 12:19 +0100, Alexandr Nedvedicky wrote:
> Hello,
>
> I'm attaching patch, which removes stack-as-a-global variable.
> it's updated patch [1] to current tree.
>
> sorry for being pushy advocating my old, rusty patch.
>

I think your diff is indeed the way to go.  If we can avoid
using the global stack altogether, then all the better.
This diff also splits the giant pf_test_rule() into several
chunks, which is a good thing in my opinion.

A few random comments:

 - some lines appear to be longer than 80 characters;

 - "cx" is an uncommon abbreviation for a "context" in OpenBSD,
   we normally use "ctx";

 - PF_TEST_ATTRIB could use a "continue" statement instead of
   the goto (see the sketch after this list);

 - instead of checking "rv" against 0 in the "break on quick
   rule or failure" I'd like to see an actual check against
   PF_TEST_* values so that it's grep'able;

 - s/test_status/action/ as it's done everywhere else?

 - I'm not certain I like the extra set of PASS/BLOCK macros.
   I know you want to represent the "quick" pass separately,
   but perhaps there's a way to do it while using PF_PASS...
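
On the PF_TEST_ATTRIB point, a sketch of what the continue-based
variant could look like (illustrative, not a drop-in diff).  Note the
do { } while (0) wrapper has to go, because a "continue" inside it
would bind to the macro's own do/while loop instead of the enclosing
while (r != NULL) rule loop:

    #define PF_TEST_ATTRIB(t, element)                  \
        if (t) {                                        \
            r = element;                                \
            /* back to the while (r != NULL) test */    \
            continue;                                   \
        }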

Does this pass the pfctl regress tests?

While I haven't noticed anything criminal here, it makes me
wonder if it'd be possible to do this change in a few steps:
factor out rule matching from pf_test_rule and then bring in
the anchor changes?



Re: pf: percpu anchor stacks

Alexandr Nedvedicky
Hello Mike,

Thank you for looking at my patch. I accept most of your comments.
I believe the items below deserve further discussion.

>  - instead of checking "rv" against 0 in the "break on quick
>    rule or failure" I'd like to see an actual check against
>    PF_TEST_* values so that it's grep'able;

    This is an edited diff to highlight the place your comment relates to.
    It shows the change against the patch I sent in an earlier mail [1].

[1] http://openbsd-archive.7691.n7.nabble.com/pf-percpu-anchor-stacks-tt314935.html#a315309
--------8<---------------8<---------------8<------------------8<--------
-pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r)
+pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
 {
        int     rv;
 
        if (r->anchor_wildcard) {
                struct pf_anchor        *child;
                rv = PF_TEST_OK;
                RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
-                       rv = pf_match_rule(cx, &child->ruleset);
-                       if (rv != 0) {
+                       rv = pf_match_rule(ctx, &child->ruleset);
+                       if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
                                /*
-                                * break on quick rule or failure
+                                * we either hit a rule with quick action
+                                * (more likely), or hit some runtime
+                                * error (e.g. pool_get() failure).
                                 */
                                break;
                        }
                }
        } else {
-               rv = pf_match_rule(cx, &r->anchor->ruleset);
-       }
-
-       cx->depth--;
+               rv = pf_match_rule(ctx, &r->anchor->ruleset);
+       }
+
+       ctx->depth--;
 
        return (rv);
 }
--------8<---------------8<---------------8<------------------8<--------

>
>  - s/test_status/action/ as it's done everywhere else?

    I've opted for test_status because it's something different from
    'action' as we use it in the current code.

    The 'action' usually comes from a rule (or state match) and dictates
    what PF should do with the packet (pass/block/translate/...).

    The test_status rather indicates the status of rule processing:
        terminate due to failure (PF_TEST_FAIL), caused by a runtime error,
        continue (PF_TEST_OK), or
        terminate (PF_TEST_QUICK) due to hitting a rule with a 'quick' action.
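
    For illustration, a caller would dispatch on the status roughly
    like this (a sketch, not code taken from the patch):

        switch (pf_match_rule(&ctx, ruleset)) {
        case PF_TEST_FAIL:      /* runtime error, reason already set */
            goto cleanup;
        case PF_TEST_QUICK:     /* stopped at a rule with 'quick' */
        case PF_TEST_OK:        /* walked all rules */
            break;
        }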

>
>  - I'm not certain I like the extra set of PASS/BLOCK macros.
>    I know you want to represent the "quick" pass separately,
>    but perhaps there's a way to do it while using PF_PASS...

    I somewhat understand your point, but as I've said earlier, I see 'action'
    and 'test_status' as two distinct things, which I prefer to clearly
    separate.

    However, I don't insist on the current code in the patch. I think the
    existing enum, which defines PF_PASS/PF_BLOCK/..., can be extended. If we
    go this way, then I would rather do s/test_status/virtual_action/;
    using 'virtual' follows the pattern established for protocols:
    proto & virtual_proto


>
> Does this pass pfctl regress tests?

    I'm about to run those tests for OpenBSD.

>
> While I haven't noticed anything criminal here, it makes me
> wonder if it'd be possible to do this change in a few steps:
> factor out rule matching from pf_test_rule and then bring in
> anchor changes?
>

    if I understand you right, you basically want me to make the change
    in two steps:

        the first step splits the current pf_test_rule() into pf_match_rule()
        and pf_test_rule()

        the second step will kill the global anchor stack array by
        introducing true recursion (sketched below). The patch will remove
        the pf_step_out_of_anchor() function.

    I think I can do it. And also, as Theo pointed out, there is no rush
    to get that patch into the tree.
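
    a rough sketch of that second step (all names illustrative; wildcard
    anchors omitted for brevity):

	#define ANCHOR_DEPTH_MAX	64	/* illustrative bound */

	enum pf_test_status
	match_ruleset(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
	{
		struct pf_rule		*r;
		enum pf_test_status	 rv = PF_TEST_OK;

		/* a depth counter replaces the global stack array */
		if (ctx->depth >= ANCHOR_DEPTH_MAX)
			return (PF_TEST_FAIL);

		TAILQ_FOREACH(r, ruleset->rules.active.ptr, entries) {
			/* ... match r against the packet here ... */
			if (r->anchor != NULL) {
				ctx->depth++;
				rv = match_ruleset(ctx,
				    &r->anchor->ruleset);
				ctx->depth--;
			}
			if (rv != PF_TEST_OK)
				break;	/* quick rule or runtime failure */
		}
		return (rv);
	}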

thanks and
regards
sasha

--------8<---------------8<---------------8<------------------8<--------

diff -r b483ee1b4a65 src/sys/net/pf.c
--- a/src/sys/net/pf.c Tue Mar 28 10:39:14 2017 +0200
+++ b/src/sys/net/pf.c Tue Mar 28 11:44:12 2017 +0200
@@ -119,12 +119,53 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
- struct pf_ruleset *rs;
- struct pf_rule *r;
- struct pf_anchor_node *parent;
- struct pf_anchor *child;
-} pf_anchor_stack[64];
+struct pf_test_ctx {
+ int test_status;
+ struct pf_pdesc *pd;
+ struct pf_rule_actions act;
+ u_int8_t icmpcode;
+ u_int8_t icmptype;
+ int icmp_dir;
+ int state_icmp;
+ int tag;
+ u_short reason;
+ struct pf_rule_item *ri;
+ struct pf_src_node *sns[PF_SN_MAX];
+ struct pf_rule_slist rules;
+ struct pf_rule *nr;
+ struct pf_rule **rm;
+ struct pf_rule *a;
+ struct pf_rule **am;
+ struct pf_ruleset **rsm;
+ struct pf_ruleset *arsm;
+ struct pf_ruleset *aruleset;
+ struct tcphdr *th;
+ int depth;
+};
+
+#define PF_ANCHOR_STACK_MAX 64
+
+enum {
+ PF_TEST_FAIL = -1,
+ PF_TEST_OK,
+ PF_TEST_QUICK
+};
+/*
+ * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c.
+ * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c.
+ */
+union pf_headers {
+ struct tcphdr tcp;
+ struct udphdr udp;
+ struct icmp icmp;
+#ifdef INET6
+ struct icmp6_hdr icmp6;
+ struct mld_hdr mld;
+ struct nd_neighbor_solicit nd_ns;
+#endif /* INET6 */
+};
+
+
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -211,11 +252,8 @@ struct pf_state *pf_find_state(struct p
     struct pf_state_key_cmp *, u_int, struct mbuf *);
 int pf_src_connlimit(struct pf_state **);
 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
-void pf_step_into_anchor(int *, struct pf_ruleset **,
-    struct pf_rule **, struct pf_rule **);
-int pf_step_out_of_anchor(int *, struct pf_ruleset **,
-     struct pf_rule **, struct pf_rule **,
-     int *);
+int pf_step_into_anchor(struct pf_test_ctx *, struct pf_rule *);
+int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset *);
 void pf_counters_inc(int, struct pf_pdesc *,
     struct pf_state *, struct pf_rule *,
     struct pf_rule *);
@@ -3019,74 +3057,39 @@ pf_tag_packet(struct mbuf *m, int tag, i
  m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 }
 
-void
-pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a)
+int
+pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
 {
- struct pf_anchor_stackframe *f;
-
- if (*depth >= sizeof(pf_anchor_stack) /
-    sizeof(pf_anchor_stack[0])) {
- log(LOG_ERR, "pf: anchor stack overflow\n");
- *r = TAILQ_NEXT(*r, entries);
- return;
- } else if (a != NULL)
- *a = *r;
- f = pf_anchor_stack + (*depth)++;
- f->rs = *rs;
- f->r = *r;
- if ((*r)->anchor_wildcard) {
- f->parent = &(*r)->anchor->children;
- if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
- *r = NULL;
- return;
- }
- *rs = &f->child->ruleset;
- } else {
- f->parent = NULL;
- f->child = NULL;
- *rs = &(*r)->anchor->ruleset;
- }
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
-}
-
-int
-pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a, int *match)
-{
- struct pf_anchor_stackframe *f;
- int quick = 0;
-
- do {
- if (*depth <= 0)
- break;
- f = pf_anchor_stack + *depth - 1;
- if (f->parent != NULL && f->child != NULL) {
- f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
- if (f->child != NULL) {
- *rs = &f->child->ruleset;
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
- if (*r == NULL)
- continue;
- else
- break;
+ int rv;
+
+ if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
+ log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
+ return (PF_TEST_FAIL);
+ }
+
+ ctx->depth++;
+
+ if (r->anchor_wildcard) {
+ struct pf_anchor *child;
+ rv = PF_TEST_OK;
+ RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
+ rv = pf_match_rule(ctx, &child->ruleset);
+ if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
+ /*
+ * we either hit a rule with quick action
+ * (more likely), or hit some runtime
+ * error (e.g. pool_get() failure).
+ */
+ break;
  }
  }
- (*depth)--;
- if (*depth == 0 && a != NULL)
- *a = NULL;
- else if (a != NULL)
- *a = f->r;
- *rs = f->rs;
- if (*match > *depth) {
- *match = *depth;
- if (f->r->quick)
- quick = 1;
- }
- *r = TAILQ_NEXT(f->r, entries);
- } while (*r == NULL);
-
- return (quick);
+ } else {
+ rv = pf_match_rule(ctx, &r->anchor->ruleset);
+ }
+
+ ctx->depth--;
+
+ return (rv);
 }
 
 void
@@ -3453,51 +3456,239 @@ pf_rule_to_actions(struct pf_rule *r, st
  do { \
  if (t) { \
  r = a; \
- goto nextrule; \
+ continue; \
  } \
  } while (0)
 
 int
+pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
+{
+ struct pf_rule *r;
+
+ r = TAILQ_FIRST(ruleset->rules.active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ PF_TEST_ATTRIB(
+    (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot),
+ r->skip[PF_SKIP_IFP].ptr);
+ PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
+ r->skip[PF_SKIP_DIR].ptr);
+ PF_TEST_ATTRIB((r->onrdomain >= 0  &&
+    (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
+ r->skip[PF_SKIP_RDOM].ptr);
+ PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
+ r->skip[PF_SKIP_AF].ptr);
+ PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
+ r->skip[PF_SKIP_PROTO].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
+    ctx->pd->naf, r->src.neg, ctx->pd->kif,
+    ctx->act.rtableid)),
+ r->skip[PF_SKIP_SRC_ADDR].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
+    ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
+ r->skip[PF_SKIP_DST_ADDR].ptr);
+
+ switch (ctx->pd->virtual_proto) {
+ case PF_VPROTO_FRAGMENT:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
+    r->flagset),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type/code always 0 in other cases */
+ PF_TEST_ATTRIB((r->type || r->code),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. {uid|gid}.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op || r->uid.op),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_TCP:
+ PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
+    r->flags),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
+    !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
+    r->os_fingerprint)),
+ TAILQ_NEXT(r, entries));
+ /* FALLTHROUGH */
+
+ case IPPROTO_UDP:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op &&
+    !pf_match_port(r->src.port_op, r->src.port[0],
+    r->src.port[1], ctx->pd->nsport)),
+ r->skip[PF_SKIP_SRC_PORT].ptr);
+ PF_TEST_ATTRIB((r->dst.port_op &&
+    !pf_match_port(r->dst.port_op, r->dst.port[0],
+    r->dst.port[1], ctx->pd->ndport)),
+ r->skip[PF_SKIP_DST_PORT].ptr);
+ /* tcp/udp only. uid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
+    (ctx->pd->lookup.done =
+    pf_socket_lookup(ctx->pd), 1)) &&
+    !pf_match_uid(r->uid.op, r->uid.uid[0],
+    r->uid.uid[1], ctx->pd->lookup.uid)),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. gid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
+    (ctx->pd->lookup.done =
+    pf_socket_lookup(ctx->pd), 1)) &&
+    !pf_match_gid(r->gid.op, r->gid.gid[0],
+    r->gid.gid[1], ctx->pd->lookup.gid)),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->type &&
+    r->type != ctx->icmptype + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->code &&
+    r->code != ctx->icmpcode + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. don't create states on replies */
+ PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
+    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
+    ctx->icmp_dir != PF_IN),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ default:
+ break;
+ }
+
+ PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
+    ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prob &&
+    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->match_tag &&
+    !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
+    r->rcvifnot),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prio &&
+    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
+    ctx->pd->m->m_pkthdr.pf.prio),
+ TAILQ_NEXT(r, entries));
+
+ /* FALLTHROUGH */
+ if (r->tag)
+ ctx->tag = r->tag;
+ if (r->anchor == NULL) {
+ if (r->action == PF_MATCH) {
+ if ((ctx->ri = pool_get(&pf_rule_item_pl,
+    PR_NOWAIT)) == NULL) {
+ REASON_SET(&ctx->reason, PFRES_MEMORY);
+ ctx->test_status = PF_TEST_FAIL;
+ break;
+ }
+ ctx->ri->r = r;
+ /* order is irrelevant */
+ SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
+ ctx->ri = NULL;
+ pf_rule_to_actions(r, &ctx->act);
+ if (r->rule_flag & PFRULE_AFTO)
+ ctx->pd->naf = r->naf;
+ if (pf_get_transaddr(r, ctx->pd, ctx->sns,
+    &ctx->nr) == -1) {
+ REASON_SET(&ctx->reason,
+    PFRES_TRANSLATE);
+ ctx->test_status = PF_TEST_FAIL;
+ break;
+ }
+#if NPFLOG > 0
+ if (r->log) {
+ REASON_SET(&ctx->reason, PFRES_MATCH);
+ PFLOG_PACKET(ctx->pd, ctx->reason, r,
+    ctx->a, ruleset, NULL);
+ }
+#endif /* NPFLOG > 0 */
+ } else {
+ /*
+ * found matching r
+ */
+ *ctx->rm = r;
+ /*
+ * anchor, with ruleset, where r belongs to
+ */
+ *ctx->am = ctx->a;
+ /*
+ * ruleset where r belongs to
+ */
+ *ctx->rsm = ruleset;
+ /*
+ * ruleset, where anchor belongs to.
+ */
+ ctx->arsm = ctx->aruleset;
+ }
+
+#if NPFLOG > 0
+ if (ctx->act.log & PF_LOG_MATCHES)
+ pf_log_matches(ctx->pd, r, ctx->a, ruleset,
+    &ctx->rules);
+#endif /* NPFLOG > 0 */
+
+ if (r->quick) {
+ ctx->test_status = PF_TEST_QUICK;
+ break;
+ }
+ } else {
+ ctx->a = r; /* remember anchor */
+ ctx->aruleset = ruleset; /* and its ruleset */
+ if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) {
+ break;
+ }
+ }
+ r = TAILQ_NEXT(r, entries);
+ }
+
+ return (ctx->test_status);
+}
+
+int
 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
     struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
 {
- struct pf_rule *r;
- struct pf_rule *nr = NULL;
+ struct pf_rule *r = NULL;
  struct pf_rule *a = NULL;
- struct pf_ruleset *arsm = NULL;
- struct pf_ruleset *aruleset = NULL;
  struct pf_ruleset *ruleset = NULL;
- struct pf_rule_slist rules;
- struct pf_rule_item *ri;
- struct pf_src_node *sns[PF_SN_MAX];
  struct pf_state_key *skw = NULL, *sks = NULL;
- struct pf_rule_actions act;
  int rewrite = 0;
- int tag = -1;
- int asd = 0;
- int match = 0;
- int state_icmp = 0, icmp_dir = 0;
  u_int16_t virtual_type, virtual_id;
- u_int8_t icmptype = 0, icmpcode = 0;
  int action = PF_DROP;
-
- bzero(&act, sizeof(act));
- bzero(sns, sizeof(sns));
- act.rtableid = pd->rdomain;
- SLIST_INIT(&rules);
+ struct pf_test_ctx ctx;
+ int rv;
+
+ bzero(&ctx, sizeof(ctx));
+ ctx.pd = pd;
+ ctx.rm = rm;
+ ctx.am = am;
+ ctx.rsm = rsm;
+ ctx.th = &pd->hdr.tcp;
+ ctx.act.rtableid = pd->rdomain;
+ SLIST_INIT(&ctx.rules);
 
  if (pd->dir == PF_IN && if_congested()) {
- REASON_SET(reason, PFRES_CONGEST);
+ REASON_SET(&ctx.reason, PFRES_CONGEST);
  return (PF_DROP);
  }
 
  switch (pd->virtual_proto) {
  case IPPROTO_ICMP:
- icmptype = pd->hdr.icmp.icmp_type;
- icmpcode = pd->hdr.icmp.icmp_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ ctx.icmptype = pd->hdr.icmp.icmp_type;
+ ctx.icmpcode = pd->hdr.icmp.icmp_code;
+ ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+    &ctx.icmp_dir, &virtual_id, &virtual_type);
+ if (ctx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3507,11 +3698,11 @@ pf_test_rule(struct pf_pdesc *pd, struct
  break;
 #ifdef INET6
  case IPPROTO_ICMPV6:
- icmptype = pd->hdr.icmp6.icmp6_type;
- icmpcode = pd->hdr.icmp6.icmp6_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ ctx.icmptype = pd->hdr.icmp6.icmp6_type;
+ ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
+ ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+    &ctx.icmp_dir, &virtual_id, &virtual_type);
+ if (ctx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3523,191 +3714,36 @@ pf_test_rule(struct pf_pdesc *pd, struct
  }
 
  ruleset = &pf_main_ruleset;
- r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
- while (r != NULL) {
- if (r->rule_flag & PFRULE_EXPIRED) {
- r = TAILQ_NEXT(r, entries);
- goto nextrule;
- }
- r->evaluations++;
- PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
- r->skip[PF_SKIP_IFP].ptr);
- PF_TEST_ATTRIB((r->direction && r->direction != pd->dir),
- r->skip[PF_SKIP_DIR].ptr);
- PF_TEST_ATTRIB((r->onrdomain >= 0  &&
-    (r->onrdomain == pd->rdomain) == r->ifnot),
- r->skip[PF_SKIP_RDOM].ptr);
- PF_TEST_ATTRIB((r->af && r->af != pd->af),
- r->skip[PF_SKIP_AF].ptr);
- PF_TEST_ATTRIB((r->proto && r->proto != pd->proto),
- r->skip[PF_SKIP_PROTO].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr,
-    pd->naf, r->src.neg, pd->kif, act.rtableid)),
- r->skip[PF_SKIP_SRC_ADDR].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
-    r->dst.neg, NULL, act.rtableid)),
- r->skip[PF_SKIP_DST_ADDR].ptr);
-
- switch (pd->virtual_proto) {
- case PF_VPROTO_FRAGMENT:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
- TAILQ_NEXT(r, entries));
- /* icmp only. type/code always 0 in other cases */
- PF_TEST_ATTRIB((r->type || r->code),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. {uid|gid}.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op || r->uid.op),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_TCP:
- PF_TEST_ATTRIB(((r->flagset & pd->hdr.tcp.th_flags) !=
-    r->flags),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
-    !pf_osfp_match(pf_osfp_fingerprint(pd),
-    r->os_fingerprint)),
- TAILQ_NEXT(r, entries));
- /* FALLTHROUGH */
-
- case IPPROTO_UDP:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op &&
-    !pf_match_port(r->src.port_op, r->src.port[0],
-    r->src.port[1], pd->nsport)),
- r->skip[PF_SKIP_SRC_PORT].ptr);
- PF_TEST_ATTRIB((r->dst.port_op &&
-    !pf_match_port(r->dst.port_op, r->dst.port[0],
-    r->dst.port[1], pd->ndport)),
- r->skip[PF_SKIP_DST_PORT].ptr);
- /* tcp/udp only. uid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_uid(r->uid.op, r->uid.uid[0],
-    r->uid.uid[1], pd->lookup.uid)),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. gid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_gid(r->gid.op, r->gid.gid[0],
-    r->gid.gid[1], pd->lookup.gid)),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6:
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->type && r->type != icmptype + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. don't create states on replies */
- PF_TEST_ATTRIB((r->keep_state && !state_icmp &&
-    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
-    icmp_dir != PF_IN),
- TAILQ_NEXT(r, entries));
- break;
-
- default:
- break;
- }
-
- PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
-    pd->virtual_proto != PF_VPROTO_FRAGMENT),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prob &&
-    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
-    r->rcvifnot),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prio && (r->prio == PF_PRIO_ZERO ?
-    0 : r->prio) != pd->m->m_pkthdr.pf.prio),
- TAILQ_NEXT(r, entries));
-
- /* FALLTHROUGH */
- if (r->tag)
- tag = r->tag;
- if (r->anchor == NULL) {
- if (r->action == PF_MATCH) {
- if ((ri = pool_get(&pf_rule_item_pl,
-    PR_NOWAIT)) == NULL) {
- REASON_SET(reason, PFRES_MEMORY);
- goto cleanup;
- }
- ri->r = r;
- /* order is irrelevant */
- SLIST_INSERT_HEAD(&rules, ri, entry);
- pf_rule_to_actions(r, &act);
- if (r->rule_flag & PFRULE_AFTO)
- pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
- goto cleanup;
- }
-#if NPFLOG > 0
- if (r->log) {
- REASON_SET(reason, PFRES_MATCH);
- PFLOG_PACKET(pd, *reason, r, a, ruleset,
-    NULL);
- }
-#endif /* NPFLOG > 0 */
- } else {
- match = asd;
- *rm = r;
- *am = a;
- *rsm = ruleset;
- arsm = aruleset;
- }
-
-#if NPFLOG > 0
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
-#endif /* NPFLOG > 0 */
-
- if (r->quick)
- break;
- r = TAILQ_NEXT(r, entries);
- } else {
- aruleset = ruleset;
- pf_step_into_anchor(&asd, &ruleset, &r, &a);
- }
-
- nextrule:
- if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
-    &r, &a, &match))
- break;
- }
- r = *rm; /* matching rule */
- a = *am; /* rule that defines an anchor containing 'r' */
- ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */
- aruleset = arsm;/* ruleset of the 'a' rule itself */
+ rv = pf_match_rule(&ctx, ruleset);
+ if (rv == PF_TEST_FAIL) {
+ /*
+ * Reason has been set in pf_match_rule() already.
+ */
+ goto cleanup;
+ }
+
+ r = *ctx.rm; /* matching rule */
+ a = *ctx.am; /* rule that defines an anchor containing 'r' */
+ ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */
+ ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */
+
+
 
  /* apply actions for last matching pass/block rule */
- pf_rule_to_actions(r, &act);
+ pf_rule_to_actions(r, &ctx.act);
  if (r->rule_flag & PFRULE_AFTO)
  pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
+ if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
+ REASON_SET(&ctx.reason, PFRES_TRANSLATE);
  goto cleanup;
  }
- REASON_SET(reason, PFRES_MATCH);
+ REASON_SET(&ctx.reason, PFRES_MATCH);
 
 #if NPFLOG > 0
  if (r->log)
- PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL);
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
+ PFLOG_PACKET(pd, ctx.reason, r, ctx.a, ruleset, NULL);
+ if (ctx.act.log & PF_LOG_MATCHES)
+ pf_log_matches(pd, r, ctx.a, ruleset, &ctx.rules);
 #endif /* NPFLOG > 0 */
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
@@ -3718,31 +3754,32 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (pd->proto == IPPROTO_TCP &&
     ((r->rule_flag & PFRULE_RETURNRST) ||
     (r->rule_flag & PFRULE_RETURN)) &&
-    !(pd->hdr.tcp.th_flags & TH_RST)) {
- struct tcphdr *th = &pd->hdr.tcp;
- u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+    !(ctx.th->th_flags & TH_RST)) {
+ u_int32_t ack =
+    ntohl(ctx.th->th_seq) + pd->p_len;
 
  if (pf_check_tcp_cksum(pd->m, pd->off,
     pd->tot_len - pd->off, pd->af))
- REASON_SET(reason, PFRES_PROTCKSUM);
+ REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
  else {
- if (th->th_flags & TH_SYN)
+ if (ctx.th->th_flags & TH_SYN)
  ack++;
- if (th->th_flags & TH_FIN)
+ if (ctx.th->th_flags & TH_FIN)
  ack++;
  pf_send_tcp(r, pd->af, pd->dst,
-    pd->src, th->th_dport, th->th_sport,
-    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
-    r->return_ttl, 1, 0, pd->rdomain);
+    pd->src, ctx.th->th_dport,
+    ctx.th->th_sport, ntohl(ctx.th->th_ack),
+    ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
+    1, 0, pd->rdomain);
  }
  } else if ((pd->proto != IPPROTO_ICMP ||
-    ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET &&
+    ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
     r->return_icmp)
  pf_send_icmp(pd->m, r->return_icmp >> 8,
     r->return_icmp & 255, pd->af, r, pd->rdomain);
  else if ((pd->proto != IPPROTO_ICMPV6 ||
-    (icmptype >= ICMP6_ECHO_REQUEST &&
-    icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
+    (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
+    ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
     r->return_icmp6)
  pf_send_icmp(pd->m, r->return_icmp6 >> 8,
     r->return_icmp6 & 255, pd->af, r, pd->rdomain);
@@ -3751,13 +3788,13 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (r->action == PF_DROP)
  goto cleanup;
 
- pf_tag_packet(pd->m, tag, act.rtableid);
- if (act.rtableid >= 0 &&
-    rtable_l2(act.rtableid) != pd->rdomain)
+ pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
+ if (ctx.act.rtableid >= 0 &&
+    rtable_l2(ctx.act.rtableid) != pd->rdomain)
  pd->destchg = 1;
 
  if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
- REASON_SET(reason, PFRES_IPOPTIONS);
+ REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
 #if NPFLOG > 0
  pd->pflog |= PF_LOG_FORCE;
 #endif /* NPFLOG > 0 */
@@ -3769,23 +3806,23 @@ pf_test_rule(struct pf_pdesc *pd, struct
  action = PF_PASS;
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT
-    && !state_icmp && r->keep_state) {
+    && !ctx.state_icmp && r->keep_state) {
 
  if (r->rule_flag & PFRULE_SRCTRACK &&
-    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
-    pd->src, NULL) != 0) {
- REASON_SET(reason, PFRES_SRCLIMIT);
+    pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
+    pd->af, pd->src, NULL) != 0) {
+ REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
  goto cleanup;
  }
 
  if (r->max_states && (r->states_cur >= r->max_states)) {
  pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(reason, PFRES_MAXSTATES);
+ REASON_SET(&ctx.reason, PFRES_MAXSTATES);
  goto cleanup;
  }
 
- action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
-    sm, tag, &rules, &act, sns);
+ action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
+    &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);
 
  if (action != PF_PASS)
  goto cleanup;
@@ -3801,7 +3838,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
     sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
     &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
     sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
-    virtual_type, icmp_dir);
+    virtual_type, ctx.icmp_dir);
  }
 
 #ifdef INET6
@@ -3810,9 +3847,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
 #endif /* INET6 */
 
  } else {
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
+ SLIST_REMOVE_HEAD(&ctx.rules, entry);
+ pool_put(&pf_rule_item_pl, ctx.ri);
  }
  }
 
@@ -3844,9 +3881,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
  return (action);
 
 cleanup:
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
+ SLIST_REMOVE_HEAD(&ctx.rules, entry);
+ pool_put(&pf_rule_item_pl, ctx.ri);
  }
 
  return (action);


Re: pf: percpu anchor stacks

Martin Pieuchot
On 28/03/17(Tue) 13:02, Alexandr Nedvedicky wrote:
> [...]
> >
> >  - s/test_status/action/ as it's done everywhere else?
>
>     I've opted to test_status, because it's something different to 'action'
>     as we use it in current code.

I agree with you on test_status.  What about naming the enum and using it
instead of 'int' for the variable?  This implicitly documents the possible
options and allows the compiler to check for missing cases in a switch; a
minimal sketch follows below.
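
For illustration, a minimal sketch of that suggestion (reusing the names
from sasha's patch; the surrounding variables are hypothetical):

	enum pf_test_status {
		PF_TEST_FAIL = -1,
		PF_TEST_OK,
		PF_TEST_QUICK
	};

	enum pf_test_status	 pf_step_into_anchor(struct pf_test_ctx *,
				    struct pf_rule *);

	/*
	 * With the named type, a switch without 'default:' lets the
	 * compiler (-Wswitch) warn about any unhandled enumerator:
	 */
	switch (pf_step_into_anchor(ctx, r)) {
	case PF_TEST_FAIL:
	case PF_TEST_QUICK:
		done = 1;	/* stop walking the ruleset */
		break;
	case PF_TEST_OK:
		break;		/* continue with the next rule */
	}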

> > Does this pass pfctl regress tests?
>
>     I'm about to run those tests for OpenBSD.

Did you manage to do that?

> > While I haven't noticed anything criminal here, it makes me
> > wonder if it'd be possible to do this change in a few steps:
> > factor out rule matching from pf_test_rule and then bring in
> > anchor changes?
> >
>
>     if I understand you right, you basically want me to make change
>     in two steps:
>
> the first step splits current pf_test_rule() to pf_match_rule() and
> pf_test_rule()
>
> the second step will kill global anchor stack array by introducing
> a true recursion. The patch will remove pf_step_out_of_anchor()
> function.
>
>     I think I can do it. And also as Theo pointed out there is no rush
>     to get that patch to tree.

Now *is* the time to commit the first step, the refactoring.  Once
that's done we can discuss the introduction of the context.

Could you come up with such diff?

Cheers,
Martin


Re: pf: percpu anchor stacks

Alexandr Nedvedicky
Hello,

> Now *is* the time to commit the first step, the refactoring.  Once
> that's done we can discuss the introduction of the context.
>
> Could you come up with such diff?

    first of all: I have not managed to finish the refactoring step yet; work
    is still in progress. I stole some cycles from other projects, but
    apparently it was not enough. Must try harder next time...


> > > Does this pass pfctl regress tests?
> >
> >     I'm about to run those tests for OpenBSD.
>
> Did you manage to do that?

    I have an update on testing of the final patch. I've used the pf_forward
    tests to make sure the code I'm changing gets executed. I'm still working
    on a test case that covers a deeper anchor tree with once-rules.

    the pf_forward tests show no harm caused by my changes, though I saw some
    failures:

        Makefile:217 'run-regress-udp-inet-RTT_IN'
        Makefile:217 'run-regress-udp-inet6-ECO_IN'
        Makefile:217 'run-regress-udp-inet6-ECO_OUT'
        Makefile:217 'run-regress-udp-inet6-RDR_IN'
        Makefile:217 'run-regress-udp-inet6-RDR_OUT'
        Makefile:217 'run-regress-udp-inet6-RTT_IN'
        Makefile:215 'run-regress-udp-inet6-RPT_OUT'
        Makefile:257 'run-regress-traceroute-udp-inet6-AF_IN'

    I could see the same failures in the baseline (a tree _without_ my
    changes). I took a closer look to find out what's going on there, with a
    tcpdump at ECO:
        #
        # tcpdump -i vnet1 running on ECO (192.168.214.188, 192.168.3.20)
        #
        13:27:31.712955 192.168.1.10.42707 > 192.168.214.188.echo: udp 3
        13:27:31.713616 192.168.3.20.echo > 192.168.1.10.42707: udp 3
        13:27:31.714693 192.168.1.10 > 192.168.3.20: icmp: 192.168.1.10
            udp port 42707 unreachable
        #
        # the output above shows we get the answer from .3.20 instead of
        # .214.188, which looks like yet another bug.
        #

    There are multiple IP addresses bound to the ECO IN/OUT interface.
    However, the UDP socket at ECO always answers using the primary IP
    address bound to that interface; the answer then triggers an ICMP port
    unreachable at SRC (192.168.1.10).
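
    a userland sketch of that behaviour and a possible fix (illustrative
    only, not part of the pf diff; it assumes a platform that provides
    IP_RECVDSTADDR and its IP_SENDSRCADDR alias for sendmsg(), as e.g.
    FreeBSD does):

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <string.h>

	/* echo one datagram back from the address it was sent to */
	ssize_t
	echo_once(int s)
	{
		struct sockaddr_in	 from;
		struct msghdr		 msg;
		struct iovec		 iov;
		char			 buf[1024];
		char			 cbuf[CMSG_SPACE(sizeof(struct in_addr))];
		ssize_t			 n;
		int			 on = 1;

		/* ask the kernel for each datagram's destination address */
		if (setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
		    &on, sizeof(on)) == -1)
			return (-1);

		memset(&msg, 0, sizeof(msg));
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		msg.msg_name = &from;
		msg.msg_namelen = sizeof(from);
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;
		msg.msg_control = cbuf;
		msg.msg_controllen = sizeof(cbuf);

		if ((n = recvmsg(s, &msg, 0)) == -1)
			return (-1);

		/*
		 * The received cmsg (IPPROTO_IP/IP_RECVDSTADDR) carries
		 * the address the query arrived on.  IP_SENDSRCADDR is an
		 * alias for it, so feeding the same control data back to
		 * sendmsg() pins the reply's source address instead of
		 * letting the stack pick the primary one.
		 */
		iov.iov_len = n;
		return (sendmsg(s, &msg, 0));
	}

    without pinning the source address this way, the reply leaves from the
    primary address and SRC rightfully answers with ICMP port unreachable.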

> > >  - s/test_status/action/ as it's done everywhere else?
> >
> >     I've opted to test_status, because it's something different to 'action'
> >     as we use it in current code.
>
> I agree with you for test_status.  What about naming the enum and use it
> instead of 'int' for the variable?  This implicitly documents the possible
> option and allow the compiler to check for missing cases in switch.

    I'm attaching updated final patch, which accepts your suggestion.

thanks and
regards
sasha

--------8<---------------8<---------------8<------------------8<--------
diff -r d1adecdc78cc src/sys/net/pf.c
--- a/src/sys/net/pf.c Fri May 12 00:09:06 2017 +0200
+++ b/src/sys/net/pf.c Mon May 15 13:36:45 2017 +0200
@@ -119,12 +119,54 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
- struct pf_ruleset *rs;
- struct pf_rule *r;
- struct pf_anchor_node *parent;
- struct pf_anchor *child;
-} pf_anchor_stack[64];
+enum pf_test_status {
+ PF_TEST_FAIL = -1,
+ PF_TEST_OK,
+ PF_TEST_QUICK
+};
+
+struct pf_test_ctx {
+ enum pf_test_status  test_status;
+ struct pf_pdesc *pd;
+ struct pf_rule_actions  act;
+ u_int8_t  icmpcode;
+ u_int8_t  icmptype;
+ int  icmp_dir;
+ int  state_icmp;
+ int  tag;
+ u_short  reason;
+ struct pf_rule_item *ri;
+ struct pf_src_node *sns[PF_SN_MAX];
+ struct pf_rule_slist  rules;
+ struct pf_rule *nr;
+ struct pf_rule **rm;
+ struct pf_rule *a;
+ struct pf_rule **am;
+ struct pf_ruleset **rsm;
+ struct pf_ruleset *arsm;
+ struct pf_ruleset *aruleset;
+ struct tcphdr *th;
+ int  depth;
+};
+
+#define PF_ANCHOR_STACK_MAX 64
+
+/*
+ * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c.
+ * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c.
+ */
+union pf_headers {
+ struct tcphdr tcp;
+ struct udphdr udp;
+ struct icmp icmp;
+#ifdef INET6
+ struct icmp6_hdr icmp6;
+ struct mld_hdr mld;
+ struct nd_neighbor_solicit nd_ns;
+#endif /* INET6 */
+};
+
+
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -211,11 +253,8 @@ struct pf_state *pf_find_state(struct p
     struct pf_state_key_cmp *, u_int, struct mbuf *);
 int pf_src_connlimit(struct pf_state **);
 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
-void pf_step_into_anchor(int *, struct pf_ruleset **,
-    struct pf_rule **, struct pf_rule **);
-int pf_step_out_of_anchor(int *, struct pf_ruleset **,
-     struct pf_rule **, struct pf_rule **,
-     int *);
+int pf_step_into_anchor(struct pf_test_ctx *, struct pf_rule *);
+int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset *);
 void pf_counters_inc(int, struct pf_pdesc *,
     struct pf_state *, struct pf_rule *,
     struct pf_rule *);
@@ -3020,74 +3059,39 @@ pf_tag_packet(struct mbuf *m, int tag, i
  m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 }
 
-void
-pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a)
+enum pf_test_status
+pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
 {
- struct pf_anchor_stackframe *f;
-
- if (*depth >= sizeof(pf_anchor_stack) /
-    sizeof(pf_anchor_stack[0])) {
- log(LOG_ERR, "pf: anchor stack overflow\n");
- *r = TAILQ_NEXT(*r, entries);
- return;
- } else if (a != NULL)
- *a = *r;
- f = pf_anchor_stack + (*depth)++;
- f->rs = *rs;
- f->r = *r;
- if ((*r)->anchor_wildcard) {
- f->parent = &(*r)->anchor->children;
- if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
- *r = NULL;
- return;
- }
- *rs = &f->child->ruleset;
- } else {
- f->parent = NULL;
- f->child = NULL;
- *rs = &(*r)->anchor->ruleset;
- }
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
-}
-
-int
-pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a, int *match)
-{
- struct pf_anchor_stackframe *f;
- int quick = 0;
-
- do {
- if (*depth <= 0)
- break;
- f = pf_anchor_stack + *depth - 1;
- if (f->parent != NULL && f->child != NULL) {
- f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
- if (f->child != NULL) {
- *rs = &f->child->ruleset;
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
- if (*r == NULL)
- continue;
- else
- break;
+ int rv;
+
+ if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
+ log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
+ return (PF_TEST_FAIL);
+ }
+
+ ctx->depth++;
+
+ if (r->anchor_wildcard) {
+ struct pf_anchor *child;
+ rv = PF_TEST_OK;
+ RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
+ rv = pf_match_rule(ctx, &child->ruleset);
+ if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
+ /*
+ * we either hit a rule with quick action
+ * (more likely), or hit some runtime
+ * error (e.g. pool_get() failure).
+ */
+ break;
  }
  }
- (*depth)--;
- if (*depth == 0 && a != NULL)
- *a = NULL;
- else if (a != NULL)
- *a = f->r;
- *rs = f->rs;
- if (*match > *depth) {
- *match = *depth;
- if (f->r->quick)
- quick = 1;
- }
- *r = TAILQ_NEXT(f->r, entries);
- } while (*r == NULL);
-
- return (quick);
+ } else {
+ rv = pf_match_rule(ctx, &r->anchor->ruleset);
+ }
+
+ ctx->depth--;
+
+ return (rv);
 }
 
 void
@@ -3451,54 +3455,241 @@ pf_rule_to_actions(struct pf_rule *r, st
 }
 
 #define PF_TEST_ATTRIB(t, a) \
- do { \
- if (t) { \
- r = a; \
- goto nextrule; \
- } \
+ if (t) { \
+ r = a; \
+ continue; \
+ } else do { \
  } while (0)
 
+enum pf_test_status
+pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
+{
+ struct pf_rule *r;
+
+ r = TAILQ_FIRST(ruleset->rules.active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ PF_TEST_ATTRIB(
+    (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot),
+ r->skip[PF_SKIP_IFP].ptr);
+ PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
+ r->skip[PF_SKIP_DIR].ptr);
+ PF_TEST_ATTRIB((r->onrdomain >= 0  &&
+    (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
+ r->skip[PF_SKIP_RDOM].ptr);
+ PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
+ r->skip[PF_SKIP_AF].ptr);
+ PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
+ r->skip[PF_SKIP_PROTO].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
+    ctx->pd->naf, r->src.neg, ctx->pd->kif,
+    ctx->act.rtableid)),
+ r->skip[PF_SKIP_SRC_ADDR].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
+    ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
+ r->skip[PF_SKIP_DST_ADDR].ptr);
+
+ switch (ctx->pd->virtual_proto) {
+ case PF_VPROTO_FRAGMENT:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
+    r->flagset),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type/code always 0 in other cases */
+ PF_TEST_ATTRIB((r->type || r->code),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. {uid|gid}.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op || r->uid.op),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_TCP:
+ PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
+    r->flags),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
+    !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
+    r->os_fingerprint)),
+ TAILQ_NEXT(r, entries));
+ /* FALLTHROUGH */
+
+ case IPPROTO_UDP:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op &&
+    !pf_match_port(r->src.port_op, r->src.port[0],
+    r->src.port[1], ctx->pd->nsport)),
+ r->skip[PF_SKIP_SRC_PORT].ptr);
+ PF_TEST_ATTRIB((r->dst.port_op &&
+    !pf_match_port(r->dst.port_op, r->dst.port[0],
+    r->dst.port[1], ctx->pd->ndport)),
+ r->skip[PF_SKIP_DST_PORT].ptr);
+ /* tcp/udp only. uid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
+    (ctx->pd->lookup.done =
+    pf_socket_lookup(ctx->pd), 1)) &&
+    !pf_match_uid(r->uid.op, r->uid.uid[0],
+    r->uid.uid[1], ctx->pd->lookup.uid)),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. gid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
+    (ctx->pd->lookup.done =
+    pf_socket_lookup(ctx->pd), 1)) &&
+    !pf_match_gid(r->gid.op, r->gid.gid[0],
+    r->gid.gid[1], ctx->pd->lookup.gid)),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->type &&
+    r->type != ctx->icmptype + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->code &&
+    r->code != ctx->icmpcode + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. don't create states on replies */
+ PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
+    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
+    ctx->icmp_dir != PF_IN),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ default:
+ break;
+ }
+
+ PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
+    ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prob &&
+    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->match_tag &&
+    !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
+    r->rcvifnot),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prio &&
+    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
+    ctx->pd->m->m_pkthdr.pf.prio),
+ TAILQ_NEXT(r, entries));
+
+ /* FALLTHROUGH */
+ if (r->tag)
+ ctx->tag = r->tag;
+ if (r->anchor == NULL) {
+ if (r->action == PF_MATCH) {
+ if ((ctx->ri = pool_get(&pf_rule_item_pl,
+    PR_NOWAIT)) == NULL) {
+ REASON_SET(&ctx->reason, PFRES_MEMORY);
+ ctx->test_status = PF_TEST_FAIL;
+ break;
+ }
+ ctx->ri->r = r;
+ /* order is irrelevant */
+ SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
+ ctx->ri = NULL;
+ pf_rule_to_actions(r, &ctx->act);
+ if (r->rule_flag & PFRULE_AFTO)
+ ctx->pd->naf = r->naf;
+ if (pf_get_transaddr(r, ctx->pd, ctx->sns,
+    &ctx->nr) == -1) {
+ REASON_SET(&ctx->reason,
+    PFRES_TRANSLATE);
+ ctx->test_status = PF_TEST_FAIL;
+ break;
+ }
+#if NPFLOG > 0
+ if (r->log) {
+ REASON_SET(&ctx->reason, PFRES_MATCH);
+ PFLOG_PACKET(ctx->pd, ctx->reason, r,
+    ctx->a, ruleset, NULL);
+ }
+#endif /* NPFLOG > 0 */
+ } else {
+ /*
+ * found matching r
+ */
+ *ctx->rm = r;
+ /*
+ * anchor, with ruleset, where r belongs to
+ */
+ *ctx->am = ctx->a;
+ /*
+ * ruleset where r belongs to
+ */
+ *ctx->rsm = ruleset;
+ /*
+ * ruleset, where anchor belongs to.
+ */
+ ctx->arsm = ctx->aruleset;
+ }
+
+#if NPFLOG > 0
+ if (ctx->act.log & PF_LOG_MATCHES)
+ pf_log_matches(ctx->pd, r, ctx->a, ruleset,
+    &ctx->rules);
+#endif /* NPFLOG > 0 */
+
+ if (r->quick) {
+ ctx->test_status = PF_TEST_QUICK;
+ break;
+ }
+ } else {
+ ctx->a = r; /* remember anchor */
+ ctx->aruleset = ruleset; /* and its ruleset */
+ if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) {
+ break;
+ }
+ }
+ r = TAILQ_NEXT(r, entries);
+ }
+
+ return (ctx->test_status);
+}
+
 int
 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
     struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
 {
- struct pf_rule *r;
- struct pf_rule *nr = NULL;
+ struct pf_rule *r = NULL;
  struct pf_rule *a = NULL;
- struct pf_ruleset *arsm = NULL;
- struct pf_ruleset *aruleset = NULL;
  struct pf_ruleset *ruleset = NULL;
- struct pf_rule_slist rules;
- struct pf_rule_item *ri;
- struct pf_src_node *sns[PF_SN_MAX];
  struct pf_state_key *skw = NULL, *sks = NULL;
- struct pf_rule_actions act;
  int rewrite = 0;
- int tag = -1;
- int asd = 0;
- int match = 0;
- int state_icmp = 0, icmp_dir = 0;
  u_int16_t virtual_type, virtual_id;
- u_int8_t icmptype = 0, icmpcode = 0;
  int action = PF_DROP;
-
- bzero(&act, sizeof(act));
- bzero(sns, sizeof(sns));
- act.rtableid = pd->rdomain;
- SLIST_INIT(&rules);
+ struct pf_test_ctx ctx;
+ int rv;
+
+ bzero(&ctx, sizeof(ctx));
+ ctx.pd = pd;
+ ctx.rm = rm;
+ ctx.am = am;
+ ctx.rsm = rsm;
+ ctx.th = &pd->hdr.tcp;
+ ctx.act.rtableid = pd->rdomain;
+ SLIST_INIT(&ctx.rules);
 
  if (pd->dir == PF_IN && if_congested()) {
- REASON_SET(reason, PFRES_CONGEST);
+ REASON_SET(&ctx.reason, PFRES_CONGEST);
  return (PF_DROP);
  }
 
  switch (pd->virtual_proto) {
  case IPPROTO_ICMP:
- icmptype = pd->hdr.icmp.icmp_type;
- icmpcode = pd->hdr.icmp.icmp_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ ctx.icmptype = pd->hdr.icmp.icmp_type;
+ ctx.icmpcode = pd->hdr.icmp.icmp_code;
+ ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+    &ctx.icmp_dir, &virtual_id, &virtual_type);
+ if (ctx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3508,11 +3699,11 @@ pf_test_rule(struct pf_pdesc *pd, struct
  break;
 #ifdef INET6
  case IPPROTO_ICMPV6:
- icmptype = pd->hdr.icmp6.icmp6_type;
- icmpcode = pd->hdr.icmp6.icmp6_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ ctx.icmptype = pd->hdr.icmp6.icmp6_type;
+ ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
+ ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+    &ctx.icmp_dir, &virtual_id, &virtual_type);
+ if (ctx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3524,191 +3715,36 @@ pf_test_rule(struct pf_pdesc *pd, struct
  }
 
  ruleset = &pf_main_ruleset;
- r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
- while (r != NULL) {
- if (r->rule_flag & PFRULE_EXPIRED) {
- r = TAILQ_NEXT(r, entries);
- goto nextrule;
- }
- r->evaluations++;
- PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
- r->skip[PF_SKIP_IFP].ptr);
- PF_TEST_ATTRIB((r->direction && r->direction != pd->dir),
- r->skip[PF_SKIP_DIR].ptr);
- PF_TEST_ATTRIB((r->onrdomain >= 0  &&
-    (r->onrdomain == pd->rdomain) == r->ifnot),
- r->skip[PF_SKIP_RDOM].ptr);
- PF_TEST_ATTRIB((r->af && r->af != pd->af),
- r->skip[PF_SKIP_AF].ptr);
- PF_TEST_ATTRIB((r->proto && r->proto != pd->proto),
- r->skip[PF_SKIP_PROTO].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr,
-    pd->naf, r->src.neg, pd->kif, act.rtableid)),
- r->skip[PF_SKIP_SRC_ADDR].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
-    r->dst.neg, NULL, act.rtableid)),
- r->skip[PF_SKIP_DST_ADDR].ptr);
-
- switch (pd->virtual_proto) {
- case PF_VPROTO_FRAGMENT:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
- TAILQ_NEXT(r, entries));
- /* icmp only. type/code always 0 in other cases */
- PF_TEST_ATTRIB((r->type || r->code),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. {uid|gid}.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op || r->uid.op),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_TCP:
- PF_TEST_ATTRIB(((r->flagset & pd->hdr.tcp.th_flags) !=
-    r->flags),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
-    !pf_osfp_match(pf_osfp_fingerprint(pd),
-    r->os_fingerprint)),
- TAILQ_NEXT(r, entries));
- /* FALLTHROUGH */
-
- case IPPROTO_UDP:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op &&
-    !pf_match_port(r->src.port_op, r->src.port[0],
-    r->src.port[1], pd->nsport)),
- r->skip[PF_SKIP_SRC_PORT].ptr);
- PF_TEST_ATTRIB((r->dst.port_op &&
-    !pf_match_port(r->dst.port_op, r->dst.port[0],
-    r->dst.port[1], pd->ndport)),
- r->skip[PF_SKIP_DST_PORT].ptr);
- /* tcp/udp only. uid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_uid(r->uid.op, r->uid.uid[0],
-    r->uid.uid[1], pd->lookup.uid)),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. gid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_gid(r->gid.op, r->gid.gid[0],
-    r->gid.gid[1], pd->lookup.gid)),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6:
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->type && r->type != icmptype + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. don't create states on replies */
- PF_TEST_ATTRIB((r->keep_state && !state_icmp &&
-    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
-    icmp_dir != PF_IN),
- TAILQ_NEXT(r, entries));
- break;
-
- default:
- break;
- }
-
- PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
-    pd->virtual_proto != PF_VPROTO_FRAGMENT),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prob &&
-    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
-    r->rcvifnot),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prio && (r->prio == PF_PRIO_ZERO ?
-    0 : r->prio) != pd->m->m_pkthdr.pf.prio),
- TAILQ_NEXT(r, entries));
-
- /* FALLTHROUGH */
- if (r->tag)
- tag = r->tag;
- if (r->anchor == NULL) {
- if (r->action == PF_MATCH) {
- if ((ri = pool_get(&pf_rule_item_pl,
-    PR_NOWAIT)) == NULL) {
- REASON_SET(reason, PFRES_MEMORY);
- goto cleanup;
- }
- ri->r = r;
- /* order is irrelevant */
- SLIST_INSERT_HEAD(&rules, ri, entry);
- pf_rule_to_actions(r, &act);
- if (r->rule_flag & PFRULE_AFTO)
- pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
- goto cleanup;
- }
-#if NPFLOG > 0
- if (r->log) {
- REASON_SET(reason, PFRES_MATCH);
- PFLOG_PACKET(pd, *reason, r, a, ruleset,
-    NULL);
- }
-#endif /* NPFLOG > 0 */
- } else {
- match = asd;
- *rm = r;
- *am = a;
- *rsm = ruleset;
- arsm = aruleset;
- }
-
-#if NPFLOG > 0
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
-#endif /* NPFLOG > 0 */
-
- if (r->quick)
- break;
- r = TAILQ_NEXT(r, entries);
- } else {
- aruleset = ruleset;
- pf_step_into_anchor(&asd, &ruleset, &r, &a);
- }
-
- nextrule:
- if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
-    &r, &a, &match))
- break;
- }
- r = *rm; /* matching rule */
- a = *am; /* rule that defines an anchor containing 'r' */
- ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */
- aruleset = arsm;/* ruleset of the 'a' rule itself */
+ rv = pf_match_rule(&ctx, ruleset);
+ if (rv == PF_TEST_FAIL) {
+ /*
+ * Reason has been set in pf_match_rule() already.
+ */
+ goto cleanup;
+ }
+
+ r = *ctx.rm; /* matching rule */
+ a = *ctx.am; /* rule that defines an anchor containing 'r' */
+ ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */
+ ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */
+
+
 
  /* apply actions for last matching pass/block rule */
- pf_rule_to_actions(r, &act);
+ pf_rule_to_actions(r, &ctx.act);
  if (r->rule_flag & PFRULE_AFTO)
  pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
+ if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
+ REASON_SET(&ctx.reason, PFRES_TRANSLATE);
  goto cleanup;
  }
- REASON_SET(reason, PFRES_MATCH);
+ REASON_SET(&ctx.reason, PFRES_MATCH);
 
 #if NPFLOG > 0
  if (r->log)
- PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL);
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
+ PFLOG_PACKET(pd, ctx.reason, r, ctx.a, ruleset, NULL);
+ if (ctx.act.log & PF_LOG_MATCHES)
+ pf_log_matches(pd, r, ctx.a, ruleset, &ctx.rules);
 #endif /* NPFLOG > 0 */
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
@@ -3719,31 +3755,32 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (pd->proto == IPPROTO_TCP &&
     ((r->rule_flag & PFRULE_RETURNRST) ||
     (r->rule_flag & PFRULE_RETURN)) &&
-    !(pd->hdr.tcp.th_flags & TH_RST)) {
- struct tcphdr *th = &pd->hdr.tcp;
- u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+    !(ctx.th->th_flags & TH_RST)) {
+ u_int32_t ack =
+    ntohl(ctx.th->th_seq) + pd->p_len;
 
  if (pf_check_tcp_cksum(pd->m, pd->off,
     pd->tot_len - pd->off, pd->af))
- REASON_SET(reason, PFRES_PROTCKSUM);
+ REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
  else {
- if (th->th_flags & TH_SYN)
+ if (ctx.th->th_flags & TH_SYN)
  ack++;
- if (th->th_flags & TH_FIN)
+ if (ctx.th->th_flags & TH_FIN)
  ack++;
  pf_send_tcp(r, pd->af, pd->dst,
-    pd->src, th->th_dport, th->th_sport,
-    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
-    r->return_ttl, 1, 0, pd->rdomain);
+    pd->src, ctx.th->th_dport,
+    ctx.th->th_sport, ntohl(ctx.th->th_ack),
+    ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
+    1, 0, pd->rdomain);
  }
  } else if ((pd->proto != IPPROTO_ICMP ||
-    ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET &&
+    ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
     r->return_icmp)
  pf_send_icmp(pd->m, r->return_icmp >> 8,
     r->return_icmp & 255, pd->af, r, pd->rdomain);
  else if ((pd->proto != IPPROTO_ICMPV6 ||
-    (icmptype >= ICMP6_ECHO_REQUEST &&
-    icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
+    (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
+    ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
     r->return_icmp6)
  pf_send_icmp(pd->m, r->return_icmp6 >> 8,
     r->return_icmp6 & 255, pd->af, r, pd->rdomain);
@@ -3752,13 +3789,13 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (r->action == PF_DROP)
  goto cleanup;
 
- pf_tag_packet(pd->m, tag, act.rtableid);
- if (act.rtableid >= 0 &&
-    rtable_l2(act.rtableid) != pd->rdomain)
+ pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
+ if (ctx.act.rtableid >= 0 &&
+    rtable_l2(ctx.act.rtableid) != pd->rdomain)
  pd->destchg = 1;
 
  if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
- REASON_SET(reason, PFRES_IPOPTIONS);
+ REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
 #if NPFLOG > 0
  pd->pflog |= PF_LOG_FORCE;
 #endif /* NPFLOG > 0 */
@@ -3770,23 +3807,23 @@ pf_test_rule(struct pf_pdesc *pd, struct
  action = PF_PASS;
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT
-    && !state_icmp && r->keep_state) {
+    && !ctx.state_icmp && r->keep_state) {
 
  if (r->rule_flag & PFRULE_SRCTRACK &&
-    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
-    pd->src, NULL) != 0) {
- REASON_SET(reason, PFRES_SRCLIMIT);
+    pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
+    pd->af, pd->src, NULL) != 0) {
+ REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
  goto cleanup;
  }
 
  if (r->max_states && (r->states_cur >= r->max_states)) {
  pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(reason, PFRES_MAXSTATES);
+ REASON_SET(&ctx.reason, PFRES_MAXSTATES);
  goto cleanup;
  }
 
- action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
-    sm, tag, &rules, &act, sns);
+ action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
+    &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);
 
  if (action != PF_PASS)
  goto cleanup;
@@ -3802,7 +3839,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
     sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
     &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
     sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
-    virtual_type, icmp_dir);
+    virtual_type, ctx.icmp_dir);
  }
 
 #ifdef INET6
@@ -3811,9 +3848,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
 #endif /* INET6 */
 
  } else {
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
+ SLIST_REMOVE_HEAD(&ctx.rules, entry);
+ pool_put(&pf_rule_item_pl, ctx.ri);
  }
  }
 
@@ -3845,9 +3882,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
  return (action);
 
 cleanup:
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
+ SLIST_REMOVE_HEAD(&ctx.rules, entry);
+ pool_put(&pf_rule_item_pl, ctx.ri);
  }
 
  return (action);


Re: pf: percpu anchor stacks

Mike Belopuhov-5
On Mon, May 15, 2017 at 15:19 +0200, Alexandr Nedvedicky wrote:

> [...]
> I'm attaching updated final patch, which accepts your suggestion.

I think you can go ahead with your change.  OK mikeb


Re: pf: percpu anchor stacks

Alexander Bluhm
In reply to this post by Alexandr Nedvedicky
On Mon, May 15, 2017 at 03:19:19PM +0200, Alexandr Nedvedicky wrote:
>     I'm attaching updated final patch, which accepts your suggestion.

I think this broke sys/net/pf_forward.
http://bluhm.genua.de/regress/results/regress.html
When backing out pf.c rev 1.1024 it works again.

I guess it is a problem with tagged route-to rules in an anchor.
I cannot investigate right now, but will do later.

bluhm

> + }
> +
> + ctx->depth++;
> +
> + if (r->anchor_wildcard) {
> + struct pf_anchor *child;
> + rv = PF_TEST_OK;
> + RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
> + rv = pf_match_rule(ctx, &child->ruleset);
> + if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
> + /*
> + * we either hit a rule with quick action
> + * (more likely), or hit some runtime
> + * error (e.g. pool_get() failure).
> + */
> + break;
>   }
>   }
> - (*depth)--;
> - if (*depth == 0 && a != NULL)
> - *a = NULL;
> - else if (a != NULL)
> - *a = f->r;
> - *rs = f->rs;
> - if (*match > *depth) {
> - *match = *depth;
> - if (f->r->quick)
> - quick = 1;
> - }
> - *r = TAILQ_NEXT(f->r, entries);
> - } while (*r == NULL);
> -
> - return (quick);
> + } else {
> + rv = pf_match_rule(ctx, &r->anchor->ruleset);
> + }
> +
> + ctx->depth--;
> +
> + return (rv);
>  }
>  
>  void
> @@ -3451,54 +3455,241 @@ pf_rule_to_actions(struct pf_rule *r, st
>  }
>  
>  #define PF_TEST_ATTRIB(t, a) \
> - do { \
> - if (t) { \
> - r = a; \
> - goto nextrule; \
> - } \
> + if (t) { \
> + r = a; \
> + continue; \
> + } else do { \
>   } while (0)
>  
> +enum pf_test_status
> +pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
> +{
> + struct pf_rule *r;
> +
> + r = TAILQ_FIRST(ruleset->rules.active.ptr);
> + while (r != NULL) {
> + r->evaluations++;
> + PF_TEST_ATTRIB(
> +    (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot),
> + r->skip[PF_SKIP_IFP].ptr);
> + PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
> + r->skip[PF_SKIP_DIR].ptr);
> + PF_TEST_ATTRIB((r->onrdomain >= 0  &&
> +    (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
> + r->skip[PF_SKIP_RDOM].ptr);
> + PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
> + r->skip[PF_SKIP_AF].ptr);
> + PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
> + r->skip[PF_SKIP_PROTO].ptr);
> + PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
> +    ctx->pd->naf, r->src.neg, ctx->pd->kif,
> +    ctx->act.rtableid)),
> + r->skip[PF_SKIP_SRC_ADDR].ptr);
> + PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
> +    ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
> + r->skip[PF_SKIP_DST_ADDR].ptr);
> +
> + switch (ctx->pd->virtual_proto) {
> + case PF_VPROTO_FRAGMENT:
> + /* tcp/udp only. port_op always 0 in other cases */
> + PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
> +    r->flagset),
> + TAILQ_NEXT(r, entries));
> + /* icmp only. type/code always 0 in other cases */
> + PF_TEST_ATTRIB((r->type || r->code),
> + TAILQ_NEXT(r, entries));
> + /* tcp/udp only. {uid|gid}.op always 0 in other cases */
> + PF_TEST_ATTRIB((r->gid.op || r->uid.op),
> + TAILQ_NEXT(r, entries));
> + break;
> +
> + case IPPROTO_TCP:
> + PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
> +    r->flags),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
> +    !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
> +    r->os_fingerprint)),
> + TAILQ_NEXT(r, entries));
> + /* FALLTHROUGH */
> +
> + case IPPROTO_UDP:
> + /* tcp/udp only. port_op always 0 in other cases */
> + PF_TEST_ATTRIB((r->src.port_op &&
> +    !pf_match_port(r->src.port_op, r->src.port[0],
> +    r->src.port[1], ctx->pd->nsport)),
> + r->skip[PF_SKIP_SRC_PORT].ptr);
> + PF_TEST_ATTRIB((r->dst.port_op &&
> +    !pf_match_port(r->dst.port_op, r->dst.port[0],
> +    r->dst.port[1], ctx->pd->ndport)),
> + r->skip[PF_SKIP_DST_PORT].ptr);
> + /* tcp/udp only. uid.op always 0 in other cases */
> + PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
> +    (ctx->pd->lookup.done =
> +    pf_socket_lookup(ctx->pd), 1)) &&
> +    !pf_match_uid(r->uid.op, r->uid.uid[0],
> +    r->uid.uid[1], ctx->pd->lookup.uid)),
> + TAILQ_NEXT(r, entries));
> + /* tcp/udp only. gid.op always 0 in other cases */
> + PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
> +    (ctx->pd->lookup.done =
> +    pf_socket_lookup(ctx->pd), 1)) &&
> +    !pf_match_gid(r->gid.op, r->gid.gid[0],
> +    r->gid.gid[1], ctx->pd->lookup.gid)),
> + TAILQ_NEXT(r, entries));
> + break;
> +
> + case IPPROTO_ICMP:
> + case IPPROTO_ICMPV6:
> + /* icmp only. type always 0 in other cases */
> + PF_TEST_ATTRIB((r->type &&
> +    r->type != ctx->icmptype + 1),
> + TAILQ_NEXT(r, entries));
> + /* icmp only. type always 0 in other cases */
> + PF_TEST_ATTRIB((r->code &&
> +    r->code != ctx->icmpcode + 1),
> + TAILQ_NEXT(r, entries));
> + /* icmp only. don't create states on replies */
> + PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
> +    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
> +    ctx->icmp_dir != PF_IN),
> + TAILQ_NEXT(r, entries));
> + break;
> +
> + default:
> + break;
> + }
> +
> + PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
> +    ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->prob &&
> +    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->match_tag &&
> +    !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
> +    r->rcvifnot),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->prio &&
> +    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
> +    ctx->pd->m->m_pkthdr.pf.prio),
> + TAILQ_NEXT(r, entries));
> +
> + /* FALLTHROUGH */
> + if (r->tag)
> + ctx->tag = r->tag;
> + if (r->anchor == NULL) {
> + if (r->action == PF_MATCH) {
> + if ((ctx->ri = pool_get(&pf_rule_item_pl,
> +    PR_NOWAIT)) == NULL) {
> + REASON_SET(&ctx->reason, PFRES_MEMORY);
> + ctx->test_status = PF_TEST_FAIL;
> + break;
> + }
> + ctx->ri->r = r;
> + /* order is irrelevant */
> + SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
> + ctx->ri = NULL;
> + pf_rule_to_actions(r, &ctx->act);
> + if (r->rule_flag & PFRULE_AFTO)
> + ctx->pd->naf = r->naf;
> + if (pf_get_transaddr(r, ctx->pd, ctx->sns,
> +    &ctx->nr) == -1) {
> + REASON_SET(&ctx->reason,
> +    PFRES_TRANSLATE);
> + ctx->test_status = PF_TEST_FAIL;
> + break;
> + }
> +#if NPFLOG > 0
> + if (r->log) {
> + REASON_SET(&ctx->reason, PFRES_MATCH);
> + PFLOG_PACKET(ctx->pd, ctx->reason, r,
> +    ctx->a, ruleset, NULL);
> + }
> +#endif /* NPFLOG > 0 */
> + } else {
> + /*
> + * found matching r
> + */
> + *ctx->rm = r;
> + /*
> + * anchor, with ruleset, where r belongs to
> + */
> + *ctx->am = ctx->a;
> + /*
> + * ruleset where r belongs to
> + */
> + *ctx->rsm = ruleset;
> + /*
> + * ruleset, where anchor belongs to.
> + */
> + ctx->arsm = ctx->aruleset;
> + }
> +
> +#if NPFLOG > 0
> + if (ctx->act.log & PF_LOG_MATCHES)
> + pf_log_matches(ctx->pd, r, ctx->a, ruleset,
> +    &ctx->rules);
> +#endif /* NPFLOG > 0 */
> +
> + if (r->quick) {
> + ctx->test_status = PF_TEST_QUICK;
> + break;
> + }
> + } else {
> + ctx->a = r; /* remember anchor */
> + ctx->aruleset = ruleset; /* and its ruleset */
> + if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) {
> + break;
> + }
> + }
> + r = TAILQ_NEXT(r, entries);
> + }
> +
> + return (ctx->test_status);
> +}
> +
>  int
>  pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
>      struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
>  {
> - struct pf_rule *r;
> - struct pf_rule *nr = NULL;
> + struct pf_rule *r = NULL;
>   struct pf_rule *a = NULL;
> - struct pf_ruleset *arsm = NULL;
> - struct pf_ruleset *aruleset = NULL;
>   struct pf_ruleset *ruleset = NULL;
> - struct pf_rule_slist rules;
> - struct pf_rule_item *ri;
> - struct pf_src_node *sns[PF_SN_MAX];
>   struct pf_state_key *skw = NULL, *sks = NULL;
> - struct pf_rule_actions act;
>   int rewrite = 0;
> - int tag = -1;
> - int asd = 0;
> - int match = 0;
> - int state_icmp = 0, icmp_dir = 0;
>   u_int16_t virtual_type, virtual_id;
> - u_int8_t icmptype = 0, icmpcode = 0;
>   int action = PF_DROP;
> -
> - bzero(&act, sizeof(act));
> - bzero(sns, sizeof(sns));
> - act.rtableid = pd->rdomain;
> - SLIST_INIT(&rules);
> + struct pf_test_ctx ctx;
> + int rv;
> +
> + bzero(&ctx, sizeof(ctx));
> + ctx.pd = pd;
> + ctx.rm = rm;
> + ctx.am = am;
> + ctx.rsm = rsm;
> + ctx.th = &pd->hdr.tcp;
> + ctx.act.rtableid = pd->rdomain;
> + SLIST_INIT(&ctx.rules);
>  
>   if (pd->dir == PF_IN && if_congested()) {
> - REASON_SET(reason, PFRES_CONGEST);
> + REASON_SET(&ctx.reason, PFRES_CONGEST);
>   return (PF_DROP);
>   }
>  
>   switch (pd->virtual_proto) {
>   case IPPROTO_ICMP:
> - icmptype = pd->hdr.icmp.icmp_type;
> - icmpcode = pd->hdr.icmp.icmp_code;
> - state_icmp = pf_icmp_mapping(pd, icmptype,
> -    &icmp_dir, &virtual_id, &virtual_type);
> - if (icmp_dir == PF_IN) {
> + ctx.icmptype = pd->hdr.icmp.icmp_type;
> + ctx.icmpcode = pd->hdr.icmp.icmp_code;
> + ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
> +    &ctx.icmp_dir, &virtual_id, &virtual_type);
> + if (ctx.icmp_dir == PF_IN) {
>   pd->osport = pd->nsport = virtual_id;
>   pd->odport = pd->ndport = virtual_type;
>   } else {
> @@ -3508,11 +3699,11 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   break;
>  #ifdef INET6
>   case IPPROTO_ICMPV6:
> - icmptype = pd->hdr.icmp6.icmp6_type;
> - icmpcode = pd->hdr.icmp6.icmp6_code;
> - state_icmp = pf_icmp_mapping(pd, icmptype,
> -    &icmp_dir, &virtual_id, &virtual_type);
> - if (icmp_dir == PF_IN) {
> + ctx.icmptype = pd->hdr.icmp6.icmp6_type;
> + ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
> + ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
> +    &ctx.icmp_dir, &virtual_id, &virtual_type);
> + if (ctx.icmp_dir == PF_IN) {
>   pd->osport = pd->nsport = virtual_id;
>   pd->odport = pd->ndport = virtual_type;
>   } else {
> @@ -3524,191 +3715,36 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   }
>  
>   ruleset = &pf_main_ruleset;
> - r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
> - while (r != NULL) {
> - if (r->rule_flag & PFRULE_EXPIRED) {
> - r = TAILQ_NEXT(r, entries);
> - goto nextrule;
> - }
> - r->evaluations++;
> - PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
> - r->skip[PF_SKIP_IFP].ptr);
> - PF_TEST_ATTRIB((r->direction && r->direction != pd->dir),
> - r->skip[PF_SKIP_DIR].ptr);
> - PF_TEST_ATTRIB((r->onrdomain >= 0  &&
> -    (r->onrdomain == pd->rdomain) == r->ifnot),
> - r->skip[PF_SKIP_RDOM].ptr);
> - PF_TEST_ATTRIB((r->af && r->af != pd->af),
> - r->skip[PF_SKIP_AF].ptr);
> - PF_TEST_ATTRIB((r->proto && r->proto != pd->proto),
> - r->skip[PF_SKIP_PROTO].ptr);
> - PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr,
> -    pd->naf, r->src.neg, pd->kif, act.rtableid)),
> - r->skip[PF_SKIP_SRC_ADDR].ptr);
> - PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
> -    r->dst.neg, NULL, act.rtableid)),
> - r->skip[PF_SKIP_DST_ADDR].ptr);
> -
> - switch (pd->virtual_proto) {
> - case PF_VPROTO_FRAGMENT:
> - /* tcp/udp only. port_op always 0 in other cases */
> - PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
> - TAILQ_NEXT(r, entries));
> - /* icmp only. type/code always 0 in other cases */
> - PF_TEST_ATTRIB((r->type || r->code),
> - TAILQ_NEXT(r, entries));
> - /* tcp/udp only. {uid|gid}.op always 0 in other cases */
> - PF_TEST_ATTRIB((r->gid.op || r->uid.op),
> - TAILQ_NEXT(r, entries));
> - break;
> -
> - case IPPROTO_TCP:
> - PF_TEST_ATTRIB(((r->flagset & pd->hdr.tcp.th_flags) !=
> -    r->flags),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
> -    !pf_osfp_match(pf_osfp_fingerprint(pd),
> -    r->os_fingerprint)),
> - TAILQ_NEXT(r, entries));
> - /* FALLTHROUGH */
> -
> - case IPPROTO_UDP:
> - /* tcp/udp only. port_op always 0 in other cases */
> - PF_TEST_ATTRIB((r->src.port_op &&
> -    !pf_match_port(r->src.port_op, r->src.port[0],
> -    r->src.port[1], pd->nsport)),
> - r->skip[PF_SKIP_SRC_PORT].ptr);
> - PF_TEST_ATTRIB((r->dst.port_op &&
> -    !pf_match_port(r->dst.port_op, r->dst.port[0],
> -    r->dst.port[1], pd->ndport)),
> - r->skip[PF_SKIP_DST_PORT].ptr);
> - /* tcp/udp only. uid.op always 0 in other cases */
> - PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done ||
> -    (pd->lookup.done =
> -    pf_socket_lookup(pd), 1)) &&
> -    !pf_match_uid(r->uid.op, r->uid.uid[0],
> -    r->uid.uid[1], pd->lookup.uid)),
> - TAILQ_NEXT(r, entries));
> - /* tcp/udp only. gid.op always 0 in other cases */
> - PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done ||
> -    (pd->lookup.done =
> -    pf_socket_lookup(pd), 1)) &&
> -    !pf_match_gid(r->gid.op, r->gid.gid[0],
> -    r->gid.gid[1], pd->lookup.gid)),
> - TAILQ_NEXT(r, entries));
> - break;
> -
> - case IPPROTO_ICMP:
> - case IPPROTO_ICMPV6:
> - /* icmp only. type always 0 in other cases */
> - PF_TEST_ATTRIB((r->type && r->type != icmptype + 1),
> - TAILQ_NEXT(r, entries));
> - /* icmp only. type always 0 in other cases */
> - PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1),
> - TAILQ_NEXT(r, entries));
> - /* icmp only. don't create states on replies */
> - PF_TEST_ATTRIB((r->keep_state && !state_icmp &&
> -    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
> -    icmp_dir != PF_IN),
> - TAILQ_NEXT(r, entries));
> - break;
> -
> - default:
> - break;
> - }
> -
> - PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
> -    pd->virtual_proto != PF_VPROTO_FRAGMENT),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->prob &&
> -    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
> -    r->rcvifnot),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->prio && (r->prio == PF_PRIO_ZERO ?
> -    0 : r->prio) != pd->m->m_pkthdr.pf.prio),
> - TAILQ_NEXT(r, entries));
> -
> - /* FALLTHROUGH */
> - if (r->tag)
> - tag = r->tag;
> - if (r->anchor == NULL) {
> - if (r->action == PF_MATCH) {
> - if ((ri = pool_get(&pf_rule_item_pl,
> -    PR_NOWAIT)) == NULL) {
> - REASON_SET(reason, PFRES_MEMORY);
> - goto cleanup;
> - }
> - ri->r = r;
> - /* order is irrelevant */
> - SLIST_INSERT_HEAD(&rules, ri, entry);
> - pf_rule_to_actions(r, &act);
> - if (r->rule_flag & PFRULE_AFTO)
> - pd->naf = r->naf;
> - if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
> - REASON_SET(reason, PFRES_TRANSLATE);
> - goto cleanup;
> - }
> -#if NPFLOG > 0
> - if (r->log) {
> - REASON_SET(reason, PFRES_MATCH);
> - PFLOG_PACKET(pd, *reason, r, a, ruleset,
> -    NULL);
> - }
> -#endif /* NPFLOG > 0 */
> - } else {
> - match = asd;
> - *rm = r;
> - *am = a;
> - *rsm = ruleset;
> - arsm = aruleset;
> - }
> -
> -#if NPFLOG > 0
> - if (act.log & PF_LOG_MATCHES)
> - pf_log_matches(pd, r, a, ruleset, &rules);
> -#endif /* NPFLOG > 0 */
> -
> - if (r->quick)
> - break;
> - r = TAILQ_NEXT(r, entries);
> - } else {
> - aruleset = ruleset;
> - pf_step_into_anchor(&asd, &ruleset, &r, &a);
> - }
> -
> - nextrule:
> - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
> -    &r, &a, &match))
> - break;
> - }
> - r = *rm; /* matching rule */
> - a = *am; /* rule that defines an anchor containing 'r' */
> - ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */
> - aruleset = arsm;/* ruleset of the 'a' rule itself */
> + rv = pf_match_rule(&ctx, ruleset);
> + if (rv == PF_TEST_FAIL) {
> + /*
> + * Reason has been set in pf_match_rule() already.
> + */
> + goto cleanup;
> + }
> +
> + r = *ctx.rm; /* matching rule */
> + a = *ctx.am; /* rule that defines an anchor containing 'r' */
> + ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */
> + ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */
> +
> +
>  
>   /* apply actions for last matching pass/block rule */
> - pf_rule_to_actions(r, &act);
> + pf_rule_to_actions(r, &ctx.act);
>   if (r->rule_flag & PFRULE_AFTO)
>   pd->naf = r->naf;
> - if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
> - REASON_SET(reason, PFRES_TRANSLATE);
> + if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
> + REASON_SET(&ctx.reason, PFRES_TRANSLATE);
>   goto cleanup;
>   }
> - REASON_SET(reason, PFRES_MATCH);
> + REASON_SET(&ctx.reason, PFRES_MATCH);
>  
>  #if NPFLOG > 0
>   if (r->log)
> - PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL);
> - if (act.log & PF_LOG_MATCHES)
> - pf_log_matches(pd, r, a, ruleset, &rules);
> + PFLOG_PACKET(pd, ctx.reason, r, ctx.a, ruleset, NULL);
> + if (ctx.act.log & PF_LOG_MATCHES)
> + pf_log_matches(pd, r, ctx.a, ruleset, &ctx.rules);
>  #endif /* NPFLOG > 0 */
>  
>   if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
> @@ -3719,31 +3755,32 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   if (pd->proto == IPPROTO_TCP &&
>      ((r->rule_flag & PFRULE_RETURNRST) ||
>      (r->rule_flag & PFRULE_RETURN)) &&
> -    !(pd->hdr.tcp.th_flags & TH_RST)) {
> - struct tcphdr *th = &pd->hdr.tcp;
> - u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
> +    !(ctx.th->th_flags & TH_RST)) {
> + u_int32_t ack =
> +    ntohl(ctx.th->th_seq) + pd->p_len;
>  
>   if (pf_check_tcp_cksum(pd->m, pd->off,
>      pd->tot_len - pd->off, pd->af))
> - REASON_SET(reason, PFRES_PROTCKSUM);
> + REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
>   else {
> - if (th->th_flags & TH_SYN)
> + if (ctx.th->th_flags & TH_SYN)
>   ack++;
> - if (th->th_flags & TH_FIN)
> + if (ctx.th->th_flags & TH_FIN)
>   ack++;
>   pf_send_tcp(r, pd->af, pd->dst,
> -    pd->src, th->th_dport, th->th_sport,
> -    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
> -    r->return_ttl, 1, 0, pd->rdomain);
> +    pd->src, ctx.th->th_dport,
> +    ctx.th->th_sport, ntohl(ctx.th->th_ack),
> +    ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
> +    1, 0, pd->rdomain);
>   }
>   } else if ((pd->proto != IPPROTO_ICMP ||
> -    ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET &&
> +    ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
>      r->return_icmp)
>   pf_send_icmp(pd->m, r->return_icmp >> 8,
>      r->return_icmp & 255, pd->af, r, pd->rdomain);
>   else if ((pd->proto != IPPROTO_ICMPV6 ||
> -    (icmptype >= ICMP6_ECHO_REQUEST &&
> -    icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
> +    (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
> +    ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
>      r->return_icmp6)
>   pf_send_icmp(pd->m, r->return_icmp6 >> 8,
>      r->return_icmp6 & 255, pd->af, r, pd->rdomain);
> @@ -3752,13 +3789,13 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   if (r->action == PF_DROP)
>   goto cleanup;
>  
> - pf_tag_packet(pd->m, tag, act.rtableid);
> - if (act.rtableid >= 0 &&
> -    rtable_l2(act.rtableid) != pd->rdomain)
> + pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
> + if (ctx.act.rtableid >= 0 &&
> +    rtable_l2(ctx.act.rtableid) != pd->rdomain)
>   pd->destchg = 1;
>  
>   if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
> - REASON_SET(reason, PFRES_IPOPTIONS);
> + REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
>  #if NPFLOG > 0
>   pd->pflog |= PF_LOG_FORCE;
>  #endif /* NPFLOG > 0 */
> @@ -3770,23 +3807,23 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   action = PF_PASS;
>  
>   if (pd->virtual_proto != PF_VPROTO_FRAGMENT
> -    && !state_icmp && r->keep_state) {
> +    && !ctx.state_icmp && r->keep_state) {
>  
>   if (r->rule_flag & PFRULE_SRCTRACK &&
> -    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
> -    pd->src, NULL) != 0) {
> - REASON_SET(reason, PFRES_SRCLIMIT);
> +    pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
> +    pd->af, pd->src, NULL) != 0) {
> + REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
>   goto cleanup;
>   }
>  
>   if (r->max_states && (r->states_cur >= r->max_states)) {
>   pf_status.lcounters[LCNT_STATES]++;
> - REASON_SET(reason, PFRES_MAXSTATES);
> + REASON_SET(&ctx.reason, PFRES_MAXSTATES);
>   goto cleanup;
>   }
>  
> - action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
> -    sm, tag, &rules, &act, sns);
> + action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
> +    &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);
>  
>   if (action != PF_PASS)
>   goto cleanup;
> @@ -3802,7 +3839,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
>      sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
>      &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
>      sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
> -    virtual_type, icmp_dir);
> +    virtual_type, ctx.icmp_dir);
>   }
>  
>  #ifdef INET6
> @@ -3811,9 +3848,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
>  #endif /* INET6 */
>  
>   } else {
> - while ((ri = SLIST_FIRST(&rules))) {
> - SLIST_REMOVE_HEAD(&rules, entry);
> - pool_put(&pf_rule_item_pl, ri);
> + while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
> + SLIST_REMOVE_HEAD(&ctx.rules, entry);
> + pool_put(&pf_rule_item_pl, ctx.ri);
>   }
>   }
>  
> @@ -3845,9 +3882,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   return (action);
>  
>  cleanup:
> - while ((ri = SLIST_FIRST(&rules))) {
> - SLIST_REMOVE_HEAD(&rules, entry);
> - pool_put(&pf_rule_item_pl, ri);
> + while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
> + SLIST_REMOVE_HEAD(&ctx.rules, entry);
> + pool_put(&pf_rule_item_pl, ctx.ri);
>   }
>  
>   return (action);
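
Stepping back from the diff: the key structural change is that the anchor
walk no longer indexes a frame array at all; pf_match_rule() simply
recurses into sub-rulesets, and the only shared walk state is a depth
counter in the per-packet context, so concurrent walks keep their state
on their own kernel stacks.  A stripped-down sketch of the pattern (toy
types, not the pf structures):

	#define ANCHOR_STACK_MAX	64

	struct ctx {
		int	depth;
	};

	struct rule {
		struct rule	*next;		/* next rule in ruleset */
		struct rule	*anchor;	/* sub-ruleset, if any */
	};

	static int
	match(struct ctx *ctx, struct rule *r)
	{
		for (; r != NULL; r = r->next) {
			if (r->anchor == NULL)
				continue;	/* evaluate ordinary rule */
			/*
			 * Bound the recursion the same way the old code
			 * bounded the frame array; failing closed here
			 * replaces the old "anchor stack overflow" case.
			 */
			if (ctx->depth >= ANCHOR_STACK_MAX)
				return (-1);
			ctx->depth++;
			if (match(ctx, r->anchor) == -1) {
				ctx->depth--;
				return (-1);
			}
			ctx->depth--;
		}
		return (0);
	}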

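One subtlety of the PF_TEST_ATTRIB() rework is worth a note: the macro
cannot keep the usual do { ... } while (0) wrapper, because the new body
uses 'continue' to advance the caller's rule loop, and inside a
do-while(0) the 'continue' would only hit the macro's own dummy loop.
The trailing empty else-do-while is what keeps it usable as a single
statement.  An illustration with toy names:

	#include <stddef.h>

	struct rule {
		struct rule	*next;
		int		 proto;
	};

	/* same shape as PF_TEST_ATTRIB() in the patch */
	#define TEST_ATTRIB(t, a)		\
		if (t) {			\
			r = (a);		\
			continue;		\
		} else do {			\
		} while (0)

	static struct rule *
	find(struct rule *r, int proto)
	{
		while (r != NULL) {
			/* skip rules that do not match */
			TEST_ATTRIB(r->proto != proto, r->next);
			break;		/* first matching rule */
		}
		return (r);
	}

The empty 'else do { } while (0)' swallows the semicolon the caller
writes after the macro, so an unbraced if/else around a TEST_ATTRIB()
still parses the way it reads.
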

Re: pf: percpu anchor stacks

Alexandr Nedvedicky
Hello,

On Fri, May 19, 2017 at 06:10:54PM +0200, Alexander Bluhm wrote:

> On Mon, May 15, 2017 at 03:19:19PM +0200, Alexandr Nedvedicky wrote:
> >     I'm attaching an updated final patch, which incorporates your suggestion.
>
> I think this broke sys/net/pf_forward.
> http://bluhm.genua.de/regress/results/regress.html
> When backing out pf.c rev 1.1024 it works again.
>
> I guess it is a problem with tagged route-to rules in an anchor.
> I cannot investigate right now, but will do later.
>

    I have not seen those failures when running the pf_forward test. I guess my
    test setup is somewhat broken. I'll need your help to get it into shape.
    I'll follow up on this off-list.


regards
sasha


Re: pf: percpu anchor stacks

Alexandr Nedvedicky
In reply to this post by Alexander Bluhm
Hello,

would you be able to try the patch below to check whether it fixes the pf_forward failures?

thanks a lot
and sorry for the inconvenience

regards
sasha

--------8<---------------8<---------------8<------------------8<--------
diff -r eb40d8d52679 src/sys/net/pf.c
--- a/src/sys/net/pf.c  Fri May 19 23:35:22 2017 +0200
+++ b/src/sys/net/pf.c  Fri May 19 23:40:35 2017 +0200
@@ -3644,6 +3644,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
        ctx.rsm = rsm;
        ctx.th = &pd->hdr.tcp;
        ctx.act.rtableid = pd->rdomain;
+       ctx.tag = -1;
        SLIST_INIT(&ctx.rules);
 
        if (pd->dir == PF_IN && if_congested()) {
--------8<---------------8<---------------8<------------------8<--------
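
For completeness, the bug as I reconstruct it (not taken from a commit
message): the pre-rework code initialized the local 'tag' to -1, the
"no tag" sentinel, while the rewrite bzero()s the whole pf_test_ctx and
so silently turned the sentinel into tag 0.  Any context field whose
unset value is not all-zero bits has to be re-set by hand after the
bzero.  A generic sketch of the pitfall:

	#include <strings.h>

	struct ctx {
		int	tag;		/* -1 means "no tag seen yet" */
	};

	static void
	ctx_init(struct ctx *ctx)
	{
		bzero(ctx, sizeof(*ctx));
		/*
		 * bzero() yields 0, which is a valid value rather than
		 * the sentinel; restore non-zero defaults explicitly,
		 * exactly what the ctx.tag = -1 hunk above does.
		 */
		ctx->tag = -1;
	}
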


Re: pf: percpu anchor stacks

Alexander Bluhm
On Fri, May 19, 2017 at 11:47:21PM +0200, Alexandr Nedvedicky wrote:
> would you be able to try the patch below to check whether it fixes the pf_forward failures?

Yes, this fixes it.  OK bluhm@

> thanks a lot
> and sorry for the inconvenience

Thanks for the quick fix.  And there was no inconvenience; I have
written the pf tests to find regressions.

bluhm

> --------8<---------------8<---------------8<------------------8<--------
> diff -r eb40d8d52679 src/sys/net/pf.c
> --- a/src/sys/net/pf.c  Fri May 19 23:35:22 2017 +0200
> +++ b/src/sys/net/pf.c  Fri May 19 23:40:35 2017 +0200
> @@ -3644,6 +3644,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
>         ctx.rsm = rsm;
>         ctx.th = &pd->hdr.tcp;
>         ctx.act.rtableid = pd->rdomain;
> +       ctx.tag = -1;
>         SLIST_INIT(&ctx.rules);
>  
>         if (pd->dir == PF_IN && if_congested()) {
> --------8<---------------8<---------------8<------------------8<--------
