Quantcast

pf: percpu anchor stacks

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

pf: percpu anchor stacks

Patrick Wildt-3
Hi,

to step into and out of pf(4) anchors, pf(4) uses a temporary stack
that is a global variable.  Now once we run pf_test_rule() in parallel
and an anchor is evaluated in parallel, the global stack would be used
concurrently, which can lead to panics.  To solve this issue this diff
allocates a per-cpu stack using the cpumem API.

Opinions?

Patrick

diff --git a/sys/net/pf.c b/sys/net/pf.c
index 3ddcd7726d2..78316ae0009 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -119,12 +119,7 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
- struct pf_ruleset *rs;
- struct pf_rule *r;
- struct pf_anchor_node *parent;
- struct pf_anchor *child;
-} pf_anchor_stack[64];
+struct cpumem *pf_anchor_stacks;
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -3003,22 +2998,26 @@ void
 pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
     struct pf_rule **r, struct pf_rule **a)
 {
+ struct pf_anchor_stack *s;
  struct pf_anchor_stackframe *f;
 
- if (*depth >= sizeof(pf_anchor_stack) /
-    sizeof(pf_anchor_stack[0])) {
+ s = cpumem_enter(pf_anchor_stacks);
+
+ if (*depth >= sizeof(*s) / sizeof(s->frame[0])) {
  log(LOG_ERR, "pf: anchor stack overflow\n");
  *r = TAILQ_NEXT(*r, entries);
+ cpumem_leave(pf_anchor_stacks, s);
  return;
  } else if (a != NULL)
  *a = *r;
- f = pf_anchor_stack + (*depth)++;
+ f = s->frame + (*depth)++;
  f->rs = *rs;
  f->r = *r;
  if ((*r)->anchor_wildcard) {
  f->parent = &(*r)->anchor->children;
  if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
  *r = NULL;
+ cpumem_leave(pf_anchor_stacks, s);
  return;
  }
  *rs = &f->child->ruleset;
@@ -3028,19 +3027,23 @@ pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
  *rs = &(*r)->anchor->ruleset;
  }
  *r = TAILQ_FIRST((*rs)->rules.active.ptr);
+ cpumem_leave(pf_anchor_stacks, s);
 }
 
 int
 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
     struct pf_rule **r, struct pf_rule **a, int *match)
 {
+ struct pf_anchor_stack *s;
  struct pf_anchor_stackframe *f;
  int quick = 0;
 
+ s = cpumem_enter(pf_anchor_stacks);
+
  do {
  if (*depth <= 0)
  break;
- f = pf_anchor_stack + *depth - 1;
+ f = s->frame + *depth - 1;
  if (f->parent != NULL && f->child != NULL) {
  f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
  if (f->child != NULL) {
@@ -3066,6 +3069,7 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
  *r = TAILQ_NEXT(f->r, entries);
  } while (*r == NULL);
 
+ cpumem_leave(pf_anchor_stacks, s);
  return (quick);
 }
 
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index 56a43a55ab8..869ca3eaa1d 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -161,6 +161,10 @@ pfattach(int num)
     IPL_SOFTNET, 0, "pfruleitem", NULL);
  pool_init(&pf_queue_pl, sizeof(struct pf_queuespec), 0,
     IPL_SOFTNET, 0, "pfqueue", NULL);
+
+ pf_anchor_stacks = cpumem_malloc(sizeof(struct pf_anchor_stack),
+    M_TEMP);
+
  hfsc_initialize();
  pfr_initialize();
  pfi_initialize();
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 00e9b790a91..62a183727b3 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1872,6 +1872,19 @@ void pf_send_tcp(const struct pf_rule *, sa_family_t,
     u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
     u_int16_t, u_int);
 
+struct pf_anchor_stackframe {
+ struct pf_ruleset *rs;
+ struct pf_rule *r;
+ struct pf_anchor_node *parent;
+ struct pf_anchor *child;
+};
+
+struct pf_anchor_stack {
+ struct pf_anchor_stackframe frame[64];
+};
+
+struct cpumem;
+extern struct cpumem *pf_anchor_stacks;
 #endif /* _KERNEL */
 
 #endif /* _NET_PFVAR_H_ */

Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: pf: percpu anchor stacks

Alexandr Nedvedicky
Hello,

I've sent a different patch [1], which touched the same functions, some time ago.
The old patch [1] basically splits pf_test_rule() to two functions:
    pf_test_rule()

    pf_match_rule(), which walks anchor stack recursively. the recursion depth
    is limited to 64.

the memory footprint for true recursion is the same as for an array of stack frames.
The only difference comes from the place where the memory gets allocated.

regards
sasha

[1] https://marc.info/?l=openbsd-tech&m=143902905917671&w=4

Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: pf: percpu anchor stacks

Alexandr Nedvedicky
In reply to this post by Patrick Wildt-3
Hello,

I'm attaching patch, which removes stack-as-a-global variable.
it's updated patch [1] to current tree.

sorry for being pushy advocating my old, rusty patch.

thanks and
regards
sasha

[1] http://openbsd-archive.7691.n7.nabble.com/Re-PF-SMP-making-anchor-stack-multithreaded-tt275915.html#none

--------8<---------------8<---------------8<------------------8<--------
diff -r d6e3a6338889 src/sys/net/pf.c
--- a/src/sys/net/pf.c Mon Mar 20 01:10:40 2017 +0100
+++ b/src/sys/net/pf.c Fri Mar 24 11:28:18 2017 +0100
@@ -119,12 +119,37 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
- struct pf_ruleset *rs;
- struct pf_rule *r;
- struct pf_anchor_node *parent;
- struct pf_anchor *child;
-} pf_anchor_stack[64];
+struct pf_test_ctx {
+ int test_status;
+ struct pf_pdesc *pd;
+ struct pf_rule_actions act;
+ u_int8_t icmpcode;
+ u_int8_t icmptype;
+ int icmp_dir;
+ int state_icmp;
+ int tag;
+ u_short reason;
+ struct pf_rule_item *ri;
+ struct pf_src_node *sns[PF_SN_MAX];
+ struct pf_rule_slist rules;
+ struct pf_rule *nr;
+ struct pf_rule **rm;
+ struct pf_rule *a;
+ struct pf_rule **am;
+ struct pf_ruleset **rsm;
+ struct pf_ruleset *arsm;
+ struct pf_ruleset *aruleset;
+ struct tcphdr *th;
+ int depth;
+};
+
+#define PF_ANCHOR_STACK_MAX 64
+
+enum {
+ PF_TEST_FAIL = -1,
+ PF_TEST_OK,
+ PF_TEST_QUICK
+};
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -211,11 +236,8 @@ struct pf_state *pf_find_state(struct p
     struct pf_state_key_cmp *, u_int, struct mbuf *);
 int pf_src_connlimit(struct pf_state **);
 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
-void pf_step_into_anchor(int *, struct pf_ruleset **,
-    struct pf_rule **, struct pf_rule **);
-int pf_step_out_of_anchor(int *, struct pf_ruleset **,
-     struct pf_rule **, struct pf_rule **,
-     int *);
+int pf_step_into_anchor(struct pf_test_ctx *, struct pf_rule *);
+int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset *);
 void pf_counters_inc(int, struct pf_pdesc *,
     struct pf_state *, struct pf_rule *,
     struct pf_rule *);
@@ -3019,74 +3041,37 @@ pf_tag_packet(struct mbuf *m, int tag, i
  m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 }
 
-void
-pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a)
+int
+pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r)
 {
- struct pf_anchor_stackframe *f;
-
- if (*depth >= sizeof(pf_anchor_stack) /
-    sizeof(pf_anchor_stack[0])) {
- log(LOG_ERR, "pf: anchor stack overflow\n");
- *r = TAILQ_NEXT(*r, entries);
- return;
- } else if (a != NULL)
- *a = *r;
- f = pf_anchor_stack + (*depth)++;
- f->rs = *rs;
- f->r = *r;
- if ((*r)->anchor_wildcard) {
- f->parent = &(*r)->anchor->children;
- if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
- *r = NULL;
- return;
- }
- *rs = &f->child->ruleset;
- } else {
- f->parent = NULL;
- f->child = NULL;
- *rs = &(*r)->anchor->ruleset;
- }
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
-}
-
-int
-pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a, int *match)
-{
- struct pf_anchor_stackframe *f;
- int quick = 0;
-
- do {
- if (*depth <= 0)
- break;
- f = pf_anchor_stack + *depth - 1;
- if (f->parent != NULL && f->child != NULL) {
- f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
- if (f->child != NULL) {
- *rs = &f->child->ruleset;
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
- if (*r == NULL)
- continue;
- else
- break;
+ int rv;
+
+ if (cx->depth >= PF_ANCHOR_STACK_MAX) {
+ log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
+ return (PF_TEST_FAIL);
+ }
+
+ cx->depth++;
+
+ if (r->anchor_wildcard) {
+ struct pf_anchor *child;
+ rv = PF_TEST_OK;
+ RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
+ rv = pf_match_rule(cx, &child->ruleset);
+ if (rv != 0) {
+ /*
+ * break on quick rule or failure
+ */
+ break;
  }
  }
- (*depth)--;
- if (*depth == 0 && a != NULL)
- *a = NULL;
- else if (a != NULL)
- *a = f->r;
- *rs = f->rs;
- if (*match > *depth) {
- *match = *depth;
- if (f->r->quick)
- quick = 1;
- }
- *r = TAILQ_NEXT(f->r, entries);
- } while (*r == NULL);
-
- return (quick);
+ } else {
+ rv = pf_match_rule(cx, &r->anchor->ruleset);
+ }
+
+ cx->depth--;
+
+ return (rv);
 }
 
 void
@@ -3458,46 +3443,230 @@ pf_rule_to_actions(struct pf_rule *r, st
  } while (0)
 
 int
+pf_match_rule(struct pf_test_ctx *cx, struct pf_ruleset *ruleset)
+{
+ struct pf_rule *r;
+
+ r = TAILQ_FIRST(ruleset->rules.active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ PF_TEST_ATTRIB((pfi_kif_match(r->kif, cx->pd->kif) == r->ifnot),
+ r->skip[PF_SKIP_IFP].ptr);
+ PF_TEST_ATTRIB((r->direction && r->direction != cx->pd->dir),
+ r->skip[PF_SKIP_DIR].ptr);
+ PF_TEST_ATTRIB((r->onrdomain >= 0  &&
+    (r->onrdomain == cx->pd->rdomain) == r->ifnot),
+ r->skip[PF_SKIP_RDOM].ptr);
+ PF_TEST_ATTRIB((r->af && r->af != cx->pd->af),
+ r->skip[PF_SKIP_AF].ptr);
+ PF_TEST_ATTRIB((r->proto && r->proto != cx->pd->proto),
+ r->skip[PF_SKIP_PROTO].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &cx->pd->nsaddr,
+    cx->pd->naf, r->src.neg, cx->pd->kif, cx->act.rtableid)),
+ r->skip[PF_SKIP_SRC_ADDR].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &cx->pd->ndaddr,
+    cx->pd->af, r->dst.neg, NULL, cx->act.rtableid)),
+ r->skip[PF_SKIP_DST_ADDR].ptr);
+
+ switch (cx->pd->virtual_proto) {
+ case PF_VPROTO_FRAGMENT:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((cx->pd->proto == IPPROTO_TCP &&
+    r->flagset),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type/code always 0 in other cases */
+ PF_TEST_ATTRIB((r->type || r->code),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. {uid|gid}.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op || r->uid.op),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_TCP:
+ PF_TEST_ATTRIB(((r->flagset & cx->th->th_flags) !=
+    r->flags),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
+    !pf_osfp_match(pf_osfp_fingerprint(cx->pd),
+    r->os_fingerprint)),
+ TAILQ_NEXT(r, entries));
+ /* FALLTHROUGH */
+
+ case IPPROTO_UDP:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op &&
+    !pf_match_port(r->src.port_op, r->src.port[0],
+    r->src.port[1], cx->pd->nsport)),
+ r->skip[PF_SKIP_SRC_PORT].ptr);
+ PF_TEST_ATTRIB((r->dst.port_op &&
+    !pf_match_port(r->dst.port_op, r->dst.port[0],
+    r->dst.port[1], cx->pd->ndport)),
+ r->skip[PF_SKIP_DST_PORT].ptr);
+ /* tcp/udp only. uid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->uid.op && (cx->pd->lookup.done ||
+    (cx->pd->lookup.done =
+    pf_socket_lookup(cx->pd), 1)) &&
+    !pf_match_uid(r->uid.op, r->uid.uid[0],
+    r->uid.uid[1], cx->pd->lookup.uid)),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. gid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op && (cx->pd->lookup.done ||
+    (cx->pd->lookup.done =
+    pf_socket_lookup(cx->pd), 1)) &&
+    !pf_match_gid(r->gid.op, r->gid.gid[0],
+    r->gid.gid[1], cx->pd->lookup.gid)),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->type && r->type != cx->icmptype + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->code && r->code != cx->icmpcode + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. don't create states on replies */
+ PF_TEST_ATTRIB((r->keep_state && !cx->state_icmp &&
+    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
+    cx->icmp_dir != PF_IN),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ default:
+ break;
+ }
+
+ PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
+    cx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->tos && !(r->tos == cx->pd->tos)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prob &&
+    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->match_tag &&
+    !pf_match_tag(cx->pd->m, r, &cx->tag)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(cx->pd->m, r) ==
+    r->rcvifnot),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prio &&
+    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
+    cx->pd->m->m_pkthdr.pf.prio),
+ TAILQ_NEXT(r, entries));
+
+ /* FALLTHROUGH */
+ if (r->tag)
+ cx->tag = r->tag;
+ if (r->anchor == NULL) {
+ if (r->action == PF_MATCH) {
+ if ((cx->ri = pool_get(&pf_rule_item_pl,
+    PR_NOWAIT)) == NULL) {
+ REASON_SET(&cx->reason, PFRES_MEMORY);
+ cx->test_status = PF_TEST_FAIL;
+ break;
+ }
+ cx->ri->r = r;
+ /* order is irrelevant */
+ SLIST_INSERT_HEAD(&cx->rules, cx->ri, entry);
+ cx->ri = NULL;
+ pf_rule_to_actions(r, &cx->act);
+ if (r->rule_flag & PFRULE_AFTO)
+ cx->pd->naf = r->naf;
+ if (pf_get_transaddr(r, cx->pd, cx->sns,
+    &cx->nr) == -1) {
+ REASON_SET(&cx->reason, PFRES_TRANSLATE);
+ cx->test_status = PF_TEST_FAIL;
+ break;
+ }
+#if NPFLOG > 0
+ if (r->log) {
+ REASON_SET(&cx->reason, PFRES_MATCH);
+ PFLOG_PACKET(cx->pd, cx->reason, r,
+    cx->a, ruleset, NULL);
+ }
+#endif /* NPFLOG > 0 */
+ } else {
+ /*
+ * found matching r
+ */
+ *cx->rm = r;
+ /*
+ * anchor, with ruleset, where r belongs to
+ */
+ *cx->am = cx->a;
+ /*
+ * ruleset where r belongs to
+ */
+ *cx->rsm = ruleset;
+ /*
+ * ruleset, where anchor belongs to.
+ */
+ cx->arsm = cx->aruleset;
+ }
+
+#if NPFLOG > 0
+ if (cx->act.log & PF_LOG_MATCHES)
+ pf_log_matches(cx->pd, r, cx->a, ruleset,
+    &cx->rules);
+#endif /* NPFLOG > 0 */
+
+ if (r->quick) {
+ cx->test_status = PF_TEST_QUICK;
+ break;
+ }
+ } else {
+ cx->a = r; /* remember anchor */
+ cx->aruleset = ruleset; /* and its ruleset */
+ if (pf_step_into_anchor(cx, r) != PF_TEST_OK) {
+ break;
+ }
+ }
+ r = TAILQ_NEXT(r, entries);
+nextrule:;
+ }
+
+ return (cx->test_status);
+}
+
+int
 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
     struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
 {
- struct pf_rule *r;
- struct pf_rule *nr = NULL;
+ struct pf_rule *r = NULL;
  struct pf_rule *a = NULL;
- struct pf_ruleset *arsm = NULL;
- struct pf_ruleset *aruleset = NULL;
  struct pf_ruleset *ruleset = NULL;
- struct pf_rule_slist rules;
- struct pf_rule_item *ri;
- struct pf_src_node *sns[PF_SN_MAX];
  struct pf_state_key *skw = NULL, *sks = NULL;
- struct pf_rule_actions act;
  int rewrite = 0;
- int tag = -1;
- int asd = 0;
- int match = 0;
- int state_icmp = 0, icmp_dir = 0;
  u_int16_t virtual_type, virtual_id;
- u_int8_t icmptype = 0, icmpcode = 0;
  int action = PF_DROP;
-
- bzero(&act, sizeof(act));
- bzero(sns, sizeof(sns));
- act.rtableid = pd->rdomain;
- SLIST_INIT(&rules);
+ struct pf_test_ctx cx;
+ int rv;
+
+ bzero(&cx, sizeof(cx));
+ cx.pd = pd;
+ cx.rm = rm;
+ cx.am = am;
+ cx.rsm = rsm;
+ cx.th = &pd->hdr.tcp;
+ cx.act.rtableid = pd->rdomain;
+ SLIST_INIT(&cx.rules);
 
  if (pd->dir == PF_IN && if_congested()) {
- REASON_SET(reason, PFRES_CONGEST);
+ REASON_SET(&cx.reason, PFRES_CONGEST);
  return (PF_DROP);
  }
 
  switch (pd->virtual_proto) {
  case IPPROTO_ICMP:
- icmptype = pd->hdr.icmp.icmp_type;
- icmpcode = pd->hdr.icmp.icmp_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ cx.icmptype = pd->hdr.icmp.icmp_type;
+ cx.icmpcode = pd->hdr.icmp.icmp_code;
+ cx.state_icmp = pf_icmp_mapping(pd, cx.icmptype,
+    &cx.icmp_dir, &virtual_id, &virtual_type);
+ if (cx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3507,11 +3676,11 @@ pf_test_rule(struct pf_pdesc *pd, struct
  break;
 #ifdef INET6
  case IPPROTO_ICMPV6:
- icmptype = pd->hdr.icmp6.icmp6_type;
- icmpcode = pd->hdr.icmp6.icmp6_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ cx.icmptype = pd->hdr.icmp6.icmp6_type;
+ cx.icmpcode = pd->hdr.icmp6.icmp6_code;
+ cx.state_icmp = pf_icmp_mapping(pd, cx.icmptype,
+    &cx.icmp_dir, &virtual_id, &virtual_type);
+ if (cx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3523,191 +3692,36 @@ pf_test_rule(struct pf_pdesc *pd, struct
  }
 
  ruleset = &pf_main_ruleset;
- r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
- while (r != NULL) {
- if (r->rule_flag & PFRULE_EXPIRED) {
- r = TAILQ_NEXT(r, entries);
- goto nextrule;
- }
- r->evaluations++;
- PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
- r->skip[PF_SKIP_IFP].ptr);
- PF_TEST_ATTRIB((r->direction && r->direction != pd->dir),
- r->skip[PF_SKIP_DIR].ptr);
- PF_TEST_ATTRIB((r->onrdomain >= 0  &&
-    (r->onrdomain == pd->rdomain) == r->ifnot),
- r->skip[PF_SKIP_RDOM].ptr);
- PF_TEST_ATTRIB((r->af && r->af != pd->af),
- r->skip[PF_SKIP_AF].ptr);
- PF_TEST_ATTRIB((r->proto && r->proto != pd->proto),
- r->skip[PF_SKIP_PROTO].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr,
-    pd->naf, r->src.neg, pd->kif, act.rtableid)),
- r->skip[PF_SKIP_SRC_ADDR].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
-    r->dst.neg, NULL, act.rtableid)),
- r->skip[PF_SKIP_DST_ADDR].ptr);
-
- switch (pd->virtual_proto) {
- case PF_VPROTO_FRAGMENT:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
- TAILQ_NEXT(r, entries));
- /* icmp only. type/code always 0 in other cases */
- PF_TEST_ATTRIB((r->type || r->code),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. {uid|gid}.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op || r->uid.op),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_TCP:
- PF_TEST_ATTRIB(((r->flagset & pd->hdr.tcp.th_flags) !=
-    r->flags),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
-    !pf_osfp_match(pf_osfp_fingerprint(pd),
-    r->os_fingerprint)),
- TAILQ_NEXT(r, entries));
- /* FALLTHROUGH */
-
- case IPPROTO_UDP:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op &&
-    !pf_match_port(r->src.port_op, r->src.port[0],
-    r->src.port[1], pd->nsport)),
- r->skip[PF_SKIP_SRC_PORT].ptr);
- PF_TEST_ATTRIB((r->dst.port_op &&
-    !pf_match_port(r->dst.port_op, r->dst.port[0],
-    r->dst.port[1], pd->ndport)),
- r->skip[PF_SKIP_DST_PORT].ptr);
- /* tcp/udp only. uid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_uid(r->uid.op, r->uid.uid[0],
-    r->uid.uid[1], pd->lookup.uid)),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. gid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_gid(r->gid.op, r->gid.gid[0],
-    r->gid.gid[1], pd->lookup.gid)),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6:
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->type && r->type != icmptype + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. don't create states on replies */
- PF_TEST_ATTRIB((r->keep_state && !state_icmp &&
-    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
-    icmp_dir != PF_IN),
- TAILQ_NEXT(r, entries));
- break;
-
- default:
- break;
- }
-
- PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
-    pd->virtual_proto != PF_VPROTO_FRAGMENT),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prob &&
-    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
-    r->rcvifnot),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prio && (r->prio == PF_PRIO_ZERO ?
-    0 : r->prio) != pd->m->m_pkthdr.pf.prio),
- TAILQ_NEXT(r, entries));
-
- /* FALLTHROUGH */
- if (r->tag)
- tag = r->tag;
- if (r->anchor == NULL) {
- if (r->action == PF_MATCH) {
- if ((ri = pool_get(&pf_rule_item_pl,
-    PR_NOWAIT)) == NULL) {
- REASON_SET(reason, PFRES_MEMORY);
- goto cleanup;
- }
- ri->r = r;
- /* order is irrelevant */
- SLIST_INSERT_HEAD(&rules, ri, entry);
- pf_rule_to_actions(r, &act);
- if (r->rule_flag & PFRULE_AFTO)
- pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
- goto cleanup;
- }
-#if NPFLOG > 0
- if (r->log) {
- REASON_SET(reason, PFRES_MATCH);
- PFLOG_PACKET(pd, *reason, r, a, ruleset,
-    NULL);
- }
-#endif /* NPFLOG > 0 */
- } else {
- match = asd;
- *rm = r;
- *am = a;
- *rsm = ruleset;
- arsm = aruleset;
- }
-
-#if NPFLOG > 0
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
-#endif /* NPFLOG > 0 */
-
- if (r->quick)
- break;
- r = TAILQ_NEXT(r, entries);
- } else {
- aruleset = ruleset;
- pf_step_into_anchor(&asd, &ruleset, &r, &a);
- }
-
- nextrule:
- if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
-    &r, &a, &match))
- break;
- }
- r = *rm; /* matching rule */
- a = *am; /* rule that defines an anchor containing 'r' */
- ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */
- aruleset = arsm;/* ruleset of the 'a' rule itself */
+ rv = pf_match_rule(&cx, ruleset);
+ if (rv == PF_TEST_FAIL) {
+ /*
+ * Reason has been set in pf_match_rule() already.
+ */
+ goto cleanup;
+ }
+
+ r = *cx.rm; /* matching rule */
+ a = *cx.am; /* rule that defines an anchor containing 'r' */
+ ruleset = *cx.rsm;/* ruleset of the anchor defined by the rule 'a' */
+ cx.aruleset = cx.arsm;/* ruleset of the 'a' rule itself */
+
+
 
  /* apply actions for last matching pass/block rule */
- pf_rule_to_actions(r, &act);
+ pf_rule_to_actions(r, &cx.act);
  if (r->rule_flag & PFRULE_AFTO)
  pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
+ if (pf_get_transaddr(r, pd, cx.sns, &cx.nr) == -1) {
+ REASON_SET(&cx.reason, PFRES_TRANSLATE);
  goto cleanup;
  }
- REASON_SET(reason, PFRES_MATCH);
+ REASON_SET(&cx.reason, PFRES_MATCH);
 
 #if NPFLOG > 0
  if (r->log)
- PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL);
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
+ PFLOG_PACKET(pd, cx.reason, r, cx.a, ruleset, NULL);
+ if (cx.act.log & PF_LOG_MATCHES)
+ pf_log_matches(pd, r, cx.a, ruleset, &cx.rules);
 #endif /* NPFLOG > 0 */
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
@@ -3718,31 +3732,30 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (pd->proto == IPPROTO_TCP &&
     ((r->rule_flag & PFRULE_RETURNRST) ||
     (r->rule_flag & PFRULE_RETURN)) &&
-    !(pd->hdr.tcp.th_flags & TH_RST)) {
- struct tcphdr *th = &pd->hdr.tcp;
- u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+    !(cx.th->th_flags & TH_RST)) {
+ u_int32_t ack = ntohl(cx.th->th_seq) + pd->p_len;
 
  if (pf_check_tcp_cksum(pd->m, pd->off,
     pd->tot_len - pd->off, pd->af))
- REASON_SET(reason, PFRES_PROTCKSUM);
+ REASON_SET(&cx.reason, PFRES_PROTCKSUM);
  else {
- if (th->th_flags & TH_SYN)
+ if (cx.th->th_flags & TH_SYN)
  ack++;
- if (th->th_flags & TH_FIN)
+ if (cx.th->th_flags & TH_FIN)
  ack++;
  pf_send_tcp(r, pd->af, pd->dst,
-    pd->src, th->th_dport, th->th_sport,
-    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
-    r->return_ttl, 1, 0, pd->rdomain);
+    pd->src, cx.th->th_dport, cx.th->th_sport,
+    ntohl(cx.th->th_ack), ack, TH_RST|TH_ACK,
+    0, 0, r->return_ttl, 1, 0, pd->rdomain);
  }
  } else if ((pd->proto != IPPROTO_ICMP ||
-    ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET &&
+    ICMP_INFOTYPE(cx.icmptype)) && pd->af == AF_INET &&
     r->return_icmp)
  pf_send_icmp(pd->m, r->return_icmp >> 8,
     r->return_icmp & 255, pd->af, r, pd->rdomain);
  else if ((pd->proto != IPPROTO_ICMPV6 ||
-    (icmptype >= ICMP6_ECHO_REQUEST &&
-    icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
+    (cx.icmptype >= ICMP6_ECHO_REQUEST &&
+    cx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
     r->return_icmp6)
  pf_send_icmp(pd->m, r->return_icmp6 >> 8,
     r->return_icmp6 & 255, pd->af, r, pd->rdomain);
@@ -3751,13 +3764,13 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (r->action == PF_DROP)
  goto cleanup;
 
- pf_tag_packet(pd->m, tag, act.rtableid);
- if (act.rtableid >= 0 &&
-    rtable_l2(act.rtableid) != pd->rdomain)
+ pf_tag_packet(pd->m, cx.tag, cx.act.rtableid);
+ if (cx.act.rtableid >= 0 &&
+    rtable_l2(cx.act.rtableid) != pd->rdomain)
  pd->destchg = 1;
 
  if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
- REASON_SET(reason, PFRES_IPOPTIONS);
+ REASON_SET(&cx.reason, PFRES_IPOPTIONS);
 #if NPFLOG > 0
  pd->pflog |= PF_LOG_FORCE;
 #endif /* NPFLOG > 0 */
@@ -3769,23 +3782,23 @@ pf_test_rule(struct pf_pdesc *pd, struct
  action = PF_PASS;
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT
-    && !state_icmp && r->keep_state) {
+    && !cx.state_icmp && r->keep_state) {
 
  if (r->rule_flag & PFRULE_SRCTRACK &&
-    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
+    pf_insert_src_node(&cx.sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
     pd->src, NULL) != 0) {
- REASON_SET(reason, PFRES_SRCLIMIT);
+ REASON_SET(&cx.reason, PFRES_SRCLIMIT);
  goto cleanup;
  }
 
  if (r->max_states && (r->states_cur >= r->max_states)) {
  pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(reason, PFRES_MAXSTATES);
+ REASON_SET(&cx.reason, PFRES_MAXSTATES);
  goto cleanup;
  }
 
- action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
-    sm, tag, &rules, &act, sns);
+ action = pf_create_state(pd, r, a, cx.nr, &skw, &sks, &rewrite,
+    sm, cx.tag, &cx.rules, &cx.act, cx.sns);
 
  if (action != PF_PASS)
  goto cleanup;
@@ -3801,7 +3814,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
     sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
     &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
     sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
-    virtual_type, icmp_dir);
+    virtual_type, cx.icmp_dir);
  }
 
 #ifdef INET6
@@ -3810,9 +3823,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
 #endif /* INET6 */
 
  } else {
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((cx.ri = SLIST_FIRST(&cx.rules))) {
+ SLIST_REMOVE_HEAD(&cx.rules, entry);
+ pool_put(&pf_rule_item_pl, cx.ri);
  }
  }
 
@@ -3844,9 +3857,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
  return (action);
 
 cleanup:
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((cx.ri = SLIST_FIRST(&cx.rules))) {
+ SLIST_REMOVE_HEAD(&cx.rules, entry);
+ pool_put(&pf_rule_item_pl, cx.ri);
  }
 
  return (action);
--------8<---------------8<---------------8<------------------8<--------

Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: pf: percpu anchor stacks

Mike Belopuhov-5
On Fri, Mar 24, 2017 at 12:19 +0100, Alexandr Nedvedicky wrote:
> Hello,
>
> I'm attaching patch, which removes stack-as-a-global variable.
> it's updated patch [1] to current tree.
>
> sorry for being pushy advocating my old, rusty patch.
>

I think your diff is the way to go indeed.  If we can avoid
using the global stack altogether, then all the better.
This diff also splits giant pf_test_rule into several chunks
which is a good thing in my opinion.

A few random comments:

 - some lines appear to be longer than 80 symbols;

 - "cx" is an uncommon abbreviation for a "context" in OpenBSD,
   we normally use "ctx";

 - PF_TEST_ATTRIB could use a "continue" statement instead of
   the goto;

 - instead of checking "rv" against 0 in the "break on quick
   rule or failure" I'd like to see an actual check against
   PF_TEST_* values so that it's grep'able;

 - s/test_status/action/ as it's done everywhere else?

 - I'm not certain I like extra set of PASS/BLOCK macros.
   I know you want to represent the "quick" pass separately,
   but perhaps there's a way to do it while using PF_PASS...

Does this pass pfctl regress tests?

While I haven't noticed anything criminal here, it makes me
wonder if it'd be possible to do this change in a few steps:
factor out rule matching from pf_test_rule and then bring in
anchor changes?

> thanks and
> regards
> sasha
>
> [1] http://openbsd-archive.7691.n7.nabble.com/Re-PF-SMP-making-anchor-stack-multithreaded-tt275915.html#none
>
> --------8<---------------8<---------------8<------------------8<--------
> diff -r d6e3a6338889 src/sys/net/pf.c
> --- a/src/sys/net/pf.c Mon Mar 20 01:10:40 2017 +0100
> +++ b/src/sys/net/pf.c Fri Mar 24 11:28:18 2017 +0100
> @@ -119,12 +119,37 @@ u_char pf_tcp_secret[16];
>  int pf_tcp_secret_init;
>  int pf_tcp_iss_off;
>  
> -struct pf_anchor_stackframe {
> - struct pf_ruleset *rs;
> - struct pf_rule *r;
> - struct pf_anchor_node *parent;
> - struct pf_anchor *child;
> -} pf_anchor_stack[64];
> +struct pf_test_ctx {
> + int test_status;
> + struct pf_pdesc *pd;
> + struct pf_rule_actions act;
> + u_int8_t icmpcode;
> + u_int8_t icmptype;
> + int icmp_dir;
> + int state_icmp;
> + int tag;
> + u_short reason;
> + struct pf_rule_item *ri;
> + struct pf_src_node *sns[PF_SN_MAX];
> + struct pf_rule_slist rules;
> + struct pf_rule *nr;
> + struct pf_rule **rm;
> + struct pf_rule *a;
> + struct pf_rule **am;
> + struct pf_ruleset **rsm;
> + struct pf_ruleset *arsm;
> + struct pf_ruleset *aruleset;
> + struct tcphdr *th;
> + int depth;
> +};
> +
> +#define PF_ANCHOR_STACK_MAX 64
> +
> +enum {
> + PF_TEST_FAIL = -1,
> + PF_TEST_OK,
> + PF_TEST_QUICK
> +};
>  
>  struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
>  struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
> @@ -211,11 +236,8 @@ struct pf_state *pf_find_state(struct p
>      struct pf_state_key_cmp *, u_int, struct mbuf *);
>  int pf_src_connlimit(struct pf_state **);
>  int pf_match_rcvif(struct mbuf *, struct pf_rule *);
> -void pf_step_into_anchor(int *, struct pf_ruleset **,
> -    struct pf_rule **, struct pf_rule **);
> -int pf_step_out_of_anchor(int *, struct pf_ruleset **,
> -     struct pf_rule **, struct pf_rule **,
> -     int *);
> +int pf_step_into_anchor(struct pf_test_ctx *, struct pf_rule *);
> +int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset *);
>  void pf_counters_inc(int, struct pf_pdesc *,
>      struct pf_state *, struct pf_rule *,
>      struct pf_rule *);
> @@ -3019,74 +3041,37 @@ pf_tag_packet(struct mbuf *m, int tag, i
>   m->m_pkthdr.ph_rtableid = (u_int)rtableid;
>  }
>  
> -void
> -pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
> -    struct pf_rule **r, struct pf_rule **a)
> +int
> +pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r)
>  {
> - struct pf_anchor_stackframe *f;
> -
> - if (*depth >= sizeof(pf_anchor_stack) /
> -    sizeof(pf_anchor_stack[0])) {
> - log(LOG_ERR, "pf: anchor stack overflow\n");
> - *r = TAILQ_NEXT(*r, entries);
> - return;
> - } else if (a != NULL)
> - *a = *r;
> - f = pf_anchor_stack + (*depth)++;
> - f->rs = *rs;
> - f->r = *r;
> - if ((*r)->anchor_wildcard) {
> - f->parent = &(*r)->anchor->children;
> - if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
> - *r = NULL;
> - return;
> - }
> - *rs = &f->child->ruleset;
> - } else {
> - f->parent = NULL;
> - f->child = NULL;
> - *rs = &(*r)->anchor->ruleset;
> - }
> - *r = TAILQ_FIRST((*rs)->rules.active.ptr);
> -}
> -
> -int
> -pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
> -    struct pf_rule **r, struct pf_rule **a, int *match)
> -{
> - struct pf_anchor_stackframe *f;
> - int quick = 0;
> -
> - do {
> - if (*depth <= 0)
> - break;
> - f = pf_anchor_stack + *depth - 1;
> - if (f->parent != NULL && f->child != NULL) {
> - f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
> - if (f->child != NULL) {
> - *rs = &f->child->ruleset;
> - *r = TAILQ_FIRST((*rs)->rules.active.ptr);
> - if (*r == NULL)
> - continue;
> - else
> - break;
> + int rv;
> +
> + if (cx->depth >= PF_ANCHOR_STACK_MAX) {
> + log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
> + return (PF_TEST_FAIL);
> + }
> +
> + cx->depth++;
> +
> + if (r->anchor_wildcard) {
> + struct pf_anchor *child;
> + rv = PF_TEST_OK;
> + RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
> + rv = pf_match_rule(cx, &child->ruleset);
> + if (rv != 0) {
> + /*
> + * break on quick rule or failure
> + */
> + break;
>   }
>   }
> - (*depth)--;
> - if (*depth == 0 && a != NULL)
> - *a = NULL;
> - else if (a != NULL)
> - *a = f->r;
> - *rs = f->rs;
> - if (*match > *depth) {
> - *match = *depth;
> - if (f->r->quick)
> - quick = 1;
> - }
> - *r = TAILQ_NEXT(f->r, entries);
> - } while (*r == NULL);
> -
> - return (quick);
> + } else {
> + rv = pf_match_rule(cx, &r->anchor->ruleset);
> + }
> +
> + cx->depth--;
> +
> + return (rv);
>  }
>  
>  void
> @@ -3458,46 +3443,230 @@ pf_rule_to_actions(struct pf_rule *r, st
>   } while (0)
>  
>  int
> +pf_match_rule(struct pf_test_ctx *cx, struct pf_ruleset *ruleset)
> +{
> + struct pf_rule *r;
> +
> + r = TAILQ_FIRST(ruleset->rules.active.ptr);
> + while (r != NULL) {
> + r->evaluations++;
> + PF_TEST_ATTRIB((pfi_kif_match(r->kif, cx->pd->kif) == r->ifnot),
> + r->skip[PF_SKIP_IFP].ptr);
> + PF_TEST_ATTRIB((r->direction && r->direction != cx->pd->dir),
> + r->skip[PF_SKIP_DIR].ptr);
> + PF_TEST_ATTRIB((r->onrdomain >= 0  &&
> +    (r->onrdomain == cx->pd->rdomain) == r->ifnot),
> + r->skip[PF_SKIP_RDOM].ptr);
> + PF_TEST_ATTRIB((r->af && r->af != cx->pd->af),
> + r->skip[PF_SKIP_AF].ptr);
> + PF_TEST_ATTRIB((r->proto && r->proto != cx->pd->proto),
> + r->skip[PF_SKIP_PROTO].ptr);
> + PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &cx->pd->nsaddr,
> +    cx->pd->naf, r->src.neg, cx->pd->kif, cx->act.rtableid)),
> + r->skip[PF_SKIP_SRC_ADDR].ptr);
> + PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &cx->pd->ndaddr,
> +    cx->pd->af, r->dst.neg, NULL, cx->act.rtableid)),
> + r->skip[PF_SKIP_DST_ADDR].ptr);
> +
> + switch (cx->pd->virtual_proto) {
> + case PF_VPROTO_FRAGMENT:
> + /* tcp/udp only. port_op always 0 in other cases */
> + PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((cx->pd->proto == IPPROTO_TCP &&
> +    r->flagset),
> + TAILQ_NEXT(r, entries));
> + /* icmp only. type/code always 0 in other cases */
> + PF_TEST_ATTRIB((r->type || r->code),
> + TAILQ_NEXT(r, entries));
> + /* tcp/udp only. {uid|gid}.op always 0 in other cases */
> + PF_TEST_ATTRIB((r->gid.op || r->uid.op),
> + TAILQ_NEXT(r, entries));
> + break;
> +
> + case IPPROTO_TCP:
> + PF_TEST_ATTRIB(((r->flagset & cx->th->th_flags) !=
> +    r->flags),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
> +    !pf_osfp_match(pf_osfp_fingerprint(cx->pd),
> +    r->os_fingerprint)),
> + TAILQ_NEXT(r, entries));
> + /* FALLTHROUGH */
> +
> + case IPPROTO_UDP:
> + /* tcp/udp only. port_op always 0 in other cases */
> + PF_TEST_ATTRIB((r->src.port_op &&
> +    !pf_match_port(r->src.port_op, r->src.port[0],
> +    r->src.port[1], cx->pd->nsport)),
> + r->skip[PF_SKIP_SRC_PORT].ptr);
> + PF_TEST_ATTRIB((r->dst.port_op &&
> +    !pf_match_port(r->dst.port_op, r->dst.port[0],
> +    r->dst.port[1], cx->pd->ndport)),
> + r->skip[PF_SKIP_DST_PORT].ptr);
> + /* tcp/udp only. uid.op always 0 in other cases */
> + PF_TEST_ATTRIB((r->uid.op && (cx->pd->lookup.done ||
> +    (cx->pd->lookup.done =
> +    pf_socket_lookup(cx->pd), 1)) &&
> +    !pf_match_uid(r->uid.op, r->uid.uid[0],
> +    r->uid.uid[1], cx->pd->lookup.uid)),
> + TAILQ_NEXT(r, entries));
> + /* tcp/udp only. gid.op always 0 in other cases */
> + PF_TEST_ATTRIB((r->gid.op && (cx->pd->lookup.done ||
> +    (cx->pd->lookup.done =
> +    pf_socket_lookup(cx->pd), 1)) &&
> +    !pf_match_gid(r->gid.op, r->gid.gid[0],
> +    r->gid.gid[1], cx->pd->lookup.gid)),
> + TAILQ_NEXT(r, entries));
> + break;
> +
> + case IPPROTO_ICMP:
> + case IPPROTO_ICMPV6:
> + /* icmp only. type always 0 in other cases */
> + PF_TEST_ATTRIB((r->type && r->type != cx->icmptype + 1),
> + TAILQ_NEXT(r, entries));
> + /* icmp only. type always 0 in other cases */
> + PF_TEST_ATTRIB((r->code && r->code != cx->icmpcode + 1),
> + TAILQ_NEXT(r, entries));
> + /* icmp only. don't create states on replies */
> + PF_TEST_ATTRIB((r->keep_state && !cx->state_icmp &&
> +    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
> +    cx->icmp_dir != PF_IN),
> + TAILQ_NEXT(r, entries));
> + break;
> +
> + default:
> + break;
> + }
> +
> + PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
> +    cx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->tos && !(r->tos == cx->pd->tos)),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->prob &&
> +    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->match_tag &&
> +    !pf_match_tag(cx->pd->m, r, &cx->tag)),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(cx->pd->m, r) ==
> +    r->rcvifnot),
> + TAILQ_NEXT(r, entries));
> + PF_TEST_ATTRIB((r->prio &&
> +    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
> +    cx->pd->m->m_pkthdr.pf.prio),
> + TAILQ_NEXT(r, entries));
> +
> + /* FALLTHROUGH */
> + if (r->tag)
> + cx->tag = r->tag;
> + if (r->anchor == NULL) {
> + if (r->action == PF_MATCH) {
> + if ((cx->ri = pool_get(&pf_rule_item_pl,
> +    PR_NOWAIT)) == NULL) {
> + REASON_SET(&cx->reason, PFRES_MEMORY);
> + cx->test_status = PF_TEST_FAIL;
> + break;
> + }
> + cx->ri->r = r;
> + /* order is irrelevant */
> + SLIST_INSERT_HEAD(&cx->rules, cx->ri, entry);
> + cx->ri = NULL;
> + pf_rule_to_actions(r, &cx->act);
> + if (r->rule_flag & PFRULE_AFTO)
> + cx->pd->naf = r->naf;
> + if (pf_get_transaddr(r, cx->pd, cx->sns,
> +    &cx->nr) == -1) {
> + REASON_SET(&cx->reason, PFRES_TRANSLATE);
> + cx->test_status = PF_TEST_FAIL;
> + break;
> + }
> +#if NPFLOG > 0
> + if (r->log) {
> + REASON_SET(&cx->reason, PFRES_MATCH);
> + PFLOG_PACKET(cx->pd, cx->reason, r,
> +    cx->a, ruleset, NULL);
> + }
> +#endif /* NPFLOG > 0 */
> + } else {
> + /*
> + * found matching r
> + */
> + *cx->rm = r;
> + /*
> + * anchor, with ruleset, where r belongs to
> + */
> + *cx->am = cx->a;
> + /*
> + * ruleset where r belongs to
> + */
> + *cx->rsm = ruleset;
> + /*
> + * ruleset, where anchor belongs to.
> + */
> + cx->arsm = cx->aruleset;
> + }
> +
> +#if NPFLOG > 0
> + if (cx->act.log & PF_LOG_MATCHES)
> + pf_log_matches(cx->pd, r, cx->a, ruleset,
> +    &cx->rules);
> +#endif /* NPFLOG > 0 */
> +
> + if (r->quick) {
> + cx->test_status = PF_TEST_QUICK;
> + break;
> + }
> + } else {
> + cx->a = r; /* remember anchor */
> + cx->aruleset = ruleset; /* and its ruleset */
> + if (pf_step_into_anchor(cx, r) != PF_TEST_OK) {
> + break;
> + }
> + }
> + r = TAILQ_NEXT(r, entries);
> +nextrule:;
> + }
> +
> + return (cx->test_status);
> +}
> +
> +int
>  pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
>      struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
>  {
> - struct pf_rule *r;
> - struct pf_rule *nr = NULL;
> + struct pf_rule *r = NULL;
>   struct pf_rule *a = NULL;
> - struct pf_ruleset *arsm = NULL;
> - struct pf_ruleset *aruleset = NULL;
>   struct pf_ruleset *ruleset = NULL;
> - struct pf_rule_slist rules;
> - struct pf_rule_item *ri;
> - struct pf_src_node *sns[PF_SN_MAX];
>   struct pf_state_key *skw = NULL, *sks = NULL;
> - struct pf_rule_actions act;
>   int rewrite = 0;
> - int tag = -1;
> - int asd = 0;
> - int match = 0;
> - int state_icmp = 0, icmp_dir = 0;
>   u_int16_t virtual_type, virtual_id;
> - u_int8_t icmptype = 0, icmpcode = 0;
>   int action = PF_DROP;
> -
> - bzero(&act, sizeof(act));
> - bzero(sns, sizeof(sns));
> - act.rtableid = pd->rdomain;
> - SLIST_INIT(&rules);
> + struct pf_test_ctx cx;
> + int rv;
> +
> + bzero(&cx, sizeof(cx));
> + cx.pd = pd;
> + cx.rm = rm;
> + cx.am = am;
> + cx.rsm = rsm;
> + cx.th = &pd->hdr.tcp;
> + cx.act.rtableid = pd->rdomain;
> + SLIST_INIT(&cx.rules);
>  
>   if (pd->dir == PF_IN && if_congested()) {
> - REASON_SET(reason, PFRES_CONGEST);
> + REASON_SET(&cx.reason, PFRES_CONGEST);
>   return (PF_DROP);
>   }
>  
>   switch (pd->virtual_proto) {
>   case IPPROTO_ICMP:
> - icmptype = pd->hdr.icmp.icmp_type;
> - icmpcode = pd->hdr.icmp.icmp_code;
> - state_icmp = pf_icmp_mapping(pd, icmptype,
> -    &icmp_dir, &virtual_id, &virtual_type);
> - if (icmp_dir == PF_IN) {
> + cx.icmptype = pd->hdr.icmp.icmp_type;
> + cx.icmpcode = pd->hdr.icmp.icmp_code;
> + cx.state_icmp = pf_icmp_mapping(pd, cx.icmptype,
> +    &cx.icmp_dir, &virtual_id, &virtual_type);
> + if (cx.icmp_dir == PF_IN) {
>   pd->osport = pd->nsport = virtual_id;
>   pd->odport = pd->ndport = virtual_type;
>   } else {
> @@ -3507,11 +3676,11 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   break;
>  #ifdef INET6
>   case IPPROTO_ICMPV6:
> - icmptype = pd->hdr.icmp6.icmp6_type;
> - icmpcode = pd->hdr.icmp6.icmp6_code;
> - state_icmp = pf_icmp_mapping(pd, icmptype,
> -    &icmp_dir, &virtual_id, &virtual_type);
> - if (icmp_dir == PF_IN) {
> + cx.icmptype = pd->hdr.icmp6.icmp6_type;
> + cx.icmpcode = pd->hdr.icmp6.icmp6_code;
> + cx.state_icmp = pf_icmp_mapping(pd, cx.icmptype,
> +    &cx.icmp_dir, &virtual_id, &virtual_type);
> + if (cx.icmp_dir == PF_IN) {
>   pd->osport = pd->nsport = virtual_id;
>   pd->odport = pd->ndport = virtual_type;
>   } else {
> @@ -3523,191 +3692,36 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   }
>  
>   ruleset = &pf_main_ruleset;
> - r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
> - while (r != NULL) {
> - if (r->rule_flag & PFRULE_EXPIRED) {
> - r = TAILQ_NEXT(r, entries);
> - goto nextrule;
> - }
> - r->evaluations++;
> - PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
> - r->skip[PF_SKIP_IFP].ptr);
> - PF_TEST_ATTRIB((r->direction && r->direction != pd->dir),
> - r->skip[PF_SKIP_DIR].ptr);
> - PF_TEST_ATTRIB((r->onrdomain >= 0  &&
> -    (r->onrdomain == pd->rdomain) == r->ifnot),
> - r->skip[PF_SKIP_RDOM].ptr);
> - PF_TEST_ATTRIB((r->af && r->af != pd->af),
> - r->skip[PF_SKIP_AF].ptr);
> - PF_TEST_ATTRIB((r->proto && r->proto != pd->proto),
> - r->skip[PF_SKIP_PROTO].ptr);
> - PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr,
> -    pd->naf, r->src.neg, pd->kif, act.rtableid)),
> - r->skip[PF_SKIP_SRC_ADDR].ptr);
> - PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
> -    r->dst.neg, NULL, act.rtableid)),
> - r->skip[PF_SKIP_DST_ADDR].ptr);
> -
> - switch (pd->virtual_proto) {
> - case PF_VPROTO_FRAGMENT:
> - /* tcp/udp only. port_op always 0 in other cases */
> - PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
> - TAILQ_NEXT(r, entries));
> - /* icmp only. type/code always 0 in other cases */
> - PF_TEST_ATTRIB((r->type || r->code),
> - TAILQ_NEXT(r, entries));
> - /* tcp/udp only. {uid|gid}.op always 0 in other cases */
> - PF_TEST_ATTRIB((r->gid.op || r->uid.op),
> - TAILQ_NEXT(r, entries));
> - break;
> -
> - case IPPROTO_TCP:
> - PF_TEST_ATTRIB(((r->flagset & pd->hdr.tcp.th_flags) !=
> -    r->flags),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
> -    !pf_osfp_match(pf_osfp_fingerprint(pd),
> -    r->os_fingerprint)),
> - TAILQ_NEXT(r, entries));
> - /* FALLTHROUGH */
> -
> - case IPPROTO_UDP:
> - /* tcp/udp only. port_op always 0 in other cases */
> - PF_TEST_ATTRIB((r->src.port_op &&
> -    !pf_match_port(r->src.port_op, r->src.port[0],
> -    r->src.port[1], pd->nsport)),
> - r->skip[PF_SKIP_SRC_PORT].ptr);
> - PF_TEST_ATTRIB((r->dst.port_op &&
> -    !pf_match_port(r->dst.port_op, r->dst.port[0],
> -    r->dst.port[1], pd->ndport)),
> - r->skip[PF_SKIP_DST_PORT].ptr);
> - /* tcp/udp only. uid.op always 0 in other cases */
> - PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done ||
> -    (pd->lookup.done =
> -    pf_socket_lookup(pd), 1)) &&
> -    !pf_match_uid(r->uid.op, r->uid.uid[0],
> -    r->uid.uid[1], pd->lookup.uid)),
> - TAILQ_NEXT(r, entries));
> - /* tcp/udp only. gid.op always 0 in other cases */
> - PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done ||
> -    (pd->lookup.done =
> -    pf_socket_lookup(pd), 1)) &&
> -    !pf_match_gid(r->gid.op, r->gid.gid[0],
> -    r->gid.gid[1], pd->lookup.gid)),
> - TAILQ_NEXT(r, entries));
> - break;
> -
> - case IPPROTO_ICMP:
> - case IPPROTO_ICMPV6:
> - /* icmp only. type always 0 in other cases */
> - PF_TEST_ATTRIB((r->type && r->type != icmptype + 1),
> - TAILQ_NEXT(r, entries));
> - /* icmp only. type always 0 in other cases */
> - PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1),
> - TAILQ_NEXT(r, entries));
> - /* icmp only. don't create states on replies */
> - PF_TEST_ATTRIB((r->keep_state && !state_icmp &&
> -    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
> -    icmp_dir != PF_IN),
> - TAILQ_NEXT(r, entries));
> - break;
> -
> - default:
> - break;
> - }
> -
> - PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
> -    pd->virtual_proto != PF_VPROTO_FRAGMENT),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->prob &&
> -    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
> -    r->rcvifnot),
> - TAILQ_NEXT(r, entries));
> - PF_TEST_ATTRIB((r->prio && (r->prio == PF_PRIO_ZERO ?
> -    0 : r->prio) != pd->m->m_pkthdr.pf.prio),
> - TAILQ_NEXT(r, entries));
> -
> - /* FALLTHROUGH */
> - if (r->tag)
> - tag = r->tag;
> - if (r->anchor == NULL) {
> - if (r->action == PF_MATCH) {
> - if ((ri = pool_get(&pf_rule_item_pl,
> -    PR_NOWAIT)) == NULL) {
> - REASON_SET(reason, PFRES_MEMORY);
> - goto cleanup;
> - }
> - ri->r = r;
> - /* order is irrelevant */
> - SLIST_INSERT_HEAD(&rules, ri, entry);
> - pf_rule_to_actions(r, &act);
> - if (r->rule_flag & PFRULE_AFTO)
> - pd->naf = r->naf;
> - if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
> - REASON_SET(reason, PFRES_TRANSLATE);
> - goto cleanup;
> - }
> -#if NPFLOG > 0
> - if (r->log) {
> - REASON_SET(reason, PFRES_MATCH);
> - PFLOG_PACKET(pd, *reason, r, a, ruleset,
> -    NULL);
> - }
> -#endif /* NPFLOG > 0 */
> - } else {
> - match = asd;
> - *rm = r;
> - *am = a;
> - *rsm = ruleset;
> - arsm = aruleset;
> - }
> -
> -#if NPFLOG > 0
> - if (act.log & PF_LOG_MATCHES)
> - pf_log_matches(pd, r, a, ruleset, &rules);
> -#endif /* NPFLOG > 0 */
> -
> - if (r->quick)
> - break;
> - r = TAILQ_NEXT(r, entries);
> - } else {
> - aruleset = ruleset;
> - pf_step_into_anchor(&asd, &ruleset, &r, &a);
> - }
> -
> - nextrule:
> - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
> -    &r, &a, &match))
> - break;
> - }
> - r = *rm; /* matching rule */
> - a = *am; /* rule that defines an anchor containing 'r' */
> - ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */
> - aruleset = arsm;/* ruleset of the 'a' rule itself */
> + rv = pf_match_rule(&cx, ruleset);
> + if (rv == PF_TEST_FAIL) {
> + /*
> + * Reason has been set in pf_match_rule() already.
> + */
> + goto cleanup;
> + }
> +
> + r = *cx.rm; /* matching rule */
> + a = *cx.am; /* rule that defines an anchor containing 'r' */
> + ruleset = *cx.rsm;/* ruleset of the anchor defined by the rule 'a' */
> + cx.aruleset = cx.arsm;/* ruleset of the 'a' rule itself */
> +
> +
>  
>   /* apply actions for last matching pass/block rule */
> - pf_rule_to_actions(r, &act);
> + pf_rule_to_actions(r, &cx.act);
>   if (r->rule_flag & PFRULE_AFTO)
>   pd->naf = r->naf;
> - if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
> - REASON_SET(reason, PFRES_TRANSLATE);
> + if (pf_get_transaddr(r, pd, cx.sns, &cx.nr) == -1) {
> + REASON_SET(&cx.reason, PFRES_TRANSLATE);
>   goto cleanup;
>   }
> - REASON_SET(reason, PFRES_MATCH);
> + REASON_SET(&cx.reason, PFRES_MATCH);
>  
>  #if NPFLOG > 0
>   if (r->log)
> - PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL);
> - if (act.log & PF_LOG_MATCHES)
> - pf_log_matches(pd, r, a, ruleset, &rules);
> + PFLOG_PACKET(pd, cx.reason, r, cx.a, ruleset, NULL);
> + if (cx.act.log & PF_LOG_MATCHES)
> + pf_log_matches(pd, r, cx.a, ruleset, &cx.rules);
>  #endif /* NPFLOG > 0 */
>  
>   if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
> @@ -3718,31 +3732,30 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   if (pd->proto == IPPROTO_TCP &&
>      ((r->rule_flag & PFRULE_RETURNRST) ||
>      (r->rule_flag & PFRULE_RETURN)) &&
> -    !(pd->hdr.tcp.th_flags & TH_RST)) {
> - struct tcphdr *th = &pd->hdr.tcp;
> - u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
> +    !(cx.th->th_flags & TH_RST)) {
> + u_int32_t ack = ntohl(cx.th->th_seq) + pd->p_len;
>  
>   if (pf_check_tcp_cksum(pd->m, pd->off,
>      pd->tot_len - pd->off, pd->af))
> - REASON_SET(reason, PFRES_PROTCKSUM);
> + REASON_SET(&cx.reason, PFRES_PROTCKSUM);
>   else {
> - if (th->th_flags & TH_SYN)
> + if (cx.th->th_flags & TH_SYN)
>   ack++;
> - if (th->th_flags & TH_FIN)
> + if (cx.th->th_flags & TH_FIN)
>   ack++;
>   pf_send_tcp(r, pd->af, pd->dst,
> -    pd->src, th->th_dport, th->th_sport,
> -    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
> -    r->return_ttl, 1, 0, pd->rdomain);
> +    pd->src, cx.th->th_dport, cx.th->th_sport,
> +    ntohl(cx.th->th_ack), ack, TH_RST|TH_ACK,
> +    0, 0, r->return_ttl, 1, 0, pd->rdomain);
>   }
>   } else if ((pd->proto != IPPROTO_ICMP ||
> -    ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET &&
> +    ICMP_INFOTYPE(cx.icmptype)) && pd->af == AF_INET &&
>      r->return_icmp)
>   pf_send_icmp(pd->m, r->return_icmp >> 8,
>      r->return_icmp & 255, pd->af, r, pd->rdomain);
>   else if ((pd->proto != IPPROTO_ICMPV6 ||
> -    (icmptype >= ICMP6_ECHO_REQUEST &&
> -    icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
> +    (cx.icmptype >= ICMP6_ECHO_REQUEST &&
> +    cx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
>      r->return_icmp6)
>   pf_send_icmp(pd->m, r->return_icmp6 >> 8,
>      r->return_icmp6 & 255, pd->af, r, pd->rdomain);
> @@ -3751,13 +3764,13 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   if (r->action == PF_DROP)
>   goto cleanup;
>  
> - pf_tag_packet(pd->m, tag, act.rtableid);
> - if (act.rtableid >= 0 &&
> -    rtable_l2(act.rtableid) != pd->rdomain)
> + pf_tag_packet(pd->m, cx.tag, cx.act.rtableid);
> + if (cx.act.rtableid >= 0 &&
> +    rtable_l2(cx.act.rtableid) != pd->rdomain)
>   pd->destchg = 1;
>  
>   if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
> - REASON_SET(reason, PFRES_IPOPTIONS);
> + REASON_SET(&cx.reason, PFRES_IPOPTIONS);
>  #if NPFLOG > 0
>   pd->pflog |= PF_LOG_FORCE;
>  #endif /* NPFLOG > 0 */
> @@ -3769,23 +3782,23 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   action = PF_PASS;
>  
>   if (pd->virtual_proto != PF_VPROTO_FRAGMENT
> -    && !state_icmp && r->keep_state) {
> +    && !cx.state_icmp && r->keep_state) {
>  
>   if (r->rule_flag & PFRULE_SRCTRACK &&
> -    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
> +    pf_insert_src_node(&cx.sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
>      pd->src, NULL) != 0) {
> - REASON_SET(reason, PFRES_SRCLIMIT);
> + REASON_SET(&cx.reason, PFRES_SRCLIMIT);
>   goto cleanup;
>   }
>  
>   if (r->max_states && (r->states_cur >= r->max_states)) {
>   pf_status.lcounters[LCNT_STATES]++;
> - REASON_SET(reason, PFRES_MAXSTATES);
> + REASON_SET(&cx.reason, PFRES_MAXSTATES);
>   goto cleanup;
>   }
>  
> - action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
> -    sm, tag, &rules, &act, sns);
> + action = pf_create_state(pd, r, a, cx.nr, &skw, &sks, &rewrite,
> +    sm, cx.tag, &cx.rules, &cx.act, cx.sns);
>  
>   if (action != PF_PASS)
>   goto cleanup;
> @@ -3801,7 +3814,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
>      sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
>      &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
>      sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
> -    virtual_type, icmp_dir);
> +    virtual_type, cx.icmp_dir);
>   }
>  
>  #ifdef INET6
> @@ -3810,9 +3823,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
>  #endif /* INET6 */
>  
>   } else {
> - while ((ri = SLIST_FIRST(&rules))) {
> - SLIST_REMOVE_HEAD(&rules, entry);
> - pool_put(&pf_rule_item_pl, ri);
> + while ((cx.ri = SLIST_FIRST(&cx.rules))) {
> + SLIST_REMOVE_HEAD(&cx.rules, entry);
> + pool_put(&pf_rule_item_pl, cx.ri);
>   }
>   }
>  
> @@ -3844,9 +3857,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
>   return (action);
>  
>  cleanup:
> - while ((ri = SLIST_FIRST(&rules))) {
> - SLIST_REMOVE_HEAD(&rules, entry);
> - pool_put(&pf_rule_item_pl, ri);
> + while ((cx.ri = SLIST_FIRST(&cx.rules))) {
> + SLIST_REMOVE_HEAD(&cx.rules, entry);
> + pool_put(&pf_rule_item_pl, cx.ri);
>   }
>  
>   return (action);
> --------8<---------------8<---------------8<------------------8<--------
>

Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: pf: percpu anchor stacks

Alexandr Nedvedicky
Hello Mike,

thank you for looking at my patch. I accept most of your comments.
I believe the items below deserve further discussion.

>  - instead of checking "rv" against 0 in the "break on quick
>    rule or failure" I'd like to see an actual check against
>    PF_TEST_* values so that it's grep'able;

    this is the 'edited' diff to highlight the place, which your comment is
    related to. It shows the change to patch I've sent in earlier mail [1].

[1] http://openbsd-archive.7691.n7.nabble.com/pf-percpu-anchor-stacks-tt314935.html#a315309
--------8<---------------8<---------------8<------------------8<--------
-pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r)
+pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
 {
        int     rv;
 
        if (r->anchor_wildcard) {
                struct pf_anchor        *child;
                rv = PF_TEST_OK;
                RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
-                       rv = pf_match_rule(cx, &child->ruleset);
-                       if (rv != 0) {
+                       rv = pf_match_rule(ctx, &child->ruleset);
+                       if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
                                /*
-                                * break on quick rule or failure
+                                * we either hit a rule with quick action
+                                * (more likely), or hit some runtime
+                                * error (e.g. pool_get() failure).
                                 */
                                break;
                        }
                }
        } else {
-               rv = pf_match_rule(cx, &r->anchor->ruleset);
-       }
-
-       cx->depth--;
+               rv = pf_match_rule(ctx, &r->anchor->ruleset);
+       }
+
+       ctx->depth--;
 
        return (rv);
 }
--------8<---------------8<---------------8<------------------8<--------

>
>  - s/test_status/action/ as it's done everywhere else?

    I've opted for test_status, because it's something different from 'action'
    as we use it in current code.

    the 'action' usually comes from rule (or state match) and orders, what
    PF should do with packet (pass/block/translate/...)

    the test_status rather indicates the status of rule processing, which is:
        terminate due to failure (PF_TEST_FAIL), caused by runtime error,
        continue (PF_TEST_OK),
        terminate (PF_TEST_QUICK) due to hitting a rule with 'quick' action

>
>  - I'm not certain I like extra set of PASS/BLOCK macros.
>    I know you want to represent the "quick" pass separately,
>    but perhaps there's a way to do it while using PF_PASS...

    I somewhat understand your point, but as I've said earlier, I see 'action'
    and 'test_status' as two distinct things, which I prefer to clearly
    separate.

    However I don't insist on the current code in patch. I think the existing
    enum, which defines PF_PASS/PF_BLOCK/... can be extended. If we will go
    this way, then I would rather do s/test_status/virtual_action
    using virtual, follows the pattern, which got established for
    protocol: proto & virtual_proto


>
> Does this pass pfctl regress tests?

    I'm about to run those tests for OpenBSD.

>
> While I haven't noticed anything criminal here, it makes me
> wonder if it'd be possible to do this change in a few steps:
> factor out rule matching from pf_test_rule and then bring in
> anchor changes?
>

    if I understand you right, you basically want me to make change
    in two steps:

        the first step splits current pf_test_rule() to pf_match_rule() and
        pf_test_rule()

        the second step will kill global anchor stack array by introducing
        a true recursion. The patch will remove pf_step_out_of_anchor()
        function.

    I think I can do it. And also as Theo pointed out there is no rush
    to get that patch to tree.

thanks and
regards
sasha

--------8<---------------8<---------------8<------------------8<--------

diff -r b483ee1b4a65 src/sys/net/pf.c
--- a/src/sys/net/pf.c Tue Mar 28 10:39:14 2017 +0200
+++ b/src/sys/net/pf.c Tue Mar 28 11:44:12 2017 +0200
@@ -119,12 +119,53 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
- struct pf_ruleset *rs;
- struct pf_rule *r;
- struct pf_anchor_node *parent;
- struct pf_anchor *child;
-} pf_anchor_stack[64];
+struct pf_test_ctx {
+ int test_status;
+ struct pf_pdesc *pd;
+ struct pf_rule_actions act;
+ u_int8_t icmpcode;
+ u_int8_t icmptype;
+ int icmp_dir;
+ int state_icmp;
+ int tag;
+ u_short reason;
+ struct pf_rule_item *ri;
+ struct pf_src_node *sns[PF_SN_MAX];
+ struct pf_rule_slist rules;
+ struct pf_rule *nr;
+ struct pf_rule **rm;
+ struct pf_rule *a;
+ struct pf_rule **am;
+ struct pf_ruleset **rsm;
+ struct pf_ruleset *arsm;
+ struct pf_ruleset *aruleset;
+ struct tcphdr *th;
+ int depth;
+};
+
+#define PF_ANCHOR_STACK_MAX 64
+
+enum {
+ PF_TEST_FAIL = -1,
+ PF_TEST_OK,
+ PF_TEST_QUICK
+};
+/*
+ * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c.
+ * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c.
+ */
+union pf_headers {
+ struct tcphdr tcp;
+ struct udphdr udp;
+ struct icmp icmp;
+#ifdef INET6
+ struct icmp6_hdr icmp6;
+ struct mld_hdr mld;
+ struct nd_neighbor_solicit nd_ns;
+#endif /* INET6 */
+};
+
+
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -211,11 +252,8 @@ struct pf_state *pf_find_state(struct p
     struct pf_state_key_cmp *, u_int, struct mbuf *);
 int pf_src_connlimit(struct pf_state **);
 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
-void pf_step_into_anchor(int *, struct pf_ruleset **,
-    struct pf_rule **, struct pf_rule **);
-int pf_step_out_of_anchor(int *, struct pf_ruleset **,
-     struct pf_rule **, struct pf_rule **,
-     int *);
+int pf_step_into_anchor(struct pf_test_ctx *, struct pf_rule *);
+int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset *);
 void pf_counters_inc(int, struct pf_pdesc *,
     struct pf_state *, struct pf_rule *,
     struct pf_rule *);
@@ -3019,74 +3057,39 @@ pf_tag_packet(struct mbuf *m, int tag, i
  m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 }
 
-void
-pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a)
+int
+pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
 {
- struct pf_anchor_stackframe *f;
-
- if (*depth >= sizeof(pf_anchor_stack) /
-    sizeof(pf_anchor_stack[0])) {
- log(LOG_ERR, "pf: anchor stack overflow\n");
- *r = TAILQ_NEXT(*r, entries);
- return;
- } else if (a != NULL)
- *a = *r;
- f = pf_anchor_stack + (*depth)++;
- f->rs = *rs;
- f->r = *r;
- if ((*r)->anchor_wildcard) {
- f->parent = &(*r)->anchor->children;
- if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
- *r = NULL;
- return;
- }
- *rs = &f->child->ruleset;
- } else {
- f->parent = NULL;
- f->child = NULL;
- *rs = &(*r)->anchor->ruleset;
- }
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
-}
-
-int
-pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
-    struct pf_rule **r, struct pf_rule **a, int *match)
-{
- struct pf_anchor_stackframe *f;
- int quick = 0;
-
- do {
- if (*depth <= 0)
- break;
- f = pf_anchor_stack + *depth - 1;
- if (f->parent != NULL && f->child != NULL) {
- f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
- if (f->child != NULL) {
- *rs = &f->child->ruleset;
- *r = TAILQ_FIRST((*rs)->rules.active.ptr);
- if (*r == NULL)
- continue;
- else
- break;
+ int rv;
+
+ if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
+ log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
+ return (PF_TEST_FAIL);
+ }
+
+ ctx->depth++;
+
+ if (r->anchor_wildcard) {
+ struct pf_anchor *child;
+ rv = PF_TEST_OK;
+ RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
+ rv = pf_match_rule(ctx, &child->ruleset);
+ if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
+ /*
+ * we either hit a rule with quick action
+ * (more likely), or hit some runtime
+ * error (e.g. pool_get() failure).
+ */
+ break;
  }
  }
- (*depth)--;
- if (*depth == 0 && a != NULL)
- *a = NULL;
- else if (a != NULL)
- *a = f->r;
- *rs = f->rs;
- if (*match > *depth) {
- *match = *depth;
- if (f->r->quick)
- quick = 1;
- }
- *r = TAILQ_NEXT(f->r, entries);
- } while (*r == NULL);
-
- return (quick);
+ } else {
+ rv = pf_match_rule(ctx, &r->anchor->ruleset);
+ }
+
+ ctx->depth--;
+
+ return (rv);
 }
 
 void
@@ -3453,51 +3456,239 @@ pf_rule_to_actions(struct pf_rule *r, st
  do { \
  if (t) { \
  r = a; \
- goto nextrule; \
+ continue; \
  } \
  } while (0)
 
 int
+pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
+{
+ struct pf_rule *r;
+
+ r = TAILQ_FIRST(ruleset->rules.active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ PF_TEST_ATTRIB(
+    (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot),
+ r->skip[PF_SKIP_IFP].ptr);
+ PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
+ r->skip[PF_SKIP_DIR].ptr);
+ PF_TEST_ATTRIB((r->onrdomain >= 0  &&
+    (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
+ r->skip[PF_SKIP_RDOM].ptr);
+ PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
+ r->skip[PF_SKIP_AF].ptr);
+ PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
+ r->skip[PF_SKIP_PROTO].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
+    ctx->pd->naf, r->src.neg, ctx->pd->kif,
+    ctx->act.rtableid)),
+ r->skip[PF_SKIP_SRC_ADDR].ptr);
+ PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
+    ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
+ r->skip[PF_SKIP_DST_ADDR].ptr);
+
+ switch (ctx->pd->virtual_proto) {
+ case PF_VPROTO_FRAGMENT:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
+    r->flagset),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. type/code always 0 in other cases */
+ PF_TEST_ATTRIB((r->type || r->code),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. {uid|gid}.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op || r->uid.op),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_TCP:
+ PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
+    r->flags),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
+    !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
+    r->os_fingerprint)),
+ TAILQ_NEXT(r, entries));
+ /* FALLTHROUGH */
+
+ case IPPROTO_UDP:
+ /* tcp/udp only. port_op always 0 in other cases */
+ PF_TEST_ATTRIB((r->src.port_op &&
+    !pf_match_port(r->src.port_op, r->src.port[0],
+    r->src.port[1], ctx->pd->nsport)),
+ r->skip[PF_SKIP_SRC_PORT].ptr);
+ PF_TEST_ATTRIB((r->dst.port_op &&
+    !pf_match_port(r->dst.port_op, r->dst.port[0],
+    r->dst.port[1], ctx->pd->ndport)),
+ r->skip[PF_SKIP_DST_PORT].ptr);
+ /* tcp/udp only. uid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
+    (ctx->pd->lookup.done =
+    pf_socket_lookup(ctx->pd), 1)) &&
+    !pf_match_uid(r->uid.op, r->uid.uid[0],
+    r->uid.uid[1], ctx->pd->lookup.uid)),
+ TAILQ_NEXT(r, entries));
+ /* tcp/udp only. gid.op always 0 in other cases */
+ PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
+    (ctx->pd->lookup.done =
+    pf_socket_lookup(ctx->pd), 1)) &&
+    !pf_match_gid(r->gid.op, r->gid.gid[0],
+    r->gid.gid[1], ctx->pd->lookup.gid)),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ /* icmp only. type always 0 in other cases */
+ PF_TEST_ATTRIB((r->type &&
+    r->type != ctx->icmptype + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. code always 0 in other cases */
+ PF_TEST_ATTRIB((r->code &&
+    r->code != ctx->icmpcode + 1),
+ TAILQ_NEXT(r, entries));
+ /* icmp only. don't create states on replies */
+ PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
+    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
+    ctx->icmp_dir != PF_IN),
+ TAILQ_NEXT(r, entries));
+ break;
+
+ default:
+ break;
+ }
+
+ PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
+    ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prob &&
+    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->match_tag &&
+    !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
+    r->rcvifnot),
+ TAILQ_NEXT(r, entries));
+ PF_TEST_ATTRIB((r->prio &&
+    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
+    ctx->pd->m->m_pkthdr.pf.prio),
+ TAILQ_NEXT(r, entries));
+
+ /* FALLTHROUGH */
+ if (r->tag)
+ ctx->tag = r->tag;
+ if (r->anchor == NULL) {
+ if (r->action == PF_MATCH) {
+ if ((ctx->ri = pool_get(&pf_rule_item_pl,
+    PR_NOWAIT)) == NULL) {
+ REASON_SET(&ctx->reason, PFRES_MEMORY);
+ ctx->test_status = PF_TEST_FAIL;
+ break;
+ }
+ ctx->ri->r = r;
+ /* order is irrelevant */
+ SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
+ ctx->ri = NULL;
+ pf_rule_to_actions(r, &ctx->act);
+ if (r->rule_flag & PFRULE_AFTO)
+ ctx->pd->naf = r->naf;
+ if (pf_get_transaddr(r, ctx->pd, ctx->sns,
+    &ctx->nr) == -1) {
+ REASON_SET(&ctx->reason,
+    PFRES_TRANSLATE);
+ ctx->test_status = PF_TEST_FAIL;
+ break;
+ }
+#if NPFLOG > 0
+ if (r->log) {
+ REASON_SET(&ctx->reason, PFRES_MATCH);
+ PFLOG_PACKET(ctx->pd, ctx->reason, r,
+    ctx->a, ruleset, NULL);
+ }
+#endif /* NPFLOG > 0 */
+ } else {
+ /*
+ * found matching r
+ */
+ *ctx->rm = r;
+ /*
+ * anchor, with ruleset, where r belongs to
+ */
+ *ctx->am = ctx->a;
+ /*
+ * ruleset where r belongs to
+ */
+ *ctx->rsm = ruleset;
+ /*
+ * ruleset, where anchor belongs to.
+ */
+ ctx->arsm = ctx->aruleset;
+ }
+
+#if NPFLOG > 0
+ if (ctx->act.log & PF_LOG_MATCHES)
+ pf_log_matches(ctx->pd, r, ctx->a, ruleset,
+    &ctx->rules);
+#endif /* NPFLOG > 0 */
+
+ if (r->quick) {
+ ctx->test_status = PF_TEST_QUICK;
+ break;
+ }
+ } else {
+ ctx->a = r; /* remember anchor */
+ ctx->aruleset = ruleset; /* and its ruleset */
+ if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) {
+ break;
+ }
+ }
+ r = TAILQ_NEXT(r, entries);
+ }
+
+ return (ctx->test_status);
+}
+
+int
 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
     struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
 {
- struct pf_rule *r;
- struct pf_rule *nr = NULL;
+ struct pf_rule *r = NULL;
  struct pf_rule *a = NULL;
- struct pf_ruleset *arsm = NULL;
- struct pf_ruleset *aruleset = NULL;
  struct pf_ruleset *ruleset = NULL;
- struct pf_rule_slist rules;
- struct pf_rule_item *ri;
- struct pf_src_node *sns[PF_SN_MAX];
  struct pf_state_key *skw = NULL, *sks = NULL;
- struct pf_rule_actions act;
  int rewrite = 0;
- int tag = -1;
- int asd = 0;
- int match = 0;
- int state_icmp = 0, icmp_dir = 0;
  u_int16_t virtual_type, virtual_id;
- u_int8_t icmptype = 0, icmpcode = 0;
  int action = PF_DROP;
-
- bzero(&act, sizeof(act));
- bzero(sns, sizeof(sns));
- act.rtableid = pd->rdomain;
- SLIST_INIT(&rules);
+ struct pf_test_ctx ctx;
+ int rv;
+
+ bzero(&ctx, sizeof(ctx));
+ ctx.pd = pd;
+ ctx.rm = rm;
+ ctx.am = am;
+ ctx.rsm = rsm;
+ ctx.th = &pd->hdr.tcp;
+ ctx.act.rtableid = pd->rdomain;
+ SLIST_INIT(&ctx.rules);
 
  if (pd->dir == PF_IN && if_congested()) {
- REASON_SET(reason, PFRES_CONGEST);
+ REASON_SET(&ctx.reason, PFRES_CONGEST);
  return (PF_DROP);
  }
 
  switch (pd->virtual_proto) {
  case IPPROTO_ICMP:
- icmptype = pd->hdr.icmp.icmp_type;
- icmpcode = pd->hdr.icmp.icmp_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ ctx.icmptype = pd->hdr.icmp.icmp_type;
+ ctx.icmpcode = pd->hdr.icmp.icmp_code;
+ ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+    &ctx.icmp_dir, &virtual_id, &virtual_type);
+ if (ctx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3507,11 +3698,11 @@ pf_test_rule(struct pf_pdesc *pd, struct
  break;
 #ifdef INET6
  case IPPROTO_ICMPV6:
- icmptype = pd->hdr.icmp6.icmp6_type;
- icmpcode = pd->hdr.icmp6.icmp6_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
-    &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
+ ctx.icmptype = pd->hdr.icmp6.icmp6_type;
+ ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
+ ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+    &ctx.icmp_dir, &virtual_id, &virtual_type);
+ if (ctx.icmp_dir == PF_IN) {
  pd->osport = pd->nsport = virtual_id;
  pd->odport = pd->ndport = virtual_type;
  } else {
@@ -3523,191 +3714,36 @@ pf_test_rule(struct pf_pdesc *pd, struct
  }
 
  ruleset = &pf_main_ruleset;
- r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
- while (r != NULL) {
- if (r->rule_flag & PFRULE_EXPIRED) {
- r = TAILQ_NEXT(r, entries);
- goto nextrule;
- }
- r->evaluations++;
- PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
- r->skip[PF_SKIP_IFP].ptr);
- PF_TEST_ATTRIB((r->direction && r->direction != pd->dir),
- r->skip[PF_SKIP_DIR].ptr);
- PF_TEST_ATTRIB((r->onrdomain >= 0  &&
-    (r->onrdomain == pd->rdomain) == r->ifnot),
- r->skip[PF_SKIP_RDOM].ptr);
- PF_TEST_ATTRIB((r->af && r->af != pd->af),
- r->skip[PF_SKIP_AF].ptr);
- PF_TEST_ATTRIB((r->proto && r->proto != pd->proto),
- r->skip[PF_SKIP_PROTO].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr,
-    pd->naf, r->src.neg, pd->kif, act.rtableid)),
- r->skip[PF_SKIP_SRC_ADDR].ptr);
- PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
-    r->dst.neg, NULL, act.rtableid)),
- r->skip[PF_SKIP_DST_ADDR].ptr);
-
- switch (pd->virtual_proto) {
- case PF_VPROTO_FRAGMENT:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
- TAILQ_NEXT(r, entries));
- /* icmp only. type/code always 0 in other cases */
- PF_TEST_ATTRIB((r->type || r->code),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. {uid|gid}.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op || r->uid.op),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_TCP:
- PF_TEST_ATTRIB(((r->flagset & pd->hdr.tcp.th_flags) !=
-    r->flags),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
-    !pf_osfp_match(pf_osfp_fingerprint(pd),
-    r->os_fingerprint)),
- TAILQ_NEXT(r, entries));
- /* FALLTHROUGH */
-
- case IPPROTO_UDP:
- /* tcp/udp only. port_op always 0 in other cases */
- PF_TEST_ATTRIB((r->src.port_op &&
-    !pf_match_port(r->src.port_op, r->src.port[0],
-    r->src.port[1], pd->nsport)),
- r->skip[PF_SKIP_SRC_PORT].ptr);
- PF_TEST_ATTRIB((r->dst.port_op &&
-    !pf_match_port(r->dst.port_op, r->dst.port[0],
-    r->dst.port[1], pd->ndport)),
- r->skip[PF_SKIP_DST_PORT].ptr);
- /* tcp/udp only. uid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_uid(r->uid.op, r->uid.uid[0],
-    r->uid.uid[1], pd->lookup.uid)),
- TAILQ_NEXT(r, entries));
- /* tcp/udp only. gid.op always 0 in other cases */
- PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done ||
-    (pd->lookup.done =
-    pf_socket_lookup(pd), 1)) &&
-    !pf_match_gid(r->gid.op, r->gid.gid[0],
-    r->gid.gid[1], pd->lookup.gid)),
- TAILQ_NEXT(r, entries));
- break;
-
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6:
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->type && r->type != icmptype + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1),
- TAILQ_NEXT(r, entries));
- /* icmp only. don't create states on replies */
- PF_TEST_ATTRIB((r->keep_state && !state_icmp &&
-    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
-    icmp_dir != PF_IN),
- TAILQ_NEXT(r, entries));
- break;
-
- default:
- break;
- }
-
- PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
-    pd->virtual_proto != PF_VPROTO_FRAGMENT),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prob &&
-    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
-    r->rcvifnot),
- TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB((r->prio && (r->prio == PF_PRIO_ZERO ?
-    0 : r->prio) != pd->m->m_pkthdr.pf.prio),
- TAILQ_NEXT(r, entries));
-
- /* FALLTHROUGH */
- if (r->tag)
- tag = r->tag;
- if (r->anchor == NULL) {
- if (r->action == PF_MATCH) {
- if ((ri = pool_get(&pf_rule_item_pl,
-    PR_NOWAIT)) == NULL) {
- REASON_SET(reason, PFRES_MEMORY);
- goto cleanup;
- }
- ri->r = r;
- /* order is irrelevant */
- SLIST_INSERT_HEAD(&rules, ri, entry);
- pf_rule_to_actions(r, &act);
- if (r->rule_flag & PFRULE_AFTO)
- pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
- goto cleanup;
- }
-#if NPFLOG > 0
- if (r->log) {
- REASON_SET(reason, PFRES_MATCH);
- PFLOG_PACKET(pd, *reason, r, a, ruleset,
-    NULL);
- }
-#endif /* NPFLOG > 0 */
- } else {
- match = asd;
- *rm = r;
- *am = a;
- *rsm = ruleset;
- arsm = aruleset;
- }
-
-#if NPFLOG > 0
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
-#endif /* NPFLOG > 0 */
-
- if (r->quick)
- break;
- r = TAILQ_NEXT(r, entries);
- } else {
- aruleset = ruleset;
- pf_step_into_anchor(&asd, &ruleset, &r, &a);
- }
-
- nextrule:
- if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
-    &r, &a, &match))
- break;
- }
- r = *rm; /* matching rule */
- a = *am; /* rule that defines an anchor containing 'r' */
- ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */
- aruleset = arsm;/* ruleset of the 'a' rule itself */
+ rv = pf_match_rule(&ctx, ruleset);
+ if (rv == PF_TEST_FAIL) {
+ /*
+ * Reason has been set in pf_match_rule() already.
+ */
+ goto cleanup;
+ }
+
+ r = *ctx.rm; /* matching rule */
+ a = *ctx.am; /* rule that defines an anchor containing 'r' */
+ ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */
+ ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */
+
+
 
  /* apply actions for last matching pass/block rule */
- pf_rule_to_actions(r, &act);
+ pf_rule_to_actions(r, &ctx.act);
  if (r->rule_flag & PFRULE_AFTO)
  pd->naf = r->naf;
- if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
- REASON_SET(reason, PFRES_TRANSLATE);
+ if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
+ REASON_SET(&ctx.reason, PFRES_TRANSLATE);
  goto cleanup;
  }
- REASON_SET(reason, PFRES_MATCH);
+ REASON_SET(&ctx.reason, PFRES_MATCH);
 
 #if NPFLOG > 0
  if (r->log)
- PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL);
- if (act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &rules);
+ PFLOG_PACKET(pd, ctx.reason, r, ctx.a, ruleset, NULL);
+ if (ctx.act.log & PF_LOG_MATCHES)
+ pf_log_matches(pd, r, ctx.a, ruleset, &ctx.rules);
 #endif /* NPFLOG > 0 */
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
@@ -3718,31 +3754,32 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (pd->proto == IPPROTO_TCP &&
     ((r->rule_flag & PFRULE_RETURNRST) ||
     (r->rule_flag & PFRULE_RETURN)) &&
-    !(pd->hdr.tcp.th_flags & TH_RST)) {
- struct tcphdr *th = &pd->hdr.tcp;
- u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+    !(ctx.th->th_flags & TH_RST)) {
+ u_int32_t ack =
+    ntohl(ctx.th->th_seq) + pd->p_len;
 
  if (pf_check_tcp_cksum(pd->m, pd->off,
     pd->tot_len - pd->off, pd->af))
- REASON_SET(reason, PFRES_PROTCKSUM);
+ REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
  else {
- if (th->th_flags & TH_SYN)
+ if (ctx.th->th_flags & TH_SYN)
  ack++;
- if (th->th_flags & TH_FIN)
+ if (ctx.th->th_flags & TH_FIN)
  ack++;
  pf_send_tcp(r, pd->af, pd->dst,
-    pd->src, th->th_dport, th->th_sport,
-    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
-    r->return_ttl, 1, 0, pd->rdomain);
+    pd->src, ctx.th->th_dport,
+    ctx.th->th_sport, ntohl(ctx.th->th_ack),
+    ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
+    1, 0, pd->rdomain);
  }
  } else if ((pd->proto != IPPROTO_ICMP ||
-    ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET &&
+    ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
     r->return_icmp)
  pf_send_icmp(pd->m, r->return_icmp >> 8,
     r->return_icmp & 255, pd->af, r, pd->rdomain);
  else if ((pd->proto != IPPROTO_ICMPV6 ||
-    (icmptype >= ICMP6_ECHO_REQUEST &&
-    icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
+    (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
+    ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
     r->return_icmp6)
  pf_send_icmp(pd->m, r->return_icmp6 >> 8,
     r->return_icmp6 & 255, pd->af, r, pd->rdomain);
@@ -3751,13 +3788,13 @@ pf_test_rule(struct pf_pdesc *pd, struct
  if (r->action == PF_DROP)
  goto cleanup;
 
- pf_tag_packet(pd->m, tag, act.rtableid);
- if (act.rtableid >= 0 &&
-    rtable_l2(act.rtableid) != pd->rdomain)
+ pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
+ if (ctx.act.rtableid >= 0 &&
+    rtable_l2(ctx.act.rtableid) != pd->rdomain)
  pd->destchg = 1;
 
  if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
- REASON_SET(reason, PFRES_IPOPTIONS);
+ REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
 #if NPFLOG > 0
  pd->pflog |= PF_LOG_FORCE;
 #endif /* NPFLOG > 0 */
@@ -3769,23 +3806,23 @@ pf_test_rule(struct pf_pdesc *pd, struct
  action = PF_PASS;
 
  if (pd->virtual_proto != PF_VPROTO_FRAGMENT
-    && !state_icmp && r->keep_state) {
+    && !ctx.state_icmp && r->keep_state) {
 
  if (r->rule_flag & PFRULE_SRCTRACK &&
-    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
-    pd->src, NULL) != 0) {
- REASON_SET(reason, PFRES_SRCLIMIT);
+    pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
+    pd->af, pd->src, NULL) != 0) {
+ REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
  goto cleanup;
  }
 
  if (r->max_states && (r->states_cur >= r->max_states)) {
  pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(reason, PFRES_MAXSTATES);
+ REASON_SET(&ctx.reason, PFRES_MAXSTATES);
  goto cleanup;
  }
 
- action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
-    sm, tag, &rules, &act, sns);
+ action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
+    &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);
 
  if (action != PF_PASS)
  goto cleanup;
@@ -3801,7 +3838,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
     sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
     &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
     sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
-    virtual_type, icmp_dir);
+    virtual_type, ctx.icmp_dir);
  }
 
 #ifdef INET6
@@ -3810,9 +3847,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
 #endif /* INET6 */
 
  } else {
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
+ SLIST_REMOVE_HEAD(&ctx.rules, entry);
+ pool_put(&pf_rule_item_pl, ctx.ri);
  }
  }
 
@@ -3844,9 +3881,9 @@ pf_test_rule(struct pf_pdesc *pd, struct
  return (action);
 
 cleanup:
- while ((ri = SLIST_FIRST(&rules))) {
- SLIST_REMOVE_HEAD(&rules, entry);
- pool_put(&pf_rule_item_pl, ri);
+ while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
+ SLIST_REMOVE_HEAD(&ctx.rules, entry);
+ pool_put(&pf_rule_item_pl, ctx.ri);
  }
 
  return (action);

Loading...