next step on the way to real policy routing

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

next step on the way to real policy routing

Henning Brauer-2
we're getting closer to having real policy routing - this diff gives us
multiple routing tables; however, they're not yet accessible from userland
(aka route(8)) or being used for routing. nonetheless, this diff
needs testing and review. please do so.

this is important, and it is important that we're reasonably fast, and
I need help. I can't make such big changes in one of the most twisted
parts of the kernel without getting help in testing and review.

this diff does break netstat -r; this is being worked on. there might
be more userland utils (I doubt that, though) that try to snoop the routing
table via kvm; those will not work any more.

Index: route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.70
diff -u -p -r1.70 route.c
--- route.c 22 Mar 2006 14:37:44 -0000 1.70
+++ route.c 30 Mar 2006 11:19:44 -0000
@@ -130,9 +130,12 @@ struct ifaddr *encap_findgwifa(struct so
 
 #define SA(p) ((struct sockaddr *)(p))
 
-struct route_cb route_cb;
-struct rtstat rtstat;
-struct radix_node_head *rt_tables[AF_MAX+1];
+struct route_cb   route_cb;
+struct rtstat   rtstat;
+struct radix_node_head ***rt_tables;
+u_int8_t   af2rtafidx[AF_MAX+1];
+u_int8_t   rtafidx_max;
+u_int   rtbl_cnt = 0;
 
 int rttrash; /* routes not in table but not freed */
 struct sockaddr wildcard; /* zero cookie for wildcard searches */
@@ -140,6 +143,8 @@ struct sockaddr wildcard; /* zero cooki
 struct pool rtentry_pool; /* pool for rtentry structures */
 struct pool rttimer_pool; /* pool for rttimer structures */
 
+int rtable_init(struct radix_node_head ***);
+int rtable_add(char *);
 int okaytoclone(u_int, int);
 int rtdeletemsg(struct rtentry *);
 int rtflushclone1(struct radix_node *, void *);
@@ -147,6 +152,7 @@ void rtflushclone(struct radix_node_head
 int rt_if_remove_rtdelete(struct radix_node *, void *);
 
 #define LABELID_MAX 50000
+#define RTBL_CNT_INC 4 /* allocate rtables in chunks of 4 */
 
 struct rt_label {
  TAILQ_ENTRY(rt_label) rtl_entry;
@@ -165,14 +171,35 @@ encap_findgwifa(struct sockaddr *gw)
 }
 #endif
 
-void
-rtable_init(void **table)
+int
+rtable_init(struct radix_node_head ***table)
 {
- struct domain *dom;
+ void **p;
+ struct domain *dom;
+ u_int8_t  i;
+
+ bzero(af2rtafidx, sizeof(af2rtafidx));
+ rtafidx_max = i = 0;
+
+ /* 1st pass: find out how many tables to allocate */
  for (dom = domains; dom != NULL; dom = dom->dom_next)
  if (dom->dom_rtattach)
- dom->dom_rtattach(&table[dom->dom_family],
+ rtafidx_max++;
+
+ if ((p = malloc(sizeof(void *) * (rtafidx_max + 1), M_RTABLE,
+    M_NOWAIT)) == NULL)
+ return (-1);
+ bzero(p, sizeof(void *) * (rtafidx_max + 1));
+
+ /* 2nd pass: attach */
+ for (dom = domains; dom != NULL; dom = dom->dom_next)
+ if (dom->dom_rtattach) {
+ af2rtafidx[dom->dom_family] = i++;
+ dom->dom_rtattach(&p[af2rtafidx[dom->dom_family]],
     dom->dom_rtoffset);
+ }
+
+ *table = (struct radix_node_head **)p;
 }
 
 void
@@ -181,7 +208,34 @@ route_init()
  pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
     NULL);
  rn_init(); /* initialize all zeroes, all ones, mask table */
- rtable_init((void **)rt_tables);
+ if (rtable_add("main") == -1)
+ panic("route_init rtable_add");
+}
+
+int
+rtable_add(char *tblname)
+{
+ u_int i;
+ void *p;
+
+ for (i = 0; i < rtbl_cnt; i++)
+ if (rt_tables[i] == NULL)
+ break;
+
+ if (i == rtbl_cnt) {
+ rtbl_cnt += RTBL_CNT_INC;
+ if ((p = malloc(sizeof(void *) * rtbl_cnt, M_RTABLE,
+    M_NOWAIT)) == NULL)
+ return (-1);
+ bzero(p, sizeof(void *) * rtbl_cnt);
+ if (i > 0) {
+ bcopy(rt_tables, p, sizeof(void *) * i);
+ free(rt_tables, M_RTABLE);
+ }
+ rt_tables = p;
+ }
+
+ return (rtable_init(&rt_tables[i]));
 }
 
 void
@@ -205,13 +259,14 @@ okaytoclone(u_int flags, int howstrict)
 struct rtentry *
 rtalloc2(struct sockaddr *dst, int report, int howstrict)
 {
- struct radix_node_head *rnh = rt_tables[dst->sa_family];
+ struct radix_node_head *rnh;
  struct rtentry *rt;
  struct radix_node *rn;
  struct rtentry *newrt = 0;
  struct rt_addrinfo info;
  int s = splnet(), err = 0, msgtype = RTM_MISS;
 
+ rnh = rt_gettable(dst->sa_family, 0);
  if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
     ((rn->rn_flags & RNF_ROOT) == 0)) {
  newrt = rt = (struct rtentry *)rn;
@@ -257,13 +312,14 @@ rtalloc(struct route *ro)
 struct rtentry *
 rtalloc1(struct sockaddr *dst, int report)
 {
- struct radix_node_head *rnh = rt_tables[dst->sa_family];
+ struct radix_node_head *rnh;
  struct rtentry *rt;
  struct radix_node *rn;
  struct rtentry *newrt = 0;
  struct rt_addrinfo info;
  int s = splsoftnet(), err = 0, msgtype = RTM_MISS;
 
+ rnh = rt_gettable(dst->sa_family, 0);
  if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
     ((rn->rn_flags & RNF_ROOT) == 0)) {
  newrt = rt = (struct rtentry *)rn;
@@ -653,7 +709,7 @@ rtrequest1(int req, struct rt_addrinfo *
  struct sockaddr_rtlabel *sa_rl;
 #define senderr(x) { error = x ; goto bad; }
 
- if ((rnh = rt_tables[info->rti_info[RTAX_DST]->sa_family]) == 0)
+ if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, 0)) == NULL)
  senderr(EAFNOSUPPORT);
  if (info->rti_flags & RTF_HOST)
  info->rti_info[RTAX_NETMASK] = NULL;
@@ -1123,10 +1179,9 @@ rt_timer_add(struct rtentry *rt, void (*
 }
 
 struct radix_node_head *
-rt_gettable(sa_family_t af, int id)
+rt_gettable(sa_family_t af, u_int id)
 {
- /* ignore id for now */
- return (rt_tables[af]);
+ return (rt_tables[id][af2rtafidx[af]]);
 }
 
 struct radix_node *
@@ -1138,6 +1193,7 @@ rt_lookup(struct sockaddr *dst, struct s
 
  return (rnh->rnh_lookup(dst, mask, rnh));
 }
+
 /* ARGSUSED */
 void
 rt_timer_timer(void *arg)
@@ -1254,13 +1310,11 @@ rt_if_remove(struct ifnet *ifp)
  int i;
  struct radix_node_head *rnh;
 
- for (i = 1; i <= AF_MAX; i++) {
- rnh = rt_tables[i];
- if (rnh)
+ for (i = 1; i <= AF_MAX; i++)
+ if ((rnh = rt_gettable(i, 0)) != NULL)
  while ((*rnh->rnh_walktree)(rnh,
     rt_if_remove_rtdelete, ifp) == EAGAIN)
- ;
- }
+ ; /* nothing */
 }
 
 /*
Index: route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.38
diff -u -p -r1.38 route.h
--- route.h 30 Mar 2006 09:53:43 -0000 1.38
+++ route.h 30 Mar 2006 11:19:44 -0000
@@ -334,7 +334,6 @@ void rt_timer_queue_destroy(struct rtti
 void rt_timer_remove_all(struct rtentry *);
 unsigned long rt_timer_count(struct rttimer_queue *);
 void rt_timer_timer(void *);
-void rtable_init(void **);
 void rtalloc(struct route *);
 struct rtentry *
  rtalloc1(struct sockaddr *, int);
@@ -354,7 +353,7 @@ int rtrequest(int, struct sockaddr *,
 int rtrequest1(int, struct rt_addrinfo *, struct rtentry **);
 void rt_if_remove(struct ifnet *);
 
-struct radix_node_head *rt_gettable(sa_family_t, int);
+struct radix_node_head *rt_gettable(sa_family_t, u_int);
 struct radix_node *rt_lookup(struct sockaddr *, struct sockaddr *, int);
 #endif /* _KERNEL */
 #endif /* _NET_ROUTE_H_ */

Reply | Threaded
Open this post in threaded view
|

Re: next step on the way to real policy routing

Henning Brauer-2
* Henning Brauer <[hidden email]> [2006-03-30 13:28]:

> we're getting closer to have real policy routing - this diff gives us
> multiple routing tables, however, they're not accessible from userland
> (aka route(8) yet) or beeing used for routing. nontheless this diff
> needs testing and review. please do so.
>
> this is important, and it is important that we're reasonably fast, and
> I need help. I can't do such big changes in one of the most twisted
> parts of teh kernel without getting help in testing and review.
>
> this diff does break netstat -r, this is beeing worked on. there might
> be more userland utils (I doubt that tho) that try to snoop the routing
> table via kvm, that will not work any more.

please use this diff instead.

oh, and this has nothing to do with bgpd at all. it can be tested on
any box - even better if it forwards traffic.

Index: route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.70
diff -u -p -r1.70 route.c
--- route.c 22 Mar 2006 14:37:44 -0000 1.70
+++ route.c 30 Mar 2006 16:59:34 -0000
@@ -130,9 +130,12 @@ struct ifaddr *encap_findgwifa(struct so
 
 #define SA(p) ((struct sockaddr *)(p))
 
-struct route_cb route_cb;
-struct rtstat rtstat;
-struct radix_node_head *rt_tables[AF_MAX+1];
+struct route_cb   route_cb;
+struct rtstat   rtstat;
+struct radix_node_head ***rt_tables;
+u_int8_t   af2rtafidx[AF_MAX+1];
+u_int8_t   rtafidx_max;
+u_int   rtbl_cnt = 0;
 
 int rttrash; /* routes not in table but not freed */
 struct sockaddr wildcard; /* zero cookie for wildcard searches */
@@ -140,6 +143,8 @@ struct sockaddr wildcard; /* zero cooki
 struct pool rtentry_pool; /* pool for rtentry structures */
 struct pool rttimer_pool; /* pool for rttimer structures */
 
+int rtable_init(struct radix_node_head ***);
+int rtable_add(char *);
 int okaytoclone(u_int, int);
 int rtdeletemsg(struct rtentry *);
 int rtflushclone1(struct radix_node *, void *);
@@ -147,6 +152,7 @@ void rtflushclone(struct radix_node_head
 int rt_if_remove_rtdelete(struct radix_node *, void *);
 
 #define LABELID_MAX 50000
+#define RTBL_CNT_INC 4 /* allocate rtables in chunks of 4 */
 
 struct rt_label {
  TAILQ_ENTRY(rt_label) rtl_entry;
@@ -165,23 +171,72 @@ encap_findgwifa(struct sockaddr *gw)
 }
 #endif
 
-void
-rtable_init(void **table)
+int
+rtable_init(struct radix_node_head ***table)
 {
- struct domain *dom;
+ void **p;
+ struct domain *dom;
+
+ if ((p = malloc(sizeof(void *) * (rtafidx_max + 1), M_RTABLE,
+    M_NOWAIT)) == NULL)
+ return (-1);
+ bzero(p, sizeof(void *) * (rtafidx_max + 1));
+
+ /* 2nd pass: attach */
  for (dom = domains; dom != NULL; dom = dom->dom_next)
  if (dom->dom_rtattach)
- dom->dom_rtattach(&table[dom->dom_family],
+ dom->dom_rtattach(&p[af2rtafidx[dom->dom_family]],
     dom->dom_rtoffset);
+
+ *table = (struct radix_node_head **)p;
+ return (0);
 }
 
 void
 route_init()
 {
+ struct domain *dom;
+
  pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
     NULL);
  rn_init(); /* initialize all zeroes, all ones, mask table */
- rtable_init((void **)rt_tables);
+
+ bzero(af2rtafidx, sizeof(af2rtafidx));
+ rtafidx_max = 0;
+
+ /* find out how many tables to allocate */
+ for (dom = domains; dom != NULL; dom = dom->dom_next)
+ if (dom->dom_rtattach)
+ af2rtafidx[dom->dom_family] = rtafidx_max++;
+
+ if (rtable_add("main") == -1)
+ panic("route_init rtable_add");
+}
+
+int
+rtable_add(char *tblname)
+{
+ u_int i;
+ void *p;
+
+ for (i = 0; i < rtbl_cnt; i++)
+ if (rt_tables[i] == NULL)
+ break;
+
+ if (i == rtbl_cnt) {
+ rtbl_cnt += RTBL_CNT_INC;
+ if ((p = malloc(sizeof(void *) * rtbl_cnt, M_RTABLE,
+    M_NOWAIT)) == NULL)
+ return (-1);
+ bzero(p, sizeof(void *) * rtbl_cnt);
+ if (i > 0) {
+ bcopy(rt_tables, p, sizeof(void *) * i);
+ free(rt_tables, M_RTABLE);
+ }
+ rt_tables = p;
+ }
+
+ return (rtable_init(&rt_tables[i]));
 }
 
 void
@@ -205,13 +260,14 @@ okaytoclone(u_int flags, int howstrict)
 struct rtentry *
 rtalloc2(struct sockaddr *dst, int report, int howstrict)
 {
- struct radix_node_head *rnh = rt_tables[dst->sa_family];
+ struct radix_node_head *rnh;
  struct rtentry *rt;
  struct radix_node *rn;
  struct rtentry *newrt = 0;
  struct rt_addrinfo info;
  int s = splnet(), err = 0, msgtype = RTM_MISS;
 
+ rnh = rt_gettable(dst->sa_family, 0);
  if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
     ((rn->rn_flags & RNF_ROOT) == 0)) {
  newrt = rt = (struct rtentry *)rn;
@@ -257,13 +313,14 @@ rtalloc(struct route *ro)
 struct rtentry *
 rtalloc1(struct sockaddr *dst, int report)
 {
- struct radix_node_head *rnh = rt_tables[dst->sa_family];
+ struct radix_node_head *rnh;
  struct rtentry *rt;
  struct radix_node *rn;
  struct rtentry *newrt = 0;
  struct rt_addrinfo info;
  int s = splsoftnet(), err = 0, msgtype = RTM_MISS;
 
+ rnh = rt_gettable(dst->sa_family, 0);
  if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
     ((rn->rn_flags & RNF_ROOT) == 0)) {
  newrt = rt = (struct rtentry *)rn;
@@ -653,7 +710,7 @@ rtrequest1(int req, struct rt_addrinfo *
  struct sockaddr_rtlabel *sa_rl;
 #define senderr(x) { error = x ; goto bad; }
 
- if ((rnh = rt_tables[info->rti_info[RTAX_DST]->sa_family]) == 0)
+ if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, 0)) == NULL)
  senderr(EAFNOSUPPORT);
  if (info->rti_flags & RTF_HOST)
  info->rti_info[RTAX_NETMASK] = NULL;
@@ -1123,10 +1180,9 @@ rt_timer_add(struct rtentry *rt, void (*
 }
 
 struct radix_node_head *
-rt_gettable(sa_family_t af, int id)
+rt_gettable(sa_family_t af, u_int id)
 {
- /* ignore id for now */
- return (rt_tables[af]);
+ return (rt_tables[id][af2rtafidx[af]]);
 }
 
 struct radix_node *
@@ -1138,6 +1194,7 @@ rt_lookup(struct sockaddr *dst, struct s
 
  return (rnh->rnh_lookup(dst, mask, rnh));
 }
+
 /* ARGSUSED */
 void
 rt_timer_timer(void *arg)
@@ -1254,13 +1311,11 @@ rt_if_remove(struct ifnet *ifp)
  int i;
  struct radix_node_head *rnh;
 
- for (i = 1; i <= AF_MAX; i++) {
- rnh = rt_tables[i];
- if (rnh)
+ for (i = 1; i <= AF_MAX; i++)
+ if ((rnh = rt_gettable(i, 0)) != NULL)
  while ((*rnh->rnh_walktree)(rnh,
     rt_if_remove_rtdelete, ifp) == EAGAIN)
- ;
- }
+ ; /* nothing */
 }
 
 /*
Index: route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.38
diff -u -p -r1.38 route.h
--- route.h 30 Mar 2006 09:53:43 -0000 1.38
+++ route.h 30 Mar 2006 16:59:34 -0000
@@ -334,7 +334,6 @@ void rt_timer_queue_destroy(struct rtti
 void rt_timer_remove_all(struct rtentry *);
 unsigned long rt_timer_count(struct rttimer_queue *);
 void rt_timer_timer(void *);
-void rtable_init(void **);
 void rtalloc(struct route *);
 struct rtentry *
  rtalloc1(struct sockaddr *, int);
@@ -354,7 +353,7 @@ int rtrequest(int, struct sockaddr *,
 int rtrequest1(int, struct rt_addrinfo *, struct rtentry **);
 void rt_if_remove(struct ifnet *);
 
-struct radix_node_head *rt_gettable(sa_family_t, int);
+struct radix_node_head *rt_gettable(sa_family_t, u_int);
 struct radix_node *rt_lookup(struct sockaddr *, struct sockaddr *, int);
 #endif /* _KERNEL */
 #endif /* _NET_ROUTE_H_ */