[Xorp-hackers] Patches to support multiple multi-cast routing tables in Linux & Xorp

James R. Leu jleu at mindspring.com
Fri Jun 6 09:10:09 PDT 2008


Are you building on top of the network namespace work
that has gone into 2.6.25?  It was my understanding that
a majority of the IPv4|6 stacks already supported
network namespaces; I would be enlightened to hear
otherwise.

On Thu, Jun 05, 2008 at 05:36:24PM -0700, Ben Greear wrote:
> I just made a successful transfer between two ports on
> a virtual router using the attached patches to xorp
> and the 2.6.25 linux kernel.
>
> The kernel at least, still has locking bugs, if nothing
> else.
>
> I also will probably remove the char unused[32] members
> from the API structs.
>
> But, it at least somewhat works.
>
> Also, the xorp and kernel patches are on top of my other patches,
> so they may not apply w/out conflicts.
>
> Comments are welcome.
>
> Thanks,
> Ben
>
> -- 
> Ben Greear <greearb at candelatech.com>
> Candela Technologies Inc  http://www.candelatech.com
>

> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
> index 35a8277..cea3933 100644
> --- a/include/linux/mroute.h
> +++ b/include/linux/mroute.h
> @@ -29,6 +29,10 @@
>  #define SIOCGETVIFCNT	SIOCPROTOPRIVATE	/* IP protocol privates */
>  #define SIOCGETSGCNT	(SIOCPROTOPRIVATE+1)
>  #define SIOCGETRPF	(SIOCPROTOPRIVATE+2)
> +// Support for multiple routing tables.
> +#define SIOCGETVIFCNT_NG	(SIOCPROTOPRIVATE+3)
> +#define SIOCGETSGCNT_NG	(SIOCPROTOPRIVATE+4)
> +#define SIOCGETRP_NG      (SIOCPROTOPRIVATE+5)
>  
>  #define MAXVIFS		32	
>  typedef unsigned long vifbitmap_t;	/* User mode code depends on this lot */
> @@ -46,6 +50,7 @@ typedef unsigned short vifi_t;
>  #define VIFM_COPY(mfrom,mto)	((mto)=(mfrom))
>  #define VIFM_SAME(m1,m2)	((m1)==(m2))
>  
> +
>  /*
>   *	Passed by mrouted for an MRT_ADD_VIF - again we use the
>   *	mrouted 3.6 structures for compatibility
> @@ -60,6 +65,13 @@ struct vifctl {
>  	struct in_addr vifc_rmt_addr;	/* IPIP tunnel addr */
>  };
>  
> +/* For supporting multiple routing tables */
> +struct vifctl_ng {
> +	struct vifctl vif;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions, must be zero currently */
> +} __attribute__ ((packed));
> +
>  #define VIFF_TUNNEL	0x1	/* IPIP tunnel */
>  #define VIFF_SRCRT	0x2	/* NI */
>  #define VIFF_REGISTER	0x4	/* register vif	*/
> @@ -80,6 +92,20 @@ struct mfcctl
>  	int	     mfcc_expire;
>  };
>  
> +struct mfcctl_ng {
> +	struct mfcctl mfc;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions, must be zero currently */
> +} __attribute__ ((packed));
> +
> +
> +struct mrt_sockopt_simple {
> +	__u32 optval;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions, must be zero currently */
> +} __attribute__ ((packed));
> +
> +
>  /* 
>   *	Group count retrieval for mrouted
>   */
> @@ -93,6 +119,13 @@ struct sioc_sg_req
>  	unsigned long wrong_if;
>  };
>  
> +struct sioc_sg_req_ng {
> +	struct sioc_sg_req req;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions, must be zero currently */
> +} __attribute__ ((packed));
> +	
> +
>  /*
>   *	To get vif packet counts
>   */
> @@ -106,6 +139,13 @@ struct sioc_vif_req
>  	unsigned long obytes;	/* Out bytes */
>  };
>  
> +struct sioc_vif_req_ng {
> +	struct sioc_vif_req vif;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions, must be zero currently */
> +} __attribute__ ((packed));
> +
> +
>  /*
>   *	This is the format the mroute daemon expects to see IGMP control
>   *	data. Magically happens to be like an IP packet as per the original
> @@ -128,6 +168,7 @@ struct igmpmsg
>  #ifdef __KERNEL__
>  #include <net/sock.h>
>  
> +
>  #ifdef CONFIG_IP_MROUTE
>  static inline int ip_mroute_opt(int opt)
>  {
> @@ -156,6 +197,8 @@ struct vif_device
>  	unsigned short	flags;			/* Control flags 		*/
>  	__be32		local,remote;		/* Addresses(remote for tunnels)*/
>  	int		link;			/* Physical interface index	*/
> +	int              table_id;               /* routing table this belongs to */
> +	int              vif_index;              /* Its index in the table's vif array */
>  };
>  
>  #define VIFF_STATIC 0x8000
> @@ -167,11 +210,12 @@ struct mfc_cache
>  	__be32 mfc_origin;			/* Source of packet 		*/
>  	vifi_t mfc_parent;			/* Source interface		*/
>  	int mfc_flags;				/* Flags on line		*/
> +	int table_id;                            /* mcast routing table id         */
>  
>  	union {
>  		struct {
>  			unsigned long expires;
> -			struct sk_buff_head unresolved;	/* Unresolved buffers		*/
> +			struct sk_buff_head unresolved;	/* Unresolved buffers	*/
>  		} unres;
>  		struct {
>  			unsigned long last_assert;
> @@ -180,7 +224,7 @@ struct mfc_cache
>  			unsigned long bytes;
>  			unsigned long pkt;
>  			unsigned long wrong_if;
> -			unsigned char ttls[MAXVIFS];	/* TTL thresholds		*/
> +			unsigned char ttls[MAXVIFS];	/* TTL thresholds	*/
>  		} res;
>  	} mfc_un;
>  };
> @@ -190,6 +234,24 @@ struct mfc_cache
>  
>  #define MFC_LINES		64
>  
> +/* Holds data for a particular multicast routing table. */
> +struct mroute_table_entry {
> +	struct list_head list_member;
> +	struct sock *mroute_socket;
> +	unsigned int table_id;
> +
> +	rwlock_t mte_lock; // locks all data for this table except for the mfc-unres queue
> +	struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
> +	int maxvif;
> +	int mroute_do_assert;				/* Set in PIM assert	*/
> +	int mroute_do_pim;
> +	int reg_vif_num;
> +	struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/
> +	struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
> +	atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/
> +	spinlock_t mfc_unres_lock;
> +};
> +
>  #ifdef __BIG_ENDIAN
>  #define MFC_HASH(a,b)	(((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1))
>  #else
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 30064d7..586ff24 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -43,6 +43,9 @@
>  
>  #include <net/net_namespace.h>
>  
> +#ifdef CONFIG_IP_MROUTE
> +struct mroute_table_entry;
> +#endif
>  struct vlan_group;
>  struct ethtool_ops;
>  struct netpoll_info;
> @@ -728,6 +731,11 @@ struct net_device
>  				     */
>  	long dflt_skb_mark; /* Specify skb->mark for pkts received on this interface. */
>  
> +#ifdef CONFIG_IP_MROUTE
> +	/* IPv4 Multicast Routing Table Entry */
> +	struct mroute_table_entry* mrt_entry;
> +#endif
> +	
>  	/* bridge stuff */
>  	struct net_bridge_port	*br_port;
>  	/* macvlan */
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 617a49a..46e16bf 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4044,6 +4044,9 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
>  	dev->padded = (char *)dev - (char *)p;
>  	dev->nd_net = &init_net;
>  
> +	/** This points to the multicast routing table, when assigned. */
> +	dev->mrt_entry = NULL;
> +
>  	if (sizeof_priv) {
>  		dev->priv = ((char *)dev +
>  			     ((sizeof(struct net_device) +
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index a94f52c..89f6c91 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -69,48 +69,28 @@
>  #define CONFIG_IP_PIMSM	1
>  #endif
>  
> -static struct sock *mroute_socket;
>  
> -
> -/* Big lock, protecting vif table, mrt cache and mroute socket state.
> -   Note that the changes are semaphored via rtnl_lock.
> - */
> -
> -static DEFINE_RWLOCK(mrt_lock);
> -
> -/*
> - *	Multicast router control variables
> +/** Table is protected by mrte_lock.  Hold RTNL first if
> + * you must acquire RTNL and mrte at the same time.
>   */
> +#define MROUTE_TABLE_HASH_MAX 1024
> +static struct list_head mroute_tables[MROUTE_TABLE_HASH_MAX];
> +static DEFINE_RWLOCK(mrte_lock);
>  
> -static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
> -static int maxvif;
> +/** Use-counter for PIM enabled mcast routing tables */
> +static int mroute_pim_cnt;
>  
> -#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
> +static struct kmem_cache *mrt_cachep;
>  
> -static int mroute_do_assert;				/* Set in PIM assert	*/
> -static int mroute_do_pim;
>  
> -static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/
> +#define VIF_EXISTS(e, idx) (e->vif_table[idx].dev != NULL)
>  
> -static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
> -static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/
> +#define DFLT_MROUTE_TBL RT_TABLE_MAIN /* 254 as of 2.6.25 */
>  
> -/* Special spinlock for queue of unresolved entries */
> -static DEFINE_SPINLOCK(mfc_unres_lock);
>  
> -/* We return to original Alan's scheme. Hash table of resolved
> -   entries is changed only in process context and protected
> -   with weak lock mrt_lock. Queue of unresolved entries is protected
> -   with strong spinlock mfc_unres_lock.
> -
> -   In this case data path is free of exclusive locks at all.
> - */
> -
> -static struct kmem_cache *mrt_cachep __read_mostly;
> -
> -static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
> -static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
> -static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
> +static int ip_mr_forward(struct mroute_table_entry* e, struct sk_buff *skb, struct mfc_cache *cache, int local);
> +static int ipmr_cache_report(struct mroute_table_entry* e, struct sk_buff *pkt, vifi_t vifi, int assert);
> +static int ipmr_fill_mroute(struct mroute_table_entry* e, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
>  
>  #ifdef CONFIG_IP_PIMSM_V2
>  static struct net_protocol pim_protocol;
> @@ -120,9 +100,52 @@ static struct timer_list ipmr_expire_timer;
>  
>  /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
>  
> -static
> -struct net_device *ipmr_new_tunnel(struct vifctl *v)
> -{
> +/** Simple hash algorithm */
> +static int find_mroute_table_bucket(unsigned int table_id) {
> +	return table_id % MROUTE_TABLE_HASH_MAX;
> +}
> +
> +/** Must hold mrte_lock to call this */
> +static struct mroute_table_entry* find_mroute_table(unsigned int table_id) {
> +	int i = find_mroute_table_bucket(table_id);
> +	struct mroute_table_entry* e;
> +	struct list_head *li;
> +	list_for_each(li, &(mroute_tables[i])) {
> +		e = list_entry(li , struct mroute_table_entry, list_member);
> +		if (e->table_id == table_id) {
> +			return e;
> +		}
> +	}
> +	return NULL;
> +}
> +
> +static struct mroute_table_entry* create_mroute_table(unsigned int table_id) {
> +	struct mroute_table_entry* e = kzalloc(sizeof(*e), GFP_KERNEL);
> +	int i;
> +	if (!e)
> +		return NULL;
> +	rwlock_init(&e->mte_lock);
> +	spin_lock_init(&e->mfc_unres_lock);
> +	for (i = 0; i<MAXVIFS; i++) {
> +		e->vif_table[i].table_id = table_id;
> +		e->vif_table[i].vif_index = i;
> +	}
> +	e->table_id = table_id;
> +	e->reg_vif_num = -1;	
> +	INIT_LIST_HEAD(&e->list_member);
> +	return e;
> +}
> +		
> +
> +/** Assumes it does not already exist.  Must hold mrte_lock while calling this. */
> +static void add_mroute_table(struct mroute_table_entry* e) {
> +	int i = find_mroute_table_bucket(e->table_id);
> +
> +	list_add_tail(&e->list_member, &mroute_tables[i]);
> +}
> +
> +/** RTNL must be held when calling this */
> +static struct net_device *ipmr_new_tunnel(struct vifctl *v, unsigned int table_id) {
>  	struct net_device  *dev;
>  
>  	dev = __dev_get_by_name(&init_net, "tunl0");
> @@ -140,7 +163,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
>  		p.iph.version = 4;
>  		p.iph.ihl = 5;
>  		p.iph.protocol = IPPROTO_IPIP;
> -		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
> +		snprintf(p.name, sizeof(p.name), "dvmrp%d-%d", table_id, v->vifc_vifi);
>  		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
>  
>  		oldfs = get_fs(); set_fs(KERNEL_DS);
> @@ -176,15 +199,19 @@ failure:
>  
>  #ifdef CONFIG_IP_PIMSM
>  
> -static int reg_vif_num = -1;
> -
> -static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
> -{
> -	read_lock(&mrt_lock);
> -	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
> -	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
> -	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
> -	read_unlock(&mrt_lock);
> +static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) {
> +	struct mroute_table_entry* e;
> +	read_lock(&mrte_lock);
> +	e = dev->mrt_entry;
> +	if (e) {
> +		((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
> +		((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
> +		ipmr_cache_report(e, skb, e->reg_vif_num, IGMPMSG_WHOLEPKT);
> +	}
> +	else {
> +		printk("ERROR:  Device %s has NULL mrt_entry.\n", dev->name);
> +	}
> +	read_unlock(&mrte_lock);
>  	kfree_skb(skb);
>  	return 0;
>  }
> @@ -204,13 +231,18 @@ static void reg_vif_setup(struct net_device *dev)
>  	dev->destructor		= free_netdev;
>  }
>  
> -static struct net_device *ipmr_reg_vif(void)
> -{
> +/** Must hold RTNL to call this */
> +static struct net_device *ipmr_reg_vif(struct mroute_table_entry* e) {
>  	struct net_device *dev;
>  	struct in_device *in_dev;
> +	char dname[32];
> +	snprintf(dname, sizeof(dname), "pimreg%d", e->table_id);
>  
> -	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
> +	// TODO:  Bad locking, mrte is probably locked and this sucks anyway. --Ben
> +	rtnl_unlock();
> +	dev = alloc_netdev(sizeof(struct net_device_stats), dname,
>  			   reg_vif_setup);
> +	rtnl_lock();
>  
>  	if (dev == NULL)
>  		return NULL;
> @@ -247,44 +279,44 @@ failure:
>  #endif
>  
>  /*
> - *	Delete a VIF entry
> + *	Delete a VIF entry, assumes RTNL is held.
>   */
> -
> -static int vif_delete(int vifi)
> -{
> +static int vif_delete(struct mroute_table_entry* e, int vifi) {
>  	struct vif_device *v;
>  	struct net_device *dev;
>  	struct in_device *in_dev;
>  
> -	if (vifi < 0 || vifi >= maxvif)
> +	if (vifi < 0 || vifi >= e->maxvif)
>  		return -EADDRNOTAVAIL;
>  
> -	v = &vif_table[vifi];
> +	write_lock(&e->mte_lock);
> +	
> +	v = &e->vif_table[vifi];
>  
> -	write_lock_bh(&mrt_lock);
>  	dev = v->dev;
>  	v->dev = NULL;
>  
>  	if (!dev) {
> -		write_unlock_bh(&mrt_lock);
> +		write_unlock(&e->mte_lock);
>  		return -EADDRNOTAVAIL;
>  	}
>  
>  #ifdef CONFIG_IP_PIMSM
> -	if (vifi == reg_vif_num)
> -		reg_vif_num = -1;
> +	if (vifi == e->reg_vif_num) {
> +		e->reg_vif_num = -1;
> +	}
>  #endif
> +	dev->mrt_entry = NULL;
>  
> -	if (vifi+1 == maxvif) {
> +	if (vifi+1 == e->maxvif) {
>  		int tmp;
>  		for (tmp=vifi-1; tmp>=0; tmp--) {
> -			if (VIF_EXISTS(tmp))
> +			if (VIF_EXISTS(e, tmp))
>  				break;
>  		}
> -		maxvif = tmp+1;
> +		e->maxvif = tmp+1;
>  	}
> -
> -	write_unlock_bh(&mrt_lock);
> +	write_unlock(&e->mte_lock);
>  
>  	dev_set_allmulti(dev, -1);
>  
> @@ -304,14 +336,13 @@ static int vif_delete(int vifi)
>     and reporting error to netlink readers.
>   */
>  
> -static void ipmr_destroy_unres(struct mfc_cache *c)
> -{
> +static void ipmr_destroy_unres(struct mroute_table_entry* te, struct mfc_cache *c) {
>  	struct sk_buff *skb;
>  	struct nlmsgerr *e;
>  
> -	atomic_dec(&cache_resolve_queue_len);
> +	atomic_dec(&te->cache_resolve_queue_len);
>  
> -	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
> +	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
>  		if (ip_hdr(skb)->version == 0) {
>  			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
>  			nlh->nlmsg_type = NLMSG_ERROR;
> @@ -335,54 +366,65 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
>  static void ipmr_expire_process(unsigned long dummy)
>  {
>  	unsigned long now;
> -	unsigned long expires;
> +	unsigned long expires = 10*HZ;
>  	struct mfc_cache *c, **cp;
> +	int i;
>  
> -	if (!spin_trylock(&mfc_unres_lock)) {
> -		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
> -		return;
> -	}
> -
> -	if (atomic_read(&cache_resolve_queue_len) == 0)
> -		goto out;
> -
> -	now = jiffies;
> -	expires = 10*HZ;
> -	cp = &mfc_unres_queue;
> -
> -	while ((c=*cp) != NULL) {
> -		if (time_after(c->mfc_un.unres.expires, now)) {
> -			unsigned long interval = c->mfc_un.unres.expires - now;
> -			if (interval < expires)
> -				expires = interval;
> -			cp = &c->next;
> -			continue;
> -		}
> -
> -		*cp = c->next;
> -
> -		ipmr_destroy_unres(c);
> -	}
> +	read_lock(&mrte_lock);
>  
> -	if (atomic_read(&cache_resolve_queue_len))
> -		mod_timer(&ipmr_expire_timer, jiffies + expires);
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		list_for_each(li, &mroute_tables[i]) {
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			if (!spin_trylock(&e->mfc_unres_lock)) {
> +				mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
> +				goto out;
> +			}
> +			
> +			if (atomic_read(&e->cache_resolve_queue_len) == 0) {
> +				spin_unlock(&e->mfc_unres_lock);
> +				continue;
> +			}
> +			
> +			now = jiffies;
> +			cp = &e->mfc_unres_queue;
> +			
> +			while ((c = *cp) != NULL) {
> +				if (time_after(c->mfc_un.unres.expires, now)) {
> +					unsigned long interval = c->mfc_un.unres.expires - now;
> +					if (interval < expires)
> +						expires = interval;
> +					cp = &c->next;
> +					continue;
> +				}
> +				
> +				*cp = c->next;
> +				
> +				ipmr_destroy_unres(e, c);
> +			}
> +			
> +			if (atomic_read(&e->cache_resolve_queue_len))
> +				mod_timer(&ipmr_expire_timer, jiffies + expires);
> +			
> +			spin_unlock(&e->mfc_unres_lock);
> +		}/* for each table in this bucket */
> +	}/* for all possible table buckets */
>  
>  out:
> -	spin_unlock(&mfc_unres_lock);
> +	read_unlock(&mrte_lock);
>  }
>  
> -/* Fill oifs list. It is called under write locked mrt_lock. */
> -
> -static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
> -{
> +/* Fill oifs list. It is called under write locked e->mte_lock. */
> +static void ipmr_update_thresholds(struct mroute_table_entry* e, struct mfc_cache *cache, unsigned char *ttls) {
>  	int vifi;
>  
>  	cache->mfc_un.res.minvif = MAXVIFS;
>  	cache->mfc_un.res.maxvif = 0;
>  	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
>  
> -	for (vifi=0; vifi<maxvif; vifi++) {
> -		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
> +	for (vifi = 0; vifi < e->maxvif; vifi++) {
> +		if (VIF_EXISTS(e, vifi) && ttls[vifi] && ttls[vifi] < 255) {
>  			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
>  			if (cache->mfc_un.res.minvif > vifi)
>  				cache->mfc_un.res.minvif = vifi;
> @@ -392,15 +434,21 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
>  	}
>  }
>  
> -static int vif_add(struct vifctl *vifc, int mrtsock)
> -{
> +/** Must hold RTNL and write locked &mrte_lock to call this */
> +static int vif_add(struct mroute_table_entry* e, struct vifctl *vifc, int mrtsock) {
>  	int vifi = vifc->vifc_vifi;
> -	struct vif_device *v = &vif_table[vifi];
> +	struct vif_device *v;
>  	struct net_device *dev;
>  	struct in_device *in_dev;
>  
> +	if ((vifi < 0) || (vifi >= MAXVIFS)) {
> +		printk("ERROR:  vifi: %d is out of range.\n", vifi);
> +		return -EINVAL;
> +	}
> +	v = &e->vif_table[vifi];
> +	
>  	/* Is vif busy ? */
> -	if (VIF_EXISTS(vifi))
> +	if (VIF_EXISTS(e, vifi))
>  		return -EADDRINUSE;
>  
>  	switch (vifc->vifc_flags) {
> @@ -410,15 +458,15 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
>  		 * Special Purpose VIF in PIM
>  		 * All the packets will be sent to the daemon
>  		 */
> -		if (reg_vif_num >= 0)
> +		if (e->reg_vif_num >= 0)
>  			return -EADDRINUSE;
> -		dev = ipmr_reg_vif();
> +		dev = ipmr_reg_vif(e);
>  		if (!dev)
>  			return -ENOBUFS;
>  		break;
>  #endif
>  	case VIFF_TUNNEL:
> -		dev = ipmr_new_tunnel(vifc);
> +		dev = ipmr_new_tunnel(vifc, e->table_id);
>  		if (!dev)
>  			return -ENOBUFS;
>  		break;
> @@ -426,6 +474,11 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
>  		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
>  		if (!dev)
>  			return -EADDRNOTAVAIL;
> +		if (dev->mrt_entry && (dev->mrt_entry != e)) {
> +			printk("ERROR:  Device: %s is already in multicast routing table: %d\n",
> +			       dev->name, dev->mrt_entry->table_id);
> +			return -EADDRNOTAVAIL;
> +		}
>  		dev_put(dev);
>  		break;
>  	default:
> @@ -441,13 +494,13 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
>  	/*
>  	 *	Fill in the VIF structures
>  	 */
> -	v->rate_limit=vifc->vifc_rate_limit;
> -	v->local=vifc->vifc_lcl_addr.s_addr;
> -	v->remote=vifc->vifc_rmt_addr.s_addr;
> -	v->flags=vifc->vifc_flags;
> +	v->rate_limit = vifc->vifc_rate_limit;
> +	v->local = vifc->vifc_lcl_addr.s_addr;
> +	v->remote = vifc->vifc_rmt_addr.s_addr;
> +	v->flags = vifc->vifc_flags;
>  	if (!mrtsock)
>  		v->flags |= VIFF_STATIC;
> -	v->threshold=vifc->vifc_threshold;
> +	v->threshold = vifc->vifc_threshold;
>  	v->bytes_in = 0;
>  	v->bytes_out = 0;
>  	v->pkt_in = 0;
> @@ -457,26 +510,29 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
>  		v->link = dev->iflink;
>  
>  	/* And finish update writing critical data */
> -	write_lock_bh(&mrt_lock);
> +	write_lock_bh(&e->mte_lock);
>  	dev_hold(dev);
>  	v->dev=dev;
> +	dev->mrt_entry = e;
>  #ifdef CONFIG_IP_PIMSM
> -	if (v->flags&VIFF_REGISTER)
> -		reg_vif_num = vifi;
> +	if (v->flags&VIFF_REGISTER) {
> +		e->reg_vif_num = vifi;
> +		dev->mrt_entry = e;
> +	}
>  #endif
> -	if (vifi+1 > maxvif)
> -		maxvif = vifi+1;
> -	write_unlock_bh(&mrt_lock);
> +	if (vifi + 1 > e->maxvif)
> +		e->maxvif = vifi + 1;
> +	write_unlock_bh(&e->mte_lock);
> +
>  	return 0;
>  }
>  
> -static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
> -{
> +static struct mfc_cache *ipmr_cache_find(struct mroute_table_entry* e, __be32 origin, __be32 mcastgrp) {
>  	int line=MFC_HASH(mcastgrp,origin);
>  	struct mfc_cache *c;
>  
> -	for (c=mfc_cache_array[line]; c; c = c->next) {
> -		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
> +	for (c = e->mfc_cache_array[line]; c; c = c->next) {
> +		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
>  			break;
>  	}
>  	return c;
> @@ -485,22 +541,28 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
>  /*
>   *	Allocate a multicast cache entry
>   */
> -static struct mfc_cache *ipmr_cache_alloc(void)
> -{
> -	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
> -	if (c==NULL)
> +static struct mfc_cache *ipmr_cache_alloc(struct mroute_table_entry* e) {
> +	struct mfc_cache *c;
> +	if (! mrt_cachep)
> +		return NULL;
> +	c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
> +	if (c == NULL)
>  		return NULL;
>  	c->mfc_un.res.minvif = MAXVIFS;
> +	c->table_id = e->table_id;
>  	return c;
>  }
>  
> -static struct mfc_cache *ipmr_cache_alloc_unres(void)
> -{
> -	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
> -	if (c==NULL)
> +static struct mfc_cache *ipmr_cache_alloc_unres(struct mroute_table_entry* e) {
> +	struct mfc_cache *c;
> +	if (!mrt_cachep)
> +		return NULL;
> +	c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
> +	if (c == NULL)
>  		return NULL;
>  	skb_queue_head_init(&c->mfc_un.unres.unresolved);
>  	c->mfc_un.unres.expires = jiffies + 10*HZ;
> +	c->table_id = e->table_id;
>  	return c;
>  }
>  
> @@ -508,7 +570,7 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
>   *	A cache entry has gone into a resolved state from queued
>   */
>  
> -static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
> +static void ipmr_cache_resolve(struct mroute_table_entry* te, struct mfc_cache *uc, struct mfc_cache *c)
>  {
>  	struct sk_buff *skb;
>  	struct nlmsgerr *e;
> @@ -521,10 +583,13 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
>  		if (ip_hdr(skb)->version == 0) {
>  			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
>  
> -			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
> +			read_lock(&te->mte_lock);
> +			if (ipmr_fill_mroute(te, skb, c, NLMSG_DATA(nlh)) > 0) {
> +				read_unlock(&te->mte_lock);
>  				nlh->nlmsg_len = (skb_tail_pointer(skb) -
>  						  (u8 *)nlh);
>  			} else {
> +				read_unlock(&te->mte_lock);
>  				nlh->nlmsg_type = NLMSG_ERROR;
>  				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
>  				skb_trim(skb, nlh->nlmsg_len);
> @@ -535,7 +600,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
>  
>  			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
>  		} else
> -			ip_mr_forward(skb, c, 0);
> +			ip_mr_forward(te, skb, c, 0);
>  	}
>  }
>  
> @@ -543,11 +608,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
>   *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
>   *	expects the following bizarre scheme.
>   *
> - *	Called under mrt_lock.
>   */
>  
> -static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
> -{
> +static int ipmr_cache_report(struct mroute_table_entry* e, struct sk_buff *pkt, vifi_t vifi, int assert) {
>  	struct sk_buff *skb;
>  	const int ihl = ip_hdrlen(pkt);
>  	struct igmphdr *igmp;
> @@ -578,7 +641,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
>  		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
>  		msg->im_msgtype = IGMPMSG_WHOLEPKT;
>  		msg->im_mbz = 0;
> -		msg->im_vif = reg_vif_num;
> +		msg->im_vif = e->reg_vif_num;
>  		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
>  		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
>  					     sizeof(struct iphdr));
> @@ -602,7 +665,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
>  	 *	Add our header
>  	 */
>  
> -	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
> +	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
>  	igmp->type	=
>  	msg->im_msgtype = assert;
>  	igmp->code 	=	0;
> @@ -610,7 +673,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
>  	skb->transport_header = skb->network_header;
>  	}
>  
> -	if (mroute_socket == NULL) {
> +	if (e->mroute_socket == NULL) {
>  		kfree_skb(skb);
>  		return -EINVAL;
>  	}
> @@ -618,7 +681,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
>  	/*
>  	 *	Deliver to mrouted
>  	 */
> -	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
> +	if ((ret = sock_queue_rcv_skb(e->mroute_socket, skb)) < 0) {
>  		if (net_ratelimit())
>  			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
>  		kfree_skb(skb);
> @@ -631,15 +694,13 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
>   *	Queue a packet for resolution. It gets locked cache entry!
>   */
>  
> -static int
> -ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
> -{
> +static int ipmr_cache_unresolved(struct mroute_table_entry* e, vifi_t vifi, struct sk_buff *skb) {
>  	int err;
>  	struct mfc_cache *c;
>  	const struct iphdr *iph = ip_hdr(skb);
>  
> -	spin_lock_bh(&mfc_unres_lock);
> -	for (c=mfc_unres_queue; c; c=c->next) {
> +	spin_lock_bh(&e->mfc_unres_lock);
> +	for (c = e->mfc_unres_queue; c; c=c->next) {
>  		if (c->mfc_mcastgrp == iph->daddr &&
>  		    c->mfc_origin == iph->saddr)
>  			break;
> @@ -650,9 +711,9 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
>  		 *	Create a new entry if allowable
>  		 */
>  
> -		if (atomic_read(&cache_resolve_queue_len)>=10 ||
> -		    (c=ipmr_cache_alloc_unres())==NULL) {
> -			spin_unlock_bh(&mfc_unres_lock);
> +		if (atomic_read(&e->cache_resolve_queue_len) >= 10 ||
> +		    (c = ipmr_cache_alloc_unres(e)) == NULL) {
> +			spin_unlock_bh(&e->mfc_unres_lock);
>  
>  			kfree_skb(skb);
>  			return -ENOBUFS;
> @@ -668,20 +729,20 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
>  		/*
>  		 *	Reflect first query at mrouted.
>  		 */
> -		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
> +		if ((err = ipmr_cache_report(e, skb, vifi, IGMPMSG_NOCACHE))<0) {
>  			/* If the report failed throw the cache entry
>  			   out - Brad Parker
>  			 */
> -			spin_unlock_bh(&mfc_unres_lock);
> +			spin_unlock_bh(&e->mfc_unres_lock);
>  
>  			kmem_cache_free(mrt_cachep, c);
>  			kfree_skb(skb);
>  			return err;
>  		}
>  
> -		atomic_inc(&cache_resolve_queue_len);
> -		c->next = mfc_unres_queue;
> -		mfc_unres_queue = c;
> +		atomic_inc(&e->cache_resolve_queue_len);
> +		c->next = e->mfc_unres_queue;
> +		e->mfc_unres_queue = c;
>  
>  		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
>  	}
> @@ -689,7 +750,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
>  	/*
>  	 *	See if we can append the packet
>  	 */
> -	if (c->mfc_un.unres.unresolved.qlen>3) {
> +	if (c->mfc_un.unres.unresolved.qlen > 3) {
>  		kfree_skb(skb);
>  		err = -ENOBUFS;
>  	} else {
> @@ -697,7 +758,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
>  		err = 0;
>  	}
>  
> -	spin_unlock_bh(&mfc_unres_lock);
> +	spin_unlock_bh(&e->mfc_unres_lock);
>  	return err;
>  }
>  
> @@ -705,19 +766,18 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
>   *	MFC cache manipulation by user space mroute daemon
>   */
>  
> -static int ipmr_mfc_delete(struct mfcctl *mfc)
> -{
> +static int ipmr_mfc_delete(struct mroute_table_entry* e, struct mfcctl *mfc) {
>  	int line;
>  	struct mfc_cache *c, **cp;
>  
>  	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
>  
> -	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
> +	for (cp = &e->mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
>  		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
>  		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
> -			write_lock_bh(&mrt_lock);
> +			write_lock_bh(&e->mte_lock);
>  			*cp = c->next;
> -			write_unlock_bh(&mrt_lock);
> +			write_unlock_bh(&e->mte_lock);
>  
>  			kmem_cache_free(mrt_cachep, c);
>  			return 0;
> @@ -726,67 +786,67 @@ static int ipmr_mfc_delete(struct mfcctl *mfc)
>  	return -ENOENT;
>  }
>  
> -static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
> -{
> +static int ipmr_mfc_add(struct mroute_table_entry* e, struct mfcctl *mfc, int mrtsock) {
>  	int line;
>  	struct mfc_cache *uc, *c, **cp;
>  
> -	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
> +	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
>  
> -	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
> +	for (cp = &e->mfc_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
>  		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
>  		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
>  			break;
>  	}
>  
>  	if (c != NULL) {
> -		write_lock_bh(&mrt_lock);
> +		write_lock_bh(&e->mte_lock);
>  		c->mfc_parent = mfc->mfcc_parent;
> -		ipmr_update_thresholds(c, mfc->mfcc_ttls);
> +		ipmr_update_thresholds(e, c, mfc->mfcc_ttls);
>  		if (!mrtsock)
>  			c->mfc_flags |= MFC_STATIC;
> -		write_unlock_bh(&mrt_lock);
> +		write_unlock_bh(&e->mte_lock);
>  		return 0;
>  	}
>  
>  	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
>  		return -EINVAL;
>  
> -	c=ipmr_cache_alloc();
> -	if (c==NULL)
> +	c = ipmr_cache_alloc(e);
> +	if (c == NULL)
>  		return -ENOMEM;
>  
> -	c->mfc_origin=mfc->mfcc_origin.s_addr;
> -	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
> -	c->mfc_parent=mfc->mfcc_parent;
> -	ipmr_update_thresholds(c, mfc->mfcc_ttls);
> +	c->mfc_origin = mfc->mfcc_origin.s_addr;
> +	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
> +	c->mfc_parent = mfc->mfcc_parent;
> +	write_lock_bh(&e->mte_lock);
> +	ipmr_update_thresholds(e, c, mfc->mfcc_ttls);
>  	if (!mrtsock)
>  		c->mfc_flags |= MFC_STATIC;
>  
> -	write_lock_bh(&mrt_lock);
> -	c->next = mfc_cache_array[line];
> -	mfc_cache_array[line] = c;
> -	write_unlock_bh(&mrt_lock);
> +	c->next = e->mfc_cache_array[line];
> +	e->mfc_cache_array[line] = c;
> +	write_unlock_bh(&e->mte_lock);
>  
>  	/*
>  	 *	Check to see if we resolved a queued list. If so we
>  	 *	need to send on the frames and tidy up.
>  	 */
> -	spin_lock_bh(&mfc_unres_lock);
> -	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
> +	spin_lock_bh(&e->mfc_unres_lock);
> +	for (cp = &e->mfc_unres_queue; (uc=*cp) != NULL;
>  	     cp = &uc->next) {
>  		if (uc->mfc_origin == c->mfc_origin &&
>  		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
>  			*cp = uc->next;
> -			if (atomic_dec_and_test(&cache_resolve_queue_len))
> +			// TODO:  Broken: only delete the timer once the unresolved queues of *all* tables are empty.
> +			if (atomic_dec_and_test(&e->cache_resolve_queue_len))
>  				del_timer(&ipmr_expire_timer);
>  			break;
>  		}
>  	}
> -	spin_unlock_bh(&mfc_unres_lock);
> +	spin_unlock_bh(&e->mfc_unres_lock);
>  
>  	if (uc) {
> -		ipmr_cache_resolve(uc, c);
> +		ipmr_cache_resolve(e, uc, c);
>  		kmem_cache_free(mrt_cachep, uc);
>  	}
>  	return 0;
> @@ -794,18 +854,18 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
>  
>  /*
>   *	Close the multicast socket, and clear the vif tables etc
> + *  Assumes RTNL is held.
>   */
>  
> -static void mroute_clean_tables(struct sock *sk)
> -{
> +static void mroute_clean_tables(struct mroute_table_entry* e, struct sock *sk) {
>  	int i;
>  
>  	/*
>  	 *	Shut down all active vif entries
>  	 */
> -	for (i=0; i<maxvif; i++) {
> -		if (!(vif_table[i].flags&VIFF_STATIC))
> -			vif_delete(i);
> +	for (i=0; i < e->maxvif; i++) {
> +		if (!(e->vif_table[i].flags&VIFF_STATIC))
> +			vif_delete(e, i);
>  	}
>  
>  	/*
> @@ -814,170 +874,244 @@ static void mroute_clean_tables(struct sock *sk)
>  	for (i=0;i<MFC_LINES;i++) {
>  		struct mfc_cache *c, **cp;
>  
> -		cp = &mfc_cache_array[i];
> +		cp = &e->mfc_cache_array[i];
>  		while ((c = *cp) != NULL) {
>  			if (c->mfc_flags&MFC_STATIC) {
>  				cp = &c->next;
>  				continue;
>  			}
> -			write_lock_bh(&mrt_lock);
> +			write_lock_bh(&e->mte_lock);
>  			*cp = c->next;
> -			write_unlock_bh(&mrt_lock);
> +			write_unlock_bh(&e->mte_lock);
>  
>  			kmem_cache_free(mrt_cachep, c);
>  		}
>  	}
>  
> -	if (atomic_read(&cache_resolve_queue_len) != 0) {
> +	if (atomic_read(&e->cache_resolve_queue_len) != 0) {
>  		struct mfc_cache *c;
>  
> -		spin_lock_bh(&mfc_unres_lock);
> -		while (mfc_unres_queue != NULL) {
> -			c = mfc_unres_queue;
> -			mfc_unres_queue = c->next;
> -			spin_unlock_bh(&mfc_unres_lock);
> +		spin_lock_bh(&e->mfc_unres_lock);
> +		while (e->mfc_unres_queue != NULL) {
> +			c = e->mfc_unres_queue;
> +			e->mfc_unres_queue = c->next;
> +			spin_unlock_bh(&e->mfc_unres_lock);
>  
> -			ipmr_destroy_unres(c);
> +			ipmr_destroy_unres(e, c);
>  
> -			spin_lock_bh(&mfc_unres_lock);
> +			spin_lock_bh(&e->mfc_unres_lock);
>  		}
> -		spin_unlock_bh(&mfc_unres_lock);
> +		spin_unlock_bh(&e->mfc_unres_lock);
>  	}
>  }
>  
> -static void mrtsock_destruct(struct sock *sk)
> -{
> +static void mrtsock_destruct(struct sock *sk){
> +	int i;
>  	rtnl_lock();
> -	if (sk == mroute_socket) {
> -		IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
>  
> -		write_lock_bh(&mrt_lock);
> -		mroute_socket=NULL;
> -		write_unlock_bh(&mrt_lock);
> -
> -		mroute_clean_tables(sk);
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		struct mroute_table_entry* e;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			if (sk == e->mroute_socket) {
> +				IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
> +				
> +				write_lock_bh(&e->mte_lock);
> +				e->mroute_socket = NULL;
> +				write_unlock_bh(&e->mte_lock);
> +				
> +				mroute_clean_tables(e, sk);
> +			}
> +		}
>  	}
>  	rtnl_unlock();
>  }
>  
> -/*
> - *	Socket options and virtual interface manipulation. The whole
> - *	virtual interface system is a complete heap, but unfortunately
> - *	that's how BSD mrouted happens to think. Maybe one day with a proper
> - *	MOSPF/PIM router set up we can clean this up.
> - */
> -
> -int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
> -{
> -	int ret;
> +int ip_mroute_setsockopt_helper(int table_entry_id, struct sock *sk,int optname,
> +				char __user *optval, int optlen) {
> +	int rv;
>  	struct vifctl vif;
>  	struct mfcctl mfc;
> +	struct mroute_table_entry* e;
> +
> +	printk("setsocktop: optname: %d  optlen: %d  table_id: %d  sizeof vif: %d\n",
> +	       optname, optlen, table_entry_id, sizeof(vif));
> +
> +	read_lock(&mrte_lock);
> +	
> +	e = find_mroute_table(table_entry_id);
>  
>  	if (optname != MRT_INIT) {
> -		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
> -			return -EACCES;
> +		if (!e) {
> +			read_unlock(&mrte_lock);
> +			rv = -ENODEV;
> +			return rv;
> +		}
> +		read_lock(&e->mte_lock);
> +		if (sk != e->mroute_socket && !capable(CAP_NET_ADMIN)) {
> +			write_unlock(&e->mte_lock);
> +			read_unlock(&mrte_lock);
> +			rv = -EACCES;
> +			return rv;
> +		}
> +		read_unlock(&e->mte_lock);
>  	}
> +	read_unlock(&mrte_lock);
>  
>  	switch (optname) {
>  	case MRT_INIT:
> +		printk("MRT_INIT\n");
>  		if (sk->sk_type != SOCK_RAW ||
> -		    inet_sk(sk)->num != IPPROTO_IGMP)
> -			return -EOPNOTSUPP;
> -		if (optlen!=sizeof(int))
> -			return -ENOPROTOOPT;
> -
> -		rtnl_lock();
> -		if (mroute_socket) {
> -			rtnl_unlock();
> -			return -EADDRINUSE;
> +		    inet_sk(sk)->num != IPPROTO_IGMP) {
> +			rv = -EOPNOTSUPP;
>  		}
> -
> -		ret = ip_ra_control(sk, 1, mrtsock_destruct);
> -		if (ret == 0) {
> -			write_lock_bh(&mrt_lock);
> -			mroute_socket=sk;
> -			write_unlock_bh(&mrt_lock);
> -
> -			IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
> +		else if (optlen < sizeof(int)) {
> +			rv = -ENOPROTOOPT;
>  		}
> -		rtnl_unlock();
> -		return ret;
> +		else {
> +			if (!e) {
> +				e = create_mroute_table(table_entry_id);
> +				
> +				if (!e) {
> +					return -ENODEV;
> +				}
> +				rtnl_lock();
> +				write_lock(&mrte_lock);
> +				add_mroute_table(e);
> +				write_unlock(&mrte_lock);
> +				rtnl_unlock();
> +			}
> +			if (e->mroute_socket) {
> +				rv = -EADDRINUSE;
> +			}
> +			else {
> +				rv = ip_ra_control(sk, 1, mrtsock_destruct);
> +				if (rv == 0) {
> +					write_lock(&e->mte_lock);
> +					e->mroute_socket = sk;
> +					write_unlock(&e->mte_lock);
> +					
> +					IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
> +				}
> +			}
> +		}
> +		break;
> +	
>  	case MRT_DONE:
> -		if (sk!=mroute_socket)
> -			return -EACCES;
> -		return ip_ra_control(sk, 0, NULL);
> +		printk("MRT_DONE\n");
> +		if (sk != e->mroute_socket) {
> +			rv = -EACCES;
> +		}
> +		else {
> +			rv = ip_ra_control(sk, 0, NULL);
> +			return rv;
> +		}
> +		break;
> +		
>  	case MRT_ADD_VIF:
>  	case MRT_DEL_VIF:
> -		if (optlen!=sizeof(vif))
> -			return -EINVAL;
> -		if (copy_from_user(&vif,optval,sizeof(vif)))
> -			return -EFAULT;
> -		if (vif.vifc_vifi >= MAXVIFS)
> -			return -ENFILE;
> -		rtnl_lock();
> -		if (optname==MRT_ADD_VIF) {
> -			ret = vif_add(&vif, sk==mroute_socket);
> -		} else {
> -			ret = vif_delete(vif.vifc_vifi);
> +		if (optname == MRT_ADD_VIF) 
> +			printk("MRT_ADD_VIF\n");
> +		else
> +			printk("MRT_DEL_VIF\n");
> +		if (optlen < sizeof(vif)) {
> +			rv = -EINVAL;
>  		}
> -		rtnl_unlock();
> -		return ret;
> -
> +		else if (copy_from_user(&vif, optval, sizeof(vif))) {
> +			rv = -EFAULT;
> +		}
> +		else if (vif.vifc_vifi >= MAXVIFS) {
> +			rv = -ENFILE;
> +		}
> +		else {
> +			rtnl_lock();
> +			write_lock(&mrte_lock);
> +			if (optname == MRT_ADD_VIF) {
> +				rv = vif_add(e, &vif, sk == e->mroute_socket);
> +			} else {
> +				rv = vif_delete(e, vif.vifc_vifi);
> +			}
> +			write_unlock(&mrte_lock);
> +			rtnl_unlock();
> +		}
> +		return rv;
> +		
>  		/*
>  		 *	Manipulate the forwarding caches. These live
>  		 *	in a sort of kernel/user symbiosis.
>  		 */
>  	case MRT_ADD_MFC:
>  	case MRT_DEL_MFC:
> -		if (optlen!=sizeof(mfc))
> -			return -EINVAL;
> -		if (copy_from_user(&mfc,optval, sizeof(mfc)))
> -			return -EFAULT;
> -		rtnl_lock();
> -		if (optname==MRT_DEL_MFC)
> -			ret = ipmr_mfc_delete(&mfc);
> -		else
> -			ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
> -		rtnl_unlock();
> -		return ret;
> +		printk("MRT_ADD or DEL_MFC\n");
> +		if (optlen < sizeof(mfc)) {
> +			rv = -EINVAL;
> +		}
> +		else if (copy_from_user(&mfc,optval, sizeof(mfc))) {
> +			rv = -EFAULT;
> +		}
> +		else {
> +			rtnl_lock();
> +			write_lock(&mrte_lock);
> +			if (optname == MRT_DEL_MFC)
> +				rv = ipmr_mfc_delete(e, &mfc);
> +			else
> +				rv = ipmr_mfc_add(e, &mfc, sk == e->mroute_socket);
> +			/* drop the locks here..and re-acquire below as needed. */
> +			write_unlock(&mrte_lock);
> +			rtnl_unlock();			
> +		}
> +		return rv;
> +		
>  		/*
>  		 *	Control PIM assert.
>  		 */
>  	case MRT_ASSERT:
>  	{
>  		int v;
> -		if (get_user(v,(int __user *)optval))
> -			return -EFAULT;
> -		mroute_do_assert=(v)?1:0;
> -		return 0;
> +		printk("MRT_ASSERT\n");
> +		if (get_user(v,(int __user *)optval)) {
> +			rv = -EFAULT;
> +		}
> +		else {
> +			rv = 0;
> +			e->mroute_do_assert = (v) ? 1 : 0;
> +		}
> +		break;
>  	}
>  #ifdef CONFIG_IP_PIMSM
>  	case MRT_PIM:
>  	{
>  		int v;
> -
> -		if (get_user(v,(int __user *)optval))
> -			return -EFAULT;
> -		v = (v) ? 1 : 0;
> -
> -		rtnl_lock();
> -		ret = 0;
> -		if (v != mroute_do_pim) {
> -			mroute_do_pim = v;
> -			mroute_do_assert = v;
> +		printk("MRT_PIM\n");
> +		if (get_user(v,(int __user *)optval)) {
> +			rv = -EFAULT;
> +		}
> +		else {
> +			v = (v) ? 1 : 0;
> +
> +			rv = 0;
> +			if (v != e->mroute_do_pim) {
> +				e->mroute_do_pim = v;
> +				e->mroute_do_assert = v;
> +				if (v)
> +					mroute_pim_cnt++;
> +				else
> +					mroute_pim_cnt--;
>  #ifdef CONFIG_IP_PIMSM_V2
> -			if (mroute_do_pim)
> -				ret = inet_add_protocol(&pim_protocol,
> -							IPPROTO_PIM);
> -			else
> -				ret = inet_del_protocol(&pim_protocol,
> -							IPPROTO_PIM);
> -			if (ret < 0)
> -				ret = -EAGAIN;
> +				if (v && (mroute_pim_cnt == 1))
> +					rv = inet_add_protocol(&pim_protocol,
> +								IPPROTO_PIM);
> +				else if ((!v) && (mroute_pim_cnt == 0))
> +					rv = inet_del_protocol(&pim_protocol,
> +								IPPROTO_PIM);
> +				if (rv < 0)
> +					rv = -EAGAIN;
>  #endif
> +			}
>  		}
> -		rtnl_unlock();
> -		return ret;
> +		break;
>  	}
>  #endif
>  	/*
> @@ -985,19 +1119,77 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
>  	 *	set.
>  	 */
>  	default:
> -		return -ENOPROTOOPT;
> +		rv = -ENOPROTOOPT;
> +	}
> +	return rv;
> +}
> +
> +/*
> + *	Socket options and virtual interface manipulation. The whole
> + *	virtual interface system is a complete heap, but unfortunately
> + *	that's how BSD mrouted happens to think. Maybe one day with a proper
> + *	MOSPF/PIM router set up we can clean this up.
> + */
> +
> +int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen) {
> +	struct vifctl_ng vif_ng;
> +	struct mrt_sockopt_simple tmp;
> +	struct mfcctl_ng mfc_ng;
> +	int tbl_entry_id = DFLT_MROUTE_TBL;
> +
> +	printk("setsocktop: optname: %d  optlen: %d\n", optname, optlen);
> +
> +	switch (optname) {
> +	case MRT_INIT:
> +	case MRT_DONE:
> +	case MRT_ASSERT:
> +#ifdef CONFIG_IP_PIMSM
> +	case MRT_PIM:
> +#endif
> +		if (optlen == sizeof(struct mrt_sockopt_simple)) {
> +			if (copy_from_user(&tmp, optval, sizeof(tmp)))
> +				return -EFAULT;
> +			tbl_entry_id = tmp.table_id;
> +		}
> +		break;
> +
> +	case MRT_ADD_VIF:
> +	case MRT_DEL_VIF:
> +		if (optlen == sizeof(vif_ng)) {
> +			if (copy_from_user(&vif_ng, optval, sizeof(vif_ng)))
> +				return -EFAULT;
> +			tbl_entry_id = vif_ng.table_id;
> +		}
> +		break;
> +		
> +	case MRT_ADD_MFC:
> +	case MRT_DEL_MFC:
> +		if (optlen == sizeof(mfc_ng)) {
> +			if (copy_from_user(&mfc_ng, optval, sizeof(mfc_ng)))
> +				return -EFAULT;
> +			tbl_entry_id = mfc_ng.table_id;
> +		}
> +		break;
> +	default:
> +		break;
>  	}
> +
> +	return ip_mroute_setsockopt_helper(tbl_entry_id, sk, optname, optval, optlen);
>  }
>  
>  /*
>   *	Getsock opt support for the multicast routing system.
>   */
>  
> -int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
> -{
> +int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) {
>  	int olr;
> +	struct mrt_sockopt_simple tmp;
>  	int val;
> +	unsigned int table_id = DFLT_MROUTE_TBL;
> +	struct mroute_table_entry* e;
>  
> +	printk("ip_mroute_getsocktopt, optname: %d\n", optname);
> +	
>  	if (optname!=MRT_VERSION &&
>  #ifdef CONFIG_IP_PIMSM
>  	   optname!=MRT_PIM &&
> @@ -1008,22 +1200,44 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
>  	if (get_user(olr, optlen))
>  		return -EFAULT;
>  
> -	olr = min_t(unsigned int, olr, sizeof(int));
> +	olr = min_t(unsigned int, olr, sizeof(struct mrt_sockopt_simple));
>  	if (olr < 0)
>  		return -EINVAL;
>  
> +	if (olr == sizeof(tmp)) {
> +		if (copy_from_user(&tmp, optval, sizeof(tmp)))
> +			return -EFAULT;
> +		table_id = tmp.table_id;
> +	}
> +	
>  	if (put_user(olr,optlen))
>  		return -EFAULT;
> -	if (optname==MRT_VERSION)
> -		val=0x0305;
> +	
> +	rtnl_lock();
> +	e = find_mroute_table(table_id);
> +	if (!e) {
> +		rtnl_unlock();
> +		return -ENODEV;
> +	}
> +	if (optname == MRT_VERSION)
> +		val = 0x0305;
>  #ifdef CONFIG_IP_PIMSM
>  	else if (optname==MRT_PIM)
> -		val=mroute_do_pim;
> +		val = e->mroute_do_pim;
>  #endif
>  	else
> -		val=mroute_do_assert;
> -	if (copy_to_user(optval,&val,olr))
> -		return -EFAULT;
> +		val = e->mroute_do_assert;
> +	rtnl_unlock();
> +	
> +	if (olr == sizeof(tmp)) {
> +		tmp.optval = val;
> +		if (copy_to_user(optval, &tmp, olr))
> +			return -EFAULT;
> +	}
> +	else {
> +		if (copy_to_user(optval,&val,olr))
> +			return -EFAULT;
> +	}
>  	return 0;
>  }
>  
> @@ -1031,51 +1245,131 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
>   *	The IP multicast ioctl support routines.
>   */
>  
> -int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
> -{
> +int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) {
>  	struct sioc_sg_req sr;
>  	struct sioc_vif_req vr;
> +	struct sioc_sg_req_ng sr_ng;
> +	struct sioc_vif_req_ng vr_ng;
>  	struct vif_device *vif;
>  	struct mfc_cache *c;
> +	struct mroute_table_entry* e;
>  
> +	printk("ipmr_ioctl, cmd: %d\n", cmd);
> +	
>  	switch (cmd) {
>  	case SIOCGETVIFCNT:
> -		if (copy_from_user(&vr,arg,sizeof(vr)))
> +		if (copy_from_user(&vr, arg, sizeof(vr)))
> +			return -EFAULT;
> +		
> +		read_lock(&mrte_lock);
> +		e = find_mroute_table(DFLT_MROUTE_TBL);
> +		if (!e) {
> +			read_unlock(&mrte_lock);
> +			return -ENODEV;
> +		}
> +		else {
> +			if (vr.vifi >= e->maxvif) {
> +				read_unlock(&mrte_lock);
> +				return -EINVAL;
> +			}
> +			
> +			read_lock(&e->mte_lock);
> +			vif = &e->vif_table[vr.vifi];
> +			if (VIF_EXISTS(e, vr.vifi))	{
> +				vr.icount = vif->pkt_in;
> +				vr.ocount = vif->pkt_out;
> +				vr.ibytes = vif->bytes_in;
> +				vr.obytes = vif->bytes_out;
> +				read_unlock(&e->mte_lock);
> +				read_unlock(&mrte_lock);		
> +
> +				if (copy_to_user(arg, &vr, sizeof(vr)))
> +					return -EFAULT;
> +				return 0;
> +			}
> +		}
> +		read_unlock(&mrte_lock);
> +		return -EADDRNOTAVAIL;
> +		
> +	case SIOCGETSGCNT:
> +		if (copy_from_user(&sr,arg,sizeof(sr)))
> +			return -EFAULT;
> +
> +		read_lock(&mrte_lock);
> +		e = find_mroute_table(DFLT_MROUTE_TBL);
> +		if (!e) {
> +			read_unlock(&mrte_lock);
> +			return -ENODEV;
> +		}
> +		else {
> +			c = ipmr_cache_find(e, sr.src.s_addr, sr.grp.s_addr);
> +			if (c) {
> +				sr.pktcnt = c->mfc_un.res.pkt;
> +				sr.bytecnt = c->mfc_un.res.bytes;
> +				sr.wrong_if = c->mfc_un.res.wrong_if;
> +				read_unlock(&mrte_lock);
> +
> +				if (copy_to_user(arg, &sr, sizeof(sr)))
> +					return -EFAULT;
> +				return 0;
> +			}
> +		}
> +		read_unlock(&mrte_lock);
> +		return -EADDRNOTAVAIL;
> +
> +		
> +	case SIOCGETVIFCNT_NG:
> +		if (copy_from_user(&vr_ng,arg,sizeof(vr_ng)))
>  			return -EFAULT;
> -		if (vr.vifi>=maxvif)
> +		
> +		read_lock(&mrte_lock);
> +		e = find_mroute_table(vr_ng.table_id);
> +		if (!e) {
> +			read_unlock(&mrte_lock);
> +			return -ENODEV;
> +		}
> +		if (vr_ng.vif.vifi >= e->maxvif) {
> +			read_unlock(&mrte_lock);
>  			return -EINVAL;
> -		read_lock(&mrt_lock);
> -		vif=&vif_table[vr.vifi];
> -		if (VIF_EXISTS(vr.vifi))	{
> -			vr.icount=vif->pkt_in;
> -			vr.ocount=vif->pkt_out;
> -			vr.ibytes=vif->bytes_in;
> -			vr.obytes=vif->bytes_out;
> -			read_unlock(&mrt_lock);
> -
> -			if (copy_to_user(arg,&vr,sizeof(vr)))
> +		}
> +		if (VIF_EXISTS(e, vr_ng.vif.vifi))	{
> +			vif = &e->vif_table[vr_ng.vif.vifi];
> +			vr_ng.vif.icount = vif->pkt_in;
> +			vr_ng.vif.ocount = vif->pkt_out;
> +			vr_ng.vif.ibytes = vif->bytes_in;
> +			vr_ng.vif.obytes = vif->bytes_out;
> +			read_unlock(&mrte_lock);
> +			
> +			if (copy_to_user(arg, &vr_ng, sizeof(vr_ng)))
>  				return -EFAULT;
>  			return 0;
>  		}
> -		read_unlock(&mrt_lock);
> +		read_unlock(&mrte_lock);
>  		return -EADDRNOTAVAIL;
> -	case SIOCGETSGCNT:
> -		if (copy_from_user(&sr,arg,sizeof(sr)))
> +		
> +	case SIOCGETSGCNT_NG:
> +		if (copy_from_user(&sr_ng, arg, sizeof(sr_ng)))
>  			return -EFAULT;
>  
> -		read_lock(&mrt_lock);
> -		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
> +		read_lock(&mrte_lock);
> +		e = find_mroute_table(sr_ng.table_id);
> +		if (!e) {
> +			read_unlock(&mrte_lock);
> +			return -ENODEV;
> +		}
> +		
> +		c = ipmr_cache_find(e, sr.src.s_addr, sr.grp.s_addr);
>  		if (c) {
> -			sr.pktcnt = c->mfc_un.res.pkt;
> -			sr.bytecnt = c->mfc_un.res.bytes;
> -			sr.wrong_if = c->mfc_un.res.wrong_if;
> -			read_unlock(&mrt_lock);
> +			sr_ng.req.pktcnt = c->mfc_un.res.pkt;
> +			sr_ng.req.bytecnt = c->mfc_un.res.bytes;
> +			sr_ng.req.wrong_if = c->mfc_un.res.wrong_if;
> +			read_unlock(&mrte_lock);
>  
>  			if (copy_to_user(arg,&sr,sizeof(sr)))
>  				return -EFAULT;
>  			return 0;
>  		}
> -		read_unlock(&mrt_lock);
> +		read_unlock(&mrte_lock);
>  		return -EADDRNOTAVAIL;
>  	default:
>  		return -ENOIOCTLCMD;
> @@ -1083,22 +1377,34 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
>  }
>  
>  
> +/** RTNL is held when this method is called. */
>  static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
>  {
>  	struct net_device *dev = ptr;
> -	struct vif_device *v;
>  	int ct;
> +	int i;
>  
>  	if (dev->nd_net != &init_net)
>  		return NOTIFY_DONE;
>  
>  	if (event != NETDEV_UNREGISTER)
>  		return NOTIFY_DONE;
> -	v=&vif_table[0];
> -	for (ct=0;ct<maxvif;ct++,v++) {
> -		if (v->dev==dev)
> -			vif_delete(ct);
> +
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			write_lock(&e->mte_lock);
> +			for (ct = 0; ct < e->maxvif; ct++) {
> +				if (e->vif_table[ct].dev == dev)
> +					vif_delete(e, ct);
> +			}
> +			write_unlock(&e->mte_lock);
> +		}
>  	}
> +	dev->mrt_entry = NULL;
>  	return NOTIFY_DONE;
>  }
>  
> @@ -1155,10 +1461,10 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
>   *	Processing handlers for ipmr_forward
>   */
>  
> -static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
> -{
> +static void ipmr_queue_xmit(struct mroute_table_entry* e, struct sk_buff *skb, struct mfc_cache *c,
> +			    int vifi) {
>  	const struct iphdr *iph = ip_hdr(skb);
> -	struct vif_device *vif = &vif_table[vifi];
> +	struct vif_device *vif = &e->vif_table[vifi];
>  	struct net_device *dev;
>  	struct rtable *rt;
>  	int    encap = 0;
> @@ -1172,7 +1478,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
>  		vif->bytes_out+=skb->len;
>  		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
>  		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
> -		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
> +		ipmr_cache_report(e, skb, vifi, IGMPMSG_WHOLEPKT);
>  		kfree_skb(skb);
>  		return;
>  	}
> @@ -1256,11 +1562,10 @@ out_free:
>  	return;
>  }
>  
> -static int ipmr_find_vif(struct net_device *dev)
> -{
> +static int ipmr_find_vif(struct mroute_table_entry* e, struct net_device *dev) {
>  	int ct;
> -	for (ct=maxvif-1; ct>=0; ct--) {
> -		if (vif_table[ct].dev == dev)
> +	for (ct = e->maxvif - 1; ct >= 0; ct--) {
> +		if (e->vif_table[ct].dev == dev)
>  			break;
>  	}
>  	return ct;
> @@ -1268,7 +1573,7 @@ static int ipmr_find_vif(struct net_device *dev)
>  
>  /* "local" means that we should preserve one skb (for local delivery) */
>  
> -static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
> +static int ip_mr_forward(struct mroute_table_entry* e, struct sk_buff *skb, struct mfc_cache *cache, int local)
>  {
>  	int psend = -1;
>  	int vif, ct;
> @@ -1280,7 +1585,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
>  	/*
>  	 * Wrong interface: drop packet and (maybe) send PIM assert.
>  	 */
> -	if (vif_table[vif].dev != skb->dev) {
> +	if (e->vif_table[vif].dev != skb->dev) {
>  		int true_vifi;
>  
>  		if (((struct rtable*)skb->dst)->fl.iif == 0) {
> @@ -1299,25 +1604,25 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
>  		}
>  
>  		cache->mfc_un.res.wrong_if++;
> -		true_vifi = ipmr_find_vif(skb->dev);
> +		true_vifi = ipmr_find_vif(e, skb->dev);
>  
> -		if (true_vifi >= 0 && mroute_do_assert &&
> +		if (true_vifi >= 0 && e->mroute_do_assert &&
>  		    /* pimsm uses asserts, when switching from RPT to SPT,
>  		       so that we cannot check that packet arrived on an oif.
>  		       It is bad, but otherwise we would need to move pretty
>  		       large chunk of pimd to kernel. Ough... --ANK
>  		     */
> -		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
> +		    (e->mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
>  		    time_after(jiffies,
>  			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
>  			cache->mfc_un.res.last_assert = jiffies;
> -			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
> +			ipmr_cache_report(e, skb, true_vifi, IGMPMSG_WRONGVIF);
>  		}
>  		goto dont_forward;
>  	}
>  
> -	vif_table[vif].pkt_in++;
> -	vif_table[vif].bytes_in+=skb->len;
> +	e->vif_table[vif].pkt_in++;
> +	e->vif_table[vif].bytes_in+=skb->len;
>  
>  	/*
>  	 *	Forward the frame
> @@ -1327,7 +1632,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
>  			if (psend != -1) {
>  				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
>  				if (skb2)
> -					ipmr_queue_xmit(skb2, cache, psend);
> +					ipmr_queue_xmit(e, skb2, cache, psend);
>  			}
>  			psend=ct;
>  		}
> @@ -1336,9 +1641,9 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
>  		if (local) {
>  			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
>  			if (skb2)
> -				ipmr_queue_xmit(skb2, cache, psend);
> +				ipmr_queue_xmit(e, skb2, cache, psend);
>  		} else {
> -			ipmr_queue_xmit(skb, cache, psend);
> +			ipmr_queue_xmit(e, skb, cache, psend);
>  			return 0;
>  		}
>  	}
> @@ -1357,12 +1662,13 @@ dont_forward:
>  int ip_mr_input(struct sk_buff *skb)
>  {
>  	struct mfc_cache *cache;
> -	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
> +	int local = ((struct rtable*)skb->dst)->rt_flags & RTCF_LOCAL;
> +	struct mroute_table_entry* e = NULL;
>  
>  	/* Packet is looped back after forward, it should not be
>  	   forwarded second time, but still can be delivered locally.
>  	 */
> -	if (IPCB(skb)->flags&IPSKB_FORWARDED)
> +	if (IPCB(skb)->flags & IPSKB_FORWARDED)
>  		goto dont_forward;
>  
>  	if (!local) {
> @@ -1376,51 +1682,65 @@ int ip_mr_input(struct sk_buff *skb)
>  			       groups. It is very bad, because it means
>  			       that we can forward NO IGMP messages.
>  			     */
> -			    read_lock(&mrt_lock);
> -			    if (mroute_socket) {
> -				    nf_reset(skb);
> -				    raw_rcv(mroute_socket, skb);
> -				    read_unlock(&mrt_lock);
> -				    return 0;
> +			    read_lock(&mrte_lock);
> +			    e = skb->dev->mrt_entry;
> +			    if (!e) {
> +				    printk("WARNING:  Device %s has NULL mrt_entry in ip_mr_input.\n",
> +					   skb->dev->name);
> +			    }
> +			    else {
> +				    if (e->mroute_socket) {
> +					    nf_reset(skb);
> +					    raw_rcv(e->mroute_socket, skb);
> +					    read_unlock(&mrte_lock);
> +					    return 0;
> +				    }
>  			    }
> -			    read_unlock(&mrt_lock);
> +			    read_unlock(&mrte_lock);
>  		    }
>  	}
>  
> -	read_lock(&mrt_lock);
> -	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
> +	read_lock(&mrte_lock);
> +	e = skb->dev->mrt_entry;
> +	if (!e) {
> +		printk("WARNING:  Device %s has NULL mrt_entry in ip_mr_input(2).\n",
> +		       skb->dev->name);
> +		read_unlock(&mrte_lock);
> +		kfree_skb(skb);
> +		return -ENODEV;
> +	}
> +	
> +	cache = ipmr_cache_find(e, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
>  
>  	/*
>  	 *	No usable cache entry
>  	 */
>  	if (cache==NULL) {
>  		int vif;
> -
>  		if (local) {
>  			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
>  			ip_local_deliver(skb);
>  			if (skb2 == NULL) {
> -				read_unlock(&mrt_lock);
> +				read_unlock(&mrte_lock);
>  				return -ENOBUFS;
>  			}
>  			skb = skb2;
>  		}
>  
> -		vif = ipmr_find_vif(skb->dev);
> +		vif = ipmr_find_vif(e, skb->dev);
>  		if (vif >= 0) {
> -			int err = ipmr_cache_unresolved(vif, skb);
> -			read_unlock(&mrt_lock);
> -
> +			int err = ipmr_cache_unresolved(e, vif, skb);
> +			read_unlock(&mrte_lock);
>  			return err;
>  		}
> -		read_unlock(&mrt_lock);
> +		read_unlock(&mrte_lock);
>  		kfree_skb(skb);
>  		return -ENODEV;
>  	}
>  
> -	ip_mr_forward(skb, cache, local);
> +	ip_mr_forward(e, skb, cache, local);
>  
> -	read_unlock(&mrt_lock);
> +	read_unlock(&mrte_lock);
>  
>  	if (local)
>  		return ip_local_deliver(skb);
> @@ -1439,21 +1759,29 @@ dont_forward:
>   * Handle IGMP messages of PIMv1
>   */
>  
> -int pim_rcv_v1(struct sk_buff * skb)
> -{
> +int pim_rcv_v1(struct sk_buff * skb) {
>  	struct igmphdr *pim;
>  	struct iphdr   *encap;
>  	struct net_device  *reg_dev = NULL;
> -
> +	struct mroute_table_entry* e;
> +	
>  	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
>  		goto drop;
>  
>  	pim = igmp_hdr(skb);
>  
> -	if (!mroute_do_pim ||
> +	read_lock(&mrte_lock);
> +	e = skb->dev->mrt_entry;
> +	if (!e) {
> +		printk("WARNING:  Device %s has NULL mrt_entry in pim_rcv_v1\n",
> +		       skb->dev->name);
> +		goto drop_unlock;
> +	}
> +
> +	if (!e->mroute_do_pim ||
>  	    skb->len < sizeof(*pim) + sizeof(*encap) ||
>  	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
> -		goto drop;
> +		goto drop_unlock;
>  
>  	encap = (struct iphdr *)(skb_transport_header(skb) +
>  				 sizeof(struct igmphdr));
> @@ -1466,14 +1794,15 @@ int pim_rcv_v1(struct sk_buff * skb)
>  	if (!ipv4_is_multicast(encap->daddr) ||
>  	    encap->tot_len == 0 ||
>  	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
> -		goto drop;
> +		goto drop_unlock;
>  
> -	read_lock(&mrt_lock);
> -	if (reg_vif_num >= 0)
> -		reg_dev = vif_table[reg_vif_num].dev;
> +	read_lock(&e->mte_lock);
> +	if (e->reg_vif_num >= 0)
> +		reg_dev = e->vif_table[e->reg_vif_num].dev;
>  	if (reg_dev)
>  		dev_hold(reg_dev);
> -	read_unlock(&mrt_lock);
> +	read_unlock(&e->mte_lock);
> +	read_unlock(&mrte_lock);
>  
>  	if (reg_dev == NULL)
>  		goto drop;
> @@ -1493,6 +1822,9 @@ int pim_rcv_v1(struct sk_buff * skb)
>  	netif_rx(skb);
>  	dev_put(reg_dev);
>  	return 0;
> +	
> + drop_unlock:
> +	read_unlock(&mrte_lock);
>   drop:
>  	kfree_skb(skb);
>  	return 0;
> @@ -1500,11 +1832,11 @@ int pim_rcv_v1(struct sk_buff * skb)
>  #endif
>  
>  #ifdef CONFIG_IP_PIMSM_V2
> -static int pim_rcv(struct sk_buff * skb)
> -{
> +static int pim_rcv(struct sk_buff * skb) {
>  	struct pimreghdr *pim;
>  	struct iphdr   *encap;
>  	struct net_device  *reg_dev = NULL;
> +	struct mroute_table_entry* e;
>  
>  	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
>  		goto drop;
> @@ -1524,13 +1856,22 @@ static int pim_rcv(struct sk_buff * skb)
>  	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
>  		goto drop;
>  
> -	read_lock(&mrt_lock);
> -	if (reg_vif_num >= 0)
> -		reg_dev = vif_table[reg_vif_num].dev;
> +	read_lock(&mrte_lock);
> +	e = skb->dev->mrt_entry;
> +	if (!e) {
> +		printk("WARNING:  Device %s has NULL mrt_entry in pim_rcv\n",
> +		       skb->dev->name);
> +		goto drop_unlock;
> +	}
> +
> +	read_lock(&e->mte_lock);
> +	if (e->reg_vif_num >= 0)
> +		reg_dev = e->vif_table[e->reg_vif_num].dev;
>  	if (reg_dev)
>  		dev_hold(reg_dev);
> -	read_unlock(&mrt_lock);
> -
> +	read_unlock(&e->mte_lock);
> +	read_unlock(&mrte_lock);
> +	
>  	if (reg_dev == NULL)
>  		goto drop;
>  
> @@ -1549,18 +1890,21 @@ static int pim_rcv(struct sk_buff * skb)
>  	netif_rx(skb);
>  	dev_put(reg_dev);
>  	return 0;
> +	
> +drop_unlock:
> +	read_unlock(&mrte_lock);
>   drop:
>  	kfree_skb(skb);
>  	return 0;
>  }
>  #endif
>  
> -static int
> -ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
> -{
> +/** Hold table read lock so that vifs can't change */
> +static int ipmr_fill_mroute(struct mroute_table_entry* e, struct sk_buff *skb, struct mfc_cache *c,
> +			    struct rtmsg *rtm) {
>  	int ct;
>  	struct rtnexthop *nhp;
> -	struct net_device *dev = vif_table[c->mfc_parent].dev;
> +	struct net_device *dev = e->vif_table[c->mfc_parent].dev;
>  	u8 *b = skb_tail_pointer(skb);
>  	struct rtattr *mp_head;
>  
> @@ -1576,7 +1920,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
>  			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
>  			nhp->rtnh_flags = 0;
>  			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
> -			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
> +			nhp->rtnh_ifindex = e->vif_table[ct].dev->ifindex;
>  			nhp->rtnh_len = sizeof(*nhp);
>  		}
>  	}
> @@ -1590,14 +1934,29 @@ rtattr_failure:
>  	return -EMSGSIZE;
>  }
>  
> -int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
> -{
> +int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) {
>  	int err;
>  	struct mfc_cache *cache;
>  	struct rtable *rt = (struct rtable*)skb->dst;
> +	struct mroute_table_entry* e;
> +	struct net_device* iif_dev = dev_get_by_index(&init_net, rt->fl.iif);
> +
> +	if (!iif_dev) {
> +		return -ENODEV;
> +	}
> +	read_lock(&mrte_lock);
> +	e = iif_dev->mrt_entry;
> +	
> +	if (!e) {
> +		printk("WARNING:  Device %s has null mrt_entry in ipmr_get_route.\n",
> +		       iif_dev->name);
> +		dev_put(iif_dev);
> +		read_unlock(&mrte_lock);
> +		return -ENODEV;
> +	}
> +	dev_put(iif_dev);
>  
> -	read_lock(&mrt_lock);
> -	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
> +	cache = ipmr_cache_find(e, rt->rt_src, rt->rt_dst);
>  
>  	if (cache==NULL) {
>  		struct sk_buff *skb2;
> @@ -1606,18 +1965,18 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
>  		int vif;
>  
>  		if (nowait) {
> -			read_unlock(&mrt_lock);
> +			read_unlock(&mrte_lock);
>  			return -EAGAIN;
>  		}
>  
>  		dev = skb->dev;
> -		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
> -			read_unlock(&mrt_lock);
> +		if (dev == NULL || (vif = ipmr_find_vif(e, dev)) < 0) {
> +			read_unlock(&mrte_lock);
>  			return -ENODEV;
>  		}
>  		skb2 = skb_clone(skb, GFP_ATOMIC);
>  		if (!skb2) {
> -			read_unlock(&mrt_lock);
> +			read_unlock(&mrte_lock);
>  			return -ENOMEM;
>  		}
>  
> @@ -1628,15 +1987,17 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
>  		iph->saddr = rt->rt_src;
>  		iph->daddr = rt->rt_dst;
>  		iph->version = 0;
> -		err = ipmr_cache_unresolved(vif, skb2);
> -		read_unlock(&mrt_lock);
> +		err = ipmr_cache_unresolved(e, vif, skb2);
> +		read_unlock(&mrte_lock);
>  		return err;
>  	}
>  
>  	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
>  		cache->mfc_flags |= MFC_NOTIFY;
> -	err = ipmr_fill_mroute(skb, cache, rtm);
> -	read_unlock(&mrt_lock);
> +	read_lock(&e->mte_lock);
> +	err = ipmr_fill_mroute(e, skb, cache, rtm);
> +	read_unlock(&e->mte_lock);
> +	read_unlock(&mrte_lock);
>  	return err;
>  }
>  
> @@ -1645,25 +2006,53 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
>   *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
>   */
>  struct ipmr_vif_iter {
> +	int bucket;
> +	unsigned int table_id;
>  	int ct;
>  };
>  
>  static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
>  					   loff_t pos)
>  {
> -	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
> -		if (!VIF_EXISTS(iter->ct))
> -			continue;
> -		if (pos-- == 0)
> -			return &vif_table[iter->ct];
> +	int i;
> +	iter->ct = 0;
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			int q;
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			for (q = 0; q < MAXVIFS/*e->maxvif*/; q++) {
> +				if (!VIF_EXISTS(e, q))
> +					continue;
> +				if (pos-- == 0) {
> +					iter->ct = q;
> +					iter->table_id = e->table_id;
> +					iter->bucket = i;
> +					return &e->vif_table[q];
> +				}
> +			}
> +		}
>  	}
>  	return NULL;
>  }
>  
>  static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
> -	__acquires(mrt_lock)
> +	__acquires(mrte_lock)
>  {
> -	read_lock(&mrt_lock);
> +	int i;
> +	read_lock(&mrte_lock);
> +	// Grab read locks for all of the tables too.
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			read_lock(&e->mte_lock);
> +		}
> +	}
>  	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
>  		: SEQ_START_TOKEN;
>  }
> @@ -1671,40 +2060,78 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
>  static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>  {
>  	struct ipmr_vif_iter *iter = seq->private;
> +	int found_tbl = 0;
>  
>  	++*pos;
>  	if (v == SEQ_START_TOKEN)
>  		return ipmr_vif_seq_idx(iter, 0);
>  
> -	while (++iter->ct < maxvif) {
> -		if (!VIF_EXISTS(iter->ct))
> -			continue;
> -		return &vif_table[iter->ct];
> +	while (iter->bucket < MROUTE_TABLE_HASH_MAX) {
> +		int i = iter->bucket;
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			int q;
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			/* if it's this table, or we've seen the table and into the next one */
> +			if ((e->table_id == iter->table_id) || found_tbl) {
> +				found_tbl = 1;
> +				iter->table_id = e->table_id;
> +				for (q = iter->ct; q < e->maxvif; q++) {
> +					if (!VIF_EXISTS(e, q))
> +						continue;
> +					/* found it, set up iter for next round */
> +					iter->ct = q + 1;
> +					iter->table_id = e->table_id;
> +					iter->bucket = i;
> +					return &e->vif_table[q];
> +				}
> +				/* not in this table, try next */
> +				iter->ct = 0;
> +			}
> +		}
> +		/* not in any table in that bucket */
> +		iter->bucket++;
> +		iter->ct = 0;
> +		WARN_ON(!found_tbl);
> +		found_tbl = 1;
>  	}
>  	return NULL;
>  }
>  
>  static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
> -	__releases(mrt_lock)
> +	__releases(mrte_lock)
>  {
> -	read_unlock(&mrt_lock);
> +	int i;
> +	// Release read locks for all of the tables too.
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			read_unlock(&e->mte_lock);
> +		}
> +	}
> +	read_unlock(&mrte_lock);
>  }
>  
>  static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
>  {
>  	if (v == SEQ_START_TOKEN) {
>  		seq_puts(seq,
> -			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
> +			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote  TableId\n");
>  	} else {
>  		const struct vif_device *vif = v;
>  		const char *name =  vif->dev ? vif->dev->name : "none";
>  
>  		seq_printf(seq,
> -			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
> -			   vif - vif_table,
> +			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X %d\n",
> +			   vif->vif_index,
>  			   name, vif->bytes_in, vif->pkt_in,
>  			   vif->bytes_out, vif->pkt_out,
> -			   vif->flags, vif->local, vif->remote);
> +			   vif->flags, vif->local, vif->remote, vif->table_id);
>  	}
>  	return 0;
>  }
> @@ -1732,39 +2159,85 @@ static const struct file_operations ipmr_vif_fops = {
>  
>  struct ipmr_mfc_iter {
>  	struct mfc_cache **cache;
> +	int bucket;
> +	unsigned int table_id;
>  	int ct;
> +	int walking_unres_queue; //boolean
>  };
>  
>  
>  static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
>  {
>  	struct mfc_cache *mfc;
> -
> -	it->cache = mfc_cache_array;
> -	read_lock(&mrt_lock);
> -	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
> -		for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
> -			if (pos-- == 0)
> -				return mfc;
> -	read_unlock(&mrt_lock);
> -
> -	it->cache = &mfc_unres_queue;
> -	spin_lock_bh(&mfc_unres_lock);
> -	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
> -		if (pos-- == 0)
> -			return mfc;
> -	spin_unlock_bh(&mfc_unres_lock);
> +	int i;
> +	
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			
> +			it->cache = e->mfc_cache_array;
> +			it->walking_unres_queue = 0;
> +			for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
> +				for (mfc = e->mfc_cache_array[it->ct]; mfc; mfc = mfc->next) {
> +					if (pos-- == 0) {
> +						it->table_id = e->table_id;
> +						it->bucket = i;
> +						return mfc;
> +					}
> +				}
> +			}
> +			
> +			it->cache = &e->mfc_unres_queue;
> +			it->walking_unres_queue = 1;
> +			spin_lock_bh(&e->mfc_unres_lock);
> +			for (mfc = e->mfc_unres_queue; mfc; mfc = mfc->next) {
> +				if (pos-- == 0) {
> +					it->table_id = e->table_id;
> +					it->bucket = i;
> +					/* Return with lock held..the 'next' logic expects it. */
> +					return mfc;
> +				}
> +			}
> +			spin_unlock_bh(&e->mfc_unres_lock);
> +		}
> +	}
>  
>  	it->cache = NULL;
> +	it->bucket = 0;
> +	it->table_id = 0;
> +	it->ct = 0;
> +	it->walking_unres_queue = 0;
>  	return NULL;
>  }
>  
>  
>  static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
> +	__acquires(mrte_lock)
>  {
> -	struct ipmr_mfc_iter *it = seq->private;
> +	int i;
> +	struct ipmr_mfc_iter *it;
> +	
> +	read_lock(&mrte_lock);
> +	// Grab read locks for all of the tables too.
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			read_lock(&e->mte_lock);
> +		}
> +	}
> +
> +	it = seq->private;
>  	it->cache = NULL;
>  	it->ct = 0;
> +	it->table_id = 0;
> +	it->bucket = 0;
> +	it->walking_unres_queue = 0;
>  	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
>  		: SEQ_START_TOKEN;
>  }
> @@ -1773,7 +2246,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>  {
>  	struct mfc_cache *mfc = v;
>  	struct ipmr_mfc_iter *it = seq->private;
> -
> +	int found_tbl = 0;
> +	
>  	++*pos;
>  
>  	if (v == SEQ_START_TOKEN)
> @@ -1782,42 +2256,82 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>  	if (mfc->next)
>  		return mfc->next;
>  
> -	if (it->cache == &mfc_unres_queue)
> -		goto end_of_list;
> -
> -	BUG_ON(it->cache != mfc_cache_array);
> -
> -	while (++it->ct < MFC_LINES) {
> -		mfc = mfc_cache_array[it->ct];
> -		if (mfc)
> -			return mfc;
> +	while (it->bucket < MROUTE_TABLE_HASH_MAX) {
> +		int i = it->bucket;
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			/* if it's this table, or we've seen the table and into the next one */
> +			if ((e->table_id == it->table_id) || found_tbl) {
> +				found_tbl = 1;
> +				it->table_id = e->table_id;
> +				
> +				if (it->cache == &e->mfc_unres_queue)
> +					goto end_of_list;
> +				
> +				BUG_ON(it->cache != e->mfc_cache_array);
> +				
> +				while (++it->ct < MFC_LINES) {
> +					mfc = e->mfc_cache_array[it->ct];
> +					if (mfc)
> +						return mfc;
> +				}
> +				
> +				/* exhausted cache_array, show unresolved */
> +				it->cache = &e->mfc_unres_queue;
> +				it->walking_unres_queue = 1;
> +				it->ct = 0;
> +				
> +				spin_lock_bh(&e->mfc_unres_lock);
> +				mfc = e->mfc_unres_queue;
> +				if (mfc)
> +					return mfc;
> +				
> +			end_of_list:
> +				spin_unlock_bh(&e->mfc_unres_lock);
> +				/* not in this table, try next */
> +				it->cache = NULL;
> +				it->walking_unres_queue = 0;
> +				it->ct = 0;
> +			}
> +		}
> +		/* not in any table in that bucket */
> +		it->bucket++;
> +		it->ct = 0;
> +		it->cache = NULL;
> +		it->walking_unres_queue = 0;
> +		WARN_ON(!found_tbl);
> +		found_tbl = 1;
>  	}
> -
> -	/* exhausted cache_array, show unresolved */
> -	read_unlock(&mrt_lock);
> -	it->cache = &mfc_unres_queue;
> -	it->ct = 0;
> -
> -	spin_lock_bh(&mfc_unres_lock);
> -	mfc = mfc_unres_queue;
> -	if (mfc)
> -		return mfc;
> -
> - end_of_list:
> -	spin_unlock_bh(&mfc_unres_lock);
> -	it->cache = NULL;
> -
> +	
>  	return NULL;
> +
>  }
>  
>  static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
>  {
>  	struct ipmr_mfc_iter *it = seq->private;
> +	int i;
> +	
> +	if (it->walking_unres_queue) {
> +		struct mroute_table_entry* e = find_mroute_table(it->table_id);
> +		spin_unlock(&e->mfc_unres_lock);
> +	}
> +	
> +	// Release read locks for all of the tables too.
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		struct list_head *li;
> +		struct list_head* tmp;
> +		list_for_each_safe(li, tmp, &mroute_tables[i]) {
> +			struct mroute_table_entry* e;
> +			e = list_entry(li , struct mroute_table_entry, list_member);
> +			read_unlock(&e->mte_lock);
> +		}
> +	}
>  
> -	if (it->cache == &mfc_unres_queue)
> -		spin_unlock_bh(&mfc_unres_lock);
> -	else if (it->cache == mfc_cache_array)
> -		read_unlock(&mrt_lock);
> +	read_unlock(&mrte_lock);
>  }
>  
>  static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
> @@ -1826,27 +2340,33 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
>  
>  	if (v == SEQ_START_TOKEN) {
>  		seq_puts(seq,
> -		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
> +		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs  TableId\n");
>  	} else {
>  		const struct mfc_cache *mfc = v;
>  		const struct ipmr_mfc_iter *it = seq->private;
>  
> -		seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
> +		seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld %d",
>  			   (unsigned long) mfc->mfc_mcastgrp,
>  			   (unsigned long) mfc->mfc_origin,
>  			   mfc->mfc_parent,
>  			   mfc->mfc_un.res.pkt,
>  			   mfc->mfc_un.res.bytes,
> -			   mfc->mfc_un.res.wrong_if);
> +			   mfc->mfc_un.res.wrong_if, mfc->table_id);
>  
> -		if (it->cache != &mfc_unres_queue) {
> +		if (!it->walking_unres_queue) {
> +			struct mroute_table_entry* e = find_mroute_table(it->table_id);
> +			if (!e) {
> +				printk("ERROR:  Could not find mrte, id: %d in mfc_seq_show\n",
> +				       it->table_id);
> +			}
>  			for (n = mfc->mfc_un.res.minvif;
>  			     n < mfc->mfc_un.res.maxvif; n++ ) {
> -				if (VIF_EXISTS(n)
> -				   && mfc->mfc_un.res.ttls[n] < 255)
> -				seq_printf(seq,
> -					   " %2d:%-3d",
> -					   n, mfc->mfc_un.res.ttls[n]);
> +				if (e && VIF_EXISTS(e, n)
> +				    && mfc->mfc_un.res.ttls[n] < 255) {
> +					seq_printf(seq,
> +						   " %2d:%-3d",
> +						   n, mfc->mfc_un.res.ttls[n]);
> +				}
>  			}
>  		}
>  		seq_putc(seq, '\n');
> @@ -1887,12 +2407,21 @@ static struct net_protocol pim_protocol = {
>   *	Setup for IP multicast routing
>   */
>  
> -void __init ip_mr_init(void)
> -{
> +void __init ip_mr_init(void) {
> +	int i;
> +	printk("Initializing IPv4 Multicast Routing with multiple table support.\n");
>  	mrt_cachep = kmem_cache_create("ip_mrt_cache",
>  				       sizeof(struct mfc_cache),
>  				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
>  				       NULL);
> +	if (!mrt_cachep) {
> +		printk("ERROR:  Failed to allocate mrt_cachep while initializing ipmr.\n");
> +	}
> +
> +	for (i = 0; i<MROUTE_TABLE_HASH_MAX; i++) {
> +		INIT_LIST_HEAD(&mroute_tables[i]);
> +	}
> +	
>  	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
>  	register_netdevice_notifier(&ip_mr_notifier);
>  #ifdef CONFIG_PROC_FS

> diff --git a/fea/iftree.cc b/fea/iftree.cc
> index 6b998b5..e7d329b 100644
> --- a/fea/iftree.cc
> +++ b/fea/iftree.cc
> @@ -1670,6 +1670,9 @@ IfTree::insert_ifindex(IfTreeInterface* ifp)
>  
>      iter = _ifindex_map.find(ifp->pif_index());
>      if (iter != _ifindex_map.end()) {
> +	XLOG_WARNING("_ifindex_map appears corrupted, found iter->second: %p (%d) != ifp: %p for pif_index: %d\n",
> +		     iter->second, iter->second->pif_index(), ifp, ifp->pif_index());
> +
>  	XLOG_ASSERT(iter->second == ifp);
>  	iter->second = ifp;
>  	return;
> diff --git a/fea/mfea_mrouter.cc b/fea/mfea_mrouter.cc
> index 4d94110..789b8e7 100644
> --- a/fea/mfea_mrouter.cc
> +++ b/fea/mfea_mrouter.cc
> @@ -85,6 +85,74 @@
>  #include "mfea_kernel_messages.hh"
>  #include "mfea_osdep.hh"
>  #include "mfea_mrouter.hh"
> +#include "fibconfig.hh"
> +
> +
> +#if defined(HOST_OS_LINUX)
> +// Attempt to use multiple mcast tables if kernel supports it.
> +#define USE_MULT_MCAST_TABLES
> +#endif
> +
> +#ifdef USE_MULT_MCAST_TABLES
> +/** In order to support multiple routing tables, the kernel API had to be extended.
> + * Since no distro has this currently in #include files, add private definitions
> + * here. --Ben
> + */
> +
> +// Assume supported until we know otherwise.
> +bool supports_mcast_tables = true;
> +
> +#define DFLT_MCAST_TABLE 254  /* 'main' routing table id in Linux */
> +
> +
> +// Support for multiple routing tables.
> +#define SIOCGETVIFCNT_NG	(SIOCPROTOPRIVATE+3)
> +#define SIOCGETSGCNT_NG	(SIOCPROTOPRIVATE+4)
> +#define SIOCGETRP_NG      (SIOCPROTOPRIVATE+5)
> +
> +/* For supporting multiple routing tables */
> +struct vifctl_ng {
> +	struct vifctl vif;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions */
> +} __attribute__ ((packed));
> +
> +struct mfcctl_ng {
> +	struct mfcctl mfc;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions */
> +} __attribute__ ((packed));
> +
> +/* Used with these options:
> +	case MRT_INIT:
> +	case MRT_DONE:
> +	case MRT_ASSERT:
> +#ifdef CONFIG_IP_PIMSM
> +	case MRT_PIM:
> +#endif
> +and all getsockopt options
> +*/
> +struct mrt_sockopt_simple {
> +	__u32 optval;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions */
> +} __attribute__ ((packed));
> +
> +struct sioc_sg_req_ng {
> +	struct sioc_sg_req req;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions */
> +} __attribute__ ((packed));
> +	
> +struct sioc_vif_req_ng {
> +	struct sioc_vif_req vif;
> +	__u32 table_id;
> +	char unused[32]; /* for future additions */
> +} __attribute__ ((packed));
> +
> +#else
> +bool supports_mcast_tables = false;
> +#endif
>  
>  
>  //
> @@ -123,14 +191,14 @@ typedef char *caddr_t;
>   * MfeaMrouter::MfeaMrouter:
>   * @mfea_node: The MfeaNode I belong to.
>   **/
> -MfeaMrouter::MfeaMrouter(MfeaNode& mfea_node)
> +MfeaMrouter::MfeaMrouter(MfeaNode& mfea_node, const FibConfig& fibconfig)
>      : ProtoUnit(mfea_node.family(), mfea_node.module_id()),
>        _mfea_node(mfea_node),
>        _mrt_api_mrt_mfc_flags_disable_wrongvif(false),
>        _mrt_api_mrt_mfc_flags_border_vif(false),
>        _mrt_api_mrt_mfc_rp(false),
>        _mrt_api_mrt_mfc_bw_upcall(false),
> -      _multicast_forwarding_enabled(false)
> +      _multicast_forwarding_enabled(false), _fibconfig(fibconfig)
>  {
>      string error_msg;
>  
> @@ -346,6 +414,14 @@ MfeaMrouter::stop()
>      return (XORP_OK);
>  }
>  
> +int MfeaMrouter::getTableId() const {
> +    int table_id = DFLT_MCAST_TABLE;
> +    if (_fibconfig.unicast_forwarding_table_id_is_configured(family())) {
> +        table_id = _fibconfig.unicast_forwarding_table_id(family());
> +    }
> +    return table_id;
> +}
> +
>  /**
>   * Test if the underlying system supports IPv4 multicast routing.
>   * 
> @@ -360,7 +436,14 @@ MfeaMrouter::have_multicast_routing4() const
>  #else
>      int s;
>      int mrouter_version = 1;	// XXX: hardcoded version
> -    
> +
> +#ifdef USE_MULT_MCAST_TABLES
> +    struct mrt_sockopt_simple tmp;
> +    memset(&tmp, 0, sizeof(tmp));
> +    tmp.table_id = getTableId();
> +    tmp.optval = 1; //version
> +#endif
> +
>      if (! is_ipv4())
>  	return (false);		// Wrong family
>      
> @@ -377,12 +460,24 @@ MfeaMrouter::have_multicast_routing4() const
>      s = socket(family(), SOCK_RAW, kernel_mrouter_ip_protocol());
>      if (s < 0)
>  	return (false);		// Failure to open the socket
> -    
> -    if (setsockopt(s, IPPROTO_IP, MRT_INIT,
> -		   (void *)&mrouter_version, sizeof(mrouter_version))
> -	< 0) {
> -	close(s);
> -	return (false);
> +
> +    // First, try for multiple routing tables.
> +#ifdef USE_MULT_MCAST_TABLES
> +    errno = 0;
> +    if (setsockopt(s, IPPROTO_IP, MRT_INIT, &tmp, sizeof(tmp)) < 0) {
> +	// Ok, not this
> +	supports_mcast_tables = false;
> +    }
> +    else {
> +	supports_mcast_tables = true;
> +    }
> +#endif
> +
> +    if (!supports_mcast_tables) {
> +	if (setsockopt(s, IPPROTO_IP, MRT_INIT, &mrouter_version, sizeof(mrouter_version)) < 0) {
> +	    close(s);
> +	    return (false);
> +	}
>      }
>      
>      // Success
> @@ -758,12 +853,33 @@ MfeaMrouter::start_mrt()
>  		       error_msg.c_str());
>  	    return (XORP_ERROR);
>  	}
> -	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_INIT,
> -		       (void *)&mrouter_version, sizeof(mrouter_version))
> -	    < 0) {
> -	    XLOG_ERROR("setsockopt(MRT_INIT, %u) failed: %s",
> -		       mrouter_version, strerror(errno));
> -	    return (XORP_ERROR);
> +
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct mrt_sockopt_simple tmp;
> +	memset(&tmp, 0, sizeof(tmp));
> +	tmp.table_id = getTableId();
> +	tmp.optval = 1; //version
> +
> +	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_INIT, &tmp, sizeof(tmp)) < 0) {
> +	    // Ok, not this
> +	    supports_mcast_tables = false;
> +	    XLOG_ERROR("MROUTE:  WARNING:  setsockopt(MRT_INIT does not support multiple routing tables:: %s",
> +		       strerror(errno));
> +	}
> +	else {
> +	    supports_mcast_tables = true;
> +	    XLOG_ERROR("MROUTE:  setsockopt(MRT_INIT supports multiple routing tables!");
> +	}
> +#endif
> +
> +	if (!supports_mcast_tables) {
> +	    if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_INIT,
> +			   (void *)&mrouter_version, sizeof(mrouter_version))
> +		< 0) {
> +		XLOG_ERROR("setsockopt(MRT_INIT, %u) failed: %s",
> +			   mrouter_version, strerror(errno));
> +		return (XORP_ERROR);
> +	    }
>  	}
>  #endif // HAVE_IPV4_MULTICAST_ROUTING
>  	break;
> @@ -966,6 +1082,9 @@ MfeaMrouter::stop_mrt()
>      if (!_mrouter_socket.is_valid())
>  	return (XORP_ERROR);
>      
> +    size_t sz = 0;
> +    void* o = NULL;
> +
>      switch (family()) {
>      case AF_INET:
>  #ifndef HAVE_IPV4_MULTICAST_ROUTING
> @@ -978,8 +1097,21 @@ MfeaMrouter::stop_mrt()
>  		       error_msg.c_str());
>  	    return (XORP_ERROR);
>  	}
> -	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_DONE, NULL, 0)
> -	    < 0) {
> +
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct mrt_sockopt_simple tmp;
> +	memset(&tmp, 0, sizeof(tmp));
> +	tmp.table_id = getTableId();
> +	tmp.optval = 1; //version
> +	sz = sizeof(tmp);
> +	o = &tmp;
> +	if (!supports_mcast_tables) {
> +	    sz = 0;
> +	    o = NULL;
> +	}
> +#endif
> +
> +	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_DONE, o, sz) < 0) {
>  	    XLOG_ERROR("setsockopt(MRT_DONE) failed: %s", strerror(errno));
>  	    return (XORP_ERROR);
>  	}
> @@ -1019,6 +1151,8 @@ int
>  MfeaMrouter::start_pim(string& error_msg)
>  {
>      int v = 1;
> +    size_t sz = 0;
> +    void* o = NULL;
>  
>      switch (family()) {
>      case AF_INET:
> @@ -1027,8 +1161,23 @@ MfeaMrouter::start_pim(string& error_msg)
>  			     "IPv4 multicast routing not supported");
>  	return (XORP_ERROR);
>  #else
> -	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_PIM,
> -		       (void *)&v, sizeof(v)) < 0) {
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct mrt_sockopt_simple tmp;
> +	memset(&tmp, 0, sizeof(tmp));
> +	tmp.table_id = getTableId();
> +	tmp.optval = 1; //pim
> +	sz = sizeof(tmp);
> +	o = &tmp;
> +	if (!supports_mcast_tables) {
> +	    sz = sizeof(v);
> +	    o = &v;
> +	}
> +#else
> +	sz = sizeof(v);
> +	o = &v;
> +#endif
> +
> +	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_PIM, o, sz) < 0) {
>  	    error_msg = c_format("setsockopt(MRT_PIM, %u) failed: %s",
>  				 v, strerror(errno));
>  	    return (XORP_ERROR);
> @@ -1068,6 +1217,8 @@ int
>  MfeaMrouter::stop_pim(string& error_msg)
>  {
>      int v = 0;
> +    size_t sz = 0;
> +    void* o = NULL;
>  
>      if (!_mrouter_socket.is_valid())
>  	return (XORP_ERROR);
> @@ -1079,9 +1230,23 @@ MfeaMrouter::stop_pim(string& error_msg)
>  			     "IPv4 multicast routing not supported");
>  	return (XORP_ERROR);
>  #else
> -	v = 0;
> -	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_PIM,
> -		       (void *)&v, sizeof(v)) < 0) {
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct mrt_sockopt_simple tmp;
> +	memset(&tmp, 0, sizeof(tmp));
> +	tmp.table_id = getTableId();
> +	tmp.optval = 0; //pim
> +	sz = sizeof(tmp);
> +	o = &tmp;
> +	if (!supports_mcast_tables) {
> +	    sz = sizeof(v);
> +	    o = &v;
> +	}
> +#else
> +	sz = sizeof(v);
> +	o = &v;
> +#endif
> +
> +	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_PIM, o, sz) < 0) {
>  	    error_msg = c_format("setsockopt(MRT_PIM, %u) failed: %s",
>  				 v, strerror(errno));
>  	    return (XORP_ERROR);
> @@ -1133,6 +1298,9 @@ MfeaMrouter::add_multicast_vif(uint32_t vif_index)
>      
>      if (mfea_vif == NULL)
>  	return (XORP_ERROR);
> +
> +    void* sopt_arg = NULL;
> +    size_t sz = 0;
>      
>      switch (family()) {
>      case AF_INET:
> @@ -1142,9 +1310,24 @@ MfeaMrouter::add_multicast_vif(uint32_t vif_index)
>  		   "IPv4 multicast routing not supported");
>  	return (XORP_ERROR);
>  #else
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct vifctl_ng vc_ng;
> +	struct vifctl& vc = vc_ng.vif;
> +	memset(&vc_ng, 0, sizeof(vc_ng));
> +	sopt_arg = &vc_ng;
> +	sz = sizeof(vc_ng);
> +	vc_ng.table_id = getTableId();
> +	if (!supports_mcast_tables) {
> +	    sopt_arg = &(vc_ng.vif);
> +	    sz = sizeof(vc_ng.vif);
> +	}
> +#else
>  	struct vifctl vc;
> -	
>  	memset(&vc, 0, sizeof(vc));
> +	sopt_arg = &vc;
> +	sz = sizeof(vc);
> +#endif
> +
>  	vc.vifc_vifi = mfea_vif->vif_index();
>  	// XXX: we don't (need to) support VIFF_TUNNEL; VIFF_SRCRT is obsolete
>  	vc.vifc_flags = 0;
> @@ -1164,7 +1347,7 @@ MfeaMrouter::add_multicast_vif(uint32_t vif_index)
>  	// because we don't (need to) support IPIP tunnels.
>  	//
>  	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_ADD_VIF,
> -		       (void *)&vc, sizeof(vc)) < 0) {
> +		       sopt_arg, sz) < 0) {
>  	    XLOG_ERROR("setsockopt(MRT_ADD_VIF, vif %s) failed: %s",
>  		       mfea_vif->name().c_str(), strerror(errno));
>  	    return (XORP_ERROR);
> @@ -1245,11 +1428,26 @@ MfeaMrouter::delete_multicast_vif(uint32_t vif_index)
>  	// an argument of type "vifi_t".
>  	//
>  #ifdef HOST_OS_LINUX
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct vifctl_ng vc_ng;
> +	struct vifctl& vc = vc_ng.vif;
> +	memset(&vc_ng, 0, sizeof(vc_ng));
> +	void* sopt_arg = &vc_ng;
> +	size_t sz = sizeof(vc_ng);
> +	vc_ng.table_id = getTableId();
> +	if (!supports_mcast_tables) {
> +	    sopt_arg = &(vc_ng.vif);
> +	    sz = sizeof(vc_ng.vif);
> +	}
> +#else
>  	struct vifctl vc;
>  	memset(&vc, 0, sizeof(vc));
> +	void* sopt_arg = &vc;
> +	size_t sz = sizeof(vc);
> +#endif
>  	vc.vifc_vifi = mfea_vif->vif_index();
>  	ret_value = setsockopt(_mrouter_socket, IPPROTO_IP, MRT_DEL_VIF,
> -			       (void *)&vc, sizeof(vc));
> +			       sopt_arg, sz);
>  #else
>  	vifi_t vifi = mfea_vif->vif_index();
>  	ret_value = setsockopt(_mrouter_socket, IPPROTO_IP, MRT_DEL_VIF,
> @@ -1376,13 +1574,28 @@ MfeaMrouter::add_mfc(const IPvX& source, const IPvX& group,
>  	return (XORP_ERROR);
>  #else
>  
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct mfcctl_ng mc_ng;
> +	struct mfcctl& mc = mc_ng.mfc;
> +	memset(&mc_ng, 0, sizeof(mc_ng));
> +	void* sopt_arg = &mc_ng;
> +	size_t sz = sizeof(mc_ng);
> +	mc_ng.table_id = getTableId();
> +	if (!supports_mcast_tables) {
> +	    sopt_arg = &(mc_ng.mfc);
> +	    sz = sizeof(mc_ng.mfc);
> +	}
> +#else
>  #if defined(HAVE_STRUCT_MFCCTL2) && defined(ENABLE_ADVANCED_MULTICAST_API)
>  	struct mfcctl2 mc;
>  #else
>  	struct mfcctl mc;
>  #endif
> -	
> +	void* sopt_arg = &mc;
> +	size_t sz = sizeof(mc);
>  	memset(&mc, 0, sizeof(mc));
> +#endif
> +	
>  	source.copy_out(mc.mfcc_origin);
>  	group.copy_out(mc.mfcc_mcastgrp);
>  	mc.mfcc_parent = iif_vif_index;
> @@ -1398,7 +1611,7 @@ MfeaMrouter::add_mfc(const IPvX& source, const IPvX& group,
>  #endif
>  	
>  	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_ADD_MFC,
> -		       (void *)&mc, sizeof(mc)) < 0) {
> +		       sopt_arg, sz) < 0) {
>  	    XLOG_ERROR("setsockopt(MRT_ADD_MFC, (%s, %s)) failed: %s",
>  		       cstring(source), cstring(group), strerror(errno));
>  	    return (XORP_ERROR);
> @@ -1489,13 +1702,28 @@ MfeaMrouter::delete_mfc(const IPvX& source, const IPvX& group)
>  		   "IPv4 multicast routing not supported");
>  	return (XORP_ERROR);
>  #else
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct mfcctl_ng mc_ng;
> +	struct mfcctl& mc = mc_ng.mfc;
> +	memset(&mc_ng, 0, sizeof(mc_ng));
> +	void* sopt_arg = &mc_ng;
> +	size_t sz = sizeof(mc_ng);
> +	mc_ng.table_id = getTableId();
> +	if (!supports_mcast_tables) {
> +	    sopt_arg = &(mc_ng.mfc);
> +	    sz = sizeof(mc_ng.mfc);
> +	}
> +#else
>  	struct mfcctl mc;
> -	
> +	void* sopt_arg = &mc;
> +	size_t sz = sizeof(mc);
> +#endif
> +
>  	source.copy_out(mc.mfcc_origin);
>  	group.copy_out(mc.mfcc_mcastgrp);
>  	
>  	if (setsockopt(_mrouter_socket, IPPROTO_IP, MRT_DEL_MFC,
> -		       (void *)&mc, sizeof(mc)) < 0) {
> +		       sopt_arg, sz) < 0) {
>  	    XLOG_ERROR("setsockopt(MRT_DEL_MFC, (%s, %s)) failed: %s",
>  		       cstring(source), cstring(group), strerror(errno));
>  	    return (XORP_ERROR);
> @@ -2077,18 +2305,34 @@ MfeaMrouter::get_sg_count(const IPvX& source, const IPvX& group,
>  		   "IPv4 multicast routing not supported");
>  	return (XORP_ERROR);
>  #else
> +	int ioctl_cmd = SIOCGETSGCNT;
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct sioc_sg_req_ng sgreq_ng;
> +	memset(&sgreq_ng, 0, sizeof(sgreq_ng));
> +	sgreq_ng.table_id = getTableId();
> +	struct sioc_sg_req& sgreq = (sgreq_ng.req);
> +	void* o = &sgreq_ng;
> +	ioctl_cmd = SIOCGETSGCNT_NG;
> +	if (!supports_mcast_tables) {
> +	    o = &(sgreq_ng.req);
> +	    ioctl_cmd = SIOCGETSGCNT;
> +	}
> +#else
>  	struct sioc_sg_req sgreq;
> -	
>  	memset(&sgreq, 0, sizeof(sgreq));
> +	void* o = &sgreq;
> +#endif
> +	
>  	source.copy_out(sgreq.src);
>  	group.copy_out(sgreq.grp);
> +
>  	//
>  	// XXX: some older mcast code has bug in ip_mroute.c, get_sg_cnt():
>  	// the return code is always 0, so this is why we need to check
>  	// if all values are 0xffffffffU (the indication for error).
>  	// TODO: remove the 0xffffffffU check in the future.
>  	//
> -	if ((ioctl(_mrouter_socket, SIOCGETSGCNT, &sgreq) < 0)
> +	if ((ioctl(_mrouter_socket, ioctl_cmd, o) < 0)
>  	    || ((sgreq.pktcnt == 0xffffffffU)
>  		&& (sgreq.bytecnt == 0xffffffffU)
>  		&& (sgreq.wrong_if == 0xffffffffU))) {
> @@ -2176,11 +2420,27 @@ MfeaMrouter::get_vif_count(uint32_t vif_index, VifCount& vif_count)
>  		   "IPv4 multicast routing not supported");
>  	return (XORP_ERROR);
>  #else
> +	int ioctl_cmd = SIOCGETVIFCNT;
> +#ifdef USE_MULT_MCAST_TABLES
> +	struct sioc_vif_req_ng vreq_ng;
> +	memset(&vreq_ng, 0, sizeof(vreq_ng));
> +	vreq_ng.table_id = getTableId();
> +	struct sioc_vif_req& vreq = (vreq_ng.vif);
> +	void* o = &vreq_ng;
> +	ioctl_cmd = SIOCGETVIFCNT_NG;
> +	if (!supports_mcast_tables) {
> +	    o = &(vreq_ng.vif);
> +	    ioctl_cmd = SIOCGETVIFCNT;
> +	}
> +#else
>  	struct sioc_vif_req vreq;
> -
>  	memset(&vreq, 0, sizeof(vreq));
> +	void* o = &vreq;
> +#endif
> +
>  	vreq.vifi = mfea_vif->vif_index();
> -	if (ioctl(_mrouter_socket, SIOCGETVIFCNT, &vreq) < 0) {
> +
> +	if (ioctl(_mrouter_socket, ioctl_cmd, o) < 0) {
>  	    XLOG_ERROR("ioctl(SIOCGETVIFCNT, vif %s) failed: %s",
>  		       mfea_vif->name().c_str(), strerror(errno));
>  	    vif_count.set_icount(~0U);
> diff --git a/fea/mfea_mrouter.hh b/fea/mfea_mrouter.hh
> index 0d6b77b..00ac2b7 100644
> --- a/fea/mfea_mrouter.hh
> +++ b/fea/mfea_mrouter.hh
> @@ -45,6 +45,7 @@ class MfeaNode;
>  class SgCount;
>  class TimeVal;
>  class VifCount;
> +class FibConfig;
>  
>  
>  /**
> @@ -60,7 +61,7 @@ public:
>       * 
>       * @param mfea_node the MFEA node (@ref MfeaNode) this entry belongs to.
>       */
> -    MfeaMrouter(MfeaNode& mfea_node);
> +    MfeaMrouter(MfeaNode& mfea_node, const FibConfig& fibconfig);
>      
>      /**
>       * Destructor
> @@ -80,7 +81,13 @@ public:
>       * @return XORP_OK on success, otherwise XORP_ERROR.
>       */
>      int		stop();
> -    
> +
> +    /** Get the multicast table id that is currently configured.
> +     * Currently, changing configured table-id at run-time will break
> +     * things, by the way.
> +     */
> +    int getTableId() const;
> +
>      /**
>       * Test if the underlying system supports IPv4 multicast routing.
>       * 
> @@ -445,6 +452,7 @@ private:
>      // Original state from the underlying system before the MFEA was started
>      //
>      bool	_multicast_forwarding_enabled;
> +    const FibConfig& _fibconfig;
>  };
>  
>  /**
> diff --git a/fea/mfea_node.cc b/fea/mfea_node.cc
> index f42a34d..d66d1bb 100644
> --- a/fea/mfea_node.cc
> +++ b/fea/mfea_node.cc
> @@ -74,7 +74,7 @@ MfeaNode::MfeaNode(FeaNode& fea_node, int family, xorp_module_id module_id,
>      : ProtoNode<MfeaVif>(family, module_id, eventloop),
>        IfConfigUpdateReporterBase(fea_node.ifconfig().ifconfig_update_replicator()),
>        _fea_node(fea_node),
> -      _mfea_mrouter(*this),
> +      _mfea_mrouter(*this, fea_node.fibconfig()),
>        _mfea_dft(*this),
>        _mfea_iftree("mfea-tree"),
>        _mfea_iftree_update_replicator(_mfea_iftree),
> diff --git a/fea/xrl_mfea_node.cc b/fea/xrl_mfea_node.cc
> index 5ed9ce5..822e398 100644
> --- a/fea/xrl_mfea_node.cc
> +++ b/fea/xrl_mfea_node.cc
> @@ -40,7 +40,7 @@ XrlMfeaNode::XrlMfeaNode(FeaNode&	fea_node,
>  			 const string&	finder_hostname,
>  			 uint16_t	finder_port,
>  			 const string&	finder_target)
> -    : MfeaNode(fea_node, family, module_id, eventloop),
> +	: MfeaNode(fea_node, family, module_id, eventloop),
>        XrlStdRouter(eventloop, class_name.c_str(), finder_hostname.c_str(),
>  		   finder_port),
>        XrlMfeaTargetBase(&xrl_router()),
> diff --git a/fea/xrl_mfea_node.hh b/fea/xrl_mfea_node.hh
> index be24b27..e419741 100644
> --- a/fea/xrl_mfea_node.hh
> +++ b/fea/xrl_mfea_node.hh
> @@ -50,6 +50,7 @@ public:
>  		const string&	finder_hostname,
>  		uint16_t	finder_port,
>  		const string&	finder_target);
> +
>      virtual ~XrlMfeaNode();
>  
>      /**

> _______________________________________________
> Xorp-hackers mailing list
> Xorp-hackers at icir.org
> http://mailman.ICSI.Berkeley.EDU/mailman/listinfo/xorp-hackers


-- 
James R. Leu
jleu at mindspring.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
Url : http://mailman.ICSI.Berkeley.EDU/pipermail/xorp-hackers/attachments/20080606/6bb98c53/attachment-0001.bin 


More information about the Xorp-hackers mailing list