Browse Source

Merge branch 'tipc-next'

Ying Xue says:

====================
tipc: convert name table read-write lock to RCU

Currently the TIPC name table is statically allocated and protected by a
read-write lock. To improve the performance of name table lookups, we
are going to protect the name table with RCU instead. As a consequence,
concurrent lookups on the read side become lockless. However, before
the conversion can be made, the following two things must be done
first:

- change the name table allocation from static to dynamic
- fix several incorrect locking policy issues
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 10 years ago
parent
commit
66813d4d04
5 changed files with 157 additions and 158 deletions
  1. 9 0
      include/linux/rculist.h
  2. 27 56
      net/tipc/name_distr.c
  3. 103 99
      net/tipc/name_table.c
  4. 18 2
      net/tipc/name_table.h
  5. 0 1
      net/tipc/subscr.c

+ 9 - 0
include/linux/rculist.h

@@ -542,6 +542,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
 	     pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\
 	     pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\
 			typeof(*(pos)), member))
 			typeof(*(pos)), member))
 
 
+/**
+ * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point
+ * @pos:	the type * to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from_rcu(pos, member)			\
+	for (; pos;							\
+	     pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\
+			typeof(*(pos)), member))
 
 
 #endif	/* __KERNEL__ */
 #endif	/* __KERNEL__ */
 #endif
 #endif

+ 27 - 56
net/tipc/name_distr.c

@@ -38,39 +38,6 @@
 #include "link.h"
 #include "link.h"
 #include "name_distr.h"
 #include "name_distr.h"
 
 
-/**
- * struct publ_list - list of publications made by this node
- * @list: circular list of publications
- * @list_size: number of entries in list
- */
-struct publ_list {
-	struct list_head list;
-	u32 size;
-};
-
-static struct publ_list publ_zone = {
-	.list = LIST_HEAD_INIT(publ_zone.list),
-	.size = 0,
-};
-
-static struct publ_list publ_cluster = {
-	.list = LIST_HEAD_INIT(publ_cluster.list),
-	.size = 0,
-};
-
-static struct publ_list publ_node = {
-	.list = LIST_HEAD_INIT(publ_node.list),
-	.size = 0,
-};
-
-static struct publ_list *publ_lists[] = {
-	NULL,
-	&publ_zone,	/* publ_lists[TIPC_ZONE_SCOPE]		*/
-	&publ_cluster,	/* publ_lists[TIPC_CLUSTER_SCOPE]	*/
-	&publ_node	/* publ_lists[TIPC_NODE_SCOPE]		*/
-};
-
-
 int sysctl_tipc_named_timeout __read_mostly = 2000;
 int sysctl_tipc_named_timeout __read_mostly = 2000;
 
 
 /**
 /**
@@ -146,8 +113,8 @@ struct sk_buff *tipc_named_publish(struct publication *publ)
 	struct sk_buff *buf;
 	struct sk_buff *buf;
 	struct distr_item *item;
 	struct distr_item *item;
 
 
-	list_add_tail(&publ->local_list, &publ_lists[publ->scope]->list);
-	publ_lists[publ->scope]->size++;
+	list_add_tail_rcu(&publ->local_list,
+			  &tipc_nametbl->publ_list[publ->scope]);
 
 
 	if (publ->scope == TIPC_NODE_SCOPE)
 	if (publ->scope == TIPC_NODE_SCOPE)
 		return NULL;
 		return NULL;
@@ -172,7 +139,6 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ)
 	struct distr_item *item;
 	struct distr_item *item;
 
 
 	list_del(&publ->local_list);
 	list_del(&publ->local_list);
-	publ_lists[publ->scope]->size--;
 
 
 	if (publ->scope == TIPC_NODE_SCOPE)
 	if (publ->scope == TIPC_NODE_SCOPE)
 		return NULL;
 		return NULL;
@@ -195,21 +161,17 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ)
  * @pls: linked list of publication items to be packed into buffer chain
  * @pls: linked list of publication items to be packed into buffer chain
  */
  */
 static void named_distribute(struct sk_buff_head *list, u32 dnode,
 static void named_distribute(struct sk_buff_head *list, u32 dnode,
-			     struct publ_list *pls)
+			     struct list_head *pls)
 {
 {
 	struct publication *publ;
 	struct publication *publ;
 	struct sk_buff *skb = NULL;
 	struct sk_buff *skb = NULL;
 	struct distr_item *item = NULL;
 	struct distr_item *item = NULL;
-	uint dsz = pls->size * ITEM_SIZE;
 	uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE;
 	uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE;
-	uint rem = dsz;
-	uint msg_rem = 0;
+	uint msg_rem = msg_dsz;
 
 
-	list_for_each_entry(publ, &pls->list, local_list) {
+	list_for_each_entry_rcu(publ, pls, local_list) {
 		/* Prepare next buffer: */
 		/* Prepare next buffer: */
 		if (!skb) {
 		if (!skb) {
-			msg_rem = min_t(uint, rem, msg_dsz);
-			rem -= msg_rem;
 			skb = named_prepare_buf(PUBLICATION, msg_rem, dnode);
 			skb = named_prepare_buf(PUBLICATION, msg_rem, dnode);
 			if (!skb) {
 			if (!skb) {
 				pr_warn("Bulk publication failure\n");
 				pr_warn("Bulk publication failure\n");
@@ -227,8 +189,14 @@ static void named_distribute(struct sk_buff_head *list, u32 dnode,
 		if (!msg_rem) {
 		if (!msg_rem) {
 			__skb_queue_tail(list, skb);
 			__skb_queue_tail(list, skb);
 			skb = NULL;
 			skb = NULL;
+			msg_rem = msg_dsz;
 		}
 		}
 	}
 	}
+	if (skb) {
+		msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem));
+		skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
+		__skb_queue_tail(list, skb);
+	}
 }
 }
 
 
 /**
 /**
@@ -240,10 +208,12 @@ void tipc_named_node_up(u32 dnode)
 
 
 	__skb_queue_head_init(&head);
 	__skb_queue_head_init(&head);
 
 
-	read_lock_bh(&tipc_nametbl_lock);
-	named_distribute(&head, dnode, &publ_cluster);
-	named_distribute(&head, dnode, &publ_zone);
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_lock();
+	named_distribute(&head, dnode,
+			 &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
+	named_distribute(&head, dnode,
+			 &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
+	rcu_read_unlock();
 
 
 	tipc_link_xmit(&head, dnode, dnode);
 	tipc_link_xmit(&head, dnode, dnode);
 }
 }
@@ -290,12 +260,12 @@ static void tipc_publ_purge(struct publication *publ, u32 addr)
 {
 {
 	struct publication *p;
 	struct publication *p;
 
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	p = tipc_nametbl_remove_publ(publ->type, publ->lower,
 	p = tipc_nametbl_remove_publ(publ->type, publ->lower,
 				     publ->node, publ->ref, publ->key);
 				     publ->node, publ->ref, publ->key);
 	if (p)
 	if (p)
 		tipc_publ_unsubscribe(p, addr);
 		tipc_publ_unsubscribe(p, addr);
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 
 
 	if (p != publ) {
 	if (p != publ) {
 		pr_err("Unable to remove publication from failed node\n"
 		pr_err("Unable to remove publication from failed node\n"
@@ -304,7 +274,7 @@ static void tipc_publ_purge(struct publication *publ, u32 addr)
 		       publ->key);
 		       publ->key);
 	}
 	}
 
 
-	kfree(p);
+	kfree_rcu(p, rcu);
 }
 }
 
 
 void tipc_publ_notify(struct list_head *nsub_list, u32 addr)
 void tipc_publ_notify(struct list_head *nsub_list, u32 addr)
@@ -341,7 +311,7 @@ static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype)
 						ntohl(i->key));
 						ntohl(i->key));
 		if (publ) {
 		if (publ) {
 			tipc_publ_unsubscribe(publ, node);
 			tipc_publ_unsubscribe(publ, node);
-			kfree(publ);
+			kfree_rcu(publ, rcu);
 			return true;
 			return true;
 		}
 		}
 	} else {
 	} else {
@@ -406,14 +376,14 @@ void tipc_named_rcv(struct sk_buff *buf)
 	u32 count = msg_data_sz(msg) / ITEM_SIZE;
 	u32 count = msg_data_sz(msg) / ITEM_SIZE;
 	u32 node = msg_orignode(msg);
 	u32 node = msg_orignode(msg);
 
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	while (count--) {
 	while (count--) {
 		if (!tipc_update_nametbl(item, node, msg_type(msg)))
 		if (!tipc_update_nametbl(item, node, msg_type(msg)))
 			tipc_named_add_backlog(item, msg_type(msg), node);
 			tipc_named_add_backlog(item, msg_type(msg), node);
 		item++;
 		item++;
 	}
 	}
 	tipc_named_process_backlog();
 	tipc_named_process_backlog();
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 	kfree_skb(buf);
 	kfree_skb(buf);
 }
 }
 
 
@@ -429,11 +399,12 @@ void tipc_named_reinit(void)
 	struct publication *publ;
 	struct publication *publ;
 	int scope;
 	int scope;
 
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 
 
 	for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
 	for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
-		list_for_each_entry(publ, &publ_lists[scope]->list, local_list)
+		list_for_each_entry_rcu(publ, &tipc_nametbl->publ_list[scope],
+					local_list)
 			publ->node = tipc_own_addr;
 			publ->node = tipc_own_addr;
 
 
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 }
 }

+ 103 - 99
net/tipc/name_table.c

@@ -2,7 +2,7 @@
  * net/tipc/name_table.c: TIPC name table code
  * net/tipc/name_table.c: TIPC name table code
  *
  *
  * Copyright (c) 2000-2006, 2014, Ericsson AB
  * Copyright (c) 2000-2006, 2014, Ericsson AB
- * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
+ * Copyright (c) 2004-2008, 2010-2014, Wind River Systems
  * All rights reserved.
  * All rights reserved.
  *
  *
  * Redistribution and use in source and binary forms, with or without
  * Redistribution and use in source and binary forms, with or without
@@ -92,6 +92,7 @@ struct sub_seq {
  * @ns_list: links to adjacent name sequences in hash chain
  * @ns_list: links to adjacent name sequences in hash chain
  * @subscriptions: list of subscriptions for this 'type'
  * @subscriptions: list of subscriptions for this 'type'
  * @lock: spinlock controlling access to publication lists of all sub-sequences
  * @lock: spinlock controlling access to publication lists of all sub-sequences
+ * @rcu: RCU callback head used for deferred freeing
  */
  */
 struct name_seq {
 struct name_seq {
 	u32 type;
 	u32 type;
@@ -101,21 +102,11 @@ struct name_seq {
 	struct hlist_node ns_list;
 	struct hlist_node ns_list;
 	struct list_head subscriptions;
 	struct list_head subscriptions;
 	spinlock_t lock;
 	spinlock_t lock;
+	struct rcu_head rcu;
 };
 };
 
 
-/**
- * struct name_table - table containing all existing port name publications
- * @types: pointer to fixed-sized array of name sequence lists,
- *         accessed via hashing on 'type'; name sequence lists are *not* sorted
- * @local_publ_count: number of publications issued by this node
- */
-struct name_table {
-	struct hlist_head *types;
-	u32 local_publ_count;
-};
-
-static struct name_table table;
-DEFINE_RWLOCK(tipc_nametbl_lock);
+struct name_table *tipc_nametbl;
+DEFINE_SPINLOCK(tipc_nametbl_lock);
 
 
 static int hash(int x)
 static int hash(int x)
 {
 {
@@ -142,9 +133,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
 	publ->node = node;
 	publ->node = node;
 	publ->ref = port_ref;
 	publ->ref = port_ref;
 	publ->key = key;
 	publ->key = key;
-	INIT_LIST_HEAD(&publ->local_list);
 	INIT_LIST_HEAD(&publ->pport_list);
 	INIT_LIST_HEAD(&publ->pport_list);
-	INIT_LIST_HEAD(&publ->nodesub_list);
 	return publ;
 	return publ;
 }
 }
 
 
@@ -179,22 +168,10 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea
 	nseq->alloc = 1;
 	nseq->alloc = 1;
 	INIT_HLIST_NODE(&nseq->ns_list);
 	INIT_HLIST_NODE(&nseq->ns_list);
 	INIT_LIST_HEAD(&nseq->subscriptions);
 	INIT_LIST_HEAD(&nseq->subscriptions);
-	hlist_add_head(&nseq->ns_list, seq_head);
+	hlist_add_head_rcu(&nseq->ns_list, seq_head);
 	return nseq;
 	return nseq;
 }
 }
 
 
-/*
- * nameseq_delete_empty - deletes a name sequence structure if now unused
- */
-static void nameseq_delete_empty(struct name_seq *seq)
-{
-	if (!seq->first_free && list_empty(&seq->subscriptions)) {
-		hlist_del_init(&seq->ns_list);
-		kfree(seq->sseqs);
-		kfree(seq);
-	}
-}
-
 /**
 /**
  * nameseq_find_subseq - find sub-sequence (if any) matching a name instance
  * nameseq_find_subseq - find sub-sequence (if any) matching a name instance
  *
  *
@@ -475,8 +452,8 @@ static struct name_seq *nametbl_find_seq(u32 type)
 	struct hlist_head *seq_head;
 	struct hlist_head *seq_head;
 	struct name_seq *ns;
 	struct name_seq *ns;
 
 
-	seq_head = &table.types[hash(type)];
-	hlist_for_each_entry(ns, seq_head, ns_list) {
+	seq_head = &tipc_nametbl->seq_hlist[hash(type)];
+	hlist_for_each_entry_rcu(ns, seq_head, ns_list) {
 		if (ns->type == type)
 		if (ns->type == type)
 			return ns;
 			return ns;
 	}
 	}
@@ -487,7 +464,9 @@ static struct name_seq *nametbl_find_seq(u32 type)
 struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
 struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
 					     u32 scope, u32 node, u32 port, u32 key)
 					     u32 scope, u32 node, u32 port, u32 key)
 {
 {
+	struct publication *publ;
 	struct name_seq *seq = nametbl_find_seq(type);
 	struct name_seq *seq = nametbl_find_seq(type);
+	int index = hash(type);
 
 
 	if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
 	if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
 	    (lower > upper)) {
 	    (lower > upper)) {
@@ -497,12 +476,16 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
 	}
 	}
 
 
 	if (!seq)
 	if (!seq)
-		seq = tipc_nameseq_create(type, &table.types[hash(type)]);
+		seq = tipc_nameseq_create(type,
+					  &tipc_nametbl->seq_hlist[index]);
 	if (!seq)
 	if (!seq)
 		return NULL;
 		return NULL;
 
 
-	return tipc_nameseq_insert_publ(seq, type, lower, upper,
+	spin_lock_bh(&seq->lock);
+	publ = tipc_nameseq_insert_publ(seq, type, lower, upper,
 					scope, node, port, key);
 					scope, node, port, key);
+	spin_unlock_bh(&seq->lock);
+	return publ;
 }
 }
 
 
 struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
 struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
@@ -514,8 +497,16 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
 	if (!seq)
 	if (!seq)
 		return NULL;
 		return NULL;
 
 
+	spin_lock_bh(&seq->lock);
 	publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key);
 	publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key);
-	nameseq_delete_empty(seq);
+	if (!seq->first_free && list_empty(&seq->subscriptions)) {
+		hlist_del_init_rcu(&seq->ns_list);
+		kfree(seq->sseqs);
+		spin_unlock_bh(&seq->lock);
+		kfree_rcu(seq, rcu);
+		return publ;
+	}
+	spin_unlock_bh(&seq->lock);
 	return publ;
 	return publ;
 }
 }
 
 
@@ -544,14 +535,14 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
 	if (!tipc_in_scope(*destnode, tipc_own_addr))
 	if (!tipc_in_scope(*destnode, tipc_own_addr))
 		return 0;
 		return 0;
 
 
-	read_lock_bh(&tipc_nametbl_lock);
+	rcu_read_lock();
 	seq = nametbl_find_seq(type);
 	seq = nametbl_find_seq(type);
 	if (unlikely(!seq))
 	if (unlikely(!seq))
 		goto not_found;
 		goto not_found;
+	spin_lock_bh(&seq->lock);
 	sseq = nameseq_find_subseq(seq, instance);
 	sseq = nameseq_find_subseq(seq, instance);
 	if (unlikely(!sseq))
 	if (unlikely(!sseq))
-		goto not_found;
-	spin_lock_bh(&seq->lock);
+		goto no_match;
 	info = sseq->info;
 	info = sseq->info;
 
 
 	/* Closest-First Algorithm */
 	/* Closest-First Algorithm */
@@ -601,7 +592,7 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
 no_match:
 no_match:
 	spin_unlock_bh(&seq->lock);
 	spin_unlock_bh(&seq->lock);
 not_found:
 not_found:
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_unlock();
 	*destnode = node;
 	*destnode = node;
 	return ref;
 	return ref;
 }
 }
@@ -627,13 +618,12 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
 	struct name_info *info;
 	struct name_info *info;
 	int res = 0;
 	int res = 0;
 
 
-	read_lock_bh(&tipc_nametbl_lock);
+	rcu_read_lock();
 	seq = nametbl_find_seq(type);
 	seq = nametbl_find_seq(type);
 	if (!seq)
 	if (!seq)
 		goto exit;
 		goto exit;
 
 
 	spin_lock_bh(&seq->lock);
 	spin_lock_bh(&seq->lock);
-
 	sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
 	sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
 	sseq_stop = seq->sseqs + seq->first_free;
 	sseq_stop = seq->sseqs + seq->first_free;
 	for (; sseq != sseq_stop; sseq++) {
 	for (; sseq != sseq_stop; sseq++) {
@@ -651,10 +641,9 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
 		if (info->cluster_list_size != info->node_list_size)
 		if (info->cluster_list_size != info->node_list_size)
 			res = 1;
 			res = 1;
 	}
 	}
-
 	spin_unlock_bh(&seq->lock);
 	spin_unlock_bh(&seq->lock);
 exit:
 exit:
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_unlock();
 	return res;
 	return res;
 }
 }
 
 
@@ -667,22 +656,23 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
 	struct publication *publ;
 	struct publication *publ;
 	struct sk_buff *buf = NULL;
 	struct sk_buff *buf = NULL;
 
 
-	if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
+	spin_lock_bh(&tipc_nametbl_lock);
+	if (tipc_nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) {
 		pr_warn("Publication failed, local publication limit reached (%u)\n",
 		pr_warn("Publication failed, local publication limit reached (%u)\n",
 			TIPC_MAX_PUBLICATIONS);
 			TIPC_MAX_PUBLICATIONS);
+		spin_unlock_bh(&tipc_nametbl_lock);
 		return NULL;
 		return NULL;
 	}
 	}
 
 
-	write_lock_bh(&tipc_nametbl_lock);
 	publ = tipc_nametbl_insert_publ(type, lower, upper, scope,
 	publ = tipc_nametbl_insert_publ(type, lower, upper, scope,
 				   tipc_own_addr, port_ref, key);
 				   tipc_own_addr, port_ref, key);
 	if (likely(publ)) {
 	if (likely(publ)) {
-		table.local_publ_count++;
+		tipc_nametbl->local_publ_count++;
 		buf = tipc_named_publish(publ);
 		buf = tipc_named_publish(publ);
 		/* Any pending external events? */
 		/* Any pending external events? */
 		tipc_named_process_backlog();
 		tipc_named_process_backlog();
 	}
 	}
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 
 
 	if (buf)
 	if (buf)
 		named_cluster_distribute(buf);
 		named_cluster_distribute(buf);
@@ -695,27 +685,28 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
 int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
 int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
 {
 {
 	struct publication *publ;
 	struct publication *publ;
-	struct sk_buff *buf;
+	struct sk_buff *skb = NULL;
 
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
 	publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
 	if (likely(publ)) {
 	if (likely(publ)) {
-		table.local_publ_count--;
-		buf = tipc_named_withdraw(publ);
+		tipc_nametbl->local_publ_count--;
+		skb = tipc_named_withdraw(publ);
 		/* Any pending external events? */
 		/* Any pending external events? */
 		tipc_named_process_backlog();
 		tipc_named_process_backlog();
-		write_unlock_bh(&tipc_nametbl_lock);
 		list_del_init(&publ->pport_list);
 		list_del_init(&publ->pport_list);
-		kfree(publ);
+		kfree_rcu(publ, rcu);
+	} else {
+		pr_err("Unable to remove local publication\n"
+		       "(type=%u, lower=%u, ref=%u, key=%u)\n",
+		       type, lower, ref, key);
+	}
+	spin_unlock_bh(&tipc_nametbl_lock);
 
 
-		if (buf)
-			named_cluster_distribute(buf);
+	if (skb) {
+		named_cluster_distribute(skb);
 		return 1;
 		return 1;
 	}
 	}
-	write_unlock_bh(&tipc_nametbl_lock);
-	pr_err("Unable to remove local publication\n"
-	       "(type=%u, lower=%u, ref=%u, key=%u)\n",
-	       type, lower, ref, key);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -725,12 +716,14 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
 void tipc_nametbl_subscribe(struct tipc_subscription *s)
 void tipc_nametbl_subscribe(struct tipc_subscription *s)
 {
 {
 	u32 type = s->seq.type;
 	u32 type = s->seq.type;
+	int index = hash(type);
 	struct name_seq *seq;
 	struct name_seq *seq;
 
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	seq = nametbl_find_seq(type);
 	seq = nametbl_find_seq(type);
 	if (!seq)
 	if (!seq)
-		seq = tipc_nameseq_create(type, &table.types[hash(type)]);
+		seq = tipc_nameseq_create(type,
+					  &tipc_nametbl->seq_hlist[index]);
 	if (seq) {
 	if (seq) {
 		spin_lock_bh(&seq->lock);
 		spin_lock_bh(&seq->lock);
 		tipc_nameseq_subscribe(seq, s);
 		tipc_nameseq_subscribe(seq, s);
@@ -739,7 +732,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
 		pr_warn("Failed to create subscription for {%u,%u,%u}\n",
 		pr_warn("Failed to create subscription for {%u,%u,%u}\n",
 			s->seq.type, s->seq.lower, s->seq.upper);
 			s->seq.type, s->seq.lower, s->seq.upper);
 	}
 	}
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 }
 }
 
 
 /**
 /**
@@ -749,18 +742,23 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
 {
 {
 	struct name_seq *seq;
 	struct name_seq *seq;
 
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	seq = nametbl_find_seq(s->seq.type);
 	seq = nametbl_find_seq(s->seq.type);
 	if (seq != NULL) {
 	if (seq != NULL) {
 		spin_lock_bh(&seq->lock);
 		spin_lock_bh(&seq->lock);
 		list_del_init(&s->nameseq_list);
 		list_del_init(&s->nameseq_list);
-		spin_unlock_bh(&seq->lock);
-		nameseq_delete_empty(seq);
+		if (!seq->first_free && list_empty(&seq->subscriptions)) {
+			hlist_del_init_rcu(&seq->ns_list);
+			kfree(seq->sseqs);
+			spin_unlock_bh(&seq->lock);
+			kfree_rcu(seq, rcu);
+		} else {
+			spin_unlock_bh(&seq->lock);
+		}
 	}
 	}
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 }
 }
 
 
-
 /**
 /**
  * subseq_list - print specified sub-sequence contents into the given buffer
  * subseq_list - print specified sub-sequence contents into the given buffer
  */
  */
@@ -882,8 +880,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 		lowbound = 0;
 		lowbound = 0;
 		upbound = ~0;
 		upbound = ~0;
 		for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
 		for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
-			seq_head = &table.types[i];
-			hlist_for_each_entry(seq, seq_head, ns_list) {
+			seq_head = &tipc_nametbl->seq_hlist[i];
+			hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						   depth, seq->type,
 						   depth, seq->type,
 						   lowbound, upbound, i);
 						   lowbound, upbound, i);
@@ -898,8 +896,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 		}
 		}
 		ret += nametbl_header(buf + ret, len - ret, depth);
 		ret += nametbl_header(buf + ret, len - ret, depth);
 		i = hash(type);
 		i = hash(type);
-		seq_head = &table.types[i];
-		hlist_for_each_entry(seq, seq_head, ns_list) {
+		seq_head = &tipc_nametbl->seq_hlist[i];
+		hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
 			if (seq->type == type) {
 			if (seq->type == type) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						   depth, type,
 						   depth, type,
@@ -931,11 +929,11 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
 	pb = TLV_DATA(rep_tlv);
 	pb = TLV_DATA(rep_tlv);
 	pb_len = ULTRA_STRING_MAX_LEN;
 	pb_len = ULTRA_STRING_MAX_LEN;
 	argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area);
 	argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area);
-	read_lock_bh(&tipc_nametbl_lock);
+	rcu_read_lock();
 	str_len = nametbl_list(pb, pb_len, ntohl(argv->depth),
 	str_len = nametbl_list(pb, pb_len, ntohl(argv->depth),
 			       ntohl(argv->type),
 			       ntohl(argv->type),
 			       ntohl(argv->lowbound), ntohl(argv->upbound));
 			       ntohl(argv->lowbound), ntohl(argv->upbound));
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_unlock();
 	str_len += 1;	/* for "\0" */
 	str_len += 1;	/* for "\0" */
 	skb_put(buf, TLV_SPACE(str_len));
 	skb_put(buf, TLV_SPACE(str_len));
 	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
 	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
@@ -945,12 +943,18 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
 
 
 int tipc_nametbl_init(void)
 int tipc_nametbl_init(void)
 {
 {
-	table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head),
-			      GFP_ATOMIC);
-	if (!table.types)
+	int i;
+
+	tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC);
+	if (!tipc_nametbl)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	table.local_publ_count = 0;
+	for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
+		INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
+
+	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
+	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
+	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -965,17 +969,19 @@ static void tipc_purge_publications(struct name_seq *seq)
 	struct sub_seq *sseq;
 	struct sub_seq *sseq;
 	struct name_info *info;
 	struct name_info *info;
 
 
-	if (!seq->sseqs) {
-		nameseq_delete_empty(seq);
-		return;
-	}
+	spin_lock_bh(&seq->lock);
 	sseq = seq->sseqs;
 	sseq = seq->sseqs;
 	info = sseq->info;
 	info = sseq->info;
 	list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
 	list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
 		tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
 		tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
 					 publ->ref, publ->key);
 					 publ->ref, publ->key);
-		kfree(publ);
+		kfree_rcu(publ, rcu);
 	}
 	}
+	hlist_del_init_rcu(&seq->ns_list);
+	kfree(seq->sseqs);
+	spin_unlock_bh(&seq->lock);
+
+	kfree_rcu(seq, rcu);
 }
 }
 
 
 void tipc_nametbl_stop(void)
 void tipc_nametbl_stop(void)
@@ -983,23 +989,24 @@ void tipc_nametbl_stop(void)
 	u32 i;
 	u32 i;
 	struct name_seq *seq;
 	struct name_seq *seq;
 	struct hlist_head *seq_head;
 	struct hlist_head *seq_head;
-	struct hlist_node *safe;
 
 
 	/* Verify name table is empty and purge any lingering
 	/* Verify name table is empty and purge any lingering
 	 * publications, then release the name table
 	 * publications, then release the name table
 	 */
 	 */
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
 	for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
-		if (hlist_empty(&table.types[i]))
+		if (hlist_empty(&tipc_nametbl->seq_hlist[i]))
 			continue;
 			continue;
-		seq_head = &table.types[i];
-		hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) {
+		seq_head = &tipc_nametbl->seq_hlist[i];
+		hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
 			tipc_purge_publications(seq);
 			tipc_purge_publications(seq);
 		}
 		}
 	}
 	}
-	kfree(table.types);
-	table.types = NULL;
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
+
+	synchronize_net();
+	kfree(tipc_nametbl);
+
 }
 }
 
 
 static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
 static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
@@ -1103,7 +1110,7 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type,
 			      u32 *last_lower, u32 *last_publ)
 			      u32 *last_lower, u32 *last_publ)
 {
 {
 	struct hlist_head *seq_head;
 	struct hlist_head *seq_head;
-	struct name_seq *seq;
+	struct name_seq *seq = NULL;
 	int err;
 	int err;
 	int i;
 	int i;
 
 
@@ -1113,22 +1120,21 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type,
 		i = 0;
 		i = 0;
 
 
 	for (; i < TIPC_NAMETBL_SIZE; i++) {
 	for (; i < TIPC_NAMETBL_SIZE; i++) {
-		seq_head = &table.types[i];
+		seq_head = &tipc_nametbl->seq_hlist[i];
 
 
 		if (*last_type) {
 		if (*last_type) {
 			seq = nametbl_find_seq(*last_type);
 			seq = nametbl_find_seq(*last_type);
 			if (!seq)
 			if (!seq)
 				return -EPIPE;
 				return -EPIPE;
 		} else {
 		} else {
-			seq = hlist_entry_safe((seq_head)->first,
-					       struct name_seq, ns_list);
+			hlist_for_each_entry_rcu(seq, seq_head, ns_list)
+				break;
 			if (!seq)
 			if (!seq)
 				continue;
 				continue;
 		}
 		}
 
 
-		hlist_for_each_entry_from(seq, ns_list) {
+		hlist_for_each_entry_from_rcu(seq, ns_list) {
 			spin_lock_bh(&seq->lock);
 			spin_lock_bh(&seq->lock);
-
 			err = __tipc_nl_subseq_list(msg, seq, last_lower,
 			err = __tipc_nl_subseq_list(msg, seq, last_lower,
 						    last_publ);
 						    last_publ);
 
 
@@ -1160,8 +1166,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	msg.portid = NETLINK_CB(cb->skb).portid;
 	msg.portid = NETLINK_CB(cb->skb).portid;
 	msg.seq = cb->nlh->nlmsg_seq;
 	msg.seq = cb->nlh->nlmsg_seq;
 
 
-	read_lock_bh(&tipc_nametbl_lock);
-
+	rcu_read_lock();
 	err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ);
 	err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ);
 	if (!err) {
 	if (!err) {
 		done = 1;
 		done = 1;
@@ -1174,8 +1179,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		 */
 		 */
 		cb->prev_seq = 1;
 		cb->prev_seq = 1;
 	}
 	}
-
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_unlock();
 
 
 	cb->args[0] = last_type;
 	cb->args[0] = last_type;
 	cb->args[1] = last_lower;
 	cb->args[1] = last_lower;

+ 18 - 2
net/tipc/name_table.h

@@ -43,7 +43,9 @@ struct tipc_port_list;
 /*
 /*
  * TIPC name types reserved for internal TIPC use (both current and planned)
  * TIPC name types reserved for internal TIPC use (both current and planned)
  */
  */
-#define TIPC_ZM_SRV 3		/* zone master service name type */
+#define TIPC_ZM_SRV		3	/* zone master service name type */
+#define TIPC_PUBL_SCOPE_NUM	(TIPC_NODE_SCOPE + 1)
+#define TIPC_NAMETBL_SIZE	1024	/* must be a power of 2 */
 
 
 /**
 /**
  * struct publication - info about a published (name or) name sequence
  * struct publication - info about a published (name or) name sequence
@@ -60,6 +62,7 @@ struct tipc_port_list;
  * @node_list: adjacent matching name seq publications with >= node scope
  * @node_list: adjacent matching name seq publications with >= node scope
  * @cluster_list: adjacent matching name seq publications with >= cluster scope
  * @cluster_list: adjacent matching name seq publications with >= cluster scope
  * @zone_list: adjacent matching name seq publications with >= zone scope
  * @zone_list: adjacent matching name seq publications with >= zone scope
+ * @rcu: RCU callback head used for deferred freeing
  *
  *
  * Note that the node list, cluster list, and zone list are circular lists.
  * Note that the node list, cluster list, and zone list are circular lists.
  */
  */
@@ -77,10 +80,23 @@ struct publication {
 	struct list_head node_list;
 	struct list_head node_list;
 	struct list_head cluster_list;
 	struct list_head cluster_list;
 	struct list_head zone_list;
 	struct list_head zone_list;
+	struct rcu_head rcu;
 };
 };
 
 
+/**
+ * struct name_table - table containing all existing port name publications
+ * @seq_hlist: name sequence hash lists
+ * @publ_list: publication lists
+ * @local_publ_count: number of publications issued by this node
+ */
+struct name_table {
+	struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
+	struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+	u32 local_publ_count;
+};
 
 
-extern rwlock_t tipc_nametbl_lock;
+extern spinlock_t tipc_nametbl_lock;
+extern struct name_table *tipc_nametbl;
 
 
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
 

+ 0 - 1
net/tipc/subscr.c

@@ -305,7 +305,6 @@ static int subscr_subscribe(struct tipc_subscr *s,
 		kfree(sub);
 		kfree(sub);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
-	INIT_LIST_HEAD(&sub->nameseq_list);
 	list_add(&sub->subscription_list, &subscriber->subscription_list);
 	list_add(&sub->subscription_list, &subscriber->subscription_list);
 	sub->subscriber = subscriber;
 	sub->subscriber = subscriber;
 	sub->swap = swap;
 	sub->swap = swap;