|
@@ -1,24 +1,42 @@
|
|
|
|
|
+===================================================
|
|
|
|
|
+Adding reference counters (krefs) to kernel objects
|
|
|
|
|
+===================================================
|
|
|
|
|
+
|
|
|
|
|
+:Author: Corey Minyard <minyard@acm.org>
|
|
|
|
|
+:Author: Thomas Hellstrom <thellstrom@vmware.com>
|
|
|
|
|
+
|
|
|
|
|
+A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
|
|
|
|
|
+presentation on krefs, which can be found at:
|
|
|
|
|
+
|
|
|
|
|
+ - http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
|
|
|
|
|
+ - http://www.kroah.com/linux/talks/ols_2004_kref_talk/
|
|
|
|
|
+
|
|
|
|
|
+Introduction
|
|
|
|
|
+============
|
|
|
|
|
|
|
|
krefs allow you to add reference counters to your objects. If you
|
|
krefs allow you to add reference counters to your objects. If you
|
|
|
have objects that are used in multiple places and passed around, and
|
|
have objects that are used in multiple places and passed around, and
|
|
|
you don't have refcounts, your code is almost certainly broken. If
|
|
you don't have refcounts, your code is almost certainly broken. If
|
|
|
you want refcounts, krefs are the way to go.
|
|
you want refcounts, krefs are the way to go.
|
|
|
|
|
|
|
|
-To use a kref, add one to your data structures like:
|
|
|
|
|
|
|
+To use a kref, add one to your data structures like::
|
|
|
|
|
|
|
|
-struct my_data
|
|
|
|
|
-{
|
|
|
|
|
|
|
+ struct my_data
|
|
|
|
|
+ {
|
|
|
.
|
|
.
|
|
|
.
|
|
.
|
|
|
struct kref refcount;
|
|
struct kref refcount;
|
|
|
.
|
|
.
|
|
|
.
|
|
.
|
|
|
-};
|
|
|
|
|
|
|
+ };
|
|
|
|
|
|
|
|
The kref can occur anywhere within the data structure.
|
|
The kref can occur anywhere within the data structure.
|
|
|
|
|
|
|
|
|
|
+Initialization
|
|
|
|
|
+==============
|
|
|
|
|
+
|
|
|
You must initialize the kref after you allocate it. To do this, call
|
|
You must initialize the kref after you allocate it. To do this, call
|
|
|
-kref_init as so:
|
|
|
|
|
|
|
+kref_init() like so::
|
|
|
|
|
|
|
|
struct my_data *data;
|
|
struct my_data *data;
|
|
|
|
|
|
|
@@ -29,18 +47,25 @@ kref_init as so:
|
|
|
|
|
|
|
|
This sets the refcount in the kref to 1.
|
|
This sets the refcount in the kref to 1.
|
|
|
|
|
|
|
|
|
|
+Kref rules
|
|
|
|
|
+==========
|
|
|
|
|
+
|
|
|
Once you have an initialized kref, you must follow the following
|
|
Once you have an initialized kref, you must follow the following
|
|
|
rules:
|
|
rules:
|
|
|
|
|
|
|
|
1) If you make a non-temporary copy of a pointer, especially if
|
|
1) If you make a non-temporary copy of a pointer, especially if
|
|
|
it can be passed to another thread of execution, you must
|
|
it can be passed to another thread of execution, you must
|
|
|
- increment the refcount with kref_get() before passing it off:
|
|
|
|
|
|
|
+ increment the refcount with kref_get() before passing it off::
|
|
|
|
|
+
|
|
|
kref_get(&data->refcount);
|
|
kref_get(&data->refcount);
|
|
|
|
|
+
|
|
|
If you already have a valid pointer to a kref-ed structure (the
|
|
If you already have a valid pointer to a kref-ed structure (the
|
|
|
refcount cannot go to zero) you may do this without a lock.
|
|
refcount cannot go to zero) you may do this without a lock.
|
|
|
|
|
|
|
|
-2) When you are done with a pointer, you must call kref_put():
|
|
|
|
|
|
|
+2) When you are done with a pointer, you must call kref_put()::
|
|
|
|
|
+
|
|
|
kref_put(&data->refcount, data_release);
|
|
kref_put(&data->refcount, data_release);
|
|
|
|
|
+
|
|
|
If this is the last reference to the pointer, the release
|
|
If this is the last reference to the pointer, the release
|
|
|
routine will be called. If the code never tries to get
|
|
routine will be called. If the code never tries to get
|
|
|
a valid pointer to a kref-ed structure without already
|
|
a valid pointer to a kref-ed structure without already
|
|
@@ -53,25 +78,25 @@ rules:
|
|
|
structure must remain valid during the kref_get().
|
|
structure must remain valid during the kref_get().
|
|
|
|
|
|
|
|
For example, if you allocate some data and then pass it to another
|
|
For example, if you allocate some data and then pass it to another
|
|
|
-thread to process:
|
|
|
|
|
|
|
+thread to process::
|
|
|
|
|
|
|
|
-void data_release(struct kref *ref)
|
|
|
|
|
-{
|
|
|
|
|
|
|
+ void data_release(struct kref *ref)
|
|
|
|
|
+ {
|
|
|
struct my_data *data = container_of(ref, struct my_data, refcount);
|
|
struct my_data *data = container_of(ref, struct my_data, refcount);
|
|
|
kfree(data);
|
|
kfree(data);
|
|
|
-}
|
|
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
-void more_data_handling(void *cb_data)
|
|
|
|
|
-{
|
|
|
|
|
|
|
+ void more_data_handling(void *cb_data)
|
|
|
|
|
+ {
|
|
|
struct my_data *data = cb_data;
|
|
struct my_data *data = cb_data;
|
|
|
.
|
|
.
|
|
|
. do stuff with data here
|
|
. do stuff with data here
|
|
|
.
|
|
.
|
|
|
kref_put(&data->refcount, data_release);
|
|
kref_put(&data->refcount, data_release);
|
|
|
-}
|
|
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
-int my_data_handler(void)
|
|
|
|
|
-{
|
|
|
|
|
|
|
+ int my_data_handler(void)
|
|
|
|
|
+ {
|
|
|
int rv = 0;
|
|
int rv = 0;
|
|
|
struct my_data *data;
|
|
struct my_data *data;
|
|
|
struct task_struct *task;
|
|
struct task_struct *task;
|
|
@@ -91,10 +116,10 @@ int my_data_handler(void)
|
|
|
.
|
|
.
|
|
|
. do stuff with data here
|
|
. do stuff with data here
|
|
|
.
|
|
.
|
|
|
- out:
|
|
|
|
|
|
|
+ out:
|
|
|
kref_put(&data->refcount, data_release);
|
|
kref_put(&data->refcount, data_release);
|
|
|
return rv;
|
|
return rv;
|
|
|
-}
|
|
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
This way, it doesn't matter what order the two threads handle the
|
|
This way, it doesn't matter what order the two threads handle the
|
|
|
data, the kref_put() handles knowing when the data is not referenced
|
|
data, the kref_put() handles knowing when the data is not referenced
|
|
@@ -104,7 +129,7 @@ put needs no lock because nothing tries to get the data without
|
|
|
already holding a pointer.
|
|
already holding a pointer.
|
|
|
|
|
|
|
|
Note that the "before" in rule 1 is very important. You should never
|
|
Note that the "before" in rule 1 is very important. You should never
|
|
|
-do something like:
|
|
|
|
|
|
|
+do something like::
|
|
|
|
|
|
|
|
task = kthread_run(more_data_handling, data, "more_data_handling");
|
|
task = kthread_run(more_data_handling, data, "more_data_handling");
|
|
|
if (task == ERR_PTR(-ENOMEM)) {
|
|
if (task == ERR_PTR(-ENOMEM)) {
|
|
@@ -124,14 +149,14 @@ bad style. Don't do it.
|
|
|
There are some situations where you can optimize the gets and puts.
|
|
There are some situations where you can optimize the gets and puts.
|
|
|
For instance, if you are done with an object and enqueuing it for
|
|
For instance, if you are done with an object and enqueuing it for
|
|
|
something else or passing it off to something else, there is no reason
|
|
something else or passing it off to something else, there is no reason
|
|
|
-to do a get then a put:
|
|
|
|
|
|
|
+to do a get then a put::
|
|
|
|
|
|
|
|
/* Silly extra get and put */
|
|
/* Silly extra get and put */
|
|
|
kref_get(&obj->ref);
|
|
kref_get(&obj->ref);
|
|
|
enqueue(obj);
|
|
enqueue(obj);
|
|
|
kref_put(&obj->ref, obj_cleanup);
|
|
kref_put(&obj->ref, obj_cleanup);
|
|
|
|
|
|
|
|
-Just do the enqueue. A comment about this is always welcome:
|
|
|
|
|
|
|
+Just do the enqueue. A comment about this is always welcome::
|
|
|
|
|
|
|
|
enqueue(obj);
|
|
enqueue(obj);
|
|
|
/* We are done with obj, so we pass our refcount off
|
|
/* We are done with obj, so we pass our refcount off
|
|
@@ -142,109 +167,99 @@ instance, you have a list of items that are each kref-ed, and you wish
|
|
|
to get the first one. You can't just pull the first item off the list
|
|
to get the first one. You can't just pull the first item off the list
|
|
|
and kref_get() it. That violates rule 3 because you are not already
|
|
and kref_get() it. That violates rule 3 because you are not already
|
|
|
holding a valid pointer. You must add a mutex (or some other lock).
|
|
holding a valid pointer. You must add a mutex (or some other lock).
|
|
|
-For instance:
|
|
|
|
|
-
|
|
|
|
|
-static DEFINE_MUTEX(mutex);
|
|
|
|
|
-static LIST_HEAD(q);
|
|
|
|
|
-struct my_data
|
|
|
|
|
-{
|
|
|
|
|
- struct kref refcount;
|
|
|
|
|
- struct list_head link;
|
|
|
|
|
-};
|
|
|
|
|
-
|
|
|
|
|
-static struct my_data *get_entry()
|
|
|
|
|
-{
|
|
|
|
|
- struct my_data *entry = NULL;
|
|
|
|
|
- mutex_lock(&mutex);
|
|
|
|
|
- if (!list_empty(&q)) {
|
|
|
|
|
- entry = container_of(q.next, struct my_data, link);
|
|
|
|
|
- kref_get(&entry->refcount);
|
|
|
|
|
|
|
+For instance::
|
|
|
|
|
+
|
|
|
|
|
+ static DEFINE_MUTEX(mutex);
|
|
|
|
|
+ static LIST_HEAD(q);
|
|
|
|
|
+ struct my_data
|
|
|
|
|
+ {
|
|
|
|
|
+ struct kref refcount;
|
|
|
|
|
+ struct list_head link;
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ static struct my_data *get_entry()
|
|
|
|
|
+ {
|
|
|
|
|
+ struct my_data *entry = NULL;
|
|
|
|
|
+ mutex_lock(&mutex);
|
|
|
|
|
+ if (!list_empty(&q)) {
|
|
|
|
|
+ entry = container_of(q.next, struct my_data, link);
|
|
|
|
|
+ kref_get(&entry->refcount);
|
|
|
|
|
+ }
|
|
|
|
|
+ mutex_unlock(&mutex);
|
|
|
|
|
+ return entry;
|
|
|
}
|
|
}
|
|
|
- mutex_unlock(&mutex);
|
|
|
|
|
- return entry;
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
-static void release_entry(struct kref *ref)
|
|
|
|
|
-{
|
|
|
|
|
- struct my_data *entry = container_of(ref, struct my_data, refcount);
|
|
|
|
|
|
|
+ static void release_entry(struct kref *ref)
|
|
|
|
|
+ {
|
|
|
|
|
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
|
|
|
|
|
|
|
|
- list_del(&entry->link);
|
|
|
|
|
- kfree(entry);
|
|
|
|
|
-}
|
|
|
|
|
|
|
+ list_del(&entry->link);
|
|
|
|
|
+ kfree(entry);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
-static void put_entry(struct my_data *entry)
|
|
|
|
|
-{
|
|
|
|
|
- mutex_lock(&mutex);
|
|
|
|
|
- kref_put(&entry->refcount, release_entry);
|
|
|
|
|
- mutex_unlock(&mutex);
|
|
|
|
|
-}
|
|
|
|
|
|
|
+ static void put_entry(struct my_data *entry)
|
|
|
|
|
+ {
|
|
|
|
|
+ mutex_lock(&mutex);
|
|
|
|
|
+ kref_put(&entry->refcount, release_entry);
|
|
|
|
|
+ mutex_unlock(&mutex);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
The kref_put() return value is useful if you do not want to hold the
|
|
The kref_put() return value is useful if you do not want to hold the
|
|
|
lock during the whole release operation. Say you didn't want to call
|
|
lock during the whole release operation. Say you didn't want to call
|
|
|
kfree() with the lock held in the example above (since it is kind of
|
|
kfree() with the lock held in the example above (since it is kind of
|
|
|
-pointless to do so). You could use kref_put() as follows:
|
|
|
|
|
|
|
+pointless to do so). You could use kref_put() as follows::
|
|
|
|
|
|
|
|
-static void release_entry(struct kref *ref)
|
|
|
|
|
-{
|
|
|
|
|
- /* All work is done after the return from kref_put(). */
|
|
|
|
|
-}
|
|
|
|
|
|
|
+ static void release_entry(struct kref *ref)
|
|
|
|
|
+ {
|
|
|
|
|
+ /* All work is done after the return from kref_put(). */
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
-static void put_entry(struct my_data *entry)
|
|
|
|
|
-{
|
|
|
|
|
- mutex_lock(&mutex);
|
|
|
|
|
- if (kref_put(&entry->refcount, release_entry)) {
|
|
|
|
|
- list_del(&entry->link);
|
|
|
|
|
- mutex_unlock(&mutex);
|
|
|
|
|
- kfree(entry);
|
|
|
|
|
- } else
|
|
|
|
|
- mutex_unlock(&mutex);
|
|
|
|
|
-}
|
|
|
|
|
|
|
+ static void put_entry(struct my_data *entry)
|
|
|
|
|
+ {
|
|
|
|
|
+ mutex_lock(&mutex);
|
|
|
|
|
+ if (kref_put(&entry->refcount, release_entry)) {
|
|
|
|
|
+ list_del(&entry->link);
|
|
|
|
|
+ mutex_unlock(&mutex);
|
|
|
|
|
+ kfree(entry);
|
|
|
|
|
+ } else
|
|
|
|
|
+ mutex_unlock(&mutex);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
This is really more useful if you have to call other routines as part
|
|
This is really more useful if you have to call other routines as part
|
|
|
of the free operations that could take a long time or might claim the
|
|
of the free operations that could take a long time or might claim the
|
|
|
same lock. Note that doing everything in the release routine is still
|
|
same lock. Note that doing everything in the release routine is still
|
|
|
preferred as it is a little neater.
|
|
preferred as it is a little neater.
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-Corey Minyard <minyard@acm.org>
|
|
|
|
|
-
|
|
|
|
|
-A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
|
|
|
|
|
-presentation on krefs, which can be found at:
|
|
|
|
|
- http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
|
|
|
|
|
-and:
|
|
|
|
|
- http://www.kroah.com/linux/talks/ols_2004_kref_talk/
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
The above example could also be optimized using kref_get_unless_zero() in
|
|
The above example could also be optimized using kref_get_unless_zero() in
|
|
|
-the following way:
|
|
|
|
|
-
|
|
|
|
|
-static struct my_data *get_entry()
|
|
|
|
|
-{
|
|
|
|
|
- struct my_data *entry = NULL;
|
|
|
|
|
- mutex_lock(&mutex);
|
|
|
|
|
- if (!list_empty(&q)) {
|
|
|
|
|
- entry = container_of(q.next, struct my_data, link);
|
|
|
|
|
- if (!kref_get_unless_zero(&entry->refcount))
|
|
|
|
|
- entry = NULL;
|
|
|
|
|
|
|
+the following way::
|
|
|
|
|
+
|
|
|
|
|
+ static struct my_data *get_entry()
|
|
|
|
|
+ {
|
|
|
|
|
+ struct my_data *entry = NULL;
|
|
|
|
|
+ mutex_lock(&mutex);
|
|
|
|
|
+ if (!list_empty(&q)) {
|
|
|
|
|
+ entry = container_of(q.next, struct my_data, link);
|
|
|
|
|
+ if (!kref_get_unless_zero(&entry->refcount))
|
|
|
|
|
+ entry = NULL;
|
|
|
|
|
+ }
|
|
|
|
|
+ mutex_unlock(&mutex);
|
|
|
|
|
+ return entry;
|
|
|
}
|
|
}
|
|
|
- mutex_unlock(&mutex);
|
|
|
|
|
- return entry;
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
-static void release_entry(struct kref *ref)
|
|
|
|
|
-{
|
|
|
|
|
- struct my_data *entry = container_of(ref, struct my_data, refcount);
|
|
|
|
|
|
|
+ static void release_entry(struct kref *ref)
|
|
|
|
|
+ {
|
|
|
|
|
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
|
|
|
|
|
|
|
|
- mutex_lock(&mutex);
|
|
|
|
|
- list_del(&entry->link);
|
|
|
|
|
- mutex_unlock(&mutex);
|
|
|
|
|
- kfree(entry);
|
|
|
|
|
-}
|
|
|
|
|
|
|
+ mutex_lock(&mutex);
|
|
|
|
|
+ list_del(&entry->link);
|
|
|
|
|
+ mutex_unlock(&mutex);
|
|
|
|
|
+ kfree(entry);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
-static void put_entry(struct my_data *entry)
|
|
|
|
|
-{
|
|
|
|
|
- kref_put(&entry->refcount, release_entry);
|
|
|
|
|
-}
|
|
|
|
|
|
|
+ static void put_entry(struct my_data *entry)
|
|
|
|
|
+ {
|
|
|
|
|
+ kref_put(&entry->refcount, release_entry);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
Which is useful to remove the mutex lock around kref_put() in put_entry(), but
|
|
Which is useful to remove the mutex lock around kref_put() in put_entry(), but
|
|
|
it's important that kref_get_unless_zero is enclosed in the same critical
|
|
it's important that kref_get_unless_zero is enclosed in the same critical
|
|
@@ -254,51 +269,51 @@ Note that it is illegal to use kref_get_unless_zero without checking its
|
|
|
return value. If you are sure (by already having a valid pointer) that
|
|
return value. If you are sure (by already having a valid pointer) that
|
|
|
kref_get_unless_zero() will return true, then use kref_get() instead.
|
|
kref_get_unless_zero() will return true, then use kref_get() instead.
|
|
|
|
|
|
|
|
-The function kref_get_unless_zero also makes it possible to use rcu
|
|
|
|
|
-locking for lookups in the above example:
|
|
|
|
|
|
|
+Krefs and RCU
|
|
|
|
|
+=============
|
|
|
|
|
|
|
|
-struct my_data
|
|
|
|
|
-{
|
|
|
|
|
- struct rcu_head rhead;
|
|
|
|
|
- .
|
|
|
|
|
- struct kref refcount;
|
|
|
|
|
- .
|
|
|
|
|
- .
|
|
|
|
|
-};
|
|
|
|
|
-
|
|
|
|
|
-static struct my_data *get_entry_rcu()
|
|
|
|
|
-{
|
|
|
|
|
- struct my_data *entry = NULL;
|
|
|
|
|
- rcu_read_lock();
|
|
|
|
|
- if (!list_empty(&q)) {
|
|
|
|
|
- entry = container_of(q.next, struct my_data, link);
|
|
|
|
|
- if (!kref_get_unless_zero(&entry->refcount))
|
|
|
|
|
- entry = NULL;
|
|
|
|
|
|
|
+The function kref_get_unless_zero() also makes it possible to use RCU
|
|
|
|
|
+locking for lookups in the above example::
|
|
|
|
|
+
|
|
|
|
|
+ struct my_data
|
|
|
|
|
+ {
|
|
|
|
|
+ struct rcu_head rhead;
|
|
|
|
|
+ .
|
|
|
|
|
+ struct kref refcount;
|
|
|
|
|
+ .
|
|
|
|
|
+ .
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ static struct my_data *get_entry_rcu()
|
|
|
|
|
+ {
|
|
|
|
|
+ struct my_data *entry = NULL;
|
|
|
|
|
+ rcu_read_lock();
|
|
|
|
|
+ if (!list_empty(&q)) {
|
|
|
|
|
+ entry = container_of(q.next, struct my_data, link);
|
|
|
|
|
+ if (!kref_get_unless_zero(&entry->refcount))
|
|
|
|
|
+ entry = NULL;
|
|
|
|
|
+ }
|
|
|
|
|
+ rcu_read_unlock();
|
|
|
|
|
+ return entry;
|
|
|
}
|
|
}
|
|
|
- rcu_read_unlock();
|
|
|
|
|
- return entry;
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
-static void release_entry_rcu(struct kref *ref)
|
|
|
|
|
-{
|
|
|
|
|
- struct my_data *entry = container_of(ref, struct my_data, refcount);
|
|
|
|
|
|
|
+ static void release_entry_rcu(struct kref *ref)
|
|
|
|
|
+ {
|
|
|
|
|
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
|
|
|
|
|
|
|
|
- mutex_lock(&mutex);
|
|
|
|
|
- list_del_rcu(&entry->link);
|
|
|
|
|
- mutex_unlock(&mutex);
|
|
|
|
|
- kfree_rcu(entry, rhead);
|
|
|
|
|
-}
|
|
|
|
|
|
|
+ mutex_lock(&mutex);
|
|
|
|
|
+ list_del_rcu(&entry->link);
|
|
|
|
|
+ mutex_unlock(&mutex);
|
|
|
|
|
+ kfree_rcu(entry, rhead);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
-static void put_entry(struct my_data *entry)
|
|
|
|
|
-{
|
|
|
|
|
- kref_put(&entry->refcount, release_entry_rcu);
|
|
|
|
|
-}
|
|
|
|
|
|
|
+ static void put_entry(struct my_data *entry)
|
|
|
|
|
+ {
|
|
|
|
|
+ kref_put(&entry->refcount, release_entry_rcu);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
But note that the struct kref member needs to remain in valid memory for a
|
|
But note that the struct kref member needs to remain in valid memory for a
|
|
|
rcu grace period after release_entry_rcu was called. That can be accomplished
|
|
rcu grace period after release_entry_rcu was called. That can be accomplished
|
|
|
by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
|
|
by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
|
|
|
before using kfree, but note that synchronize_rcu() may sleep for a
|
|
before using kfree, but note that synchronize_rcu() may sleep for a
|
|
|
substantial amount of time.
|
|
substantial amount of time.
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-Thomas Hellstrom <thellstrom@vmware.com>
|
|
|