7 ani în urmă · ca16eb342e
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -275,9 +275,9 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg)
 
				 
			
 
				 void bfqg_and_blkg_put(struct bfq_group *bfqg)
			
 
				 {
			
 
				-	bfqg_put(bfqg);
			
 
				-
			
 
				 	blkg_put(bfqg_to_blkg(bfqg));
			
 
				+
			
 
				+	bfqg_put(bfqg);
			
 
				 }
			
 
				 
			
 
				 /* @stats = 0 */
			
--- a/block/bio.c
+++ b/block/bio.c
@@ -2015,7 +2015,8 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 
				 {
			
 
				 	if (unlikely(bio->bi_blkg))
			
 
				 		return -EBUSY;
			
 
				-	blkg_get(blkg);
			
 
				+	if (!blkg_try_get(blkg))
			
 
				+		return -ENODEV;
			
 
				 	bio->bi_blkg = blkg;
			
 
				 	return 0;
			
 
				 }
			
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void blkg_pd_offline(struct blkcg_gq *blkg)
			
 
				-{
			
 
				-	int i;
			
 
				-
			
 
				-	lockdep_assert_held(blkg->q->queue_lock);
			
 
				-	lockdep_assert_held(&blkg->blkcg->lock);
			
 
				-
			
 
				-	for (i = 0; i < BLKCG_MAX_POLS; i++) {
			
 
				-		struct blkcg_policy *pol = blkcg_policy[i];
			
 
				-
			
 
				-		if (blkg->pd[i] && !blkg->pd[i]->offline &&
			
 
				-		    pol->pd_offline_fn) {
			
 
				-			pol->pd_offline_fn(blkg->pd[i]);
			
 
				-			blkg->pd[i]->offline = true;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 static void blkg_destroy(struct blkcg_gq *blkg)
			
 
				 {
			
 
				 	struct blkcg *blkcg = blkg->blkcg;
			
 
				 	struct blkcg_gq *parent = blkg->parent;
			
 
				+	int i;
			
 
				 
			
 
				 	lockdep_assert_held(blkg->q->queue_lock);
			
 
				 	lockdep_assert_held(&blkcg->lock);
			
@@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 
				 	WARN_ON_ONCE(list_empty(&blkg->q_node));
			
 
				 	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
			
 
				 
			
 
				+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
			
 
				+		struct blkcg_policy *pol = blkcg_policy[i];
			
 
				+
			
 
				+		if (blkg->pd[i] && pol->pd_offline_fn)
			
 
				+			pol->pd_offline_fn(blkg->pd[i]);
			
 
				+	}
			
 
				+
			
 
				 	if (parent) {
			
 
				 		blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
			
 
				 		blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
			
@@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q)
 
				 		struct blkcg *blkcg = blkg->blkcg;
			
 
				 
			
 
				 		spin_lock(&blkcg->lock);
			
 
				-		blkg_pd_offline(blkg);
			
 
				 		blkg_destroy(blkg);
			
 
				 		spin_unlock(&blkcg->lock);
			
 
				 	}
			
@@ -1053,59 +1042,64 @@ static struct cftype blkcg_legacy_files[] = {
 
				 	{ }	/* terminate */
			
 
				 };
			
 
				 
			
 
				+/*
			
 
				+ * blkcg destruction is a three-stage process.
			
 
				+ *
			
 
				+ * 1. Destruction starts.  The blkcg_css_offline() callback is invoked
			
 
				+ *    which offlines writeback.  Here we tie the next stage of blkg destruction
			
 
				+ *    to the completion of writeback associated with the blkcg.  This lets us
			
 
				+ *    avoid punting potentially large amounts of outstanding writeback to root
			
 
				+ *    while maintaining any ongoing policies.  The next stage is triggered when
			
 
				+ *    the nr_cgwbs count goes to zero.
			
 
				+ *
			
 
				+ * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
			
 
				+ *    and handles the destruction of blkgs.  Here the css reference held by
			
 
				+ *    the blkg is put back eventually allowing blkcg_css_free() to be called.
			
 
				+ *    This work may occur in cgwb_release_workfn() on the cgwb_release
			
 
				+ *    workqueue.  Any submitted ios that fail to get the blkg ref will be
			
 
				+ *    punted to the root_blkg.
			
 
				+ *
			
 
				+ * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
			
 
				+ *    This finally frees the blkcg.
			
 
				+ */
			
 
				+
			
 
				 /**
			
 
				  * blkcg_css_offline - cgroup css_offline callback
			
 
				  * @css: css of interest
			
 
				  *
			
 
				- * This function is called when @css is about to go away and responsible
			
 
				- * for offlining all blkgs pd and killing all wbs associated with @css.
			
 
				- * blkgs pd offline should be done while holding both q and blkcg locks.
			
 
				- * As blkcg lock is nested inside q lock, this function performs reverse
			
 
				- * double lock dancing.
			
 
				- *
			
 
				- * This is the blkcg counterpart of ioc_release_fn().
			
 
				+ * This function is called when @css is about to go away.  Here the cgwbs are
			
 
				+ * offlined first and only once writeback associated with the blkcg has
			
 
				+ * finished do we start step 2 (see above).
			
 
				  */
			
 
				 static void blkcg_css_offline(struct cgroup_subsys_state *css)
			
 
				 {
			
 
				 	struct blkcg *blkcg = css_to_blkcg(css);
			
 
				-	struct blkcg_gq *blkg;
			
 
				-
			
 
				-	spin_lock_irq(&blkcg->lock);
			
 
				-
			
 
				-	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
			
 
				-		struct request_queue *q = blkg->q;
			
 
				-
			
 
				-		if (spin_trylock(q->queue_lock)) {
			
 
				-			blkg_pd_offline(blkg);
			
 
				-			spin_unlock(q->queue_lock);
			
 
				-		} else {
			
 
				-			spin_unlock_irq(&blkcg->lock);
			
 
				-			cpu_relax();
			
 
				-			spin_lock_irq(&blkcg->lock);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	spin_unlock_irq(&blkcg->lock);
			
 
				 
			
 
				+	/* this prevents anyone from attaching or migrating to this blkcg */
			
 
				 	wb_blkcg_offline(blkcg);
			
 
				+
			
 
				+	/* put the base cgwb reference allowing step 2 to be triggered */
			
 
				+	blkcg_cgwb_put(blkcg);
			
 
				 }
			
 
				 
			
 
				 /**
			
 
				- * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
			
 
				+ * blkcg_destroy_blkgs - responsible for shooting down blkgs
			
 
				  * @blkcg: blkcg of interest
			
 
				  *
			
 
				- * This function is called when blkcg css is about to free and responsible for
			
 
				- * destroying all blkgs associated with @blkcg.
			
 
				- * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
			
 
				+ * blkgs should be removed while holding both q and blkcg locks.  As blkcg lock
			
 
				  * is nested inside q lock, this function performs reverse double lock dancing.
			
 
				+ * Destroying the blkgs releases the reference held on the blkcg's css allowing
			
 
				+ * blkcg_css_free to eventually be called.
			
 
				+ *
			
 
				+ * This is the blkcg counterpart of ioc_release_fn().
			
 
				  */
			
 
				-static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
			
 
				+void blkcg_destroy_blkgs(struct blkcg *blkcg)
			
 
				 {
			
 
				 	spin_lock_irq(&blkcg->lock);
			
 
				+
			
 
				 	while (!hlist_empty(&blkcg->blkg_list)) {
			
 
				 		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
			
 
				-						    struct blkcg_gq,
			
 
				-						    blkcg_node);
			
 
				+						struct blkcg_gq, blkcg_node);
			
 
				 		struct request_queue *q = blkg->q;
			
 
				 
			
 
				 		if (spin_trylock(q->queue_lock)) {
			
@@ -1117,6 +1111,7 @@ static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
 
				 			spin_lock_irq(&blkcg->lock);
			
 
				 		}
			
 
				 	}
			
 
				+
			
 
				 	spin_unlock_irq(&blkcg->lock);
			
 
				 }
			
 
				 
			
@@ -1125,8 +1120,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
 
				 	struct blkcg *blkcg = css_to_blkcg(css);
			
 
				 	int i;
			
 
				 
			
 
				-	blkcg_destroy_all_blkgs(blkcg);
			
 
				-
			
 
				 	mutex_lock(&blkcg_pol_mutex);
			
 
				 
			
 
				 	list_del(&blkcg->all_blkcgs_node);
			
@@ -1189,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 
				 	INIT_HLIST_HEAD(&blkcg->blkg_list);
			
 
				 #ifdef CONFIG_CGROUP_WRITEBACK
			
 
				 	INIT_LIST_HEAD(&blkcg->cgwb_list);
			
 
				+	refcount_set(&blkcg->cgwb_refcnt, 1);
			
 
				 #endif
			
 
				 	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
			
 
				 
			
@@ -1480,11 +1474,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
 
				 
			
 
				 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
			
 
				 		if (blkg->pd[pol->plid]) {
			
 
				-			if (!blkg->pd[pol->plid]->offline &&
			
 
				-			    pol->pd_offline_fn) {
			
 
				+			if (pol->pd_offline_fn)
			
 
				 				pol->pd_offline_fn(blkg->pd[pol->plid]);
			
 
				-				blkg->pd[pol->plid]->offline = true;
			
 
				-			}
			
 
				 			pol->pd_free_fn(blkg->pd[pol->plid]);
			
 
				 			blkg->pd[pol->plid] = NULL;
			
 
				 		}
			
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2163,9 +2163,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 
				 {
			
 
				 	const int op = bio_op(bio);
			
 
				 
			
 
				-	if (part->policy && (op_is_write(op) && !op_is_flush(op))) {
			
 
				+	if (part->policy && op_is_write(op)) {
			
 
				 		char b[BDEVNAME_SIZE];
			
 
				 
			
 
				+		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
			
 
				+			return false;
			
 
				+
			
 
				 		WARN_ONCE(1,
			
 
				 		       "generic_make_request: Trying to write "
			
 
				 			"to read-only block-device %s (partno %d)\n",
			
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2129,8 +2129,9 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 
				 static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
			
 
				 {
			
 
				 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
			
 
				-	if (bio->bi_css)
			
 
				-		bio_associate_blkg(bio, tg_to_blkg(tg));
			
 
				+	/* fallback to root_blkg if we fail to get a blkg ref */
			
 
				+	if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
			
 
				+		bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
			
 
				 	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
			
 
				 #endif
			
 
				 }
			
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1239,6 +1239,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 
				 	case NBD_SET_SOCK:
			
 
				 		return nbd_add_socket(nbd, arg, false);
			
 
				 	case NBD_SET_BLKSIZE:
			
 
				+		if (!arg || !is_power_of_2(arg) || arg < 512 ||
			
 
				+		    arg > PAGE_SIZE)
			
 
				+			return -EINVAL;
			
 
				 		nbd_size_set(nbd, arg,
			
 
				 			     div_s64(config->bytesize, arg));
			
 
				 		return 0;
			
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -56,6 +56,7 @@ struct blkcg {
 
				 	struct list_head		all_blkcgs_node;
			
 
				 #ifdef CONFIG_CGROUP_WRITEBACK
			
 
				 	struct list_head		cgwb_list;
			
 
				+	refcount_t			cgwb_refcnt;
			
 
				 #endif
			
 
				 };
			
 
				 
			
@@ -89,7 +90,6 @@ struct blkg_policy_data {
 
				 	/* the blkg and policy id this per-policy data belongs to */
			
 
				 	struct blkcg_gq			*blkg;
			
 
				 	int				plid;
			
 
				-	bool				offline;
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -387,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
 
				 	return cpd ? cpd->blkcg : NULL;
			
 
				 }
			
 
				 
			
 
				+extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
			
 
				+
			
 
				+#ifdef CONFIG_CGROUP_WRITEBACK
			
 
				+
			
 
				+/**
			
 
				+ * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
			
 
				+ * @blkcg: blkcg of interest
			
 
				+ *
			
 
				+ * This is used to track the number of active wb's related to a blkcg.
			
 
				+ */
			
 
				+static inline void blkcg_cgwb_get(struct blkcg *blkcg)
			
 
				+{
			
 
				+	refcount_inc(&blkcg->cgwb_refcnt);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
			
 
				+ * @blkcg: blkcg of interest
			
 
				+ *
			
 
				+ * This is used to track the number of active wb's related to a blkcg.
			
 
				+ * When this count goes to zero, all active wbs have finished so the
			
 
				+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
			
 
				+ * This work may occur in cgwb_release_workfn() on the cgwb_release
			
 
				+ * workqueue.
			
 
				+ */
			
 
				+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
			
 
				+{
			
 
				+	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
			
 
				+		blkcg_destroy_blkgs(blkcg);
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
			
 
				+
			
 
				+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
			
 
				+{
			
 
				+	/* wb isn't being accounted, so trigger destruction right away */
			
 
				+	blkcg_destroy_blkgs(blkcg);
			
 
				+}
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				 /**
			
 
				  * blkg_path - format cgroup path of blkg
			
 
				  * @blkg: blkg of interest
			
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 
				 {
			
 
				 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
			
 
				 						release_work);
			
 
				+	struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
			
 
				 
			
 
				 	mutex_lock(&wb->bdi->cgwb_release_mutex);
			
 
				 	wb_shutdown(wb);
			
@@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work)
 
				 	css_put(wb->blkcg_css);
			
 
				 	mutex_unlock(&wb->bdi->cgwb_release_mutex);
			
 
				 
			
 
				+	/* triggers blkg destruction if cgwb_refcnt becomes zero */
			
 
				+	blkcg_cgwb_put(blkcg);
			
 
				+
			
 
				 	fprop_local_destroy_percpu(&wb->memcg_completions);
			
 
				 	percpu_ref_exit(&wb->refcnt);
			
 
				 	wb_exit(wb);
			
@@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
				 			list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
			
 
				 			list_add(&wb->memcg_node, memcg_cgwb_list);
			
 
				 			list_add(&wb->blkcg_node, blkcg_cgwb_list);
			
 
				+			blkcg_cgwb_get(blkcg);
			
 
				 			css_get(memcg_css);
			
 
				 			css_get(blkcg_css);
			
 
				 		}