@@ -395,6 +395,16 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");
 
 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+
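+/*
+ * A channel is preferred for outgoing I/O only while more than this
+ * percentage of its ring buffer is free to write; see storvsc_do_io().
+ */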
+static int ring_avail_percent_lowater = 10;
+module_param(ring_avail_percent_lowater, int, S_IRUGO);
+MODULE_PARM_DESC(ring_avail_percent_lowater,
+		"Select a channel if available ring size > this in percent");
+
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
@@ -468,6 +478,13 @@ struct storvsc_device {
 	 * Mask of CPUs bound to subchannels.
 	 */
 	struct cpumask alloced_cpus;
+	/*
+	 * Pre-allocated struct cpumask for each hardware queue.
+	 * struct cpumask is used when selecting outgoing channels. It is a
+	 * big structure, 1024 bytes when CONFIG_MAXSMP=y.
+	 * Pre-allocate it to avoid allocation on the kernel stack.
+	 */
+	struct cpumask *cpumask_chns;
 	/* Used for vsc/vsp channel reset process */
 	struct storvsc_cmd_request init_request;
 	struct storvsc_cmd_request reset_request;
@@ -872,6 +889,14 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
 	if (stor_device->stor_chns == NULL)
 		return -ENOMEM;
 
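+	/* One scratch cpumask per possible CPU, i.e. per hardware queue */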
+	stor_device->cpumask_chns = kcalloc(num_possible_cpus(),
+					    sizeof(struct cpumask), GFP_KERNEL);
+	if (stor_device->cpumask_chns == NULL) {
+		kfree(stor_device->stor_chns);
+		return -ENOMEM;
+	}
+
 	stor_device->stor_chns[device->channel->target_cpu] = device->channel;
 	cpumask_set_cpu(device->channel->target_cpu,
 			&stor_device->alloced_cpus);
@@ -1232,6 +1257,7 @@ static int storvsc_dev_remove(struct hv_device *device)
 	vmbus_close(device->channel);
 
 	kfree(stor_device->stor_chns);
+	kfree(stor_device->cpumask_chns);
 	kfree(stor_device);
 	return 0;
 }
@@ -1241,7 +1267,7 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 {
 	u16 slot = 0;
 	u16 hash_qnum;
-	struct cpumask alloced_mask;
+	struct cpumask *alloced_mask = &stor_device->cpumask_chns[q_num];
 	int num_channels, tgt_cpu;
 
 	if (stor_device->num_sc == 0)
@@ -1257,10 +1283,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 	 * III. Mapping is persistent.
 	 */
 
-	cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+	cpumask_and(alloced_mask, &stor_device->alloced_cpus,
 		    cpumask_of_node(cpu_to_node(q_num)));
 
-	num_channels = cpumask_weight(&alloced_mask);
+	num_channels = cpumask_weight(alloced_mask);
 	if (num_channels == 0)
 		return stor_device->device->channel;
 
@@ -1268,7 +1294,7 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 	while (hash_qnum >= num_channels)
 		hash_qnum -= num_channels;
 
-	for_each_cpu(tgt_cpu, &alloced_mask) {
+	for_each_cpu(tgt_cpu, alloced_mask) {
 		if (slot == hash_qnum)
 			break;
 		slot++;
@@ -1285,9 +1311,9 @@ static int storvsc_do_io(struct hv_device *device,
 {
 	struct storvsc_device *stor_device;
 	struct vstor_packet *vstor_packet;
-	struct vmbus_channel *outgoing_channel;
+	struct vmbus_channel *outgoing_channel, *channel;
 	int ret = 0;
-	struct cpumask alloced_mask;
+	struct cpumask *alloced_mask;
 	int tgt_cpu;
 
 	vstor_packet = &request->vstor_packet;
@@ -1301,22 +1327,58 @@ static int storvsc_do_io(struct hv_device *device,
 	/*
	 * Select an appropriate channel to send the request out.
 	 */
-
 	if (stor_device->stor_chns[q_num] != NULL) {
 		outgoing_channel = stor_device->stor_chns[q_num];
-		if (outgoing_channel->target_cpu == smp_processor_id()) {
+		if (outgoing_channel->target_cpu == q_num) {
 			/*
 			 * Ideally, we want to pick a different channel if
 			 * available on the same NUMA node.
 			 */
-			cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+			alloced_mask = &stor_device->cpumask_chns[q_num];
+			cpumask_and(alloced_mask, &stor_device->alloced_cpus,
 				    cpumask_of_node(cpu_to_node(q_num)));
-			for_each_cpu_wrap(tgt_cpu, &alloced_mask,
-					outgoing_channel->target_cpu + 1) {
-				if (tgt_cpu != outgoing_channel->target_cpu) {
-					outgoing_channel =
-						stor_device->stor_chns[tgt_cpu];
-					break;
+
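+			/*
+			 * Scan node-local channels starting just past this
+			 * CPU, skipping any whose free ring space is at or
+			 * below the low-water mark.
+			 */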
+			for_each_cpu_wrap(tgt_cpu, alloced_mask, q_num + 1) {
+				if (tgt_cpu == q_num)
+					continue;
+				channel = stor_device->stor_chns[tgt_cpu];
+				if (hv_get_avail_to_write_percent(
+							&channel->outbound)
+						> ring_avail_percent_lowater) {
+					outgoing_channel = channel;
+					goto found_channel;
+				}
+			}
+
+			/*
+			 * All the other channels on the same NUMA node are
+			 * busy. Try to use the channel on the current CPU.
+			 */
+			if (hv_get_avail_to_write_percent(
+						&outgoing_channel->outbound)
+					> ring_avail_percent_lowater)
+				goto found_channel;
+
+			/*
+			 * If we reach here, all the channels on the current
+			 * NUMA node are busy. Try to find a channel on the
+			 * other NUMA nodes.
+			 */
+			cpumask_andnot(alloced_mask, &stor_device->alloced_cpus,
+				       cpumask_of_node(cpu_to_node(q_num)));
+
+			for_each_cpu(tgt_cpu, alloced_mask) {
+				channel = stor_device->stor_chns[tgt_cpu];
+				if (hv_get_avail_to_write_percent(
+							&channel->outbound)
+						> ring_avail_percent_lowater) {
+					outgoing_channel = channel;
+					goto found_channel;
+				}
 			}
 		}
 	}
@@ -1324,7 +1386,7 @@ static int storvsc_do_io(struct hv_device *device,
 		outgoing_channel = get_og_chn(stor_device, q_num);
 	}
 
-
+found_channel:
 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
 
 	vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) -
@@ -1726,8 +1788,13 @@ static int storvsc_probe(struct hv_device *device,
 		max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel);
 	}
 
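+	/*
+	 * Only channels with more than ring_avail_percent_lowater percent
+	 * of ring space free accept new I/O, so scale capacity accordingly.
+	 */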
-	scsi_driver.can_queue = (max_outstanding_req_per_channel *
-				 (max_sub_channels + 1));
+	scsi_driver.can_queue = max_outstanding_req_per_channel *
+				(max_sub_channels + 1) *
+				(100 - ring_avail_percent_lowater) / 100;
 
 	host = scsi_host_alloc(&scsi_driver,
 			       sizeof(struct hv_host_device));
@@ -1858,6 +1925,7 @@ err_out2:
 
 err_out1:
 	kfree(stor_device->stor_chns);
+	kfree(stor_device->cpumask_chns);
 	kfree(stor_device);
 
 err_out0: