@@ -75,14 +75,32 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
 #endif
 
 struct thread_info *secondary_ti;
+bool has_big_cores;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
 
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+#define MAX_THREAD_LIST_SIZE	8
+#define THREAD_GROUP_SHARE_L1	1
+struct thread_groups {
+	unsigned int property;
+	unsigned int nr_groups;
+	unsigned int threads_per_group;
+	unsigned int thread_list[MAX_THREAD_LIST_SIZE];
+};
+
+/*
+ * On big-core systems, cpu_l1_cache_map for each CPU corresponds to
+ * the set of its siblings that share the L1 cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
 
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
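
The hunks below call cpu_smallcore_mask(), which is not defined in this file. It presumably lives alongside the existing cpu_sibling_mask()/cpu_core_mask() accessors in arch/powerpc/include/asm/smp.h and simply wraps the new per-CPU map; a minimal sketch of that assumed helper:

static inline struct cpumask *cpu_smallcore_mask(int cpu)
{
	return per_cpu(cpu_smallcore_map, cpu);
}
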
@@ -675,6 +693,185 @@ static void set_cpus_unrelated(int i, int j,
 }
 #endif
 
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ *                      property for the CPU device node @dn and stores
+ *                      the parsed output in the thread_groups
+ *                      structure @tg if the ibm,thread-groups[0]
+ *                      matches @property.
+ *
+ * @dn: The device node of the CPU device.
+ * @tg: Pointer to a thread group structure into which the parsed
+ *      output of "ibm,thread-groups" is stored.
+ * @property: The property of the thread-group that the caller is
+ *            interested in.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * ibm,thread-groups[0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share the L1 and translation cache.
+ *
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
+ *
+ * ibm,thread-groups[2] tells us the number of threads in each such
+ * group.
+ *
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12], it
+ * implies that there are 2 groups of 4 threads each, where the threads
+ * in each group share the L1 and translation cache.
+ *
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
+ * and the "ibm,ppc-interrupt-server#s" of the second group is
+ * {9, 10, 11, 12}.
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if the property does not have a value, and -EOVERFLOW if
+ * the property data isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+			       struct thread_groups *tg,
+			       unsigned int property)
+{
+	int i;
+	u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
+	u32 *thread_list;
+	size_t total_threads;
+	int ret;
+
+	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+					 thread_group_array, 3);
+	if (ret)
+		return ret;
+
+	tg->property = thread_group_array[0];
+	tg->nr_groups = thread_group_array[1];
+	tg->threads_per_group = thread_group_array[2];
+	if (tg->property != property ||
+	    tg->nr_groups < 1 ||
+	    tg->threads_per_group < 1)
+		return -ENODATA;
+
+	total_threads = tg->nr_groups * tg->threads_per_group;
+
+	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+					 thread_group_array,
+					 3 + total_threads);
+	if (ret)
+		return ret;
+
+	thread_list = &thread_group_array[3];
+
+	for (i = 0; i < total_threads; i++)
+		tg->thread_list[i] = thread_list[i];
+
+	return 0;
+}
+
+/*
+ * get_cpu_thread_group_start : Searches tg->thread_list for the thread
+ *                              group that @cpu belongs to.
+ *
+ * @cpu : The logical CPU whose thread group is being searched.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ *       to.
+ *
+ * Returns the index into tg->thread_list that points to the start
+ * of the thread group that @cpu belongs to.
+ *
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
+ * tg->thread_list.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+	int hw_cpu_id = get_hard_smp_processor_id(cpu);
+	int i, j;
+
+	for (i = 0; i < tg->nr_groups; i++) {
+		int group_start = i * tg->threads_per_group;
+
+		for (j = 0; j < tg->threads_per_group; j++) {
+			int idx = group_start + j;
+
+			if (tg->thread_list[idx] == hw_cpu_id)
+				return group_start;
+		}
+	}
+
+	return -1;
+}
+
+static int init_cpu_l1_cache_map(int cpu)
+
+{
+	struct device_node *dn = of_get_cpu_node(cpu, NULL);
+	struct thread_groups tg = {.property = 0,
+				   .nr_groups = 0,
+				   .threads_per_group = 0};
+	int first_thread = cpu_first_thread_sibling(cpu);
+	int i, cpu_group_start = -1, err = 0;
+
+	if (!dn)
+		return -ENODATA;
+
+	err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
+	if (err)
+		goto out;
+
+	zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
+				GFP_KERNEL,
+				cpu_to_node(cpu));
+
+	cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
+
+	if (unlikely(cpu_group_start == -1)) {
+		WARN_ON_ONCE(1);
+		err = -ENODATA;
+		goto out;
+	}
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++) {
+		int i_group_start = get_cpu_thread_group_start(i, &tg);
+
+		if (unlikely(i_group_start == -1)) {
+			WARN_ON_ONCE(1);
+			err = -ENODATA;
+			goto out;
+		}
+
+		if (i_group_start == cpu_group_start)
+			cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
+	}
+
+out:
+	of_node_put(dn);
+	return err;
+}
+
+static int init_big_cores(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int err = init_cpu_l1_cache_map(cpu);
+
+		if (err)
+			return err;
+
+		zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+					GFP_KERNEL,
+					cpu_to_node(cpu));
+	}
+
+	has_big_cores = true;
+	return 0;
+}
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int cpu;
@@ -713,6 +910,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
 	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
 
+	init_big_cores();
+	if (has_big_cores) {
+		cpumask_set_cpu(boot_cpuid,
+				cpu_smallcore_mask(boot_cpuid));
+	}
+
 	if (smp_ops && smp_ops->probe)
 		smp_ops->probe();
 }
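
To make the indexing concrete, here is a standalone userspace sketch (not part of the patch) that walks the example "ibm,thread-groups" value from the comment above in the same way get_cpu_thread_group_start() does; init_cpu_l1_cache_map() then marks every sibling whose group start matches that of the CPU being set up:

#include <stdio.h>

/* Example property from the comment: property 1 (shared L1),
 * 2 groups, 4 threads per group, then the 8 interrupt-server numbers.
 */
static const unsigned int tg[] = { 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12 };

/* Mirrors get_cpu_thread_group_start(): index into the thread list at
 * which the group containing hw_id starts, or -1 if it is not listed.
 */
static int group_start_of(unsigned int hw_id)
{
	unsigned int nr_groups = tg[1], threads_per_group = tg[2];
	const unsigned int *thread_list = &tg[3];
	unsigned int i, j;

	for (i = 0; i < nr_groups; i++) {
		unsigned int group_start = i * threads_per_group;

		for (j = 0; j < threads_per_group; j++)
			if (thread_list[group_start + j] == hw_id)
				return group_start;
	}
	return -1;
}

int main(void)
{
	printf("hw 7  -> %d\n", group_start_of(7));	/* 0: first group  */
	printf("hw 10 -> %d\n", group_start_of(10));	/* 4: second group */
	printf("hw 42 -> %d\n", group_start_of(42));	/* -1: not listed  */
	return 0;
}
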
@@ -1003,10 +1206,28 @@ static void remove_cpu_from_masks(int cpu)
 		set_cpus_unrelated(cpu, i, cpu_core_mask);
 		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
 		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+		if (has_big_cores)
+			set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
 	}
 }
 #endif
 
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+	struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
+	int i, first_thread = cpu_first_thread_sibling(cpu);
+
+	if (!has_big_cores)
+		return;
+
+	cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++) {
+		if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
+			set_cpus_related(i, cpu, cpu_smallcore_mask);
+	}
+}
+
 static void add_cpu_to_masks(int cpu)
 {
 	int first_thread = cpu_first_thread_sibling(cpu);
@@ -1023,6 +1244,7 @@ static void add_cpu_to_masks(int cpu)
 		if (cpu_online(i))
 			set_cpus_related(i, cpu, cpu_sibling_mask);
 
+	add_cpu_to_smallcore_masks(cpu);
 	/*
 	 * Copy the thread sibling mask into the cache sibling mask
 	 * and mark any CPUs that share an L2 with this CPU.