|
@@ -1687,6 +1687,187 @@ exit:
|
|
|
return err;
|
|
|
}
|
|
|
|
|
|
+#ifdef CONFIG_CHECKPOINT_RESTORE
|
|
|
+/*
|
|
|
+ * WARNING: we don't require any capability here so be very careful
|
|
|
+ * in what is allowed for modification from userspace.
|
|
|
+ */
|
|
|
+static int validate_prctl_map(struct prctl_mm_map *prctl_map)
|
|
|
+{
|
|
|
+ unsigned long mmap_max_addr = TASK_SIZE;
|
|
|
+ struct mm_struct *mm = current->mm;
|
|
|
+ int error = -EINVAL, i;
|
|
|
+
|
|
|
+ static const unsigned char offsets[] = {
|
|
|
+ offsetof(struct prctl_mm_map, start_code),
|
|
|
+ offsetof(struct prctl_mm_map, end_code),
|
|
|
+ offsetof(struct prctl_mm_map, start_data),
|
|
|
+ offsetof(struct prctl_mm_map, end_data),
|
|
|
+ offsetof(struct prctl_mm_map, start_brk),
|
|
|
+ offsetof(struct prctl_mm_map, brk),
|
|
|
+ offsetof(struct prctl_mm_map, start_stack),
|
|
|
+ offsetof(struct prctl_mm_map, arg_start),
|
|
|
+ offsetof(struct prctl_mm_map, arg_end),
|
|
|
+ offsetof(struct prctl_mm_map, env_start),
|
|
|
+ offsetof(struct prctl_mm_map, env_end),
|
|
|
+ };
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Make sure the members are not somewhere outside
|
|
|
+ * of allowed address space.
|
|
|
+ */
|
|
|
+ for (i = 0; i < ARRAY_SIZE(offsets); i++) {
|
|
|
+ u64 val = *(u64 *)((char *)prctl_map + offsets[i]);
|
|
|
+
|
|
|
+ if ((unsigned long)val >= mmap_max_addr ||
|
|
|
+ (unsigned long)val < mmap_min_addr)
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Make sure the pairs are ordered.
|
|
|
+ */
|
|
|
+#define __prctl_check_order(__m1, __op, __m2) \
|
|
|
+ ((unsigned long)prctl_map->__m1 __op \
|
|
|
+ (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
|
|
|
+ error = __prctl_check_order(start_code, <, end_code);
|
|
|
+ error |= __prctl_check_order(start_data, <, end_data);
|
|
|
+ error |= __prctl_check_order(start_brk, <=, brk);
|
|
|
+ error |= __prctl_check_order(arg_start, <=, arg_end);
|
|
|
+ error |= __prctl_check_order(env_start, <=, env_end);
|
|
|
+ if (error)
|
|
|
+ goto out;
|
|
|
+#undef __prctl_check_order
|
|
|
+
|
|
|
+ error = -EINVAL;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * @brk should be after @end_data in traditional maps.
|
|
|
+ */
|
|
|
+ if (prctl_map->start_brk <= prctl_map->end_data ||
|
|
|
+ prctl_map->brk <= prctl_map->end_data)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Neither we should allow to override limits if they set.
|
|
|
+ */
|
|
|
+ if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
|
|
|
+ prctl_map->start_brk, prctl_map->end_data,
|
|
|
+ prctl_map->start_data))
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Someone is trying to cheat the auxv vector.
|
|
|
+ */
|
|
|
+ if (prctl_map->auxv_size) {
|
|
|
+ if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Finally, make sure the caller has the rights to
|
|
|
+ * change /proc/pid/exe link: only local root should
|
|
|
+ * be allowed to.
|
|
|
+ */
|
|
|
+ if (prctl_map->exe_fd != (u32)-1) {
|
|
|
+ struct user_namespace *ns = current_user_ns();
|
|
|
+ const struct cred *cred = current_cred();
|
|
|
+
|
|
|
+ if (!uid_eq(cred->uid, make_kuid(ns, 0)) ||
|
|
|
+ !gid_eq(cred->gid, make_kgid(ns, 0)))
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ error = 0;
|
|
|
+out:
|
|
|
+ return error;
|
|
|
+}
|
|
|
+
|
|
|
+static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
|
|
|
+{
|
|
|
+ struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
|
|
|
+ unsigned long user_auxv[AT_VECTOR_SIZE];
|
|
|
+ struct mm_struct *mm = current->mm;
|
|
|
+ int error;
|
|
|
+
|
|
|
+ BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
|
|
|
+ BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);
|
|
|
+
|
|
|
+ if (opt == PR_SET_MM_MAP_SIZE)
|
|
|
+ return put_user((unsigned int)sizeof(prctl_map),
|
|
|
+ (unsigned int __user *)addr);
|
|
|
+
|
|
|
+ if (data_size != sizeof(prctl_map))
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
|
|
|
+ return -EFAULT;
|
|
|
+
|
|
|
+ error = validate_prctl_map(&prctl_map);
|
|
|
+ if (error)
|
|
|
+ return error;
|
|
|
+
|
|
|
+ if (prctl_map.auxv_size) {
|
|
|
+ memset(user_auxv, 0, sizeof(user_auxv));
|
|
|
+ if (copy_from_user(user_auxv,
|
|
|
+ (const void __user *)prctl_map.auxv,
|
|
|
+ prctl_map.auxv_size))
|
|
|
+ return -EFAULT;
|
|
|
+
|
|
|
+ /* Last entry must be AT_NULL as specification requires */
|
|
|
+ user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
|
|
|
+ user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ down_write(&mm->mmap_sem);
|
|
|
+ if (prctl_map.exe_fd != (u32)-1)
|
|
|
+ error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd);
|
|
|
+ downgrade_write(&mm->mmap_sem);
|
|
|
+ if (error)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We don't validate if these members are pointing to
|
|
|
+ * real present VMAs because application may have correspond
|
|
|
+ * VMAs already unmapped and kernel uses these members for statistics
|
|
|
+ * output in procfs mostly, except
|
|
|
+ *
|
|
|
+ * - @start_brk/@brk which are used in do_brk but kernel lookups
|
|
|
+ * for VMAs when updating these memvers so anything wrong written
|
|
|
+ * here cause kernel to swear at userspace program but won't lead
|
|
|
+ * to any problem in kernel itself
|
|
|
+ */
|
|
|
+
|
|
|
+ mm->start_code = prctl_map.start_code;
|
|
|
+ mm->end_code = prctl_map.end_code;
|
|
|
+ mm->start_data = prctl_map.start_data;
|
|
|
+ mm->end_data = prctl_map.end_data;
|
|
|
+ mm->start_brk = prctl_map.start_brk;
|
|
|
+ mm->brk = prctl_map.brk;
|
|
|
+ mm->start_stack = prctl_map.start_stack;
|
|
|
+ mm->arg_start = prctl_map.arg_start;
|
|
|
+ mm->arg_end = prctl_map.arg_end;
|
|
|
+ mm->env_start = prctl_map.env_start;
|
|
|
+ mm->env_end = prctl_map.env_end;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Note this update of @saved_auxv is lockless thus
|
|
|
+ * if someone reads this member in procfs while we're
|
|
|
+ * updating -- it may get partly updated results. It's
|
|
|
+ * known and acceptable trade off: we leave it as is to
|
|
|
+ * not introduce additional locks here making the kernel
|
|
|
+ * more complex.
|
|
|
+ */
|
|
|
+ if (prctl_map.auxv_size)
|
|
|
+ memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
|
|
|
+
|
|
|
+ error = 0;
|
|
|
+out:
|
|
|
+ up_read(&mm->mmap_sem);
|
|
|
+ return error;
|
|
|
+}
|
|
|
+#endif /* CONFIG_CHECKPOINT_RESTORE */
|
|
|
+
|
|
|
static int prctl_set_mm(int opt, unsigned long addr,
|
|
|
unsigned long arg4, unsigned long arg5)
|
|
|
{
|
|
@@ -1694,9 +1875,16 @@ static int prctl_set_mm(int opt, unsigned long addr,
|
|
|
struct vm_area_struct *vma;
|
|
|
int error;
|
|
|
|
|
|
- if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
|
|
|
+ if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
|
|
|
+ opt != PR_SET_MM_MAP &&
|
|
|
+ opt != PR_SET_MM_MAP_SIZE)))
|
|
|
return -EINVAL;
|
|
|
|
|
|
+#ifdef CONFIG_CHECKPOINT_RESTORE
|
|
|
+ if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
|
|
|
+ return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
|
|
|
+#endif
|
|
|
+
|
|
|
if (!capable(CAP_SYS_RESOURCE))
|
|
|
return -EPERM;
|
|
|
|