protection_keys.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410
/*
 * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
 *
 * There are examples in here of:
 *  * how to set protection keys on memory
 *  * how to set/clear bits in PKRU (the rights register)
 *  * how to handle SEGV_PKUERR signals and extract pkey-relevant
 *    information from the siginfo
 *
 * Things to add:
 *	make sure KSM and KSM COW breaking works
 *	prefault pages in at malloc, or not
 *	protect MPX bounds tables with protection keys?
 *	make sure VMA splitting/merging is working correctly
 *	OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
 *	look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
 *	do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
 *
 * Compile like this:
 *	gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
 *	gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
 */
  23. #define _GNU_SOURCE
  24. #include <errno.h>
  25. #include <linux/futex.h>
  26. #include <sys/time.h>
  27. #include <sys/syscall.h>
  28. #include <string.h>
  29. #include <stdio.h>
  30. #include <stdint.h>
  31. #include <stdbool.h>
  32. #include <signal.h>
  33. #include <assert.h>
  34. #include <stdlib.h>
  35. #include <ucontext.h>
  36. #include <sys/mman.h>
  37. #include <sys/types.h>
  38. #include <sys/wait.h>
  39. #include <sys/stat.h>
  40. #include <fcntl.h>
  41. #include <unistd.h>
  42. #include <sys/ptrace.h>
  43. #include <setjmp.h>
  44. #include "pkey-helpers.h"
  45. int iteration_nr = 1;
  46. int test_nr;
  47. unsigned int shadow_pkru;
  48. #define HPAGE_SIZE (1UL<<21)
  49. #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
  50. #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
  51. #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
  52. #define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
  53. #define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
  54. #define __stringify_1(x...) #x
  55. #define __stringify(x...) __stringify_1(x)
  56. #define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
  57. int dprint_in_signal;
  58. char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
  59. extern void abort_hooks(void);
  60. #define pkey_assert(condition) do { \
  61. if (!(condition)) { \
  62. dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
  63. __FILE__, __LINE__, \
  64. test_nr, iteration_nr); \
  65. dprintf0("errno at assert: %d", errno); \
  66. abort_hooks(); \
  67. assert(condition); \
  68. } \
  69. } while (0)
  70. #define raw_assert(cond) assert(cond)
  71. void cat_into_file(char *str, char *file)
  72. {
  73. int fd = open(file, O_RDWR);
  74. int ret;
  75. dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
  76. /*
  77. * these need to be raw because they are called under
  78. * pkey_assert()
  79. */
  80. raw_assert(fd >= 0);
  81. ret = write(fd, str, strlen(str));
  82. if (ret != strlen(str)) {
  83. perror("write to file failed");
  84. fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
  85. raw_assert(0);
  86. }
  87. close(fd);
  88. }
  89. #if CONTROL_TRACING > 0
  90. static int warned_tracing;
  91. int tracing_root_ok(void)
  92. {
  93. if (geteuid() != 0) {
  94. if (!warned_tracing)
  95. fprintf(stderr, "WARNING: not run as root, "
  96. "can not do tracing control\n");
  97. warned_tracing = 1;
  98. return 0;
  99. }
  100. return 1;
  101. }
  102. #endif
  103. void tracing_on(void)
  104. {
  105. #if CONTROL_TRACING > 0
  106. #define TRACEDIR "/sys/kernel/debug/tracing"
  107. char pidstr[32];
  108. if (!tracing_root_ok())
  109. return;
  110. sprintf(pidstr, "%d", getpid());
  111. cat_into_file("0", TRACEDIR "/tracing_on");
  112. cat_into_file("\n", TRACEDIR "/trace");
  113. if (1) {
  114. cat_into_file("function_graph", TRACEDIR "/current_tracer");
  115. cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
  116. } else {
  117. cat_into_file("nop", TRACEDIR "/current_tracer");
  118. }
  119. cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
  120. cat_into_file("1", TRACEDIR "/tracing_on");
  121. dprintf1("enabled tracing\n");
  122. #endif
  123. }
  124. void tracing_off(void)
  125. {
  126. #if CONTROL_TRACING > 0
  127. if (!tracing_root_ok())
  128. return;
  129. cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
  130. #endif
  131. }
  132. void abort_hooks(void)
  133. {
  134. fprintf(stderr, "running %s()...\n", __func__);
  135. tracing_off();
  136. #ifdef SLEEP_ON_ABORT
  137. sleep(SLEEP_ON_ABORT);
  138. #endif
  139. }
  140. static inline void __page_o_noops(void)
  141. {
  142. /* 8-bytes of instruction * 512 bytes = 1 page */
  143. asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
  144. }
  145. /*
  146. * This attempts to have roughly a page of instructions followed by a few
  147. * instructions that do a write, and another page of instructions. That
  148. * way, we are pretty sure that the write is in the second page of
  149. * instructions and has at least a page of padding behind it.
  150. *
  151. * *That* lets us be sure to madvise() away the write instruction, which
  152. * will then fault, which makes sure that the fault code handles
  153. * execute-only memory properly.
  154. */
  155. __attribute__((__aligned__(PAGE_SIZE)))
  156. void lots_o_noops_around_write(int *write_to_me)
  157. {
  158. dprintf3("running %s()\n", __func__);
  159. __page_o_noops();
  160. /* Assume this happens in the second page of instructions: */
  161. *write_to_me = __LINE__;
  162. /* pad out by another page: */
  163. __page_o_noops();
  164. dprintf3("%s() done\n", __func__);
  165. }
  166. /* Define some kernel-like types */
  167. #define u8 uint8_t
  168. #define u16 uint16_t
  169. #define u32 uint32_t
  170. #define u64 uint64_t
  171. #ifdef __i386__
  172. #define SYS_mprotect_key 380
  173. #define SYS_pkey_alloc 381
  174. #define SYS_pkey_free 382
  175. #define REG_IP_IDX REG_EIP
  176. #define si_pkey_offset 0x18
  177. #else
  178. #define SYS_mprotect_key 329
  179. #define SYS_pkey_alloc 330
  180. #define SYS_pkey_free 331
  181. #define REG_IP_IDX REG_RIP
  182. #define si_pkey_offset 0x20
  183. #endif
  184. void dump_mem(void *dumpme, int len_bytes)
  185. {
  186. char *c = (void *)dumpme;
  187. int i;
  188. for (i = 0; i < len_bytes; i += sizeof(u64)) {
  189. u64 *ptr = (u64 *)(c + i);
  190. dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
  191. }
  192. }
  193. #define __SI_FAULT (3 << 16)
  194. #define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */
  195. #define SEGV_PKUERR (__SI_FAULT|4)
  196. static char *si_code_str(int si_code)
  197. {
  198. if (si_code & SEGV_MAPERR)
  199. return "SEGV_MAPERR";
  200. if (si_code & SEGV_ACCERR)
  201. return "SEGV_ACCERR";
  202. if (si_code & SEGV_BNDERR)
  203. return "SEGV_BNDERR";
  204. if (si_code & SEGV_PKUERR)
  205. return "SEGV_PKUERR";
  206. return "UNKNOWN";
  207. }
  208. int pkru_faults;
  209. int last_si_pkey = -1;
  210. void signal_handler(int signum, siginfo_t *si, void *vucontext)
  211. {
  212. ucontext_t *uctxt = vucontext;
  213. int trapno;
  214. unsigned long ip;
  215. char *fpregs;
  216. u32 *pkru_ptr;
  217. u64 si_pkey;
  218. u32 *si_pkey_ptr;
  219. int pkru_offset;
  220. fpregset_t fpregset;
  221. dprint_in_signal = 1;
  222. dprintf1(">>>>===============SIGSEGV============================\n");
  223. dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
  224. __rdpkru(), shadow_pkru);
  225. trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
  226. ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
  227. fpregset = uctxt->uc_mcontext.fpregs;
  228. fpregs = (void *)fpregset;
  229. dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
  230. trapno, ip, si_code_str(si->si_code), si->si_code);
  231. #ifdef __i386__
  232. /*
  233. * 32-bit has some extra padding so that userspace can tell whether
  234. * the XSTATE header is present in addition to the "legacy" FPU
  235. * state. We just assume that it is here.
  236. */
  237. fpregs += 0x70;
  238. #endif
  239. pkru_offset = pkru_xstate_offset();
  240. pkru_ptr = (void *)(&fpregs[pkru_offset]);
  241. dprintf1("siginfo: %p\n", si);
  242. dprintf1(" fpregs: %p\n", fpregs);
  243. /*
  244. * If we got a PKRU fault, we *HAVE* to have at least one bit set in
  245. * here.
  246. */
  247. dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
  248. if (DEBUG_LEVEL > 4)
  249. dump_mem(pkru_ptr - 128, 256);
  250. pkey_assert(*pkru_ptr);
  251. si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
  252. dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
  253. dump_mem(si_pkey_ptr - 8, 24);
  254. si_pkey = *si_pkey_ptr;
  255. pkey_assert(si_pkey < NR_PKEYS);
  256. last_si_pkey = si_pkey;
  257. if ((si->si_code == SEGV_MAPERR) ||
  258. (si->si_code == SEGV_ACCERR) ||
  259. (si->si_code == SEGV_BNDERR)) {
  260. printf("non-PK si_code, exiting...\n");
  261. exit(4);
  262. }
  263. dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
  264. /* need __rdpkru() version so we do not do shadow_pkru checking */
  265. dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
  266. dprintf1("si_pkey from siginfo: %jx\n", si_pkey);
  267. *(u64 *)pkru_ptr = 0x00000000;
  268. dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
  269. pkru_faults++;
  270. dprintf1("<<<<==================================================\n");
  271. return;
  272. if (trapno == 14) {
  273. fprintf(stderr,
  274. "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
  275. trapno, ip);
  276. fprintf(stderr, "si_addr %p\n", si->si_addr);
  277. fprintf(stderr, "REG_ERR: %lx\n",
  278. (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
  279. exit(1);
  280. } else {
  281. fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
  282. fprintf(stderr, "si_addr %p\n", si->si_addr);
  283. fprintf(stderr, "REG_ERR: %lx\n",
  284. (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
  285. exit(2);
  286. }
  287. dprint_in_signal = 0;
  288. }
  289. int wait_all_children(void)
  290. {
  291. int status;
  292. return waitpid(-1, &status, 0);
  293. }
  294. void sig_chld(int x)
  295. {
  296. dprint_in_signal = 1;
  297. dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
  298. dprint_in_signal = 0;
  299. }
  300. void setup_sigsegv_handler(void)
  301. {
  302. int r, rs;
  303. struct sigaction newact;
  304. struct sigaction oldact;
  305. /* #PF is mapped to sigsegv */
  306. int signum = SIGSEGV;
  307. newact.sa_handler = 0;
  308. newact.sa_sigaction = signal_handler;
  309. /*sigset_t - signals to block while in the handler */
  310. /* get the old signal mask. */
  311. rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
  312. pkey_assert(rs == 0);
  313. /* call sa_sigaction, not sa_handler*/
  314. newact.sa_flags = SA_SIGINFO;
  315. newact.sa_restorer = 0; /* void(*)(), obsolete */
  316. r = sigaction(signum, &newact, &oldact);
  317. r = sigaction(SIGALRM, &newact, &oldact);
  318. pkey_assert(r == 0);
  319. }
  320. void setup_handlers(void)
  321. {
  322. signal(SIGCHLD, &sig_chld);
  323. setup_sigsegv_handler();
  324. }
  325. pid_t fork_lazy_child(void)
  326. {
  327. pid_t forkret;
  328. forkret = fork();
  329. pkey_assert(forkret >= 0);
  330. dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
  331. if (!forkret) {
  332. /* in the child */
  333. while (1) {
  334. dprintf1("child sleeping...\n");
  335. sleep(30);
  336. }
  337. }
  338. return forkret;
  339. }
  340. void davecmp(void *_a, void *_b, int len)
  341. {
  342. int i;
  343. unsigned long *a = _a;
  344. unsigned long *b = _b;
  345. for (i = 0; i < len / sizeof(*a); i++) {
  346. if (a[i] == b[i])
  347. continue;
  348. dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]);
  349. }
  350. }
  351. void dumpit(char *f)
  352. {
  353. int fd = open(f, O_RDONLY);
  354. char buf[100];
  355. int nr_read;
  356. dprintf2("maps fd: %d\n", fd);
  357. do {
  358. nr_read = read(fd, &buf[0], sizeof(buf));
  359. write(1, buf, nr_read);
  360. } while (nr_read > 0);
  361. close(fd);
  362. }
  363. #define PKEY_DISABLE_ACCESS 0x1
  364. #define PKEY_DISABLE_WRITE 0x2
  365. u32 pkey_get(int pkey, unsigned long flags)
  366. {
  367. u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
  368. u32 pkru = __rdpkru();
  369. u32 shifted_pkru;
  370. u32 masked_pkru;
  371. dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
  372. __func__, pkey, flags, 0, 0);
  373. dprintf2("%s() raw pkru: %x\n", __func__, pkru);
  374. shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
  375. dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
  376. masked_pkru = shifted_pkru & mask;
  377. dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
  378. /*
  379. * shift down the relevant bits to the lowest two, then
  380. * mask off all the other high bits.
  381. */
  382. return masked_pkru;
  383. }
  384. int pkey_set(int pkey, unsigned long rights, unsigned long flags)
  385. {
  386. u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
  387. u32 old_pkru = __rdpkru();
  388. u32 new_pkru;
  389. /* make sure that 'rights' only contains the bits we expect: */
  390. assert(!(rights & ~mask));
  391. /* copy old pkru */
  392. new_pkru = old_pkru;
  393. /* mask out bits from pkey in old value: */
  394. new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
  395. /* OR in new bits for pkey: */
  396. new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
  397. __wrpkru(new_pkru);
  398. dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
  399. __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
  400. return 0;
  401. }
  402. void pkey_disable_set(int pkey, int flags)
  403. {
  404. unsigned long syscall_flags = 0;
  405. int ret;
  406. int pkey_rights;
  407. u32 orig_pkru;
  408. dprintf1("START->%s(%d, 0x%x)\n", __func__,
  409. pkey, flags);
  410. pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
  411. pkey_rights = pkey_get(pkey, syscall_flags);
  412. dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
  413. pkey, pkey, pkey_rights);
  414. pkey_assert(pkey_rights >= 0);
  415. pkey_rights |= flags;
  416. ret = pkey_set(pkey, pkey_rights, syscall_flags);
  417. assert(!ret);
  418. /*pkru and flags have the same format */
  419. shadow_pkru |= flags << (pkey * 2);
  420. dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
  421. pkey_assert(ret >= 0);
  422. pkey_rights = pkey_get(pkey, syscall_flags);
  423. dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
  424. pkey, pkey, pkey_rights);
  425. dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
  426. if (flags)
  427. pkey_assert(rdpkru() > orig_pkru);
  428. dprintf1("END<---%s(%d, 0x%x)\n", __func__,
  429. pkey, flags);
  430. }
  431. void pkey_disable_clear(int pkey, int flags)
  432. {
  433. unsigned long syscall_flags = 0;
  434. int ret;
  435. int pkey_rights = pkey_get(pkey, syscall_flags);
  436. u32 orig_pkru = rdpkru();
  437. pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
  438. dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
  439. pkey, pkey, pkey_rights);
  440. pkey_assert(pkey_rights >= 0);
  441. pkey_rights |= flags;
  442. ret = pkey_set(pkey, pkey_rights, 0);
  443. /* pkru and flags have the same format */
  444. shadow_pkru &= ~(flags << (pkey * 2));
  445. pkey_assert(ret >= 0);
  446. pkey_rights = pkey_get(pkey, syscall_flags);
  447. dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
  448. pkey, pkey, pkey_rights);
  449. dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
  450. if (flags)
  451. assert(rdpkru() > orig_pkru);
  452. }
  453. void pkey_write_allow(int pkey)
  454. {
  455. pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
  456. }
  457. void pkey_write_deny(int pkey)
  458. {
  459. pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
  460. }
  461. void pkey_access_allow(int pkey)
  462. {
  463. pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
  464. }
  465. void pkey_access_deny(int pkey)
  466. {
  467. pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
  468. }
  469. int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
  470. unsigned long pkey)
  471. {
  472. int sret;
  473. dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
  474. ptr, size, orig_prot, pkey);
  475. errno = 0;
  476. sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
  477. if (errno) {
  478. dprintf2("SYS_mprotect_key sret: %d\n", sret);
  479. dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
  480. dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
  481. if (DEBUG_LEVEL >= 2)
  482. perror("SYS_mprotect_pkey");
  483. }
  484. return sret;
  485. }
  486. int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
  487. {
  488. int ret = syscall(SYS_pkey_alloc, flags, init_val);
  489. dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
  490. __func__, flags, init_val, ret, errno);
  491. return ret;
  492. }
  493. int alloc_pkey(void)
  494. {
  495. int ret;
  496. unsigned long init_val = 0x0;
  497. dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
  498. __LINE__, __rdpkru(), shadow_pkru);
  499. ret = sys_pkey_alloc(0, init_val);
  500. /*
  501. * pkey_alloc() sets PKRU, so we need to reflect it in
  502. * shadow_pkru:
  503. */
  504. dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
  505. __LINE__, ret, __rdpkru(), shadow_pkru);
  506. if (ret) {
  507. /* clear both the bits: */
  508. shadow_pkru &= ~(0x3 << (ret * 2));
  509. dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
  510. __LINE__, ret, __rdpkru(), shadow_pkru);
  511. /*
  512. * move the new state in from init_val
  513. * (remember, we cheated and init_val == pkru format)
  514. */
  515. shadow_pkru |= (init_val << (ret * 2));
  516. }
  517. dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
  518. __LINE__, ret, __rdpkru(), shadow_pkru);
  519. dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
  520. /* for shadow checking: */
  521. rdpkru();
  522. dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
  523. __LINE__, ret, __rdpkru(), shadow_pkru);
  524. return ret;
  525. }
  526. int sys_pkey_free(unsigned long pkey)
  527. {
  528. int ret = syscall(SYS_pkey_free, pkey);
  529. dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
  530. return ret;
  531. }
  532. /*
  533. * I had a bug where pkey bits could be set by mprotect() but
  534. * not cleared. This ensures we get lots of random bit sets
  535. * and clears on the vma and pte pkey bits.
  536. */
  537. int alloc_random_pkey(void)
  538. {
  539. int max_nr_pkey_allocs;
  540. int ret;
  541. int i;
  542. int alloced_pkeys[NR_PKEYS];
  543. int nr_alloced = 0;
  544. int random_index;
  545. memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
  546. /* allocate every possible key and make a note of which ones we got */
  547. max_nr_pkey_allocs = NR_PKEYS;
  548. max_nr_pkey_allocs = 1;
  549. for (i = 0; i < max_nr_pkey_allocs; i++) {
  550. int new_pkey = alloc_pkey();
  551. if (new_pkey < 0)
  552. break;
  553. alloced_pkeys[nr_alloced++] = new_pkey;
  554. }
  555. pkey_assert(nr_alloced > 0);
  556. /* select a random one out of the allocated ones */
  557. random_index = rand() % nr_alloced;
  558. ret = alloced_pkeys[random_index];
  559. /* now zero it out so we don't free it next */
  560. alloced_pkeys[random_index] = 0;
  561. /* go through the allocated ones that we did not want and free them */
  562. for (i = 0; i < nr_alloced; i++) {
  563. int free_ret;
  564. if (!alloced_pkeys[i])
  565. continue;
  566. free_ret = sys_pkey_free(alloced_pkeys[i]);
  567. pkey_assert(!free_ret);
  568. }
  569. dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  570. __LINE__, ret, __rdpkru(), shadow_pkru);
  571. return ret;
  572. }
  573. int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
  574. unsigned long pkey)
  575. {
  576. int nr_iterations = random() % 100;
  577. int ret;
  578. while (0) {
  579. int rpkey = alloc_random_pkey();
  580. ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
  581. dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
  582. ptr, size, orig_prot, pkey, ret);
  583. if (nr_iterations-- < 0)
  584. break;
  585. dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  586. __LINE__, ret, __rdpkru(), shadow_pkru);
  587. sys_pkey_free(rpkey);
  588. dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  589. __LINE__, ret, __rdpkru(), shadow_pkru);
  590. }
  591. pkey_assert(pkey < NR_PKEYS);
  592. ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
  593. dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
  594. ptr, size, orig_prot, pkey, ret);
  595. pkey_assert(!ret);
  596. dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  597. __LINE__, ret, __rdpkru(), shadow_pkru);
  598. return ret;
  599. }
  600. struct pkey_malloc_record {
  601. void *ptr;
  602. long size;
  603. };
  604. struct pkey_malloc_record *pkey_malloc_records;
  605. long nr_pkey_malloc_records;
  606. void record_pkey_malloc(void *ptr, long size)
  607. {
  608. long i;
  609. struct pkey_malloc_record *rec = NULL;
  610. for (i = 0; i < nr_pkey_malloc_records; i++) {
  611. rec = &pkey_malloc_records[i];
  612. /* find a free record */
  613. if (rec)
  614. break;
  615. }
  616. if (!rec) {
  617. /* every record is full */
  618. size_t old_nr_records = nr_pkey_malloc_records;
  619. size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
  620. size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
  621. dprintf2("new_nr_records: %zd\n", new_nr_records);
  622. dprintf2("new_size: %zd\n", new_size);
  623. pkey_malloc_records = realloc(pkey_malloc_records, new_size);
  624. pkey_assert(pkey_malloc_records != NULL);
  625. rec = &pkey_malloc_records[nr_pkey_malloc_records];
  626. /*
  627. * realloc() does not initialize memory, so zero it from
  628. * the first new record all the way to the end.
  629. */
  630. for (i = 0; i < new_nr_records - old_nr_records; i++)
  631. memset(rec + i, 0, sizeof(*rec));
  632. }
  633. dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
  634. (int)(rec - pkey_malloc_records), rec, ptr, size);
  635. rec->ptr = ptr;
  636. rec->size = size;
  637. nr_pkey_malloc_records++;
  638. }
  639. void free_pkey_malloc(void *ptr)
  640. {
  641. long i;
  642. int ret;
  643. dprintf3("%s(%p)\n", __func__, ptr);
  644. for (i = 0; i < nr_pkey_malloc_records; i++) {
  645. struct pkey_malloc_record *rec = &pkey_malloc_records[i];
  646. dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
  647. ptr, i, rec, rec->ptr, rec->size);
  648. if ((ptr < rec->ptr) ||
  649. (ptr >= rec->ptr + rec->size))
  650. continue;
  651. dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
  652. ptr, i, rec, rec->ptr, rec->size);
  653. nr_pkey_malloc_records--;
  654. ret = munmap(rec->ptr, rec->size);
  655. dprintf3("munmap ret: %d\n", ret);
  656. pkey_assert(!ret);
  657. dprintf3("clearing rec->ptr, rec: %p\n", rec);
  658. rec->ptr = NULL;
  659. dprintf3("done clearing rec->ptr, rec: %p\n", rec);
  660. return;
  661. }
  662. pkey_assert(false);
  663. }
  664. void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
  665. {
  666. void *ptr;
  667. int ret;
  668. rdpkru();
  669. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  670. size, prot, pkey);
  671. pkey_assert(pkey < NR_PKEYS);
  672. ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  673. pkey_assert(ptr != (void *)-1);
  674. ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
  675. pkey_assert(!ret);
  676. record_pkey_malloc(ptr, size);
  677. rdpkru();
  678. dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
  679. return ptr;
  680. }
  681. void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
  682. {
  683. int ret;
  684. void *ptr;
  685. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  686. size, prot, pkey);
  687. /*
  688. * Guarantee we can fit at least one huge page in the resulting
  689. * allocation by allocating space for 2:
  690. */
  691. size = ALIGN_UP(size, HPAGE_SIZE * 2);
  692. ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  693. pkey_assert(ptr != (void *)-1);
  694. record_pkey_malloc(ptr, size);
  695. mprotect_pkey(ptr, size, prot, pkey);
  696. dprintf1("unaligned ptr: %p\n", ptr);
  697. ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
  698. dprintf1(" aligned ptr: %p\n", ptr);
  699. ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
  700. dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
  701. ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
  702. dprintf1("MADV_WILLNEED ret: %d\n", ret);
  703. memset(ptr, 0, HPAGE_SIZE);
  704. dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
  705. return ptr;
  706. }
  707. int hugetlb_setup_ok;
  708. #define GET_NR_HUGE_PAGES 10
  709. void setup_hugetlbfs(void)
  710. {
  711. int err;
  712. int fd;
  713. int validated_nr_pages;
  714. int i;
  715. char buf[] = "123";
  716. if (geteuid() != 0) {
  717. fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
  718. return;
  719. }
  720. cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
  721. /*
  722. * Now go make sure that we got the pages and that they
  723. * are 2M pages. Someone might have made 1G the default.
  724. */
  725. fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
  726. if (fd < 0) {
  727. perror("opening sysfs 2M hugetlb config");
  728. return;
  729. }
  730. /* -1 to guarantee leaving the trailing \0 */
  731. err = read(fd, buf, sizeof(buf)-1);
  732. close(fd);
  733. if (err <= 0) {
  734. perror("reading sysfs 2M hugetlb config");
  735. return;
  736. }
  737. if (atoi(buf) != GET_NR_HUGE_PAGES) {
  738. fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
  739. buf, GET_NR_HUGE_PAGES);
  740. return;
  741. }
  742. hugetlb_setup_ok = 1;
  743. }
  744. void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
  745. {
  746. void *ptr;
  747. int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
  748. if (!hugetlb_setup_ok)
  749. return PTR_ERR_ENOTSUP;
  750. dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
  751. size = ALIGN_UP(size, HPAGE_SIZE * 2);
  752. pkey_assert(pkey < NR_PKEYS);
  753. ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
  754. pkey_assert(ptr != (void *)-1);
  755. mprotect_pkey(ptr, size, prot, pkey);
  756. record_pkey_malloc(ptr, size);
  757. dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
  758. return ptr;
  759. }
  760. void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
  761. {
  762. void *ptr;
  763. int fd;
  764. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  765. size, prot, pkey);
  766. pkey_assert(pkey < NR_PKEYS);
  767. fd = open("/dax/foo", O_RDWR);
  768. pkey_assert(fd >= 0);
  769. ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
  770. pkey_assert(ptr != (void *)-1);
  771. mprotect_pkey(ptr, size, prot, pkey);
  772. record_pkey_malloc(ptr, size);
  773. dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
  774. close(fd);
  775. return ptr;
  776. }
  777. void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
  778. malloc_pkey_with_mprotect,
  779. malloc_pkey_anon_huge,
  780. malloc_pkey_hugetlb
  781. /* can not do direct with the pkey_mprotect() API:
  782. malloc_pkey_mmap_direct,
  783. malloc_pkey_mmap_dax,
  784. */
  785. };
  786. void *malloc_pkey(long size, int prot, u16 pkey)
  787. {
  788. void *ret;
  789. static int malloc_type;
  790. int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
  791. pkey_assert(pkey < NR_PKEYS);
  792. while (1) {
  793. pkey_assert(malloc_type < nr_malloc_types);
  794. ret = pkey_malloc[malloc_type](size, prot, pkey);
  795. pkey_assert(ret != (void *)-1);
  796. malloc_type++;
  797. if (malloc_type >= nr_malloc_types)
  798. malloc_type = (random()%nr_malloc_types);
  799. /* try again if the malloc_type we tried is unsupported */
  800. if (ret == PTR_ERR_ENOTSUP)
  801. continue;
  802. break;
  803. }
  804. dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
  805. size, prot, pkey, ret);
  806. return ret;
  807. }
  808. int last_pkru_faults;
  809. void expected_pk_fault(int pkey)
  810. {
  811. dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
  812. __func__, last_pkru_faults, pkru_faults);
  813. dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
  814. pkey_assert(last_pkru_faults + 1 == pkru_faults);
  815. pkey_assert(last_si_pkey == pkey);
  816. /*
  817. * The signal handler shold have cleared out PKRU to let the
  818. * test program continue. We now have to restore it.
  819. */
  820. if (__rdpkru() != 0)
  821. pkey_assert(0);
  822. __wrpkru(shadow_pkru);
  823. dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
  824. __func__, shadow_pkru);
  825. last_pkru_faults = pkru_faults;
  826. last_si_pkey = -1;
  827. }
  828. void do_not_expect_pk_fault(void)
  829. {
  830. pkey_assert(last_pkru_faults == pkru_faults);
  831. }
  832. int test_fds[10] = { -1 };
  833. int nr_test_fds;
  834. void __save_test_fd(int fd)
  835. {
  836. pkey_assert(fd >= 0);
  837. pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
  838. test_fds[nr_test_fds] = fd;
  839. nr_test_fds++;
  840. }
  841. int get_test_read_fd(void)
  842. {
  843. int test_fd = open("/etc/passwd", O_RDONLY);
  844. __save_test_fd(test_fd);
  845. return test_fd;
  846. }
  847. void close_test_fds(void)
  848. {
  849. int i;
  850. for (i = 0; i < nr_test_fds; i++) {
  851. if (test_fds[i] < 0)
  852. continue;
  853. close(test_fds[i]);
  854. test_fds[i] = -1;
  855. }
  856. nr_test_fds = 0;
  857. }
  858. #define barrier() __asm__ __volatile__("": : :"memory")
  859. __attribute__((noinline)) int read_ptr(int *ptr)
  860. {
  861. /*
  862. * Keep GCC from optimizing this away somehow
  863. */
  864. barrier();
  865. return *ptr;
  866. }
  867. void test_read_of_write_disabled_region(int *ptr, u16 pkey)
  868. {
  869. int ptr_contents;
  870. dprintf1("disabling write access to PKEY[1], doing read\n");
  871. pkey_write_deny(pkey);
  872. ptr_contents = read_ptr(ptr);
  873. dprintf1("*ptr: %d\n", ptr_contents);
  874. dprintf1("\n");
  875. }
  876. void test_read_of_access_disabled_region(int *ptr, u16 pkey)
  877. {
  878. int ptr_contents;
  879. dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
  880. rdpkru();
  881. pkey_access_deny(pkey);
  882. ptr_contents = read_ptr(ptr);
  883. dprintf1("*ptr: %d\n", ptr_contents);
  884. expected_pk_fault(pkey);
  885. }
  886. void test_write_of_write_disabled_region(int *ptr, u16 pkey)
  887. {
  888. dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
  889. pkey_write_deny(pkey);
  890. *ptr = __LINE__;
  891. expected_pk_fault(pkey);
  892. }
  893. void test_write_of_access_disabled_region(int *ptr, u16 pkey)
  894. {
  895. dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
  896. pkey_access_deny(pkey);
  897. *ptr = __LINE__;
  898. expected_pk_fault(pkey);
  899. }
  900. void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
  901. {
  902. int ret;
  903. int test_fd = get_test_read_fd();
  904. dprintf1("disabling access to PKEY[%02d], "
  905. "having kernel read() to buffer\n", pkey);
  906. pkey_access_deny(pkey);
  907. ret = read(test_fd, ptr, 1);
  908. dprintf1("read ret: %d\n", ret);
  909. pkey_assert(ret);
  910. }
  911. void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
  912. {
  913. int ret;
  914. int test_fd = get_test_read_fd();
  915. pkey_write_deny(pkey);
  916. ret = read(test_fd, ptr, 100);
  917. dprintf1("read ret: %d\n", ret);
  918. if (ret < 0 && (DEBUG_LEVEL > 0))
  919. perror("verbose read result (OK for this to be bad)");
  920. pkey_assert(ret);
  921. }
  922. void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
  923. {
  924. int pipe_ret, vmsplice_ret;
  925. struct iovec iov;
  926. int pipe_fds[2];
  927. pipe_ret = pipe(pipe_fds);
  928. pkey_assert(pipe_ret == 0);
  929. dprintf1("disabling access to PKEY[%02d], "
  930. "having kernel vmsplice from buffer\n", pkey);
  931. pkey_access_deny(pkey);
  932. iov.iov_base = ptr;
  933. iov.iov_len = PAGE_SIZE;
  934. vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
  935. dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
  936. pkey_assert(vmsplice_ret == -1);
  937. close(pipe_fds[0]);
  938. close(pipe_fds[1]);
  939. }
  940. void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
  941. {
  942. int ignored = 0xdada;
  943. int futex_ret;
  944. int some_int = __LINE__;
  945. dprintf1("disabling write to PKEY[%02d], "
  946. "doing futex gunk in buffer\n", pkey);
  947. *ptr = some_int;
  948. pkey_write_deny(pkey);
  949. futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
  950. &ignored, ignored);
  951. if (DEBUG_LEVEL > 0)
  952. perror("futex");
  953. dprintf1("futex() ret: %d\n", futex_ret);
  954. }
  955. /* Assumes that all pkeys other than 'pkey' are unallocated */
  956. void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
  957. {
  958. int err;
  959. int i;
  960. /* Note: 0 is the default pkey, so don't mess with it */
  961. for (i = 1; i < NR_PKEYS; i++) {
  962. if (pkey == i)
  963. continue;
  964. dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
  965. err = sys_pkey_free(i);
  966. pkey_assert(err);
  967. /* not enforced when pkey_get() is not a syscall
  968. err = pkey_get(i, 0);
  969. pkey_assert(err < 0);
  970. */
  971. err = sys_pkey_free(i);
  972. pkey_assert(err);
  973. err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
  974. pkey_assert(err);
  975. }
  976. }
  977. /* Assumes that all pkeys other than 'pkey' are unallocated */
  978. void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
  979. {
  980. int err;
  981. int bad_flag = (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) + 1;
  982. int bad_pkey = NR_PKEYS+99;
  983. /* not enforced when pkey_get() is not a syscall
  984. err = pkey_get(bad_pkey, bad_flag);
  985. pkey_assert(err < 0);
  986. */
  987. /* pass a known-invalid pkey in: */
  988. err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
  989. pkey_assert(err);
  990. }
  991. /* Assumes that all pkeys other than 'pkey' are unallocated */
  992. void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
  993. {
  994. unsigned long flags;
  995. unsigned long init_val;
  996. int err;
  997. int allocated_pkeys[NR_PKEYS] = {0};
  998. int nr_allocated_pkeys = 0;
  999. int i;
  1000. for (i = 0; i < NR_PKEYS*2; i++) {
  1001. int new_pkey;
  1002. dprintf1("%s() alloc loop: %d\n", __func__, i);
  1003. new_pkey = alloc_pkey();
  1004. dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  1005. __LINE__, err, __rdpkru(), shadow_pkru);
  1006. rdpkru(); /* for shadow checking */
  1007. dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
  1008. if ((new_pkey == -1) && (errno == ENOSPC)) {
  1009. dprintf2("%s() failed to allocate pkey after %d tries\n",
  1010. __func__, nr_allocated_pkeys);
  1011. break;
  1012. }
  1013. pkey_assert(nr_allocated_pkeys < NR_PKEYS);
  1014. allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
  1015. }
  1016. dprintf3("%s()::%d\n", __func__, __LINE__);
  1017. /*
  1018. * ensure it did not reach the end of the loop without
  1019. * failure:
  1020. */
  1021. pkey_assert(i < NR_PKEYS*2);
  1022. /*
  1023. * There are 16 pkeys supported in hardware. One is taken
  1024. * up for the default (0) and another can be taken up by
  1025. * an execute-only mapping. Ensure that we can allocate
  1026. * at least 14 (16-2).
  1027. */
  1028. pkey_assert(i >= NR_PKEYS-2);
  1029. for (i = 0; i < nr_allocated_pkeys; i++) {
  1030. err = sys_pkey_free(allocated_pkeys[i]);
  1031. pkey_assert(!err);
  1032. rdpkru(); /* for shadow checking */
  1033. }
  1034. }
  1035. void test_ptrace_of_child(int *ptr, u16 pkey)
  1036. {
  1037. __attribute__((__unused__)) int peek_result;
  1038. pid_t child_pid;
  1039. void *ignored = 0;
  1040. long ret;
  1041. int status;
  1042. /*
  1043. * This is the "control" for our little expermient. Make sure
  1044. * we can always access it when ptracing.
  1045. */
  1046. int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
  1047. int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
  1048. /*
  1049. * Fork a child which is an exact copy of this process, of course.
  1050. * That means we can do all of our tests via ptrace() and then plain
  1051. * memory access and ensure they work differently.
  1052. */
  1053. child_pid = fork_lazy_child();
  1054. dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
  1055. ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
  1056. if (ret)
  1057. perror("attach");
  1058. dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
  1059. pkey_assert(ret != -1);
  1060. ret = waitpid(child_pid, &status, WUNTRACED);
  1061. if ((ret != child_pid) || !(WIFSTOPPED(status))) {
  1062. fprintf(stderr, "weird waitpid result %ld stat %x\n",
  1063. ret, status);
  1064. pkey_assert(0);
  1065. }
  1066. dprintf2("waitpid ret: %ld\n", ret);
  1067. dprintf2("waitpid status: %d\n", status);
  1068. pkey_access_deny(pkey);
  1069. pkey_write_deny(pkey);
  1070. /* Write access, untested for now:
  1071. ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
  1072. pkey_assert(ret != -1);
  1073. dprintf1("poke at %p: %ld\n", peek_at, ret);
  1074. */
  1075. /*
  1076. * Try to access the pkey-protected "ptr" via ptrace:
  1077. */
  1078. ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
  1079. /* expect it to work, without an error: */
  1080. pkey_assert(ret != -1);
  1081. /* Now access from the current task, and expect an exception: */
  1082. peek_result = read_ptr(ptr);
  1083. expected_pk_fault(pkey);
  1084. /*
  1085. * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
  1086. */
  1087. ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
  1088. /* expect it to work, without an error: */
  1089. pkey_assert(ret != -1);
  1090. /* Now access from the current task, and expect NO exception: */
  1091. peek_result = read_ptr(plain_ptr);
  1092. do_not_expect_pk_fault();
  1093. ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
  1094. pkey_assert(ret != -1);
  1095. ret = kill(child_pid, SIGKILL);
  1096. pkey_assert(ret != -1);
  1097. wait(&status);
  1098. free(plain_ptr_unaligned);
  1099. }
  1100. void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
  1101. {
  1102. void *p1;
  1103. int scratch;
  1104. int ptr_contents;
  1105. int ret;
  1106. p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
  1107. dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
  1108. /* lots_o_noops_around_write should be page-aligned already */
  1109. assert(p1 == &lots_o_noops_around_write);
  1110. /* Point 'p1' at the *second* page of the function: */
  1111. p1 += PAGE_SIZE;
  1112. madvise(p1, PAGE_SIZE, MADV_DONTNEED);
  1113. lots_o_noops_around_write(&scratch);
  1114. ptr_contents = read_ptr(p1);
  1115. dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
  1116. ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
  1117. pkey_assert(!ret);
  1118. pkey_access_deny(pkey);
  1119. dprintf2("pkru: %x\n", rdpkru());
  1120. /*
  1121. * Make sure this is an *instruction* fault
  1122. */
  1123. madvise(p1, PAGE_SIZE, MADV_DONTNEED);
  1124. lots_o_noops_around_write(&scratch);
  1125. do_not_expect_pk_fault();
  1126. ptr_contents = read_ptr(p1);
  1127. dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
  1128. expected_pk_fault(pkey);
  1129. }
  1130. void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
  1131. {
  1132. int size = PAGE_SIZE;
  1133. int sret;
  1134. if (cpu_has_pku()) {
  1135. dprintf1("SKIP: %s: no CPU support\n", __func__);
  1136. return;
  1137. }
  1138. sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
  1139. pkey_assert(sret < 0);
  1140. }
  1141. void (*pkey_tests[])(int *ptr, u16 pkey) = {
  1142. test_read_of_write_disabled_region,
  1143. test_read_of_access_disabled_region,
  1144. test_write_of_write_disabled_region,
  1145. test_write_of_access_disabled_region,
  1146. test_kernel_write_of_access_disabled_region,
  1147. test_kernel_write_of_write_disabled_region,
  1148. test_kernel_gup_of_access_disabled_region,
  1149. test_kernel_gup_write_to_write_disabled_region,
  1150. test_executing_on_unreadable_memory,
  1151. test_ptrace_of_child,
  1152. test_pkey_syscalls_on_non_allocated_pkey,
  1153. test_pkey_syscalls_bad_args,
  1154. test_pkey_alloc_exhaust,
  1155. };
  1156. void run_tests_once(void)
  1157. {
  1158. int *ptr;
  1159. int prot = PROT_READ|PROT_WRITE;
  1160. for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
  1161. int pkey;
  1162. int orig_pkru_faults = pkru_faults;
  1163. dprintf1("======================\n");
  1164. dprintf1("test %d preparing...\n", test_nr);
  1165. tracing_on();
  1166. pkey = alloc_random_pkey();
  1167. dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
  1168. ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
  1169. dprintf1("test %d starting...\n", test_nr);
  1170. pkey_tests[test_nr](ptr, pkey);
  1171. dprintf1("freeing test memory: %p\n", ptr);
  1172. free_pkey_malloc(ptr);
  1173. sys_pkey_free(pkey);
  1174. dprintf1("pkru_faults: %d\n", pkru_faults);
  1175. dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
  1176. tracing_off();
  1177. close_test_fds();
  1178. printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
  1179. dprintf1("======================\n\n");
  1180. }
  1181. iteration_nr++;
  1182. }
  1183. void pkey_setup_shadow(void)
  1184. {
  1185. shadow_pkru = __rdpkru();
  1186. }
  1187. int main(void)
  1188. {
  1189. int nr_iterations = 22;
  1190. setup_handlers();
  1191. printf("has pku: %d\n", cpu_has_pku());
  1192. if (!cpu_has_pku()) {
  1193. int size = PAGE_SIZE;
  1194. int *ptr;
  1195. printf("running PKEY tests for unsupported CPU/OS\n");
  1196. ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  1197. assert(ptr != (void *)-1);
  1198. test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
  1199. exit(0);
  1200. }
  1201. pkey_setup_shadow();
  1202. printf("startup pkru: %x\n", rdpkru());
  1203. setup_hugetlbfs();
  1204. while (nr_iterations-- > 0)
  1205. run_tests_once();
  1206. printf("done (all tests OK)\n");
  1207. return 0;
  1208. }