protection_keys.c 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
  4. *
  5. * There are examples in here of:
  6. * * how to set protection keys on memory
  7. * * how to set/clear bits in PKRU (the rights register)
  8. * * how to handle SEGV_PKRU signals and extract pkey-relevant
  9. * information from the siginfo
  10. *
  11. * Things to add:
  12. * make sure KSM and KSM COW breaking works
  13. * prefault pages in at malloc, or not
  14. * protect MPX bounds tables with protection keys?
  15. * make sure VMA splitting/merging is working correctly
  16. * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
  17. * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
  18. * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
  19. *
  20. * Compile like this:
  21. * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
  22. * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
  23. */
  24. #define _GNU_SOURCE
  25. #include <errno.h>
  26. #include <linux/futex.h>
  27. #include <sys/time.h>
  28. #include <sys/syscall.h>
  29. #include <string.h>
  30. #include <stdio.h>
  31. #include <stdint.h>
  32. #include <stdbool.h>
  33. #include <signal.h>
  34. #include <assert.h>
  35. #include <stdlib.h>
  36. #include <ucontext.h>
  37. #include <sys/mman.h>
  38. #include <sys/types.h>
  39. #include <sys/wait.h>
  40. #include <sys/stat.h>
  41. #include <fcntl.h>
  42. #include <unistd.h>
  43. #include <sys/ptrace.h>
  44. #include <setjmp.h>
  45. #include "pkey-helpers.h"
/* Reported in pkey_assert() failure messages. */
int iteration_nr = 1;
/* Reported in pkey_assert() failure messages. */
int test_nr;
/*
 * Software copy of the PKRU value this program expects; it is updated
 * next to the hardware writes in pkey_disable_set()/pkey_disable_clear()
 * and alloc_pkey().
 */
unsigned int shadow_pkru;

/* 2MB (1 << 21): the huge page size used by the huge-page allocators. */
#define HPAGE_SIZE (1UL<<21)
/* Number of elements in a true array (not a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
/* Mask-based rounding: 'align_to' has to be a power of two. */
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
/* Pointer flavours of the above; the result keeps the type of 'p'. */
#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
/* Two levels so that macro arguments are expanded before stringification. */
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)

/* Sentinel returned by an allocator whose backing type is unavailable. */
#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)

/* Set to 1 while a signal handler in this file is running, 0 otherwise. */
int dprint_in_signal;
/* NOTE(review): presumably consumed by the dprintf helpers in pkey-helpers.h - confirm. */
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];

/* Defined further down; declared here because pkey_assert() calls it. */
extern void abort_hooks(void);
/*
 * Test-local assertion.  When 'condition' is false it prints the source
 * location, test_nr, iteration_nr and errno, runs abort_hooks() and then
 * exits with the line number as the exit status.
 *
 * NOTE(review): errno is read after the first dprintf0() call, which
 * could have changed it - confirm whether that matters here.
 */
#define pkey_assert(condition) do {		\
	if (!(condition)) {			\
		dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
				__FILE__, __LINE__,	\
				test_nr, iteration_nr);	\
		dprintf0("errno at assert: %d", errno);	\
		abort_hooks();			\
		exit(__LINE__);			\
	}					\
} while (0)
  71. void cat_into_file(char *str, char *file)
  72. {
  73. int fd = open(file, O_RDWR);
  74. int ret;
  75. dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
  76. /*
  77. * these need to be raw because they are called under
  78. * pkey_assert()
  79. */
  80. if (fd < 0) {
  81. fprintf(stderr, "error opening '%s'\n", str);
  82. perror("error: ");
  83. exit(__LINE__);
  84. }
  85. ret = write(fd, str, strlen(str));
  86. if (ret != strlen(str)) {
  87. perror("write to file failed");
  88. fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
  89. exit(__LINE__);
  90. }
  91. close(fd);
  92. }
  93. #if CONTROL_TRACING > 0
  94. static int warned_tracing;
  95. int tracing_root_ok(void)
  96. {
  97. if (geteuid() != 0) {
  98. if (!warned_tracing)
  99. fprintf(stderr, "WARNING: not run as root, "
  100. "can not do tracing control\n");
  101. warned_tracing = 1;
  102. return 0;
  103. }
  104. return 1;
  105. }
  106. #endif
  107. void tracing_on(void)
  108. {
  109. #if CONTROL_TRACING > 0
  110. #define TRACEDIR "/sys/kernel/debug/tracing"
  111. char pidstr[32];
  112. if (!tracing_root_ok())
  113. return;
  114. sprintf(pidstr, "%d", getpid());
  115. cat_into_file("0", TRACEDIR "/tracing_on");
  116. cat_into_file("\n", TRACEDIR "/trace");
  117. if (1) {
  118. cat_into_file("function_graph", TRACEDIR "/current_tracer");
  119. cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
  120. } else {
  121. cat_into_file("nop", TRACEDIR "/current_tracer");
  122. }
  123. cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
  124. cat_into_file("1", TRACEDIR "/tracing_on");
  125. dprintf1("enabled tracing\n");
  126. #endif
  127. }
  128. void tracing_off(void)
  129. {
  130. #if CONTROL_TRACING > 0
  131. if (!tracing_root_ok())
  132. return;
  133. cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
  134. #endif
  135. }
  136. void abort_hooks(void)
  137. {
  138. fprintf(stderr, "running %s()...\n", __func__);
  139. tracing_off();
  140. #ifdef SLEEP_ON_ABORT
  141. sleep(SLEEP_ON_ABORT);
  142. #endif
  143. }
/*
 * Emit 512 copies of a long-form nop as padding.
 *
 * NOTE(review): the arithmetic in the comment below assumes each nopl
 * encodes to 8 bytes and that a page is 4096 bytes - confirm for every
 * build mode (-m32 / -m64).
 */
static inline void __page_o_noops(void)
{
	/* 8-bytes of instruction * 512 bytes = 1 page */
	asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
}
/*
 * This attempts to have roughly a page of instructions followed by a few
 * instructions that do a write, and another page of instructions. That
 * way, we are pretty sure that the write is in the second page of
 * instructions and has at least a page of padding behind it.
 *
 * *That* lets us be sure to madvise() away the write instruction, which
 * will then fault, which makes sure that the fault code handles
 * execute-only memory properly.
 *
 * @write_to_me: location that receives the store (the value written is
 *               the source line number of the store statement).
 *
 * The function itself is aligned to PAGE_SIZE.
 */
__attribute__((__aligned__(PAGE_SIZE)))
void lots_o_noops_around_write(int *write_to_me)
{
	dprintf3("running %s()\n", __func__);
	__page_o_noops();
	/* Assume this happens in the second page of instructions: */
	*write_to_me = __LINE__;
	/* pad out by another page: */
	__page_o_noops();
	dprintf3("%s() done\n", __func__);
}
/* Define some kernel-like types */
#define u8 uint8_t
#define u16 uint16_t
#define u32 uint32_t
#define u64 uint64_t

/*
 * Per-architecture constants.  The syscall numbers are only fallbacks,
 * used when the system headers do not already define them.
 *
 * REG_IP_IDX:     index of the instruction pointer in uc_mcontext.gregs[]
 * si_pkey_offset: byte offset into siginfo_t at which signal_handler()
 *                 reads the protection key
 */
#ifdef __i386__
#ifndef SYS_mprotect_key
# define SYS_mprotect_key 380
#endif
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 381
# define SYS_pkey_free 382
#endif
#define REG_IP_IDX REG_EIP
#define si_pkey_offset 0x14
#else
#ifndef SYS_mprotect_key
# define SYS_mprotect_key 329
#endif
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 330
# define SYS_pkey_free 331
#endif
#define REG_IP_IDX REG_RIP
#define si_pkey_offset 0x20
#endif
  196. void dump_mem(void *dumpme, int len_bytes)
  197. {
  198. char *c = (void *)dumpme;
  199. int i;
  200. for (i = 0; i < len_bytes; i += sizeof(u64)) {
  201. u64 *ptr = (u64 *)(c + i);
  202. dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
  203. }
  204. }
/*
 * Fallback si_code values for SIGSEGV, defined only when the system
 * headers do not provide them.
 */
/* Failed address bound checks: */
#ifndef SEGV_BNDERR
# define SEGV_BNDERR 3
#endif
/* si_code that signal_handler() treats as a protection-key fault: */
#ifndef SEGV_PKUERR
# define SEGV_PKUERR 4
#endif
  212. static char *si_code_str(int si_code)
  213. {
  214. if (si_code == SEGV_MAPERR)
  215. return "SEGV_MAPERR";
  216. if (si_code == SEGV_ACCERR)
  217. return "SEGV_ACCERR";
  218. if (si_code == SEGV_BNDERR)
  219. return "SEGV_BNDERR";
  220. if (si_code == SEGV_PKUERR)
  221. return "SEGV_PKUERR";
  222. return "UNKNOWN";
  223. }
/* Incremented by signal_handler() each time it handles a fault. */
int pkru_faults;
/* pkey read from siginfo by the most recent signal_handler() run; -1 initially. */
int last_si_pkey = -1;
/*
 * SA_SIGINFO handler installed by setup_sigsegv_handler().
 *
 * It locates the saved PKRU value inside the signal frame's FPU/XSAVE
 * area, reads the faulting pkey out of siginfo at si_pkey_offset,
 * records it in last_si_pkey, bumps pkru_faults and finally zeroes the
 * saved PKRU so that the faulting instruction can continue after the
 * handler returns.
 *
 * The process exits with status 4 if si_code is MAPERR, ACCERR or
 * BNDERR, and pkey_assert() fires if the saved PKRU is 0 or the pkey is
 * out of range.
 *
 * @signum:    signal number (unused)
 * @si:        siginfo for the fault
 * @vucontext: ucontext_t * describing the interrupted context
 */
void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
	ucontext_t *uctxt = vucontext;
	int trapno;
	unsigned long ip;
	char *fpregs;
	u32 *pkru_ptr;
	u64 siginfo_pkey;
	u32 *si_pkey_ptr;
	int pkru_offset;
	fpregset_t fpregset;

	/* tell the dprintf machinery that we are inside a signal handler */
	dprint_in_signal = 1;
	dprintf1(">>>>===============SIGSEGV============================\n");
	dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
			__rdpkru(), shadow_pkru);

	trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
	ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
	fpregset = uctxt->uc_mcontext.fpregs;
	fpregs = (void *)fpregset;

	dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
			trapno, ip, si_code_str(si->si_code), si->si_code);
#ifdef __i386__
	/*
	 * 32-bit has some extra padding so that userspace can tell whether
	 * the XSTATE header is present in addition to the "legacy" FPU
	 * state. We just assume that it is here.
	 */
	fpregs += 0x70;
#endif
	/* byte offset of PKRU within the saved FPU state */
	pkru_offset = pkru_xstate_offset();
	pkru_ptr = (void *)(&fpregs[pkru_offset]);

	dprintf1("siginfo: %p\n", si);
	dprintf1(" fpregs: %p\n", fpregs);
	/*
	 * If we got a PKRU fault, we *HAVE* to have at least one bit set in
	 * here.
	 */
	dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
	/*
	 * NOTE(review): pkru_ptr is a u32 *, so "- 128" steps back 512
	 * bytes while only 256 bytes are dumped - confirm the intended
	 * window.
	 */
	if (DEBUG_LEVEL > 4)
		dump_mem(pkru_ptr - 128, 256);
	pkey_assert(*pkru_ptr);

	/* any other SIGSEGV flavour is unexpected here: bail out */
	if ((si->si_code == SEGV_MAPERR) ||
	    (si->si_code == SEGV_ACCERR) ||
	    (si->si_code == SEGV_BNDERR)) {
		printf("non-PK si_code, exiting...\n");
		exit(4);
	}

	/* the pkey is fetched by raw byte offset instead of a named field */
	si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
	dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
	dump_mem((u8 *)si_pkey_ptr - 8, 24);
	siginfo_pkey = *si_pkey_ptr;
	pkey_assert(siginfo_pkey < NR_PKEYS);
	last_si_pkey = siginfo_pkey;

	dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
	/* need __rdpkru() version so we do not do shadow_pkru checking */
	dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
	dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
	/*
	 * NOTE(review): this stores 8 bytes through a pointer declared as
	 * u32 * - confirm the 4 bytes after the PKRU value may be zeroed.
	 */
	*(u64 *)pkru_ptr = 0x00000000;
	dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
	pkru_faults++;
	dprintf1("<<<<==================================================\n");
	dprint_in_signal = 0;
}
  289. int wait_all_children(void)
  290. {
  291. int status;
  292. return waitpid(-1, &status, 0);
  293. }
  294. void sig_chld(int x)
  295. {
  296. dprint_in_signal = 1;
  297. dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
  298. dprint_in_signal = 0;
  299. }
  300. void setup_sigsegv_handler(void)
  301. {
  302. int r, rs;
  303. struct sigaction newact;
  304. struct sigaction oldact;
  305. /* #PF is mapped to sigsegv */
  306. int signum = SIGSEGV;
  307. newact.sa_handler = 0;
  308. newact.sa_sigaction = signal_handler;
  309. /*sigset_t - signals to block while in the handler */
  310. /* get the old signal mask. */
  311. rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
  312. pkey_assert(rs == 0);
  313. /* call sa_sigaction, not sa_handler*/
  314. newact.sa_flags = SA_SIGINFO;
  315. newact.sa_restorer = 0; /* void(*)(), obsolete */
  316. r = sigaction(signum, &newact, &oldact);
  317. r = sigaction(SIGALRM, &newact, &oldact);
  318. pkey_assert(r == 0);
  319. }
  320. void setup_handlers(void)
  321. {
  322. signal(SIGCHLD, &sig_chld);
  323. setup_sigsegv_handler();
  324. }
  325. pid_t fork_lazy_child(void)
  326. {
  327. pid_t forkret;
  328. forkret = fork();
  329. pkey_assert(forkret >= 0);
  330. dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
  331. if (!forkret) {
  332. /* in the child */
  333. while (1) {
  334. dprintf1("child sleeping...\n");
  335. sleep(30);
  336. }
  337. }
  338. return forkret;
  339. }
/*
 * Per-pkey rights bits, defined only if the system headers lack them.
 * hw_pkey_get()/hw_pkey_set() use the two of them together as the mask
 * for one key's bits inside PKRU.
 */
#ifndef PKEY_DISABLE_ACCESS
# define PKEY_DISABLE_ACCESS 0x1
#endif
#ifndef PKEY_DISABLE_WRITE
# define PKEY_DISABLE_WRITE 0x2
#endif
  346. static u32 hw_pkey_get(int pkey, unsigned long flags)
  347. {
  348. u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
  349. u32 pkru = __rdpkru();
  350. u32 shifted_pkru;
  351. u32 masked_pkru;
  352. dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
  353. __func__, pkey, flags, 0, 0);
  354. dprintf2("%s() raw pkru: %x\n", __func__, pkru);
  355. shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
  356. dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
  357. masked_pkru = shifted_pkru & mask;
  358. dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
  359. /*
  360. * shift down the relevant bits to the lowest two, then
  361. * mask off all the other high bits.
  362. */
  363. return masked_pkru;
  364. }
  365. static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
  366. {
  367. u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
  368. u32 old_pkru = __rdpkru();
  369. u32 new_pkru;
  370. /* make sure that 'rights' only contains the bits we expect: */
  371. assert(!(rights & ~mask));
  372. /* copy old pkru */
  373. new_pkru = old_pkru;
  374. /* mask out bits from pkey in old value: */
  375. new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
  376. /* OR in new bits for pkey: */
  377. new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
  378. __wrpkru(new_pkru);
  379. dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
  380. __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
  381. return 0;
  382. }
  383. void pkey_disable_set(int pkey, int flags)
  384. {
  385. unsigned long syscall_flags = 0;
  386. int ret;
  387. int pkey_rights;
  388. u32 orig_pkru = rdpkru();
  389. dprintf1("START->%s(%d, 0x%x)\n", __func__,
  390. pkey, flags);
  391. pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
  392. pkey_rights = hw_pkey_get(pkey, syscall_flags);
  393. dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
  394. pkey, pkey, pkey_rights);
  395. pkey_assert(pkey_rights >= 0);
  396. pkey_rights |= flags;
  397. ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
  398. assert(!ret);
  399. /*pkru and flags have the same format */
  400. shadow_pkru |= flags << (pkey * 2);
  401. dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
  402. pkey_assert(ret >= 0);
  403. pkey_rights = hw_pkey_get(pkey, syscall_flags);
  404. dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
  405. pkey, pkey, pkey_rights);
  406. dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
  407. if (flags)
  408. pkey_assert(rdpkru() > orig_pkru);
  409. dprintf1("END<---%s(%d, 0x%x)\n", __func__,
  410. pkey, flags);
  411. }
  412. void pkey_disable_clear(int pkey, int flags)
  413. {
  414. unsigned long syscall_flags = 0;
  415. int ret;
  416. int pkey_rights = hw_pkey_get(pkey, syscall_flags);
  417. u32 orig_pkru = rdpkru();
  418. pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
  419. dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
  420. pkey, pkey, pkey_rights);
  421. pkey_assert(pkey_rights >= 0);
  422. pkey_rights |= flags;
  423. ret = hw_pkey_set(pkey, pkey_rights, 0);
  424. /* pkru and flags have the same format */
  425. shadow_pkru &= ~(flags << (pkey * 2));
  426. pkey_assert(ret >= 0);
  427. pkey_rights = hw_pkey_get(pkey, syscall_flags);
  428. dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
  429. pkey, pkey, pkey_rights);
  430. dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
  431. if (flags)
  432. assert(rdpkru() > orig_pkru);
  433. }
  434. void pkey_write_allow(int pkey)
  435. {
  436. pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
  437. }
  438. void pkey_write_deny(int pkey)
  439. {
  440. pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
  441. }
  442. void pkey_access_allow(int pkey)
  443. {
  444. pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
  445. }
  446. void pkey_access_deny(int pkey)
  447. {
  448. pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
  449. }
  450. int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
  451. unsigned long pkey)
  452. {
  453. int sret;
  454. dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
  455. ptr, size, orig_prot, pkey);
  456. errno = 0;
  457. sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
  458. if (errno) {
  459. dprintf2("SYS_mprotect_key sret: %d\n", sret);
  460. dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
  461. dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
  462. if (DEBUG_LEVEL >= 2)
  463. perror("SYS_mprotect_pkey");
  464. }
  465. return sret;
  466. }
  467. int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
  468. {
  469. int ret = syscall(SYS_pkey_alloc, flags, init_val);
  470. dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
  471. __func__, flags, init_val, ret, errno);
  472. return ret;
  473. }
  474. int alloc_pkey(void)
  475. {
  476. int ret;
  477. unsigned long init_val = 0x0;
  478. dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
  479. __LINE__, __rdpkru(), shadow_pkru);
  480. ret = sys_pkey_alloc(0, init_val);
  481. /*
  482. * pkey_alloc() sets PKRU, so we need to reflect it in
  483. * shadow_pkru:
  484. */
  485. dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
  486. __LINE__, ret, __rdpkru(), shadow_pkru);
  487. if (ret) {
  488. /* clear both the bits: */
  489. shadow_pkru &= ~(0x3 << (ret * 2));
  490. dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
  491. __LINE__, ret, __rdpkru(), shadow_pkru);
  492. /*
  493. * move the new state in from init_val
  494. * (remember, we cheated and init_val == pkru format)
  495. */
  496. shadow_pkru |= (init_val << (ret * 2));
  497. }
  498. dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
  499. __LINE__, ret, __rdpkru(), shadow_pkru);
  500. dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
  501. /* for shadow checking: */
  502. rdpkru();
  503. dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
  504. __LINE__, ret, __rdpkru(), shadow_pkru);
  505. return ret;
  506. }
  507. int sys_pkey_free(unsigned long pkey)
  508. {
  509. int ret = syscall(SYS_pkey_free, pkey);
  510. dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
  511. return ret;
  512. }
  513. /*
  514. * I had a bug where pkey bits could be set by mprotect() but
  515. * not cleared. This ensures we get lots of random bit sets
  516. * and clears on the vma and pte pkey bits.
  517. */
  518. int alloc_random_pkey(void)
  519. {
  520. int max_nr_pkey_allocs;
  521. int ret;
  522. int i;
  523. int alloced_pkeys[NR_PKEYS];
  524. int nr_alloced = 0;
  525. int random_index;
  526. memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
  527. /* allocate every possible key and make a note of which ones we got */
  528. max_nr_pkey_allocs = NR_PKEYS;
  529. max_nr_pkey_allocs = 1;
  530. for (i = 0; i < max_nr_pkey_allocs; i++) {
  531. int new_pkey = alloc_pkey();
  532. if (new_pkey < 0)
  533. break;
  534. alloced_pkeys[nr_alloced++] = new_pkey;
  535. }
  536. pkey_assert(nr_alloced > 0);
  537. /* select a random one out of the allocated ones */
  538. random_index = rand() % nr_alloced;
  539. ret = alloced_pkeys[random_index];
  540. /* now zero it out so we don't free it next */
  541. alloced_pkeys[random_index] = 0;
  542. /* go through the allocated ones that we did not want and free them */
  543. for (i = 0; i < nr_alloced; i++) {
  544. int free_ret;
  545. if (!alloced_pkeys[i])
  546. continue;
  547. free_ret = sys_pkey_free(alloced_pkeys[i]);
  548. pkey_assert(!free_ret);
  549. }
  550. dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  551. __LINE__, ret, __rdpkru(), shadow_pkru);
  552. return ret;
  553. }
  554. int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
  555. unsigned long pkey)
  556. {
  557. int nr_iterations = random() % 100;
  558. int ret;
  559. while (0) {
  560. int rpkey = alloc_random_pkey();
  561. ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
  562. dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
  563. ptr, size, orig_prot, pkey, ret);
  564. if (nr_iterations-- < 0)
  565. break;
  566. dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  567. __LINE__, ret, __rdpkru(), shadow_pkru);
  568. sys_pkey_free(rpkey);
  569. dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  570. __LINE__, ret, __rdpkru(), shadow_pkru);
  571. }
  572. pkey_assert(pkey < NR_PKEYS);
  573. ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
  574. dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
  575. ptr, size, orig_prot, pkey, ret);
  576. pkey_assert(!ret);
  577. dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  578. __LINE__, ret, __rdpkru(), shadow_pkru);
  579. return ret;
  580. }
/* One mapping handed out by the malloc_pkey_*() allocators. */
struct pkey_malloc_record {
	void *ptr;	/* start of the mapping; set to NULL when freed */
	long size;	/* length in bytes, as passed to munmap() */
	int prot;	/* protection the mapping was requested with */
};
/* realloc()-grown array of records */
struct pkey_malloc_record *pkey_malloc_records;
/* record filled in by the most recent record_pkey_malloc() call */
struct pkey_malloc_record *pkey_last_malloc_record;
/* loop bound used when scanning pkey_malloc_records */
long nr_pkey_malloc_records;
  589. void record_pkey_malloc(void *ptr, long size, int prot)
  590. {
  591. long i;
  592. struct pkey_malloc_record *rec = NULL;
  593. for (i = 0; i < nr_pkey_malloc_records; i++) {
  594. rec = &pkey_malloc_records[i];
  595. /* find a free record */
  596. if (rec)
  597. break;
  598. }
  599. if (!rec) {
  600. /* every record is full */
  601. size_t old_nr_records = nr_pkey_malloc_records;
  602. size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
  603. size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
  604. dprintf2("new_nr_records: %zd\n", new_nr_records);
  605. dprintf2("new_size: %zd\n", new_size);
  606. pkey_malloc_records = realloc(pkey_malloc_records, new_size);
  607. pkey_assert(pkey_malloc_records != NULL);
  608. rec = &pkey_malloc_records[nr_pkey_malloc_records];
  609. /*
  610. * realloc() does not initialize memory, so zero it from
  611. * the first new record all the way to the end.
  612. */
  613. for (i = 0; i < new_nr_records - old_nr_records; i++)
  614. memset(rec + i, 0, sizeof(*rec));
  615. }
  616. dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
  617. (int)(rec - pkey_malloc_records), rec, ptr, size);
  618. rec->ptr = ptr;
  619. rec->size = size;
  620. rec->prot = prot;
  621. pkey_last_malloc_record = rec;
  622. nr_pkey_malloc_records++;
  623. }
  624. void free_pkey_malloc(void *ptr)
  625. {
  626. long i;
  627. int ret;
  628. dprintf3("%s(%p)\n", __func__, ptr);
  629. for (i = 0; i < nr_pkey_malloc_records; i++) {
  630. struct pkey_malloc_record *rec = &pkey_malloc_records[i];
  631. dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
  632. ptr, i, rec, rec->ptr, rec->size);
  633. if ((ptr < rec->ptr) ||
  634. (ptr >= rec->ptr + rec->size))
  635. continue;
  636. dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
  637. ptr, i, rec, rec->ptr, rec->size);
  638. nr_pkey_malloc_records--;
  639. ret = munmap(rec->ptr, rec->size);
  640. dprintf3("munmap ret: %d\n", ret);
  641. pkey_assert(!ret);
  642. dprintf3("clearing rec->ptr, rec: %p\n", rec);
  643. rec->ptr = NULL;
  644. dprintf3("done clearing rec->ptr, rec: %p\n", rec);
  645. return;
  646. }
  647. pkey_assert(false);
  648. }
  649. void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
  650. {
  651. void *ptr;
  652. int ret;
  653. rdpkru();
  654. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  655. size, prot, pkey);
  656. pkey_assert(pkey < NR_PKEYS);
  657. ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  658. pkey_assert(ptr != (void *)-1);
  659. ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
  660. pkey_assert(!ret);
  661. record_pkey_malloc(ptr, size, prot);
  662. rdpkru();
  663. dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
  664. return ptr;
  665. }
  666. void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
  667. {
  668. int ret;
  669. void *ptr;
  670. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  671. size, prot, pkey);
  672. /*
  673. * Guarantee we can fit at least one huge page in the resulting
  674. * allocation by allocating space for 2:
  675. */
  676. size = ALIGN_UP(size, HPAGE_SIZE * 2);
  677. ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  678. pkey_assert(ptr != (void *)-1);
  679. record_pkey_malloc(ptr, size, prot);
  680. mprotect_pkey(ptr, size, prot, pkey);
  681. dprintf1("unaligned ptr: %p\n", ptr);
  682. ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
  683. dprintf1(" aligned ptr: %p\n", ptr);
  684. ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
  685. dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
  686. ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
  687. dprintf1("MADV_WILLNEED ret: %d\n", ret);
  688. memset(ptr, 0, HPAGE_SIZE);
  689. dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
  690. return ptr;
  691. }
/* Set to 1 by setup_hugetlbfs() once the 2M huge page pool is confirmed. */
int hugetlb_setup_ok;
/* Number of huge pages setup_hugetlbfs() requests and expects to read back. */
#define GET_NR_HUGE_PAGES 10
  694. void setup_hugetlbfs(void)
  695. {
  696. int err;
  697. int fd;
  698. char buf[] = "123";
  699. if (geteuid() != 0) {
  700. fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
  701. return;
  702. }
  703. cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
  704. /*
  705. * Now go make sure that we got the pages and that they
  706. * are 2M pages. Someone might have made 1G the default.
  707. */
  708. fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
  709. if (fd < 0) {
  710. perror("opening sysfs 2M hugetlb config");
  711. return;
  712. }
  713. /* -1 to guarantee leaving the trailing \0 */
  714. err = read(fd, buf, sizeof(buf)-1);
  715. close(fd);
  716. if (err <= 0) {
  717. perror("reading sysfs 2M hugetlb config");
  718. return;
  719. }
  720. if (atoi(buf) != GET_NR_HUGE_PAGES) {
  721. fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
  722. buf, GET_NR_HUGE_PAGES);
  723. return;
  724. }
  725. hugetlb_setup_ok = 1;
  726. }
  727. void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
  728. {
  729. void *ptr;
  730. int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
  731. if (!hugetlb_setup_ok)
  732. return PTR_ERR_ENOTSUP;
  733. dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
  734. size = ALIGN_UP(size, HPAGE_SIZE * 2);
  735. pkey_assert(pkey < NR_PKEYS);
  736. ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
  737. pkey_assert(ptr != (void *)-1);
  738. mprotect_pkey(ptr, size, prot, pkey);
  739. record_pkey_malloc(ptr, size, prot);
  740. dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
  741. return ptr;
  742. }
  743. void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
  744. {
  745. void *ptr;
  746. int fd;
  747. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  748. size, prot, pkey);
  749. pkey_assert(pkey < NR_PKEYS);
  750. fd = open("/dax/foo", O_RDWR);
  751. pkey_assert(fd >= 0);
  752. ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
  753. pkey_assert(ptr != (void *)-1);
  754. mprotect_pkey(ptr, size, prot, pkey);
  755. record_pkey_malloc(ptr, size, prot);
  756. dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
  757. close(fd);
  758. return ptr;
  759. }
  760. void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
  761. malloc_pkey_with_mprotect,
  762. malloc_pkey_anon_huge,
  763. malloc_pkey_hugetlb
  764. /* can not do direct with the pkey_mprotect() API:
  765. malloc_pkey_mmap_direct,
  766. malloc_pkey_mmap_dax,
  767. */
  768. };
  769. void *malloc_pkey(long size, int prot, u16 pkey)
  770. {
  771. void *ret;
  772. static int malloc_type;
  773. int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
  774. pkey_assert(pkey < NR_PKEYS);
  775. while (1) {
  776. pkey_assert(malloc_type < nr_malloc_types);
  777. ret = pkey_malloc[malloc_type](size, prot, pkey);
  778. pkey_assert(ret != (void *)-1);
  779. malloc_type++;
  780. if (malloc_type >= nr_malloc_types)
  781. malloc_type = (random()%nr_malloc_types);
  782. /* try again if the malloc_type we tried is unsupported */
  783. if (ret == PTR_ERR_ENOTSUP)
  784. continue;
  785. break;
  786. }
  787. dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
  788. size, prot, pkey, ret);
  789. return ret;
  790. }
  791. int last_pkru_faults;
  792. #define UNKNOWN_PKEY -2
  793. void expected_pk_fault(int pkey)
  794. {
  795. dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
  796. __func__, last_pkru_faults, pkru_faults);
  797. dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
  798. pkey_assert(last_pkru_faults + 1 == pkru_faults);
  799. /*
  800. * For exec-only memory, we do not know the pkey in
  801. * advance, so skip this check.
  802. */
  803. if (pkey != UNKNOWN_PKEY)
  804. pkey_assert(last_si_pkey == pkey);
  805. /*
  806. * The signal handler shold have cleared out PKRU to let the
  807. * test program continue. We now have to restore it.
  808. */
  809. if (__rdpkru() != 0)
  810. pkey_assert(0);
  811. __wrpkru(shadow_pkru);
  812. dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
  813. __func__, shadow_pkru);
  814. last_pkru_faults = pkru_faults;
  815. last_si_pkey = -1;
  816. }
  817. #define do_not_expect_pk_fault(msg) do { \
  818. if (last_pkru_faults != pkru_faults) \
  819. dprintf0("unexpected PK fault: %s\n", msg); \
  820. pkey_assert(last_pkru_faults == pkru_faults); \
  821. } while (0)
  822. int test_fds[10] = { -1 };
  823. int nr_test_fds;
  824. void __save_test_fd(int fd)
  825. {
  826. pkey_assert(fd >= 0);
  827. pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
  828. test_fds[nr_test_fds] = fd;
  829. nr_test_fds++;
  830. }
  831. int get_test_read_fd(void)
  832. {
  833. int test_fd = open("/etc/passwd", O_RDONLY);
  834. __save_test_fd(test_fd);
  835. return test_fd;
  836. }
  837. void close_test_fds(void)
  838. {
  839. int i;
  840. for (i = 0; i < nr_test_fds; i++) {
  841. if (test_fds[i] < 0)
  842. continue;
  843. close(test_fds[i]);
  844. test_fds[i] = -1;
  845. }
  846. nr_test_fds = 0;
  847. }
  848. #define barrier() __asm__ __volatile__("": : :"memory")
  849. __attribute__((noinline)) int read_ptr(int *ptr)
  850. {
  851. /*
  852. * Keep GCC from optimizing this away somehow
  853. */
  854. barrier();
  855. return *ptr;
  856. }
  857. void test_read_of_write_disabled_region(int *ptr, u16 pkey)
  858. {
  859. int ptr_contents;
  860. dprintf1("disabling write access to PKEY[1], doing read\n");
  861. pkey_write_deny(pkey);
  862. ptr_contents = read_ptr(ptr);
  863. dprintf1("*ptr: %d\n", ptr_contents);
  864. dprintf1("\n");
  865. }
  866. void test_read_of_access_disabled_region(int *ptr, u16 pkey)
  867. {
  868. int ptr_contents;
  869. dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
  870. rdpkru();
  871. pkey_access_deny(pkey);
  872. ptr_contents = read_ptr(ptr);
  873. dprintf1("*ptr: %d\n", ptr_contents);
  874. expected_pk_fault(pkey);
  875. }
  876. void test_write_of_write_disabled_region(int *ptr, u16 pkey)
  877. {
  878. dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
  879. pkey_write_deny(pkey);
  880. *ptr = __LINE__;
  881. expected_pk_fault(pkey);
  882. }
  883. void test_write_of_access_disabled_region(int *ptr, u16 pkey)
  884. {
  885. dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
  886. pkey_access_deny(pkey);
  887. *ptr = __LINE__;
  888. expected_pk_fault(pkey);
  889. }
  890. void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
  891. {
  892. int ret;
  893. int test_fd = get_test_read_fd();
  894. dprintf1("disabling access to PKEY[%02d], "
  895. "having kernel read() to buffer\n", pkey);
  896. pkey_access_deny(pkey);
  897. ret = read(test_fd, ptr, 1);
  898. dprintf1("read ret: %d\n", ret);
  899. pkey_assert(ret);
  900. }
  901. void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
  902. {
  903. int ret;
  904. int test_fd = get_test_read_fd();
  905. pkey_write_deny(pkey);
  906. ret = read(test_fd, ptr, 100);
  907. dprintf1("read ret: %d\n", ret);
  908. if (ret < 0 && (DEBUG_LEVEL > 0))
  909. perror("verbose read result (OK for this to be bad)");
  910. pkey_assert(ret);
  911. }
  912. void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
  913. {
  914. int pipe_ret, vmsplice_ret;
  915. struct iovec iov;
  916. int pipe_fds[2];
  917. pipe_ret = pipe(pipe_fds);
  918. pkey_assert(pipe_ret == 0);
  919. dprintf1("disabling access to PKEY[%02d], "
  920. "having kernel vmsplice from buffer\n", pkey);
  921. pkey_access_deny(pkey);
  922. iov.iov_base = ptr;
  923. iov.iov_len = PAGE_SIZE;
  924. vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
  925. dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
  926. pkey_assert(vmsplice_ret == -1);
  927. close(pipe_fds[0]);
  928. close(pipe_fds[1]);
  929. }
  930. void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
  931. {
  932. int ignored = 0xdada;
  933. int futex_ret;
  934. int some_int = __LINE__;
  935. dprintf1("disabling write to PKEY[%02d], "
  936. "doing futex gunk in buffer\n", pkey);
  937. *ptr = some_int;
  938. pkey_write_deny(pkey);
  939. futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
  940. &ignored, ignored);
  941. if (DEBUG_LEVEL > 0)
  942. perror("futex");
  943. dprintf1("futex() ret: %d\n", futex_ret);
  944. }
  945. /* Assumes that all pkeys other than 'pkey' are unallocated */
  946. void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
  947. {
  948. int err;
  949. int i;
  950. /* Note: 0 is the default pkey, so don't mess with it */
  951. for (i = 1; i < NR_PKEYS; i++) {
  952. if (pkey == i)
  953. continue;
  954. dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
  955. err = sys_pkey_free(i);
  956. pkey_assert(err);
  957. err = sys_pkey_free(i);
  958. pkey_assert(err);
  959. err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
  960. pkey_assert(err);
  961. }
  962. }
  963. /* Assumes that all pkeys other than 'pkey' are unallocated */
  964. void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
  965. {
  966. int err;
  967. int bad_pkey = NR_PKEYS+99;
  968. /* pass a known-invalid pkey in: */
  969. err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
  970. pkey_assert(err);
  971. }
  972. /* Assumes that all pkeys other than 'pkey' are unallocated */
  973. void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
  974. {
  975. int err;
  976. int allocated_pkeys[NR_PKEYS] = {0};
  977. int nr_allocated_pkeys = 0;
  978. int i;
  979. for (i = 0; i < NR_PKEYS*2; i++) {
  980. int new_pkey;
  981. dprintf1("%s() alloc loop: %d\n", __func__, i);
  982. new_pkey = alloc_pkey();
  983. dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
  984. __LINE__, err, __rdpkru(), shadow_pkru);
  985. rdpkru(); /* for shadow checking */
  986. dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
  987. if ((new_pkey == -1) && (errno == ENOSPC)) {
  988. dprintf2("%s() failed to allocate pkey after %d tries\n",
  989. __func__, nr_allocated_pkeys);
  990. break;
  991. }
  992. pkey_assert(nr_allocated_pkeys < NR_PKEYS);
  993. allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
  994. }
  995. dprintf3("%s()::%d\n", __func__, __LINE__);
  996. /*
  997. * ensure it did not reach the end of the loop without
  998. * failure:
  999. */
  1000. pkey_assert(i < NR_PKEYS*2);
  1001. /*
  1002. * There are 16 pkeys supported in hardware. Three are
  1003. * allocated by the time we get here:
  1004. * 1. The default key (0)
  1005. * 2. One possibly consumed by an execute-only mapping.
  1006. * 3. One allocated by the test code and passed in via
  1007. * 'pkey' to this function.
  1008. * Ensure that we can allocate at least another 13 (16-3).
  1009. */
  1010. pkey_assert(i >= NR_PKEYS-3);
  1011. for (i = 0; i < nr_allocated_pkeys; i++) {
  1012. err = sys_pkey_free(allocated_pkeys[i]);
  1013. pkey_assert(!err);
  1014. rdpkru(); /* for shadow checking */
  1015. }
  1016. }
  1017. /*
  1018. * pkey 0 is special. It is allocated by default, so you do not
  1019. * have to call pkey_alloc() to use it first. Make sure that it
  1020. * is usable.
  1021. */
  1022. void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
  1023. {
  1024. long size;
  1025. int prot;
  1026. assert(pkey_last_malloc_record);
  1027. size = pkey_last_malloc_record->size;
  1028. /*
  1029. * This is a bit of a hack. But mprotect() requires
  1030. * huge-page-aligned sizes when operating on hugetlbfs.
  1031. * So, make sure that we use something that's a multiple
  1032. * of a huge page when we can.
  1033. */
  1034. if (size >= HPAGE_SIZE)
  1035. size = HPAGE_SIZE;
  1036. prot = pkey_last_malloc_record->prot;
  1037. /* Use pkey 0 */
  1038. mprotect_pkey(ptr, size, prot, 0);
  1039. /* Make sure that we can set it back to the original pkey. */
  1040. mprotect_pkey(ptr, size, prot, pkey);
  1041. }
  1042. void test_ptrace_of_child(int *ptr, u16 pkey)
  1043. {
  1044. __attribute__((__unused__)) int peek_result;
  1045. pid_t child_pid;
  1046. void *ignored = 0;
  1047. long ret;
  1048. int status;
  1049. /*
  1050. * This is the "control" for our little expermient. Make sure
  1051. * we can always access it when ptracing.
  1052. */
  1053. int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
  1054. int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
  1055. /*
  1056. * Fork a child which is an exact copy of this process, of course.
  1057. * That means we can do all of our tests via ptrace() and then plain
  1058. * memory access and ensure they work differently.
  1059. */
  1060. child_pid = fork_lazy_child();
  1061. dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
  1062. ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
  1063. if (ret)
  1064. perror("attach");
  1065. dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
  1066. pkey_assert(ret != -1);
  1067. ret = waitpid(child_pid, &status, WUNTRACED);
  1068. if ((ret != child_pid) || !(WIFSTOPPED(status))) {
  1069. fprintf(stderr, "weird waitpid result %ld stat %x\n",
  1070. ret, status);
  1071. pkey_assert(0);
  1072. }
  1073. dprintf2("waitpid ret: %ld\n", ret);
  1074. dprintf2("waitpid status: %d\n", status);
  1075. pkey_access_deny(pkey);
  1076. pkey_write_deny(pkey);
  1077. /* Write access, untested for now:
  1078. ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
  1079. pkey_assert(ret != -1);
  1080. dprintf1("poke at %p: %ld\n", peek_at, ret);
  1081. */
  1082. /*
  1083. * Try to access the pkey-protected "ptr" via ptrace:
  1084. */
  1085. ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
  1086. /* expect it to work, without an error: */
  1087. pkey_assert(ret != -1);
  1088. /* Now access from the current task, and expect an exception: */
  1089. peek_result = read_ptr(ptr);
  1090. expected_pk_fault(pkey);
  1091. /*
  1092. * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
  1093. */
  1094. ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
  1095. /* expect it to work, without an error: */
  1096. pkey_assert(ret != -1);
  1097. /* Now access from the current task, and expect NO exception: */
  1098. peek_result = read_ptr(plain_ptr);
  1099. do_not_expect_pk_fault("read plain pointer after ptrace");
  1100. ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
  1101. pkey_assert(ret != -1);
  1102. ret = kill(child_pid, SIGKILL);
  1103. pkey_assert(ret != -1);
  1104. wait(&status);
  1105. free(plain_ptr_unaligned);
  1106. }
  1107. void *get_pointer_to_instructions(void)
  1108. {
  1109. void *p1;
  1110. p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
  1111. dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
  1112. /* lots_o_noops_around_write should be page-aligned already */
  1113. assert(p1 == &lots_o_noops_around_write);
  1114. /* Point 'p1' at the *second* page of the function: */
  1115. p1 += PAGE_SIZE;
  1116. /*
  1117. * Try to ensure we fault this in on next touch to ensure
  1118. * we get an instruction fault as opposed to a data one
  1119. */
  1120. madvise(p1, PAGE_SIZE, MADV_DONTNEED);
  1121. return p1;
  1122. }
  1123. void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
  1124. {
  1125. void *p1;
  1126. int scratch;
  1127. int ptr_contents;
  1128. int ret;
  1129. p1 = get_pointer_to_instructions();
  1130. lots_o_noops_around_write(&scratch);
  1131. ptr_contents = read_ptr(p1);
  1132. dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
  1133. ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
  1134. pkey_assert(!ret);
  1135. pkey_access_deny(pkey);
  1136. dprintf2("pkru: %x\n", rdpkru());
  1137. /*
  1138. * Make sure this is an *instruction* fault
  1139. */
  1140. madvise(p1, PAGE_SIZE, MADV_DONTNEED);
  1141. lots_o_noops_around_write(&scratch);
  1142. do_not_expect_pk_fault("executing on PROT_EXEC memory");
  1143. ptr_contents = read_ptr(p1);
  1144. dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
  1145. expected_pk_fault(pkey);
  1146. }
  1147. void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
  1148. {
  1149. void *p1;
  1150. int scratch;
  1151. int ptr_contents;
  1152. int ret;
  1153. dprintf1("%s() start\n", __func__);
  1154. p1 = get_pointer_to_instructions();
  1155. lots_o_noops_around_write(&scratch);
  1156. ptr_contents = read_ptr(p1);
  1157. dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
  1158. /* Use a *normal* mprotect(), not mprotect_pkey(): */
  1159. ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
  1160. pkey_assert(!ret);
  1161. dprintf2("pkru: %x\n", rdpkru());
  1162. /* Make sure this is an *instruction* fault */
  1163. madvise(p1, PAGE_SIZE, MADV_DONTNEED);
  1164. lots_o_noops_around_write(&scratch);
  1165. do_not_expect_pk_fault("executing on PROT_EXEC memory");
  1166. ptr_contents = read_ptr(p1);
  1167. dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
  1168. expected_pk_fault(UNKNOWN_PKEY);
  1169. /*
  1170. * Put the memory back to non-PROT_EXEC. Should clear the
  1171. * exec-only pkey off the VMA and allow it to be readable
  1172. * again. Go to PROT_NONE first to check for a kernel bug
  1173. * that did not clear the pkey when doing PROT_NONE.
  1174. */
  1175. ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
  1176. pkey_assert(!ret);
  1177. ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
  1178. pkey_assert(!ret);
  1179. ptr_contents = read_ptr(p1);
  1180. do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
  1181. }
  1182. void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
  1183. {
  1184. int size = PAGE_SIZE;
  1185. int sret;
  1186. if (cpu_has_pku()) {
  1187. dprintf1("SKIP: %s: no CPU support\n", __func__);
  1188. return;
  1189. }
  1190. sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
  1191. pkey_assert(sret < 0);
  1192. }
  1193. void (*pkey_tests[])(int *ptr, u16 pkey) = {
  1194. test_read_of_write_disabled_region,
  1195. test_read_of_access_disabled_region,
  1196. test_write_of_write_disabled_region,
  1197. test_write_of_access_disabled_region,
  1198. test_kernel_write_of_access_disabled_region,
  1199. test_kernel_write_of_write_disabled_region,
  1200. test_kernel_gup_of_access_disabled_region,
  1201. test_kernel_gup_write_to_write_disabled_region,
  1202. test_executing_on_unreadable_memory,
  1203. test_implicit_mprotect_exec_only_memory,
  1204. test_mprotect_with_pkey_0,
  1205. test_ptrace_of_child,
  1206. test_pkey_syscalls_on_non_allocated_pkey,
  1207. test_pkey_syscalls_bad_args,
  1208. test_pkey_alloc_exhaust,
  1209. };
  1210. void run_tests_once(void)
  1211. {
  1212. int *ptr;
  1213. int prot = PROT_READ|PROT_WRITE;
  1214. for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
  1215. int pkey;
  1216. int orig_pkru_faults = pkru_faults;
  1217. dprintf1("======================\n");
  1218. dprintf1("test %d preparing...\n", test_nr);
  1219. tracing_on();
  1220. pkey = alloc_random_pkey();
  1221. dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
  1222. ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
  1223. dprintf1("test %d starting...\n", test_nr);
  1224. pkey_tests[test_nr](ptr, pkey);
  1225. dprintf1("freeing test memory: %p\n", ptr);
  1226. free_pkey_malloc(ptr);
  1227. sys_pkey_free(pkey);
  1228. dprintf1("pkru_faults: %d\n", pkru_faults);
  1229. dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
  1230. tracing_off();
  1231. close_test_fds();
  1232. printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
  1233. dprintf1("======================\n\n");
  1234. }
  1235. iteration_nr++;
  1236. }
  1237. void pkey_setup_shadow(void)
  1238. {
  1239. shadow_pkru = __rdpkru();
  1240. }
  1241. int main(void)
  1242. {
  1243. int nr_iterations = 22;
  1244. setup_handlers();
  1245. printf("has pku: %d\n", cpu_has_pku());
  1246. if (!cpu_has_pku()) {
  1247. int size = PAGE_SIZE;
  1248. int *ptr;
  1249. printf("running PKEY tests for unsupported CPU/OS\n");
  1250. ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  1251. assert(ptr != (void *)-1);
  1252. test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
  1253. exit(0);
  1254. }
  1255. pkey_setup_shadow();
  1256. printf("startup pkru: %x\n", rdpkru());
  1257. setup_hugetlbfs();
  1258. while (nr_iterations-- > 0)
  1259. run_tests_once();
  1260. printf("done (all tests OK)\n");
  1261. return 0;
  1262. }