vc4_validate_shaders.c

/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu_defines.h"

#define LIVE_REG_COUNT (32 + 32 + 4)

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	uint32_t live_immediates[LIVE_REG_COUNT];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;

	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found. This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */
	bool needs_uniform_address_update;

	/* Set when we find a backwards branch. If the branch is backwards,
	 * the target is probably doing an address reset to read uniforms,
	 * and so we need to be sure that a uniforms address is present in the
	 * stream, even if the shader didn't need to read uniforms in later
	 * basic blocks.
	 */
	bool needs_uniform_address_for_loop;

	/* Set when we find an instruction writing the top half of the
	 * register files. If we allowed writing the unusable regs in
	 * a threaded shader, then the other shader running on our
	 * QPU's clamp validation would be invalid.
	 */
	bool all_registers_used;
};

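/* Maps a write address onto an index into the live_* tracking arrays:
 * regfile A writes land at indices 0-31, regfile B writes at 32-63, and the
 * accumulators r0-r3 (QPU_W_ACC0..QPU_W_ACC3) at 64-67. Destinations we
 * don't track return ~0.
 */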
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

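/* Returns the live_* index of the register feeding the ADD op's A argument,
 * or ~0 if it doesn't come from a register we track.
 */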
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

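/* True for indices naming ra16-ra31 or rb16-rb31, the register file halves
 * that a threaded shader must leave alone because the other thread on the
 * QPU is using them.
 */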
static bool
live_reg_is_upper_half(uint32_t lri)
{
	return (lri >= 16 && lri < 32) ||
	       (lri >= 32 + 16 && lri < 32 + 32);
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

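/* Called when a texture lookup is submitted (a write to a TMU S register):
 * appends the accumulated TMU setup to the validated shader's list of
 * texture samples, then clears the per-TMU p_offset state for the next
 * lookup.
 */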
static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

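/* Validates a write to one of the TMU coordinate/LOD registers. Each
 * parameter written before the submit consumes a uniform, so we record its
 * offset in the uniform stream for later relocation; for direct-addressed
 * (UBO) lookups we additionally require the address to be a clamped offset
 * added to a uniform base address.
 */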
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

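/* Records the current position in the uniform stream as one where a
 * uniforms address value is expected (used at uniform address resets, and
 * once at the end for shaders with backwards branches).
 */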
static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
{
	uint32_t o = validated_shader->num_uniform_addr_offsets;
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;

	validated_shader->uniform_addr_offsets =
		krealloc(validated_shader->uniform_addr_offsets,
			 (o + 1) *
			 sizeof(*validated_shader->uniform_addr_offsets),
			 GFP_KERNEL);
	if (!validated_shader->uniform_addr_offsets)
		return false;

	validated_shader->uniform_addr_offsets[o] = num_uniforms;
	validated_shader->num_uniform_addr_offsets++;

	return true;
}

static bool
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
			       struct vc4_shader_validation_state *validation_state,
			       bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
	/* We want our reset to be pointing at whatever uniform follows the
	 * uniforms base address.
	 */
	u32 expected_offset = validated_shader->uniforms_size + 4;

	/* We only support absolute uniform address changes, and we
	 * require that they be in the current basic block before any
	 * of its uniform reads.
	 *
	 * One could potentially emit more efficient QPU code, by
	 * noticing that (say) an if statement does uniform control
	 * flow for all threads and that the if reads the same number
	 * of uniforms on each side. However, this scheme is easy to
	 * validate so it's all we allow for now.
	 */
	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
	case QPU_SIG_NONE:
	case QPU_SIG_SCOREBOARD_UNLOCK:
	case QPU_SIG_COLOR_LOAD:
	case QPU_SIG_LOAD_TMU0:
	case QPU_SIG_LOAD_TMU1:
		break;
	default:
		DRM_ERROR("uniforms address change must be "
			  "normal math\n");
		return false;
	}

	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
		DRM_ERROR("Uniform address reset must be an ADD.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
		DRM_ERROR("Uniform address reset must be unconditional.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
	    !(inst & QPU_PM)) {
		DRM_ERROR("No packing allowed on uniforms reset\n");
		return false;
	}

	if (add_lri == -1) {
		DRM_ERROR("First argument of uniform address write must be "
			  "an immediate value.\n");
		return false;
	}

	if (validation_state->live_immediates[add_lri] != expected_offset) {
		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
			  validation_state->live_immediates[add_lri],
			  expected_offset);
		return false;
	}

	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
		DRM_ERROR("Second argument of uniform address write must be "
			  "a uniform.\n");
		return false;
	}

	validation_state->needs_uniform_address_update = false;
	validation_state->needs_uniform_address_for_loop = false;
	return require_uniform_address_uniform(validated_shader);
}

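/* Validates the destination of an ADD or MUL write, keeping the
 * live-immediate tracking up to date and rejecting destinations (general
 * VPM DMA address, host interrupt, etc.) that aren't known to be safe.
 */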
static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	bool is_b = is_mul ^ ws;
	u32 lri = waddr_to_live_reg_index(waddr, is_b);

	if (lri != -1) {
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

		if (sig == QPU_SIG_LOAD_IMM &&
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
			validation_state->live_immediates[lri] =
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
		} else {
			validation_state->live_immediates[lri] = ~0;
		}

		if (live_reg_is_upper_half(lri))
			validation_state->all_registers_used = true;
	}

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		if (is_b) {
			DRM_ERROR("relative uniforms address change "
				  "unsupported\n");
			return false;
		}

		return validate_uniform_address_write(validated_shader,
						      validation_state,
						      is_mul);

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}

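/* Tracks the MAX(x, 0) / MIN(x, uniform) clamp sequences that direct TMU
 * lookups rely on: a MAX against small-immediate 0 marks the destination as
 * clamped below, and a following MIN against a uniform records that
 * uniform's stream offset as the register's upper bound.
 */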
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

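/* Branch instructions only need light per-instruction checks here; the
 * branch target and delay-slot validation is done up front by
 * vc4_validate_branches().
 */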
static bool
check_branch(uint64_t inst,
	     struct vc4_validated_shader_info *validated_shader,
	     struct vc4_shader_validation_state *validation_state,
	     int ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if ((int)branch_imm < 0)
		validation_state->needs_uniform_address_for_loop = true;

	/* We don't want to have to worry about validation of this, and
	 * there's no need for it.
	 */
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
		DRM_ERROR("branch instruction at %d wrote a register.\n",
			  validation_state->ip);
		return false;
	}

	return true;
}

static bool
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
			struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;

		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Uniform read with undefined uniform "
				  "address\n");
			return false;
		}
	}

	if ((raddr_a >= 16 && raddr_a < 32) ||
	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
		validation_state->all_registers_used = true;
	}

	return true;
}

/* Make sure that all branches are absolute and point within the shader, and
 * note their targets for later.
 */
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	int ip;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			/* There are two delay slots after program end is
			 * signaled that are still executed, then we're
			 * finished. validation_state->max_ip is the
			 * instruction after the last valid instruction in the
			 * program.
			 */
			validation_state->max_ip = ip + 3;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_ERROR("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_ERROR("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_ERROR("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction. Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_ERROR("branch target not aligned\n");
			return false;
		}

		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);
	}

	if (max_branch_target > validation_state->max_ip - 3) {
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
		return false;
	}

	return true;
}

/* Resets any known state for the shader, used when we may be branched to from
 * multiple locations in the program (or at shader start).
 */
static void
reset_validation_state(struct vc4_shader_validation_state *validation_state)
{
	int i;

	for (i = 0; i < 8; i++)
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;

	for (i = 0; i < LIVE_REG_COUNT; i++) {
		validation_state->live_min_clamp_offsets[i] = ~0;
		validation_state->live_max_clamp_regs[i] = false;
		validation_state->live_immediates[i] = ~0;
	}
}

static bool
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
{
	return (validation_state->tmu_write_count[0] != 0 ||
		validation_state->tmu_write_count[1] != 0);
}

static bool
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
{
	uint32_t ip = validation_state->ip;

	if (!test_bit(ip, validation_state->branch_targets))
		return true;

	if (texturing_in_progress(validation_state)) {
		DRM_ERROR("Branch target landed during TMU setup\n");
		return false;
	}

	/* Reset our live values tracking, since this instruction may have
	 * multiple predecessors.
	 *
	 * One could potentially do analysis to determine that, for
	 * example, all predecessors have a live max clamp in the same
	 * register, but we don't bother with that.
	 */
	reset_validation_state(validation_state);

	/* Since we've entered a basic block from potentially multiple
	 * predecessors, we need the uniforms address to be updated before any
	 * uniforms are read. We require that after any branch point, the next
	 * uniform to be loaded is a uniform address offset. That uniform's
	 * offset will be marked by the uniform address register write
	 * validation, or a one-off end-of-program check.
	 */
	validation_state->needs_uniform_address_update = true;

	return true;
}

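/* Main entry point: walks every instruction in the shader BO, rejecting
 * anything that could read or write memory outside what the shader is
 * allowed to access. On success, returns a vc4_validated_shader_info
 * describing the uniform stream layout and texture samples so they can be
 * relocated; returns NULL on failure.
 */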
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t last_thread_switch_ip = -3;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	reset_validation_state(&validation_state);

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		if (ip == last_thread_switch_ip + 3) {
			/* Reset r0-r3 live clamp data */
			int i;

			for (i = 64; i < LIVE_REG_COUNT; i++) {
				validation_state.live_min_clamp_offsets[i] = ~0;
				validation_state.live_max_clamp_regs[i] = false;
				validation_state.live_immediates[i] = ~0;
			}
		}

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
		case QPU_SIG_THREAD_SWITCH:
		case QPU_SIG_LAST_THREAD_SWITCH:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(validated_shader,
						     &validation_state))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			if (sig == QPU_SIG_THREAD_SWITCH ||
			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
				validated_shader->is_threaded = true;

				if (ip < last_thread_switch_ip + 3) {
					DRM_ERROR("Thread switch too soon after "
						  "last switch at ip %d\n", ip);
					goto fail;
				}
				last_thread_switch_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_BRANCH:
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;

			if (ip < last_thread_switch_ip + 3) {
				DRM_ERROR("Branch in thread switch at ip %d",
					  ip);
				goto fail;
			}
			break;
		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Might corrupt other thread */
	if (validated_shader->is_threaded &&
	    validation_state.all_registers_used) {
		DRM_ERROR("Shader uses threading, but uses the upper "
			  "half of the registers, too\n");
		goto fail;
	}

	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its
	 * uniforms reset.
	 *
	 * We could potentially prove that the backwards branch doesn't
	 * contain any uses of uniforms until program exit, but that doesn't
	 * seem to be worth the trouble.
	 */
	if (validation_state.needs_uniform_address_for_loop) {
		if (!require_uniform_address_uniform(validated_shader))
			goto fail;
		validated_shader->uniforms_size += 4;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}