vc4_validate.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944
  1. /*
  2. * Copyright © 2014 Broadcom
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. */
  23. /**
  24. * DOC: Command list validator for VC4.
  25. *
  26. * Since the VC4 has no IOMMU between it and system memory, a user
  27. * with access to execute command lists could escalate privilege by
  28. * overwriting system memory (drawing to it as a framebuffer) or
  29. * reading system memory it shouldn't (reading it as a vertex buffer
  30. * or index buffer)
  31. *
  32. * We validate binner command lists to ensure that all accesses are
  33. * within the bounds of the GEM objects referenced by the submitted
  34. * job. It explicitly whitelists packets, and looks at the offsets in
  35. * any address fields to make sure they're contained within the BOs
  36. * they reference.
  37. *
  38. * Note that because CL validation is already reading the
  39. * user-submitted CL and writing the validated copy out to the memory
  40. * that the GPU will actually read, this is also where GEM relocation
  41. * processing (turning BO references into actual addresses for the GPU
  42. * to use) happens.
  43. */
  44. #include "uapi/drm/vc4_drm.h"
  45. #include "vc4_drv.h"
  46. #include "vc4_packet.h"
  /*
   * Parameter list shared by every per-packet validation callback:
   * the job state, the packet payload in the validated copy, and the
   * packet payload as supplied by userspace.
   */
  #define VALIDATE_ARGS                  \
      struct vc4_exec_info *exec,        \
      void *validated,                   \
      void *untrusted
  /**
   * utile_width() - Width in pixels of a 64-byte microtile.
   * @cpp: Bytes per pixel; 1, 2, 4 and 8 are recognised.
   *
   * Any other @cpp is logged as an error and treated as a 1-pixel-wide
   * microtile.
   */
  static uint32_t
  utile_width(int cpp)
  {
      if (cpp == 1 || cpp == 2)
          return 8;
      if (cpp == 4)
          return 4;
      if (cpp == 8)
          return 2;

      DRM_ERROR("unknown cpp: %d\n", cpp);
      return 1;
  }
  /**
   * utile_height() - Height in pixels of a 64-byte microtile.
   * @cpp: Bytes per pixel; 1, 2, 4 and 8 are recognised.
   *
   * Any other @cpp is logged as an error and treated as a 1-pixel-high
   * microtile.
   */
  static uint32_t
  utile_height(int cpp)
  {
      if (cpp == 1)
          return 8;
      if (cpp == 2 || cpp == 4 || cpp == 8)
          return 4;

      DRM_ERROR("unknown cpp: %d\n", cpp);
      return 1;
  }
  /**
   * size_is_lt() - Decide whether a miplevel uses the lineartile (LT)
   * layout instead of the normal T tiling layout.
   * @width: Width in pixels of the miplevel
   * @height: Height in pixels of the miplevel
   * @cpp: Bytes per pixel of the pixel format
   *
   * A level is LT as soon as either dimension is no larger than four
   * microtiles.
   */
  static bool
  size_is_lt(uint32_t width, uint32_t height, int cpp)
  {
      if (width <= 4 * utile_width(cpp))
          return true;

      return height <= 4 * utile_height(cpp);
  }
  98. struct drm_gem_cma_object *
  99. vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
  100. {
  101. struct drm_gem_cma_object *obj;
  102. struct vc4_bo *bo;
  103. if (hindex >= exec->bo_count) {
  104. DRM_ERROR("BO index %d greater than BO count %d\n",
  105. hindex, exec->bo_count);
  106. return NULL;
  107. }
  108. obj = exec->bo[hindex];
  109. bo = to_vc4_bo(&obj->base);
  110. if (bo->validated_shader) {
  111. DRM_ERROR("Trying to use shader BO as something other than "
  112. "a shader\n");
  113. return NULL;
  114. }
  115. return obj;
  116. }
  117. static struct drm_gem_cma_object *
  118. vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
  119. {
  120. return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
  121. }
  122. static bool
  123. validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
  124. {
  125. /* Note that the untrusted pointer passed to these functions is
  126. * incremented past the packet byte.
  127. */
  128. return (untrusted - 1 == exec->bin_u + pos);
  129. }
  /*
   * Size in bytes of a GL shader record, derived from the low bits of the
   * shader state reference: bits 0-2 hold the attribute count (0 encodes
   * 8) and bit 3 selects the extended record layout.
   */
  static uint32_t
  gl_shader_rec_size(uint32_t pointer_bits)
  {
      uint32_t nr_attrs = pointer_bits & 7;
      uint32_t fixed_bytes;
      uint32_t bytes_per_attr;

      if (!nr_attrs)
          nr_attrs = 8;

      if (pointer_bits & 8) {
          fixed_bytes = 100;
          bytes_per_attr = 4;
      } else {
          fixed_bytes = 36;
          bytes_per_attr = 8;
      }

      return fixed_bytes + nr_attrs * bytes_per_attr;
  }
  142. bool
  143. vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
  144. uint32_t offset, uint8_t tiling_format,
  145. uint32_t width, uint32_t height, uint8_t cpp)
  146. {
  147. uint32_t aligned_width, aligned_height, stride, size;
  148. uint32_t utile_w = utile_width(cpp);
  149. uint32_t utile_h = utile_height(cpp);
  150. /* The shaded vertex format stores signed 12.4 fixed point
  151. * (-2048,2047) offsets from the viewport center, so we should
  152. * never have a render target larger than 4096. The texture
  153. * unit can only sample from 2048x2048, so it's even more
  154. * restricted. This lets us avoid worrying about overflow in
  155. * our math.
  156. */
  157. if (width > 4096 || height > 4096) {
  158. DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
  159. return false;
  160. }
  161. switch (tiling_format) {
  162. case VC4_TILING_FORMAT_LINEAR:
  163. aligned_width = round_up(width, utile_w);
  164. aligned_height = height;
  165. break;
  166. case VC4_TILING_FORMAT_T:
  167. aligned_width = round_up(width, utile_w * 8);
  168. aligned_height = round_up(height, utile_h * 8);
  169. break;
  170. case VC4_TILING_FORMAT_LT:
  171. aligned_width = round_up(width, utile_w);
  172. aligned_height = round_up(height, utile_h);
  173. break;
  174. default:
  175. DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
  176. return false;
  177. }
  178. stride = aligned_width * cpp;
  179. size = stride * aligned_height;
  180. if (size + offset < size ||
  181. size + offset > fbo->base.size) {
  182. DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
  183. width, height,
  184. aligned_width, aligned_height,
  185. size, offset, fbo->base.size);
  186. return false;
  187. }
  188. return true;
  189. }
  190. static int
  191. validate_flush(VALIDATE_ARGS)
  192. {
  193. if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
  194. DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
  195. return -EINVAL;
  196. }
  197. exec->found_flush = true;
  198. return 0;
  199. }
  200. static int
  201. validate_start_tile_binning(VALIDATE_ARGS)
  202. {
  203. if (exec->found_start_tile_binning_packet) {
  204. DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
  205. return -EINVAL;
  206. }
  207. exec->found_start_tile_binning_packet = true;
  208. if (!exec->found_tile_binning_mode_config_packet) {
  209. DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
  210. return -EINVAL;
  211. }
  212. return 0;
  213. }
  214. static int
  215. validate_increment_semaphore(VALIDATE_ARGS)
  216. {
  217. if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
  218. DRM_ERROR("Bin CL must end with "
  219. "VC4_PACKET_INCREMENT_SEMAPHORE\n");
  220. return -EINVAL;
  221. }
  222. exec->found_increment_semaphore_packet = true;
  223. return 0;
  224. }
  225. static int
  226. validate_indexed_prim_list(VALIDATE_ARGS)
  227. {
  228. struct drm_gem_cma_object *ib;
  229. uint32_t length = *(uint32_t *)(untrusted + 1);
  230. uint32_t offset = *(uint32_t *)(untrusted + 5);
  231. uint32_t max_index = *(uint32_t *)(untrusted + 9);
  232. uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
  233. struct vc4_shader_state *shader_state;
  234. /* Check overflow condition */
  235. if (exec->shader_state_count == 0) {
  236. DRM_ERROR("shader state must precede primitives\n");
  237. return -EINVAL;
  238. }
  239. shader_state = &exec->shader_state[exec->shader_state_count - 1];
  240. if (max_index > shader_state->max_index)
  241. shader_state->max_index = max_index;
  242. ib = vc4_use_handle(exec, 0);
  243. if (!ib)
  244. return -EINVAL;
  245. exec->bin_dep_seqno = max(exec->bin_dep_seqno,
  246. to_vc4_bo(&ib->base)->write_seqno);
  247. if (offset > ib->base.size ||
  248. (ib->base.size - offset) / index_size < length) {
  249. DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
  250. offset, length, index_size, ib->base.size);
  251. return -EINVAL;
  252. }
  253. *(uint32_t *)(validated + 5) = ib->paddr + offset;
  254. return 0;
  255. }
  256. static int
  257. validate_gl_array_primitive(VALIDATE_ARGS)
  258. {
  259. uint32_t length = *(uint32_t *)(untrusted + 1);
  260. uint32_t base_index = *(uint32_t *)(untrusted + 5);
  261. uint32_t max_index;
  262. struct vc4_shader_state *shader_state;
  263. /* Check overflow condition */
  264. if (exec->shader_state_count == 0) {
  265. DRM_ERROR("shader state must precede primitives\n");
  266. return -EINVAL;
  267. }
  268. shader_state = &exec->shader_state[exec->shader_state_count - 1];
  269. if (length + base_index < length) {
  270. DRM_ERROR("primitive vertex count overflow\n");
  271. return -EINVAL;
  272. }
  273. max_index = length + base_index - 1;
  274. if (max_index > shader_state->max_index)
  275. shader_state->max_index = max_index;
  276. return 0;
  277. }
  278. static int
  279. validate_gl_shader_state(VALIDATE_ARGS)
  280. {
  281. uint32_t i = exec->shader_state_count++;
  282. if (i >= exec->shader_state_size) {
  283. DRM_ERROR("More requests for shader states than declared\n");
  284. return -EINVAL;
  285. }
  286. exec->shader_state[i].addr = *(uint32_t *)untrusted;
  287. exec->shader_state[i].max_index = 0;
  288. if (exec->shader_state[i].addr & ~0xf) {
  289. DRM_ERROR("high bits set in GL shader rec reference\n");
  290. return -EINVAL;
  291. }
  292. *(uint32_t *)validated = (exec->shader_rec_p +
  293. exec->shader_state[i].addr);
  294. exec->shader_rec_p +=
  295. roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
  296. return 0;
  297. }
  298. static int
  299. validate_tile_binning_config(VALIDATE_ARGS)
  300. {
  301. struct drm_device *dev = exec->exec_bo->base.dev;
  302. struct vc4_bo *tile_bo;
  303. uint8_t flags;
  304. uint32_t tile_state_size, tile_alloc_size;
  305. uint32_t tile_count;
  306. if (exec->found_tile_binning_mode_config_packet) {
  307. DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
  308. return -EINVAL;
  309. }
  310. exec->found_tile_binning_mode_config_packet = true;
  311. exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
  312. exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
  313. tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
  314. flags = *(uint8_t *)(untrusted + 14);
  315. if (exec->bin_tiles_x == 0 ||
  316. exec->bin_tiles_y == 0) {
  317. DRM_ERROR("Tile binning config of %dx%d too small\n",
  318. exec->bin_tiles_x, exec->bin_tiles_y);
  319. return -EINVAL;
  320. }
  321. if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
  322. VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
  323. DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
  324. return -EINVAL;
  325. }
  326. /* The tile state data array is 48 bytes per tile, and we put it at
  327. * the start of a BO containing both it and the tile alloc.
  328. */
  329. tile_state_size = 48 * tile_count;
  330. /* Since the tile alloc array will follow us, align. */
  331. exec->tile_alloc_offset = roundup(tile_state_size, 4096);
  332. *(uint8_t *)(validated + 14) =
  333. ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
  334. VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
  335. VC4_BIN_CONFIG_AUTO_INIT_TSDA |
  336. VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
  337. VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
  338. VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
  339. VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
  340. /* Initial block size. */
  341. tile_alloc_size = 32 * tile_count;
  342. /*
  343. * The initial allocation gets rounded to the next 256 bytes before
  344. * the hardware starts fulfilling further allocations.
  345. */
  346. tile_alloc_size = roundup(tile_alloc_size, 256);
  347. /* Add space for the extra allocations. This is what gets used first,
  348. * before overflow memory. It must have at least 4096 bytes, but we
  349. * want to avoid overflow memory usage if possible.
  350. */
  351. tile_alloc_size += 1024 * 1024;
  352. tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
  353. true);
  354. exec->tile_bo = &tile_bo->base;
  355. if (IS_ERR(exec->tile_bo))
  356. return PTR_ERR(exec->tile_bo);
  357. list_add_tail(&tile_bo->unref_head, &exec->unref_list);
  358. /* tile alloc address. */
  359. *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
  360. exec->tile_alloc_offset);
  361. /* tile alloc size. */
  362. *(uint32_t *)(validated + 4) = tile_alloc_size;
  363. /* tile state address. */
  364. *(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
  365. return 0;
  366. }
  367. static int
  368. validate_gem_handles(VALIDATE_ARGS)
  369. {
  370. memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
  371. return 0;
  372. }
  373. #define VC4_DEFINE_PACKET(packet, func) \
  374. [packet] = { packet ## _SIZE, #packet, func }
  375. static const struct cmd_info {
  376. uint16_t len;
  377. const char *name;
  378. int (*func)(struct vc4_exec_info *exec, void *validated,
  379. void *untrusted);
  380. } cmd_info[] = {
  381. VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
  382. VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
  383. VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
  384. VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
  385. VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
  386. validate_start_tile_binning),
  387. VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
  388. validate_increment_semaphore),
  389. VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
  390. validate_indexed_prim_list),
  391. VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
  392. validate_gl_array_primitive),
  393. VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
  394. VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
  395. VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
  396. VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
  397. VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
  398. VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
  399. VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
  400. VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
  401. VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
  402. VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
  403. VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
  404. /* Note: The docs say this was also 105, but it was 106 in the
  405. * initial userland code drop.
  406. */
  407. VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
  408. VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
  409. validate_tile_binning_config),
  410. VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
  411. };
  412. int
  413. vc4_validate_bin_cl(struct drm_device *dev,
  414. void *validated,
  415. void *unvalidated,
  416. struct vc4_exec_info *exec)
  417. {
  418. uint32_t len = exec->args->bin_cl_size;
  419. uint32_t dst_offset = 0;
  420. uint32_t src_offset = 0;
  421. while (src_offset < len) {
  422. void *dst_pkt = validated + dst_offset;
  423. void *src_pkt = unvalidated + src_offset;
  424. u8 cmd = *(uint8_t *)src_pkt;
  425. const struct cmd_info *info;
  426. if (cmd >= ARRAY_SIZE(cmd_info)) {
  427. DRM_ERROR("0x%08x: packet %d out of bounds\n",
  428. src_offset, cmd);
  429. return -EINVAL;
  430. }
  431. info = &cmd_info[cmd];
  432. if (!info->name) {
  433. DRM_ERROR("0x%08x: packet %d invalid\n",
  434. src_offset, cmd);
  435. return -EINVAL;
  436. }
  437. if (src_offset + info->len > len) {
  438. DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
  439. "exceeds bounds (0x%08x)\n",
  440. src_offset, cmd, info->name, info->len,
  441. src_offset + len);
  442. return -EINVAL;
  443. }
  444. if (cmd != VC4_PACKET_GEM_HANDLES)
  445. memcpy(dst_pkt, src_pkt, info->len);
  446. if (info->func && info->func(exec,
  447. dst_pkt + 1,
  448. src_pkt + 1)) {
  449. DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
  450. src_offset, cmd, info->name);
  451. return -EINVAL;
  452. }
  453. src_offset += info->len;
  454. /* GEM handle loading doesn't produce HW packets. */
  455. if (cmd != VC4_PACKET_GEM_HANDLES)
  456. dst_offset += info->len;
  457. /* When the CL hits halt, it'll stop reading anything else. */
  458. if (cmd == VC4_PACKET_HALT)
  459. break;
  460. }
  461. exec->ct0ea = exec->ct0ca + dst_offset;
  462. if (!exec->found_start_tile_binning_packet) {
  463. DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
  464. return -EINVAL;
  465. }
  466. /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
  467. * semaphore is used to trigger the render CL to start up, and the
  468. * FLUSH is what caps the bin lists with
  469. * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
  470. * render CL when they get called to) and actually triggers the queued
  471. * semaphore increment.
  472. */
  473. if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
  474. DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
  475. "VC4_PACKET_FLUSH\n");
  476. return -EINVAL;
  477. }
  478. return 0;
  479. }
  480. static bool
  481. reloc_tex(struct vc4_exec_info *exec,
  482. void *uniform_data_u,
  483. struct vc4_texture_sample_info *sample,
  484. uint32_t texture_handle_index, bool is_cs)
  485. {
  486. struct drm_gem_cma_object *tex;
  487. uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
  488. uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
  489. uint32_t p2 = (sample->p_offset[2] != ~0 ?
  490. *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
  491. uint32_t p3 = (sample->p_offset[3] != ~0 ?
  492. *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
  493. uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
  494. uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
  495. uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
  496. uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
  497. uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
  498. uint32_t cpp, tiling_format, utile_w, utile_h;
  499. uint32_t i;
  500. uint32_t cube_map_stride = 0;
  501. enum vc4_texture_data_type type;
  502. tex = vc4_use_bo(exec, texture_handle_index);
  503. if (!tex)
  504. return false;
  505. if (sample->is_direct) {
  506. uint32_t remaining_size = tex->base.size - p0;
  507. if (p0 > tex->base.size - 4) {
  508. DRM_ERROR("UBO offset greater than UBO size\n");
  509. goto fail;
  510. }
  511. if (p1 > remaining_size - 4) {
  512. DRM_ERROR("UBO clamp would allow reads "
  513. "outside of UBO\n");
  514. goto fail;
  515. }
  516. *validated_p0 = tex->paddr + p0;
  517. return true;
  518. }
  519. if (width == 0)
  520. width = 2048;
  521. if (height == 0)
  522. height = 2048;
  523. if (p0 & VC4_TEX_P0_CMMODE_MASK) {
  524. if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
  525. VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
  526. cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
  527. if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
  528. VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
  529. if (cube_map_stride) {
  530. DRM_ERROR("Cube map stride set twice\n");
  531. goto fail;
  532. }
  533. cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
  534. }
  535. if (!cube_map_stride) {
  536. DRM_ERROR("Cube map stride not set\n");
  537. goto fail;
  538. }
  539. }
  540. type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
  541. (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
  542. switch (type) {
  543. case VC4_TEXTURE_TYPE_RGBA8888:
  544. case VC4_TEXTURE_TYPE_RGBX8888:
  545. case VC4_TEXTURE_TYPE_RGBA32R:
  546. cpp = 4;
  547. break;
  548. case VC4_TEXTURE_TYPE_RGBA4444:
  549. case VC4_TEXTURE_TYPE_RGBA5551:
  550. case VC4_TEXTURE_TYPE_RGB565:
  551. case VC4_TEXTURE_TYPE_LUMALPHA:
  552. case VC4_TEXTURE_TYPE_S16F:
  553. case VC4_TEXTURE_TYPE_S16:
  554. cpp = 2;
  555. break;
  556. case VC4_TEXTURE_TYPE_LUMINANCE:
  557. case VC4_TEXTURE_TYPE_ALPHA:
  558. case VC4_TEXTURE_TYPE_S8:
  559. cpp = 1;
  560. break;
  561. case VC4_TEXTURE_TYPE_ETC1:
  562. /* ETC1 is arranged as 64-bit blocks, where each block is 4x4
  563. * pixels.
  564. */
  565. cpp = 8;
  566. width = (width + 3) >> 2;
  567. height = (height + 3) >> 2;
  568. break;
  569. case VC4_TEXTURE_TYPE_BW1:
  570. case VC4_TEXTURE_TYPE_A4:
  571. case VC4_TEXTURE_TYPE_A1:
  572. case VC4_TEXTURE_TYPE_RGBA64:
  573. case VC4_TEXTURE_TYPE_YUV422R:
  574. default:
  575. DRM_ERROR("Texture format %d unsupported\n", type);
  576. goto fail;
  577. }
  578. utile_w = utile_width(cpp);
  579. utile_h = utile_height(cpp);
  580. if (type == VC4_TEXTURE_TYPE_RGBA32R) {
  581. tiling_format = VC4_TILING_FORMAT_LINEAR;
  582. } else {
  583. if (size_is_lt(width, height, cpp))
  584. tiling_format = VC4_TILING_FORMAT_LT;
  585. else
  586. tiling_format = VC4_TILING_FORMAT_T;
  587. }
  588. if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
  589. tiling_format, width, height, cpp)) {
  590. goto fail;
  591. }
  592. /* The mipmap levels are stored before the base of the texture. Make
  593. * sure there is actually space in the BO.
  594. */
  595. for (i = 1; i <= miplevels; i++) {
  596. uint32_t level_width = max(width >> i, 1u);
  597. uint32_t level_height = max(height >> i, 1u);
  598. uint32_t aligned_width, aligned_height;
  599. uint32_t level_size;
  600. /* Once the levels get small enough, they drop from T to LT. */
  601. if (tiling_format == VC4_TILING_FORMAT_T &&
  602. size_is_lt(level_width, level_height, cpp)) {
  603. tiling_format = VC4_TILING_FORMAT_LT;
  604. }
  605. switch (tiling_format) {
  606. case VC4_TILING_FORMAT_T:
  607. aligned_width = round_up(level_width, utile_w * 8);
  608. aligned_height = round_up(level_height, utile_h * 8);
  609. break;
  610. case VC4_TILING_FORMAT_LT:
  611. aligned_width = round_up(level_width, utile_w);
  612. aligned_height = round_up(level_height, utile_h);
  613. break;
  614. default:
  615. aligned_width = round_up(level_width, utile_w);
  616. aligned_height = level_height;
  617. break;
  618. }
  619. level_size = aligned_width * cpp * aligned_height;
  620. if (offset < level_size) {
  621. DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
  622. "overflowed buffer bounds (offset %d)\n",
  623. i, level_width, level_height,
  624. aligned_width, aligned_height,
  625. level_size, offset);
  626. goto fail;
  627. }
  628. offset -= level_size;
  629. }
  630. *validated_p0 = tex->paddr + p0;
  631. if (is_cs) {
  632. exec->bin_dep_seqno = max(exec->bin_dep_seqno,
  633. to_vc4_bo(&tex->base)->write_seqno);
  634. }
  635. return true;
  636. fail:
  637. DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
  638. DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
  639. DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
  640. DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
  641. return false;
  642. }
  643. static int
  644. validate_gl_shader_rec(struct drm_device *dev,
  645. struct vc4_exec_info *exec,
  646. struct vc4_shader_state *state)
  647. {
  648. uint32_t *src_handles;
  649. void *pkt_u, *pkt_v;
  650. static const uint32_t shader_reloc_offsets[] = {
  651. 4, /* fs */
  652. 16, /* vs */
  653. 28, /* cs */
  654. };
  655. uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
  656. struct drm_gem_cma_object *bo[shader_reloc_count + 8];
  657. uint32_t nr_attributes, nr_relocs, packet_size;
  658. int i;
  659. nr_attributes = state->addr & 0x7;
  660. if (nr_attributes == 0)
  661. nr_attributes = 8;
  662. packet_size = gl_shader_rec_size(state->addr);
  663. nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
  664. if (nr_relocs * 4 > exec->shader_rec_size) {
  665. DRM_ERROR("overflowed shader recs reading %d handles "
  666. "from %d bytes left\n",
  667. nr_relocs, exec->shader_rec_size);
  668. return -EINVAL;
  669. }
  670. src_handles = exec->shader_rec_u;
  671. exec->shader_rec_u += nr_relocs * 4;
  672. exec->shader_rec_size -= nr_relocs * 4;
  673. if (packet_size > exec->shader_rec_size) {
  674. DRM_ERROR("overflowed shader recs copying %db packet "
  675. "from %d bytes left\n",
  676. packet_size, exec->shader_rec_size);
  677. return -EINVAL;
  678. }
  679. pkt_u = exec->shader_rec_u;
  680. pkt_v = exec->shader_rec_v;
  681. memcpy(pkt_v, pkt_u, packet_size);
  682. exec->shader_rec_u += packet_size;
  683. /* Shader recs have to be aligned to 16 bytes (due to the attribute
  684. * flags being in the low bytes), so round the next validated shader
  685. * rec address up. This should be safe, since we've got so many
  686. * relocations in a shader rec packet.
  687. */
  688. BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
  689. exec->shader_rec_v += roundup(packet_size, 16);
  690. exec->shader_rec_size -= packet_size;
  691. for (i = 0; i < shader_reloc_count; i++) {
  692. if (src_handles[i] > exec->bo_count) {
  693. DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
  694. return -EINVAL;
  695. }
  696. bo[i] = exec->bo[src_handles[i]];
  697. if (!bo[i])
  698. return -EINVAL;
  699. }
  700. for (i = shader_reloc_count; i < nr_relocs; i++) {
  701. bo[i] = vc4_use_bo(exec, src_handles[i]);
  702. if (!bo[i])
  703. return -EINVAL;
  704. }
  705. if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
  706. to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
  707. DRM_ERROR("Thread mode of CL and FS do not match\n");
  708. return -EINVAL;
  709. }
  710. if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
  711. to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
  712. DRM_ERROR("cs and vs cannot be threaded\n");
  713. return -EINVAL;
  714. }
  715. for (i = 0; i < shader_reloc_count; i++) {
  716. struct vc4_validated_shader_info *validated_shader;
  717. uint32_t o = shader_reloc_offsets[i];
  718. uint32_t src_offset = *(uint32_t *)(pkt_u + o);
  719. uint32_t *texture_handles_u;
  720. void *uniform_data_u;
  721. uint32_t tex, uni;
  722. *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
  723. if (src_offset != 0) {
  724. DRM_ERROR("Shaders must be at offset 0 of "
  725. "the BO.\n");
  726. return -EINVAL;
  727. }
  728. validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
  729. if (!validated_shader)
  730. return -EINVAL;
  731. if (validated_shader->uniforms_src_size >
  732. exec->uniforms_size) {
  733. DRM_ERROR("Uniforms src buffer overflow\n");
  734. return -EINVAL;
  735. }
  736. texture_handles_u = exec->uniforms_u;
  737. uniform_data_u = (texture_handles_u +
  738. validated_shader->num_texture_samples);
  739. memcpy(exec->uniforms_v, uniform_data_u,
  740. validated_shader->uniforms_size);
  741. for (tex = 0;
  742. tex < validated_shader->num_texture_samples;
  743. tex++) {
  744. if (!reloc_tex(exec,
  745. uniform_data_u,
  746. &validated_shader->texture_samples[tex],
  747. texture_handles_u[tex],
  748. i == 2)) {
  749. return -EINVAL;
  750. }
  751. }
  752. /* Fill in the uniform slots that need this shader's
  753. * start-of-uniforms address (used for resetting the uniform
  754. * stream in the presence of control flow).
  755. */
  756. for (uni = 0;
  757. uni < validated_shader->num_uniform_addr_offsets;
  758. uni++) {
  759. uint32_t o = validated_shader->uniform_addr_offsets[uni];
  760. ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
  761. }
  762. *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
  763. exec->uniforms_u += validated_shader->uniforms_src_size;
  764. exec->uniforms_v += validated_shader->uniforms_size;
  765. exec->uniforms_p += validated_shader->uniforms_size;
  766. }
  767. for (i = 0; i < nr_attributes; i++) {
  768. struct drm_gem_cma_object *vbo =
  769. bo[ARRAY_SIZE(shader_reloc_offsets) + i];
  770. uint32_t o = 36 + i * 8;
  771. uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
  772. uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
  773. uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
  774. uint32_t max_index;
  775. exec->bin_dep_seqno = max(exec->bin_dep_seqno,
  776. to_vc4_bo(&vbo->base)->write_seqno);
  777. if (state->addr & 0x8)
  778. stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
  779. if (vbo->base.size < offset ||
  780. vbo->base.size - offset < attr_size) {
  781. DRM_ERROR("BO offset overflow (%d + %d > %zu)\n",
  782. offset, attr_size, vbo->base.size);
  783. return -EINVAL;
  784. }
  785. if (stride != 0) {
  786. max_index = ((vbo->base.size - offset - attr_size) /
  787. stride);
  788. if (state->max_index > max_index) {
  789. DRM_ERROR("primitives use index %d out of "
  790. "supplied %d\n",
  791. state->max_index, max_index);
  792. return -EINVAL;
  793. }
  794. }
  795. *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
  796. }
  797. return 0;
  798. }
  799. int
  800. vc4_validate_shader_recs(struct drm_device *dev,
  801. struct vc4_exec_info *exec)
  802. {
  803. uint32_t i;
  804. int ret = 0;
  805. for (i = 0; i < exec->shader_state_count; i++) {
  806. ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
  807. if (ret)
  808. return ret;
  809. }
  810. return ret;
  811. }