dir.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. /*
  2. * (C) 2001 Clemson University and The University of Chicago
  3. *
  4. * See COPYING in top-level directory.
  5. */
  6. #include "protocol.h"
  7. #include "orangefs-kernel.h"
  8. #include "orangefs-bufmap.h"
  9. struct readdir_handle_s {
  10. int buffer_index;
  11. struct orangefs_readdir_response_s readdir_response;
  12. void *dents_buf;
  13. };
  14. /*
  15. * decode routine used by kmod to deal with the blob sent from
  16. * userspace for readdirs. The blob contains zero or more of these
  17. * sub-blobs:
  18. * __u32 - represents length of the character string that follows.
  19. * string - between 1 and ORANGEFS_NAME_MAX bytes long.
  20. * padding - (if needed) to cause the __u32 plus the string to be
  21. * eight byte aligned.
  22. * khandle - sizeof(khandle) bytes.
  23. */
  24. static long decode_dirents(char *ptr, size_t size,
  25. struct orangefs_readdir_response_s *readdir)
  26. {
  27. int i;
  28. struct orangefs_readdir_response_s *rd =
  29. (struct orangefs_readdir_response_s *) ptr;
  30. char *buf = ptr;
  31. int khandle_size = sizeof(struct orangefs_khandle);
  32. size_t offset = offsetof(struct orangefs_readdir_response_s,
  33. dirent_array);
  34. /* 8 reflects eight byte alignment */
  35. int smallest_blob = khandle_size + 8;
  36. __u32 len;
  37. int aligned_len;
  38. int sizeof_u32 = sizeof(__u32);
  39. long ret;
  40. gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
  41. /* size is = offset on empty dirs, > offset on non-empty dirs... */
  42. if (size < offset) {
  43. gossip_err("%s: size:%zu: offset:%zu:\n",
  44. __func__,
  45. size,
  46. offset);
  47. ret = -EINVAL;
  48. goto out;
  49. }
  50. if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
  51. gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
  52. __func__,
  53. size,
  54. readdir->orangefs_dirent_outcount);
  55. ret = -EINVAL;
  56. goto out;
  57. }
  58. readdir->token = rd->token;
  59. readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
  60. readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
  61. sizeof(*readdir->dirent_array),
  62. GFP_KERNEL);
  63. if (readdir->dirent_array == NULL) {
  64. gossip_err("%s: kcalloc failed.\n", __func__);
  65. ret = -ENOMEM;
  66. goto out;
  67. }
  68. buf += offset;
  69. size -= offset;
  70. for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
  71. if (size < smallest_blob) {
  72. gossip_err("%s: size:%zu: smallest_blob:%d:\n",
  73. __func__,
  74. size,
  75. smallest_blob);
  76. ret = -EINVAL;
  77. goto free;
  78. }
  79. len = *(__u32 *)buf;
  80. if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
  81. gossip_err("%s: len:%d:\n", __func__, len);
  82. ret = -EINVAL;
  83. goto free;
  84. }
  85. gossip_debug(GOSSIP_DIR_DEBUG,
  86. "%s: size:%zu: len:%d:\n",
  87. __func__,
  88. size,
  89. len);
  90. readdir->dirent_array[i].d_name = buf + sizeof_u32;
  91. readdir->dirent_array[i].d_length = len;
  92. /*
  93. * Calculate "aligned" length of this string and its
  94. * associated __u32 descriptor.
  95. */
  96. aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
  97. gossip_debug(GOSSIP_DIR_DEBUG,
  98. "%s: aligned_len:%d:\n",
  99. __func__,
  100. aligned_len);
  101. /*
  102. * The end of the blob should coincide with the end
  103. * of the last sub-blob.
  104. */
  105. if (size < aligned_len + khandle_size) {
  106. gossip_err("%s: ran off the end of the blob.\n",
  107. __func__);
  108. ret = -EINVAL;
  109. goto free;
  110. }
  111. size -= aligned_len + khandle_size;
  112. buf += aligned_len;
  113. readdir->dirent_array[i].khandle =
  114. *(struct orangefs_khandle *) buf;
  115. buf += khandle_size;
  116. }
  117. ret = buf - ptr;
  118. gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
  119. goto out;
  120. free:
  121. kfree(readdir->dirent_array);
  122. readdir->dirent_array = NULL;
  123. out:
  124. return ret;
  125. }
  126. static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf,
  127. size_t size, int buffer_index)
  128. {
  129. long ret;
  130. if (buf == NULL) {
  131. gossip_err
  132. ("Invalid NULL buffer specified in readdir_handle_ctor\n");
  133. return -ENOMEM;
  134. }
  135. if (buffer_index < 0) {
  136. gossip_err
  137. ("Invalid buffer index specified in readdir_handle_ctor\n");
  138. return -EINVAL;
  139. }
  140. rhandle->buffer_index = buffer_index;
  141. rhandle->dents_buf = buf;
  142. ret = decode_dirents(buf, size, &rhandle->readdir_response);
  143. if (ret < 0) {
  144. gossip_err("Could not decode readdir from buffer %ld\n", ret);
  145. rhandle->buffer_index = -1;
  146. gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf);
  147. vfree(buf);
  148. rhandle->dents_buf = NULL;
  149. }
  150. return ret;
  151. }
  152. static void readdir_handle_dtor(struct readdir_handle_s *rhandle)
  153. {
  154. if (rhandle == NULL)
  155. return;
  156. /* kfree(NULL) is safe */
  157. kfree(rhandle->readdir_response.dirent_array);
  158. rhandle->readdir_response.dirent_array = NULL;
  159. if (rhandle->buffer_index >= 0) {
  160. orangefs_readdir_index_put(rhandle->buffer_index);
  161. rhandle->buffer_index = -1;
  162. }
  163. if (rhandle->dents_buf) {
  164. gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n",
  165. rhandle->dents_buf);
  166. vfree(rhandle->dents_buf);
  167. rhandle->dents_buf = NULL;
  168. }
  169. }
  170. /*
  171. * Read directory entries from an instance of an open directory.
  172. */
  173. static int orangefs_readdir(struct file *file, struct dir_context *ctx)
  174. {
  175. struct orangefs_bufmap *bufmap = NULL;
  176. int ret = 0;
  177. int buffer_index;
  178. /*
  179. * ptoken supports Orangefs' distributed directory logic, added
  180. * in 2.9.2.
  181. */
  182. __u64 *ptoken = file->private_data;
  183. __u64 pos = 0;
  184. ino_t ino = 0;
  185. struct dentry *dentry = file->f_path.dentry;
  186. struct orangefs_kernel_op_s *new_op = NULL;
  187. struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
  188. int buffer_full = 0;
  189. struct readdir_handle_s rhandle;
  190. int i = 0;
  191. int len = 0;
  192. ino_t current_ino = 0;
  193. char *current_entry = NULL;
  194. long bytes_decoded;
  195. gossip_debug(GOSSIP_DIR_DEBUG,
  196. "%s: ctx->pos:%lld, ptoken = %llu\n",
  197. __func__,
  198. lld(ctx->pos),
  199. llu(*ptoken));
  200. pos = (__u64) ctx->pos;
  201. /* are we done? */
  202. if (pos == ORANGEFS_READDIR_END) {
  203. gossip_debug(GOSSIP_DIR_DEBUG,
  204. "Skipping to termination path\n");
  205. return 0;
  206. }
  207. gossip_debug(GOSSIP_DIR_DEBUG,
  208. "orangefs_readdir called on %s (pos=%llu)\n",
  209. dentry->d_name.name, llu(pos));
  210. rhandle.buffer_index = -1;
  211. rhandle.dents_buf = NULL;
  212. memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response));
  213. new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
  214. if (!new_op)
  215. return -ENOMEM;
  216. new_op->uses_shared_memory = 1;
  217. new_op->upcall.req.readdir.refn = orangefs_inode->refn;
  218. new_op->upcall.req.readdir.max_dirent_count =
  219. ORANGEFS_MAX_DIRENT_COUNT_READDIR;
  220. gossip_debug(GOSSIP_DIR_DEBUG,
  221. "%s: upcall.req.readdir.refn.khandle: %pU\n",
  222. __func__,
  223. &new_op->upcall.req.readdir.refn.khandle);
  224. new_op->upcall.req.readdir.token = *ptoken;
  225. get_new_buffer_index:
  226. ret = orangefs_readdir_index_get(&bufmap, &buffer_index);
  227. if (ret < 0) {
  228. gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
  229. ret);
  230. goto out_free_op;
  231. }
  232. new_op->upcall.req.readdir.buf_index = buffer_index;
  233. ret = service_operation(new_op,
  234. "orangefs_readdir",
  235. get_interruptible_flag(dentry->d_inode));
  236. gossip_debug(GOSSIP_DIR_DEBUG,
  237. "Readdir downcall status is %d. ret:%d\n",
  238. new_op->downcall.status,
  239. ret);
  240. if (ret == -EAGAIN && op_state_purged(new_op)) {
  241. /*
  242. * readdir shared memory aread has been wiped due to
  243. * pvfs2-client-core restarting, so we must get a new
  244. * index into the shared memory.
  245. */
  246. gossip_debug(GOSSIP_DIR_DEBUG,
  247. "%s: Getting new buffer_index for retry of readdir..\n",
  248. __func__);
  249. orangefs_readdir_index_put(buffer_index);
  250. goto get_new_buffer_index;
  251. }
  252. if (ret == -EIO && op_state_purged(new_op)) {
  253. gossip_err("%s: Client is down. Aborting readdir call.\n",
  254. __func__);
  255. orangefs_readdir_index_put(buffer_index);
  256. goto out_free_op;
  257. }
  258. if (ret < 0 || new_op->downcall.status != 0) {
  259. gossip_debug(GOSSIP_DIR_DEBUG,
  260. "Readdir request failed. Status:%d\n",
  261. new_op->downcall.status);
  262. orangefs_readdir_index_put(buffer_index);
  263. if (ret >= 0)
  264. ret = new_op->downcall.status;
  265. goto out_free_op;
  266. }
  267. bytes_decoded =
  268. readdir_handle_ctor(&rhandle,
  269. new_op->downcall.trailer_buf,
  270. new_op->downcall.trailer_size,
  271. buffer_index);
  272. if (bytes_decoded < 0) {
  273. gossip_err("orangefs_readdir: Could not decode trailer buffer into a readdir response %d\n",
  274. ret);
  275. ret = bytes_decoded;
  276. orangefs_readdir_index_put(buffer_index);
  277. goto out_free_op;
  278. }
  279. if (bytes_decoded != new_op->downcall.trailer_size) {
  280. gossip_err("orangefs_readdir: # bytes decoded (%ld) "
  281. "!= trailer size (%ld)\n",
  282. bytes_decoded,
  283. (long)new_op->downcall.trailer_size);
  284. ret = -EINVAL;
  285. goto out_destroy_handle;
  286. }
  287. /*
  288. * orangefs doesn't actually store dot and dot-dot, but
  289. * we need to have them represented.
  290. */
  291. if (pos == 0) {
  292. ino = get_ino_from_khandle(dentry->d_inode);
  293. gossip_debug(GOSSIP_DIR_DEBUG,
  294. "%s: calling dir_emit of \".\" with pos = %llu\n",
  295. __func__,
  296. llu(pos));
  297. ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
  298. pos += 1;
  299. }
  300. if (pos == 1) {
  301. ino = get_parent_ino_from_dentry(dentry);
  302. gossip_debug(GOSSIP_DIR_DEBUG,
  303. "%s: calling dir_emit of \"..\" with pos = %llu\n",
  304. __func__,
  305. llu(pos));
  306. ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
  307. pos += 1;
  308. }
  309. /*
  310. * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
  311. * to prevent "finding" dot and dot-dot on any iteration
  312. * other than the first.
  313. */
  314. if (ctx->pos == ORANGEFS_ITERATE_NEXT)
  315. ctx->pos = 0;
  316. for (i = ctx->pos;
  317. i < rhandle.readdir_response.orangefs_dirent_outcount;
  318. i++) {
  319. len = rhandle.readdir_response.dirent_array[i].d_length;
  320. current_entry = rhandle.readdir_response.dirent_array[i].d_name;
  321. current_ino = orangefs_khandle_to_ino(
  322. &(rhandle.readdir_response.dirent_array[i].khandle));
  323. gossip_debug(GOSSIP_DIR_DEBUG,
  324. "calling dir_emit for %s with len %d"
  325. ", ctx->pos %ld\n",
  326. current_entry,
  327. len,
  328. (unsigned long)ctx->pos);
  329. /*
  330. * type is unknown. We don't return object type
  331. * in the dirent_array. This leaves getdents
  332. * clueless about type.
  333. */
  334. ret =
  335. dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
  336. if (!ret)
  337. break;
  338. ctx->pos++;
  339. gossip_debug(GOSSIP_DIR_DEBUG,
  340. "%s: ctx->pos:%lld\n",
  341. __func__,
  342. lld(ctx->pos));
  343. }
  344. /*
  345. * we ran all the way through the last batch, set up for
  346. * getting another batch...
  347. */
  348. if (ret) {
  349. *ptoken = rhandle.readdir_response.token;
  350. ctx->pos = ORANGEFS_ITERATE_NEXT;
  351. }
  352. /*
  353. * Did we hit the end of the directory?
  354. */
  355. if (rhandle.readdir_response.token == ORANGEFS_READDIR_END &&
  356. !buffer_full) {
  357. gossip_debug(GOSSIP_DIR_DEBUG,
  358. "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
  359. ctx->pos = ORANGEFS_READDIR_END;
  360. }
  361. out_destroy_handle:
  362. readdir_handle_dtor(&rhandle);
  363. out_free_op:
  364. op_release(new_op);
  365. gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
  366. return ret;
  367. }
  368. static int orangefs_dir_open(struct inode *inode, struct file *file)
  369. {
  370. __u64 *ptoken;
  371. file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
  372. if (!file->private_data)
  373. return -ENOMEM;
  374. ptoken = file->private_data;
  375. *ptoken = ORANGEFS_READDIR_START;
  376. return 0;
  377. }
  378. static int orangefs_dir_release(struct inode *inode, struct file *file)
  379. {
  380. orangefs_flush_inode(inode);
  381. kfree(file->private_data);
  382. return 0;
  383. }
  384. /** ORANGEFS implementation of VFS directory operations */
  385. const struct file_operations orangefs_dir_operations = {
  386. .read = generic_read_dir,
  387. .iterate = orangefs_readdir,
  388. .open = orangefs_dir_open,
  389. .release = orangefs_dir_release,
  390. };