dir.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /*
  2. * Copyright 2017 Omnibond Systems, L.L.C.
  3. */
  4. #include "protocol.h"
  5. #include "orangefs-kernel.h"
  6. #include "orangefs-bufmap.h"
  7. struct orangefs_dir_part {
  8. struct orangefs_dir_part *next;
  9. size_t len;
  10. };
  11. struct orangefs_dir {
  12. __u64 token;
  13. struct orangefs_dir_part *part;
  14. loff_t end;
  15. int error;
  16. };
  17. #define PART_SHIFT (24)
  18. #define PART_SIZE (1<<24)
  19. #define PART_MASK (~(PART_SIZE - 1))
  20. /*
  21. * There can be up to 512 directory entries. Each entry is encoded as
  22. * follows:
  23. * 4 bytes: string size (n)
  24. * n bytes: string
  25. * 1 byte: trailing zero
  26. * padding to 8 bytes
  27. * 16 bytes: khandle
  28. * padding to 8 bytes
  29. *
  30. * The trailer_buf starts with a struct orangefs_readdir_response_s
  31. * which must be skipped to get to the directory data.
  32. *
  33. * The data which is received from the userspace daemon is termed a
  34. * part and is stored in a linked list in case more than one part is
  35. * needed for a large directory.
  36. *
  37. * The position pointer (ctx->pos) encodes the part and offset on which
  38. * to begin reading at. Bits above PART_SHIFT encode the part and bits
  39. * below PART_SHIFT encode the offset. Parts are stored in a linked
  40. * list which grows as data is received from the server. The overhead
  41. * associated with managing the list is presumed to be small compared to
  42. * the overhead of communicating with the server.
  43. *
  44. * As data is received from the server, it is placed at the end of the
  45. * part list. Data is parsed from the current position as it is needed.
  46. * When data is determined to be corrupt, it is either because the
  47. * userspace component has sent back corrupt data or because the file
  48. * pointer has been moved to an invalid location. Since the two cannot
  49. * be differentiated, return EIO.
  50. *
  51. * Part zero is synthesized to contain `.' and `..'. Part one is the
  52. * first part of the part list.
  53. */
  54. static int do_readdir(struct orangefs_inode_s *oi,
  55. struct orangefs_dir *od, struct dentry *dentry,
  56. struct orangefs_kernel_op_s *op)
  57. {
  58. struct orangefs_readdir_response_s *resp;
  59. int bufi, r;
  60. /*
  61. * Despite the badly named field, readdir does not use shared
  62. * memory. However, there are a limited number of readdir
  63. * slots, which must be allocated here. This flag simply tells
  64. * the op scheduler to return the op here for retry.
  65. */
  66. op->uses_shared_memory = 1;
  67. op->upcall.req.readdir.refn = oi->refn;
  68. op->upcall.req.readdir.token = od->token;
  69. op->upcall.req.readdir.max_dirent_count =
  70. ORANGEFS_MAX_DIRENT_COUNT_READDIR;
  71. again:
  72. bufi = orangefs_readdir_index_get();
  73. if (bufi < 0) {
  74. od->error = bufi;
  75. return bufi;
  76. }
  77. op->upcall.req.readdir.buf_index = bufi;
  78. r = service_operation(op, "orangefs_readdir",
  79. get_interruptible_flag(dentry->d_inode));
  80. orangefs_readdir_index_put(bufi);
  81. if (op_state_purged(op)) {
  82. if (r == -EAGAIN) {
  83. vfree(op->downcall.trailer_buf);
  84. goto again;
  85. } else if (r == -EIO) {
  86. vfree(op->downcall.trailer_buf);
  87. od->error = r;
  88. return r;
  89. }
  90. }
  91. if (r < 0) {
  92. vfree(op->downcall.trailer_buf);
  93. od->error = r;
  94. return r;
  95. } else if (op->downcall.status) {
  96. vfree(op->downcall.trailer_buf);
  97. od->error = op->downcall.status;
  98. return op->downcall.status;
  99. }
  100. /*
  101. * The maximum size is size per entry times the 512 entries plus
  102. * the header. This is well under the limit.
  103. */
  104. if (op->downcall.trailer_size > PART_SIZE) {
  105. vfree(op->downcall.trailer_buf);
  106. od->error = -EIO;
  107. return -EIO;
  108. }
  109. resp = (struct orangefs_readdir_response_s *)
  110. op->downcall.trailer_buf;
  111. od->token = resp->token;
  112. return 0;
  113. }
  114. static int parse_readdir(struct orangefs_dir *od,
  115. struct orangefs_kernel_op_s *op)
  116. {
  117. struct orangefs_dir_part *part, *new;
  118. size_t count;
  119. count = 1;
  120. part = od->part;
  121. while (part) {
  122. count++;
  123. if (part->next)
  124. part = part->next;
  125. else
  126. break;
  127. }
  128. new = (void *)op->downcall.trailer_buf;
  129. new->next = NULL;
  130. new->len = op->downcall.trailer_size -
  131. sizeof(struct orangefs_readdir_response_s);
  132. if (!od->part)
  133. od->part = new;
  134. else
  135. part->next = new;
  136. count++;
  137. od->end = count << PART_SHIFT;
  138. return 0;
  139. }
  140. static int orangefs_dir_more(struct orangefs_inode_s *oi,
  141. struct orangefs_dir *od, struct dentry *dentry)
  142. {
  143. struct orangefs_kernel_op_s *op;
  144. int r;
  145. op = op_alloc(ORANGEFS_VFS_OP_READDIR);
  146. if (!op) {
  147. od->error = -ENOMEM;
  148. return -ENOMEM;
  149. }
  150. r = do_readdir(oi, od, dentry, op);
  151. if (r) {
  152. od->error = r;
  153. goto out;
  154. }
  155. r = parse_readdir(od, op);
  156. if (r) {
  157. od->error = r;
  158. goto out;
  159. }
  160. od->error = 0;
  161. out:
  162. op_release(op);
  163. return od->error;
  164. }
  165. static int fill_from_part(struct orangefs_dir_part *part,
  166. struct dir_context *ctx)
  167. {
  168. const int offset = sizeof(struct orangefs_readdir_response_s);
  169. struct orangefs_khandle *khandle;
  170. __u32 *len, padlen;
  171. loff_t i;
  172. char *s;
  173. i = ctx->pos & ~PART_MASK;
  174. /* The file offset from userspace is too large. */
  175. if (i > part->len)
  176. return 1;
  177. /*
  178. * If the seek pointer is positioned just before an entry it
  179. * should find the next entry.
  180. */
  181. if (i % 8)
  182. i = i + (8 - i%8)%8;
  183. while (i < part->len) {
  184. if (part->len < i + sizeof *len)
  185. break;
  186. len = (void *)part + offset + i;
  187. /*
  188. * len is the size of the string itself. padlen is the
  189. * total size of the encoded string.
  190. */
  191. padlen = (sizeof *len + *len + 1) +
  192. (8 - (sizeof *len + *len + 1)%8)%8;
  193. if (part->len < i + padlen + sizeof *khandle)
  194. goto next;
  195. s = (void *)part + offset + i + sizeof *len;
  196. if (s[*len] != 0)
  197. goto next;
  198. khandle = (void *)part + offset + i + padlen;
  199. if (!dir_emit(ctx, s, *len,
  200. orangefs_khandle_to_ino(khandle),
  201. DT_UNKNOWN))
  202. return 0;
  203. i += padlen + sizeof *khandle;
  204. i = i + (8 - i%8)%8;
  205. BUG_ON(i > part->len);
  206. ctx->pos = (ctx->pos & PART_MASK) | i;
  207. continue;
  208. next:
  209. i += 8;
  210. }
  211. return 1;
  212. }
  213. static int orangefs_dir_fill(struct orangefs_inode_s *oi,
  214. struct orangefs_dir *od, struct dentry *dentry,
  215. struct dir_context *ctx)
  216. {
  217. struct orangefs_dir_part *part;
  218. size_t count;
  219. count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
  220. part = od->part;
  221. while (part->next && count) {
  222. count--;
  223. part = part->next;
  224. }
  225. /* This means the userspace file offset is invalid. */
  226. if (count) {
  227. od->error = -EIO;
  228. return -EIO;
  229. }
  230. while (part && part->len) {
  231. int r;
  232. r = fill_from_part(part, ctx);
  233. if (r < 0) {
  234. od->error = r;
  235. return r;
  236. } else if (r == 0) {
  237. /* Userspace buffer is full. */
  238. break;
  239. } else {
  240. /*
  241. * The part ran out of data. Move to the next
  242. * part. */
  243. ctx->pos = (ctx->pos & PART_MASK) +
  244. (1 << PART_SHIFT);
  245. part = part->next;
  246. }
  247. }
  248. return 0;
  249. }
  250. static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
  251. int whence)
  252. {
  253. struct orangefs_dir *od = file->private_data;
  254. /*
  255. * Delete the stored data so userspace sees new directory
  256. * entries.
  257. */
  258. if (!whence && offset < od->end) {
  259. struct orangefs_dir_part *part = od->part;
  260. while (part) {
  261. struct orangefs_dir_part *next = part->next;
  262. vfree(part);
  263. part = next;
  264. }
  265. od->token = ORANGEFS_ITERATE_START;
  266. od->part = NULL;
  267. od->end = 1 << PART_SHIFT;
  268. }
  269. return default_llseek(file, offset, whence);
  270. }
  271. static int orangefs_dir_iterate(struct file *file,
  272. struct dir_context *ctx)
  273. {
  274. struct orangefs_inode_s *oi;
  275. struct orangefs_dir *od;
  276. struct dentry *dentry;
  277. int r;
  278. dentry = file->f_path.dentry;
  279. oi = ORANGEFS_I(dentry->d_inode);
  280. od = file->private_data;
  281. if (od->error)
  282. return od->error;
  283. if (ctx->pos == 0) {
  284. if (!dir_emit_dot(file, ctx))
  285. return 0;
  286. ctx->pos++;
  287. }
  288. if (ctx->pos == 1) {
  289. if (!dir_emit_dotdot(file, ctx))
  290. return 0;
  291. ctx->pos = 1 << PART_SHIFT;
  292. }
  293. /*
  294. * The seek position is in the first synthesized part but is not
  295. * valid.
  296. */
  297. if ((ctx->pos & PART_MASK) == 0)
  298. return -EIO;
  299. r = 0;
  300. /*
  301. * Must read more if the user has sought past what has been read
  302. * so far. Stop a user who has sought past the end.
  303. */
  304. while (od->token != ORANGEFS_ITERATE_END &&
  305. ctx->pos > od->end) {
  306. r = orangefs_dir_more(oi, od, dentry);
  307. if (r)
  308. return r;
  309. }
  310. if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
  311. return -EIO;
  312. /* Then try to fill if there's any left in the buffer. */
  313. if (ctx->pos < od->end) {
  314. r = orangefs_dir_fill(oi, od, dentry, ctx);
  315. if (r)
  316. return r;
  317. }
  318. /* Finally get some more and try to fill. */
  319. if (od->token != ORANGEFS_ITERATE_END) {
  320. r = orangefs_dir_more(oi, od, dentry);
  321. if (r)
  322. return r;
  323. r = orangefs_dir_fill(oi, od, dentry, ctx);
  324. }
  325. return r;
  326. }
  327. static int orangefs_dir_open(struct inode *inode, struct file *file)
  328. {
  329. struct orangefs_dir *od;
  330. file->private_data = kmalloc(sizeof(struct orangefs_dir),
  331. GFP_KERNEL);
  332. if (!file->private_data)
  333. return -ENOMEM;
  334. od = file->private_data;
  335. od->token = ORANGEFS_ITERATE_START;
  336. od->part = NULL;
  337. od->end = 1 << PART_SHIFT;
  338. od->error = 0;
  339. return 0;
  340. }
  341. static int orangefs_dir_release(struct inode *inode, struct file *file)
  342. {
  343. struct orangefs_dir *od = file->private_data;
  344. struct orangefs_dir_part *part = od->part;
  345. orangefs_flush_inode(inode);
  346. while (part) {
  347. struct orangefs_dir_part *next = part->next;
  348. vfree(part);
  349. part = next;
  350. }
  351. kfree(od);
  352. return 0;
  353. }
  354. const struct file_operations orangefs_dir_operations = {
  355. .llseek = orangefs_dir_llseek,
  356. .read = generic_read_dir,
  357. .iterate = orangefs_dir_iterate,
  358. .open = orangefs_dir_open,
  359. .release = orangefs_dir_release
  360. };