xfs_inode_buf.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. /*
  2. * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_shared.h"
  21. #include "xfs_format.h"
  22. #include "xfs_log_format.h"
  23. #include "xfs_trans_resv.h"
  24. #include "xfs_mount.h"
  25. #include "xfs_defer.h"
  26. #include "xfs_inode.h"
  27. #include "xfs_errortag.h"
  28. #include "xfs_error.h"
  29. #include "xfs_cksum.h"
  30. #include "xfs_icache.h"
  31. #include "xfs_trans.h"
  32. #include "xfs_ialloc.h"
  33. #include "xfs_dir2.h"
  34. #include <linux/iversion.h>
  /*
   * Debug-only sanity check: walk the inodes of one inode cluster inside the
   * buffer and raise an alert for each inode whose next unlinked field is 0.
   */
  #if defined(DEBUG)
  void
  xfs_inobp_check(
  	xfs_mount_t	*mp,
  	xfs_buf_t	*bp)
  {
  	/* Number of inodes covered by one inode cluster. */
  	int		nr_inodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
  	int		idx;

  	for (idx = 0; idx < nr_inodes; idx++) {
  		xfs_dinode_t	*dip;

  		dip = xfs_buf_offset(bp, idx * mp->m_sb.sb_inodesize);
  		if (dip->di_next_unlinked)
  			continue;

  		xfs_alert(mp,
  	"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
  			idx, (long long)bp->b_bn);
  	}
  }
  #endif
  59. bool
  60. xfs_dinode_good_version(
  61. struct xfs_mount *mp,
  62. __u8 version)
  63. {
  64. if (xfs_sb_version_hascrc(&mp->m_sb))
  65. return version == 3;
  66. return version == 1 || version == 2;
  67. }
  68. /*
  69. * If we are doing readahead on an inode buffer, we might be in log recovery
  70. * reading an inode allocation buffer that hasn't yet been replayed, and hence
  71. * has not had the inode cores stamped into it. Hence for readahead, the buffer
  72. * may be potentially invalid.
  73. *
  74. * If the readahead buffer is invalid, we need to mark it with an error and
  75. * clear the DONE status of the buffer so that a followup read will re-read it
  76. * from disk. We don't report the error otherwise to avoid warnings during log
  77. * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
  78. * because all we want to do is say readahead failed; there is no-one to report
  79. * the error to, so this will distinguish it from a non-ra verifier failure.
  80. * Changes to this readahead error behavour also need to be reflected in
  81. * xfs_dquot_buf_readahead_verify().
  82. */
  83. static void
  84. xfs_inode_buf_verify(
  85. struct xfs_buf *bp,
  86. bool readahead)
  87. {
  88. struct xfs_mount *mp = bp->b_target->bt_mount;
  89. xfs_agnumber_t agno;
  90. int i;
  91. int ni;
  92. /*
  93. * Validate the magic number and version of every inode in the buffer
  94. */
  95. agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
  96. ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  97. for (i = 0; i < ni; i++) {
  98. int di_ok;
  99. xfs_dinode_t *dip;
  100. xfs_agino_t unlinked_ino;
  101. dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
  102. unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
  103. di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
  104. xfs_dinode_good_version(mp, dip->di_version) &&
  105. (unlinked_ino == NULLAGINO ||
  106. xfs_verify_agino(mp, agno, unlinked_ino));
  107. if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
  108. XFS_ERRTAG_ITOBP_INOTOBP))) {
  109. if (readahead) {
  110. bp->b_flags &= ~XBF_DONE;
  111. xfs_buf_ioerror(bp, -EIO);
  112. return;
  113. }
  114. #ifdef DEBUG
  115. xfs_alert(mp,
  116. "bad inode magic/vsn daddr %lld #%d (magic=%x)",
  117. (unsigned long long)bp->b_bn, i,
  118. be16_to_cpu(dip->di_magic));
  119. #endif
  120. xfs_buf_verifier_error(bp, -EFSCORRUPTED,
  121. __func__, dip, sizeof(*dip),
  122. NULL);
  123. return;
  124. }
  125. }
  126. }
  127. static void
  128. xfs_inode_buf_read_verify(
  129. struct xfs_buf *bp)
  130. {
  131. xfs_inode_buf_verify(bp, false);
  132. }
  133. static void
  134. xfs_inode_buf_readahead_verify(
  135. struct xfs_buf *bp)
  136. {
  137. xfs_inode_buf_verify(bp, true);
  138. }
  139. static void
  140. xfs_inode_buf_write_verify(
  141. struct xfs_buf *bp)
  142. {
  143. xfs_inode_buf_verify(bp, false);
  144. }
  145. const struct xfs_buf_ops xfs_inode_buf_ops = {
  146. .name = "xfs_inode",
  147. .verify_read = xfs_inode_buf_read_verify,
  148. .verify_write = xfs_inode_buf_write_verify,
  149. };
  150. const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
  151. .name = "xxfs_inode_ra",
  152. .verify_read = xfs_inode_buf_readahead_verify,
  153. .verify_write = xfs_inode_buf_write_verify,
  154. };
  155. /*
  156. * This routine is called to map an inode to the buffer containing the on-disk
  157. * version of the inode. It returns a pointer to the buffer containing the
  158. * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
  159. * pointer to the on-disk inode within that buffer.
  160. *
  161. * If a non-zero error is returned, then the contents of bpp and dipp are
  162. * undefined.
  163. */
  164. int
  165. xfs_imap_to_bp(
  166. struct xfs_mount *mp,
  167. struct xfs_trans *tp,
  168. struct xfs_imap *imap,
  169. struct xfs_dinode **dipp,
  170. struct xfs_buf **bpp,
  171. uint buf_flags,
  172. uint iget_flags)
  173. {
  174. struct xfs_buf *bp;
  175. int error;
  176. buf_flags |= XBF_UNMAPPED;
  177. error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
  178. (int)imap->im_len, buf_flags, &bp,
  179. &xfs_inode_buf_ops);
  180. if (error) {
  181. if (error == -EAGAIN) {
  182. ASSERT(buf_flags & XBF_TRYLOCK);
  183. return error;
  184. }
  185. if (error == -EFSCORRUPTED &&
  186. (iget_flags & XFS_IGET_UNTRUSTED))
  187. return -EINVAL;
  188. xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
  189. __func__, error);
  190. return error;
  191. }
  192. *bpp = bp;
  193. *dipp = xfs_buf_offset(bp, imap->im_boffset);
  194. return 0;
  195. }
  196. void
  197. xfs_inode_from_disk(
  198. struct xfs_inode *ip,
  199. struct xfs_dinode *from)
  200. {
  201. struct xfs_icdinode *to = &ip->i_d;
  202. struct inode *inode = VFS_I(ip);
  203. /*
  204. * Convert v1 inodes immediately to v2 inode format as this is the
  205. * minimum inode version format we support in the rest of the code.
  206. */
  207. to->di_version = from->di_version;
  208. if (to->di_version == 1) {
  209. set_nlink(inode, be16_to_cpu(from->di_onlink));
  210. to->di_projid_lo = 0;
  211. to->di_projid_hi = 0;
  212. to->di_version = 2;
  213. } else {
  214. set_nlink(inode, be32_to_cpu(from->di_nlink));
  215. to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
  216. to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
  217. }
  218. to->di_format = from->di_format;
  219. to->di_uid = be32_to_cpu(from->di_uid);
  220. to->di_gid = be32_to_cpu(from->di_gid);
  221. to->di_flushiter = be16_to_cpu(from->di_flushiter);
  222. /*
  223. * Time is signed, so need to convert to signed 32 bit before
  224. * storing in inode timestamp which may be 64 bit. Otherwise
  225. * a time before epoch is converted to a time long after epoch
  226. * on 64 bit systems.
  227. */
  228. inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
  229. inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
  230. inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
  231. inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
  232. inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
  233. inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
  234. inode->i_generation = be32_to_cpu(from->di_gen);
  235. inode->i_mode = be16_to_cpu(from->di_mode);
  236. to->di_size = be64_to_cpu(from->di_size);
  237. to->di_nblocks = be64_to_cpu(from->di_nblocks);
  238. to->di_extsize = be32_to_cpu(from->di_extsize);
  239. to->di_nextents = be32_to_cpu(from->di_nextents);
  240. to->di_anextents = be16_to_cpu(from->di_anextents);
  241. to->di_forkoff = from->di_forkoff;
  242. to->di_aformat = from->di_aformat;
  243. to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
  244. to->di_dmstate = be16_to_cpu(from->di_dmstate);
  245. to->di_flags = be16_to_cpu(from->di_flags);
  246. if (to->di_version == 3) {
  247. inode_set_iversion_queried(inode,
  248. be64_to_cpu(from->di_changecount));
  249. to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
  250. to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
  251. to->di_flags2 = be64_to_cpu(from->di_flags2);
  252. to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
  253. }
  254. }
  255. void
  256. xfs_inode_to_disk(
  257. struct xfs_inode *ip,
  258. struct xfs_dinode *to,
  259. xfs_lsn_t lsn)
  260. {
  261. struct xfs_icdinode *from = &ip->i_d;
  262. struct inode *inode = VFS_I(ip);
  263. to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
  264. to->di_onlink = 0;
  265. to->di_version = from->di_version;
  266. to->di_format = from->di_format;
  267. to->di_uid = cpu_to_be32(from->di_uid);
  268. to->di_gid = cpu_to_be32(from->di_gid);
  269. to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
  270. to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
  271. memset(to->di_pad, 0, sizeof(to->di_pad));
  272. to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
  273. to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
  274. to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
  275. to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
  276. to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
  277. to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
  278. to->di_nlink = cpu_to_be32(inode->i_nlink);
  279. to->di_gen = cpu_to_be32(inode->i_generation);
  280. to->di_mode = cpu_to_be16(inode->i_mode);
  281. to->di_size = cpu_to_be64(from->di_size);
  282. to->di_nblocks = cpu_to_be64(from->di_nblocks);
  283. to->di_extsize = cpu_to_be32(from->di_extsize);
  284. to->di_nextents = cpu_to_be32(from->di_nextents);
  285. to->di_anextents = cpu_to_be16(from->di_anextents);
  286. to->di_forkoff = from->di_forkoff;
  287. to->di_aformat = from->di_aformat;
  288. to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
  289. to->di_dmstate = cpu_to_be16(from->di_dmstate);
  290. to->di_flags = cpu_to_be16(from->di_flags);
  291. if (from->di_version == 3) {
  292. to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
  293. to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
  294. to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
  295. to->di_flags2 = cpu_to_be64(from->di_flags2);
  296. to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
  297. to->di_ino = cpu_to_be64(ip->i_ino);
  298. to->di_lsn = cpu_to_be64(lsn);
  299. memset(to->di_pad2, 0, sizeof(to->di_pad2));
  300. uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
  301. to->di_flushiter = 0;
  302. } else {
  303. to->di_flushiter = cpu_to_be16(from->di_flushiter);
  304. }
  305. }
  306. void
  307. xfs_log_dinode_to_disk(
  308. struct xfs_log_dinode *from,
  309. struct xfs_dinode *to)
  310. {
  311. to->di_magic = cpu_to_be16(from->di_magic);
  312. to->di_mode = cpu_to_be16(from->di_mode);
  313. to->di_version = from->di_version;
  314. to->di_format = from->di_format;
  315. to->di_onlink = 0;
  316. to->di_uid = cpu_to_be32(from->di_uid);
  317. to->di_gid = cpu_to_be32(from->di_gid);
  318. to->di_nlink = cpu_to_be32(from->di_nlink);
  319. to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
  320. to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
  321. memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
  322. to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
  323. to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
  324. to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
  325. to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
  326. to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
  327. to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
  328. to->di_size = cpu_to_be64(from->di_size);
  329. to->di_nblocks = cpu_to_be64(from->di_nblocks);
  330. to->di_extsize = cpu_to_be32(from->di_extsize);
  331. to->di_nextents = cpu_to_be32(from->di_nextents);
  332. to->di_anextents = cpu_to_be16(from->di_anextents);
  333. to->di_forkoff = from->di_forkoff;
  334. to->di_aformat = from->di_aformat;
  335. to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
  336. to->di_dmstate = cpu_to_be16(from->di_dmstate);
  337. to->di_flags = cpu_to_be16(from->di_flags);
  338. to->di_gen = cpu_to_be32(from->di_gen);
  339. if (from->di_version == 3) {
  340. to->di_changecount = cpu_to_be64(from->di_changecount);
  341. to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
  342. to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
  343. to->di_flags2 = cpu_to_be64(from->di_flags2);
  344. to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
  345. to->di_ino = cpu_to_be64(from->di_ino);
  346. to->di_lsn = cpu_to_be64(from->di_lsn);
  347. memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
  348. uuid_copy(&to->di_uuid, &from->di_uuid);
  349. to->di_flushiter = 0;
  350. } else {
  351. to->di_flushiter = cpu_to_be16(from->di_flushiter);
  352. }
  353. }
  354. xfs_failaddr_t
  355. xfs_dinode_verify(
  356. struct xfs_mount *mp,
  357. xfs_ino_t ino,
  358. struct xfs_dinode *dip)
  359. {
  360. uint16_t mode;
  361. uint16_t flags;
  362. uint64_t flags2;
  363. uint64_t di_size;
  364. if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
  365. return __this_address;
  366. /* Verify v3 integrity information first */
  367. if (dip->di_version >= 3) {
  368. if (!xfs_sb_version_hascrc(&mp->m_sb))
  369. return __this_address;
  370. if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
  371. XFS_DINODE_CRC_OFF))
  372. return __this_address;
  373. if (be64_to_cpu(dip->di_ino) != ino)
  374. return __this_address;
  375. if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
  376. return __this_address;
  377. }
  378. /* don't allow invalid i_size */
  379. di_size = be64_to_cpu(dip->di_size);
  380. if (di_size & (1ULL << 63))
  381. return __this_address;
  382. mode = be16_to_cpu(dip->di_mode);
  383. if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
  384. return __this_address;
  385. /* No zero-length symlinks/dirs. */
  386. if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
  387. return __this_address;
  388. /* Fork checks carried over from xfs_iformat_fork */
  389. if (mode &&
  390. be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
  391. be64_to_cpu(dip->di_nblocks))
  392. return __this_address;
  393. if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
  394. return __this_address;
  395. flags = be16_to_cpu(dip->di_flags);
  396. if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
  397. return __this_address;
  398. /* Do we have appropriate data fork formats for the mode? */
  399. switch (mode & S_IFMT) {
  400. case S_IFIFO:
  401. case S_IFCHR:
  402. case S_IFBLK:
  403. case S_IFSOCK:
  404. if (dip->di_format != XFS_DINODE_FMT_DEV)
  405. return __this_address;
  406. break;
  407. case S_IFREG:
  408. case S_IFLNK:
  409. case S_IFDIR:
  410. switch (dip->di_format) {
  411. case XFS_DINODE_FMT_LOCAL:
  412. /*
  413. * no local regular files yet
  414. */
  415. if (S_ISREG(mode))
  416. return __this_address;
  417. if (di_size > XFS_DFORK_DSIZE(dip, mp))
  418. return __this_address;
  419. /* fall through */
  420. case XFS_DINODE_FMT_EXTENTS:
  421. case XFS_DINODE_FMT_BTREE:
  422. break;
  423. default:
  424. return __this_address;
  425. }
  426. break;
  427. case 0:
  428. /* Uninitialized inode ok. */
  429. break;
  430. default:
  431. return __this_address;
  432. }
  433. if (XFS_DFORK_Q(dip)) {
  434. switch (dip->di_aformat) {
  435. case XFS_DINODE_FMT_LOCAL:
  436. case XFS_DINODE_FMT_EXTENTS:
  437. case XFS_DINODE_FMT_BTREE:
  438. break;
  439. default:
  440. return __this_address;
  441. }
  442. }
  443. /* only version 3 or greater inodes are extensively verified here */
  444. if (dip->di_version < 3)
  445. return NULL;
  446. flags2 = be64_to_cpu(dip->di_flags2);
  447. /* don't allow reflink/cowextsize if we don't have reflink */
  448. if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
  449. !xfs_sb_version_hasreflink(&mp->m_sb))
  450. return __this_address;
  451. /* only regular files get reflink */
  452. if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
  453. return __this_address;
  454. /* don't let reflink and realtime mix */
  455. if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
  456. return __this_address;
  457. /* don't let reflink and dax mix */
  458. if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
  459. return __this_address;
  460. return NULL;
  461. }
  462. void
  463. xfs_dinode_calc_crc(
  464. struct xfs_mount *mp,
  465. struct xfs_dinode *dip)
  466. {
  467. uint32_t crc;
  468. if (dip->di_version < 3)
  469. return;
  470. ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
  471. crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
  472. XFS_DINODE_CRC_OFF);
  473. dip->di_crc = xfs_end_cksum(crc);
  474. }
  475. /*
  476. * Read the disk inode attributes into the in-core inode structure.
  477. *
  478. * For version 5 superblocks, if we are initialising a new inode and we are not
  479. * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
  480. * inode core with a random generation number. If we are keeping inodes around,
  481. * we need to read the inode cluster to get the existing generation number off
  482. * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
  483. * format) then log recovery is dependent on the di_flushiter field being
  484. * initialised from the current on-disk value and hence we must also read the
  485. * inode off disk.
  486. */
  487. int
  488. xfs_iread(
  489. xfs_mount_t *mp,
  490. xfs_trans_t *tp,
  491. xfs_inode_t *ip,
  492. uint iget_flags)
  493. {
  494. xfs_buf_t *bp;
  495. xfs_dinode_t *dip;
  496. xfs_failaddr_t fa;
  497. int error;
  498. /*
  499. * Fill in the location information in the in-core inode.
  500. */
  501. error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
  502. if (error)
  503. return error;
  504. /* shortcut IO on inode allocation if possible */
  505. if ((iget_flags & XFS_IGET_CREATE) &&
  506. xfs_sb_version_hascrc(&mp->m_sb) &&
  507. !(mp->m_flags & XFS_MOUNT_IKEEP)) {
  508. /* initialise the on-disk inode core */
  509. memset(&ip->i_d, 0, sizeof(ip->i_d));
  510. VFS_I(ip)->i_generation = prandom_u32();
  511. ip->i_d.di_version = 3;
  512. return 0;
  513. }
  514. /*
  515. * Get pointers to the on-disk inode and the buffer containing it.
  516. */
  517. error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
  518. if (error)
  519. return error;
  520. /* even unallocated inodes are verified */
  521. fa = xfs_dinode_verify(mp, ip->i_ino, dip);
  522. if (fa) {
  523. xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
  524. sizeof(*dip), fa);
  525. error = -EFSCORRUPTED;
  526. goto out_brelse;
  527. }
  528. /*
  529. * If the on-disk inode is already linked to a directory
  530. * entry, copy all of the inode into the in-core inode.
  531. * xfs_iformat_fork() handles copying in the inode format
  532. * specific information.
  533. * Otherwise, just get the truly permanent information.
  534. */
  535. if (dip->di_mode) {
  536. xfs_inode_from_disk(ip, dip);
  537. error = xfs_iformat_fork(ip, dip);
  538. if (error) {
  539. #ifdef DEBUG
  540. xfs_alert(mp, "%s: xfs_iformat() returned error %d",
  541. __func__, error);
  542. #endif /* DEBUG */
  543. goto out_brelse;
  544. }
  545. } else {
  546. /*
  547. * Partial initialisation of the in-core inode. Just the bits
  548. * that xfs_ialloc won't overwrite or relies on being correct.
  549. */
  550. ip->i_d.di_version = dip->di_version;
  551. VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
  552. ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
  553. /*
  554. * Make sure to pull in the mode here as well in
  555. * case the inode is released without being used.
  556. * This ensures that xfs_inactive() will see that
  557. * the inode is already free and not try to mess
  558. * with the uninitialized part of it.
  559. */
  560. VFS_I(ip)->i_mode = 0;
  561. }
  562. ASSERT(ip->i_d.di_version >= 2);
  563. ip->i_delayed_blks = 0;
  564. /*
  565. * Mark the buffer containing the inode as something to keep
  566. * around for a while. This helps to keep recently accessed
  567. * meta-data in-core longer.
  568. */
  569. xfs_buf_set_ref(bp, XFS_INO_REF);
  570. /*
  571. * Use xfs_trans_brelse() to release the buffer containing the on-disk
  572. * inode, because it was acquired with xfs_trans_read_buf() in
  573. * xfs_imap_to_bp() above. If tp is NULL, this is just a normal
  574. * brelse(). If we're within a transaction, then xfs_trans_brelse()
  575. * will only release the buffer if it is not dirty within the
  576. * transaction. It will be OK to release the buffer in this case,
  577. * because inodes on disk are never destroyed and we will be locking the
  578. * new in-core inode before putting it in the cache where other
  579. * processes can find it. Thus we don't have to worry about the inode
  580. * being changed just because we released the buffer.
  581. */
  582. out_brelse:
  583. xfs_trans_brelse(tp, bp);
  584. return error;
  585. }
  586. /*
  587. * Validate di_extsize hint.
  588. *
  589. * The rules are documented at xfs_ioctl_setattr_check_extsize().
  590. * These functions must be kept in sync with each other.
  591. */
  592. xfs_failaddr_t
  593. xfs_inode_validate_extsize(
  594. struct xfs_mount *mp,
  595. uint32_t extsize,
  596. uint16_t mode,
  597. uint16_t flags)
  598. {
  599. bool rt_flag;
  600. bool hint_flag;
  601. bool inherit_flag;
  602. uint32_t extsize_bytes;
  603. uint32_t blocksize_bytes;
  604. rt_flag = (flags & XFS_DIFLAG_REALTIME);
  605. hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
  606. inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
  607. extsize_bytes = XFS_FSB_TO_B(mp, extsize);
  608. if (rt_flag)
  609. blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
  610. else
  611. blocksize_bytes = mp->m_sb.sb_blocksize;
  612. if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
  613. return __this_address;
  614. if (hint_flag && !S_ISREG(mode))
  615. return __this_address;
  616. if (inherit_flag && !S_ISDIR(mode))
  617. return __this_address;
  618. if ((hint_flag || inherit_flag) && extsize == 0)
  619. return __this_address;
  620. if (!(hint_flag || inherit_flag) && extsize != 0)
  621. return __this_address;
  622. if (extsize_bytes % blocksize_bytes)
  623. return __this_address;
  624. if (extsize > MAXEXTLEN)
  625. return __this_address;
  626. if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
  627. return __this_address;
  628. return NULL;
  629. }
  630. /*
  631. * Validate di_cowextsize hint.
  632. *
  633. * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
  634. * These functions must be kept in sync with each other.
  635. */
  636. xfs_failaddr_t
  637. xfs_inode_validate_cowextsize(
  638. struct xfs_mount *mp,
  639. uint32_t cowextsize,
  640. uint16_t mode,
  641. uint16_t flags,
  642. uint64_t flags2)
  643. {
  644. bool rt_flag;
  645. bool hint_flag;
  646. uint32_t cowextsize_bytes;
  647. rt_flag = (flags & XFS_DIFLAG_REALTIME);
  648. hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
  649. cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
  650. if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
  651. return __this_address;
  652. if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
  653. return __this_address;
  654. if (hint_flag && cowextsize == 0)
  655. return __this_address;
  656. if (!hint_flag && cowextsize != 0)
  657. return __this_address;
  658. if (hint_flag && rt_flag)
  659. return __this_address;
  660. if (cowextsize_bytes % mp->m_sb.sb_blocksize)
  661. return __this_address;
  662. if (cowextsize > MAXEXTLEN)
  663. return __this_address;
  664. if (cowextsize > mp->m_sb.sb_agblocks / 2)
  665. return __this_address;
  666. return NULL;
  667. }