/*
 * fs/f2fs/node.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 * http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)

static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;

/*
 * Check whether the given nid is within node id range.
 */
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
	if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_msg(sbi->sb, KERN_WARNING,
				"%s: out-of-range nid=%x, run fsck to fix.",
				__func__, nid);
		return -EINVAL;
	}
	return 0;
}
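
/*
 * Decide whether a given cache type may still grow: compare its current
 * in-memory footprint against a percentage of available low memory
 * (ram_thresh), or against total RAM for in-memory pages.
 */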
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct sysinfo val;
	unsigned long avail_ram;
	unsigned long mem_size = 0;
	bool res = false;

	si_meminfo(&val);

	/* only uses low memory */
	avail_ram = val.totalram - val.totalhigh;

	/*
	 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
	 */
	if (type == FREE_NIDS) {
		mem_size = (nm_i->nid_cnt[FREE_NID] *
				sizeof(struct free_nid)) >> PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
	} else if (type == NAT_ENTRIES) {
		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
							PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
		if (excess_cached_nats(sbi))
			res = false;
	} else if (type == DIRTY_DENTS) {
		if (sbi->sb->s_bdi->wb.dirty_exceeded)
			return false;
		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == INO_ENTRIES) {
		int i;

		for (i = 0; i < MAX_INO_ENTRY; i++)
			mem_size += sbi->im[i].ino_num *
						sizeof(struct ino_entry);
		mem_size >>= PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == EXTENT_CACHE) {
		mem_size = (atomic_read(&sbi->total_ext_tree) *
				sizeof(struct extent_tree) +
				atomic_read(&sbi->total_ext_node) *
				sizeof(struct extent_node)) >> PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == INMEM_PAGES) {
		/* it allows 20% / total_ram for inmemory pages */
		mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
		res = mem_size < (val.totalram / 5);
	} else {
		if (!sbi->sb->s_bdi->wb.dirty_exceeded)
			return true;
	}
	return res;
}
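
/* Drop the dirty state and uptodate flag of a node page being discarded. */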
static void clear_node_page_dirty(struct page *page)
{
	if (PageDirty(page)) {
		f2fs_clear_radix_tree_dirty_tag(page);
		clear_page_dirty_for_io(page);
		dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
	}
	ClearPageUptodate(page);
}

static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	pgoff_t index = current_nat_addr(sbi, nid);

	return f2fs_get_meta_page(sbi, index);
}
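
/*
 * Copy the current NAT block into the block slot used by the next
 * checkpoint, mark it dirty, and flip the NAT bitmap so further updates
 * go to the new location.
 */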
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *src_page;
	struct page *dst_page;
	pgoff_t src_off;
	pgoff_t dst_off;
	void *src_addr;
	void *dst_addr;
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	src_off = current_nat_addr(sbi, nid);
	dst_off = next_nat_addr(sbi, src_off);

	/* get current nat block page with lock */
	src_page = f2fs_get_meta_page(sbi, src_off);
	dst_page = f2fs_grab_meta_page(sbi, dst_off);
	f2fs_bug_on(sbi, PageDirty(src_page));

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_SIZE);
	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	set_to_next_nat(nm_i, nid);

	return dst_page;
}

static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
{
	struct nat_entry *new;

	if (no_fail)
		new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
	else
		new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
	if (new) {
		nat_set_nid(new, nid);
		nat_reset_flag(new);
	}
	return new;
}

static void __free_nat_entry(struct nat_entry *e)
{
	kmem_cache_free(nat_entry_slab, e);
}

/* must be locked by nat_tree_lock */
static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
	struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
{
	if (no_fail)
		f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
	else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
		return NULL;

	if (raw_ne)
		node_info_from_raw_nat(&ne->ni, raw_ne);

	list_add_tail(&ne->list, &nm_i->nat_entries);
	nm_i->nat_cnt++;
	return ne;
}

static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
	return radix_tree_lookup(&nm_i->nat_root, n);
}

static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
}

static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
	list_del(&e->list);
	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
	nm_i->nat_cnt--;
	__free_nat_entry(e);
}

static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
							struct nat_entry *ne)
{
	nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
	struct nat_entry_set *head;

	head = radix_tree_lookup(&nm_i->nat_set_root, set);
	if (!head) {
		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);

		INIT_LIST_HEAD(&head->entry_list);
		INIT_LIST_HEAD(&head->set_list);
		head->set = set;
		head->entry_cnt = 0;
		f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
	}
	return head;
}
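
/*
 * Mark a NAT entry dirty: move it onto the dirty list of its set (or keep
 * preallocated NEW_ADDR entries off the per-set lists) and update the
 * dirty counters.
 */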
static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
						struct nat_entry *ne)
{
	struct nat_entry_set *head;
	bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR;

	if (!new_ne)
		head = __grab_nat_entry_set(nm_i, ne);

	/*
	 * update entry_cnt in below condition:
	 * 1. update NEW_ADDR to valid block address;
	 * 2. update old block address to new one;
	 */
	if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) ||
				!get_nat_flag(ne, IS_DIRTY)))
		head->entry_cnt++;

	set_nat_flag(ne, IS_PREALLOC, new_ne);

	if (get_nat_flag(ne, IS_DIRTY))
		goto refresh_list;

	nm_i->dirty_nat_cnt++;
	set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
	if (new_ne)
		list_del_init(&ne->list);
	else
		list_move_tail(&ne->list, &head->entry_list);
}

static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
		struct nat_entry_set *set, struct nat_entry *ne)
{
	list_move_tail(&ne->list, &nm_i->nat_entries);
	set_nat_flag(ne, IS_DIRTY, false);
	set->entry_cnt--;
	nm_i->dirty_nat_cnt--;
}

static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry_set **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
							start, nr);
}
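
/* A dentry mark is needed if the node is neither checkpointed nor fsynced. */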
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool need = false;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		if (!get_nat_flag(e, IS_CHECKPOINTED) &&
				!get_nat_flag(e, HAS_FSYNCED_INODE))
			need = true;
	}
	up_read(&nm_i->nat_tree_lock);
	return need;
}

bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool is_cp = true;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
		is_cp = false;
	up_read(&nm_i->nat_tree_lock);
	return is_cp;
}

bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool need_update = true;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ino);
	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
			(get_nat_flag(e, IS_CHECKPOINTED) ||
			 get_nat_flag(e, HAS_FSYNCED_INODE)))
		need_update = false;
	up_read(&nm_i->nat_tree_lock);
	return need_update;
}

/* must be locked by nat_tree_lock */
static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
						struct f2fs_nat_entry *ne)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *new, *e;

	new = __alloc_nat_entry(nid, false);
	if (!new)
		return;

	down_write(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (!e)
		e = __init_nat_entry(nm_i, new, ne, false);
	else
		f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
				nat_get_blkaddr(e) !=
					le32_to_cpu(ne->block_addr) ||
				nat_get_version(e) != ne->version);
	up_write(&nm_i->nat_tree_lock);
	if (e != new)
		__free_nat_entry(new);
}
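
/*
 * Update the cached NAT entry of a node with its new block address and
 * fsync state, creating the cache entry if it does not exist yet.
 */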
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
			block_t new_blkaddr, bool fsync_done)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	struct nat_entry *new = __alloc_nat_entry(ni->nid, true);

	down_write(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ni->nid);
	if (!e) {
		e = __init_nat_entry(nm_i, new, NULL, true);
		copy_node_info(&e->ni, ni);
		f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
	} else if (new_blkaddr == NEW_ADDR) {
		/*
		 * when nid is reallocated,
		 * previous nat entry can be remained in nat cache.
		 * So, reinitialize it with new information.
		 */
		copy_node_info(&e->ni, ni);
		f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
	}
	/* let's free early to reduce memory consumption */
	if (e != new)
		__free_nat_entry(new);

	/* sanity check */
	f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
			new_blkaddr == NULL_ADDR);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
			new_blkaddr == NEW_ADDR);
	f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
			new_blkaddr == NEW_ADDR);

	/* increment version no as node is removed */
	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
		unsigned char version = nat_get_version(e);

		nat_set_version(e, inc_node_version(version));
	}

	/* change address */
	nat_set_blkaddr(e, new_blkaddr);
	if (!is_valid_data_blkaddr(sbi, new_blkaddr))
		set_nat_flag(e, IS_CHECKPOINTED, false);
	__set_nat_cache_dirty(nm_i, e);

	/* update fsync_mark if its inode nat entry is still alive */
	if (ni->nid != ni->ino)
		e = __lookup_nat_cache(nm_i, ni->ino);
	if (e) {
		if (fsync_done && ni->nid == ni->ino)
			set_nat_flag(e, HAS_FSYNCED_INODE, true);
		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
	}
	up_write(&nm_i->nat_tree_lock);
}
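
/*
 * Shrink the clean NAT cache by up to nr_shrink entries; returns the
 * number of entries actually freed.
 */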
int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int nr = nr_shrink;

	if (!down_write_trylock(&nm_i->nat_tree_lock))
		return 0;

	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
		struct nat_entry *ne;

		ne = list_first_entry(&nm_i->nat_entries,
					struct nat_entry, list);
		__del_from_nat_cache(nm_i, ne);
		nr_shrink--;
	}
	up_write(&nm_i->nat_tree_lock);
	return nr - nr_shrink;
}

/*
 * This function always returns success
 */
void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
						struct node_info *ni)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	nid_t start_nid = START_NID(nid);
	struct f2fs_nat_block *nat_blk;
	struct page *page = NULL;
	struct f2fs_nat_entry ne;
	struct nat_entry *e;
	pgoff_t index;
	int i;

	ni->nid = nid;

	/* Check nat cache */
	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		ni->ino = nat_get_ino(e);
		ni->blk_addr = nat_get_blkaddr(e);
		ni->version = nat_get_version(e);
		up_read(&nm_i->nat_tree_lock);
		return;
	}

	memset(&ne, 0, sizeof(struct f2fs_nat_entry));

	/* Check current segment summary */
	down_read(&curseg->journal_rwsem);
	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
	if (i >= 0) {
		ne = nat_in_journal(journal, i);
		node_info_from_raw_nat(ni, &ne);
	}
	up_read(&curseg->journal_rwsem);
	if (i >= 0) {
		up_read(&nm_i->nat_tree_lock);
		goto cache;
	}

	/* Fill node_info from nat page */
	index = current_nat_addr(sbi, nid);
	up_read(&nm_i->nat_tree_lock);

	page = f2fs_get_meta_page(sbi, index);
	nat_blk = (struct f2fs_nat_block *)page_address(page);
	ne = nat_blk->entries[nid - start_nid];
	node_info_from_raw_nat(ni, &ne);
	f2fs_put_page(page, 1);
cache:
	/* cache nat entry */
	cache_nat_entry(sbi, nid, &ne);
}

/*
 * readahead MAX_RA_NODE number of node pages.
 */
static void f2fs_ra_node_pages(struct page *parent, int start, int n)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	struct blk_plug plug;
	int i, end;
	nid_t nid;

	blk_start_plug(&plug);

	/* Then, try readahead for siblings of the desired node */
	end = start + n;
	end = min(end, NIDS_PER_BLOCK);
	for (i = start; i < end; i++) {
		nid = get_nid(parent, i, false);
		f2fs_ra_node_page(sbi, nid);
	}

	blk_finish_plug(&plug);
}
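
/*
 * Compute the file offset just past the node block where a lookup stopped
 * (dn->cur_level under dn->max_level), so callers can skip a hole in one
 * step instead of probing block by block.
 */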
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
	const long direct_index = ADDRS_PER_INODE(dn->inode);
	const long direct_blks = ADDRS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	unsigned int skipped_unit = ADDRS_PER_BLOCK;
	int cur_level = dn->cur_level;
	int max_level = dn->max_level;
	pgoff_t base = 0;

	if (!dn->max_level)
		return pgofs + 1;

	while (max_level-- > cur_level)
		skipped_unit *= NIDS_PER_BLOCK;

	switch (dn->max_level) {
	case 3:
		base += 2 * indirect_blks;
	case 2:
		base += 2 * direct_blks;
	case 1:
		base += direct_index;
		break;
	default:
		f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
	}

	return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
}

/*
 * The maximum depth is four.
 * Offset[0] will have raw inode offset.
 */
static int get_node_path(struct inode *inode, long block,
				int offset[4], unsigned int noffset[4])
{
	const long direct_index = ADDRS_PER_INODE(inode);
	const long direct_blks = ADDRS_PER_BLOCK;
	const long dptrs_per_blk = NIDS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
	int n = 0;
	int level = 0;

	noffset[0] = 0;

	if (block < direct_index) {
		offset[n] = block;
		goto got;
	}
	block -= direct_index;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR1_BLOCK;
		noffset[n] = 1;
		offset[n] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR2_BLOCK;
		noffset[n] = 2;
		offset[n] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND1_BLOCK;
		noffset[n] = 3;
		offset[n++] = block / direct_blks;
		noffset[n] = 4 + offset[n - 1];
		offset[n] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND2_BLOCK;
		noffset[n] = 4 + dptrs_per_blk;
		offset[n++] = block / direct_blks;
		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
		offset[n] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < dindirect_blks) {
		offset[n++] = NODE_DIND_BLOCK;
		noffset[n] = 5 + (dptrs_per_blk * 2);
		offset[n++] = block / indirect_blks;
		noffset[n] = 6 + (dptrs_per_blk * 2) +
				offset[n - 1] * (dptrs_per_blk + 1);
		offset[n++] = (block / direct_blks) % dptrs_per_blk;
		noffset[n] = 7 + (dptrs_per_blk * 2) +
				offset[n - 2] * (dptrs_per_blk + 1) +
				offset[n - 1];
		offset[n] = block % direct_blks;
		level = 3;
		goto got;
	} else {
		return -E2BIG;
	}
got:
	return level;
}

/*
 * Caller should call f2fs_put_dnode(dn).
 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
 * In the case of RDONLY_NODE, we don't need to care about mutex.
 */
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct page *npage[4];
	struct page *parent = NULL;
	int offset[4];
	unsigned int noffset[4];
	nid_t nids[4];
	int level, i = 0;
	int err = 0;

	level = get_node_path(dn->inode, index, offset, noffset);
	if (level < 0)
		return level;

	nids[0] = dn->inode->i_ino;
	npage[0] = dn->inode_page;

	if (!npage[0]) {
		npage[0] = f2fs_get_node_page(sbi, nids[0]);
		if (IS_ERR(npage[0]))
			return PTR_ERR(npage[0]);
	}

	/* if inline_data is set, should not report any block indices */
	if (f2fs_has_inline_data(dn->inode) && index) {
		err = -ENOENT;
		f2fs_put_page(npage[0], 1);
		goto release_out;
	}

	parent = npage[0];
	if (level != 0)
		nids[1] = get_nid(parent, offset[0], true);
	dn->inode_page = npage[0];
	dn->inode_page_locked = true;

	/* get indirect or direct nodes */
	for (i = 1; i <= level; i++) {
		bool done = false;

		if (!nids[i] && mode == ALLOC_NODE) {
			/* alloc new node */
			if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
				err = -ENOSPC;
				goto release_pages;
			}

			dn->nid = nids[i];
			npage[i] = f2fs_new_node_page(dn, noffset[i]);
			if (IS_ERR(npage[i])) {
				f2fs_alloc_nid_failed(sbi, nids[i]);
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}

			set_nid(parent, offset[i - 1], nids[i], i == 1);
			f2fs_alloc_nid_done(sbi, nids[i]);
			done = true;
		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
			npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}
			done = true;
		}
		if (i == 1) {
			dn->inode_page_locked = false;
			unlock_page(parent);
		} else {
			f2fs_put_page(parent, 1);
		}

		if (!done) {
			npage[i] = f2fs_get_node_page(sbi, nids[i]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				f2fs_put_page(npage[0], 0);
				goto release_out;
			}
		}
		if (i < level) {
			parent = npage[i];
			nids[i + 1] = get_nid(parent, offset[i], false);
		}
	}
	dn->nid = nids[level];
	dn->ofs_in_node = offset[level];
	dn->node_page = npage[level];
	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	return 0;

release_pages:
	f2fs_put_page(parent, 1);
	if (i > 1)
		f2fs_put_page(npage[0], 0);
release_out:
	dn->inode_page = NULL;
	dn->node_page = NULL;
	if (err == -ENOENT) {
		dn->cur_level = i;
		dn->max_level = level;
		dn->ofs_in_node = offset[level];
	}
	return err;
}
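
/*
 * Free one node: invalidate its on-disk block, clear its NAT entry and
 * counters (including inode accounting when the node is the inode itself),
 * and drop the cached node page.
 */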
static void truncate_node(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info ni;

	f2fs_get_node_info(sbi, dn->nid, &ni);

	/* Deallocate node address */
	f2fs_invalidate_blocks(sbi, ni.blk_addr);
	dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
	set_node_addr(sbi, &ni, NULL_ADDR, false);

	if (dn->nid == dn->inode->i_ino) {
		f2fs_remove_orphan_inode(sbi, dn->nid);
		dec_valid_inode_count(sbi);
		f2fs_inode_synced(dn->inode);
	}

	clear_node_page_dirty(dn->node_page);
	set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_put_page(dn->node_page, 1);

	invalidate_mapping_pages(NODE_MAPPING(sbi),
			dn->node_page->index, dn->node_page->index);

	dn->node_page = NULL;
	trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
}

static int truncate_dnode(struct dnode_of_data *dn)
{
	struct page *page;

	if (dn->nid == 0)
		return 1;

	/* get direct node */
	page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
		return 1;
	else if (IS_ERR(page))
		return PTR_ERR(page);

	/* Make dnode_of_data for parameter */
	dn->node_page = page;
	dn->ofs_in_node = 0;
	f2fs_truncate_data_blocks(dn);
	truncate_node(dn);
	return 1;
}
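
/*
 * Recursively free the children of an indirect or double indirect node,
 * starting from index ofs; returns the number of freed node blocks or a
 * negative error.
 */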
static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
						int ofs, int depth)
{
	struct dnode_of_data rdn = *dn;
	struct page *page;
	struct f2fs_node *rn;
	nid_t child_nid;
	unsigned int child_nofs;
	int freed = 0;
	int i, ret;

	if (dn->nid == 0)
		return NIDS_PER_BLOCK + 1;

	trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);

	page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK);

	rn = F2FS_NODE(page);
	if (depth < 3) {
		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0)
				continue;
			rdn.nid = child_nid;
			ret = truncate_dnode(&rdn);
			if (ret < 0)
				goto out_err;
			if (set_nid(page, i, 0, false))
				dn->node_changed = true;
		}
	} else {
		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0) {
				child_nofs += NIDS_PER_BLOCK + 1;
				continue;
			}
			rdn.nid = child_nid;
			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
			if (ret == (NIDS_PER_BLOCK + 1)) {
				if (set_nid(page, i, 0, false))
					dn->node_changed = true;
				child_nofs += ret;
			} else if (ret < 0 && ret != -ENOENT) {
				goto out_err;
			}
		}
		freed = child_nofs;
	}

	if (!ofs) {
		/* remove current indirect node */
		dn->node_page = page;
		truncate_node(dn);
		freed++;
	} else {
		f2fs_put_page(page, 1);
	}
	trace_f2fs_truncate_nodes_exit(dn->inode, freed);
	return freed;

out_err:
	f2fs_put_page(page, 1);
	trace_f2fs_truncate_nodes_exit(dn->inode, ret);
	return ret;
}

static int truncate_partial_nodes(struct dnode_of_data *dn,
			struct f2fs_inode *ri, int *offset, int depth)
{
	struct page *pages[2];
	nid_t nid[3];
	nid_t child_nid;
	int err = 0;
	int i;
	int idx = depth - 2;

	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
	if (!nid[0])
		return 0;

	/* get indirect nodes in the path */
	for (i = 0; i < idx + 1; i++) {
		/* reference count'll be increased */
		pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]);
		if (IS_ERR(pages[i])) {
			err = PTR_ERR(pages[i]);
			idx = i - 1;
			goto fail;
		}
		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
	}

	f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);

	/* free direct nodes linked to a partial indirect node */
	for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
		child_nid = get_nid(pages[idx], i, false);
		if (!child_nid)
			continue;
		dn->nid = child_nid;
		err = truncate_dnode(dn);
		if (err < 0)
			goto fail;
		if (set_nid(pages[idx], i, 0, false))
			dn->node_changed = true;
	}

	if (offset[idx + 1] == 0) {
		dn->node_page = pages[idx];
		dn->nid = nid[idx];
		truncate_node(dn);
	} else {
		f2fs_put_page(pages[idx], 1);
	}
	offset[idx]++;
	offset[idx + 1] = 0;
	idx--;
fail:
	for (i = idx; i >= 0; i--)
		f2fs_put_page(pages[i], 1);

	trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);

	return err;
}

/*
 * All the block addresses of data and nodes should be nullified.
 */
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int err = 0, cont = 1;
	int level, offset[4], noffset[4];
	unsigned int nofs = 0;
	struct f2fs_inode *ri;
	struct dnode_of_data dn;
	struct page *page;

	trace_f2fs_truncate_inode_blocks_enter(inode, from);

	level = get_node_path(inode, from, offset, noffset);
	if (level < 0)
		return level;

	page = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	set_new_dnode(&dn, inode, page, NULL, 0);
	unlock_page(page);

	ri = F2FS_INODE(page);
	switch (level) {
	case 0:
	case 1:
		nofs = noffset[1];
		break;
	case 2:
		nofs = noffset[1];
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		nofs += 1 + NIDS_PER_BLOCK;
		break;
	case 3:
		nofs = 5 + 2 * NIDS_PER_BLOCK;
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		break;
	default:
		BUG();
	}

skip_partial:
	while (cont) {
		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
		switch (offset[0]) {
		case NODE_DIR1_BLOCK:
		case NODE_DIR2_BLOCK:
			err = truncate_dnode(&dn);
			break;
		case NODE_IND1_BLOCK:
		case NODE_IND2_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 2);
			break;
		case NODE_DIND_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 3);
			cont = 0;
			break;
		default:
			BUG();
		}
		if (err < 0 && err != -ENOENT)
			goto fail;
		if (offset[1] == 0 &&
				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
			lock_page(page);
			BUG_ON(page->mapping != NODE_MAPPING(sbi));
			f2fs_wait_on_page_writeback(page, NODE, true);
			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
			set_page_dirty(page);
			unlock_page(page);
		}
		offset[1] = 0;
		offset[0]++;
		nofs += err;
	}
fail:
	f2fs_put_page(page, 0);
	trace_f2fs_truncate_inode_blocks_exit(inode, err);
	return err > 0 ? 0 : err;
}

/* caller must lock inode page */
int f2fs_truncate_xattr_node(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t nid = F2FS_I(inode)->i_xattr_nid;
	struct dnode_of_data dn;
	struct page *npage;

	if (!nid)
		return 0;

	npage = f2fs_get_node_page(sbi, nid);
	if (IS_ERR(npage))
		return PTR_ERR(npage);

	f2fs_i_xnid_write(inode, 0);

	set_new_dnode(&dn, inode, NULL, npage, nid);
	truncate_node(&dn);
	return 0;
}

/*
 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 */
int f2fs_remove_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
	err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
	if (err)
		return err;

	err = f2fs_truncate_xattr_node(inode);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	/* remove potential inline_data blocks */
	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
				S_ISLNK(inode->i_mode))
		f2fs_truncate_data_blocks_range(&dn, 1);

	/* 0 is possible, after f2fs_new_inode() has failed */
	f2fs_bug_on(F2FS_I_SB(inode),
			inode->i_blocks != 0 && inode->i_blocks != 8);

	/* will put inode & node pages */
	truncate_node(&dn);
	return 0;
}

struct page *f2fs_new_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;

	/* allocate inode page for new inode */
	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);

	/* caller should f2fs_put_page(page, 1); */
	return f2fs_new_node_page(&dn, 0);
}
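
/*
 * Allocate a new in-memory node page for dn->nid, reserve a valid node
 * count, set its NAT entry to NEW_ADDR and mark the page dirty.
 */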
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info new_ni;
	struct page *page;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return ERR_PTR(-EPERM);

	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
		goto fail;

#ifdef CONFIG_F2FS_CHECK_FS
	f2fs_get_node_info(sbi, dn->nid, &new_ni);
	f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
	new_ni.nid = dn->nid;
	new_ni.ino = dn->inode->i_ino;
	new_ni.blk_addr = NULL_ADDR;
	new_ni.flag = 0;
	new_ni.version = 0;
	set_node_addr(sbi, &new_ni, NEW_ADDR, false);

	f2fs_wait_on_page_writeback(page, NODE, true);
	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
	set_cold_node(page, S_ISDIR(dn->inode->i_mode));
	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (set_page_dirty(page))
		dn->node_changed = true;

	if (f2fs_has_xattr_block(ofs))
		f2fs_i_xnid_write(dn->inode, dn->nid);

	if (ofs == 0)
		inc_valid_inode_count(sbi);
	return page;

fail:
	clear_node_page_dirty(page);
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

/*
 * Caller should do after getting the following values.
 * 0: f2fs_put_page(page, 0)
 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
 */
static int read_node_page(struct page *page, int op_flags)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = NODE,
		.op = REQ_OP_READ,
		.op_flags = op_flags,
		.page = page,
		.encrypted_page = NULL,
	};

	if (PageUptodate(page)) {
#ifdef CONFIG_F2FS_CHECK_FS
		f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
#endif
		return LOCKED_PAGE;
	}

	f2fs_get_node_info(sbi, page->index, &ni);

	if (unlikely(ni.blk_addr == NULL_ADDR) ||
			is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
		ClearPageUptodate(page);
		return -ENOENT;
	}

	fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
	return f2fs_submit_page_bio(&fio);
}

/*
 * Readahead a node page
 */
void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *apage;
	int err;

	if (!nid)
		return;
	if (f2fs_check_nid_range(sbi, nid))
		return;

	rcu_read_lock();
	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
	rcu_read_unlock();
	if (apage)
		return;

	apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!apage)
		return;

	err = read_node_page(apage, REQ_RAHEAD);
	f2fs_put_page(apage, err ? 1 : 0);
}

static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
					struct page *parent, int start)
{
	struct page *page;
	int err;

	if (!nid)
		return ERR_PTR(-ENOENT);
	if (f2fs_check_nid_range(sbi, nid))
		return ERR_PTR(-EINVAL);
repeat:
	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, 0);
	if (err < 0) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	} else if (err == LOCKED_PAGE) {
		err = 0;
		goto page_hit;
	}

	if (parent)
		f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE);

	lock_page(page);

	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	if (unlikely(!PageUptodate(page))) {
		err = -EIO;
		goto out_err;
	}

	if (!f2fs_inode_chksum_verify(sbi, page)) {
		err = -EBADMSG;
		goto out_err;
	}
page_hit:
	if (unlikely(nid != nid_of_node(page))) {
		f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
			"nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
			nid, nid_of_node(page), ino_of_node(page),
			ofs_of_node(page), cpver_of_node(page),
			next_blkaddr_of_node(page));
		err = -EINVAL;
out_err:
		ClearPageUptodate(page);
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	return page;
}

struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
	return __get_node_page(sbi, nid, NULL, 0);
}

struct page *f2fs_get_node_page_ra(struct page *parent, int start)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	nid_t nid = get_nid(parent, start, false);

	return __get_node_page(sbi, nid, parent, start);
}

static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct page *page;
	int ret;

	/* should flush inline_data before evict_inode */
	inode = ilookup(sbi->sb, ino);
	if (!inode)
		return;

	page = f2fs_pagecache_get_page(inode->i_mapping, 0,
					FGP_LOCK|FGP_NOWAIT, 0);
	if (!page)
		goto iput_out;

	if (!PageUptodate(page))
		goto page_out;

	if (!PageDirty(page))
		goto page_out;

	if (!clear_page_dirty_for_io(page))
		goto page_out;

	ret = f2fs_write_inline_data(inode, page);
	inode_dec_dirty_pages(inode);
	f2fs_remove_dirty_inode(inode);
	if (ret)
		set_page_dirty(page);
page_out:
	f2fs_put_page(page, 1);
iput_out:
	iput(inode);
}

static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
	pgoff_t index;
	struct pagevec pvec;
	struct page *last_page = NULL;
	int nr_pages;

	pagevec_init(&pvec);
	index = 0;

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				return ERR_PTR(-EIO);
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (last_page)
				f2fs_put_page(last_page, 0);

			get_page(page);
			last_page = page;
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return last_page;
}
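
/*
 * Write back one dirty node page: look up its old block address, write the
 * page to a new location and update the NAT entry accordingly.
 */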
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
				struct writeback_control *wbc, bool do_balance,
				enum iostat_type io_type)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	nid_t nid;
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = ino_of_node(page),
		.type = NODE,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.io_type = io_type,
		.io_wbc = wbc,
	};

	trace_f2fs_writepage(page, NODE);

	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	/* get old block addr of this node page */
	nid = nid_of_node(page);
	f2fs_bug_on(sbi, page->index != nid);

	if (wbc->for_reclaim) {
		if (!down_read_trylock(&sbi->node_write))
			goto redirty_out;
	} else {
		down_read(&sbi->node_write);
	}

	f2fs_get_node_info(sbi, nid, &ni);

	/* This page is already truncated */
	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		ClearPageUptodate(page);
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		up_read(&sbi->node_write);
		unlock_page(page);
		return 0;
	}

	if (atomic && !test_opt(sbi, NOBARRIER))
		fio.op_flags |= REQ_PREFLUSH | REQ_FUA;

	set_page_writeback(page);
	ClearPageError(page);
	fio.old_blkaddr = ni.blk_addr;
	f2fs_do_write_node_page(nid, &fio);
	set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
	dec_page_count(sbi, F2FS_DIRTY_NODES);
	up_read(&sbi->node_write);

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
						page->index, NODE);
		submitted = NULL;
	}

	unlock_page(page);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, NODE);
		submitted = NULL;
	}
	if (submitted)
		*submitted = fio.submitted;

	if (do_balance)
		f2fs_balance_fs(sbi, false);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

void f2fs_move_node_page(struct page *node_page, int gc_type)
{
	if (gc_type == FG_GC) {
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = 1,
			.for_reclaim = 0,
		};

		set_page_dirty(node_page);
		f2fs_wait_on_page_writeback(node_page, NODE, true);

		f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
		if (!clear_page_dirty_for_io(node_page))
			goto out_page;

		if (__write_node_page(node_page, false, NULL,
					&wbc, false, FS_GC_NODE_IO))
			unlock_page(node_page);
		goto release_page;
	} else {
		/* set page dirty and write it */
		if (!PageWriteback(node_page))
			set_page_dirty(node_page);
	}
out_page:
	unlock_page(node_page);
release_page:
	f2fs_put_page(node_page, 0);
}

static int f2fs_write_node_page(struct page *page,
				struct writeback_control *wbc)
{
	return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
}
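
/*
 * Write the dirty dnodes of an inode for fsync, marking the last dnode
 * with the fsync flag (and a dentry mark when needed) so it can be found
 * again at recovery time.
 */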
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
			struct writeback_control *wbc, bool atomic)
{
	pgoff_t index;
	pgoff_t last_idx = ULONG_MAX;
	struct pagevec pvec;
	int ret = 0;
	struct page *last_page = NULL;
	bool marked = false;
	nid_t ino = inode->i_ino;
	int nr_pages;

	if (atomic) {
		last_page = last_fsync_dnode(sbi, ino);
		if (IS_ERR_OR_NULL(last_page))
			return PTR_ERR_OR_ZERO(last_page);
	}
retry:
	pagevec_init(&pvec);
	index = 0;

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				ret = -EIO;
				goto out;
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page) && page != last_page) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, NODE, true);
			BUG_ON(PageWriteback(page));

			set_fsync_mark(page, 0);
			set_dentry_mark(page, 0);

			if (!atomic || page == last_page) {
				set_fsync_mark(page, 1);
				if (IS_INODE(page)) {
					if (is_inode_flag_set(inode,
								FI_DIRTY_INODE))
						f2fs_update_inode(inode, page);
					set_dentry_mark(page,
						f2fs_need_dentry_mark(sbi, ino));
				}
				/* may be written by other thread */
				if (!PageDirty(page))
					set_page_dirty(page);
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_node_page(page, atomic &&
						page == last_page,
						&submitted, wbc, true,
						FS_NODE_IO);
			if (ret) {
				unlock_page(page);
				f2fs_put_page(last_page, 0);
				break;
			} else if (submitted) {
				last_idx = page->index;
			}

			if (page == last_page) {
				f2fs_put_page(page, 0);
				marked = true;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();

		if (ret || marked)
			break;
	}
	if (!ret && atomic && !marked) {
		f2fs_msg(sbi->sb, KERN_DEBUG,
			"Retry to write fsync mark: ino=%u, idx=%lx",
					ino, last_page->index);
		lock_page(last_page);
		f2fs_wait_on_page_writeback(last_page, NODE, true);
		set_page_dirty(last_page);
		unlock_page(last_page);
		goto retry;
	}
out:
	if (last_idx != ULONG_MAX)
		f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
	return ret ? -EIO: 0;
}
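
/*
 * Write back dirty node pages in three passes: indirect nodes first, then
 * dentry dnodes, then file dnodes (see the step comment below).
 */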
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
				struct writeback_control *wbc,
				bool do_balance, enum iostat_type io_type)
{
	pgoff_t index;
	struct pagevec pvec;
	int step = 0;
	int nwritten = 0;
	int ret = 0;
	int nr_pages, done = 0;

	pagevec_init(&pvec);

next_step:
	index = 0;

	while (!done && (nr_pages = pagevec_lookup_tag(&pvec,
			NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[NODE]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}

			/*
			 * flushing sequence with step:
			 * 0. indirect nodes
			 * 1. dentry dnodes
			 * 2. file dnodes
			 */
			if (step == 0 && IS_DNODE(page))
				continue;
			if (step == 1 && (!IS_DNODE(page) ||
						is_cold_node(page)))
				continue;
			if (step == 2 && (!IS_DNODE(page) ||
						!is_cold_node(page)))
				continue;
lock_node:
			if (wbc->sync_mode == WB_SYNC_ALL)
				lock_page(page);
			else if (!trylock_page(page))
				continue;

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* flush inline_data */
			if (is_inline_node(page)) {
				clear_inline_node(page);
				unlock_page(page);
				flush_inline_data(sbi, ino_of_node(page));
				goto lock_node;
			}

			f2fs_wait_on_page_writeback(page, NODE, true);

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			set_fsync_mark(page, 0);
			set_dentry_mark(page, 0);

			ret = __write_node_page(page, false, &submitted,
						wbc, do_balance, io_type);
			if (ret)
				unlock_page(page);
			else if (submitted)
				nwritten++;

			if (--wbc->nr_to_write == 0)
				break;
		}
		pagevec_release(&pvec);
		cond_resched();

		if (wbc->nr_to_write == 0) {
			step = 2;
			break;
		}
	}

	if (step < 2) {
		step++;
		goto next_step;
	}

	if (nwritten)
		f2fs_submit_merged_write(sbi, NODE);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;
	return ret;
}

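/*
 * Wait for writeback of node pages that belong to @ino and report -EIO
 * if any of them, or the node mapping itself, saw a write error.
 */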
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
{
	pgoff_t index = 0;
	struct pagevec pvec;
	int ret2, ret = 0;
	int nr_pages;

	pagevec_init(&pvec);

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_WRITEBACK))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (ino && ino_of_node(page) == ino) {
				f2fs_wait_on_page_writeback(page, NODE, true);
				if (TestClearPageError(page))
					ret = -EIO;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	ret2 = filemap_check_errors(NODE_MAPPING(sbi));
	if (!ret)
		ret = ret2;
	return ret;
}

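/* ->writepages callback for the node address space */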
static int f2fs_write_node_pages(struct address_space *mapping,
			struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct blk_plug plug;
	long diff;

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	/* balancing f2fs's metadata in background */
	f2fs_balance_fs_bg(sbi);

	/* collect a number of dirty node pages and write together */
	if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
		goto skip_write;

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[NODE]);
	else if (atomic_read(&sbi->wb_sync_req[NODE]))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, NODE);

	diff = nr_pages_to_write(sbi, NODE, wbc);
	blk_start_plug(&plug);
	f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO);
	blk_finish_plug(&plug);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[NODE]);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
	trace_f2fs_writepages(mapping->host, wbc, NODE);
	return 0;
}

static int f2fs_set_node_page_dirty(struct page *page)
{
	trace_f2fs_set_page_dirty(page, NODE);

	if (!PageUptodate(page))
		SetPageUptodate(page);
#ifdef CONFIG_F2FS_CHECK_FS
	if (IS_INODE(page))
		f2fs_inode_chksum_set(F2FS_P_SB(page), page);
#endif
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
		SetPagePrivate(page);
		f2fs_trace_pid(page);
		return 1;
	}
	return 0;
}

/*
 * Structure of the f2fs node operations
 */
const struct address_space_operations f2fs_node_aops = {
	.writepage	= f2fs_write_node_page,
	.writepages	= f2fs_write_node_pages,
	.set_page_dirty	= f2fs_set_node_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
#ifdef CONFIG_MIGRATION
	.migratepage	= f2fs_migrate_page,
#endif
};

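/*
 * Free nid cache helpers: free nids are tracked in a radix tree keyed by
 * nid plus a FREE_NID list, both protected by nm_i->nid_list_lock.
 */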
static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
						nid_t n)
{
	return radix_tree_lookup(&nm_i->free_nid_root, n);
}

static int __insert_free_nid(struct f2fs_sb_info *sbi,
			struct free_nid *i, enum nid_state state)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
	if (err)
		return err;

	f2fs_bug_on(sbi, state != i->state);
	nm_i->nid_cnt[state]++;
	if (state == FREE_NID)
		list_add_tail(&i->list, &nm_i->free_nid_list);
	return 0;
}

static void __remove_free_nid(struct f2fs_sb_info *sbi,
			struct free_nid *i, enum nid_state state)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	f2fs_bug_on(sbi, state != i->state);
	nm_i->nid_cnt[state]--;
	if (state == FREE_NID)
		list_del(&i->list);
	radix_tree_delete(&nm_i->free_nid_root, i->nid);
}

static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
			enum nid_state org_state, enum nid_state dst_state)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	f2fs_bug_on(sbi, org_state != i->state);
	i->state = dst_state;
	nm_i->nid_cnt[org_state]--;
	nm_i->nid_cnt[dst_state]++;

	switch (dst_state) {
	case PREALLOC_NID:
		list_del(&i->list);
		break;
	case FREE_NID:
		list_add_tail(&i->list, &nm_i->free_nid_list);
		break;
	default:
		BUG_ON(1);
	}
}

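/* update the per-NAT-block free nid bitmap and counter for @nid */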
static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
							bool set, bool build)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
	unsigned int nid_ofs = nid - START_NID(nid);

	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
		return;

	if (set) {
		if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
			return;
		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
		nm_i->free_nid_count[nat_ofs]++;
	} else {
		if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
			return;
		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
		if (!build)
			nm_i->free_nid_count[nat_ofs]--;
	}
}

/* return if the nid is recognized as free */
static bool add_free_nid(struct f2fs_sb_info *sbi,
				nid_t nid, bool build, bool update)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *e;
	struct nat_entry *ne;
	int err = -EINVAL;
	bool ret = false;

	/* 0 nid should not be used */
	if (unlikely(nid == 0))
		return false;

	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
	i->nid = nid;
	i->state = FREE_NID;

	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);

	spin_lock(&nm_i->nid_list_lock);

	if (build) {
		/*
		 *   Thread A             Thread B
		 *  - f2fs_create
		 *   - f2fs_new_inode
		 *    - f2fs_alloc_nid
		 *     - __insert_nid_to_list(PREALLOC_NID)
		 *                     - f2fs_balance_fs_bg
		 *                      - f2fs_build_free_nids
		 *                       - __f2fs_build_free_nids
		 *                        - scan_nat_page
		 *                         - add_free_nid
		 *                          - __lookup_nat_cache
		 *  - f2fs_add_link
		 *   - f2fs_init_inode_metadata
		 *    - f2fs_new_inode_page
		 *     - f2fs_new_node_page
		 *      - set_node_addr
		 *  - f2fs_alloc_nid_done
		 *   - __remove_nid_from_list(PREALLOC_NID)
		 *                         - __insert_nid_to_list(FREE_NID)
		 */
		ne = __lookup_nat_cache(nm_i, nid);
		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
				nat_get_blkaddr(ne) != NULL_ADDR))
			goto err_out;

		e = __lookup_free_nid_list(nm_i, nid);
		if (e) {
			if (e->state == FREE_NID)
				ret = true;
			goto err_out;
		}
	}
	ret = true;
	err = __insert_free_nid(sbi, i, FREE_NID);
err_out:
	if (update) {
		update_free_nid_bitmap(sbi, nid, ret, build);
		if (!build)
			nm_i->available_nids++;
	}
	spin_unlock(&nm_i->nid_list_lock);
	radix_tree_preload_end();

	if (err)
		kmem_cache_free(free_nid_slab, i);
	return ret;
}

static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
	bool need_free = false;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	if (i && i->state == FREE_NID) {
		__remove_free_nid(sbi, i, FREE_NID);
		need_free = true;
	}
	spin_unlock(&nm_i->nid_list_lock);

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
}

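/*
 * Scan one NAT block and cache every nid whose block address is
 * NULL_ADDR as free; a NEW_ADDR entry here means the NAT is corrupted.
 */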
static int scan_nat_page(struct f2fs_sb_info *sbi,
			struct page *nat_page, nid_t start_nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct f2fs_nat_block *nat_blk = page_address(nat_page);
	block_t blk_addr;
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
	int i;

	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);

	i = start_nid % NAT_ENTRY_PER_BLOCK;

	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
		if (unlikely(start_nid >= nm_i->max_nid))
			break;

		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);

		if (blk_addr == NEW_ADDR)
			return -EINVAL;

		if (blk_addr == NULL_ADDR) {
			add_free_nid(sbi, start_nid, true, true);
		} else {
			spin_lock(&NM_I(sbi)->nid_list_lock);
			update_free_nid_bitmap(sbi, start_nid, false, true);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		}
	}

	return 0;
}

static void scan_curseg_cache(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_read(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		block_t addr;
		nid_t nid;

		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
		nid = le32_to_cpu(nid_in_journal(journal, i));
		if (addr == NULL_ADDR)
			add_free_nid(sbi, nid, true, false);
		else
			remove_free_nid(sbi, nid);
	}
	up_read(&curseg->journal_rwsem);
}

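/*
 * Harvest free nids from the in-memory free nid bitmaps and the current
 * journal cache instead of reading NAT pages from disk.
 */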
static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int i, idx;
	nid_t nid;

	down_read(&nm_i->nat_tree_lock);

	for (i = 0; i < nm_i->nat_blocks; i++) {
		if (!test_bit_le(i, nm_i->nat_block_bitmap))
			continue;
		if (!nm_i->free_nid_count[i])
			continue;
		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
			idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
						NAT_ENTRY_PER_BLOCK, idx);
			if (idx >= NAT_ENTRY_PER_BLOCK)
				break;

			nid = i * NAT_ENTRY_PER_BLOCK + idx;
			add_free_nid(sbi, nid, true, false);

			if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
				goto out;
		}
	}
out:
	scan_curseg_cache(sbi);

	up_read(&nm_i->nat_tree_lock);
}

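/*
 * Refill the free nid cache, preferring the in-memory bitmaps and the
 * journal before scanning up to FREE_NID_PAGES NAT pages from disk.
 */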
static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
						bool sync, bool mount)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int i = 0, ret;
	nid_t nid = nm_i->next_scan_nid;

	if (unlikely(nid >= nm_i->max_nid))
		nid = 0;

	/* Enough entries */
	if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
		return 0;

	if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
		return 0;

	if (!mount) {
		/* try to find free nids in free_nid_bitmap */
		scan_free_nid_bits(sbi);

		if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
			return 0;
	}

	/* readahead nat pages to be scanned */
	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
							META_NAT, true);

	down_read(&nm_i->nat_tree_lock);

	while (1) {
		if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
						nm_i->nat_block_bitmap)) {
			struct page *page = get_current_nat_page(sbi, nid);

			ret = scan_nat_page(sbi, page, nid);
			f2fs_put_page(page, 1);

			if (ret) {
				up_read(&nm_i->nat_tree_lock);
				f2fs_bug_on(sbi, !mount);
				f2fs_msg(sbi->sb, KERN_ERR,
					"NAT is corrupt, run fsck to fix it");
				return -EINVAL;
			}
		}

		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
		if (unlikely(nid >= nm_i->max_nid))
			nid = 0;

		if (++i >= FREE_NID_PAGES)
			break;
	}

	/* go to the next free nat pages to find free nids abundantly */
	nm_i->next_scan_nid = nid;

	/* find free nids from current sum_pages */
	scan_curseg_cache(sbi);

	up_read(&nm_i->nat_tree_lock);

	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
					nm_i->ra_nid_pages, META_NAT, false);

	return 0;
}

int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
	int ret;

	mutex_lock(&NM_I(sbi)->build_lock);
	ret = __f2fs_build_free_nids(sbi, sync, mount);
	mutex_unlock(&NM_I(sbi)->build_lock);

	return ret;
}

/*
 * If this function returns success, caller can obtain a new nid
 * from second parameter of this function.
 * The returned nid could be used ino as well as nid when inode is created.
 */
bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i = NULL;
retry:
#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
		f2fs_show_injection_info(FAULT_ALLOC_NID);
		return false;
	}
#endif
	spin_lock(&nm_i->nid_list_lock);

	if (unlikely(nm_i->available_nids == 0)) {
		spin_unlock(&nm_i->nid_list_lock);
		return false;
	}

	/* We should not use stale free nids created by f2fs_build_free_nids */
	if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) {
		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
		i = list_first_entry(&nm_i->free_nid_list,
					struct free_nid, list);
		*nid = i->nid;

		__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
		nm_i->available_nids--;

		update_free_nid_bitmap(sbi, *nid, false, false);

		spin_unlock(&nm_i->nid_list_lock);
		return true;
	}
	spin_unlock(&nm_i->nid_list_lock);

	/* Let's scan nat pages and its caches to get free nids */
	f2fs_build_free_nids(sbi, true, false);
	goto retry;
}

/*
 * f2fs_alloc_nid() should be called prior to this function.
 */
void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	f2fs_bug_on(sbi, !i);
	__remove_free_nid(sbi, i, PREALLOC_NID);
	spin_unlock(&nm_i->nid_list_lock);

	kmem_cache_free(free_nid_slab, i);
}

/*
 * f2fs_alloc_nid() should be called prior to this function.
 */
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
	bool need_free = false;

	if (!nid)
		return;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	f2fs_bug_on(sbi, !i);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS)) {
		__remove_free_nid(sbi, i, PREALLOC_NID);
		need_free = true;
	} else {
		__move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
	}

	nm_i->available_nids++;

	update_free_nid_bitmap(sbi, nid, true, false);

	spin_unlock(&nm_i->nid_list_lock);

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
}

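/*
 * Drop up to @nr_shrink cached free nids; returns the number of entries
 * actually removed from the free nid cache.
 */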
int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next;
	int nr = nr_shrink;

	if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
		return 0;

	if (!mutex_trylock(&nm_i->build_lock))
		return 0;

	spin_lock(&nm_i->nid_list_lock);
	list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
		if (nr_shrink <= 0 ||
				nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
			break;

		__remove_free_nid(sbi, i, FREE_NID);
		kmem_cache_free(free_nid_slab, i);
		nr_shrink--;
	}
	spin_unlock(&nm_i->nid_list_lock);
	mutex_unlock(&nm_i->build_lock);

	return nr - nr_shrink;
}

void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
{
	void *src_addr, *dst_addr;
	size_t inline_size;
	struct page *ipage;
	struct f2fs_inode *ri;

	ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
	f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));

	ri = F2FS_INODE(page);
	if (ri->i_inline & F2FS_INLINE_XATTR) {
		set_inode_flag(inode, FI_INLINE_XATTR);
	} else {
		clear_inode_flag(inode, FI_INLINE_XATTR);
		goto update_inode;
	}

	dst_addr = inline_xattr_addr(inode, ipage);
	src_addr = inline_xattr_addr(inode, page);
	inline_size = inline_xattr_size(inode);

	f2fs_wait_on_page_writeback(ipage, NODE, true);
	memcpy(dst_addr, src_addr, inline_size);
update_inode:
	f2fs_update_inode(inode, ipage);
	f2fs_put_page(ipage, 1);
}

int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
	nid_t new_xnid;
	struct dnode_of_data dn;
	struct node_info ni;
	struct page *xpage;

	if (!prev_xnid)
		goto recover_xnid;

	/* 1: invalidate the previous xattr nid */
	f2fs_get_node_info(sbi, prev_xnid, &ni);
	f2fs_invalidate_blocks(sbi, ni.blk_addr);
	dec_valid_node_count(sbi, inode, false);
	set_node_addr(sbi, &ni, NULL_ADDR, false);

recover_xnid:
	/* 2: update xattr nid in inode */
	if (!f2fs_alloc_nid(sbi, &new_xnid))
		return -ENOSPC;

	set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
	xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
	if (IS_ERR(xpage)) {
		f2fs_alloc_nid_failed(sbi, new_xnid);
		return PTR_ERR(xpage);
	}

	f2fs_alloc_nid_done(sbi, new_xnid);
	f2fs_update_inode_page(inode);

	/* 3: update and set xattr node page dirty */
	memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);

	set_page_dirty(xpage);
	f2fs_put_page(xpage, 1);

	return 0;
}

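/*
 * Rebuild the in-memory inode page for the inode found in @page during
 * recovery, reinitializing its size, link and block counts.
 */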
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct f2fs_inode *src, *dst;
	nid_t ino = ino_of_node(page);
	struct node_info old_ni, new_ni;
	struct page *ipage;

	f2fs_get_node_info(sbi, ino, &old_ni);

	if (unlikely(old_ni.blk_addr != NULL_ADDR))
		return -EINVAL;
retry:
	ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
	if (!ipage) {
		congestion_wait(BLK_RW_ASYNC, HZ/50);
		goto retry;
	}

	/* Should not use this inode from free nid list */
	remove_free_nid(sbi, ino);

	if (!PageUptodate(ipage))
		SetPageUptodate(ipage);
	fill_node_footer(ipage, ino, ino, 0, true);
	set_cold_node(page, false);

	src = F2FS_INODE(page);
	dst = F2FS_INODE(ipage);

	memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
	dst->i_size = 0;
	dst->i_blocks = cpu_to_le64(1);
	dst->i_links = cpu_to_le32(1);
	dst->i_xattr_nid = 0;
	dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
	if (dst->i_inline & F2FS_EXTRA_ATTR) {
		dst->i_extra_isize = src->i_extra_isize;

		if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
							i_inline_xattr_size))
			dst->i_inline_xattr_size = src->i_inline_xattr_size;

		if (f2fs_sb_has_project_quota(sbi->sb) &&
			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
								i_projid))
			dst->i_projid = src->i_projid;
	}

	new_ni = old_ni;
	new_ni.ino = ino;

	if (unlikely(inc_valid_node_count(sbi, NULL, true)))
		WARN_ON(1);
	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
	inc_valid_inode_count(sbi);
	set_page_dirty(ipage);
	f2fs_put_page(ipage, 1);

	return 0;
}

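/*
 * Reconstruct the node summary entries of @segno by reading each node
 * block in the segment and recording its nid.
 */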
void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
			unsigned int segno, struct f2fs_summary_block *sum)
{
	struct f2fs_node *rn;
	struct f2fs_summary *sum_entry;
	block_t addr;
	int i, idx, last_offset, nrpages;

	/* scan the node segment */
	last_offset = sbi->blocks_per_seg;
	addr = START_BLOCK(sbi, segno);
	sum_entry = &sum->entries[0];

	for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
		nrpages = min(last_offset - i, BIO_MAX_PAGES);

		/* readahead node pages */
		f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);

		for (idx = addr; idx < addr + nrpages; idx++) {
			struct page *page = f2fs_get_tmp_page(sbi, idx);

			rn = F2FS_NODE(page);
			sum_entry->nid = rn->footer.nid;
			sum_entry->version = 0;
			sum_entry->ofs_in_node = 0;
			sum_entry++;
			f2fs_put_page(page, 1);
		}

		invalidate_mapping_pages(META_MAPPING(sbi), addr,
							addr + nrpages);
	}
}

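/*
 * Move all NAT entries out of the current journal into the dirty NAT
 * cache so they can be merged into NAT entry sets.
 */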
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_write(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		struct nat_entry *ne;
		struct f2fs_nat_entry raw_ne;
		nid_t nid = le32_to_cpu(nid_in_journal(journal, i));

		raw_ne = nat_in_journal(journal, i);

		ne = __lookup_nat_cache(nm_i, nid);
		if (!ne) {
			ne = __alloc_nat_entry(nid, true);
			__init_nat_entry(nm_i, ne, &raw_ne, true);
		}

		/*
		 * if a free nat in journal has not been used after last
		 * checkpoint, we should remove it from available nids,
		 * since later we will add it again.
		 */
		if (!get_nat_flag(ne, IS_DIRTY) &&
				le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
			spin_lock(&nm_i->nid_list_lock);
			nm_i->available_nids--;
			spin_unlock(&nm_i->nid_list_lock);
		}

		__set_nat_cache_dirty(nm_i, ne);
	}
	update_nats_in_cursum(journal, -i);
	up_write(&curseg->journal_rwsem);
}

static void __adjust_nat_entry_set(struct nat_entry_set *nes,
						struct list_head *head, int max)
{
	struct nat_entry_set *cur;

	if (nes->entry_cnt >= max)
		goto add_out;

	list_for_each_entry(cur, head, set_list) {
		if (cur->entry_cnt >= nes->entry_cnt) {
			list_add(&nes->set_list, cur->set_list.prev);
			return;
		}
	}
add_out:
	list_add_tail(&nes->set_list, head);
}

static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
						struct page *page)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
	struct f2fs_nat_block *nat_blk = page_address(page);
	int valid = 0;
	int i = 0;

	if (!enabled_nat_bits(sbi, NULL))
		return;

	if (nat_index == 0) {
		valid = 1;
		i = 1;
	}
	for (; i < NAT_ENTRY_PER_BLOCK; i++) {
		if (nat_blk->entries[i].block_addr != NULL_ADDR)
			valid++;
	}
	if (valid == 0) {
		__set_bit_le(nat_index, nm_i->empty_nat_bits);
		__clear_bit_le(nat_index, nm_i->full_nat_bits);
		return;
	}

	__clear_bit_le(nat_index, nm_i->empty_nat_bits);
	if (valid == NAT_ENTRY_PER_BLOCK)
		__set_bit_le(nat_index, nm_i->full_nat_bits);
	else
		__clear_bit_le(nat_index, nm_i->full_nat_bits);
}

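/*
 * Write back one set of dirty NAT entries, either into the journal of
 * the hot data summary or into its NAT block on disk.
 */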
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
		struct nat_entry_set *set, struct cp_control *cpc)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
	bool to_journal = true;
	struct f2fs_nat_block *nat_blk;
	struct nat_entry *ne, *cur;
	struct page *page = NULL;

	/*
	 * there are two steps to flush nat entries:
	 * #1, flush nat entries to journal in current hot data summary block.
	 * #2, flush nat entries to nat page.
	 */
	if (enabled_nat_bits(sbi, cpc) ||
		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
		to_journal = false;

	if (to_journal) {
		down_write(&curseg->journal_rwsem);
	} else {
		page = get_next_nat_page(sbi, start_nid);
		nat_blk = page_address(page);
		f2fs_bug_on(sbi, !nat_blk);
	}

	/* flush dirty nats in nat entry set */
	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
		struct f2fs_nat_entry *raw_ne;
		nid_t nid = nat_get_nid(ne);
		int offset;

		f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);

		if (to_journal) {
			offset = f2fs_lookup_journal_in_cursum(journal,
							NAT_JOURNAL, nid, 1);
			f2fs_bug_on(sbi, offset < 0);
			raw_ne = &nat_in_journal(journal, offset);
			nid_in_journal(journal, offset) = cpu_to_le32(nid);
		} else {
			raw_ne = &nat_blk->entries[nid - start_nid];
		}
		raw_nat_from_node_info(raw_ne, &ne->ni);
		nat_reset_flag(ne);
		__clear_nat_cache_dirty(NM_I(sbi), set, ne);
		if (nat_get_blkaddr(ne) == NULL_ADDR) {
			add_free_nid(sbi, nid, false, true);
		} else {
			spin_lock(&NM_I(sbi)->nid_list_lock);
			update_free_nid_bitmap(sbi, nid, false, false);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		}
	}

	if (to_journal) {
		up_write(&curseg->journal_rwsem);
	} else {
		__update_nat_bits(sbi, start_nid, page);
		f2fs_put_page(page, 1);
	}

	/* Allow dirty nats by node block allocation in write_begin */
	if (!set->entry_cnt) {
		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
		kmem_cache_free(nat_entry_set_slab, set);
	}
}

/*
 * This function is called during the checkpointing process.
 */
void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct nat_entry_set *setvec[SETVEC_SIZE];
	struct nat_entry_set *set, *tmp;
	unsigned int found;
	nid_t set_idx = 0;
	LIST_HEAD(sets);

	/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
	if (enabled_nat_bits(sbi, cpc)) {
		down_write(&nm_i->nat_tree_lock);
		remove_nats_in_journal(sbi);
		up_write(&nm_i->nat_tree_lock);
	}

	if (!nm_i->dirty_nat_cnt)
		return;

	down_write(&nm_i->nat_tree_lock);

	/*
	 * if there are no enough space in journal to store dirty nat
	 * entries, remove all entries from journal and merge them
	 * into nat entry set.
	 */
	if (enabled_nat_bits(sbi, cpc) ||
		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
		remove_nats_in_journal(sbi);

	while ((found = __gang_lookup_nat_set(nm_i,
					set_idx, SETVEC_SIZE, setvec))) {
		unsigned idx;

		set_idx = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++)
			__adjust_nat_entry_set(setvec[idx], &sets,
						MAX_NAT_JENTRIES(journal));
	}

	/* flush dirty nats in nat entry set */
	list_for_each_entry_safe(set, tmp, &sets, set_list)
		__flush_nat_entry_set(sbi, set, cpc);

	up_write(&nm_i->nat_tree_lock);
	/* Allow dirty nats by node block allocation in write_begin */
}

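/*
 * Load the full/empty nat_bits area stored at the end of the checkpoint
 * pack and disable it if its version does not match this checkpoint.
 */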
static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
	unsigned int i;
	__u64 cp_ver = cur_cp_version(ckpt);
	block_t nat_bits_addr;

	if (!enabled_nat_bits(sbi, NULL))
		return 0;

	nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
	nm_i->nat_bits = f2fs_kzalloc(sbi,
			nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
	if (!nm_i->nat_bits)
		return -ENOMEM;

	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
						nm_i->nat_bits_blocks;
	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
		struct page *page = f2fs_get_meta_page(sbi, nat_bits_addr++);

		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
					page_address(page), F2FS_BLKSIZE);
		f2fs_put_page(page, 1);
	}

	cp_ver |= (cur_cp_crc(ckpt) << 32);
	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
		disable_nat_bits(sbi, true);
		return 0;
	}

	nm_i->full_nat_bits = nm_i->nat_bits + 8;
	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;

	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
	return 0;
}

static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int i = 0;
	nid_t nid, last_nid;

	if (!enabled_nat_bits(sbi, NULL))
		return;

	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);

		nid = i * NAT_ENTRY_PER_BLOCK;
		last_nid = nid + NAT_ENTRY_PER_BLOCK;

		spin_lock(&NM_I(sbi)->nid_list_lock);
		for (; nid < last_nid; nid++)
			update_free_nid_bitmap(sbi, nid, true, true);
		spin_unlock(&NM_I(sbi)->nid_list_lock);
	}

	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);
	}
}

static int init_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned char *version_bitmap;
	unsigned int nat_segs;
	int err;

	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);

	/* segment_count_nat includes pair segment so divide to 2. */
	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;

	/* not used nids: 0, node, meta, (and root counted as valid node) */
	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
				sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
	nm_i->nid_cnt[FREE_NID] = 0;
	nm_i->nid_cnt[PREALLOC_NID] = 0;
	nm_i->nat_cnt = 0;
	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;

	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
	INIT_LIST_HEAD(&nm_i->free_nid_list);
	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
	INIT_LIST_HEAD(&nm_i->nat_entries);

	mutex_init(&nm_i->build_lock);
	spin_lock_init(&nm_i->nid_list_lock);
	init_rwsem(&nm_i->nat_tree_lock);

	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
	if (!version_bitmap)
		return -EFAULT;

	nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
					GFP_KERNEL);
	if (!nm_i->nat_bitmap)
		return -ENOMEM;

	err = __get_nat_bitmaps(sbi);
	if (err)
		return err;

#ifdef CONFIG_F2FS_CHECK_FS
	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
					GFP_KERNEL);
	if (!nm_i->nat_bitmap_mir)
		return -ENOMEM;
#endif

	return 0;
}

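/* allocate the per-NAT-block free nid bitmaps and counters */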
static int init_free_nid_cache(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int i;

	nm_i->free_nid_bitmap =
		f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *),
						nm_i->nat_blocks),
				GFP_KERNEL);
	if (!nm_i->free_nid_bitmap)
		return -ENOMEM;

	for (i = 0; i < nm_i->nat_blocks; i++) {
		nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
				f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
		if (!nm_i->free_nid_bitmap[i])
			return -ENOMEM;
	}

	nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8,
								GFP_KERNEL);
	if (!nm_i->nat_block_bitmap)
		return -ENOMEM;

	nm_i->free_nid_count =
		f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short),
						nm_i->nat_blocks),
				GFP_KERNEL);
	if (!nm_i->free_nid_count)
		return -ENOMEM;
	return 0;
}

int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
{
	int err;

	sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info),
							GFP_KERNEL);
	if (!sbi->nm_info)
		return -ENOMEM;

	err = init_node_manager(sbi);
	if (err)
		return err;

	err = init_free_nid_cache(sbi);
	if (err)
		return err;

	/* load free nid status from nat_bits table */
	load_free_nid_bitmap(sbi);

	return f2fs_build_free_nids(sbi, true, true);
}

void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next_i;
	struct nat_entry *natvec[NATVEC_SIZE];
	struct nat_entry_set *setvec[SETVEC_SIZE];
	nid_t nid = 0;
	unsigned int found;

	if (!nm_i)
		return;

	/* destroy free nid list */
	spin_lock(&nm_i->nid_list_lock);
	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
		__remove_free_nid(sbi, i, FREE_NID);
		spin_unlock(&nm_i->nid_list_lock);
		kmem_cache_free(free_nid_slab, i);
		spin_lock(&nm_i->nid_list_lock);
	}
	f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
	f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
	f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
	spin_unlock(&nm_i->nid_list_lock);

	/* destroy nat cache */
	down_write(&nm_i->nat_tree_lock);
	while ((found = __gang_lookup_nat_cache(nm_i,
					nid, NATVEC_SIZE, natvec))) {
		unsigned idx;

		nid = nat_get_nid(natvec[found - 1]) + 1;
		for (idx = 0; idx < found; idx++)
			__del_from_nat_cache(nm_i, natvec[idx]);
	}
	f2fs_bug_on(sbi, nm_i->nat_cnt);

	/* destroy nat set cache */
	nid = 0;
	while ((found = __gang_lookup_nat_set(nm_i,
					nid, SETVEC_SIZE, setvec))) {
		unsigned idx;

		nid = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++) {
			/* entry_cnt is not zero, when cp_error was occurred */
			f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
			radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
		}
	}
	up_write(&nm_i->nat_tree_lock);

	kvfree(nm_i->nat_block_bitmap);
	if (nm_i->free_nid_bitmap) {
		int i;

		for (i = 0; i < nm_i->nat_blocks; i++)
			kvfree(nm_i->free_nid_bitmap[i]);
		kfree(nm_i->free_nid_bitmap);
	}
	kvfree(nm_i->free_nid_count);

	kfree(nm_i->nat_bitmap);
	kfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
	kfree(nm_i->nat_bitmap_mir);
#endif
	sbi->nm_info = NULL;
	kfree(nm_i);
}

int __init f2fs_create_node_manager_caches(void)
{
	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
			sizeof(struct nat_entry));
	if (!nat_entry_slab)
		goto fail;

	free_nid_slab = f2fs_kmem_cache_create("free_nid",
			sizeof(struct free_nid));
	if (!free_nid_slab)
		goto destroy_nat_entry;

	nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
			sizeof(struct nat_entry_set));
	if (!nat_entry_set_slab)
		goto destroy_free_nid;
	return 0;

destroy_free_nid:
	kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
	kmem_cache_destroy(nat_entry_slab);
fail:
	return -ENOMEM;
}

void f2fs_destroy_node_manager_caches(void)
{
	kmem_cache_destroy(nat_entry_set_slab);
	kmem_cache_destroy(free_nid_slab);
	kmem_cache_destroy(nat_entry_slab);
}