dm-integrity.c 91 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259
  1. /*
  2. * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved.
  3. * Copyright (C) 2016-2017 Milan Broz
  4. * Copyright (C) 2016-2017 Mikulas Patocka
  5. *
  6. * This file is released under the GPL.
  7. */
  8. #include <linux/module.h>
  9. #include <linux/device-mapper.h>
  10. #include <linux/dm-io.h>
  11. #include <linux/vmalloc.h>
  12. #include <linux/sort.h>
  13. #include <linux/rbtree.h>
  14. #include <linux/delay.h>
  15. #include <linux/random.h>
  16. #include <crypto/hash.h>
  17. #include <crypto/skcipher.h>
  18. #include <linux/async_tx.h>
  19. #include "dm-bufio.h"
  20. #define DM_MSG_PREFIX "integrity"
  21. #define DEFAULT_INTERLEAVE_SECTORS 32768
  22. #define DEFAULT_JOURNAL_SIZE_FACTOR 7
  23. #define DEFAULT_BUFFER_SECTORS 128
  24. #define DEFAULT_JOURNAL_WATERMARK 50
  25. #define DEFAULT_SYNC_MSEC 10000
  26. #define DEFAULT_MAX_JOURNAL_SECTORS 131072
  27. #define MIN_LOG2_INTERLEAVE_SECTORS 3
  28. #define MAX_LOG2_INTERLEAVE_SECTORS 31
  29. #define METADATA_WORKQUEUE_MAX_ACTIVE 16
  30. /*
  31. * Warning - DEBUG_PRINT prints security-sensitive data to the log,
  32. * so it should not be enabled in the official kernel
  33. */
  34. //#define DEBUG_PRINT
  35. //#define INTERNAL_VERIFY
  36. /*
  37. * On disk structures
  38. */
  39. #define SB_MAGIC "integrt"
  40. #define SB_VERSION 1
  41. #define SB_SECTORS 8
  42. #define MAX_SECTORS_PER_BLOCK 8
  43. struct superblock {
  44. __u8 magic[8];
  45. __u8 version;
  46. __u8 log2_interleave_sectors;
  47. __u16 integrity_tag_size;
  48. __u32 journal_sections;
  49. __u64 provided_data_sectors; /* userspace uses this value */
  50. __u32 flags;
  51. __u8 log2_sectors_per_block;
  52. };
  53. #define SB_FLAG_HAVE_JOURNAL_MAC 0x1
  54. #define JOURNAL_ENTRY_ROUNDUP 8
  55. typedef __u64 commit_id_t;
  56. #define JOURNAL_MAC_PER_SECTOR 8
  57. struct journal_entry {
  58. union {
  59. struct {
  60. __u32 sector_lo;
  61. __u32 sector_hi;
  62. } s;
  63. __u64 sector;
  64. } u;
  65. commit_id_t last_bytes[0];
  66. /* __u8 tag[0]; */
  67. };
  68. #define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])
  69. #if BITS_PER_LONG == 64
  70. #define journal_entry_set_sector(je, x) do { smp_wmb(); ACCESS_ONCE((je)->u.sector) = cpu_to_le64(x); } while (0)
  71. #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
  72. #elif defined(CONFIG_LBDAF)
  73. #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); ACCESS_ONCE((je)->u.s.sector_hi) = cpu_to_le32((x) >> 32); } while (0)
  74. #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
  75. #else
  76. #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); ACCESS_ONCE((je)->u.s.sector_hi) = cpu_to_le32(0); } while (0)
  77. #define journal_entry_get_sector(je) le32_to_cpu((je)->u.s.sector_lo)
  78. #endif
  79. #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1))
  80. #define journal_entry_set_unused(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0)
  81. #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2))
  82. #define journal_entry_set_inprogress(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-2)); } while (0)
  83. #define JOURNAL_BLOCK_SECTORS 8
  84. #define JOURNAL_SECTOR_DATA ((1 << SECTOR_SHIFT) - sizeof(commit_id_t))
  85. #define JOURNAL_MAC_SIZE (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS)
  86. struct journal_sector {
  87. __u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR];
  88. __u8 mac[JOURNAL_MAC_PER_SECTOR];
  89. commit_id_t commit_id;
  90. };
  91. #define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))
  92. #define METADATA_PADDING_SECTORS 8
  93. #define N_COMMIT_IDS 4
  94. static unsigned char prev_commit_seq(unsigned char seq)
  95. {
  96. return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS;
  97. }
  98. static unsigned char next_commit_seq(unsigned char seq)
  99. {
  100. return (seq + 1) % N_COMMIT_IDS;
  101. }
  102. /*
  103. * In-memory structures
  104. */
  105. struct journal_node {
  106. struct rb_node node;
  107. sector_t sector;
  108. };
  109. struct alg_spec {
  110. char *alg_string;
  111. char *key_string;
  112. __u8 *key;
  113. unsigned key_size;
  114. };
  115. struct dm_integrity_c {
  116. struct dm_dev *dev;
  117. unsigned tag_size;
  118. __s8 log2_tag_size;
  119. sector_t start;
  120. mempool_t *journal_io_mempool;
  121. struct dm_io_client *io;
  122. struct dm_bufio_client *bufio;
  123. struct workqueue_struct *metadata_wq;
  124. struct superblock *sb;
  125. unsigned journal_pages;
  126. struct page_list *journal;
  127. struct page_list *journal_io;
  128. struct page_list *journal_xor;
  129. struct crypto_skcipher *journal_crypt;
  130. struct scatterlist **journal_scatterlist;
  131. struct scatterlist **journal_io_scatterlist;
  132. struct skcipher_request **sk_requests;
  133. struct crypto_shash *journal_mac;
  134. struct journal_node *journal_tree;
  135. struct rb_root journal_tree_root;
  136. sector_t provided_data_sectors;
  137. unsigned short journal_entry_size;
  138. unsigned char journal_entries_per_sector;
  139. unsigned char journal_section_entries;
  140. unsigned short journal_section_sectors;
  141. unsigned journal_sections;
  142. unsigned journal_entries;
  143. sector_t device_sectors;
  144. unsigned initial_sectors;
  145. unsigned metadata_run;
  146. __s8 log2_metadata_run;
  147. __u8 log2_buffer_sectors;
  148. __u8 sectors_per_block;
  149. unsigned char mode;
  150. bool suspending;
  151. int failed;
  152. struct crypto_shash *internal_hash;
  153. /* these variables are locked with endio_wait.lock */
  154. struct rb_root in_progress;
  155. wait_queue_head_t endio_wait;
  156. struct workqueue_struct *wait_wq;
  157. unsigned char commit_seq;
  158. commit_id_t commit_ids[N_COMMIT_IDS];
  159. unsigned committed_section;
  160. unsigned n_committed_sections;
  161. unsigned uncommitted_section;
  162. unsigned n_uncommitted_sections;
  163. unsigned free_section;
  164. unsigned char free_section_entry;
  165. unsigned free_sectors;
  166. unsigned free_sectors_threshold;
  167. struct workqueue_struct *commit_wq;
  168. struct work_struct commit_work;
  169. struct workqueue_struct *writer_wq;
  170. struct work_struct writer_work;
  171. struct bio_list flush_bio_list;
  172. unsigned long autocommit_jiffies;
  173. struct timer_list autocommit_timer;
  174. unsigned autocommit_msec;
  175. wait_queue_head_t copy_to_journal_wait;
  176. struct completion crypto_backoff;
  177. bool journal_uptodate;
  178. bool just_formatted;
  179. struct alg_spec internal_hash_alg;
  180. struct alg_spec journal_crypt_alg;
  181. struct alg_spec journal_mac_alg;
  182. atomic64_t number_of_mismatches;
  183. };
  184. struct dm_integrity_range {
  185. sector_t logical_sector;
  186. unsigned n_sectors;
  187. struct rb_node node;
  188. };
  189. struct dm_integrity_io {
  190. struct work_struct work;
  191. struct dm_integrity_c *ic;
  192. bool write;
  193. bool fua;
  194. struct dm_integrity_range range;
  195. sector_t metadata_block;
  196. unsigned metadata_offset;
  197. atomic_t in_flight;
  198. blk_status_t bi_status;
  199. struct completion *completion;
  200. struct gendisk *orig_bi_disk;
  201. u8 orig_bi_partno;
  202. bio_end_io_t *orig_bi_end_io;
  203. struct bio_integrity_payload *orig_bi_integrity;
  204. struct bvec_iter orig_bi_iter;
  205. };
  206. struct journal_completion {
  207. struct dm_integrity_c *ic;
  208. atomic_t in_flight;
  209. struct completion comp;
  210. };
  211. struct journal_io {
  212. struct dm_integrity_range range;
  213. struct journal_completion *comp;
  214. };
  215. static struct kmem_cache *journal_io_cache;
  216. #define JOURNAL_IO_MEMPOOL 32
  217. #ifdef DEBUG_PRINT
  218. #define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__)
  219. static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...)
  220. {
  221. va_list args;
  222. va_start(args, msg);
  223. vprintk(msg, args);
  224. va_end(args);
  225. if (len)
  226. pr_cont(":");
  227. while (len) {
  228. pr_cont(" %02x", *bytes);
  229. bytes++;
  230. len--;
  231. }
  232. pr_cont("\n");
  233. }
  234. #define DEBUG_bytes(bytes, len, msg, ...) __DEBUG_bytes(bytes, len, KERN_DEBUG msg, ##__VA_ARGS__)
  235. #else
  236. #define DEBUG_print(x, ...) do { } while (0)
  237. #define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
  238. #endif
  239. /*
  240. * DM Integrity profile, protection is performed layer above (dm-crypt)
  241. */
  242. static const struct blk_integrity_profile dm_integrity_profile = {
  243. .name = "DM-DIF-EXT-TAG",
  244. .generate_fn = NULL,
  245. .verify_fn = NULL,
  246. };
  247. static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
  248. static void integrity_bio_wait(struct work_struct *w);
  249. static void dm_integrity_dtr(struct dm_target *ti);
  250. static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err)
  251. {
  252. if (err == -EILSEQ)
  253. atomic64_inc(&ic->number_of_mismatches);
  254. if (!cmpxchg(&ic->failed, 0, err))
  255. DMERR("Error on %s: %d", msg, err);
  256. }
  257. static int dm_integrity_failed(struct dm_integrity_c *ic)
  258. {
  259. return ACCESS_ONCE(ic->failed);
  260. }
  261. static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
  262. unsigned j, unsigned char seq)
  263. {
  264. /*
  265. * Xor the number with section and sector, so that if a piece of
  266. * journal is written at wrong place, it is detected.
  267. */
  268. return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j);
  269. }
  270. static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
  271. sector_t *area, sector_t *offset)
  272. {
  273. __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
  274. *area = data_sector >> log2_interleave_sectors;
  275. *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1);
  276. }
  277. #define sector_to_block(ic, n) \
  278. do { \
  279. BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1)); \
  280. (n) >>= (ic)->sb->log2_sectors_per_block; \
  281. } while (0)
  282. static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
  283. sector_t offset, unsigned *metadata_offset)
  284. {
  285. __u64 ms;
  286. unsigned mo;
  287. ms = area << ic->sb->log2_interleave_sectors;
  288. if (likely(ic->log2_metadata_run >= 0))
  289. ms += area << ic->log2_metadata_run;
  290. else
  291. ms += area * ic->metadata_run;
  292. ms >>= ic->log2_buffer_sectors;
  293. sector_to_block(ic, offset);
  294. if (likely(ic->log2_tag_size >= 0)) {
  295. ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size);
  296. mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
  297. } else {
  298. ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors);
  299. mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
  300. }
  301. *metadata_offset = mo;
  302. return ms;
  303. }
  304. static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset)
  305. {
  306. sector_t result;
  307. result = area << ic->sb->log2_interleave_sectors;
  308. if (likely(ic->log2_metadata_run >= 0))
  309. result += (area + 1) << ic->log2_metadata_run;
  310. else
  311. result += (area + 1) * ic->metadata_run;
  312. result += (sector_t)ic->initial_sectors + offset;
  313. return result;
  314. }
  315. static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
  316. {
  317. if (unlikely(*sec_ptr >= ic->journal_sections))
  318. *sec_ptr -= ic->journal_sections;
  319. }
  320. static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
  321. {
  322. struct dm_io_request io_req;
  323. struct dm_io_region io_loc;
  324. io_req.bi_op = op;
  325. io_req.bi_op_flags = op_flags;
  326. io_req.mem.type = DM_IO_KMEM;
  327. io_req.mem.ptr.addr = ic->sb;
  328. io_req.notify.fn = NULL;
  329. io_req.client = ic->io;
  330. io_loc.bdev = ic->dev->bdev;
  331. io_loc.sector = ic->start;
  332. io_loc.count = SB_SECTORS;
  333. return dm_io(&io_req, 1, &io_loc, NULL);
  334. }
  335. static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset,
  336. bool e, const char *function)
  337. {
  338. #if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY)
  339. unsigned limit = e ? ic->journal_section_entries : ic->journal_section_sectors;
  340. if (unlikely(section >= ic->journal_sections) ||
  341. unlikely(offset >= limit)) {
  342. printk(KERN_CRIT "%s: invalid access at (%u,%u), limit (%u,%u)\n",
  343. function, section, offset, ic->journal_sections, limit);
  344. BUG();
  345. }
  346. #endif
  347. }
  348. static void page_list_location(struct dm_integrity_c *ic, unsigned section, unsigned offset,
  349. unsigned *pl_index, unsigned *pl_offset)
  350. {
  351. unsigned sector;
  352. access_journal_check(ic, section, offset, false, "page_list_location");
  353. sector = section * ic->journal_section_sectors + offset;
  354. *pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
  355. *pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
  356. }
  357. static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl,
  358. unsigned section, unsigned offset, unsigned *n_sectors)
  359. {
  360. unsigned pl_index, pl_offset;
  361. char *va;
  362. page_list_location(ic, section, offset, &pl_index, &pl_offset);
  363. if (n_sectors)
  364. *n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT;
  365. va = lowmem_page_address(pl[pl_index].page);
  366. return (struct journal_sector *)(va + pl_offset);
  367. }
  368. static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset)
  369. {
  370. return access_page_list(ic, ic->journal, section, offset, NULL);
  371. }
  372. static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned section, unsigned n)
  373. {
  374. unsigned rel_sector, offset;
  375. struct journal_sector *js;
  376. access_journal_check(ic, section, n, true, "access_journal_entry");
  377. rel_sector = n % JOURNAL_BLOCK_SECTORS;
  378. offset = n / JOURNAL_BLOCK_SECTORS;
  379. js = access_journal(ic, section, rel_sector);
  380. return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size);
  381. }
  382. static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n)
  383. {
  384. n <<= ic->sb->log2_sectors_per_block;
  385. n += JOURNAL_BLOCK_SECTORS;
  386. access_journal_check(ic, section, n, false, "access_journal_data");
  387. return access_journal(ic, section, n);
  388. }
  389. static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE])
  390. {
  391. SHASH_DESC_ON_STACK(desc, ic->journal_mac);
  392. int r;
  393. unsigned j, size;
  394. desc->tfm = ic->journal_mac;
  395. desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
  396. r = crypto_shash_init(desc);
  397. if (unlikely(r)) {
  398. dm_integrity_io_error(ic, "crypto_shash_init", r);
  399. goto err;
  400. }
  401. for (j = 0; j < ic->journal_section_entries; j++) {
  402. struct journal_entry *je = access_journal_entry(ic, section, j);
  403. r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof je->u.sector);
  404. if (unlikely(r)) {
  405. dm_integrity_io_error(ic, "crypto_shash_update", r);
  406. goto err;
  407. }
  408. }
  409. size = crypto_shash_digestsize(ic->journal_mac);
  410. if (likely(size <= JOURNAL_MAC_SIZE)) {
  411. r = crypto_shash_final(desc, result);
  412. if (unlikely(r)) {
  413. dm_integrity_io_error(ic, "crypto_shash_final", r);
  414. goto err;
  415. }
  416. memset(result + size, 0, JOURNAL_MAC_SIZE - size);
  417. } else {
  418. __u8 digest[size];
  419. r = crypto_shash_final(desc, digest);
  420. if (unlikely(r)) {
  421. dm_integrity_io_error(ic, "crypto_shash_final", r);
  422. goto err;
  423. }
  424. memcpy(result, digest, JOURNAL_MAC_SIZE);
  425. }
  426. return;
  427. err:
  428. memset(result, 0, JOURNAL_MAC_SIZE);
  429. }
  430. static void rw_section_mac(struct dm_integrity_c *ic, unsigned section, bool wr)
  431. {
  432. __u8 result[JOURNAL_MAC_SIZE];
  433. unsigned j;
  434. if (!ic->journal_mac)
  435. return;
  436. section_mac(ic, section, result);
  437. for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) {
  438. struct journal_sector *js = access_journal(ic, section, j);
  439. if (likely(wr))
  440. memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
  441. else {
  442. if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR))
  443. dm_integrity_io_error(ic, "journal mac", -EILSEQ);
  444. }
  445. }
  446. }
  447. static void complete_journal_op(void *context)
  448. {
  449. struct journal_completion *comp = context;
  450. BUG_ON(!atomic_read(&comp->in_flight));
  451. if (likely(atomic_dec_and_test(&comp->in_flight)))
  452. complete(&comp->comp);
  453. }
  454. static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
  455. unsigned n_sections, struct journal_completion *comp)
  456. {
  457. struct async_submit_ctl submit;
  458. size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
  459. unsigned pl_index, pl_offset, section_index;
  460. struct page_list *source_pl, *target_pl;
  461. if (likely(encrypt)) {
  462. source_pl = ic->journal;
  463. target_pl = ic->journal_io;
  464. } else {
  465. source_pl = ic->journal_io;
  466. target_pl = ic->journal;
  467. }
  468. page_list_location(ic, section, 0, &pl_index, &pl_offset);
  469. atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);
  470. init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL);
  471. section_index = pl_index;
  472. do {
  473. size_t this_step;
  474. struct page *src_pages[2];
  475. struct page *dst_page;
  476. while (unlikely(pl_index == section_index)) {
  477. unsigned dummy;
  478. if (likely(encrypt))
  479. rw_section_mac(ic, section, true);
  480. section++;
  481. n_sections--;
  482. if (!n_sections)
  483. break;
  484. page_list_location(ic, section, 0, &section_index, &dummy);
  485. }
  486. this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
  487. dst_page = target_pl[pl_index].page;
  488. src_pages[0] = source_pl[pl_index].page;
  489. src_pages[1] = ic->journal_xor[pl_index].page;
  490. async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);
  491. pl_index++;
  492. pl_offset = 0;
  493. n_bytes -= this_step;
  494. } while (n_bytes);
  495. BUG_ON(n_sections);
  496. async_tx_issue_pending_all();
  497. }
  498. static void complete_journal_encrypt(struct crypto_async_request *req, int err)
  499. {
  500. struct journal_completion *comp = req->data;
  501. if (unlikely(err)) {
  502. if (likely(err == -EINPROGRESS)) {
  503. complete(&comp->ic->crypto_backoff);
  504. return;
  505. }
  506. dm_integrity_io_error(comp->ic, "asynchronous encrypt", err);
  507. }
  508. complete_journal_op(comp);
  509. }
  510. static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
  511. {
  512. int r;
  513. skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
  514. complete_journal_encrypt, comp);
  515. if (likely(encrypt))
  516. r = crypto_skcipher_encrypt(req);
  517. else
  518. r = crypto_skcipher_decrypt(req);
  519. if (likely(!r))
  520. return false;
  521. if (likely(r == -EINPROGRESS))
  522. return true;
  523. if (likely(r == -EBUSY)) {
  524. wait_for_completion(&comp->ic->crypto_backoff);
  525. reinit_completion(&comp->ic->crypto_backoff);
  526. return true;
  527. }
  528. dm_integrity_io_error(comp->ic, "encrypt", r);
  529. return false;
  530. }
  531. static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
  532. unsigned n_sections, struct journal_completion *comp)
  533. {
  534. struct scatterlist **source_sg;
  535. struct scatterlist **target_sg;
  536. atomic_add(2, &comp->in_flight);
  537. if (likely(encrypt)) {
  538. source_sg = ic->journal_scatterlist;
  539. target_sg = ic->journal_io_scatterlist;
  540. } else {
  541. source_sg = ic->journal_io_scatterlist;
  542. target_sg = ic->journal_scatterlist;
  543. }
  544. do {
  545. struct skcipher_request *req;
  546. unsigned ivsize;
  547. char *iv;
  548. if (likely(encrypt))
  549. rw_section_mac(ic, section, true);
  550. req = ic->sk_requests[section];
  551. ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
  552. iv = req->iv;
  553. memcpy(iv, iv + ivsize, ivsize);
  554. req->src = source_sg[section];
  555. req->dst = target_sg[section];
  556. if (unlikely(do_crypt(encrypt, req, comp)))
  557. atomic_inc(&comp->in_flight);
  558. section++;
  559. n_sections--;
  560. } while (n_sections);
  561. atomic_dec(&comp->in_flight);
  562. complete_journal_op(comp);
  563. }
  564. static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
  565. unsigned n_sections, struct journal_completion *comp)
  566. {
  567. if (ic->journal_xor)
  568. return xor_journal(ic, encrypt, section, n_sections, comp);
  569. else
  570. return crypt_journal(ic, encrypt, section, n_sections, comp);
  571. }
  572. static void complete_journal_io(unsigned long error, void *context)
  573. {
  574. struct journal_completion *comp = context;
  575. if (unlikely(error != 0))
  576. dm_integrity_io_error(comp->ic, "writing journal", -EIO);
  577. complete_journal_op(comp);
  578. }
  579. static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section,
  580. unsigned n_sections, struct journal_completion *comp)
  581. {
  582. struct dm_io_request io_req;
  583. struct dm_io_region io_loc;
  584. unsigned sector, n_sectors, pl_index, pl_offset;
  585. int r;
  586. if (unlikely(dm_integrity_failed(ic))) {
  587. if (comp)
  588. complete_journal_io(-1UL, comp);
  589. return;
  590. }
  591. sector = section * ic->journal_section_sectors;
  592. n_sectors = n_sections * ic->journal_section_sectors;
  593. pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
  594. pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
  595. io_req.bi_op = op;
  596. io_req.bi_op_flags = op_flags;
  597. io_req.mem.type = DM_IO_PAGE_LIST;
  598. if (ic->journal_io)
  599. io_req.mem.ptr.pl = &ic->journal_io[pl_index];
  600. else
  601. io_req.mem.ptr.pl = &ic->journal[pl_index];
  602. io_req.mem.offset = pl_offset;
  603. if (likely(comp != NULL)) {
  604. io_req.notify.fn = complete_journal_io;
  605. io_req.notify.context = comp;
  606. } else {
  607. io_req.notify.fn = NULL;
  608. }
  609. io_req.client = ic->io;
  610. io_loc.bdev = ic->dev->bdev;
  611. io_loc.sector = ic->start + SB_SECTORS + sector;
  612. io_loc.count = n_sectors;
  613. r = dm_io(&io_req, 1, &io_loc, NULL);
  614. if (unlikely(r)) {
  615. dm_integrity_io_error(ic, op == REQ_OP_READ ? "reading journal" : "writing journal", r);
  616. if (comp) {
  617. WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
  618. complete_journal_io(-1UL, comp);
  619. }
  620. }
  621. }
  622. static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections)
  623. {
  624. struct journal_completion io_comp;
  625. struct journal_completion crypt_comp_1;
  626. struct journal_completion crypt_comp_2;
  627. unsigned i;
  628. io_comp.ic = ic;
  629. init_completion(&io_comp.comp);
  630. if (commit_start + commit_sections <= ic->journal_sections) {
  631. io_comp.in_flight = (atomic_t)ATOMIC_INIT(1);
  632. if (ic->journal_io) {
  633. crypt_comp_1.ic = ic;
  634. init_completion(&crypt_comp_1.comp);
  635. crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
  636. encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1);
  637. wait_for_completion_io(&crypt_comp_1.comp);
  638. } else {
  639. for (i = 0; i < commit_sections; i++)
  640. rw_section_mac(ic, commit_start + i, true);
  641. }
  642. rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start,
  643. commit_sections, &io_comp);
  644. } else {
  645. unsigned to_end;
  646. io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
  647. to_end = ic->journal_sections - commit_start;
  648. if (ic->journal_io) {
  649. crypt_comp_1.ic = ic;
  650. init_completion(&crypt_comp_1.comp);
  651. crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
  652. encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1);
  653. if (try_wait_for_completion(&crypt_comp_1.comp)) {
  654. rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
  655. reinit_completion(&crypt_comp_1.comp);
  656. crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
  657. encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1);
  658. wait_for_completion_io(&crypt_comp_1.comp);
  659. } else {
  660. crypt_comp_2.ic = ic;
  661. init_completion(&crypt_comp_2.comp);
  662. crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0);
  663. encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2);
  664. wait_for_completion_io(&crypt_comp_1.comp);
  665. rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
  666. wait_for_completion_io(&crypt_comp_2.comp);
  667. }
  668. } else {
  669. for (i = 0; i < to_end; i++)
  670. rw_section_mac(ic, commit_start + i, true);
  671. rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
  672. for (i = 0; i < commit_sections - to_end; i++)
  673. rw_section_mac(ic, i, true);
  674. }
  675. rw_journal(ic, REQ_OP_WRITE, REQ_FUA, 0, commit_sections - to_end, &io_comp);
  676. }
  677. wait_for_completion_io(&io_comp.comp);
  678. }
  679. static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset,
  680. unsigned n_sectors, sector_t target, io_notify_fn fn, void *data)
  681. {
  682. struct dm_io_request io_req;
  683. struct dm_io_region io_loc;
  684. int r;
  685. unsigned sector, pl_index, pl_offset;
  686. BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1));
  687. if (unlikely(dm_integrity_failed(ic))) {
  688. fn(-1UL, data);
  689. return;
  690. }
  691. sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset;
  692. pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
  693. pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
  694. io_req.bi_op = REQ_OP_WRITE;
  695. io_req.bi_op_flags = 0;
  696. io_req.mem.type = DM_IO_PAGE_LIST;
  697. io_req.mem.ptr.pl = &ic->journal[pl_index];
  698. io_req.mem.offset = pl_offset;
  699. io_req.notify.fn = fn;
  700. io_req.notify.context = data;
  701. io_req.client = ic->io;
  702. io_loc.bdev = ic->dev->bdev;
  703. io_loc.sector = ic->start + target;
  704. io_loc.count = n_sectors;
  705. r = dm_io(&io_req, 1, &io_loc, NULL);
  706. if (unlikely(r)) {
  707. WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
  708. fn(-1UL, data);
  709. }
  710. }
  711. static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
  712. {
  713. struct rb_node **n = &ic->in_progress.rb_node;
  714. struct rb_node *parent;
  715. BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1));
  716. parent = NULL;
  717. while (*n) {
  718. struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node);
  719. parent = *n;
  720. if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector) {
  721. n = &range->node.rb_left;
  722. } else if (new_range->logical_sector >= range->logical_sector + range->n_sectors) {
  723. n = &range->node.rb_right;
  724. } else {
  725. return false;
  726. }
  727. }
  728. rb_link_node(&new_range->node, parent, n);
  729. rb_insert_color(&new_range->node, &ic->in_progress);
  730. return true;
  731. }
  732. static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
  733. {
  734. rb_erase(&range->node, &ic->in_progress);
  735. wake_up_locked(&ic->endio_wait);
  736. }
  737. static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
  738. {
  739. unsigned long flags;
  740. spin_lock_irqsave(&ic->endio_wait.lock, flags);
  741. remove_range_unlocked(ic, range);
  742. spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
  743. }
  744. static void init_journal_node(struct journal_node *node)
  745. {
  746. RB_CLEAR_NODE(&node->node);
  747. node->sector = (sector_t)-1;
  748. }
  749. static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector)
  750. {
  751. struct rb_node **link;
  752. struct rb_node *parent;
  753. node->sector = sector;
  754. BUG_ON(!RB_EMPTY_NODE(&node->node));
  755. link = &ic->journal_tree_root.rb_node;
  756. parent = NULL;
  757. while (*link) {
  758. struct journal_node *j;
  759. parent = *link;
  760. j = container_of(parent, struct journal_node, node);
  761. if (sector < j->sector)
  762. link = &j->node.rb_left;
  763. else
  764. link = &j->node.rb_right;
  765. }
  766. rb_link_node(&node->node, parent, link);
  767. rb_insert_color(&node->node, &ic->journal_tree_root);
  768. }
  769. static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node)
  770. {
  771. BUG_ON(RB_EMPTY_NODE(&node->node));
  772. rb_erase(&node->node, &ic->journal_tree_root);
  773. init_journal_node(node);
  774. }
  775. #define NOT_FOUND (-1U)
  776. static unsigned find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector)
  777. {
  778. struct rb_node *n = ic->journal_tree_root.rb_node;
  779. unsigned found = NOT_FOUND;
  780. *next_sector = (sector_t)-1;
  781. while (n) {
  782. struct journal_node *j = container_of(n, struct journal_node, node);
  783. if (sector == j->sector) {
  784. found = j - ic->journal_tree;
  785. }
  786. if (sector < j->sector) {
  787. *next_sector = j->sector;
  788. n = j->node.rb_left;
  789. } else {
  790. n = j->node.rb_right;
  791. }
  792. }
  793. return found;
  794. }
  795. static bool test_journal_node(struct dm_integrity_c *ic, unsigned pos, sector_t sector)
  796. {
  797. struct journal_node *node, *next_node;
  798. struct rb_node *next;
  799. if (unlikely(pos >= ic->journal_entries))
  800. return false;
  801. node = &ic->journal_tree[pos];
  802. if (unlikely(RB_EMPTY_NODE(&node->node)))
  803. return false;
  804. if (unlikely(node->sector != sector))
  805. return false;
  806. next = rb_next(&node->node);
  807. if (unlikely(!next))
  808. return true;
  809. next_node = container_of(next, struct journal_node, node);
  810. return next_node->sector != sector;
  811. }
  812. static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node)
  813. {
  814. struct rb_node *next;
  815. struct journal_node *next_node;
  816. unsigned next_section;
  817. BUG_ON(RB_EMPTY_NODE(&node->node));
  818. next = rb_next(&node->node);
  819. if (unlikely(!next))
  820. return false;
  821. next_node = container_of(next, struct journal_node, node);
  822. if (next_node->sector != node->sector)
  823. return false;
  824. next_section = (unsigned)(next_node - ic->journal_tree) / ic->journal_section_entries;
  825. if (next_section >= ic->committed_section &&
  826. next_section < ic->committed_section + ic->n_committed_sections)
  827. return true;
  828. if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections)
  829. return true;
  830. return false;
  831. }
  832. #define TAG_READ 0
  833. #define TAG_WRITE 1
  834. #define TAG_CMP 2
  835. static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
  836. unsigned *metadata_offset, unsigned total_size, int op)
  837. {
  838. do {
  839. unsigned char *data, *dp;
  840. struct dm_buffer *b;
  841. unsigned to_copy;
  842. int r;
  843. r = dm_integrity_failed(ic);
  844. if (unlikely(r))
  845. return r;
  846. data = dm_bufio_read(ic->bufio, *metadata_block, &b);
  847. if (unlikely(IS_ERR(data)))
  848. return PTR_ERR(data);
  849. to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
  850. dp = data + *metadata_offset;
  851. if (op == TAG_READ) {
  852. memcpy(tag, dp, to_copy);
  853. } else if (op == TAG_WRITE) {
  854. memcpy(dp, tag, to_copy);
  855. dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
  856. } else {
  857. /* e.g.: op == TAG_CMP */
  858. if (unlikely(memcmp(dp, tag, to_copy))) {
  859. unsigned i;
  860. for (i = 0; i < to_copy; i++) {
  861. if (dp[i] != tag[i])
  862. break;
  863. total_size--;
  864. }
  865. dm_bufio_release(b);
  866. return total_size;
  867. }
  868. }
  869. dm_bufio_release(b);
  870. tag += to_copy;
  871. *metadata_offset += to_copy;
  872. if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
  873. (*metadata_block)++;
  874. *metadata_offset = 0;
  875. }
  876. total_size -= to_copy;
  877. } while (unlikely(total_size));
  878. return 0;
  879. }
  880. static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
  881. {
  882. int r;
  883. r = dm_bufio_write_dirty_buffers(ic->bufio);
  884. if (unlikely(r))
  885. dm_integrity_io_error(ic, "writing tags", r);
  886. }
  887. static void sleep_on_endio_wait(struct dm_integrity_c *ic)
  888. {
  889. DECLARE_WAITQUEUE(wait, current);
  890. __add_wait_queue(&ic->endio_wait, &wait);
  891. __set_current_state(TASK_UNINTERRUPTIBLE);
  892. spin_unlock_irq(&ic->endio_wait.lock);
  893. io_schedule();
  894. spin_lock_irq(&ic->endio_wait.lock);
  895. __remove_wait_queue(&ic->endio_wait, &wait);
  896. }
  897. static void autocommit_fn(unsigned long data)
  898. {
  899. struct dm_integrity_c *ic = (struct dm_integrity_c *)data;
  900. if (likely(!dm_integrity_failed(ic)))
  901. queue_work(ic->commit_wq, &ic->commit_work);
  902. }
  903. static void schedule_autocommit(struct dm_integrity_c *ic)
  904. {
  905. if (!timer_pending(&ic->autocommit_timer))
  906. mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies);
  907. }
  908. static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
  909. {
  910. struct bio *bio;
  911. unsigned long flags;
  912. spin_lock_irqsave(&ic->endio_wait.lock, flags);
  913. bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
  914. bio_list_add(&ic->flush_bio_list, bio);
  915. spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
  916. queue_work(ic->commit_wq, &ic->commit_work);
  917. }
  918. static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
  919. {
  920. int r = dm_integrity_failed(ic);
  921. if (unlikely(r) && !bio->bi_status)
  922. bio->bi_status = errno_to_blk_status(r);
  923. bio_endio(bio);
  924. }
  925. static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
  926. {
  927. struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
  928. if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
  929. submit_flush_bio(ic, dio);
  930. else
  931. do_endio(ic, bio);
  932. }
  933. static void dec_in_flight(struct dm_integrity_io *dio)
  934. {
  935. if (atomic_dec_and_test(&dio->in_flight)) {
  936. struct dm_integrity_c *ic = dio->ic;
  937. struct bio *bio;
  938. remove_range(ic, &dio->range);
  939. if (unlikely(dio->write))
  940. schedule_autocommit(ic);
  941. bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
  942. if (unlikely(dio->bi_status) && !bio->bi_status)
  943. bio->bi_status = dio->bi_status;
  944. if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
  945. dio->range.logical_sector += dio->range.n_sectors;
  946. bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
  947. INIT_WORK(&dio->work, integrity_bio_wait);
  948. queue_work(ic->wait_wq, &dio->work);
  949. return;
  950. }
  951. do_endio_flush(ic, dio);
  952. }
  953. }
  954. static void integrity_end_io(struct bio *bio)
  955. {
  956. struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
  957. bio->bi_iter = dio->orig_bi_iter;
  958. bio->bi_disk = dio->orig_bi_disk;
  959. bio->bi_partno = dio->orig_bi_partno;
  960. if (dio->orig_bi_integrity) {
  961. bio->bi_integrity = dio->orig_bi_integrity;
  962. bio->bi_opf |= REQ_INTEGRITY;
  963. }
  964. bio->bi_end_io = dio->orig_bi_end_io;
  965. if (dio->completion)
  966. complete(dio->completion);
  967. dec_in_flight(dio);
  968. }
  969. static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
  970. const char *data, char *result)
  971. {
  972. __u64 sector_le = cpu_to_le64(sector);
  973. SHASH_DESC_ON_STACK(req, ic->internal_hash);
  974. int r;
  975. unsigned digest_size;
  976. req->tfm = ic->internal_hash;
  977. req->flags = 0;
  978. r = crypto_shash_init(req);
  979. if (unlikely(r < 0)) {
  980. dm_integrity_io_error(ic, "crypto_shash_init", r);
  981. goto failed;
  982. }
  983. r = crypto_shash_update(req, (const __u8 *)&sector_le, sizeof sector_le);
  984. if (unlikely(r < 0)) {
  985. dm_integrity_io_error(ic, "crypto_shash_update", r);
  986. goto failed;
  987. }
  988. r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
  989. if (unlikely(r < 0)) {
  990. dm_integrity_io_error(ic, "crypto_shash_update", r);
  991. goto failed;
  992. }
  993. r = crypto_shash_final(req, result);
  994. if (unlikely(r < 0)) {
  995. dm_integrity_io_error(ic, "crypto_shash_final", r);
  996. goto failed;
  997. }
  998. digest_size = crypto_shash_digestsize(ic->internal_hash);
  999. if (unlikely(digest_size < ic->tag_size))
  1000. memset(result + digest_size, 0, ic->tag_size - digest_size);
  1001. return;
  1002. failed:
  1003. /* this shouldn't happen anyway, the hash functions have no reason to fail */
  1004. get_random_bytes(result, ic->tag_size);
  1005. }
  1006. static void integrity_metadata(struct work_struct *w)
  1007. {
  1008. struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
  1009. struct dm_integrity_c *ic = dio->ic;
  1010. int r;
  1011. if (ic->internal_hash) {
  1012. struct bvec_iter iter;
  1013. struct bio_vec bv;
  1014. unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
  1015. struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
  1016. char *checksums;
  1017. unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
  1018. char checksums_onstack[ic->tag_size + extra_space];
  1019. unsigned sectors_to_process = dio->range.n_sectors;
  1020. sector_t sector = dio->range.logical_sector;
  1021. if (unlikely(ic->mode == 'R'))
  1022. goto skip_io;
  1023. checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
  1024. GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
  1025. if (!checksums)
  1026. checksums = checksums_onstack;
  1027. __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
  1028. unsigned pos;
  1029. char *mem, *checksums_ptr;
  1030. again:
  1031. mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
  1032. pos = 0;
  1033. checksums_ptr = checksums;
  1034. do {
  1035. integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
  1036. checksums_ptr += ic->tag_size;
  1037. sectors_to_process -= ic->sectors_per_block;
  1038. pos += ic->sectors_per_block << SECTOR_SHIFT;
  1039. sector += ic->sectors_per_block;
  1040. } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
  1041. kunmap_atomic(mem);
  1042. r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
  1043. checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
  1044. if (unlikely(r)) {
  1045. if (r > 0) {
  1046. DMERR("Checksum failed at sector 0x%llx",
  1047. (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
  1048. r = -EILSEQ;
  1049. atomic64_inc(&ic->number_of_mismatches);
  1050. }
  1051. if (likely(checksums != checksums_onstack))
  1052. kfree(checksums);
  1053. goto error;
  1054. }
  1055. if (!sectors_to_process)
  1056. break;
  1057. if (unlikely(pos < bv.bv_len)) {
  1058. bv.bv_offset += pos;
  1059. bv.bv_len -= pos;
  1060. goto again;
  1061. }
  1062. }
  1063. if (likely(checksums != checksums_onstack))
  1064. kfree(checksums);
  1065. } else {
  1066. struct bio_integrity_payload *bip = dio->orig_bi_integrity;
  1067. if (bip) {
  1068. struct bio_vec biv;
  1069. struct bvec_iter iter;
  1070. unsigned data_to_process = dio->range.n_sectors;
  1071. sector_to_block(ic, data_to_process);
  1072. data_to_process *= ic->tag_size;
  1073. bip_for_each_vec(biv, bip, iter) {
  1074. unsigned char *tag;
  1075. unsigned this_len;
  1076. BUG_ON(PageHighMem(biv.bv_page));
  1077. tag = lowmem_page_address(biv.bv_page) + biv.bv_offset;
  1078. this_len = min(biv.bv_len, data_to_process);
  1079. r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
  1080. this_len, !dio->write ? TAG_READ : TAG_WRITE);
  1081. if (unlikely(r))
  1082. goto error;
  1083. data_to_process -= this_len;
  1084. if (!data_to_process)
  1085. break;
  1086. }
  1087. }
  1088. }
  1089. skip_io:
  1090. dec_in_flight(dio);
  1091. return;
  1092. error:
  1093. dio->bi_status = errno_to_blk_status(r);
  1094. dec_in_flight(dio);
  1095. }
  1096. static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
  1097. {
  1098. struct dm_integrity_c *ic = ti->private;
  1099. struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
  1100. struct bio_integrity_payload *bip;
  1101. sector_t area, offset;
  1102. dio->ic = ic;
  1103. dio->bi_status = 0;
  1104. if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
  1105. submit_flush_bio(ic, dio);
  1106. return DM_MAPIO_SUBMITTED;
  1107. }
  1108. dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
  1109. dio->write = bio_op(bio) == REQ_OP_WRITE;
  1110. dio->fua = dio->write && bio->bi_opf & REQ_FUA;
  1111. if (unlikely(dio->fua)) {
  1112. /*
  1113. * Don't pass down the FUA flag because we have to flush
  1114. * disk cache anyway.
  1115. */
  1116. bio->bi_opf &= ~REQ_FUA;
  1117. }
  1118. if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
  1119. DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
  1120. (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
  1121. (unsigned long long)ic->provided_data_sectors);
  1122. return DM_MAPIO_KILL;
  1123. }
  1124. if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
  1125. DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
  1126. ic->sectors_per_block,
  1127. (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
  1128. return DM_MAPIO_KILL;
  1129. }
  1130. if (ic->sectors_per_block > 1) {
  1131. struct bvec_iter iter;
  1132. struct bio_vec bv;
  1133. bio_for_each_segment(bv, bio, iter) {
  1134. if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
  1135. DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
  1136. bv.bv_offset, bv.bv_len, ic->sectors_per_block);
  1137. return DM_MAPIO_KILL;
  1138. }
  1139. }
  1140. }
  1141. bip = bio_integrity(bio);
  1142. if (!ic->internal_hash) {
  1143. if (bip) {
  1144. unsigned wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block;
  1145. if (ic->log2_tag_size >= 0)
  1146. wanted_tag_size <<= ic->log2_tag_size;
  1147. else
  1148. wanted_tag_size *= ic->tag_size;
  1149. if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
  1150. DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
  1151. return DM_MAPIO_KILL;
  1152. }
  1153. }
  1154. } else {
  1155. if (unlikely(bip != NULL)) {
  1156. DMERR("Unexpected integrity data when using internal hash");
  1157. return DM_MAPIO_KILL;
  1158. }
  1159. }
  1160. if (unlikely(ic->mode == 'R') && unlikely(dio->write))
  1161. return DM_MAPIO_KILL;
  1162. get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
  1163. dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
  1164. bio->bi_iter.bi_sector = get_data_sector(ic, area, offset);
  1165. dm_integrity_map_continue(dio, true);
  1166. return DM_MAPIO_SUBMITTED;
  1167. }
  1168. static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
  1169. unsigned journal_section, unsigned journal_entry)
  1170. {
  1171. struct dm_integrity_c *ic = dio->ic;
  1172. sector_t logical_sector;
  1173. unsigned n_sectors;
  1174. logical_sector = dio->range.logical_sector;
  1175. n_sectors = dio->range.n_sectors;
  1176. do {
  1177. struct bio_vec bv = bio_iovec(bio);
  1178. char *mem;
  1179. if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors))
  1180. bv.bv_len = n_sectors << SECTOR_SHIFT;
  1181. n_sectors -= bv.bv_len >> SECTOR_SHIFT;
  1182. bio_advance_iter(bio, &bio->bi_iter, bv.bv_len);
  1183. retry_kmap:
  1184. mem = kmap_atomic(bv.bv_page);
  1185. if (likely(dio->write))
  1186. flush_dcache_page(bv.bv_page);
  1187. do {
  1188. struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry);
  1189. if (unlikely(!dio->write)) {
  1190. struct journal_sector *js;
  1191. char *mem_ptr;
  1192. unsigned s;
  1193. if (unlikely(journal_entry_is_inprogress(je))) {
  1194. flush_dcache_page(bv.bv_page);
  1195. kunmap_atomic(mem);
  1196. __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
  1197. goto retry_kmap;
  1198. }
  1199. smp_rmb();
  1200. BUG_ON(journal_entry_get_sector(je) != logical_sector);
  1201. js = access_journal_data(ic, journal_section, journal_entry);
  1202. mem_ptr = mem + bv.bv_offset;
  1203. s = 0;
  1204. do {
  1205. memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA);
  1206. *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s];
  1207. js++;
  1208. mem_ptr += 1 << SECTOR_SHIFT;
  1209. } while (++s < ic->sectors_per_block);
  1210. #ifdef INTERNAL_VERIFY
  1211. if (ic->internal_hash) {
  1212. char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
  1213. integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
  1214. if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
  1215. DMERR("Checksum failed when reading from journal, at sector 0x%llx",
  1216. (unsigned long long)logical_sector);
  1217. }
  1218. }
  1219. #endif
  1220. }
  1221. if (!ic->internal_hash) {
  1222. struct bio_integrity_payload *bip = bio_integrity(bio);
  1223. unsigned tag_todo = ic->tag_size;
  1224. char *tag_ptr = journal_entry_tag(ic, je);
  1225. if (bip) do {
  1226. struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
  1227. unsigned tag_now = min(biv.bv_len, tag_todo);
  1228. char *tag_addr;
  1229. BUG_ON(PageHighMem(biv.bv_page));
  1230. tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset;
  1231. if (likely(dio->write))
  1232. memcpy(tag_ptr, tag_addr, tag_now);
  1233. else
  1234. memcpy(tag_addr, tag_ptr, tag_now);
  1235. bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now);
  1236. tag_ptr += tag_now;
  1237. tag_todo -= tag_now;
  1238. } while (unlikely(tag_todo)); else {
  1239. if (likely(dio->write))
  1240. memset(tag_ptr, 0, tag_todo);
  1241. }
  1242. }
  1243. if (likely(dio->write)) {
  1244. struct journal_sector *js;
  1245. unsigned s;
  1246. js = access_journal_data(ic, journal_section, journal_entry);
  1247. memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT);
  1248. s = 0;
  1249. do {
  1250. je->last_bytes[s] = js[s].commit_id;
  1251. } while (++s < ic->sectors_per_block);
  1252. if (ic->internal_hash) {
  1253. unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
  1254. if (unlikely(digest_size > ic->tag_size)) {
  1255. char checksums_onstack[digest_size];
  1256. integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
  1257. memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
  1258. } else
  1259. integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je));
  1260. }
  1261. journal_entry_set_sector(je, logical_sector);
  1262. }
  1263. logical_sector += ic->sectors_per_block;
  1264. journal_entry++;
  1265. if (unlikely(journal_entry == ic->journal_section_entries)) {
  1266. journal_entry = 0;
  1267. journal_section++;
  1268. wraparound_section(ic, &journal_section);
  1269. }
  1270. bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
  1271. } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);
  1272. if (unlikely(!dio->write))
  1273. flush_dcache_page(bv.bv_page);
  1274. kunmap_atomic(mem);
  1275. } while (n_sectors);
  1276. if (likely(dio->write)) {
  1277. smp_mb();
  1278. if (unlikely(waitqueue_active(&ic->copy_to_journal_wait)))
  1279. wake_up(&ic->copy_to_journal_wait);
  1280. if (ACCESS_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) {
  1281. queue_work(ic->commit_wq, &ic->commit_work);
  1282. } else {
  1283. schedule_autocommit(ic);
  1284. }
  1285. } else {
  1286. remove_range(ic, &dio->range);
  1287. }
  1288. if (unlikely(bio->bi_iter.bi_size)) {
  1289. sector_t area, offset;
  1290. dio->range.logical_sector = logical_sector;
  1291. get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
  1292. dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
  1293. return true;
  1294. }
  1295. return false;
  1296. }
/*
 * Process one bio, or what is left of a bio that was only partly handled.
 *
 * @dio:      per-bio state; the bio is recovered from it with
 *            dm_bio_from_per_bio_data().
 * @from_map: when true, every step that would have to sleep is handed to a
 *            workqueue instead (see the comment at the "sleep" label); the
 *            work item re-enters this function through integrity_bio_wait()
 *            with from_map == false.
 *
 * Three outcomes are possible:
 *  - the bio is served from / written to the journal (journal_read_write),
 *  - the bio is redirected to the underlying device and its tags are handled
 *    by integrity_metadata(),
 *  - the bio is completed immediately because the target has failed.
 */
static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map)
{
	struct dm_integrity_c *ic = dio->ic;
	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
	unsigned journal_section, journal_entry;
	unsigned journal_read_pos;
	struct completion read_comp;
	/* Reads with an internal hash are waited for synchronously further down. */
	bool need_sync_io = ic->internal_hash && !dio->write;

	/*
	 * A synchronous wait is needed below, so when called with from_map set
	 * the whole bio is deferred to metadata_wq right away.
	 */
	if (need_sync_io && from_map) {
		INIT_WORK(&dio->work, integrity_bio_wait);
		queue_work(ic->metadata_wq, &dio->work);
		return;
	}

lock_retry:
	spin_lock_irq(&ic->endio_wait.lock);
retry:
	/* Reached with endio_wait.lock held. */
	if (unlikely(dm_integrity_failed(ic))) {
		spin_unlock_irq(&ic->endio_wait.lock);
		do_endio(ic, bio);
		return;
	}
	/* Start from the full remaining bio; the branches below may shorten it. */
	dio->range.n_sectors = bio_sectors(bio);
	journal_read_pos = NOT_FOUND;
	if (likely(ic->mode == 'J')) {
		if (dio->write) {
			unsigned next_entry, i, pos;
			unsigned ws, we, range_sectors;

			/*
			 * Clamp the write to the free journal space.  free_sectors
			 * is shifted by log2_sectors_per_block to obtain sectors
			 * here and is decremented by a block count below.
			 */
			dio->range.n_sectors = min(dio->range.n_sectors,
						   ic->free_sectors << ic->sb->log2_sectors_per_block);
			if (unlikely(!dio->range.n_sectors))
				goto sleep;
			range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block;
			ic->free_sectors -= range_sectors;
			/*
			 * Reserve range_sectors consecutive entries starting at the
			 * current free position and advance the free position; each
			 * section boundary crossed is counted as one more
			 * uncommitted section.
			 */
			journal_section = ic->free_section;
			journal_entry = ic->free_section_entry;

			next_entry = ic->free_section_entry + range_sectors;
			ic->free_section_entry = next_entry % ic->journal_section_entries;
			ic->free_section += next_entry / ic->journal_section_entries;
			ic->n_uncommitted_sections += next_entry / ic->journal_section_entries;
			wraparound_section(ic, &ic->free_section);

			/* pos is the flat index of the first reserved entry in journal_tree. */
			pos = journal_section * ic->journal_section_entries + journal_entry;
			ws = journal_section;
			we = journal_entry;
			i = 0;
			/*
			 * For each block of the write: register the journal slot
			 * under the block's logical sector and mark the journal
			 * entry as in progress.  (ws, we) walks the same slots as
			 * pos, in section/entry form.
			 */
			do {
				struct journal_entry *je;

				add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i);
				pos++;
				if (unlikely(pos >= ic->journal_entries))
					pos = 0;

				je = access_journal_entry(ic, ws, we);
				BUG_ON(!journal_entry_is_unused(je));
				journal_entry_set_inprogress(je);
				we++;
				if (unlikely(we == ic->journal_section_entries)) {
					we = 0;
					ws++;
					wraparound_section(ic, &ws);
				}
			} while ((i += ic->sectors_per_block) < dio->range.n_sectors);

			spin_unlock_irq(&ic->endio_wait.lock);
			/* Journaled writes do not go through add_new_range(). */
			goto journal_read_write;
		} else {
			sector_t next_sector;

			journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
			if (likely(journal_read_pos == NOT_FOUND)) {
				/*
				 * First block is not in the journal: shorten the
				 * read so that it stops before next_sector.
				 * NOTE(review): next_sector is presumably the next
				 * journaled sector above logical_sector - confirm
				 * against find_journal_node().
				 */
				if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector))
					dio->range.n_sectors = next_sector - dio->range.logical_sector;
			} else {
				unsigned i;
				unsigned jp = journal_read_pos + 1;

				/*
				 * First block is in the journal: extend the read
				 * over following journal positions for as long as
				 * test_journal_node() accepts them for the
				 * corresponding logical sectors.
				 */
				for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) {
					if (!test_journal_node(ic, jp, dio->range.logical_sector + i))
						break;
				}
				dio->range.n_sectors = i;
			}
		}
	}
	if (unlikely(!add_new_range(ic, &dio->range))) {
		/*
		 * We must not sleep in the request routine because it could
		 * stall bios on current->bio_list.
		 * So, we offload the bio to a workqueue if we have to sleep.
		 */
sleep:
		/*
		 * Reached with endio_wait.lock held, either by falling through
		 * or by the goto taken when the journal has no free space.
		 */
		if (from_map) {
			spin_unlock_irq(&ic->endio_wait.lock);
			INIT_WORK(&dio->work, integrity_bio_wait);
			queue_work(ic->wait_wq, &dio->work);
			return;
		} else {
			/*
			 * NOTE(review): "retry" expects the lock to be held, so
			 * sleep_on_endio_wait() presumably returns with it
			 * re-acquired - confirm.
			 */
			sleep_on_endio_wait(ic);
			goto retry;
		}
	}
	spin_unlock_irq(&ic->endio_wait.lock);

	/* Read that hit the journal: translate the flat position and serve it from there. */
	if (unlikely(journal_read_pos != NOT_FOUND)) {
		journal_section = journal_read_pos / ic->journal_section_entries;
		journal_entry = journal_read_pos % ic->journal_section_entries;
		goto journal_read_write;
	}

	/* Direct path: the bio goes to the underlying device. */
	dio->in_flight = (atomic_t)ATOMIC_INIT(2);

	if (need_sync_io) {
		init_completion(&read_comp);
		dio->completion = &read_comp;
	} else
		dio->completion = NULL;

	/* Save every bio field that is overwritten below. */
	dio->orig_bi_iter = bio->bi_iter;

	dio->orig_bi_disk = bio->bi_disk;
	dio->orig_bi_partno = bio->bi_partno;
	bio_set_dev(bio, ic->dev->bdev);

	/* Detach the integrity payload from the bio before it is submitted. */
	dio->orig_bi_integrity = bio_integrity(bio);
	bio->bi_integrity = NULL;
	bio->bi_opf &= ~REQ_INTEGRITY;

	dio->orig_bi_end_io = bio->bi_end_io;
	bio->bi_end_io = integrity_end_io;

	/* Submit only the (possibly shortened) range, offset by ic->start. */
	bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
	bio->bi_iter.bi_sector += ic->start;
	generic_make_request(bio);

	if (need_sync_io) {
		/* Wait for the data, then process the tags in this context. */
		wait_for_completion_io(&read_comp);
		if (likely(!bio->bi_status))
			integrity_metadata(&dio->work);
		else
			dec_in_flight(dio);

	} else {
		/* Tag processing runs from metadata_wq. */
		INIT_WORK(&dio->work, integrity_metadata);
		queue_work(ic->metadata_wq, &dio->work);
	}

	return;

journal_read_write:
	/*
	 * A true return restarts the whole procedure from lock_retry.
	 * NOTE(review): presumably this means part of the bio is still
	 * unprocessed - confirm against __journal_read_write().
	 */
	if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry)))
		goto lock_retry;

	do_endio_flush(ic, dio);
}
  1433. static void integrity_bio_wait(struct work_struct *w)
  1434. {
  1435. struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
  1436. dm_integrity_map_continue(dio, false);
  1437. }
  1438. static void pad_uncommitted(struct dm_integrity_c *ic)
  1439. {
  1440. if (ic->free_section_entry) {
  1441. ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry;
  1442. ic->free_section_entry = 0;
  1443. ic->free_section++;
  1444. wraparound_section(ic, &ic->free_section);
  1445. ic->n_uncommitted_sections++;
  1446. }
  1447. WARN_ON(ic->journal_sections * ic->journal_section_entries !=
  1448. (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors);
  1449. }
  1450. static void integrity_commit(struct work_struct *w)
  1451. {
  1452. struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work);
  1453. unsigned commit_start, commit_sections;
  1454. unsigned i, j, n;
  1455. struct bio *flushes;
  1456. del_timer(&ic->autocommit_timer);
  1457. spin_lock_irq(&ic->endio_wait.lock);
  1458. flushes = bio_list_get(&ic->flush_bio_list);
  1459. if (unlikely(ic->mode != 'J')) {
  1460. spin_unlock_irq(&ic->endio_wait.lock);
  1461. dm_integrity_flush_buffers(ic);
  1462. goto release_flush_bios;
  1463. }
  1464. pad_uncommitted(ic);
  1465. commit_start = ic->uncommitted_section;
  1466. commit_sections = ic->n_uncommitted_sections;
  1467. spin_unlock_irq(&ic->endio_wait.lock);
  1468. if (!commit_sections)
  1469. goto release_flush_bios;
  1470. i = commit_start;
  1471. for (n = 0; n < commit_sections; n++) {
  1472. for (j = 0; j < ic->journal_section_entries; j++) {
  1473. struct journal_entry *je;
  1474. je = access_journal_entry(ic, i, j);
  1475. io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
  1476. }
  1477. for (j = 0; j < ic->journal_section_sectors; j++) {
  1478. struct journal_sector *js;
  1479. js = access_journal(ic, i, j);
  1480. js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq);
  1481. }
  1482. i++;
  1483. if (unlikely(i >= ic->journal_sections))
  1484. ic->commit_seq = next_commit_seq(ic->commit_seq);
  1485. wraparound_section(ic, &i);
  1486. }
  1487. smp_rmb();
  1488. write_journal(ic, commit_start, commit_sections);
  1489. spin_lock_irq(&ic->endio_wait.lock);
  1490. ic->uncommitted_section += commit_sections;
  1491. wraparound_section(ic, &ic->uncommitted_section);
  1492. ic->n_uncommitted_sections -= commit_sections;
  1493. ic->n_committed_sections += commit_sections;
  1494. spin_unlock_irq(&ic->endio_wait.lock);
  1495. if (ACCESS_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
  1496. queue_work(ic->writer_wq, &ic->writer_work);
  1497. release_flush_bios:
  1498. while (flushes) {
  1499. struct bio *next = flushes->bi_next;
  1500. flushes->bi_next = NULL;
  1501. do_endio(ic, flushes);
  1502. flushes = next;
  1503. }
  1504. }
  1505. static void complete_copy_from_journal(unsigned long error, void *context)
  1506. {
  1507. struct journal_io *io = context;
  1508. struct journal_completion *comp = io->comp;
  1509. struct dm_integrity_c *ic = comp->ic;
  1510. remove_range(ic, &io->range);
  1511. mempool_free(io, ic->journal_io_mempool);
  1512. if (unlikely(error != 0))
  1513. dm_integrity_io_error(ic, "copying from journal", -EIO);
  1514. complete_journal_op(comp);
  1515. }
  1516. static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js,
  1517. struct journal_entry *je)
  1518. {
  1519. unsigned s = 0;
  1520. do {
  1521. js->commit_id = je->last_bytes[s];
  1522. js++;
  1523. } while (++s < ic->sectors_per_block);
  1524. }
/*
 * Write the contents of a run of journal sections to their final location.
 *
 * @write_start:    first journal section to process.
 * @write_sections: number of sections to process (section index wraps).
 * @from_replay:    true when called from replay_journal().  In that case the
 *                  section MAC and the data tags are checked, sector numbers
 *                  are validated, in-progress entries are tolerated, and the
 *                  "newer committed node" filtering is skipped.
 *
 * For every used entry the function gathers the following entries of the same
 * section that continue it contiguously, stores their tags with
 * dm_integrity_rw_tag() and starts one asynchronous copy_from_journal() for
 * the data.  It returns after all copies have completed and the buffers have
 * been flushed.
 */
static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
			     unsigned write_sections, bool from_replay)
{
	unsigned i, j, n;
	struct journal_completion comp;
	struct blk_plug plug;

	blk_start_plug(&plug);

	comp.ic = ic;
	/*
	 * Starts at 1 so the completion cannot fire while copies are still
	 * being submitted; that reference is dropped by the
	 * complete_journal_op() call after the loop.
	 */
	comp.in_flight = (atomic_t)ATOMIC_INIT(1);
	init_completion(&comp.comp);

	i = write_start;
	for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) {
		/* Without INTERNAL_VERIFY the section MAC is processed on replay only. */
#ifndef INTERNAL_VERIFY
		if (unlikely(from_replay))
#endif
			rw_section_mac(ic, i, false);
		for (j = 0; j < ic->journal_section_entries; j++) {
			struct journal_entry *je = access_journal_entry(ic, i, j);
			sector_t sec, area, offset;
			unsigned k, l, next_loop;
			sector_t metadata_block;
			unsigned metadata_offset;
			struct journal_io *io;

			if (journal_entry_is_unused(je))
				continue;
			BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay);
			sec = journal_entry_get_sector(je);
			if (unlikely(from_replay)) {
				/* A sector not aligned to the block size is reported and rounded down. */
				if (unlikely(sec & (unsigned)(ic->sectors_per_block - 1))) {
					dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
					sec &= ~(sector_t)(ic->sectors_per_block - 1);
				}
			}
			get_area_and_offset(ic, sec, &area, &offset);
			restore_last_bytes(ic, access_journal_data(ic, i, j), je);
			/*
			 * Extend the run [j, k) over the following entries as long
			 * as they are used, lie in the same area and continue the
			 * run without a gap.
			 */
			for (k = j + 1; k < ic->journal_section_entries; k++) {
				struct journal_entry *je2 = access_journal_entry(ic, i, k);
				sector_t sec2, area2, offset2;
				if (journal_entry_is_unused(je2))
					break;
				BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
				sec2 = journal_entry_get_sector(je2);
				get_area_and_offset(ic, sec2, &area2, &offset2);
				if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
					break;
				restore_last_bytes(ic, access_journal_data(ic, i, k), je2);
			}
			/*
			 * j and k may both be changed below, so remember now where
			 * the outer loop has to resume (its j++ then yields the
			 * original k).
			 */
			next_loop = k - 1;

			io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO);
			io->comp = &comp;
			io->range.logical_sector = sec;
			io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;

			spin_lock_irq(&ic->endio_wait.lock);
			/* Sleep until the range can be added. */
			while (unlikely(!add_new_range(ic, &io->range)))
				sleep_on_endio_wait(ic);

			if (likely(!from_replay)) {
				struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];

				/* don't write if there is newer committed sector */
				/* Trim such entries from the front of the run ... */
				while (j < k && find_newer_committed_node(ic, &section_node[j])) {
					struct journal_entry *je2 = access_journal_entry(ic, i, j);

					journal_entry_set_unused(je2);
					remove_journal_node(ic, &section_node[j]);
					j++;
					sec += ic->sectors_per_block;
					offset += ic->sectors_per_block;
				}
				/* ... and from its back. */
				while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) {
					struct journal_entry *je2 = access_journal_entry(ic, i, k - 1);

					journal_entry_set_unused(je2);
					remove_journal_node(ic, &section_node[k - 1]);
					k--;
				}
				/* Nothing left to write: release the range and the io. */
				if (j == k) {
					remove_range_unlocked(ic, &io->range);
					spin_unlock_irq(&ic->endio_wait.lock);
					mempool_free(io, ic->journal_io_mempool);
					goto skip_io;
				}
				/* The remaining entries are written now; drop their nodes. */
				for (l = j; l < k; l++) {
					remove_journal_node(ic, &section_node[l]);
				}
			}
			spin_unlock_irq(&ic->endio_wait.lock);

			metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
			for (l = j; l < k; l++) {
				int r;
				struct journal_entry *je2 = access_journal_entry(ic, i, l);

				/*
				 * With an internal hash, recompute the checksum of
				 * the journaled data and compare it with the tag in
				 * the entry (on replay only unless INTERNAL_VERIFY
				 * is defined).  A mismatch is reported, the write
				 * still proceeds.
				 */
				if (
#ifndef INTERNAL_VERIFY
				    unlikely(from_replay) &&
#endif
				    ic->internal_hash) {
					char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];

					integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
								  (char *)access_journal_data(ic, i, l), test_tag);
					if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size)))
						dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
				}

				journal_entry_set_unused(je2);
				/* Store the entry's tag; block/offset are passed by pointer so they can advance. */
				r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset,
							ic->tag_size, TAG_WRITE);
				if (unlikely(r)) {
					dm_integrity_io_error(ic, "reading tags", r);
				}
			}

			/* One more copy in flight; complete_copy_from_journal() ends it. */
			atomic_inc(&comp.in_flight);
			copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block,
					  (k - j) << ic->sb->log2_sectors_per_block,
					  get_data_sector(ic, area, offset),
					  complete_copy_from_journal, io);
skip_io:
			j = next_loop;
		}
	}

	dm_bufio_write_dirty_buffers_async(ic->bufio);

	blk_finish_plug(&plug);

	/* Drop the initial reference and wait for all outstanding copies. */
	complete_journal_op(&comp);
	wait_for_completion_io(&comp.comp);

	dm_integrity_flush_buffers(ic);
}
  1645. static void integrity_writer(struct work_struct *w)
  1646. {
  1647. struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work);
  1648. unsigned write_start, write_sections;
  1649. unsigned prev_free_sectors;
  1650. /* the following test is not needed, but it tests the replay code */
  1651. if (ACCESS_ONCE(ic->suspending))
  1652. return;
  1653. spin_lock_irq(&ic->endio_wait.lock);
  1654. write_start = ic->committed_section;
  1655. write_sections = ic->n_committed_sections;
  1656. spin_unlock_irq(&ic->endio_wait.lock);
  1657. if (!write_sections)
  1658. return;
  1659. do_journal_write(ic, write_start, write_sections, false);
  1660. spin_lock_irq(&ic->endio_wait.lock);
  1661. ic->committed_section += write_sections;
  1662. wraparound_section(ic, &ic->committed_section);
  1663. ic->n_committed_sections -= write_sections;
  1664. prev_free_sectors = ic->free_sectors;
  1665. ic->free_sectors += write_sections * ic->journal_section_entries;
  1666. if (unlikely(!prev_free_sectors))
  1667. wake_up_locked(&ic->endio_wait);
  1668. spin_unlock_irq(&ic->endio_wait.lock);
  1669. }
  1670. static void init_journal(struct dm_integrity_c *ic, unsigned start_section,
  1671. unsigned n_sections, unsigned char commit_seq)
  1672. {
  1673. unsigned i, j, n;
  1674. if (!n_sections)
  1675. return;
  1676. for (n = 0; n < n_sections; n++) {
  1677. i = start_section + n;
  1678. wraparound_section(ic, &i);
  1679. for (j = 0; j < ic->journal_section_sectors; j++) {
  1680. struct journal_sector *js = access_journal(ic, i, j);
  1681. memset(&js->entries, 0, JOURNAL_SECTOR_DATA);
  1682. js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq);
  1683. }
  1684. for (j = 0; j < ic->journal_section_entries; j++) {
  1685. struct journal_entry *je = access_journal_entry(ic, i, j);
  1686. journal_entry_set_unused(je);
  1687. }
  1688. }
  1689. write_journal(ic, start_section, n_sections);
  1690. }
  1691. static int find_commit_seq(struct dm_integrity_c *ic, unsigned i, unsigned j, commit_id_t id)
  1692. {
  1693. unsigned char k;
  1694. for (k = 0; k < N_COMMIT_IDS; k++) {
  1695. if (dm_integrity_commit_id(ic, i, j, k) == id)
  1696. return k;
  1697. }
  1698. dm_integrity_io_error(ic, "journal commit id", -EIO);
  1699. return -EIO;
  1700. }
/*
 * Bring the on-disk journal and the in-memory journal state into a defined
 * state on resume.
 *
 * Nothing is done in mode 'R' or when journal_uptodate is set.  Otherwise the
 * journal is read (unless the device was just formatted), the commit ids are
 * analysed to find the consistent part of the journal, that part is replayed
 * through do_journal_write(), and then the journal is either continued from
 * where it left off or cleared.  In all cases the in-memory journal
 * bookkeeping is reset at the end.
 */
static void replay_journal(struct dm_integrity_c *ic)
{
	unsigned i, j;
	bool used_commit_ids[N_COMMIT_IDS];
	unsigned max_commit_id_sections[N_COMMIT_IDS];
	unsigned write_start, write_sections;
	unsigned continue_section;
	bool journal_empty;
	unsigned char unused, last_used, want_commit_seq;

	if (ic->mode == 'R')
		return;

	if (ic->journal_uptodate)
		return;

	/*
	 * Both are read at clear_journal, which can be reached by goto before
	 * they are computed, so give them defined values first.
	 */
	last_used = 0;
	write_start = 0;

	if (!ic->just_formatted) {
		DEBUG_print("reading journal\n");
		rw_journal(ic, REQ_OP_READ, 0, 0, ic->journal_sections, NULL);
		if (ic->journal_io)
			DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal");
		/* journal_io is set: run encrypt_journal() over all sections and wait for it. */
		if (ic->journal_io) {
			struct journal_completion crypt_comp;
			crypt_comp.ic = ic;
			init_completion(&crypt_comp.comp);
			crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0);
			encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp);
			wait_for_completion(&crypt_comp.comp);
		}
		DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal");
	}

	if (dm_integrity_failed(ic))
		goto clear_journal;

	/*
	 * Scan the whole journal: record which commit sequence numbers occur,
	 * the last section in which each one was seen, and whether any entry
	 * is in use.
	 */
	journal_empty = true;
	memset(used_commit_ids, 0, sizeof used_commit_ids);
	memset(max_commit_id_sections, 0, sizeof max_commit_id_sections);
	for (i = 0; i < ic->journal_sections; i++) {
		for (j = 0; j < ic->journal_section_sectors; j++) {
			int k;
			struct journal_sector *js = access_journal(ic, i, j);
			k = find_commit_seq(ic, i, j, js->commit_id);
			if (k < 0)
				goto clear_journal;
			used_commit_ids[k] = true;
			max_commit_id_sections[k] = i;
		}
		if (journal_empty) {
			for (j = 0; j < ic->journal_section_entries; j++) {
				struct journal_entry *je = access_journal_entry(ic, i, j);
				if (!journal_entry_is_unused(je)) {
					journal_empty = false;
					break;
				}
			}
		}
	}

	/*
	 * Pick an unused commit sequence number.  If the highest one is
	 * unused, walk down to the start of the unused run at the top;
	 * otherwise take the first unused one from the bottom.  It is an
	 * error if every sequence number is in use.
	 */
	if (!used_commit_ids[N_COMMIT_IDS - 1]) {
		unused = N_COMMIT_IDS - 1;
		while (unused && !used_commit_ids[unused - 1])
			unused--;
	} else {
		for (unused = 0; unused < N_COMMIT_IDS; unused++)
			if (!used_commit_ids[unused])
				break;
		if (unused == N_COMMIT_IDS) {
			dm_integrity_io_error(ic, "journal commit ids", -EIO);
			goto clear_journal;
		}
	}
	DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n",
		    unused, used_commit_ids[0], used_commit_ids[1],
		    used_commit_ids[2], used_commit_ids[3]);

	/* The sequence number before the unused one is the last one used; replay looks for the one before that. */
	last_used = prev_commit_seq(unused);
	want_commit_seq = prev_commit_seq(last_used);

	if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)])
		journal_empty = true;

	/* Replay starts in the section after the last one carrying last_used. */
	write_start = max_commit_id_sections[last_used] + 1;
	if (unlikely(write_start >= ic->journal_sections))
		want_commit_seq = next_commit_seq(want_commit_seq);
	wraparound_section(ic, &write_start);

	/*
	 * Count the sections, starting at write_start, whose sectors all carry
	 * the expected commit id.  The expected sequence number advances each
	 * time the section index wraps.
	 */
	i = write_start;
	for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) {
		for (j = 0; j < ic->journal_section_sectors; j++) {
			struct journal_sector *js = access_journal(ic, i, j);

			if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) {
				/*
				 * This could be caused by crash during writing.
				 * We won't replay the inconsistent part of the
				 * journal.
				 */
				DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n",
					    i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq);
				goto brk;
			}
		}
		i++;
		if (unlikely(i >= ic->journal_sections))
			want_commit_seq = next_commit_seq(want_commit_seq);
		wraparound_section(ic, &i);
	}
brk:

	if (!journal_empty) {
		DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n",
			    write_sections, write_start, want_commit_seq);
		do_journal_write(ic, write_start, write_sections, true);
	}

	/*
	 * The journal is kept and continued only if every section was
	 * consistent and either the target is in journal mode or the journal
	 * holds no entries; otherwise it is rewritten from scratch.
	 */
	if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) {
		continue_section = write_start;
		ic->commit_seq = want_commit_seq;
		DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq);
	} else {
		unsigned s;
		unsigned char erase_seq;
clear_journal:
		/* Also entered by goto from the error paths above. */
		DEBUG_print("clearing journal\n");

		erase_seq = prev_commit_seq(prev_commit_seq(last_used));
		/*
		 * All sections are re-initialised with erase_seq in three
		 * separate init_journal() calls: the section at write_start,
		 * then the journal_sections - 2 following ones, then the last
		 * remaining one.
		 * NOTE(review): the reason for the three-step order is not
		 * visible here - presumably crash safety; confirm.
		 */
		s = write_start;
		init_journal(ic, s, 1, erase_seq);
		s++;
		wraparound_section(ic, &s);
		if (ic->journal_sections >= 2) {
			init_journal(ic, s, ic->journal_sections - 2, erase_seq);
			s += ic->journal_sections - 2;
			wraparound_section(ic, &s);
			init_journal(ic, s, 1, erase_seq);
		}

		continue_section = 0;
		ic->commit_seq = next_commit_seq(erase_seq);
	}

	/* Reset the in-memory state: nothing committed or uncommitted, all entries free, empty tree. */
	ic->committed_section = continue_section;
	ic->n_committed_sections = 0;

	ic->uncommitted_section = continue_section;
	ic->n_uncommitted_sections = 0;

	ic->free_section = continue_section;
	ic->free_section_entry = 0;
	ic->free_sectors = ic->journal_entries;

	ic->journal_tree_root = RB_ROOT;
	for (i = 0; i < ic->journal_entries; i++)
		init_journal_node(&ic->journal_tree[i]);
}
  1840. static void dm_integrity_postsuspend(struct dm_target *ti)
  1841. {
  1842. struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
  1843. del_timer_sync(&ic->autocommit_timer);
  1844. ic->suspending = true;
  1845. queue_work(ic->commit_wq, &ic->commit_work);
  1846. drain_workqueue(ic->commit_wq);
  1847. if (ic->mode == 'J') {
  1848. drain_workqueue(ic->writer_wq);
  1849. dm_integrity_flush_buffers(ic);
  1850. }
  1851. ic->suspending = false;
  1852. BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
  1853. ic->journal_uptodate = true;
  1854. }
  1855. static void dm_integrity_resume(struct dm_target *ti)
  1856. {
  1857. struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
  1858. replay_journal(ic);
  1859. }
  1860. static void dm_integrity_status(struct dm_target *ti, status_type_t type,
  1861. unsigned status_flags, char *result, unsigned maxlen)
  1862. {
  1863. struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
  1864. unsigned arg_count;
  1865. size_t sz = 0;
  1866. switch (type) {
  1867. case STATUSTYPE_INFO:
  1868. DMEMIT("%llu", (unsigned long long)atomic64_read(&ic->number_of_mismatches));
  1869. break;
  1870. case STATUSTYPE_TABLE: {
  1871. __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100;
  1872. watermark_percentage += ic->journal_entries / 2;
  1873. do_div(watermark_percentage, ic->journal_entries);
  1874. arg_count = 5;
  1875. arg_count += ic->sectors_per_block != 1;
  1876. arg_count += !!ic->internal_hash_alg.alg_string;
  1877. arg_count += !!ic->journal_crypt_alg.alg_string;
  1878. arg_count += !!ic->journal_mac_alg.alg_string;
  1879. DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
  1880. ic->tag_size, ic->mode, arg_count);
  1881. DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
  1882. DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
  1883. DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
  1884. DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
  1885. DMEMIT(" commit_time:%u", ic->autocommit_msec);
  1886. if (ic->sectors_per_block != 1)
  1887. DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
  1888. #define EMIT_ALG(a, n) \
  1889. do { \
  1890. if (ic->a.alg_string) { \
  1891. DMEMIT(" %s:%s", n, ic->a.alg_string); \
  1892. if (ic->a.key_string) \
  1893. DMEMIT(":%s", ic->a.key_string);\
  1894. } \
  1895. } while (0)
  1896. EMIT_ALG(internal_hash_alg, "internal_hash");
  1897. EMIT_ALG(journal_crypt_alg, "journal_crypt");
  1898. EMIT_ALG(journal_mac_alg, "journal_mac");
  1899. break;
  1900. }
  1901. }
  1902. }
  1903. static int dm_integrity_iterate_devices(struct dm_target *ti,
  1904. iterate_devices_callout_fn fn, void *data)
  1905. {
  1906. struct dm_integrity_c *ic = ti->private;
  1907. return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
  1908. }
  1909. static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
  1910. {
  1911. struct dm_integrity_c *ic = ti->private;
  1912. if (ic->sectors_per_block > 1) {
  1913. limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
  1914. limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
  1915. blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
  1916. }
  1917. }
  1918. static void calculate_journal_section_size(struct dm_integrity_c *ic)
  1919. {
  1920. unsigned sector_space = JOURNAL_SECTOR_DATA;
  1921. ic->journal_sections = le32_to_cpu(ic->sb->journal_sections);
  1922. ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size,
  1923. JOURNAL_ENTRY_ROUNDUP);
  1924. if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC))
  1925. sector_space -= JOURNAL_MAC_PER_SECTOR;
  1926. ic->journal_entries_per_sector = sector_space / ic->journal_entry_size;
  1927. ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS;
  1928. ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS;
  1929. ic->journal_entries = ic->journal_section_entries * ic->journal_sections;
  1930. }
  1931. static int calculate_device_limits(struct dm_integrity_c *ic)
  1932. {
  1933. __u64 initial_sectors;
  1934. sector_t last_sector, last_area, last_offset;
  1935. calculate_journal_section_size(ic);
  1936. initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections;
  1937. if (initial_sectors + METADATA_PADDING_SECTORS >= ic->device_sectors || initial_sectors > UINT_MAX)
  1938. return -EINVAL;
  1939. ic->initial_sectors = initial_sectors;
  1940. ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
  1941. (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
  1942. if (!(ic->metadata_run & (ic->metadata_run - 1)))
  1943. ic->log2_metadata_run = __ffs(ic->metadata_run);
  1944. else
  1945. ic->log2_metadata_run = -1;
  1946. get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset);
  1947. last_sector = get_data_sector(ic, last_area, last_offset);
  1948. if (ic->start + last_sector < last_sector || ic->start + last_sector >= ic->device_sectors)
  1949. return -EINVAL;
  1950. return 0;
  1951. }
  1952. static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sectors, unsigned interleave_sectors)
  1953. {
  1954. unsigned journal_sections;
  1955. int test_bit;
  1956. memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT);
  1957. memcpy(ic->sb->magic, SB_MAGIC, 8);
  1958. ic->sb->version = SB_VERSION;
  1959. ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
  1960. ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
  1961. if (ic->journal_mac_alg.alg_string)
  1962. ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC);
  1963. calculate_journal_section_size(ic);
  1964. journal_sections = journal_sectors / ic->journal_section_sectors;
  1965. if (!journal_sections)
  1966. journal_sections = 1;
  1967. ic->sb->journal_sections = cpu_to_le32(journal_sections);
  1968. if (!interleave_sectors)
  1969. interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
  1970. ic->sb->log2_interleave_sectors = __fls(interleave_sectors);
  1971. ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
  1972. ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
  1973. ic->provided_data_sectors = 0;
  1974. for (test_bit = fls64(ic->device_sectors) - 1; test_bit >= 3; test_bit--) {
  1975. __u64 prev_data_sectors = ic->provided_data_sectors;
  1976. ic->provided_data_sectors |= (sector_t)1 << test_bit;
  1977. if (calculate_device_limits(ic))
  1978. ic->provided_data_sectors = prev_data_sectors;
  1979. }
  1980. if (!ic->provided_data_sectors)
  1981. return -EINVAL;
  1982. ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
  1983. return 0;
  1984. }
  1985. static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
  1986. {
  1987. struct gendisk *disk = dm_disk(dm_table_get_md(ti->table));
  1988. struct blk_integrity bi;
  1989. memset(&bi, 0, sizeof(bi));
  1990. bi.profile = &dm_integrity_profile;
  1991. bi.tuple_size = ic->tag_size;
  1992. bi.tag_size = bi.tuple_size;
  1993. bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
  1994. blk_integrity_register(disk, &bi);
  1995. blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
  1996. }
  1997. static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl)
  1998. {
  1999. unsigned i;
  2000. if (!pl)
  2001. return;
  2002. for (i = 0; i < ic->journal_pages; i++)
  2003. if (pl[i].page)
  2004. __free_page(pl[i].page);
  2005. kvfree(pl);
  2006. }
  2007. static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic)
  2008. {
  2009. size_t page_list_desc_size = ic->journal_pages * sizeof(struct page_list);
  2010. struct page_list *pl;
  2011. unsigned i;
  2012. pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO);
  2013. if (!pl)
  2014. return NULL;
  2015. for (i = 0; i < ic->journal_pages; i++) {
  2016. pl[i].page = alloc_page(GFP_KERNEL);
  2017. if (!pl[i].page) {
  2018. dm_integrity_free_page_list(ic, pl);
  2019. return NULL;
  2020. }
  2021. if (i)
  2022. pl[i - 1].next = &pl[i];
  2023. }
  2024. return pl;
  2025. }
  2026. static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl)
  2027. {
  2028. unsigned i;
  2029. for (i = 0; i < ic->journal_sections; i++)
  2030. kvfree(sl[i]);
  2031. kfree(sl);
  2032. }
  2033. static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl)
  2034. {
  2035. struct scatterlist **sl;
  2036. unsigned i;
  2037. sl = kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), GFP_KERNEL | __GFP_ZERO);
  2038. if (!sl)
  2039. return NULL;
  2040. for (i = 0; i < ic->journal_sections; i++) {
  2041. struct scatterlist *s;
  2042. unsigned start_index, start_offset;
  2043. unsigned end_index, end_offset;
  2044. unsigned n_pages;
  2045. unsigned idx;
  2046. page_list_location(ic, i, 0, &start_index, &start_offset);
  2047. page_list_location(ic, i, ic->journal_section_sectors - 1, &end_index, &end_offset);
  2048. n_pages = (end_index - start_index + 1);
  2049. s = kvmalloc(n_pages * sizeof(struct scatterlist), GFP_KERNEL);
  2050. if (!s) {
  2051. dm_integrity_free_journal_scatterlist(ic, sl);
  2052. return NULL;
  2053. }
  2054. sg_init_table(s, n_pages);
  2055. for (idx = start_index; idx <= end_index; idx++) {
  2056. char *va = lowmem_page_address(pl[idx].page);
  2057. unsigned start = 0, end = PAGE_SIZE;
  2058. if (idx == start_index)
  2059. start = start_offset;
  2060. if (idx == end_index)
  2061. end = end_offset + (1 << SECTOR_SHIFT);
  2062. sg_set_buf(&s[idx - start_index], va + start, end - start);
  2063. }
  2064. sl[i] = s;
  2065. }
  2066. return sl;
  2067. }
  2068. static void free_alg(struct alg_spec *a)
  2069. {
  2070. kzfree(a->alg_string);
  2071. kzfree(a->key);
  2072. memset(a, 0, sizeof *a);
  2073. }
  2074. static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval)
  2075. {
  2076. char *k;
  2077. free_alg(a);
  2078. a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL);
  2079. if (!a->alg_string)
  2080. goto nomem;
  2081. k = strchr(a->alg_string, ':');
  2082. if (k) {
  2083. *k = 0;
  2084. a->key_string = k + 1;
  2085. if (strlen(a->key_string) & 1)
  2086. goto inval;
  2087. a->key_size = strlen(a->key_string) / 2;
  2088. a->key = kmalloc(a->key_size, GFP_KERNEL);
  2089. if (!a->key)
  2090. goto nomem;
  2091. if (hex2bin(a->key, a->key_string, a->key_size))
  2092. goto inval;
  2093. }
  2094. return 0;
  2095. inval:
  2096. *error = error_inval;
  2097. return -EINVAL;
  2098. nomem:
  2099. *error = "Out of memory for an argument";
  2100. return -ENOMEM;
  2101. }
  2102. static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
  2103. char *error_alg, char *error_key)
  2104. {
  2105. int r;
  2106. if (a->alg_string) {
  2107. *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ASYNC);
  2108. if (IS_ERR(*hash)) {
  2109. *error = error_alg;
  2110. r = PTR_ERR(*hash);
  2111. *hash = NULL;
  2112. return r;
  2113. }
  2114. if (a->key) {
  2115. r = crypto_shash_setkey(*hash, a->key, a->key_size);
  2116. if (r) {
  2117. *error = error_key;
  2118. return r;
  2119. }
  2120. }
  2121. }
  2122. return 0;
  2123. }
/*
 * Allocate the in-memory journal for @ic and, when a journal cipher was
 * configured (ic->journal_crypt_alg.alg_string), prepare the encryption
 * state that goes with it.
 *
 * @ic:    integrity context; journal_sections, journal_section_sectors and
 *         journal_entries are read from it
 * @error: receives a static error message on failure
 *
 * Returns 0 on success or a negative errno.  Only the temporary crypt_data
 * buffer is released here; everything stored in @ic is left in place on
 * failure (dm_integrity_ctr() calls dm_integrity_dtr() on its error path,
 * which frees those members).
 */
static int create_journal(struct dm_integrity_c *ic, char **error)
{
	int r = 0;
	unsigned i;
	__u64 journal_pages, journal_desc_size, journal_tree_size;
	unsigned char *crypt_data = NULL;

	/* Initial commit id values; they may be encrypted and de-duplicated below. */
	ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL);
	ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL);
	ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL);
	ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL);

	/* Number of pages needed to hold all journal sectors, rounded up. */
	journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors,
				PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT);
	journal_desc_size = journal_pages * sizeof(struct page_list);
	/* Reject journals that exceed non-highmem RAM or whose descriptor array size overflows unsigned long. */
	if (journal_pages >= totalram_pages - totalhigh_pages || journal_desc_size > ULONG_MAX) {
		*error = "Journal doesn't fit into memory";
		r = -ENOMEM;
		goto bad;
	}
	ic->journal_pages = journal_pages;

	ic->journal = dm_integrity_alloc_page_list(ic);
	if (!ic->journal) {
		*error = "Could not allocate memory for journal";
		r = -ENOMEM;
		goto bad;
	}
	if (ic->journal_crypt_alg.alg_string) {
		unsigned ivsize, blocksize;
		struct journal_completion comp;

		comp.ic = ic;
		ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0);
		if (IS_ERR(ic->journal_crypt)) {
			*error = "Invalid journal cipher";
			r = PTR_ERR(ic->journal_crypt);
			ic->journal_crypt = NULL;
			goto bad;
		}
		ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
		blocksize = crypto_skcipher_blocksize(ic->journal_crypt);

		if (ic->journal_crypt_alg.key) {
			r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key,
						   ic->journal_crypt_alg.key_size);
			if (r) {
				*error = "Error setting encryption key";
				goto bad;
			}
		}
		DEBUG_print("cipher %s, block size %u iv size %u\n",
			    ic->journal_crypt_alg.alg_string, blocksize, ivsize);

		/* Second page list, same size as the journal. */
		ic->journal_io = dm_integrity_alloc_page_list(ic);
		if (!ic->journal_io) {
			*error = "Could not allocate memory for journal io";
			r = -ENOMEM;
			goto bad;
		}

		if (blocksize == 1) {
			/*
			 * Cipher block size of 1 (presumably a stream cipher -
			 * confirm): encrypt zero-filled pages once, in place,
			 * with an all-zero IV and keep the result in journal_xor.
			 * The cipher itself is released at the end of this branch.
			 */
			struct scatterlist *sg;
			SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt);
			unsigned char iv[ivsize];
			skcipher_request_set_tfm(req, ic->journal_crypt);

			ic->journal_xor = dm_integrity_alloc_page_list(ic);
			if (!ic->journal_xor) {
				*error = "Could not allocate memory for journal xor";
				r = -ENOMEM;
				goto bad;
			}

			/* One entry per journal page plus one for the commit ids. */
			sg = kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), GFP_KERNEL);
			if (!sg) {
				*error = "Unable to allocate sg list";
				r = -ENOMEM;
				goto bad;
			}
			sg_init_table(sg, ic->journal_pages + 1);
			for (i = 0; i < ic->journal_pages; i++) {
				char *va = lowmem_page_address(ic->journal_xor[i].page);
				clear_page(va);
				sg_set_buf(&sg[i], va, PAGE_SIZE);
			}
			/* The commit ids are the last element, so they get encrypted in place too. */
			sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
			memset(iv, 0x00, ivsize);

			skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv);
			init_completion(&comp.comp);
			comp.in_flight = (atomic_t)ATOMIC_INIT(1);
			/* A true return from do_crypt() means we must wait for the completion. */
			if (do_crypt(true, req, &comp))
				wait_for_completion(&comp.comp);
			kvfree(sg);
			r = dm_integrity_failed(ic);
			if (r) {
				*error = "Unable to encrypt journal";
				goto bad;
			}
			DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data");

			crypto_free_skcipher(ic->journal_crypt);
			ic->journal_crypt = NULL;
		} else {
			/*
			 * Any other block size: keep the cipher and prepare one
			 * preallocated request per journal section, each with its
			 * own derived IV.
			 */
			SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt);
			unsigned char iv[ivsize];
			unsigned crypt_len = roundup(ivsize, blocksize);

			/* Scratch buffer for IV derivation; freed at "bad". */
			crypt_data = kmalloc(crypt_len, GFP_KERNEL);
			if (!crypt_data) {
				*error = "Unable to allocate crypt data";
				r = -ENOMEM;
				goto bad;
			}

			skcipher_request_set_tfm(req, ic->journal_crypt);

			ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal);
			if (!ic->journal_scatterlist) {
				*error = "Unable to allocate sg list";
				r = -ENOMEM;
				goto bad;
			}
			ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io);
			if (!ic->journal_io_scatterlist) {
				*error = "Unable to allocate sg list";
				r = -ENOMEM;
				goto bad;
			}
			/* Zeroed so that dm_integrity_dtr() can skip slots never filled in. */
			ic->sk_requests = kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), GFP_KERNEL | __GFP_ZERO);
			if (!ic->sk_requests) {
				*error = "Unable to allocate sk requests";
				r = -ENOMEM;
				goto bad;
			}
			for (i = 0; i < ic->journal_sections; i++) {
				struct scatterlist sg;
				struct skcipher_request *section_req;
				__u32 section_le = cpu_to_le32(i);

				/*
				 * Derive the section IV: encrypt a zero-padded block
				 * holding the little-endian section number, using an
				 * all-zero IV.
				 */
				memset(iv, 0x00, ivsize);
				memset(crypt_data, 0x00, crypt_len);
				memcpy(crypt_data, &section_le, min((size_t)crypt_len, sizeof(section_le)));

				sg_init_one(&sg, crypt_data, crypt_len);
				skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv);
				init_completion(&comp.comp);
				comp.in_flight = (atomic_t)ATOMIC_INIT(1);
				if (do_crypt(true, req, &comp))
					wait_for_completion(&comp.comp);

				r = dm_integrity_failed(ic);
				if (r) {
					*error = "Unable to generate iv";
					goto bad;
				}

				section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
				if (!section_req) {
					*error = "Unable to allocate crypt request";
					r = -ENOMEM;
					goto bad;
				}
				/*
				 * Buffer of 2 * ivsize bytes: the derived IV is stored
				 * in the second half.  The first half is not written
				 * here (presumably a working copy used at request
				 * time - confirm against the users of sk_requests).
				 */
				section_req->iv = kmalloc(ivsize * 2, GFP_KERNEL);
				if (!section_req->iv) {
					skcipher_request_free(section_req);
					*error = "Unable to allocate iv";
					r = -ENOMEM;
					goto bad;
				}
				memcpy(section_req->iv + ivsize, crypt_data, ivsize);
				section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT;
				ic->sk_requests[i] = section_req;
				DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i);
			}
		}
	}

	/*
	 * Make sure all commit ids are pairwise distinct: whenever an id equals
	 * an earlier one it is incremented and re-tested against all earlier
	 * ids.
	 */
	for (i = 0; i < N_COMMIT_IDS; i++) {
		unsigned j;
retest_commit_id:
		for (j = 0; j < i; j++) {
			if (ic->commit_ids[j] == ic->commit_ids[i]) {
				ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1);
				goto retest_commit_id;
			}
		}
		DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]);
	}

	/* One journal_node per journal entry; the byte size must fit in unsigned long. */
	journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node);
	if (journal_tree_size > ULONG_MAX) {
		*error = "Journal doesn't fit into memory";
		r = -ENOMEM;
		goto bad;
	}
	ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL);
	if (!ic->journal_tree) {
		*error = "Could not allocate memory for journal tree";
		r = -ENOMEM;
	}
	/* Shared exit for success and failure; crypt_data may still be NULL here. */
bad:
	kfree(crypt_data);
	return r;
}
  2310. /*
  2311. * Construct a integrity mapping
  2312. *
  2313. * Arguments:
  2314. * device
  2315. * offset from the start of the device
  2316. * tag size
  2317. * D - direct writes, J - journal writes, R - recovery mode
  2318. * number of optional arguments
  2319. * optional arguments:
  2320. * journal_sectors
  2321. * interleave_sectors
  2322. * buffer_sectors
  2323. * journal_watermark
  2324. * commit_time
  2325. * internal_hash
  2326. * journal_crypt
  2327. * journal_mac
  2328. * block_size
  2329. */
  2330. static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
  2331. {
  2332. struct dm_integrity_c *ic;
  2333. char dummy;
  2334. int r;
  2335. unsigned extra_args;
  2336. struct dm_arg_set as;
  2337. static const struct dm_arg _args[] = {
  2338. {0, 9, "Invalid number of feature args"},
  2339. };
  2340. unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
  2341. bool should_write_sb;
  2342. __u64 threshold;
  2343. unsigned long long start;
  2344. #define DIRECT_ARGUMENTS 4
  2345. if (argc <= DIRECT_ARGUMENTS) {
  2346. ti->error = "Invalid argument count";
  2347. return -EINVAL;
  2348. }
  2349. ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL);
  2350. if (!ic) {
  2351. ti->error = "Cannot allocate integrity context";
  2352. return -ENOMEM;
  2353. }
  2354. ti->private = ic;
  2355. ti->per_io_data_size = sizeof(struct dm_integrity_io);
  2356. ic->in_progress = RB_ROOT;
  2357. init_waitqueue_head(&ic->endio_wait);
  2358. bio_list_init(&ic->flush_bio_list);
  2359. init_waitqueue_head(&ic->copy_to_journal_wait);
  2360. init_completion(&ic->crypto_backoff);
  2361. atomic64_set(&ic->number_of_mismatches, 0);
  2362. r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
  2363. if (r) {
  2364. ti->error = "Device lookup failed";
  2365. goto bad;
  2366. }
  2367. if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
  2368. ti->error = "Invalid starting offset";
  2369. r = -EINVAL;
  2370. goto bad;
  2371. }
  2372. ic->start = start;
  2373. if (strcmp(argv[2], "-")) {
  2374. if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) {
  2375. ti->error = "Invalid tag size";
  2376. r = -EINVAL;
  2377. goto bad;
  2378. }
  2379. }
  2380. if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R"))
  2381. ic->mode = argv[3][0];
  2382. else {
  2383. ti->error = "Invalid mode (expecting J, D, R)";
  2384. r = -EINVAL;
  2385. goto bad;
  2386. }
  2387. ic->device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT;
  2388. journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
  2389. ic->device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
  2390. interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
  2391. buffer_sectors = DEFAULT_BUFFER_SECTORS;
  2392. journal_watermark = DEFAULT_JOURNAL_WATERMARK;
  2393. sync_msec = DEFAULT_SYNC_MSEC;
  2394. ic->sectors_per_block = 1;
  2395. as.argc = argc - DIRECT_ARGUMENTS;
  2396. as.argv = argv + DIRECT_ARGUMENTS;
  2397. r = dm_read_arg_group(_args, &as, &extra_args, &ti->error);
  2398. if (r)
  2399. goto bad;
  2400. while (extra_args--) {
  2401. const char *opt_string;
  2402. unsigned val;
  2403. opt_string = dm_shift_arg(&as);
  2404. if (!opt_string) {
  2405. r = -EINVAL;
  2406. ti->error = "Not enough feature arguments";
  2407. goto bad;
  2408. }
  2409. if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1)
  2410. journal_sectors = val;
  2411. else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1)
  2412. interleave_sectors = val;
  2413. else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1)
  2414. buffer_sectors = val;
  2415. else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100)
  2416. journal_watermark = val;
  2417. else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
  2418. sync_msec = val;
  2419. else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
  2420. if (val < 1 << SECTOR_SHIFT ||
  2421. val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
  2422. (val & (val -1))) {
  2423. r = -EINVAL;
  2424. ti->error = "Invalid block_size argument";
  2425. goto bad;
  2426. }
  2427. ic->sectors_per_block = val >> SECTOR_SHIFT;
  2428. } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
  2429. r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
  2430. "Invalid internal_hash argument");
  2431. if (r)
  2432. goto bad;
  2433. } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
  2434. r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error,
  2435. "Invalid journal_crypt argument");
  2436. if (r)
  2437. goto bad;
  2438. } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
  2439. r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error,
  2440. "Invalid journal_mac argument");
  2441. if (r)
  2442. goto bad;
  2443. } else {
  2444. r = -EINVAL;
  2445. ti->error = "Invalid argument";
  2446. goto bad;
  2447. }
  2448. }
  2449. r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error,
  2450. "Invalid internal hash", "Error setting internal hash key");
  2451. if (r)
  2452. goto bad;
  2453. r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error,
  2454. "Invalid journal mac", "Error setting journal mac key");
  2455. if (r)
  2456. goto bad;
  2457. if (!ic->tag_size) {
  2458. if (!ic->internal_hash) {
  2459. ti->error = "Unknown tag size";
  2460. r = -EINVAL;
  2461. goto bad;
  2462. }
  2463. ic->tag_size = crypto_shash_digestsize(ic->internal_hash);
  2464. }
  2465. if (ic->tag_size > MAX_TAG_SIZE) {
  2466. ti->error = "Too big tag size";
  2467. r = -EINVAL;
  2468. goto bad;
  2469. }
  2470. if (!(ic->tag_size & (ic->tag_size - 1)))
  2471. ic->log2_tag_size = __ffs(ic->tag_size);
  2472. else
  2473. ic->log2_tag_size = -1;
  2474. ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
  2475. ic->autocommit_msec = sync_msec;
  2476. setup_timer(&ic->autocommit_timer, autocommit_fn, (unsigned long)ic);
  2477. ic->io = dm_io_client_create();
  2478. if (IS_ERR(ic->io)) {
  2479. r = PTR_ERR(ic->io);
  2480. ic->io = NULL;
  2481. ti->error = "Cannot allocate dm io";
  2482. goto bad;
  2483. }
  2484. ic->journal_io_mempool = mempool_create_slab_pool(JOURNAL_IO_MEMPOOL, journal_io_cache);
  2485. if (!ic->journal_io_mempool) {
  2486. r = -ENOMEM;
  2487. ti->error = "Cannot allocate mempool";
  2488. goto bad;
  2489. }
  2490. ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
  2491. WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
  2492. if (!ic->metadata_wq) {
  2493. ti->error = "Cannot allocate workqueue";
  2494. r = -ENOMEM;
  2495. goto bad;
  2496. }
  2497. /*
  2498. * If this workqueue were percpu, it would cause bio reordering
  2499. * and reduced performance.
  2500. */
  2501. ic->wait_wq = alloc_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
  2502. if (!ic->wait_wq) {
  2503. ti->error = "Cannot allocate workqueue";
  2504. r = -ENOMEM;
  2505. goto bad;
  2506. }
  2507. ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
  2508. if (!ic->commit_wq) {
  2509. ti->error = "Cannot allocate workqueue";
  2510. r = -ENOMEM;
  2511. goto bad;
  2512. }
  2513. INIT_WORK(&ic->commit_work, integrity_commit);
  2514. if (ic->mode == 'J') {
  2515. ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
  2516. if (!ic->writer_wq) {
  2517. ti->error = "Cannot allocate workqueue";
  2518. r = -ENOMEM;
  2519. goto bad;
  2520. }
  2521. INIT_WORK(&ic->writer_work, integrity_writer);
  2522. }
  2523. ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL);
  2524. if (!ic->sb) {
  2525. r = -ENOMEM;
  2526. ti->error = "Cannot allocate superblock area";
  2527. goto bad;
  2528. }
  2529. r = sync_rw_sb(ic, REQ_OP_READ, 0);
  2530. if (r) {
  2531. ti->error = "Error reading superblock";
  2532. goto bad;
  2533. }
  2534. should_write_sb = false;
  2535. if (memcmp(ic->sb->magic, SB_MAGIC, 8)) {
  2536. if (ic->mode != 'R') {
  2537. if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) {
  2538. r = -EINVAL;
  2539. ti->error = "The device is not initialized";
  2540. goto bad;
  2541. }
  2542. }
  2543. r = initialize_superblock(ic, journal_sectors, interleave_sectors);
  2544. if (r) {
  2545. ti->error = "Could not initialize superblock";
  2546. goto bad;
  2547. }
  2548. if (ic->mode != 'R')
  2549. should_write_sb = true;
  2550. }
  2551. if (ic->sb->version != SB_VERSION) {
  2552. r = -EINVAL;
  2553. ti->error = "Unknown version";
  2554. goto bad;
  2555. }
  2556. if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) {
  2557. r = -EINVAL;
  2558. ti->error = "Tag size doesn't match the information in superblock";
  2559. goto bad;
  2560. }
  2561. if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) {
  2562. r = -EINVAL;
  2563. ti->error = "Block size doesn't match the information in superblock";
  2564. goto bad;
  2565. }
  2566. if (!le32_to_cpu(ic->sb->journal_sections)) {
  2567. r = -EINVAL;
  2568. ti->error = "Corrupted superblock, journal_sections is 0";
  2569. goto bad;
  2570. }
  2571. /* make sure that ti->max_io_len doesn't overflow */
  2572. if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
  2573. ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {
  2574. r = -EINVAL;
  2575. ti->error = "Invalid interleave_sectors in the superblock";
  2576. goto bad;
  2577. }
  2578. ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
  2579. if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) {
  2580. /* test for overflow */
  2581. r = -EINVAL;
  2582. ti->error = "The superblock has 64-bit device size, but the kernel was compiled with 32-bit sectors";
  2583. goto bad;
  2584. }
  2585. if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) {
  2586. r = -EINVAL;
  2587. ti->error = "Journal mac mismatch";
  2588. goto bad;
  2589. }
  2590. r = calculate_device_limits(ic);
  2591. if (r) {
  2592. ti->error = "The device is too small";
  2593. goto bad;
  2594. }
  2595. if (ti->len > ic->provided_data_sectors) {
  2596. r = -EINVAL;
  2597. ti->error = "Not enough provided sectors for requested mapping size";
  2598. goto bad;
  2599. }
  2600. if (!buffer_sectors)
  2601. buffer_sectors = 1;
  2602. ic->log2_buffer_sectors = min3((int)__fls(buffer_sectors), (int)__ffs(ic->metadata_run), 31 - SECTOR_SHIFT);
  2603. threshold = (__u64)ic->journal_entries * (100 - journal_watermark);
  2604. threshold += 50;
  2605. do_div(threshold, 100);
  2606. ic->free_sectors_threshold = threshold;
  2607. DEBUG_print("initialized:\n");
  2608. DEBUG_print(" integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size));
  2609. DEBUG_print(" journal_entry_size %u\n", ic->journal_entry_size);
  2610. DEBUG_print(" journal_entries_per_sector %u\n", ic->journal_entries_per_sector);
  2611. DEBUG_print(" journal_section_entries %u\n", ic->journal_section_entries);
  2612. DEBUG_print(" journal_section_sectors %u\n", ic->journal_section_sectors);
  2613. DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections));
  2614. DEBUG_print(" journal_entries %u\n", ic->journal_entries);
  2615. DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
  2616. DEBUG_print(" device_sectors 0x%llx\n", (unsigned long long)ic->device_sectors);
  2617. DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors);
  2618. DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run);
  2619. DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run);
  2620. DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors,
  2621. (unsigned long long)ic->provided_data_sectors);
  2622. DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
  2623. ic->bufio = dm_bufio_client_create(ic->dev->bdev, 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors),
  2624. 1, 0, NULL, NULL);
  2625. if (IS_ERR(ic->bufio)) {
  2626. r = PTR_ERR(ic->bufio);
  2627. ti->error = "Cannot initialize dm-bufio";
  2628. ic->bufio = NULL;
  2629. goto bad;
  2630. }
  2631. dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
  2632. if (ic->mode != 'R') {
  2633. r = create_journal(ic, &ti->error);
  2634. if (r)
  2635. goto bad;
  2636. }
  2637. if (should_write_sb) {
  2638. int r;
  2639. init_journal(ic, 0, ic->journal_sections, 0);
  2640. r = dm_integrity_failed(ic);
  2641. if (unlikely(r)) {
  2642. ti->error = "Error initializing journal";
  2643. goto bad;
  2644. }
  2645. r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
  2646. if (r) {
  2647. ti->error = "Error initializing superblock";
  2648. goto bad;
  2649. }
  2650. ic->just_formatted = true;
  2651. }
  2652. r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
  2653. if (r)
  2654. goto bad;
  2655. if (!ic->internal_hash)
  2656. dm_integrity_set(ti, ic);
  2657. ti->num_flush_bios = 1;
  2658. ti->flush_supported = true;
  2659. return 0;
  2660. bad:
  2661. dm_integrity_dtr(ti);
  2662. return r;
  2663. }
  2664. static void dm_integrity_dtr(struct dm_target *ti)
  2665. {
  2666. struct dm_integrity_c *ic = ti->private;
  2667. BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
  2668. if (ic->metadata_wq)
  2669. destroy_workqueue(ic->metadata_wq);
  2670. if (ic->wait_wq)
  2671. destroy_workqueue(ic->wait_wq);
  2672. if (ic->commit_wq)
  2673. destroy_workqueue(ic->commit_wq);
  2674. if (ic->writer_wq)
  2675. destroy_workqueue(ic->writer_wq);
  2676. if (ic->bufio)
  2677. dm_bufio_client_destroy(ic->bufio);
  2678. mempool_destroy(ic->journal_io_mempool);
  2679. if (ic->io)
  2680. dm_io_client_destroy(ic->io);
  2681. if (ic->dev)
  2682. dm_put_device(ti, ic->dev);
  2683. dm_integrity_free_page_list(ic, ic->journal);
  2684. dm_integrity_free_page_list(ic, ic->journal_io);
  2685. dm_integrity_free_page_list(ic, ic->journal_xor);
  2686. if (ic->journal_scatterlist)
  2687. dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
  2688. if (ic->journal_io_scatterlist)
  2689. dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist);
  2690. if (ic->sk_requests) {
  2691. unsigned i;
  2692. for (i = 0; i < ic->journal_sections; i++) {
  2693. struct skcipher_request *req = ic->sk_requests[i];
  2694. if (req) {
  2695. kzfree(req->iv);
  2696. skcipher_request_free(req);
  2697. }
  2698. }
  2699. kvfree(ic->sk_requests);
  2700. }
  2701. kvfree(ic->journal_tree);
  2702. if (ic->sb)
  2703. free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT);
  2704. if (ic->internal_hash)
  2705. crypto_free_shash(ic->internal_hash);
  2706. free_alg(&ic->internal_hash_alg);
  2707. if (ic->journal_crypt)
  2708. crypto_free_skcipher(ic->journal_crypt);
  2709. free_alg(&ic->journal_crypt_alg);
  2710. if (ic->journal_mac)
  2711. crypto_free_shash(ic->journal_mac);
  2712. free_alg(&ic->journal_mac_alg);
  2713. kfree(ic);
  2714. }
/*
 * Device-mapper target description for "integrity": name, version, feature
 * flags and the callbacks implemented in this file.  Registered in
 * dm_integrity_init() and unregistered in dm_integrity_exit().
 */
static struct target_type integrity_target = {
	.name = "integrity",
	.version = {1, 1, 0},
	.module = THIS_MODULE,
	.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
	.ctr = dm_integrity_ctr,
	.dtr = dm_integrity_dtr,
	.map = dm_integrity_map,
	.postsuspend = dm_integrity_postsuspend,
	.resume = dm_integrity_resume,
	.status = dm_integrity_status,
	.iterate_devices = dm_integrity_iterate_devices,
	.io_hints = dm_integrity_io_hints,
};
  2729. int __init dm_integrity_init(void)
  2730. {
  2731. int r;
  2732. journal_io_cache = kmem_cache_create("integrity_journal_io",
  2733. sizeof(struct journal_io), 0, 0, NULL);
  2734. if (!journal_io_cache) {
  2735. DMERR("can't allocate journal io cache");
  2736. return -ENOMEM;
  2737. }
  2738. r = dm_register_target(&integrity_target);
  2739. if (r < 0)
  2740. DMERR("register failed %d", r);
  2741. return r;
  2742. }
  2743. void dm_integrity_exit(void)
  2744. {
  2745. dm_unregister_target(&integrity_target);
  2746. kmem_cache_destroy(journal_io_cache);
  2747. }
/* Module entry/exit points and metadata. */
module_init(dm_integrity_init);
module_exit(dm_integrity_exit);

MODULE_AUTHOR("Milan Broz");
MODULE_AUTHOR("Mikulas Patocka");
MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension");
MODULE_LICENSE("GPL");