/* scsi_dh_alua.c — Generic SCSI-3 ALUA SCSI device handler (~28 KB) */
  1. /*
  2. * Generic SCSI-3 ALUA SCSI Device Handler
  3. *
  4. * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
  5. * All rights reserved.
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  20. *
  21. */
  22. #include <linux/slab.h>
  23. #include <linux/delay.h>
  24. #include <linux/module.h>
  25. #include <asm/unaligned.h>
  26. #include <scsi/scsi.h>
  27. #include <scsi/scsi_dbg.h>
  28. #include <scsi/scsi_eh.h>
  29. #include <scsi/scsi_dh.h>
  30. #define ALUA_DH_NAME "alua"
  31. #define ALUA_DH_VER "1.3"
  32. #define TPGS_STATE_OPTIMIZED 0x0
  33. #define TPGS_STATE_NONOPTIMIZED 0x1
  34. #define TPGS_STATE_STANDBY 0x2
  35. #define TPGS_STATE_UNAVAILABLE 0x3
  36. #define TPGS_STATE_LBA_DEPENDENT 0x4
  37. #define TPGS_STATE_OFFLINE 0xe
  38. #define TPGS_STATE_TRANSITIONING 0xf
  39. #define TPGS_SUPPORT_NONE 0x00
  40. #define TPGS_SUPPORT_OPTIMIZED 0x01
  41. #define TPGS_SUPPORT_NONOPTIMIZED 0x02
  42. #define TPGS_SUPPORT_STANDBY 0x04
  43. #define TPGS_SUPPORT_UNAVAILABLE 0x08
  44. #define TPGS_SUPPORT_LBA_DEPENDENT 0x10
  45. #define TPGS_SUPPORT_OFFLINE 0x40
  46. #define TPGS_SUPPORT_TRANSITION 0x80
  47. #define RTPG_FMT_MASK 0x70
  48. #define RTPG_FMT_EXT_HDR 0x10
  49. #define TPGS_MODE_UNINITIALIZED -1
  50. #define TPGS_MODE_NONE 0x0
  51. #define TPGS_MODE_IMPLICIT 0x1
  52. #define TPGS_MODE_EXPLICIT 0x2
  53. #define ALUA_RTPG_SIZE 128
  54. #define ALUA_FAILOVER_TIMEOUT 60
  55. #define ALUA_FAILOVER_RETRIES 5
  56. #define ALUA_RTPG_DELAY_MSECS 5
  57. /* device handler flags */
  58. #define ALUA_OPTIMIZE_STPG 0x01
  59. #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02
  60. #define ALUA_SYNC_STPG 0x04
  61. /* State machine flags */
  62. #define ALUA_PG_RUN_RTPG 0x10
  63. #define ALUA_PG_RUN_STPG 0x20
  64. #define ALUA_PG_RUNNING 0x40
  65. static uint optimize_stpg;
  66. module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
  67. MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
  68. static LIST_HEAD(port_group_list);
  69. static DEFINE_SPINLOCK(port_group_lock);
  70. static struct workqueue_struct *kaluad_wq;
  71. static struct workqueue_struct *kaluad_sync_wq;
/*
 * Per-target-port-group state, shared between all scsi_device
 * instances that resolve to the same group on the same array
 * (matched by VPD device identifier + group id).
 */
struct alua_port_group {
	struct kref		kref;		/* lifetime; released via release_port_group() */
	struct rcu_head		rcu;		/* deferred free (kfree_rcu) for RCU readers */
	struct list_head	node;		/* entry on port_group_list, under port_group_lock */
	unsigned char		device_id_str[256];	/* device identifier from VPD page 0x83 */
	int			device_id_len;	/* valid length of device_id_str */
	int			group_id;	/* target port group id */
	int			tpgs;		/* TPGS_MODE_* support reported by the device */
	int			state;		/* last reported TPGS_STATE_* */
	int			pref;		/* preferred-path bit from the RTPG descriptor */
	unsigned flags; /* used for optimizing STPG */
	unsigned char		transition_tmo;	/* transition timeout (seconds) from RTPG */
	unsigned long		expiry;		/* jiffies deadline for a state transition */
	unsigned long		interval;	/* RTPG retry interval (seconds) */
	struct delayed_work	rtpg_work;	/* deferred RTPG/STPG state machine */
	spinlock_t		lock;		/* protects flags, state, rtpg_list, rtpg_sdev */
	struct list_head	rtpg_list;	/* pending alua_queue_data completions */
	struct scsi_device	*rtpg_sdev;	/* device the worker issues commands through */
};
/* Per-scsi_device handler data; links a device to its port group. */
struct alua_dh_data {
	struct alua_port_group	*pg;		/* RCU-protected; updates serialized by pg_lock */
	int			group_id;	/* cached target port group id */
	spinlock_t		pg_lock;	/* serializes updates of ->pg */
	struct scsi_device	*sdev;		/* back-pointer to the owning device */
	int			init_error;	/* result of the last alua_initialize() */
	struct mutex		init_mutex;	/* serializes (re-)initialization */
};
/* One queued path-activation request, completed by alua_rtpg_work(). */
struct alua_queue_data {
	struct list_head	entry;		/* entry on pg->rtpg_list */
	activate_complete	callback_fn;	/* activation completion callback */
	void			*callback_data;	/* opaque argument for callback_fn */
};
  104. #define ALUA_POLICY_SWITCH_CURRENT 0
  105. #define ALUA_POLICY_SWITCH_ALL 1
  106. static void alua_rtpg_work(struct work_struct *work);
  107. static void alua_rtpg_queue(struct alua_port_group *pg,
  108. struct scsi_device *sdev,
  109. struct alua_queue_data *qdata, bool force);
  110. static void alua_check(struct scsi_device *sdev, bool force);
/*
 * release_port_group - kref release callback for an alua_port_group
 * @kref: embedded kref of the group whose last reference was dropped
 *
 * Flushes any still-pending RTPG work, unlinks the group from the
 * global list and frees it after an RCU grace period, since readers
 * may still hold an rcu_dereference()d pointer to it.
 */
static void release_port_group(struct kref *kref)
{
	struct alua_port_group *pg;

	pg = container_of(kref, struct alua_port_group, kref);
	/*
	 * NOTE(review): rtpg_sdev is read here without taking pg->lock;
	 * presumably safe because the refcount just reached zero, but
	 * worth confirming against the worker's teardown ordering.
	 */
	if (pg->rtpg_sdev)
		flush_delayed_work(&pg->rtpg_work);
	spin_lock(&port_group_lock);
	list_del(&pg->node);
	spin_unlock(&port_group_lock);
	kfree_rcu(pg, rcu);
}
  122. /*
  123. * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
  124. * @sdev: sdev the command should be sent to
  125. */
  126. static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
  127. int bufflen, struct scsi_sense_hdr *sshdr, int flags)
  128. {
  129. u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)];
  130. int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
  131. REQ_FAILFAST_DRIVER;
  132. /* Prepare the command. */
  133. memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN));
  134. cdb[0] = MAINTENANCE_IN;
  135. if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
  136. cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
  137. else
  138. cdb[1] = MI_REPORT_TARGET_PGS;
  139. put_unaligned_be32(bufflen, &cdb[6]);
  140. return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE,
  141. buff, bufflen, sshdr,
  142. ALUA_FAILOVER_TIMEOUT * HZ,
  143. ALUA_FAILOVER_RETRIES, NULL, req_flags);
  144. }
  145. /*
  146. * submit_stpg - Issue a SET TARGET PORT GROUP command
  147. *
  148. * Currently we're only setting the current target port group state
  149. * to 'active/optimized' and let the array firmware figure out
  150. * the states of the remaining groups.
  151. */
  152. static int submit_stpg(struct scsi_device *sdev, int group_id,
  153. struct scsi_sense_hdr *sshdr)
  154. {
  155. u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)];
  156. unsigned char stpg_data[8];
  157. int stpg_len = 8;
  158. int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
  159. REQ_FAILFAST_DRIVER;
  160. /* Prepare the data buffer */
  161. memset(stpg_data, 0, stpg_len);
  162. stpg_data[4] = TPGS_STATE_OPTIMIZED & 0x0f;
  163. put_unaligned_be16(group_id, &stpg_data[6]);
  164. /* Prepare the command. */
  165. memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT));
  166. cdb[0] = MAINTENANCE_OUT;
  167. cdb[1] = MO_SET_TARGET_PGS;
  168. put_unaligned_be32(stpg_len, &cdb[6]);
  169. return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE,
  170. stpg_data, stpg_len,
  171. sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
  172. ALUA_FAILOVER_RETRIES, NULL, req_flags);
  173. }
/*
 * alua_find_get_pg - look up a port group and take a reference
 * @id_str: device identifier (VPD page 0x83) to match
 * @id_size: length of @id_str
 * @group_id: target port group id to match
 *
 * Returns the matching group with its kref elevated, or NULL.
 * Callers hold port_group_lock to keep the list stable while
 * iterating (see alua_alloc_pg() and alua_rtpg()).
 */
struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
					 int group_id)
{
	struct alua_port_group *pg;

	list_for_each_entry(pg, &port_group_list, node) {
		if (pg->group_id != group_id)
			continue;
		if (pg->device_id_len != id_size)
			continue;
		if (strncmp(pg->device_id_str, id_str, id_size))
			continue;
		/* Skip groups whose last reference is already being dropped */
		if (!kref_get_unless_zero(&pg->kref))
			continue;
		return pg;
	}
	return NULL;
}
/*
 * alua_alloc_pg - Allocate a new port_group structure
 * @sdev: scsi device
 * @group_id: port group id
 * @tpgs: TPGS mode as determined by alua_check_tpgs()
 *
 * Allocate a new port_group structure for a given
 * device, or return an already-registered identical group with an
 * extra reference. Returns ERR_PTR(-ENOMEM) on allocation failure
 * and ERR_PTR(-ENXIO) when no device identification is available.
 */
struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
				      int group_id, int tpgs)
{
	struct alua_port_group *pg, *tmp_pg;

	pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
	if (!pg)
		return ERR_PTR(-ENOMEM);

	pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
					    sizeof(pg->device_id_str));
	if (pg->device_id_len <= 0) {
		/*
		 * Internal error: TPGS supported but no device
		 * identification found. Disable ALUA support.
		 */
		kfree(pg);
		sdev_printk(KERN_INFO, sdev,
			    "%s: No device descriptors found\n",
			    ALUA_DH_NAME);
		return ERR_PTR(-ENXIO);
	}
	pg->group_id = group_id;
	pg->tpgs = tpgs;
	/* Optimistic default until the first RTPG reports real state. */
	pg->state = TPGS_STATE_OPTIMIZED;
	if (optimize_stpg)
		pg->flags |= ALUA_OPTIMIZE_STPG;
	kref_init(&pg->kref);
	INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
	INIT_LIST_HEAD(&pg->rtpg_list);
	INIT_LIST_HEAD(&pg->node);
	spin_lock_init(&pg->lock);

	/* Under the lock, check whether an identical group already exists. */
	spin_lock(&port_group_lock);
	tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
				  group_id);
	if (tmp_pg) {
		spin_unlock(&port_group_lock);
		/* Lost the race: use the existing group, drop ours. */
		kfree(pg);
		return tmp_pg;
	}

	list_add(&pg->node, &port_group_list);
	spin_unlock(&port_group_lock);

	return pg;
}
  242. /*
  243. * alua_check_tpgs - Evaluate TPGS setting
  244. * @sdev: device to be checked
  245. *
  246. * Examine the TPGS setting of the sdev to find out if ALUA
  247. * is supported.
  248. */
  249. static int alua_check_tpgs(struct scsi_device *sdev)
  250. {
  251. int tpgs = TPGS_MODE_NONE;
  252. /*
  253. * ALUA support for non-disk devices is fraught with
  254. * difficulties, so disable it for now.
  255. */
  256. if (sdev->type != TYPE_DISK) {
  257. sdev_printk(KERN_INFO, sdev,
  258. "%s: disable for non-disk devices\n",
  259. ALUA_DH_NAME);
  260. return tpgs;
  261. }
  262. tpgs = scsi_device_tpgs(sdev);
  263. switch (tpgs) {
  264. case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
  265. sdev_printk(KERN_INFO, sdev,
  266. "%s: supports implicit and explicit TPGS\n",
  267. ALUA_DH_NAME);
  268. break;
  269. case TPGS_MODE_EXPLICIT:
  270. sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
  271. ALUA_DH_NAME);
  272. break;
  273. case TPGS_MODE_IMPLICIT:
  274. sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
  275. ALUA_DH_NAME);
  276. break;
  277. case TPGS_MODE_NONE:
  278. sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
  279. ALUA_DH_NAME);
  280. break;
  281. default:
  282. sdev_printk(KERN_INFO, sdev,
  283. "%s: unsupported TPGS setting %d\n",
  284. ALUA_DH_NAME, tpgs);
  285. tpgs = TPGS_MODE_NONE;
  286. break;
  287. }
  288. return tpgs;
  289. }
/*
 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
 * @sdev: device to be checked
 * @h: alua device handler data for @sdev
 * @tpgs: TPGS mode as determined by alua_check_tpgs()
 *
 * Extract the relative target port and the target port group
 * descriptor from the list of identificators, attach the device
 * to its (possibly shared) port group and schedule an initial RTPG.
 */
static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
			  int tpgs)
{
	int rel_port = -1, group_id;
	struct alua_port_group *pg, *old_pg = NULL;

	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
	if (group_id < 0) {
		/*
		 * Internal error; TPGS supported but required
		 * VPD identification descriptors not present.
		 * Disable ALUA support
		 */
		sdev_printk(KERN_INFO, sdev,
			    "%s: No target port descriptors found\n",
			    ALUA_DH_NAME);
		return SCSI_DH_DEV_UNSUPP;
	}

	/* Returns a new or an existing (referenced) group, or ERR_PTR. */
	pg = alua_alloc_pg(sdev, group_id, tpgs);
	if (IS_ERR(pg)) {
		if (PTR_ERR(pg) == -ENOMEM)
			return SCSI_DH_NOMEM;
		return SCSI_DH_DEV_UNSUPP;
	}
	sdev_printk(KERN_INFO, sdev,
		    "%s: device %s port group %x rel port %x\n",
		    ALUA_DH_NAME, pg->device_id_str, group_id, rel_port);

	/* Check for existing port group references */
	spin_lock(&h->pg_lock);
	old_pg = h->pg;
	if (old_pg != pg) {
		/* port group has changed. Update to new port group */
		rcu_assign_pointer(h->pg, pg);
	}
	if (sdev->synchronous_alua)
		pg->flags |= ALUA_SYNC_STPG;
	/*
	 * NOTE(review): called with pg_lock (a spinlock) held; looks
	 * safe since alua_rtpg_queue() only takes pg->lock and queues
	 * work, but confirm no sleeping path hides inside it.
	 */
	alua_rtpg_queue(h->pg, sdev, NULL, true);
	spin_unlock(&h->pg_lock);

	/* Drop the reference held via the previous h->pg, if any. */
	if (old_pg)
		kref_put(&old_pg->kref, release_port_group);

	return SCSI_DH_OK;
}
  338. static char print_alua_state(int state)
  339. {
  340. switch (state) {
  341. case TPGS_STATE_OPTIMIZED:
  342. return 'A';
  343. case TPGS_STATE_NONOPTIMIZED:
  344. return 'N';
  345. case TPGS_STATE_STANDBY:
  346. return 'S';
  347. case TPGS_STATE_UNAVAILABLE:
  348. return 'U';
  349. case TPGS_STATE_LBA_DEPENDENT:
  350. return 'L';
  351. case TPGS_STATE_OFFLINE:
  352. return 'O';
  353. case TPGS_STATE_TRANSITIONING:
  354. return 'T';
  355. default:
  356. return 'X';
  357. }
  358. }
/*
 * alua_check_sense - sense-code hook for the ALUA handler
 * @sdev: device the failed command was sent to
 * @sense_hdr: decoded sense data
 *
 * Recognizes sense codes that indicate an ALUA state change or
 * transition, schedules a state re-check where needed, and returns
 * a midlayer disposition (NEEDS_RETRY / ADD_TO_MLQUEUE), or
 * SCSI_RETURN_NOT_HANDLED to fall through to default handling.
 */
static int alua_check_sense(struct scsi_device *sdev,
			    struct scsi_sense_hdr *sense_hdr)
{
	switch (sense_hdr->sense_key) {
	case NOT_READY:
		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
			/*
			 * LUN Not Accessible - ALUA state transition
			 */
			alua_check(sdev, false);
			return NEEDS_RETRY;
		}
		break;
	case UNIT_ATTENTION:
		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
			/*
			 * Power On, Reset, or Bus Device Reset.
			 * Might have obscured a state transition,
			 * so schedule a recheck.
			 */
			alua_check(sdev, true);
			return ADD_TO_MLQUEUE;
		}
		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
			/*
			 * Device internal reset
			 */
			return ADD_TO_MLQUEUE;
		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
			/*
			 * Mode Parameters Changed
			 */
			return ADD_TO_MLQUEUE;
		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
			/*
			 * ALUA state changed
			 */
			alua_check(sdev, true);
			return ADD_TO_MLQUEUE;
		}
		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
			/*
			 * Implicit ALUA state transition failed
			 */
			alua_check(sdev, true);
			return ADD_TO_MLQUEUE;
		}
		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
			/*
			 * Inquiry data has changed
			 */
			return ADD_TO_MLQUEUE;
		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
			/*
			 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
			 * when switching controllers on targets like
			 * Intel Multi-Flex. We can just retry.
			 */
			return ADD_TO_MLQUEUE;
		break;
	}

	return SCSI_RETURN_NOT_HANDLED;
}
  422. /*
  423. * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
  424. * @sdev: the device to be evaluated.
  425. *
  426. * Evaluate the Target Port Group State.
  427. * Returns SCSI_DH_DEV_OFFLINED if the path is
  428. * found to be unusable.
  429. */
  430. static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
  431. {
  432. struct scsi_sense_hdr sense_hdr;
  433. struct alua_port_group *tmp_pg;
  434. int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE;
  435. unsigned char *desc, *buff;
  436. unsigned err, retval;
  437. unsigned int tpg_desc_tbl_off;
  438. unsigned char orig_transition_tmo;
  439. unsigned long flags;
  440. if (!pg->expiry) {
  441. unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
  442. if (pg->transition_tmo)
  443. transition_tmo = pg->transition_tmo * HZ;
  444. pg->expiry = round_jiffies_up(jiffies + transition_tmo);
  445. }
  446. buff = kzalloc(bufflen, GFP_KERNEL);
  447. if (!buff)
  448. return SCSI_DH_DEV_TEMP_BUSY;
  449. retry:
  450. retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
  451. if (retval) {
  452. if (!scsi_sense_valid(&sense_hdr)) {
  453. sdev_printk(KERN_INFO, sdev,
  454. "%s: rtpg failed, result %d\n",
  455. ALUA_DH_NAME, retval);
  456. kfree(buff);
  457. if (driver_byte(retval) == DRIVER_ERROR)
  458. return SCSI_DH_DEV_TEMP_BUSY;
  459. return SCSI_DH_IO;
  460. }
  461. /*
  462. * submit_rtpg() has failed on existing arrays
  463. * when requesting extended header info, and
  464. * the array doesn't support extended headers,
  465. * even though it shouldn't according to T10.
  466. * The retry without rtpg_ext_hdr_req set
  467. * handles this.
  468. */
  469. if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
  470. sense_hdr.sense_key == ILLEGAL_REQUEST &&
  471. sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) {
  472. pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
  473. goto retry;
  474. }
  475. /*
  476. * Retry on ALUA state transition or if any
  477. * UNIT ATTENTION occurred.
  478. */
  479. if (sense_hdr.sense_key == NOT_READY &&
  480. sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
  481. err = SCSI_DH_RETRY;
  482. else if (sense_hdr.sense_key == UNIT_ATTENTION)
  483. err = SCSI_DH_RETRY;
  484. if (err == SCSI_DH_RETRY &&
  485. pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
  486. sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
  487. ALUA_DH_NAME);
  488. scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
  489. return err;
  490. }
  491. sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
  492. ALUA_DH_NAME);
  493. scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
  494. kfree(buff);
  495. pg->expiry = 0;
  496. return SCSI_DH_IO;
  497. }
  498. len = get_unaligned_be32(&buff[0]) + 4;
  499. if (len > bufflen) {
  500. /* Resubmit with the correct length */
  501. kfree(buff);
  502. bufflen = len;
  503. buff = kmalloc(bufflen, GFP_KERNEL);
  504. if (!buff) {
  505. sdev_printk(KERN_WARNING, sdev,
  506. "%s: kmalloc buffer failed\n",__func__);
  507. /* Temporary failure, bypass */
  508. pg->expiry = 0;
  509. return SCSI_DH_DEV_TEMP_BUSY;
  510. }
  511. goto retry;
  512. }
  513. orig_transition_tmo = pg->transition_tmo;
  514. if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
  515. pg->transition_tmo = buff[5];
  516. else
  517. pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
  518. if (orig_transition_tmo != pg->transition_tmo) {
  519. sdev_printk(KERN_INFO, sdev,
  520. "%s: transition timeout set to %d seconds\n",
  521. ALUA_DH_NAME, pg->transition_tmo);
  522. pg->expiry = jiffies + pg->transition_tmo * HZ;
  523. }
  524. if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
  525. tpg_desc_tbl_off = 8;
  526. else
  527. tpg_desc_tbl_off = 4;
  528. for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
  529. k < len;
  530. k += off, desc += off) {
  531. u16 group_id = get_unaligned_be16(&desc[2]);
  532. spin_lock_irqsave(&port_group_lock, flags);
  533. tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
  534. group_id);
  535. spin_unlock_irqrestore(&port_group_lock, flags);
  536. if (tmp_pg) {
  537. if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
  538. if ((tmp_pg == pg) ||
  539. !(tmp_pg->flags & ALUA_PG_RUNNING)) {
  540. tmp_pg->state = desc[0] & 0x0f;
  541. tmp_pg->pref = desc[0] >> 7;
  542. }
  543. if (tmp_pg == pg)
  544. valid_states = desc[1];
  545. spin_unlock_irqrestore(&tmp_pg->lock, flags);
  546. }
  547. kref_put(&tmp_pg->kref, release_port_group);
  548. }
  549. off = 8 + (desc[7] * 4);
  550. }
  551. spin_lock_irqsave(&pg->lock, flags);
  552. sdev_printk(KERN_INFO, sdev,
  553. "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
  554. ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
  555. pg->pref ? "preferred" : "non-preferred",
  556. valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
  557. valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
  558. valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
  559. valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
  560. valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
  561. valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
  562. valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
  563. switch (pg->state) {
  564. case TPGS_STATE_TRANSITIONING:
  565. if (time_before(jiffies, pg->expiry)) {
  566. /* State transition, retry */
  567. pg->interval = 2;
  568. err = SCSI_DH_RETRY;
  569. } else {
  570. /* Transitioning time exceeded, set port to standby */
  571. err = SCSI_DH_IO;
  572. pg->state = TPGS_STATE_STANDBY;
  573. pg->expiry = 0;
  574. }
  575. break;
  576. case TPGS_STATE_OFFLINE:
  577. /* Path unusable */
  578. err = SCSI_DH_DEV_OFFLINED;
  579. pg->expiry = 0;
  580. break;
  581. default:
  582. /* Useable path if active */
  583. err = SCSI_DH_OK;
  584. pg->expiry = 0;
  585. break;
  586. }
  587. spin_unlock_irqrestore(&pg->lock, flags);
  588. kfree(buff);
  589. return err;
  590. }
/*
 * alua_stpg - Issue a SET TARGET PORT GROUP command
 * @sdev: device on the path being activated
 * @pg: port group to be switched to active/optimized
 *
 * Issue a SET TARGET PORT GROUP command and evaluate the
 * response. Returns SCSI_DH_RETRY per default to trigger
 * a re-evaluation of the target group state or SCSI_DH_OK
 * if no further action needs to be taken.
 */
static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
{
	int retval;
	struct scsi_sense_hdr sense_hdr;

	if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
		/* Only implicit ALUA supported, retry */
		return SCSI_DH_RETRY;
	}
	/* Decide from the current state whether an STPG is needed at all. */
	switch (pg->state) {
	case TPGS_STATE_OPTIMIZED:
		/* Already active/optimized: nothing to do. */
		return SCSI_DH_OK;
	case TPGS_STATE_NONOPTIMIZED:
		/*
		 * With ALUA_OPTIMIZE_STPG set, a non-preferred path on an
		 * implicitly-managed array is acceptable as-is; skip STPG.
		 */
		if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
		    !pg->pref &&
		    (pg->tpgs & TPGS_MODE_IMPLICIT))
			return SCSI_DH_OK;
		break;
	case TPGS_STATE_STANDBY:
	case TPGS_STATE_UNAVAILABLE:
		/* Break out of the switch to issue the STPG. */
		break;
	case TPGS_STATE_OFFLINE:
		return SCSI_DH_IO;
	case TPGS_STATE_TRANSITIONING:
		break;
	default:
		sdev_printk(KERN_INFO, sdev,
			    "%s: stpg failed, unhandled TPGS state %d",
			    ALUA_DH_NAME, pg->state);
		return SCSI_DH_NOSYS;
	}
	retval = submit_stpg(sdev, pg->group_id, &sense_hdr);

	if (retval) {
		if (!scsi_sense_valid(&sense_hdr)) {
			sdev_printk(KERN_INFO, sdev,
				    "%s: stpg failed, result %d",
				    ALUA_DH_NAME, retval);
			if (driver_byte(retval) == DRIVER_ERROR)
				return SCSI_DH_DEV_TEMP_BUSY;
		} else {
			sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
				    ALUA_DH_NAME);
			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
		}
	}
	/* Retry RTPG */
	return SCSI_DH_RETRY;
}
/*
 * alua_rtpg_work - deferred ALUA state machine
 * @work: rtpg_work embedded in an alua_port_group
 *
 * Runs the pending RTPG and, if requested, the STPG for a port
 * group, re-queueing itself while retries are needed, and finally
 * completes all queued activation callbacks with the result.
 */
static void alua_rtpg_work(struct work_struct *work)
{
	struct alua_port_group *pg =
		container_of(work, struct alua_port_group, rtpg_work.work);
	struct scsi_device *sdev;
	LIST_HEAD(qdata_list);
	int err = SCSI_DH_OK;
	struct alua_queue_data *qdata, *tmp;
	unsigned long flags;
	struct workqueue_struct *alua_wq = kaluad_wq;

	spin_lock_irqsave(&pg->lock, flags);
	sdev = pg->rtpg_sdev;
	if (!sdev) {
		/* Nothing should be scheduled without a device attached. */
		WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
		WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
		spin_unlock_irqrestore(&pg->lock, flags);
		return;
	}
	if (pg->flags & ALUA_SYNC_STPG)
		alua_wq = kaluad_sync_wq;
	pg->flags |= ALUA_PG_RUNNING;
	if (pg->flags & ALUA_PG_RUN_RTPG) {
		pg->flags &= ~ALUA_PG_RUN_RTPG;
		/* Drop the lock while issuing the (blocking) RTPG. */
		spin_unlock_irqrestore(&pg->lock, flags);
		err = alua_rtpg(sdev, pg);
		spin_lock_irqsave(&pg->lock, flags);
		/* Re-queue if a retry is needed or a new RTPG was requested. */
		if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
			pg->flags &= ~ALUA_PG_RUNNING;
			pg->flags |= ALUA_PG_RUN_RTPG;
			spin_unlock_irqrestore(&pg->lock, flags);
			queue_delayed_work(alua_wq, &pg->rtpg_work,
					   pg->interval * HZ);
			return;
		}
		/* RTPG failed: skip the STPG stage. */
		if (err != SCSI_DH_OK)
			pg->flags &= ~ALUA_PG_RUN_STPG;
	}
	if (pg->flags & ALUA_PG_RUN_STPG) {
		pg->flags &= ~ALUA_PG_RUN_STPG;
		spin_unlock_irqrestore(&pg->lock, flags);
		err = alua_stpg(sdev, pg);
		spin_lock_irqsave(&pg->lock, flags);
		/* An STPG retry always goes through a fresh RTPG first. */
		if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
			pg->flags |= ALUA_PG_RUN_RTPG;
			pg->interval = 0;
			pg->flags &= ~ALUA_PG_RUNNING;
			spin_unlock_irqrestore(&pg->lock, flags);
			queue_delayed_work(alua_wq, &pg->rtpg_work,
					   pg->interval * HZ);
			return;
		}
	}

	/* Done: detach the request list and complete it outside the lock. */
	list_splice_init(&pg->rtpg_list, &qdata_list);
	pg->rtpg_sdev = NULL;
	spin_unlock_irqrestore(&pg->lock, flags);

	list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
		list_del(&qdata->entry);
		if (qdata->callback_fn)
			qdata->callback_fn(qdata->callback_data, err);
		kfree(qdata);
	}
	spin_lock_irqsave(&pg->lock, flags);
	pg->flags &= ~ALUA_PG_RUNNING;
	spin_unlock_irqrestore(&pg->lock, flags);
	/* Drop the references taken by alua_rtpg_queue(). */
	scsi_device_put(sdev);
	kref_put(&pg->kref, release_port_group);
}
/*
 * alua_rtpg_queue - schedule an asynchronous RTPG for a port group
 * @pg: the port group to be checked (may be NULL: no-op)
 * @sdev: device through which the RTPG should be sent
 * @qdata: optional activation request completed by the worker
 * @force: schedule an RTPG even if none is strictly required
 *
 * Takes a reference on @pg (and on @sdev when it becomes the
 * worker's command device) for alua_rtpg_work(); the references
 * are dropped again if the work was already queued.
 */
static void alua_rtpg_queue(struct alua_port_group *pg,
			    struct scsi_device *sdev,
			    struct alua_queue_data *qdata, bool force)
{
	int start_queue = 0;
	unsigned long flags;
	struct workqueue_struct *alua_wq = kaluad_wq;

	if (!pg)
		return;

	spin_lock_irqsave(&pg->lock, flags);
	if (qdata) {
		/* An activation request implies an STPG after the RTPG. */
		list_add_tail(&qdata->entry, &pg->rtpg_list);
		pg->flags |= ALUA_PG_RUN_STPG;
		force = true;
	}
	if (pg->rtpg_sdev == NULL) {
		pg->interval = 0;
		pg->flags |= ALUA_PG_RUN_RTPG;
		kref_get(&pg->kref);
		pg->rtpg_sdev = sdev;
		/*
		 * NOTE(review): scsi_device_get() can fail (device being
		 * deleted, module going away) and its return value is
		 * ignored here — confirm against later upstream fixes.
		 */
		scsi_device_get(sdev);
		start_queue = 1;
	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
		pg->flags |= ALUA_PG_RUN_RTPG;
		/* Do not queue if the worker is already running */
		if (!(pg->flags & ALUA_PG_RUNNING)) {
			kref_get(&pg->kref);
			start_queue = 1;
		}
	}

	if (pg->flags & ALUA_SYNC_STPG)
		alua_wq = kaluad_sync_wq;
	spin_unlock_irqrestore(&pg->lock, flags);

	if (start_queue &&
	    !queue_delayed_work(alua_wq, &pg->rtpg_work,
				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
		/* Work was already pending: drop the references we took. */
		scsi_device_put(sdev);
		kref_put(&pg->kref, release_port_group);
	}
}
  753. /*
  754. * alua_initialize - Initialize ALUA state
  755. * @sdev: the device to be initialized
  756. *
  757. * For the prep_fn to work correctly we have
  758. * to initialize the ALUA state for the device.
  759. */
  760. static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
  761. {
  762. int err = SCSI_DH_DEV_UNSUPP, tpgs;
  763. mutex_lock(&h->init_mutex);
  764. tpgs = alua_check_tpgs(sdev);
  765. if (tpgs != TPGS_MODE_NONE)
  766. err = alua_check_vpd(sdev, h, tpgs);
  767. h->init_error = err;
  768. mutex_unlock(&h->init_mutex);
  769. return err;
  770. }
  771. /*
  772. * alua_set_params - set/unset the optimize flag
  773. * @sdev: device on the path to be activated
  774. * params - parameters in the following format
  775. * "no_of_params\0param1\0param2\0param3\0...\0"
  776. * For example, to set the flag pass the following parameters
  777. * from multipath.conf
  778. * hardware_handler "2 alua 1"
  779. */
  780. static int alua_set_params(struct scsi_device *sdev, const char *params)
  781. {
  782. struct alua_dh_data *h = sdev->handler_data;
  783. struct alua_port_group __rcu *pg = NULL;
  784. unsigned int optimize = 0, argc;
  785. const char *p = params;
  786. int result = SCSI_DH_OK;
  787. unsigned long flags;
  788. if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
  789. return -EINVAL;
  790. while (*p++)
  791. ;
  792. if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
  793. return -EINVAL;
  794. rcu_read_lock();
  795. pg = rcu_dereference(h->pg);
  796. if (!pg) {
  797. rcu_read_unlock();
  798. return -ENXIO;
  799. }
  800. spin_lock_irqsave(&pg->lock, flags);
  801. if (optimize)
  802. pg->flags |= ALUA_OPTIMIZE_STPG;
  803. else
  804. pg->flags &= ~ALUA_OPTIMIZE_STPG;
  805. spin_unlock_irqrestore(&pg->lock, flags);
  806. rcu_read_unlock();
  807. return result;
  808. }
  809. /*
  810. * alua_activate - activate a path
  811. * @sdev: device on the path to be activated
  812. *
  813. * We're currently switching the port group to be activated only and
  814. * let the array figure out the rest.
  815. * There may be other arrays which require us to switch all port groups
  816. * based on a certain policy. But until we actually encounter them it
  817. * should be okay.
  818. */
  819. static int alua_activate(struct scsi_device *sdev,
  820. activate_complete fn, void *data)
  821. {
  822. struct alua_dh_data *h = sdev->handler_data;
  823. int err = SCSI_DH_OK;
  824. struct alua_queue_data *qdata;
  825. struct alua_port_group __rcu *pg;
  826. qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
  827. if (!qdata) {
  828. err = SCSI_DH_RES_TEMP_UNAVAIL;
  829. goto out;
  830. }
  831. qdata->callback_fn = fn;
  832. qdata->callback_data = data;
  833. mutex_lock(&h->init_mutex);
  834. rcu_read_lock();
  835. pg = rcu_dereference(h->pg);
  836. if (!pg || !kref_get_unless_zero(&pg->kref)) {
  837. rcu_read_unlock();
  838. kfree(qdata);
  839. err = h->init_error;
  840. mutex_unlock(&h->init_mutex);
  841. goto out;
  842. }
  843. fn = NULL;
  844. rcu_read_unlock();
  845. mutex_unlock(&h->init_mutex);
  846. alua_rtpg_queue(pg, sdev, qdata, true);
  847. kref_put(&pg->kref, release_port_group);
  848. out:
  849. if (fn)
  850. fn(data, err);
  851. return 0;
  852. }
  853. /*
  854. * alua_check - check path status
  855. * @sdev: device on the path to be checked
  856. *
  857. * Check the device status
  858. */
  859. static void alua_check(struct scsi_device *sdev, bool force)
  860. {
  861. struct alua_dh_data *h = sdev->handler_data;
  862. struct alua_port_group *pg;
  863. rcu_read_lock();
  864. pg = rcu_dereference(h->pg);
  865. if (!pg || !kref_get_unless_zero(&pg->kref)) {
  866. rcu_read_unlock();
  867. return;
  868. }
  869. rcu_read_unlock();
  870. alua_rtpg_queue(pg, sdev, NULL, force);
  871. kref_put(&pg->kref, release_port_group);
  872. }
  873. /*
  874. * alua_prep_fn - request callback
  875. *
  876. * Fail I/O to all paths not in state
  877. * active/optimized or active/non-optimized.
  878. */
  879. static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
  880. {
  881. struct alua_dh_data *h = sdev->handler_data;
  882. struct alua_port_group __rcu *pg;
  883. int state = TPGS_STATE_OPTIMIZED;
  884. int ret = BLKPREP_OK;
  885. rcu_read_lock();
  886. pg = rcu_dereference(h->pg);
  887. if (pg)
  888. state = pg->state;
  889. rcu_read_unlock();
  890. if (state == TPGS_STATE_TRANSITIONING)
  891. ret = BLKPREP_DEFER;
  892. else if (state != TPGS_STATE_OPTIMIZED &&
  893. state != TPGS_STATE_NONOPTIMIZED &&
  894. state != TPGS_STATE_LBA_DEPENDENT) {
  895. ret = BLKPREP_KILL;
  896. req->cmd_flags |= REQ_QUIET;
  897. }
  898. return ret;
  899. }
  900. /*
  901. * alua_bus_attach - Attach device handler
  902. * @sdev: device to be attached to
  903. */
  904. static int alua_bus_attach(struct scsi_device *sdev)
  905. {
  906. struct alua_dh_data *h;
  907. int err, ret = -EINVAL;
  908. h = kzalloc(sizeof(*h) , GFP_KERNEL);
  909. if (!h)
  910. return -ENOMEM;
  911. spin_lock_init(&h->pg_lock);
  912. rcu_assign_pointer(h->pg, NULL);
  913. h->init_error = SCSI_DH_OK;
  914. h->sdev = sdev;
  915. mutex_init(&h->init_mutex);
  916. err = alua_initialize(sdev, h);
  917. if (err == SCSI_DH_NOMEM)
  918. ret = -ENOMEM;
  919. if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
  920. goto failed;
  921. sdev->handler_data = h;
  922. return 0;
  923. failed:
  924. kfree(h);
  925. return ret;
  926. }
  927. /*
  928. * alua_bus_detach - Detach device handler
  929. * @sdev: device to be detached from
  930. */
  931. static void alua_bus_detach(struct scsi_device *sdev)
  932. {
  933. struct alua_dh_data *h = sdev->handler_data;
  934. struct alua_port_group *pg;
  935. spin_lock(&h->pg_lock);
  936. pg = h->pg;
  937. rcu_assign_pointer(h->pg, NULL);
  938. h->sdev = NULL;
  939. spin_unlock(&h->pg_lock);
  940. if (pg)
  941. kref_put(&pg->kref, release_port_group);
  942. sdev->handler_data = NULL;
  943. kfree(h);
  944. }
/* ALUA handler operations registered with the SCSI device handler core. */
static struct scsi_device_handler alua_dh = {
	.name = ALUA_DH_NAME,
	.module = THIS_MODULE,
	.attach = alua_bus_attach,
	.detach = alua_bus_detach,
	.prep_fn = alua_prep_fn,
	.check_sense = alua_check_sense,
	.activate = alua_activate,
	.set_params = alua_set_params,
};
  955. static int __init alua_init(void)
  956. {
  957. int r;
  958. kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0);
  959. if (!kaluad_wq) {
  960. /* Temporary failure, bypass */
  961. return SCSI_DH_DEV_TEMP_BUSY;
  962. }
  963. kaluad_sync_wq = create_workqueue("kaluad_sync");
  964. if (!kaluad_sync_wq) {
  965. destroy_workqueue(kaluad_wq);
  966. return SCSI_DH_DEV_TEMP_BUSY;
  967. }
  968. r = scsi_register_device_handler(&alua_dh);
  969. if (r != 0) {
  970. printk(KERN_ERR "%s: Failed to register scsi device handler",
  971. ALUA_DH_NAME);
  972. destroy_workqueue(kaluad_sync_wq);
  973. destroy_workqueue(kaluad_wq);
  974. }
  975. return r;
  976. }
/*
 * alua_exit - module teardown
 *
 * Unregister the handler first so no new work can be queued, then
 * destroy the workqueues (which flushes any pending work).
 */
static void __exit alua_exit(void)
{
	scsi_unregister_device_handler(&alua_dh);
	destroy_workqueue(kaluad_sync_wq);
	destroy_workqueue(kaluad_wq);
}
/* Standard module entry/exit hooks and metadata. */
module_init(alua_init);
module_exit(alua_exit);
MODULE_DESCRIPTION("DM Multipath ALUA support");
MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
MODULE_LICENSE("GPL");
MODULE_VERSION(ALUA_DH_VER);