stack_user.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * stack_user.c
  5. *
  6. * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
  7. *
  8. * Copyright (C) 2007 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation, version 2.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * General Public License for more details.
  18. */
  19. #include <linux/module.h>
  20. #include <linux/fs.h>
  21. #include <linux/miscdevice.h>
  22. #include <linux/mutex.h>
  23. #include <linux/slab.h>
  24. #include <linux/reboot.h>
  25. #include <asm/uaccess.h>
  26. #include "stackglue.h"
  27. #include <linux/dlm_plock.h>
  28. /*
  29. * The control protocol starts with a handshake. Until the handshake
  30. * is complete, the control device will fail all write(2)s.
  31. *
  32. * The handshake is simple. First, the client reads until EOF. Each line
  33. * of output is a supported protocol tag. All protocol tags are a single
  34. * character followed by a two hex digit version number. Currently the
  35. * only thing supported is T01, for "Text-based version 0x01". Next, the
  36. * client writes the version they would like to use, including the newline.
  37. * Thus, the protocol tag is 'T01\n'. If the version tag written is
  38. * unknown, -EINVAL is returned. Once the negotiation is complete, the
  39. * client can start sending messages.
  40. *
  41. * The T01 protocol has three messages. First is the "SETN" message.
  42. * It has the following syntax:
  43. *
  44. * SETN<space><8-char-hex-nodenum><newline>
  45. *
  46. * This is 14 characters.
  47. *
  48. * The "SETN" message must be the first message following the protocol.
  49. * It tells ocfs2_control the local node number.
  50. *
  51. * Next comes the "SETV" message. It has the following syntax:
  52. *
  53. * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
  54. *
  55. * This is 11 characters.
  56. *
  57. * The "SETV" message sets the filesystem locking protocol version as
  58. * negotiated by the client. The client negotiates based on the maximum
  59. * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
  60. * number from the "SETV" message must match
  61. * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number
  62. * must be less than or equal to ...sp_max_proto.pv_minor.
  63. *
  64. * Once this information has been set, mounts will be allowed. From this
  65. * point on, the "DOWN" message can be sent for node down notification.
  66. * It has the following syntax:
  67. *
  68. * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
  69. *
  70. * eg:
  71. *
  72. * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
  73. *
  74. * This is 47 characters.
  75. */
  /*
   * Protocol tag exchanged during the handshake.  Only a single protocol
   * version exists for now.
   */
  #define OCFS2_CONTROL_PROTO			"T01\n"
  #define OCFS2_CONTROL_PROTO_LEN			4

  /*
   * Handshake progress of one open control file, kept in
   * ocfs2_control_private.op_state.
   */
  #define OCFS2_CONTROL_HANDSHAKE_INVALID		(0)
  #define OCFS2_CONTROL_HANDSHAKE_READ		(1)
  #define OCFS2_CONTROL_HANDSHAKE_PROTOCOL	(2)
  #define OCFS2_CONTROL_HANDSHAKE_VALID		(3)

  /* Message opcodes and the exact byte length of each complete message */
  #define OCFS2_CONTROL_MESSAGE_OP_LEN			4
  #define OCFS2_CONTROL_MESSAGE_SETNODE_OP		"SETN"
  #define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN		14
  #define OCFS2_CONTROL_MESSAGE_SETVERSION_OP		"SETV"
  #define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN	11
  #define OCFS2_CONTROL_MESSAGE_DOWN_OP			"DOWN"
  #define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN		47

  /* Widths of the individual text fields inside the messages */
  #define OCFS2_TEXT_UUID_LEN			32
  #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN	2
  #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN	8
  98. /*
  99. * ocfs2_live_connection is refcounted because the filesystem and
  100. * miscdevice sides can detach in different order. Let's just be safe.
  101. */
  102. struct ocfs2_live_connection {
  103. struct list_head oc_list;
  104. struct ocfs2_cluster_connection *oc_conn;
  105. atomic_t oc_this_node;
  106. int oc_our_slot;
  107. };
  108. struct ocfs2_control_private {
  109. struct list_head op_list;
  110. int op_state;
  111. int op_this_node;
  112. struct ocfs2_protocol_version op_proto;
  113. };
  114. /* SETN<space><8-char-hex-nodenum><newline> */
  115. struct ocfs2_control_message_setn {
  116. char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
  117. char space;
  118. char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
  119. char newline;
  120. };
  121. /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
  122. struct ocfs2_control_message_setv {
  123. char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
  124. char space1;
  125. char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
  126. char space2;
  127. char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
  128. char newline;
  129. };
  130. /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
  131. struct ocfs2_control_message_down {
  132. char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
  133. char space1;
  134. char uuid[OCFS2_TEXT_UUID_LEN];
  135. char space2;
  136. char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
  137. char newline;
  138. };
  139. union ocfs2_control_message {
  140. char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
  141. struct ocfs2_control_message_setn u_setn;
  142. struct ocfs2_control_message_setv u_setv;
  143. struct ocfs2_control_message_down u_down;
  144. };
  145. static struct ocfs2_stack_plugin ocfs2_user_plugin;
  146. static atomic_t ocfs2_control_opened;
  147. static int ocfs2_control_this_node = -1;
  148. static struct ocfs2_protocol_version running_proto;
  149. static LIST_HEAD(ocfs2_live_connection_list);
  150. static LIST_HEAD(ocfs2_control_private_list);
  151. static DEFINE_MUTEX(ocfs2_control_lock);
  152. static inline void ocfs2_control_set_handshake_state(struct file *file,
  153. int state)
  154. {
  155. struct ocfs2_control_private *p = file->private_data;
  156. p->op_state = state;
  157. }
  158. static inline int ocfs2_control_get_handshake_state(struct file *file)
  159. {
  160. struct ocfs2_control_private *p = file->private_data;
  161. return p->op_state;
  162. }
  163. static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
  164. {
  165. size_t len = strlen(name);
  166. struct ocfs2_live_connection *c;
  167. BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
  168. list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
  169. if ((c->oc_conn->cc_namelen == len) &&
  170. !strncmp(c->oc_conn->cc_name, name, len))
  171. return c;
  172. }
  173. return NULL;
  174. }
  175. /*
  176. * ocfs2_live_connection structures are created underneath the ocfs2
  177. * mount path. Since the VFS prevents multiple calls to
  178. * fill_super(), we can't get dupes here.
  179. */
  180. static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn,
  181. struct ocfs2_live_connection *c)
  182. {
  183. int rc = 0;
  184. mutex_lock(&ocfs2_control_lock);
  185. c->oc_conn = conn;
  186. if (atomic_read(&ocfs2_control_opened))
  187. list_add(&c->oc_list, &ocfs2_live_connection_list);
  188. else {
  189. printk(KERN_ERR
  190. "ocfs2: Userspace control daemon is not present\n");
  191. rc = -ESRCH;
  192. }
  193. mutex_unlock(&ocfs2_control_lock);
  194. return rc;
  195. }
  196. /*
  197. * This function disconnects the cluster connection from ocfs2_control.
  198. * Afterwards, userspace can't affect the cluster connection.
  199. */
  200. static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
  201. {
  202. mutex_lock(&ocfs2_control_lock);
  203. list_del_init(&c->oc_list);
  204. c->oc_conn = NULL;
  205. mutex_unlock(&ocfs2_control_lock);
  206. kfree(c);
  207. }
  208. static int ocfs2_control_cfu(void *target, size_t target_len,
  209. const char __user *buf, size_t count)
  210. {
  211. /* The T01 expects write(2) calls to have exactly one command */
  212. if ((count != target_len) ||
  213. (count > sizeof(union ocfs2_control_message)))
  214. return -EINVAL;
  215. if (copy_from_user(target, buf, target_len))
  216. return -EFAULT;
  217. return 0;
  218. }
  219. static ssize_t ocfs2_control_validate_protocol(struct file *file,
  220. const char __user *buf,
  221. size_t count)
  222. {
  223. ssize_t ret;
  224. char kbuf[OCFS2_CONTROL_PROTO_LEN];
  225. ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
  226. buf, count);
  227. if (ret)
  228. return ret;
  229. if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
  230. return -EINVAL;
  231. ocfs2_control_set_handshake_state(file,
  232. OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
  233. return count;
  234. }
  235. static void ocfs2_control_send_down(const char *uuid,
  236. int nodenum)
  237. {
  238. struct ocfs2_live_connection *c;
  239. mutex_lock(&ocfs2_control_lock);
  240. c = ocfs2_connection_find(uuid);
  241. if (c) {
  242. BUG_ON(c->oc_conn == NULL);
  243. c->oc_conn->cc_recovery_handler(nodenum,
  244. c->oc_conn->cc_recovery_data);
  245. }
  246. mutex_unlock(&ocfs2_control_lock);
  247. }
  248. /*
  249. * Called whenever configuration elements are sent to /dev/ocfs2_control.
  250. * If all configuration elements are present, try to set the global
  251. * values. If there is a problem, return an error. Skip any missing
  252. * elements, and only bump ocfs2_control_opened when we have all elements
  253. * and are successful.
  254. */
  255. static int ocfs2_control_install_private(struct file *file)
  256. {
  257. int rc = 0;
  258. int set_p = 1;
  259. struct ocfs2_control_private *p = file->private_data;
  260. BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
  261. mutex_lock(&ocfs2_control_lock);
  262. if (p->op_this_node < 0) {
  263. set_p = 0;
  264. } else if ((ocfs2_control_this_node >= 0) &&
  265. (ocfs2_control_this_node != p->op_this_node)) {
  266. rc = -EINVAL;
  267. goto out_unlock;
  268. }
  269. if (!p->op_proto.pv_major) {
  270. set_p = 0;
  271. } else if (!list_empty(&ocfs2_live_connection_list) &&
  272. ((running_proto.pv_major != p->op_proto.pv_major) ||
  273. (running_proto.pv_minor != p->op_proto.pv_minor))) {
  274. rc = -EINVAL;
  275. goto out_unlock;
  276. }
  277. if (set_p) {
  278. ocfs2_control_this_node = p->op_this_node;
  279. running_proto.pv_major = p->op_proto.pv_major;
  280. running_proto.pv_minor = p->op_proto.pv_minor;
  281. }
  282. out_unlock:
  283. mutex_unlock(&ocfs2_control_lock);
  284. if (!rc && set_p) {
  285. /* We set the global values successfully */
  286. atomic_inc(&ocfs2_control_opened);
  287. ocfs2_control_set_handshake_state(file,
  288. OCFS2_CONTROL_HANDSHAKE_VALID);
  289. }
  290. return rc;
  291. }
  292. static int ocfs2_control_get_this_node(void)
  293. {
  294. int rc;
  295. mutex_lock(&ocfs2_control_lock);
  296. if (ocfs2_control_this_node < 0)
  297. rc = -EINVAL;
  298. else
  299. rc = ocfs2_control_this_node;
  300. mutex_unlock(&ocfs2_control_lock);
  301. return rc;
  302. }
  303. static int ocfs2_control_do_setnode_msg(struct file *file,
  304. struct ocfs2_control_message_setn *msg)
  305. {
  306. long nodenum;
  307. char *ptr = NULL;
  308. struct ocfs2_control_private *p = file->private_data;
  309. if (ocfs2_control_get_handshake_state(file) !=
  310. OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
  311. return -EINVAL;
  312. if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
  313. OCFS2_CONTROL_MESSAGE_OP_LEN))
  314. return -EINVAL;
  315. if ((msg->space != ' ') || (msg->newline != '\n'))
  316. return -EINVAL;
  317. msg->space = msg->newline = '\0';
  318. nodenum = simple_strtol(msg->nodestr, &ptr, 16);
  319. if (!ptr || *ptr)
  320. return -EINVAL;
  321. if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
  322. (nodenum > INT_MAX) || (nodenum < 0))
  323. return -ERANGE;
  324. p->op_this_node = nodenum;
  325. return ocfs2_control_install_private(file);
  326. }
  327. static int ocfs2_control_do_setversion_msg(struct file *file,
  328. struct ocfs2_control_message_setv *msg)
  329. {
  330. long major, minor;
  331. char *ptr = NULL;
  332. struct ocfs2_control_private *p = file->private_data;
  333. struct ocfs2_protocol_version *max =
  334. &ocfs2_user_plugin.sp_max_proto;
  335. if (ocfs2_control_get_handshake_state(file) !=
  336. OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
  337. return -EINVAL;
  338. if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
  339. OCFS2_CONTROL_MESSAGE_OP_LEN))
  340. return -EINVAL;
  341. if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
  342. (msg->newline != '\n'))
  343. return -EINVAL;
  344. msg->space1 = msg->space2 = msg->newline = '\0';
  345. major = simple_strtol(msg->major, &ptr, 16);
  346. if (!ptr || *ptr)
  347. return -EINVAL;
  348. minor = simple_strtol(msg->minor, &ptr, 16);
  349. if (!ptr || *ptr)
  350. return -EINVAL;
  351. /*
  352. * The major must be between 1 and 255, inclusive. The minor
  353. * must be between 0 and 255, inclusive. The version passed in
  354. * must be within the maximum version supported by the filesystem.
  355. */
  356. if ((major == LONG_MIN) || (major == LONG_MAX) ||
  357. (major > (u8)-1) || (major < 1))
  358. return -ERANGE;
  359. if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
  360. (minor > (u8)-1) || (minor < 0))
  361. return -ERANGE;
  362. if ((major != max->pv_major) ||
  363. (minor > max->pv_minor))
  364. return -EINVAL;
  365. p->op_proto.pv_major = major;
  366. p->op_proto.pv_minor = minor;
  367. return ocfs2_control_install_private(file);
  368. }
  369. static int ocfs2_control_do_down_msg(struct file *file,
  370. struct ocfs2_control_message_down *msg)
  371. {
  372. long nodenum;
  373. char *p = NULL;
  374. if (ocfs2_control_get_handshake_state(file) !=
  375. OCFS2_CONTROL_HANDSHAKE_VALID)
  376. return -EINVAL;
  377. if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
  378. OCFS2_CONTROL_MESSAGE_OP_LEN))
  379. return -EINVAL;
  380. if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
  381. (msg->newline != '\n'))
  382. return -EINVAL;
  383. msg->space1 = msg->space2 = msg->newline = '\0';
  384. nodenum = simple_strtol(msg->nodestr, &p, 16);
  385. if (!p || *p)
  386. return -EINVAL;
  387. if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
  388. (nodenum > INT_MAX) || (nodenum < 0))
  389. return -ERANGE;
  390. ocfs2_control_send_down(msg->uuid, nodenum);
  391. return 0;
  392. }
  393. static ssize_t ocfs2_control_message(struct file *file,
  394. const char __user *buf,
  395. size_t count)
  396. {
  397. ssize_t ret;
  398. union ocfs2_control_message msg;
  399. /* Try to catch padding issues */
  400. WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
  401. (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));
  402. memset(&msg, 0, sizeof(union ocfs2_control_message));
  403. ret = ocfs2_control_cfu(&msg, count, buf, count);
  404. if (ret)
  405. goto out;
  406. if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
  407. !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
  408. OCFS2_CONTROL_MESSAGE_OP_LEN))
  409. ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
  410. else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
  411. !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
  412. OCFS2_CONTROL_MESSAGE_OP_LEN))
  413. ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
  414. else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
  415. !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
  416. OCFS2_CONTROL_MESSAGE_OP_LEN))
  417. ret = ocfs2_control_do_down_msg(file, &msg.u_down);
  418. else
  419. ret = -EINVAL;
  420. out:
  421. return ret ? ret : count;
  422. }
  423. static ssize_t ocfs2_control_write(struct file *file,
  424. const char __user *buf,
  425. size_t count,
  426. loff_t *ppos)
  427. {
  428. ssize_t ret;
  429. switch (ocfs2_control_get_handshake_state(file)) {
  430. case OCFS2_CONTROL_HANDSHAKE_INVALID:
  431. ret = -EINVAL;
  432. break;
  433. case OCFS2_CONTROL_HANDSHAKE_READ:
  434. ret = ocfs2_control_validate_protocol(file, buf,
  435. count);
  436. break;
  437. case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
  438. case OCFS2_CONTROL_HANDSHAKE_VALID:
  439. ret = ocfs2_control_message(file, buf, count);
  440. break;
  441. default:
  442. BUG();
  443. ret = -EIO;
  444. break;
  445. }
  446. return ret;
  447. }
  448. /*
  449. * This is a naive version. If we ever have a new protocol, we'll expand
  450. * it. Probably using seq_file.
  451. */
  452. static ssize_t ocfs2_control_read(struct file *file,
  453. char __user *buf,
  454. size_t count,
  455. loff_t *ppos)
  456. {
  457. ssize_t ret;
  458. ret = simple_read_from_buffer(buf, count, ppos,
  459. OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN);
  460. /* Have we read the whole protocol list? */
  461. if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN)
  462. ocfs2_control_set_handshake_state(file,
  463. OCFS2_CONTROL_HANDSHAKE_READ);
  464. return ret;
  465. }
  466. static int ocfs2_control_release(struct inode *inode, struct file *file)
  467. {
  468. struct ocfs2_control_private *p = file->private_data;
  469. mutex_lock(&ocfs2_control_lock);
  470. if (ocfs2_control_get_handshake_state(file) !=
  471. OCFS2_CONTROL_HANDSHAKE_VALID)
  472. goto out;
  473. if (atomic_dec_and_test(&ocfs2_control_opened)) {
  474. if (!list_empty(&ocfs2_live_connection_list)) {
  475. /* XXX: Do bad things! */
  476. printk(KERN_ERR
  477. "ocfs2: Unexpected release of ocfs2_control!\n"
  478. " Loss of cluster connection requires "
  479. "an emergency restart!\n");
  480. emergency_restart();
  481. }
  482. /*
  483. * Last valid close clears the node number and resets
  484. * the locking protocol version
  485. */
  486. ocfs2_control_this_node = -1;
  487. running_proto.pv_major = 0;
  488. running_proto.pv_major = 0;
  489. }
  490. out:
  491. list_del_init(&p->op_list);
  492. file->private_data = NULL;
  493. mutex_unlock(&ocfs2_control_lock);
  494. kfree(p);
  495. return 0;
  496. }
  497. static int ocfs2_control_open(struct inode *inode, struct file *file)
  498. {
  499. struct ocfs2_control_private *p;
  500. p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
  501. if (!p)
  502. return -ENOMEM;
  503. p->op_this_node = -1;
  504. mutex_lock(&ocfs2_control_lock);
  505. file->private_data = p;
  506. list_add(&p->op_list, &ocfs2_control_private_list);
  507. mutex_unlock(&ocfs2_control_lock);
  508. return 0;
  509. }
  510. static const struct file_operations ocfs2_control_fops = {
  511. .open = ocfs2_control_open,
  512. .release = ocfs2_control_release,
  513. .read = ocfs2_control_read,
  514. .write = ocfs2_control_write,
  515. .owner = THIS_MODULE,
  516. .llseek = default_llseek,
  517. };
  518. static struct miscdevice ocfs2_control_device = {
  519. .minor = MISC_DYNAMIC_MINOR,
  520. .name = "ocfs2_control",
  521. .fops = &ocfs2_control_fops,
  522. };
  523. static int ocfs2_control_init(void)
  524. {
  525. int rc;
  526. atomic_set(&ocfs2_control_opened, 0);
  527. rc = misc_register(&ocfs2_control_device);
  528. if (rc)
  529. printk(KERN_ERR
  530. "ocfs2: Unable to register ocfs2_control device "
  531. "(errno %d)\n",
  532. -rc);
  533. return rc;
  534. }
  535. static void ocfs2_control_exit(void)
  536. {
  537. int rc;
  538. rc = misc_deregister(&ocfs2_control_device);
  539. if (rc)
  540. printk(KERN_ERR
  541. "ocfs2: Unable to deregister ocfs2_control device "
  542. "(errno %d)\n",
  543. -rc);
  544. }
  545. static void fsdlm_lock_ast_wrapper(void *astarg)
  546. {
  547. struct ocfs2_dlm_lksb *lksb = astarg;
  548. int status = lksb->lksb_fsdlm.sb_status;
  549. /*
  550. * For now we're punting on the issue of other non-standard errors
  551. * where we can't tell if the unlock_ast or lock_ast should be called.
  552. * The main "other error" that's possible is EINVAL which means the
  553. * function was called with invalid args, which shouldn't be possible
  554. * since the caller here is under our control. Other non-standard
  555. * errors probably fall into the same category, or otherwise are fatal
  556. * which means we can't carry on anyway.
  557. */
  558. if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
  559. lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0);
  560. else
  561. lksb->lksb_conn->cc_proto->lp_lock_ast(lksb);
  562. }
  563. static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
  564. {
  565. struct ocfs2_dlm_lksb *lksb = astarg;
  566. lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level);
  567. }
  568. static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
  569. int mode,
  570. struct ocfs2_dlm_lksb *lksb,
  571. u32 flags,
  572. void *name,
  573. unsigned int namelen)
  574. {
  575. int ret;
  576. if (!lksb->lksb_fsdlm.sb_lvbptr)
  577. lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
  578. sizeof(struct dlm_lksb);
  579. ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
  580. flags|DLM_LKF_NODLCKWT, name, namelen, 0,
  581. fsdlm_lock_ast_wrapper, lksb,
  582. fsdlm_blocking_ast_wrapper);
  583. return ret;
  584. }
  585. static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
  586. struct ocfs2_dlm_lksb *lksb,
  587. u32 flags)
  588. {
  589. int ret;
  590. ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
  591. flags, &lksb->lksb_fsdlm, lksb);
  592. return ret;
  593. }
  594. static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb)
  595. {
  596. return lksb->lksb_fsdlm.sb_status;
  597. }
  598. static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb)
  599. {
  600. int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
  601. return !invalid;
  602. }
  603. static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb)
  604. {
  605. if (!lksb->lksb_fsdlm.sb_lvbptr)
  606. lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
  607. sizeof(struct dlm_lksb);
  608. return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
  609. }
  /* Intentionally empty: this stack has nothing to dump for an lksb. */
  static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb)
  {
  	return;
  }
  613. static int user_plock(struct ocfs2_cluster_connection *conn,
  614. u64 ino,
  615. struct file *file,
  616. int cmd,
  617. struct file_lock *fl)
  618. {
  619. /*
  620. * This more or less just demuxes the plock request into any
  621. * one of three dlm calls.
  622. *
  623. * Internally, fs/dlm will pass these to a misc device, which
  624. * a userspace daemon will read and write to.
  625. *
  626. * For now, cancel requests (which happen internally only),
  627. * are turned into unlocks. Most of this function taken from
  628. * gfs2_lock.
  629. */
  630. if (cmd == F_CANCELLK) {
  631. cmd = F_SETLK;
  632. fl->fl_type = F_UNLCK;
  633. }
  634. if (IS_GETLK(cmd))
  635. return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
  636. else if (fl->fl_type == F_UNLCK)
  637. return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
  638. else
  639. return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
  640. }
  641. /*
  642. * Compare a requested locking protocol version against the current one.
  643. *
  644. * If the major numbers are different, they are incompatible.
  645. * If the current minor is greater than the request, they are incompatible.
  646. * If the current minor is less than or equal to the request, they are
  647. * compatible, and the requester should run at the current minor version.
  648. */
  649. static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
  650. struct ocfs2_protocol_version *request)
  651. {
  652. if (existing->pv_major != request->pv_major)
  653. return 1;
  654. if (existing->pv_minor > request->pv_minor)
  655. return 1;
  656. if (existing->pv_minor < request->pv_minor)
  657. request->pv_minor = existing->pv_minor;
  658. return 0;
  659. }
  /* recover_prep lockspace callback; nothing needs to be done here. */
  static void user_recover_prep(void *arg)
  {
  	return;
  }
  663. static void user_recover_slot(void *arg, struct dlm_slot *slot)
  664. {
  665. struct ocfs2_cluster_connection *conn = arg;
  666. printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
  667. slot->nodeid, slot->slot);
  668. conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
  669. }
  670. static void user_recover_done(void *arg, struct dlm_slot *slots,
  671. int num_slots, int our_slot,
  672. uint32_t generation)
  673. {
  674. struct ocfs2_cluster_connection *conn = arg;
  675. struct ocfs2_live_connection *lc = conn->cc_private;
  676. int i;
  677. for (i = 0; i < num_slots; i++)
  678. if (slots[i].slot == our_slot) {
  679. atomic_set(&lc->oc_this_node, slots[i].nodeid);
  680. break;
  681. }
  682. lc->oc_our_slot = our_slot;
  683. }
  684. const struct dlm_lockspace_ops ocfs2_ls_ops = {
  685. .recover_prep = user_recover_prep,
  686. .recover_slot = user_recover_slot,
  687. .recover_done = user_recover_done,
  688. };
  689. static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
  690. {
  691. dlm_lockspace_t *fsdlm;
  692. struct ocfs2_live_connection *lc;
  693. int rc;
  694. BUG_ON(conn == NULL);
  695. lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
  696. if (!lc) {
  697. rc = -ENOMEM;
  698. goto out;
  699. }
  700. rc = ocfs2_live_connection_attach(conn, lc);
  701. if (rc)
  702. goto out;
  703. /*
  704. * running_proto must have been set before we allowed any mounts
  705. * to proceed.
  706. */
  707. if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
  708. printk(KERN_ERR
  709. "Unable to mount with fs locking protocol version "
  710. "%u.%u because the userspace control daemon has "
  711. "negotiated %u.%u\n",
  712. conn->cc_version.pv_major, conn->cc_version.pv_minor,
  713. running_proto.pv_major, running_proto.pv_minor);
  714. rc = -EPROTO;
  715. ocfs2_live_connection_drop(lc);
  716. lc = NULL;
  717. goto out;
  718. }
  719. rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
  720. NULL, NULL, NULL, &fsdlm);
  721. if (rc) {
  722. ocfs2_live_connection_drop(lc);
  723. lc = NULL;
  724. goto out;
  725. }
  726. conn->cc_private = lc;
  727. conn->cc_lockspace = fsdlm;
  728. out:
  729. if (rc && lc)
  730. kfree(lc);
  731. return rc;
  732. }
  733. static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
  734. {
  735. dlm_release_lockspace(conn->cc_lockspace, 2);
  736. conn->cc_lockspace = NULL;
  737. ocfs2_live_connection_drop(conn->cc_private);
  738. conn->cc_private = NULL;
  739. return 0;
  740. }
  /*
   * this_node operation of the userspace stack.
   *
   * Stores the local node number in *@this_node and returns 0, or returns
   * the negative error from ocfs2_control_get_this_node() and leaves
   * *@this_node untouched.
   */
  static int user_cluster_this_node(unsigned int *this_node)
  {
  	int node = ocfs2_control_get_this_node();

  	if (node < 0)
  		return node;

  	*this_node = node;

  	return 0;
  }
  750. static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
  751. .connect = user_cluster_connect,
  752. .disconnect = user_cluster_disconnect,
  753. .this_node = user_cluster_this_node,
  754. .dlm_lock = user_dlm_lock,
  755. .dlm_unlock = user_dlm_unlock,
  756. .lock_status = user_dlm_lock_status,
  757. .lvb_valid = user_dlm_lvb_valid,
  758. .lock_lvb = user_dlm_lvb,
  759. .plock = user_plock,
  760. .dump_lksb = user_dlm_dump_lksb,
  761. };
  762. static struct ocfs2_stack_plugin ocfs2_user_plugin = {
  763. .sp_name = "user",
  764. .sp_ops = &ocfs2_user_plugin_ops,
  765. .sp_owner = THIS_MODULE,
  766. };
  767. static int __init ocfs2_user_plugin_init(void)
  768. {
  769. int rc;
  770. rc = ocfs2_control_init();
  771. if (!rc) {
  772. rc = ocfs2_stack_glue_register(&ocfs2_user_plugin);
  773. if (rc)
  774. ocfs2_control_exit();
  775. }
  776. return rc;
  777. }
  778. static void __exit ocfs2_user_plugin_exit(void)
  779. {
  780. ocfs2_stack_glue_unregister(&ocfs2_user_plugin);
  781. ocfs2_control_exit();
  782. }
  783. MODULE_AUTHOR("Oracle");
  784. MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
  785. MODULE_LICENSE("GPL");
  786. module_init(ocfs2_user_plugin_init);
  787. module_exit(ocfs2_user_plugin_exit);