server.c

/* AFS server record management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
#include "protocol_yfs.h"

static unsigned afs_server_gc_delay = 10;      /* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30;  /* Time till VLDB recheck in secs */
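
/*
 * Outstanding-record accounting for the namespace: each server record and
 * each pending piece of server-manager work holds a count on
 * net->servers_outstanding, and the final decrement wakes anyone waiting in
 * afs_purge_servers().
 */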
static void afs_inc_servers_outstanding(struct afs_net *net)
{
        atomic_inc(&net->servers_outstanding);
}

static void afs_dec_servers_outstanding(struct afs_net *net)
{
        if (atomic_dec_and_test(&net->servers_outstanding))
                wake_up_var(&net->servers_outstanding);
}

/*
 * Find a server by one of its addresses.
 */
struct afs_server *afs_find_server(struct afs_net *net,
                                   const struct sockaddr_rxrpc *srx)
{
        const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
        const struct afs_addr_list *alist;
        struct afs_server *server = NULL;
        unsigned int i;
        bool ipv6 = true;
        int seq = 0, diff;
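
        /* Decide which reverse-map list to search: addresses that look like
         * IPv4-mapped or IPv4-compatible IPv6 addresses are kept on the IPv4
         * list rather than the IPv6 one.
         */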
        if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
            srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
            srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
                ipv6 = false;

        rcu_read_lock();

        do {
                if (server)
                        afs_put_server(net, server);
                server = NULL;

                read_seqbegin_or_lock(&net->fs_addr_lock, &seq);

                if (ipv6) {
                        hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
                                alist = rcu_dereference(server->addresses);
                                for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
                                        b = &alist->addrs[i].transport.sin6;
                                        diff = ((u16 __force)a->sin6_port -
                                                (u16 __force)b->sin6_port);
                                        if (diff == 0)
                                                diff = memcmp(&a->sin6_addr,
                                                              &b->sin6_addr,
                                                              sizeof(struct in6_addr));
                                        if (diff == 0)
                                                goto found;
                                }
                        }
                } else {
                        hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
                                alist = rcu_dereference(server->addresses);
                                for (i = 0; i < alist->nr_ipv4; i++) {
                                        b = &alist->addrs[i].transport.sin6;
                                        diff = ((u16 __force)a->sin6_port -
                                                (u16 __force)b->sin6_port);
                                        if (diff == 0)
                                                diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
                                                        (u32 __force)b->sin6_addr.s6_addr32[3]);
                                        if (diff == 0)
                                                goto found;
                                }
                        }
                }

                server = NULL;
        found:
                if (server && !atomic_inc_not_zero(&server->usage))
                        server = NULL;

        } while (need_seqretry(&net->fs_addr_lock, seq));

        done_seqretry(&net->fs_addr_lock, seq);

        rcu_read_unlock();
        return server;
}

/*
 * Look up a server by its UUID
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
        struct afs_server *server = NULL;
        struct rb_node *p;
        int diff, seq = 0;

        _enter("%pU", uuid);

        do {
                /* Unfortunately, rbtree walking doesn't give reliable results
                 * under just the RCU read lock, so we have to check for
                 * changes.
                 */
                if (server)
                        afs_put_server(net, server);
                server = NULL;

                read_seqbegin_or_lock(&net->fs_lock, &seq);

                p = net->fs_servers.rb_node;
                while (p) {
                        server = rb_entry(p, struct afs_server, uuid_rb);

                        diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
                        if (diff < 0) {
                                p = p->rb_left;
                        } else if (diff > 0) {
                                p = p->rb_right;
                        } else {
                                afs_get_server(server);
                                break;
                        }

                        server = NULL;
                }
        } while (need_seqretry(&net->fs_lock, seq));

        done_seqretry(&net->fs_lock, seq);

        _leave(" = %p", server);
        return server;
}

/*
 * Install a server record in the namespace tree
 */
static struct afs_server *afs_install_server(struct afs_net *net,
                                             struct afs_server *candidate)
{
        const struct afs_addr_list *alist;
        struct afs_server *server;
        struct rb_node **pp, *p;
        int ret = -EEXIST, diff;

        _enter("%p", candidate);

        write_seqlock(&net->fs_lock);

        /* Firstly install the server in the UUID lookup tree */
        pp = &net->fs_servers.rb_node;
        p = NULL;
        while (*pp) {
                p = *pp;
                _debug("- consider %p", p);
                server = rb_entry(p, struct afs_server, uuid_rb);
                diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
                if (diff < 0)
                        pp = &(*pp)->rb_left;
                else if (diff > 0)
                        pp = &(*pp)->rb_right;
                else
                        goto exists;
        }

        server = candidate;
        rb_link_node(&server->uuid_rb, p, pp);
        rb_insert_color(&server->uuid_rb, &net->fs_servers);
        hlist_add_head_rcu(&server->proc_link, &net->fs_proc);

        write_seqlock(&net->fs_addr_lock);
        alist = rcu_dereference_protected(server->addresses,
                                          lockdep_is_held(&net->fs_addr_lock.lock));

        /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
         * it in the IPv4 and/or IPv6 reverse-map lists.
         *
         * TODO: For speed we want to use something other than a flat list
         * here; even sorting the list in terms of lowest address would help a
         * bit, but anything we might want to do gets messy and memory
         * intensive.
         */
        if (alist->nr_ipv4 > 0)
                hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
        if (alist->nr_addrs > alist->nr_ipv4)
                hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);

        write_sequnlock(&net->fs_addr_lock);
        ret = 0;

exists:
        afs_get_server(server);
        write_sequnlock(&net->fs_lock);
        return server;
}

/*
 * allocate a new server record
 */
static struct afs_server *afs_alloc_server(struct afs_net *net,
                                           const uuid_t *uuid,
                                           struct afs_addr_list *alist)
{
        struct afs_server *server;

        _enter("");

        server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
        if (!server)
                goto enomem;

        atomic_set(&server->usage, 1);
        RCU_INIT_POINTER(server->addresses, alist);
        server->addr_version = alist->version;
        server->uuid = *uuid;
        server->flags = (1UL << AFS_SERVER_FL_NEW);
        server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
        rwlock_init(&server->fs_lock);
        INIT_HLIST_HEAD(&server->cb_volumes);
        rwlock_init(&server->cb_break_lock);
        init_waitqueue_head(&server->probe_wq);
        spin_lock_init(&server->probe_lock);

        afs_inc_servers_outstanding(net);
        _leave(" = %p", server);
        return server;

enomem:
        _leave(" = NULL [nomem]");
        return NULL;
}

/*
 * Look up an address record for a server
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
                                                 struct key *key, const uuid_t *uuid)
{
        struct afs_vl_cursor vc;
        struct afs_addr_list *alist = NULL;
        int ret;

        ret = -ERESTARTSYS;
        if (afs_begin_vlserver_operation(&vc, cell, key)) {
                while (afs_select_vlserver(&vc)) {
                        if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
                                alist = afs_yfsvl_get_endpoints(&vc, uuid);
                        else
                                alist = afs_vl_get_addrs_u(&vc, uuid);
                }

                ret = afs_end_vlserver_operation(&vc);
        }

        return ret < 0 ? ERR_PTR(ret) : alist;
}

/*
 * Get or create a fileserver record.
 */
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
                                     const uuid_t *uuid)
{
        struct afs_addr_list *alist;
        struct afs_server *server, *candidate;

        _enter("%p,%pU", cell->net, uuid);

        server = afs_find_server_by_uuid(cell->net, uuid);
        if (server)
                return server;

        alist = afs_vl_lookup_addrs(cell, key, uuid);
        if (IS_ERR(alist))
                return ERR_CAST(alist);

        candidate = afs_alloc_server(cell->net, uuid, alist);
        if (!candidate) {
                afs_put_addrlist(alist);
                return ERR_PTR(-ENOMEM);
        }

        server = afs_install_server(cell->net, candidate);
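        /* If a record with this UUID already existed, the install returns
         * that instead; discard our candidate and the address list we gave it.
         */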
        if (server != candidate) {
                afs_put_addrlist(alist);
                kfree(candidate);
        }

        _leave(" = %p{%d}", server, atomic_read(&server->usage));
        return server;
}

/*
 * Set the server timer to fire after a given delay, assuming it's not already
 * set for an earlier time.
 */
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
        if (net->live) {
                afs_inc_servers_outstanding(net);
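                /* If the timer was already pending, it already holds a count
                 * on servers_outstanding, so drop the one we just took.
                 */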
                if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
                        afs_dec_servers_outstanding(net);
        }
}

/*
 * Server management timer.  We have an increment on fs_outstanding that we
 * need to pass along to the work item.
 */
void afs_servers_timer(struct timer_list *timer)
{
        struct afs_net *net = container_of(timer, struct afs_net, fs_timer);

        _enter("");
        if (!queue_work(afs_wq, &net->fs_manager))
                afs_dec_servers_outstanding(net);
}

/*
 * Release a reference on a server record.
 */
void afs_put_server(struct afs_net *net, struct afs_server *server)
{
        unsigned int usage;

        if (!server)
                return;

        server->put_time = ktime_get_real_seconds();

        usage = atomic_dec_return(&server->usage);

        _enter("{%u}", usage);

        if (likely(usage > 0))
                return;
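
        /* The last reference has gone; arm the management timer so that the
         * record can be garbage collected after a grace period.
         */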
        afs_set_server_timer(net, afs_server_gc_delay);
}
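
/*
 * RCU-delayed final destruction of a server record and its address list.
 */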
static void afs_server_rcu(struct rcu_head *rcu)
{
        struct afs_server *server = container_of(rcu, struct afs_server, rcu);

        afs_put_addrlist(rcu_access_pointer(server->addresses));
        kfree(server);
}

/*
 * destroy a dead server
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
        struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
        struct afs_addr_cursor ac = {
                .alist  = alist,
                .index  = alist->preferred,
                .error  = 0,
        };
        _enter("%p", server);

        if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
                afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
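
        /* Wait for any outstanding server probes to finish before the record
         * is handed to RCU for freeing.
         */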
        wait_var_event(&server->probe_outstanding,
                       atomic_read(&server->probe_outstanding) == 0);

        call_rcu(&server->rcu, afs_server_rcu);
        afs_dec_servers_outstanding(net);
}

/*
 * Garbage collect any expired servers.
 */
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
        struct afs_server *server;
        bool deleted;
        int usage;

        while ((server = gc_list)) {
                gc_list = server->gc_next;

                write_seqlock(&net->fs_lock);
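                /* Only delete the record if nobody regained a reference while
                 * it sat on the GC list (i.e. the usage count is still 1).
                 */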
                usage = 1;
                deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
                if (deleted) {
                        rb_erase(&server->uuid_rb, &net->fs_servers);
                        hlist_del_rcu(&server->proc_link);
                }
                write_sequnlock(&net->fs_lock);

                if (deleted) {
                        write_seqlock(&net->fs_addr_lock);
                        if (!hlist_unhashed(&server->addr4_link))
                                hlist_del_rcu(&server->addr4_link);
                        if (!hlist_unhashed(&server->addr6_link))
                                hlist_del_rcu(&server->addr6_link);
                        write_sequnlock(&net->fs_addr_lock);
                        afs_destroy_server(net, server);
                }
        }
}

/*
 * Manage the records of servers known to be within a network namespace.  This
 * includes garbage collecting unused servers.
 *
 * Note also that we were given an increment on net->servers_outstanding by
 * whoever queued us that we need to deal with before returning.
 */
void afs_manage_servers(struct work_struct *work)
{
        struct afs_net *net = container_of(work, struct afs_net, fs_manager);
        struct afs_server *gc_list = NULL;
        struct rb_node *cursor;
        time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
        bool purging = !net->live;

        _enter("");

        /* Trawl the server list looking for servers that have expired from
         * lack of use.
         */
        read_seqlock_excl(&net->fs_lock);

        for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
                struct afs_server *server =
                        rb_entry(cursor, struct afs_server, uuid_rb);
                int usage = atomic_read(&server->usage);

                _debug("manage %pU %u", &server->uuid, usage);

                ASSERTCMP(usage, >=, 1);
                ASSERTIFCMP(purging, usage, ==, 1);

                if (usage == 1) {
                        time64_t expire_at = server->put_time;
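
                        /* Records for servers that couldn't be resolved in the
                         * VLDB expire immediately; otherwise they get a grace
                         * period after the last reference was dropped.
                         */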
                        if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
                            !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
                                expire_at += afs_server_gc_delay;
                        if (purging || expire_at <= now) {
                                server->gc_next = gc_list;
                                gc_list = server;
                        } else if (expire_at < next_manage) {
                                next_manage = expire_at;
                        }
                }
        }

        read_sequnlock_excl(&net->fs_lock);

        /* Update the timer on the way out.  We have to pass an increment on
         * servers_outstanding in the namespace that we are in to the timer or
         * the work scheduler.
         */
        if (!purging && next_manage < TIME64_MAX) {
                now = ktime_get_real_seconds();

                if (next_manage - now <= 0) {
                        if (queue_work(afs_wq, &net->fs_manager))
                                afs_inc_servers_outstanding(net);
                } else {
                        afs_set_server_timer(net, next_manage - now);
                }
        }

        afs_gc_servers(net, gc_list);

        afs_dec_servers_outstanding(net);
        _leave(" [%d]", atomic_read(&net->servers_outstanding));
}
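
/*
 * Queue the server manager work item, passing along an increment on
 * servers_outstanding; the increment is dropped again if the work item was
 * already queued.
 */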
static void afs_queue_server_manager(struct afs_net *net)
{
        afs_inc_servers_outstanding(net);
        if (!queue_work(afs_wq, &net->fs_manager))
                afs_dec_servers_outstanding(net);
}

/*
 * Purge list of servers.
 */
void afs_purge_servers(struct afs_net *net)
{
        _enter("");

        if (del_timer_sync(&net->fs_timer))
                atomic_dec(&net->servers_outstanding);

        afs_queue_server_manager(net);

        _debug("wait");
        wait_var_event(&net->servers_outstanding,
                       !atomic_read(&net->servers_outstanding));
        _leave("");
}

/*
 * Get an update for a server's address list.
 */
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
        struct afs_addr_list *alist, *discard;

        _enter("");

        alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
                                    &server->uuid);
        if (IS_ERR(alist)) {
                fc->ac.error = PTR_ERR(alist);
                _leave(" = f [%d]", fc->ac.error);
                return false;
        }

        discard = alist;
        if (server->addr_version != alist->version) {
                write_lock(&server->fs_lock);
                discard = rcu_dereference_protected(server->addresses,
                                                    lockdep_is_held(&server->fs_lock));
                rcu_assign_pointer(server->addresses, alist);
                server->addr_version = alist->version;
                write_unlock(&server->fs_lock);
        }

        server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
        afs_put_addrlist(discard);
        _leave(" = t");
        return true;
}

/*
 * See if a server's address list needs updating.
 */
bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
        time64_t now = ktime_get_real_seconds();
        long diff;
        bool success;
        int ret, retries = 0;

        _enter("");

        ASSERT(server);

retry:
        diff = READ_ONCE(server->update_at) - now;
        if (diff > 0) {
                _leave(" = t [not now %ld]", diff);
                return true;
        }

        if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
                success = afs_update_server_record(fc, server);
                clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
                wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
                _leave(" = %d", success);
                return success;
        }

        ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
                          TASK_INTERRUPTIBLE);
        if (ret == -ERESTARTSYS) {
                fc->ac.error = ret;
                _leave(" = f [intr]");
                return false;
        }

        retries++;
        if (retries == 4) {
                _leave(" = f [stale]");
                ret = -ESTALE;
                return false;
        }
        goto retry;
}