staging: lustre: delete the filesystem from the tree.

The Lustre filesystem has been in the kernel tree for over 5 years now.
While it has been an endless source of enjoyment for new kernel
developers learning how to do basic coding-style cleanups, as well as a
semi-entertaining source of bewilderment for the vfs developers any
time they have looked into the codebase to try to figure out how to port
their latest api changes to this filesystem, it has never really reached
the "this is in shape to get out of staging" state, despite many
half-completed attempts.

And getting code out of staging is the main goal of that portion of the
kernel tree.  Code should not stagnate, and it feels like having this
code in staging is only making the development cycle of the filesystem
take longer than it should.  There is a whole separate out-of-tree
copy of this codebase where the developers do their work, and random
changes are then thrown over the wall at staging at some later point in
time.  This dual-tree development model has never worked, and the state
of this codebase is proof of that.

So, let's just delete the whole mess.  Now the lustre developers can go
off and work in their out-of-tree codebase without having to worry about
providing valid changelog entries and breaking their patches up into
logical pieces.  They can take the time they would have spent on those
types of housekeeping chores and get the codebase into much better
shape, and it can be submitted for inclusion into the real part of the
kernel tree when ready.

Cc: Oleg Drokin <oleg.drokin@intel.com>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
commit be65f9ed26 by Greg Kroah-Hartman
100 files changed, 0 insertions(+), 61189 deletions(-)
 MAINTAINERS | 9 -
 drivers/staging/Kconfig | 2 -
 drivers/staging/Makefile | 1 -
 drivers/staging/lustre/Kconfig | 3 -
 drivers/staging/lustre/Makefile | 2 -
 drivers/staging/lustre/README.txt | 83 -
 drivers/staging/lustre/TODO | 302 -
 drivers/staging/lustre/include/linux/libcfs/libcfs.h | 76 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h | 434 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h | 208 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h | 207 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h | 194 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h | 869 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_private.h | 200 -
 drivers/staging/lustre/include/linux/libcfs/libcfs_string.h | 102 -
 drivers/staging/lustre/include/linux/lnet/api.h | 212 -
 drivers/staging/lustre/include/linux/lnet/lib-lnet.h | 652 -
 drivers/staging/lustre/include/linux/lnet/lib-types.h | 666 -
 drivers/staging/lustre/include/linux/lnet/socklnd.h | 87 -
 drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h | 149 -
 drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h | 141 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h | 150 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h | 669 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h | 123 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h | 556 -
 drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h | 119 -
 drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h | 44 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h | 261 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h | 293 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h | 72 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h | 2690 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h | 229 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h | 94 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h | 236 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h | 94 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h | 1327 -
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h | 27 -
 drivers/staging/lustre/lnet/Kconfig | 46 -
 drivers/staging/lustre/lnet/Makefile | 1 -
 drivers/staging/lustre/lnet/klnds/Makefile | 1 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile | 5 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 2958 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 1048 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 3763 -
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c | 296 -
 drivers/staging/lustre/lnet/klnds/socklnd/Makefile | 6 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c | 2921 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h | 704 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c | 2586 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 534 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c | 184 -
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c | 810 -
 drivers/staging/lustre/lnet/libcfs/Makefile | 16 -
 drivers/staging/lustre/lnet/libcfs/debug.c | 461 -
 drivers/staging/lustre/lnet/libcfs/fail.c | 146 -
 drivers/staging/lustre/lnet/libcfs/hash.c | 2065 -
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 1086 -
 drivers/staging/lustre/lnet/libcfs/libcfs_lock.c | 155 -
 drivers/staging/lustre/lnet/libcfs/libcfs_mem.c | 171 -
 drivers/staging/lustre/lnet/libcfs/libcfs_string.c | 562 -
 drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c | 139 -
 drivers/staging/lustre/lnet/libcfs/linux-crypto.c | 447 -
 drivers/staging/lustre/lnet/libcfs/linux-crypto.h | 30 -
 drivers/staging/lustre/lnet/libcfs/linux-debug.c | 142 -
 drivers/staging/lustre/lnet/libcfs/linux-tracefile.c | 258 -
 drivers/staging/lustre/lnet/libcfs/module.c | 758 -
 drivers/staging/lustre/lnet/libcfs/tracefile.c | 1198 -
 drivers/staging/lustre/lnet/libcfs/tracefile.h | 274 -
 drivers/staging/lustre/lnet/lnet/Makefile | 10 -
 drivers/staging/lustre/lnet/lnet/acceptor.c | 501 -
 drivers/staging/lustre/lnet/lnet/api-ni.c | 2307 -
 drivers/staging/lustre/lnet/lnet/config.c | 1235 -
 drivers/staging/lustre/lnet/lnet/lib-eq.c | 426 -
 drivers/staging/lustre/lnet/lnet/lib-md.c | 463 -
 drivers/staging/lustre/lnet/lnet/lib-me.c | 274 -
 drivers/staging/lustre/lnet/lnet/lib-move.c | 2386 -
 drivers/staging/lustre/lnet/lnet/lib-msg.c | 625 -
 drivers/staging/lustre/lnet/lnet/lib-ptl.c | 987 -
 drivers/staging/lustre/lnet/lnet/lib-socket.c | 585 -
 drivers/staging/lustre/lnet/lnet/lo.c | 105 -
 drivers/staging/lustre/lnet/lnet/module.c | 239 -
 drivers/staging/lustre/lnet/lnet/net_fault.c | 1023 -
 drivers/staging/lustre/lnet/lnet/nidstrings.c | 1261 -
 drivers/staging/lustre/lnet/lnet/peer.c | 456 -
 drivers/staging/lustre/lnet/lnet/router.c | 1799 -
 drivers/staging/lustre/lnet/lnet/router_proc.c | 907 -
 drivers/staging/lustre/lnet/selftest/Makefile | 7 -
 drivers/staging/lustre/lnet/selftest/brw_test.c | 526 -
 drivers/staging/lustre/lnet/selftest/conctl.c | 801 -
 drivers/staging/lustre/lnet/selftest/conrpc.c | 1396 -
 drivers/staging/lustre/lnet/selftest/conrpc.h | 142 -
 drivers/staging/lustre/lnet/selftest/console.c | 2104 -
 drivers/staging/lustre/lnet/selftest/console.h | 244 -
 drivers/staging/lustre/lnet/selftest/framework.c | 1786 -
 drivers/staging/lustre/lnet/selftest/module.c | 169 -
 drivers/staging/lustre/lnet/selftest/ping_test.c | 228 -
 drivers/staging/lustre/lnet/selftest/rpc.c | 1682 -
 drivers/staging/lustre/lnet/selftest/rpc.h | 295 -
 drivers/staging/lustre/lnet/selftest/selftest.h | 622 -
 drivers/staging/lustre/lnet/selftest/timer.c | 244 -

+ 0 - 9
MAINTAINERS

@@ -13329,15 +13329,6 @@ S:	Odd Fixes
 F:	Documentation/devicetree/bindings/staging/iio/
 F:	drivers/staging/iio/
 
-STAGING - LUSTRE PARALLEL FILESYSTEM
-M:	Oleg Drokin <oleg.drokin@intel.com>
-M:	Andreas Dilger <andreas.dilger@intel.com>
-M:	James Simmons <jsimmons@infradead.org>
-L:	lustre-devel@lists.lustre.org (moderated for non-subscribers)
-W:	http://wiki.lustre.org/
-S:	Maintained
-F:	drivers/staging/lustre
-
 STAGING - NVIDIA COMPLIANT EMBEDDED CONTROLLER INTERFACE (nvec)
 M:	Marc Dietrich <marvin24@gmx.de>
 L:	ac100@lists.launchpad.net (moderated for non-subscribers)

+ 0 - 2
drivers/staging/Kconfig

@@ -84,8 +84,6 @@ source "drivers/staging/netlogic/Kconfig"
 
 source "drivers/staging/mt29f_spinand/Kconfig"
 
-source "drivers/staging/lustre/Kconfig"
-
 source "drivers/staging/dgnc/Kconfig"
 
 source "drivers/staging/gs_fpgaboot/Kconfig"

+ 0 - 1
drivers/staging/Makefile

@@ -32,7 +32,6 @@ obj-$(CONFIG_STAGING_BOARD)	+= board/
 obj-$(CONFIG_LTE_GDM724X)	+= gdm724x/
 obj-$(CONFIG_FIREWIRE_SERIAL)	+= fwserial/
 obj-$(CONFIG_GOLDFISH)		+= goldfish/
-obj-$(CONFIG_LNET)		+= lustre/
 obj-$(CONFIG_DGNC)			+= dgnc/
 obj-$(CONFIG_MTD_SPINAND_MT29F)	+= mt29f_spinand/
 obj-$(CONFIG_GS_FPGABOOT)	+= gs_fpgaboot/

+ 0 - 3
drivers/staging/lustre/Kconfig

@@ -1,3 +0,0 @@
-source "drivers/staging/lustre/lnet/Kconfig"
-
-source "drivers/staging/lustre/lustre/Kconfig"

+ 0 - 2
drivers/staging/lustre/Makefile

@@ -1,2 +0,0 @@
-obj-$(CONFIG_LNET)		+= lnet/
-obj-$(CONFIG_LUSTRE_FS)		+= lustre/

+ 0 - 83
drivers/staging/lustre/README.txt

@@ -1,83 +0,0 @@
-Lustre Parallel Filesystem Client
-=================================
-
-The Lustre file system is an open-source, parallel file system
-that supports many requirements of leadership class HPC simulation
-environments.
-Born from a research project at Carnegie Mellon University,
-the Lustre file system is a widely-used option in HPC.
-The Lustre file system provides a POSIX compliant file system interface,
-can scale to thousands of clients, petabytes of storage and
-hundreds of gigabytes per second of I/O bandwidth.
-
-Unlike shared disk storage cluster filesystems (e.g. OCFS2, GFS, GPFS),
-Lustre has independent Metadata and Data servers that clients can access
-in parallel to maximize performance.
-
-In order to use Lustre client you will need to download the "lustre-client"
-package that contains the userspace tools from http://lustre.org/download/
-
-You will need to install and configure your Lustre servers separately.
-
-Mount Syntax
-============
-After you installed the lustre-client tools including mount.lustre binary
-you can mount your Lustre filesystem with:
-
-mount -t lustre mgs:/fsname mnt
-
-where mgs is the host name or ip address of your Lustre MGS(management service)
-fsname is the name of the filesystem you would like to mount.
-
-
-Mount Options
-=============
-
-  noflock
-	Disable posix file locking (Applications trying to use
-	the functionality will get ENOSYS)
-
-  localflock
-	Enable local flock support, using only client-local flock
-	(faster, for applications that require flock but do not run
-	 on multiple nodes).
-
-  flock
-	Enable cluster-global posix file locking coherent across all
-	client nodes.
-
-  user_xattr, nouser_xattr
-	Support "user." extended attributes (or not)
-
-  user_fid2path, nouser_fid2path
-	Enable FID to path translation by regular users (or not)
-
-  checksum, nochecksum
-	Verify data consistency on the wire and in memory as it passes
-	between the layers (or not).
-
-  lruresize, nolruresize
-	Allow lock LRU to be controlled by memory pressure on the server
-	(or only 100 (default, controlled by lru_size proc parameter) locks
-	 per CPU per server on this client).
-
-  lazystatfs, nolazystatfs
-	Do not block in statfs() if some of the servers are down.
-
-  32bitapi
-	Shrink inode numbers to fit into 32 bits. This is necessary
-	if you plan to reexport Lustre filesystem from this client via
-	NFSv4.
-
-  verbose, noverbose
-	Enable mount/umount console messages (or not)
-
-More Information
-================
-You can get more information at the Lustre website: http://wiki.lustre.org/
-
-Source for the userspace tools and out-of-tree client and server code
-is available at: http://git.hpdd.intel.com/fs/lustre-release.git
-
-Latest binary packages:
-http://lustre.org/download/
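
[Editor's note: the mount command in the deleted README above maps onto a
single mount(2) call.  The sketch below is not Lustre code; "mgs" and
"fsname" are the same placeholders the README uses, and it assumes a Lustre
client module is loaded so the "lustre" filesystem type is registered.]

    /* Hedged sketch: C equivalent of "mount -t lustre mgs:/fsname /mnt/lustre" */
    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* device is "<mgs host>:/<fsname>"; the data argument carries
             * the mount options listed in the README above */
            if (mount("mgs:/fsname", "/mnt/lustre", "lustre", 0,
                      "flock,user_xattr") != 0) {
                    fprintf(stderr, "mount failed: %s\n", strerror(errno));
                    return 1;
            }
            return 0;
    }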

+ 0 - 302
drivers/staging/lustre/TODO

@@ -1,302 +0,0 @@
-Currently all the work directed toward the lustre upstream client is tracked
-at the following link:
-
-https://jira.hpdd.intel.com/browse/LU-9679
-
-Under this ticket you will see the following work items that need to be
-addressed:
-
-******************************************************************************
-* libcfs cleanup
-*
-* https://jira.hpdd.intel.com/browse/LU-9859
-*
-* Track all the cleanups and simplification of the libcfs module. Remove
-* functions the kernel provides. Possibly integrate some of the functionality
-* into the kernel proper.
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-100086
-
-LNET_MINOR conflicts with USERIO_MINOR
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8130
-
-Fix and simplify libcfs hash handling
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8703
-
-The current way we handle SMP is wrong. Platforms like ARM and KNL can have
-core and NUMA setups with things like NUMA nodes with no cores. We need to
-handle such cases. This work also greatly simplified the lustre SMP code.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9019
-
-Replace libcfs time API with standard kernel APIs. Also migrate away from
-jiffies. We found jiffies can vary on nodes which can lead to corner cases
-that can break the file system due to nodes having inconsistent behavior.
-So move to time64_t and ktime_t as much as possible.
-
-******************************************************************************
-* Proper IB support for ko2iblnd
-******************************************************************************
-https://jira.hpdd.intel.com/browse/LU-9179
-
-Poor performance for the ko2iblnd driver. This is related to many of the
-patches below that are missing from the linux client.
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9886
-
-Crash in upstream kiblnd_handle_early_rxs()
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10394 / LU-10526 / LU-10089
-
-Default to default to using MEM_REG
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10459
-
-throttle tx based on queue depth
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9943
-
-correct WR fast reg accounting
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10291
-
-remove concurrent_sends tunable
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10213
-
-calculate qp max_send_wrs properly
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9810
-
-use less CQ entries for each connection
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10129 / LU-9180
-
-rework map_on_demand behavior
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10129
-
-query device capabilities
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10015
-
-fix race at kiblnd_connect_peer
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9983
-
-allow for discontiguous fragments
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9500
-
-Don't Page Align remote_addr with FastReg
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9448
-
-handle empty CPTs
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9507
-
-Don't Assert On Reconnect with MultiQP
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9472
-
-Fix FastReg map/unmap for MLX5
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9425
-
-Turn on 2 sges by default
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8943
-
-Enable Multiple OPA Endpoints between Nodes
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-5718
-
-multiple sges for work request
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9094
-
-kill timedout txs from ibp_tx_queue
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9094
-
-reconnect peer for REJ_INVALID_SERVICE_ID
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8752
-
-Stop MLX5 triggering a dump_cqe
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8874
-
-Move ko2iblnd to latest RDMA changes
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8875 / LU-8874
-
-Change to new RDMA done callback mechanism
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9164 / LU-8874
-
-Incorporate RDMA map/unamp API's into ko2iblnd
-
-******************************************************************************
-* sysfs/debugfs fixes
-*
-* https://jira.hpdd.intel.com/browse/LU-8066
-*
-* The original migration to sysfs was done in haste without properly working
-* utilities to test the changes. This covers the work to restore the proper
-* behavior. Huge project to make this right.
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-9431
-
-The function class_process_proc_param was used for our mass updates of proc
-tunables. It didn't work with sysfs and it was just ugly so it was removed.
-In the process the ability to mass update thousands of clients was lost. This
-work restores this in a sane way.
-
-------------------------------------------------------------------------------
-https://jira.hpdd.intel.com/browse/LU-9091
-
-One the major request of users is the ability to pass in parameters into a
-sysfs file in various different units. For example we can set max_pages_per_rpc
-but this can vary on platforms due to different platform sizes. So you can
-set this like max_pages_per_rpc=16MiB. The original code to handle this written
-before the string helpers were created so the code doesn't follow that format
-but it would be easy to move to. Currently the string helpers does the reverse
-of what we need, changing bytes to string. We need to change a string to bytes.
-
-******************************************************************************
-* Proper user land to kernel space interface for Lustre
-*
-* https://jira.hpdd.intel.com/browse/LU-9680
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-8915
-
-Don't use linux list structure as user land arguments for lnet selftest.
-This code is pretty poor quality and really needs to be reworked.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8834
-
-The lustre ioctl LL_IOC_FUTIMES_3 is very generic. Need to either work with
-other file systems with similar functionality and make a common syscall
-interface or rework our server code to automagically do it for us.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-6202
-
-Cleanup up ioctl handling. We have many obsolete ioctls. Also the way we do
-ioctls can be changed over to netlink. This also has the benefit of working
-better with HPC systems that do IO forwarding. Such systems don't like ioctls
-very well.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9667
-
-More cleanups by making our utilities use sysfs instead of ioctls for LNet.
-Also it has been requested to move the remaining ioctls to the netlink API.
-
-******************************************************************************
-* Misc
-******************************************************************************
-
-------------------------------------------------------------------------------
-https://jira.hpdd.intel.com/browse/LU-9855
-
-Clean up obdclass preprocessor code. One of the major eye sores is the various
-pointer redirections and macros used by the obdclass. This makes the code very
-difficult to understand. It was requested by the Al Viro to clean this up before
-we leave staging.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9633
-
-Migrate to sphinx kernel-doc style comments. Add documents in Documentation.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-6142
-
-Possible remaining coding style fix. Remove deadcode. Enforce kernel code
-style. Other minor misc cleanups...
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8837
-
-Separate client/server functionality. Functions only used by server can be
-removed from client. Most of this has been done but we need a inspect of the
-code to make sure.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8964
-
-Lustre client readahead/writeback control needs to better suit kernel providings.
-Currently its being explored. We could end up replacing the CLIO read ahead
-abstract with the kernel proper version.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9862
-
-Patch that landed for LU-7890 leads to static checker errors
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9868
-
-dcache/namei fixes for lustre
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10467
-
-use standard linux wait_events macros work by Neil Brown
-
-------------------------------------------------------------------------------
-
-Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, Andreas Dilger
-<andreas.dilger@intel.com>, James Simmons <jsimmons@infradead.org> and
-Oleg Drokin <oleg.drokin@intel.com>.
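
[Editor's note: on the LU-9091 item above (accepting values like
"max_pages_per_rpc=16MiB"), the missing string-to-bytes direction is a small
amount of code.  This userspace sketch is purely illustrative and is not the
Lustre or kernel implementation:]

    /* Illustrative only: turn "16MiB"-style strings into byte counts */
    #include <stdio.h>
    #include <stdlib.h>

    static long long str_to_bytes(const char *s)
    {
            char *end;
            long long val = strtoll(s, &end, 10);

            if (end == s)
                    return -1;
            /* binary units; a trailing "iB" is not validated in this sketch */
            switch (*end) {
            case 'G': val <<= 10;   /* fall through */
            case 'M': val <<= 10;   /* fall through */
            case 'K': val <<= 10;   break;
            case '\0': break;
            default: return -1;
            }
            return val;
    }

    int main(void)
    {
            printf("16MiB = %lld bytes\n", str_to_bytes("16MiB")); /* 16777216 */
            return 0;
    }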

+ 0 - 76
drivers/staging/lustre/include/linux/libcfs/libcfs.h

@@ -1,76 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LIBCFS_LIBCFS_H__
-#define __LIBCFS_LIBCFS_H__
-
-#include <linux/notifier.h>
-#include <linux/workqueue.h>
-#include <linux/sysctl.h>
-
-#include <linux/libcfs/libcfs_debug.h>
-#include <linux/libcfs/libcfs_private.h>
-#include <linux/libcfs/libcfs_fail.h>
-
-#define LIBCFS_VERSION "0.7.0"
-
-extern struct blocking_notifier_head libcfs_ioctl_list;
-static inline int notifier_from_ioctl_errno(int err)
-{
-	if (err == -EINVAL)
-		return NOTIFY_OK;
-	return notifier_from_errno(err) | NOTIFY_STOP_MASK;
-}
-
-int libcfs_setup(void);
-
-extern struct workqueue_struct *cfs_rehash_wq;
-
-void lustre_insert_debugfs(struct ctl_table *table);
-int lprocfs_call_handler(void *data, int write, loff_t *ppos,
-			 void __user *buffer, size_t *lenp,
-			 int (*handler)(void *data, int write, loff_t pos,
-					void __user *buffer, int len));
-
-/*
- * Memory
- */
-#if BITS_PER_LONG == 32
-/* limit to lowmem on 32-bit systems */
-#define NUM_CACHEPAGES \
-	min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4)
-#else
-#define NUM_CACHEPAGES totalram_pages
-#endif
-
-#endif /* __LIBCFS_LIBCFS_H__ */

+ 0 - 434
drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h

@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_cpu.h
- *
- * CPU partition
- *   . CPU partition is virtual processing unit
- *
- *   . CPU partition can present 1-N cores, or 1-N NUMA nodes,
- *     in other words, CPU partition is a processors pool.
- *
- * CPU Partition Table (CPT)
- *   . a set of CPU partitions
- *
- *   . There are two modes for CPT: CFS_CPU_MODE_NUMA and CFS_CPU_MODE_SMP
- *
- *   . User can specify total number of CPU partitions while creating a
- *     CPT, ID of CPU partition is always start from 0.
- *
- *     Example: if there are 8 cores on the system, while creating a CPT
- *     with cpu_npartitions=4:
- *	      core[0, 1] = partition[0], core[2, 3] = partition[1]
- *	      core[4, 5] = partition[2], core[6, 7] = partition[3]
- *
- *	  cpu_npartitions=1:
- *	      core[0, 1, ... 7] = partition[0]
- *
- *   . User can also specify CPU partitions by string pattern
- *
- *     Examples: cpu_partitions="0[0,1], 1[2,3]"
- *	       cpu_partitions="N 0[0-3], 1[4-8]"
- *
- *     The first character "N" means following numbers are numa ID
- *
- *   . NUMA allocators, CPU affinity threads are built over CPU partitions,
- *     instead of HW CPUs or HW nodes.
- *
- *   . By default, Lustre modules should refer to the global cfs_cpt_tab,
- *     instead of accessing HW CPUs directly, so concurrency of Lustre can be
- *     configured by cpu_npartitions of the global cfs_cpt_tab
- *
- *   . If cpu_npartitions=1(all CPUs in one pool), lustre should work the
- *     same way as 2.2 or earlier versions
- *
- * Author: liang@whamcloud.com
- */
-
-#ifndef __LIBCFS_CPU_H__
-#define __LIBCFS_CPU_H__
-
-#include <linux/cpu.h>
-#include <linux/cpuset.h>
-#include <linux/topology.h>
-
-/* any CPU partition */
-#define CFS_CPT_ANY		(-1)
-
-#ifdef CONFIG_SMP
-/** virtual processing unit */
-struct cfs_cpu_partition {
-	/* CPUs mask for this partition */
-	cpumask_var_t			cpt_cpumask;
-	/* nodes mask for this partition */
-	nodemask_t			*cpt_nodemask;
-	/* spread rotor for NUMA allocator */
-	unsigned int			cpt_spread_rotor;
-};
-
-
-/** descriptor for CPU partitions */
-struct cfs_cpt_table {
-	/* version, reserved for hotplug */
-	unsigned int			ctb_version;
-	/* spread rotor for NUMA allocator */
-	unsigned int			ctb_spread_rotor;
-	/* # of CPU partitions */
-	unsigned int			ctb_nparts;
-	/* partitions tables */
-	struct cfs_cpu_partition	*ctb_parts;
-	/* shadow HW CPU to CPU partition ID */
-	int				*ctb_cpu2cpt;
-	/* all cpus in this partition table */
-	cpumask_var_t			ctb_cpumask;
-	/* all nodes in this partition table */
-	nodemask_t			*ctb_nodemask;
-};
-
-extern struct cfs_cpt_table	*cfs_cpt_tab;
-
-/**
- * return cpumask of CPU partition \a cpt
- */
-cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
-/**
- * print string information of cpt-table
- */
-int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
-/**
- * return total number of CPU partitions in \a cptab
- */
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab);
-/**
- * return number of HW cores or hyper-threadings in a CPU partition \a cpt
- */
-int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt);
-/**
- * is there any online CPU in CPU partition \a cpt
- */
-int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt);
-/**
- * return nodemask of CPU partition \a cpt
- */
-nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt);
-/**
- * shadow current HW processor ID to CPU-partition ID of \a cptab
- */
-int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap);
-/**
- * shadow HW processor ID \a CPU to CPU-partition ID by \a cptab
- */
-int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
-/**
- * bind current thread on a CPU-partition \a cpt of \a cptab
- */
-int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
-/**
- * add \a cpu to CPU partition @cpt of \a cptab, return 1 for success,
- * otherwise 0 is returned
- */
-int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
-/**
- * remove \a cpu from CPU partition \a cpt of \a cptab
- */
-void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
-/**
- * add all cpus in \a mask to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
-			int cpt, cpumask_t *mask);
-/**
- * remove all cpus in \a mask from CPU partition \a cpt
- */
-void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
-			   int cpt, cpumask_t *mask);
-/**
- * add all cpus in NUMA node \a node to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node);
-/**
- * remove all cpus in NUMA node \a node from CPU partition \a cpt
- */
-void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
-
-/**
- * add all cpus in node mask \a mask to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
-			 int cpt, nodemask_t *mask);
-/**
- * remove all cpus in node mask \a mask from CPU partition \a cpt
- */
-void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
-			    int cpt, nodemask_t *mask);
-/**
- * unset all cpus for CPU partition \a cpt
- */
-void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
-/**
- * convert partition id \a cpt to numa node id, if there are more than one
- * nodes in this partition, it might return a different node id each time.
- */
-int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
-
-/**
- * return number of HTs in the same core of \a cpu
- */
-int cfs_cpu_ht_nsiblings(int cpu);
-
-int  cfs_cpu_init(void);
-void cfs_cpu_fini(void);
-
-#else /* !CONFIG_SMP */
-struct cfs_cpt_table;
-#define cfs_cpt_tab ((struct cfs_cpt_table *)NULL)
-
-static inline cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
-	return NULL;
-}
-
-static inline int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
-	return 0;
-}
-static inline int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
-	return 1;
-}
-
-static inline int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
-	return 1;
-}
-
-static inline int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
-	return 1;
-}
-
-static inline nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
-	return NULL;
-}
-
-static inline int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-	return 1;
-}
-
-static inline void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-}
-
-static inline int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-	return 1;
-}
-
-static inline void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-}
-
-static inline int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-	return 1;
-}
-
-static inline void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-}
-
-static inline int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-	return 1;
-}
-
-static inline void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-}
-
-static inline void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-}
-
-static inline int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
-	return 0;
-}
-
-static inline int
-cfs_cpu_ht_nsiblings(int cpu)
-{
-	return 1;
-}
-
-static inline int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
-	return 0;
-}
-
-static inline int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
-	return 0;
-}
-
-static inline int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
-	return 0;
-}
-
-static inline int
-cfs_cpu_init(void)
-{
-	return 0;
-}
-
-static inline void cfs_cpu_fini(void)
-{
-}
-
-#endif /* CONFIG_SMP */
-
-/**
- * destroy a CPU partition table
- */
-void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
-/**
- * create a cfs_cpt_table with \a ncpt number of partitions
- */
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
-
-/*
- * allocate per-cpu-partition data, returned value is an array of pointers,
- * variable can be indexed by CPU ID.
- *	cptab != NULL: size of array is number of CPU partitions
- *	cptab == NULL: size of array is number of HW cores
- */
-void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
-/*
- * destroy per-cpu-partition variable
- */
-void cfs_percpt_free(void *vars);
-int cfs_percpt_number(void *vars);
-
-#define cfs_percpt_for_each(var, i, vars)		\
-	for (i = 0; i < cfs_percpt_number(vars) &&	\
-		((var) = (vars)[i]) != NULL; i++)
-
-/*
- * percpu partition lock
- *
- * There are some use-cases like this in Lustre:
- * . each CPU partition has it's own private data which is frequently changed,
- *   and mostly by the local CPU partition.
- * . all CPU partitions share some global data, these data are rarely changed.
- *
- * LNet is typical example.
- * CPU partition lock is designed for this kind of use-cases:
- * . each CPU partition has it's own private lock
- * . change on private data just needs to take the private lock
- * . read on shared data just needs to take _any_ of private locks
- * . change on shared data needs to take _all_ private locks,
- *   which is slow and should be really rare.
- */
-enum {
-	CFS_PERCPT_LOCK_EX	= -1,	/* negative */
-};
-
-struct cfs_percpt_lock {
-	/* cpu-partition-table for this lock */
-	struct cfs_cpt_table     *pcl_cptab;
-	/* exclusively locked */
-	unsigned int		  pcl_locked;
-	/* private lock table */
-	spinlock_t		**pcl_locks;
-};
-
-/* return number of private locks */
-#define cfs_percpt_lock_num(pcl)	cfs_cpt_number(pcl->pcl_cptab)
-
-/*
- * create a cpu-partition lock based on CPU partition table \a cptab,
- * each private lock has extra \a psize bytes padding data
- */
-struct cfs_percpt_lock *cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
-					       struct lock_class_key *keys);
-/* destroy a cpu-partition lock */
-void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
-
-/* lock private lock \a index of \a pcl */
-void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
-
-/* unlock private lock \a index of \a pcl */
-void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
-
-#define CFS_PERCPT_LOCK_KEYS	256
-
-/* NB: don't allocate keys dynamically, lockdep needs them to be in ".data" */
-#define cfs_percpt_lock_alloc(cptab)					\
-({									\
-	static struct lock_class_key ___keys[CFS_PERCPT_LOCK_KEYS];	\
-	struct cfs_percpt_lock *___lk;					\
-									\
-	if (cfs_cpt_number(cptab) > CFS_PERCPT_LOCK_KEYS)		\
-		___lk = cfs_percpt_lock_create(cptab, NULL);		\
-	else								\
-		___lk = cfs_percpt_lock_create(cptab, ___keys);		\
-	___lk;								\
-})
-
-/**
- * iterate over all CPU partitions in \a cptab
- */
-#define cfs_cpt_for_each(i, cptab)	\
-	for (i = 0; i < cfs_cpt_number(cptab); i++)
-
-#endif /* __LIBCFS_CPU_H__ */
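
[Editor's note: the header comment above works through the cpu_npartitions
example (8 cores split evenly into 4 partitions).  A standalone sketch of
that even split, illustrative only; the real table is built by the deleted
libcfs_cpu.c:]

    #include <stdio.h>

    int main(void)
    {
            int ncores = 8, nparts = 4;
            int per_part = ncores / nparts;
            int cpu;

            /* reproduces: core[0,1]=partition[0], core[2,3]=partition[1], ... */
            for (cpu = 0; cpu < ncores; cpu++)
                    printf("core[%d] = partition[%d]\n", cpu, cpu / per_part);
            return 0;
    }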

+ 0 - 208
drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h

@@ -1,208 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- */
-
-#ifndef _LIBCFS_CRYPTO_H
-#define _LIBCFS_CRYPTO_H
-
-#include <linux/string.h>
-struct page;
-
-struct cfs_crypto_hash_type {
-	char		*cht_name;      /*< hash algorithm name, equal to
-					 * format name for crypto api
-					 */
-	unsigned int    cht_key;	/*< init key by default (valid for
-					 * 4 bytes context like crc32, adler
-					 */
-	unsigned int    cht_size;       /**< hash digest size */
-};
-
-enum cfs_crypto_hash_alg {
-	CFS_HASH_ALG_NULL       = 0,
-	CFS_HASH_ALG_ADLER32,
-	CFS_HASH_ALG_CRC32,
-	CFS_HASH_ALG_MD5,
-	CFS_HASH_ALG_SHA1,
-	CFS_HASH_ALG_SHA256,
-	CFS_HASH_ALG_SHA384,
-	CFS_HASH_ALG_SHA512,
-	CFS_HASH_ALG_CRC32C,
-	CFS_HASH_ALG_MAX,
-	CFS_HASH_ALG_UNKNOWN	= 0xff
-};
-
-static struct cfs_crypto_hash_type hash_types[] = {
-	[CFS_HASH_ALG_NULL] = {
-		.cht_name	= "null",
-		.cht_key	= 0,
-		.cht_size	= 0
-	},
-	[CFS_HASH_ALG_ADLER32] = {
-		.cht_name	= "adler32",
-		.cht_key	= 1,
-		.cht_size	= 4
-	},
-	[CFS_HASH_ALG_CRC32] = {
-		.cht_name	= "crc32",
-		.cht_key	= ~0,
-		.cht_size	= 4
-	},
-	[CFS_HASH_ALG_CRC32C] = {
-		.cht_name	= "crc32c",
-		.cht_key	= ~0,
-		.cht_size	= 4
-	},
-	[CFS_HASH_ALG_MD5] = {
-		.cht_name	= "md5",
-		.cht_key	= 0,
-		.cht_size	= 16
-	},
-	[CFS_HASH_ALG_SHA1] = {
-		.cht_name	= "sha1",
-		.cht_key	= 0,
-		.cht_size	= 20
-	},
-	[CFS_HASH_ALG_SHA256] = {
-		.cht_name	= "sha256",
-		.cht_key	= 0,
-		.cht_size	= 32
-	},
-	[CFS_HASH_ALG_SHA384] = {
-		.cht_name	= "sha384",
-		.cht_key	= 0,
-		.cht_size	= 48
-	},
-	[CFS_HASH_ALG_SHA512] = {
-		.cht_name	= "sha512",
-		.cht_key	= 0,
-		.cht_size	= 64
-	},
-	[CFS_HASH_ALG_MAX] = {
-		.cht_name	= NULL,
-		.cht_key	= 0,
-		.cht_size	= 64
-	},
-};
-
-/* Maximum size of hash_types[].cht_size */
-#define CFS_CRYPTO_HASH_DIGESTSIZE_MAX	64
-
-/**
- * Return hash algorithm information for the specified algorithm identifier
- *
- * Hash information includes algorithm name, initial seed, hash size.
- *
- * \retval	cfs_crypto_hash_type for valid ID (CFS_HASH_ALG_*)
- * \retval	NULL for unknown algorithm identifier
- */
-static inline const struct cfs_crypto_hash_type *
-cfs_crypto_hash_type(enum cfs_crypto_hash_alg hash_alg)
-{
-	struct cfs_crypto_hash_type *ht;
-
-	if (hash_alg < CFS_HASH_ALG_MAX) {
-		ht = &hash_types[hash_alg];
-		if (ht->cht_name)
-			return ht;
-	}
-	return NULL;
-}
-
-/**
- * Return hash name for hash algorithm identifier
- *
- * \param[in]	hash_alg hash alrgorithm id (CFS_HASH_ALG_*)
- *
- * \retval	string name of known hash algorithm
- * \retval	"unknown" if hash algorithm is unknown
- */
-static inline const char *
-cfs_crypto_hash_name(enum cfs_crypto_hash_alg hash_alg)
-{
-	const struct cfs_crypto_hash_type *ht;
-
-	ht = cfs_crypto_hash_type(hash_alg);
-	if (ht)
-		return ht->cht_name;
-	return "unknown";
-}
-
-/**
- * Return digest size for hash algorithm type
- *
- * \param[in]	hash_alg hash alrgorithm id (CFS_HASH_ALG_*)
- *
- * \retval	hash algorithm digest size in bytes
- * \retval	0 if hash algorithm type is unknown
- */
-static inline int cfs_crypto_hash_digestsize(enum cfs_crypto_hash_alg hash_alg)
-{
-	const struct cfs_crypto_hash_type *ht;
-
-	ht = cfs_crypto_hash_type(hash_alg);
-	if (ht)
-		return ht->cht_size;
-	return 0;
-}
-
-/**
- * Find hash algorithm ID for the specified algorithm name
- *
- * \retval	hash algorithm ID for valid ID (CFS_HASH_ALG_*)
- * \retval	CFS_HASH_ALG_UNKNOWN for unknown algorithm name
- */
-static inline unsigned char cfs_crypto_hash_alg(const char *algname)
-{
-	enum cfs_crypto_hash_alg hash_alg;
-
-	for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
-		if (!strcmp(hash_types[hash_alg].cht_name, algname))
-			return hash_alg;
-
-	return CFS_HASH_ALG_UNKNOWN;
-}
-
-int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
-			   const void *buf, unsigned int buf_len,
-			   unsigned char *key, unsigned int key_len,
-			   unsigned char *hash, unsigned int *hash_len);
-
-struct ahash_request *
-cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
-		     unsigned char *key, unsigned int key_len);
-int cfs_crypto_hash_update_page(struct ahash_request *desc,
-				struct page *page, unsigned int offset,
-				unsigned int len);
-int cfs_crypto_hash_update(struct ahash_request *desc, const void *buf,
-			   unsigned int buf_len);
-int cfs_crypto_hash_final(struct ahash_request *desc,
-			  unsigned char *hash, unsigned int *hash_len);
-int cfs_crypto_register(void);
-void cfs_crypto_unregister(void);
-int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg);
-#endif
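
[Editor's note: cfs_crypto_hash_alg() above is a table-driven name-to-ID
lookup over hash_types[].  A standalone sketch of the same pattern, trimmed
to three entries for illustration; not the Lustre code:]

    #include <stdio.h>
    #include <string.h>

    struct hash_type { const char *name; unsigned int digest_size; };

    static const struct hash_type hash_types[] = {
            { "adler32", 4 },
            { "md5", 16 },
            { "sha256", 32 },
    };

    static int hash_alg(const char *name)
    {
            unsigned int i;

            for (i = 0; i < sizeof(hash_types) / sizeof(hash_types[0]); i++)
                    if (!strcmp(hash_types[i].name, name))
                            return i;
            return -1;      /* stands in for CFS_HASH_ALG_UNKNOWN */
    }

    int main(void)
    {
            printf("sha256 -> id %d\n", hash_alg("sha256"));
            return 0;
    }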

+ 0 - 207
drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h

@@ -1,207 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_debug.h
- *
- * Debug messages and assertions
- *
- */
-
-#ifndef __LIBCFS_DEBUG_H__
-#define __LIBCFS_DEBUG_H__
-
-#include <linux/limits.h>
-#include <uapi/linux/lnet/libcfs_debug.h>
-
-/*
- *  Debugging
- */
-extern unsigned int libcfs_subsystem_debug;
-extern unsigned int libcfs_stack;
-extern unsigned int libcfs_debug;
-extern unsigned int libcfs_printk;
-extern unsigned int libcfs_console_ratelimit;
-extern unsigned int libcfs_console_max_delay;
-extern unsigned int libcfs_console_min_delay;
-extern unsigned int libcfs_console_backoff;
-extern unsigned int libcfs_debug_binary;
-extern char libcfs_debug_file_path_arr[PATH_MAX];
-
-int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys);
-int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys);
-
-/* Has there been an LBUG? */
-extern unsigned int libcfs_catastrophe;
-extern unsigned int libcfs_panic_on_lbug;
-
-/* Enable debug-checks on stack size - except on x86_64 */
-#if !defined(__x86_64__)
-# ifdef __ia64__
-#  define CDEBUG_STACK() (THREAD_SIZE -				 \
-			  ((unsigned long)__builtin_dwarf_cfa() &       \
-			   (THREAD_SIZE - 1)))
-# else
-#  define CDEBUG_STACK() (THREAD_SIZE -				 \
-			  ((unsigned long)__builtin_frame_address(0) &  \
-			   (THREAD_SIZE - 1)))
-# endif /* __ia64__ */
-
-#define __CHECK_STACK(msgdata, mask, cdls)			      \
-do {								    \
-	if (unlikely(CDEBUG_STACK() > libcfs_stack)) {		  \
-		LIBCFS_DEBUG_MSG_DATA_INIT(msgdata, D_WARNING, NULL);   \
-		libcfs_stack = CDEBUG_STACK();			  \
-		libcfs_debug_msg(msgdata,			       \
-				 "maximum lustre stack %lu\n",	  \
-				 CDEBUG_STACK());		       \
-		(msgdata)->msg_mask = mask;			     \
-		(msgdata)->msg_cdls = cdls;			     \
-		dump_stack();					   \
-	      /*panic("LBUG");*/					\
-	}							       \
-} while (0)
-#define CFS_CHECK_STACK(msgdata, mask, cdls)  __CHECK_STACK(msgdata, mask, cdls)
-#else /* __x86_64__ */
-#define CFS_CHECK_STACK(msgdata, mask, cdls) do {} while (0)
-#define CDEBUG_STACK() (0L)
-#endif /* __x86_64__ */
-
-#ifndef DEBUG_SUBSYSTEM
-# define DEBUG_SUBSYSTEM S_UNDEFINED
-#endif
-
-#define CDEBUG_DEFAULT_MAX_DELAY (600 * HZ)	 /* jiffies */
-#define CDEBUG_DEFAULT_MIN_DELAY ((HZ + 1) / 2) /* jiffies */
-#define CDEBUG_DEFAULT_BACKOFF   2
-struct cfs_debug_limit_state {
-	unsigned long   cdls_next;
-	unsigned int cdls_delay;
-	int	     cdls_count;
-};
-
-struct libcfs_debug_msg_data {
-	const char *msg_file;
-	const char *msg_fn;
-	int	    msg_subsys;
-	int	    msg_line;
-	int	    msg_mask;
-	struct cfs_debug_limit_state *msg_cdls;
-};
-
-#define LIBCFS_DEBUG_MSG_DATA_INIT(data, mask, cdls)		\
-do {								\
-	(data)->msg_subsys = DEBUG_SUBSYSTEM;			\
-	(data)->msg_file   = __FILE__;				\
-	(data)->msg_fn     = __func__;				\
-	(data)->msg_line   = __LINE__;				\
-	(data)->msg_cdls   = (cdls);				\
-	(data)->msg_mask   = (mask);				\
-} while (0)
-
-#define LIBCFS_DEBUG_MSG_DATA_DECL(dataname, mask, cdls)	\
-	static struct libcfs_debug_msg_data dataname = {	\
-	       .msg_subsys = DEBUG_SUBSYSTEM,			\
-	       .msg_file   = __FILE__,				\
-	       .msg_fn     = __func__,				\
-	       .msg_line   = __LINE__,				\
-	       .msg_cdls   = (cdls)	 };			\
-	dataname.msg_mask   = (mask)
-
-/**
- * Filters out logging messages based on mask and subsystem.
- */
-static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
-{
-	return mask & D_CANTMASK ||
-		((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem));
-}
-
-#define __CDEBUG(cdls, mask, format, ...)				\
-do {									\
-	static struct libcfs_debug_msg_data msgdata;			\
-									\
-	CFS_CHECK_STACK(&msgdata, mask, cdls);				\
-									\
-	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {			\
-		LIBCFS_DEBUG_MSG_DATA_INIT(&msgdata, mask, cdls);	\
-		libcfs_debug_msg(&msgdata, format, ## __VA_ARGS__);	\
-	}								\
-} while (0)
-
-#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__)
-
-#define CDEBUG_LIMIT(mask, format, ...)					\
-do {									\
-	static struct cfs_debug_limit_state cdls;			\
-									\
-	__CDEBUG(&cdls, mask, format, ## __VA_ARGS__);			\
-} while (0)
-
-/*
- * Lustre Error Checksum: calculates checksum
- * of Hex number by XORing the nybbles.
- */
-#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \
-			   ((hexnum) >> 8 & 0xf))
-
-#define CWARN(format, ...)	CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__)
-#define CERROR(format, ...)	CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__)
-#define CNETERR(format, a...)	CDEBUG_LIMIT(D_NETERROR, format, ## a)
-#define CEMERG(format, ...)	CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__)
-
-#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__)
-#define LCONSOLE_INFO(format, ...)  CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__)
-#define LCONSOLE_WARN(format, ...)  CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__)
-#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
-			   "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__)
-#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__)
-
-#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
-
-int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
-		     const char *format1, ...)
-	__printf(2, 3);
-
-int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
-		       const char *format1,
-		       va_list args, const char *format2, ...)
-	__printf(4, 5);
-
-/* other external symbols that tracefile provides: */
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-			    const char __user *usr_buffer, int usr_buffer_nob);
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-			     const char *knl_buffer, char *append);
-
-#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
-
-#endif	/* __LIBCFS_DEBUG_H__ */
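
[Editor's note: cfs_cdebug_show() above is a two-level filter: a message
passes if its mask cannot be masked out, or if both the global debug mask
and the subsystem mask enable it.  A standalone sketch with made-up bit
values, illustrative only:]

    #include <stdio.h>

    #define D_ERROR    0x1
    #define D_WARNING  0x2
    #define D_CANTMASK D_ERROR      /* illustrative: errors always shown */

    static unsigned int debug_mask  = D_WARNING;
    static unsigned int subsys_mask = 0x10;

    static int debug_show(unsigned int mask, unsigned int subsystem)
    {
            return (mask & D_CANTMASK) ||
                   ((debug_mask & mask) && (subsys_mask & subsystem));
    }

    int main(void)
    {
            printf("warning, enabled subsys: %d\n", debug_show(D_WARNING, 0x10));
            printf("warning, disabled subsys: %d\n", debug_show(D_WARNING, 0x20));
            return 0;
    }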

+ 0 - 194
drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h

@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Oracle Corporation, Inc.
- */
-
-#ifndef _LIBCFS_FAIL_H
-#define _LIBCFS_FAIL_H
-
-#include <linux/sched.h>
-#include <linux/wait.h>
-
-extern unsigned long cfs_fail_loc;
-extern unsigned int cfs_fail_val;
-extern int cfs_fail_err;
-
-extern wait_queue_head_t cfs_race_waitq;
-extern int cfs_race_state;
-
-int __cfs_fail_check_set(u32 id, u32 value, int set);
-int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set);
-
-enum {
-	CFS_FAIL_LOC_NOSET      = 0,
-	CFS_FAIL_LOC_ORSET      = 1,
-	CFS_FAIL_LOC_RESET      = 2,
-	CFS_FAIL_LOC_VALUE      = 3
-};
-
-/* Failure injection control */
-#define CFS_FAIL_MASK_SYS    0x0000FF00
-#define CFS_FAIL_MASK_LOC   (0x000000FF | CFS_FAIL_MASK_SYS)
-
-#define CFS_FAILED_BIT       30
-/* CFS_FAILED is 0x40000000 */
-#define CFS_FAILED		BIT(CFS_FAILED_BIT)
-
-#define CFS_FAIL_ONCE_BIT    31
-/* CFS_FAIL_ONCE is 0x80000000 */
-#define CFS_FAIL_ONCE		BIT(CFS_FAIL_ONCE_BIT)
-
-/* The following flags aren't made to be combined */
-#define CFS_FAIL_SKIP	0x20000000 /* skip N times then fail */
-#define CFS_FAIL_SOME	0x10000000 /* only fail N times */
-#define CFS_FAIL_RAND	0x08000000 /* fail 1/N of the times */
-#define CFS_FAIL_USR1	0x04000000 /* user flag */
-
-#define CFS_FAULT	0x02000000 /* match any CFS_FAULT_CHECK */
-
-static inline bool CFS_FAIL_PRECHECK(u32 id)
-{
-	return cfs_fail_loc &&
-	       ((cfs_fail_loc & CFS_FAIL_MASK_LOC) == (id & CFS_FAIL_MASK_LOC) ||
-		(cfs_fail_loc & id & CFS_FAULT));
-}
-
-static inline int cfs_fail_check_set(u32 id, u32 value,
-				     int set, int quiet)
-{
-	int ret = 0;
-
-	if (unlikely(CFS_FAIL_PRECHECK(id))) {
-		ret = __cfs_fail_check_set(id, value, set);
-		if (ret) {
-			if (quiet) {
-				CDEBUG(D_INFO, "*** cfs_fail_loc=%x, val=%u***\n",
-				       id, value);
-			} else {
-				LCONSOLE_INFO("*** cfs_fail_loc=%x, val=%u***\n",
-					      id, value);
-			}
-		}
-	}
-
-	return ret;
-}
-
-/* If id hit cfs_fail_loc, return 1, otherwise return 0 */
-#define CFS_FAIL_CHECK(id) \
-	cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 0)
-#define CFS_FAIL_CHECK_QUIET(id) \
-	cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1)
-
-/*
- * If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1,
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_VALUE(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0)
-#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1)
-
-/*
- * If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1,
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_ORSET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0)
-#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1)
-
-/*
- * If id hit cfs_fail_loc, cfs_fail_loc = value and return 1,
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_RESET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0)
-#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \
-	cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1)
-
-static inline int cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
-{
-	if (unlikely(CFS_FAIL_PRECHECK(id)))
-		return __cfs_fail_timeout_set(id, value, ms, set);
-	return 0;
-}
-
-/* If id hit cfs_fail_loc, sleep for seconds or milliseconds */
-#define CFS_FAIL_TIMEOUT(id, secs) \
-	cfs_fail_timeout_set(id, 0, (secs) * 1000, CFS_FAIL_LOC_NOSET)
-
-#define CFS_FAIL_TIMEOUT_MS(id, ms) \
-	cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET)
-
-/*
- * If id hit cfs_fail_loc, cfs_fail_loc |= value and
- * sleep seconds or milliseconds
- */
-#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
-	cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_ORSET)
-
-#define CFS_FAIL_TIMEOUT_RESET(id, value, secs) \
-	cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_RESET)
-
-#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
-	cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
-
-#define CFS_FAULT_CHECK(id)			\
-	CFS_FAIL_CHECK(CFS_FAULT | (id))
-
-/*
- * The idea here is to synchronise two threads to force a race. The
- * first thread that calls this with a matching fail_loc is put to
- * sleep. The next thread that calls with the same fail_loc wakes up
- * the first and continues.
- */
-static inline void cfs_race(u32 id)
-{
-	if (CFS_FAIL_PRECHECK(id)) {
-		if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) {
-			int rc;
-
-			cfs_race_state = 0;
-			CERROR("cfs_race id %x sleeping\n", id);
-			rc = wait_event_interruptible(cfs_race_waitq,
-						      !!cfs_race_state);
-			CERROR("cfs_race id %x awake, rc=%d\n", id, rc);
-		} else {
-			CERROR("cfs_race id %x waking\n", id);
-			cfs_race_state = 1;
-			wake_up(&cfs_race_waitq);
-		}
-	}
-}
-
-#define CFS_RACE(id) cfs_race(id)
-
-#endif /* _LIBCFS_FAIL_H */
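
To force a specific interleaving with CFS_RACE(), both threads call it with the same id while that id is armed in cfs_fail_loc; whichever thread arrives first sleeps on cfs_race_waitq until the other catches up. A minimal sketch with a hypothetical id:

#define DEMO_RACE_LOC	0x2345	/* hypothetical race id */

static void demo_critical_step(void)
{
	/* The first thread to get here sleeps; the second wakes it,
	 * so both proceed through the window together and the race
	 * being hunted becomes reproducible. */
	CFS_RACE(DEMO_RACE_LOC);

	/* ... the code whose concurrency we want to exercise ... */
}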

+ 0 - 869
drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h

@@ -1,869 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_hash.h
- *
- * Hashing routines
- *
- */
-
-#ifndef __LIBCFS_HASH_H__
-#define __LIBCFS_HASH_H__
-
-#include <linux/hash.h>
-#include <linux/spinlock.h>
-#include <linux/workqueue.h>
-#include <linux/libcfs/libcfs.h>
-
-/*
- * Knuth recommends primes in approximately golden ratio to the maximum
- * integer representable by a machine word for multiplicative hashing.
- * Chuck Lever verified the effectiveness of this technique:
- * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
- *
- * These primes are chosen to be bit-sparse, that is operations on
- * them can use shifts and additions instead of multiplications for
- * machines where multiplications are slow.
- */
-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL
-/*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
-#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
-
-/** disable debug */
-#define CFS_HASH_DEBUG_NONE	0
-/*
- * record hash depth and report it to the console when it gets too deep;
- * the computation overhead is low but it consumes more memory
- */
-#define CFS_HASH_DEBUG_1	1
-/** expensive: additionally validates keys */
-#define CFS_HASH_DEBUG_2	2
-
-#define CFS_HASH_DEBUG_LEVEL	CFS_HASH_DEBUG_NONE
-
-struct cfs_hash_ops;
-struct cfs_hash_lock_ops;
-struct cfs_hash_hlist_ops;
-
-union cfs_hash_lock {
-	rwlock_t		rw;		/**< rwlock */
-	spinlock_t		spin;		/**< spinlock */
-};
-
-/**
- * cfs_hash_bucket is a container of:
- * - lock, counter ...
- * - array of hash-heads starting at hsb_head[0]; a hash-head can be one of
- *   . struct cfs_hash_head
- *   . struct cfs_hash_head_dep
- *   . struct cfs_hash_dhead
- *   . struct cfs_hash_dhead_dep
- *   depending on the user's requirements
- * - some extra bytes (the caller can request them when creating the hash)
- */
-struct cfs_hash_bucket {
-	union cfs_hash_lock	hsb_lock;	/**< bucket lock */
-	u32			hsb_count;	/**< current entries */
-	u32			hsb_version;	/**< change version */
-	unsigned int		hsb_index;	/**< index of bucket */
-	int			hsb_depmax;	/**< max depth on bucket */
-	long			hsb_head[0];	/**< hash-head array */
-};
-
-/**
- * cfs_hash bucket descriptor; it normally lives on the caller's stack
- */
-struct cfs_hash_bd {
-	/* address of bucket */
-	struct cfs_hash_bucket	*bd_bucket;
-	/* offset in bucket */
-	unsigned int		 bd_offset;
-};
-
-#define CFS_HASH_NAME_LEN	16	/**< default name length */
-#define CFS_HASH_BIGNAME_LEN	64	/**< bigname for param tree */
-
-#define CFS_HASH_BKT_BITS	3	/**< default bits of bucket */
-#define CFS_HASH_BITS_MAX	30	/**< max bits of bucket */
-#define CFS_HASH_BITS_MIN	CFS_HASH_BKT_BITS
-
-/**
- * common hash attributes.
- */
-enum cfs_hash_tag {
-	/**
-	 * don't need any lock; the caller will protect operations with its
-	 * own lock. With this flag:
-	 *  . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK
-	 *    will be ignored.
-	 *  . Some functions will be disabled with this flag, e.g.:
-	 *    cfs_hash_for_each_empty, cfs_hash_rehash
-	 */
-	CFS_HASH_NO_LOCK	= BIT(0),
-	/** no bucket lock, use one spinlock to protect the whole hash */
-	CFS_HASH_NO_BKTLOCK	= BIT(1),
-	/** rwlock to protect bucket */
-	CFS_HASH_RW_BKTLOCK	= BIT(2),
-	/** spinlock to protect bucket */
-	CFS_HASH_SPIN_BKTLOCK	= BIT(3),
-	/** always add new item to tail */
-	CFS_HASH_ADD_TAIL	= BIT(4),
-	/** hash-table doesn't have refcount on item */
-	CFS_HASH_NO_ITEMREF	= BIT(5),
-	/** big name for param-tree */
-	CFS_HASH_BIGNAME	= BIT(6),
-	/** track global count */
-	CFS_HASH_COUNTER	= BIT(7),
-	/** rehash item by new key */
-	CFS_HASH_REHASH_KEY	= BIT(8),
-	/** Enable dynamic hash resizing */
-	CFS_HASH_REHASH		= BIT(9),
-	/** can shrink hash-size */
-	CFS_HASH_SHRINK		= BIT(10),
-	/** assert hash is empty on exit */
-	CFS_HASH_ASSERT_EMPTY	= BIT(11),
-	/** record hlist depth */
-	CFS_HASH_DEPTH		= BIT(12),
-	/**
-	 * rehash is always scheduled in a separate thread, so a
-	 * concurrent change to the hash table is non-blocking
-	 */
-	CFS_HASH_NBLK_CHANGE	= BIT(13),
-	/**
-	 * NB: hs_flags is typed as u16; change it if you
-	 * need 16 or more flags
-	 */
-};
-
-/** most used attributes */
-#define CFS_HASH_DEFAULT	(CFS_HASH_RW_BKTLOCK | \
-				 CFS_HASH_COUNTER | CFS_HASH_REHASH)
-
-/**
- * cfs_hash is a general-purpose hash-table implementation; it supports:
- *    . two refcount modes
- *      hash-table with & without refcount
- *    . four lock modes
- *      nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock
- *    . general operations
- *      lookup, add(add_tail or add_head), delete
- *    . rehash
- *      grow or shrink
- *    . iteration
- *      locked iteration and unlocked iteration
- *    . bigname
- *      support for long hash names
- *    . debug
- *      trace max searching depth
- *
- * Rehash:
- * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker)
- * is spawned to handle the rehash in the background. Other processes can
- * concurrently perform additions, deletions, and lookups without being
- * blocked on rehash completion, because the rehash releases the global
- * wrlock for each bucket.
- *
- * Rehash and iteration can't run at the same time because it's too tricky
- * to keep both of them safe and correct. As they are relatively rare
- * operations:
- *   . if iteration is in progress when we try to launch a rehash, the
- *     rehash simply gives up; the iterator will launch it at the end.
- *   . if a rehash is in progress when we try to iterate the hash table,
- *     we just wait (it shouldn't take very long); nobody should expect
- *     iteration of the whole hash-table to be non-blocking anyway.
- *
- * During rehashing, a (key,object) pair may be in one of two buckets,
- * depending on whether the worker task has yet to transfer the object
- * to its new location in the table. Lookups and deletions need to search both
- * locations; additions must take care to only insert into the new bucket.
- */
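
Wiring a user of this API together looks roughly like the sketch below. Everything here is hypothetical (object type, callbacks, table geometry); the ops shown are a representative subset of struct cfs_hash_ops, defined later in this header:

/* Hypothetical object stored in the table, keyed by a u64 id. */
struct demo_obj {
	u64			do_id;
	atomic_t		do_ref;
	struct hlist_node	do_hnode;
};

static unsigned int demo_hash(struct cfs_hash *hs, const void *key,
			      unsigned int mask)
{
	return cfs_hash_u64_hash(*(const u64 *)key, mask);
}

static void *demo_key(struct hlist_node *hnode)
{
	return &hlist_entry(hnode, struct demo_obj, do_hnode)->do_id;
}

static int demo_keycmp(const void *key, struct hlist_node *hnode)
{
	return *(const u64 *)key ==
	       hlist_entry(hnode, struct demo_obj, do_hnode)->do_id;
}

static void *demo_object(struct hlist_node *hnode)
{
	return hlist_entry(hnode, struct demo_obj, do_hnode);
}

static void demo_get(struct cfs_hash *hs, struct hlist_node *hnode)
{
	atomic_inc(&hlist_entry(hnode, struct demo_obj, do_hnode)->do_ref);
}

static void demo_put(struct cfs_hash *hs, struct hlist_node *hnode)
{
	atomic_dec(&hlist_entry(hnode, struct demo_obj, do_hnode)->do_ref);
}

static struct cfs_hash_ops demo_hash_ops = {
	.hs_hash	= demo_hash,
	.hs_key		= demo_key,
	.hs_keycmp	= demo_keycmp,
	.hs_object	= demo_object,
	.hs_get		= demo_get,
	.hs_put		= demo_put,
	.hs_put_locked	= demo_put,
};

static struct cfs_hash *demo_table_create(void)
{
	/* 2^7 hlist heads initially, growable to 2^16; the default
	 * flags enable per-bucket rwlocks, a global counter and rehash. */
	return cfs_hash_create("demo", 7, 16, CFS_HASH_BKT_BITS, 0,
			       CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
			       &demo_hash_ops, CFS_HASH_DEFAULT);
}
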
-
-struct cfs_hash {
-	/**
-	 * serialize with rehash, or serialize all operations if
-	 * the hash-table has CFS_HASH_NO_BKTLOCK
-	 */
-	union cfs_hash_lock		hs_lock;
-	/** hash operations */
-	struct cfs_hash_ops		*hs_ops;
-	/** hash lock operations */
-	struct cfs_hash_lock_ops	*hs_lops;
-	/** hash list operations */
-	struct cfs_hash_hlist_ops	*hs_hops;
-	/** hash buckets-table */
-	struct cfs_hash_bucket		**hs_buckets;
-	/** total number of items on this hash-table */
-	atomic_t			hs_count;
-	/** hash flags, see cfs_hash_tag for detail */
-	u16				hs_flags;
-	/** # of extra bytes per bucket, for user-stored extended attributes */
-	u16				hs_extra_bytes;
-	/** wants to iterate */
-	u8				hs_iterating;
-	/** hash-table is dying */
-	u8				hs_exiting;
-	/** current hash bits */
-	u8				hs_cur_bits;
-	/** min hash bits */
-	u8				hs_min_bits;
-	/** max hash bits */
-	u8				hs_max_bits;
-	/** bits for rehash */
-	u8				hs_rehash_bits;
-	/** bits for each bucket */
-	u8				hs_bkt_bits;
-	/** resize min threshold */
-	u16				hs_min_theta;
-	/** resize max threshold */
-	u16				hs_max_theta;
-	/** resize count */
-	u32				hs_rehash_count;
-	/** # of iterators (callers of cfs_hash_for_each_*) */
-	u32				hs_iterators;
-	/** rehash workitem */
-	struct work_struct		hs_rehash_work;
-	/** refcount on this hash table */
-	atomic_t			hs_refcount;
-	/** rehash buckets-table */
-	struct cfs_hash_bucket		**hs_rehash_buckets;
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-	/** serialize debug members */
-	spinlock_t			hs_dep_lock;
-	/** max depth */
-	unsigned int			hs_dep_max;
-	/** id of the deepest bucket */
-	unsigned int			hs_dep_bkt;
-	/** offset in the deepest bucket */
-	unsigned int			hs_dep_off;
-	/** bits when we found the max depth */
-	unsigned int			hs_dep_bits;
-	/** workitem to output max depth */
-	struct work_struct		hs_dep_work;
-#endif
-	/** name of htable */
-	char				hs_name[0];
-};
-
-struct cfs_hash_lock_ops {
-	/** lock the hash table */
-	void    (*hs_lock)(union cfs_hash_lock *lock, int exclusive);
-	/** unlock the hash table */
-	void    (*hs_unlock)(union cfs_hash_lock *lock, int exclusive);
-	/** lock the hash bucket */
-	void    (*hs_bkt_lock)(union cfs_hash_lock *lock, int exclusive);
-	/** unlock the hash bucket */
-	void    (*hs_bkt_unlock)(union cfs_hash_lock *lock, int exclusive);
-};
-
-struct cfs_hash_hlist_ops {
-	/** return hlist_head of hash-head of @bd */
-	struct hlist_head *(*hop_hhead)(struct cfs_hash *hs,
-					struct cfs_hash_bd *bd);
-	/** return hash-head size */
-	int (*hop_hhead_size)(struct cfs_hash *hs);
-	/** add @hnode to hash-head of @bd */
-	int (*hop_hnode_add)(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			     struct hlist_node *hnode);
-	/** remove @hnode from hash-head of @bd */
-	int (*hop_hnode_del)(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			     struct hlist_node *hnode);
-};
-
-struct cfs_hash_ops {
-	/** return hashed value from @key */
-	unsigned int (*hs_hash)(struct cfs_hash *hs, const void *key,
-				unsigned int mask);
-	/** return key address of @hnode */
-	void *   (*hs_key)(struct hlist_node *hnode);
-	/** copy key from @hnode to @key */
-	void     (*hs_keycpy)(struct hlist_node *hnode, void *key);
-	/**
-	 *  compare @key with key of @hnode
-	 *  returns 1 on a match
-	 */
-	int      (*hs_keycmp)(const void *key, struct hlist_node *hnode);
-	/** return object address of @hnode, i.e: container_of(...hnode) */
-	void *   (*hs_object)(struct hlist_node *hnode);
-	/** get refcount of item, always called while holding bucket-lock */
-	void     (*hs_get)(struct cfs_hash *hs, struct hlist_node *hnode);
-	/** release refcount of item */
-	void     (*hs_put)(struct cfs_hash *hs, struct hlist_node *hnode);
-	/** release refcount of item, always called while holding bucket-lock */
-	void     (*hs_put_locked)(struct cfs_hash *hs,
-				  struct hlist_node *hnode);
-	/** called just before @hnode is removed */
-	void     (*hs_exit)(struct cfs_hash *hs, struct hlist_node *hnode);
-};
-
-/** total number of buckets in @hs */
-#define CFS_HASH_NBKT(hs)	\
-	BIT((hs)->hs_cur_bits - (hs)->hs_bkt_bits)
-
-/** total number of buckets in @hs while rehashing */
-#define CFS_HASH_RH_NBKT(hs)	\
-	BIT((hs)->hs_rehash_bits - (hs)->hs_bkt_bits)
-
-/** number of hlists in a bucket */
-#define CFS_HASH_BKT_NHLIST(hs)	BIT((hs)->hs_bkt_bits)
-
-/** total number of hlist in @hs */
-#define CFS_HASH_NHLIST(hs)	BIT((hs)->hs_cur_bits)
-
-/** total number of hlist in @hs while rehashing */
-#define CFS_HASH_RH_NHLIST(hs)	BIT((hs)->hs_rehash_bits)
-
-static inline int
-cfs_hash_with_no_lock(struct cfs_hash *hs)
-{
-	/* caller will serialize all operations for this hash-table */
-	return hs->hs_flags & CFS_HASH_NO_LOCK;
-}
-
-static inline int
-cfs_hash_with_no_bktlock(struct cfs_hash *hs)
-{
-	/* no bucket lock, one single lock to protect the hash-table */
-	return hs->hs_flags & CFS_HASH_NO_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_rw_bktlock(struct cfs_hash *hs)
-{
-	/* rwlock to protect hash bucket */
-	return hs->hs_flags & CFS_HASH_RW_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_spin_bktlock(struct cfs_hash *hs)
-{
-	/* spinlock to protect hash bucket */
-	return hs->hs_flags & CFS_HASH_SPIN_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_add_tail(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_ADD_TAIL;
-}
-
-static inline int
-cfs_hash_with_no_itemref(struct cfs_hash *hs)
-{
-	/*
-	 * the hash-table doesn't keep a refcount on items; an item
-	 * can't be removed from the hash unless its refcount is zero
-	 */
-	return hs->hs_flags & CFS_HASH_NO_ITEMREF;
-}
-
-static inline int
-cfs_hash_with_bigname(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_BIGNAME;
-}
-
-static inline int
-cfs_hash_with_counter(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_COUNTER;
-}
-
-static inline int
-cfs_hash_with_rehash(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_REHASH;
-}
-
-static inline int
-cfs_hash_with_rehash_key(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_REHASH_KEY;
-}
-
-static inline int
-cfs_hash_with_shrink(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_SHRINK;
-}
-
-static inline int
-cfs_hash_with_assert_empty(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_ASSERT_EMPTY;
-}
-
-static inline int
-cfs_hash_with_depth(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_DEPTH;
-}
-
-static inline int
-cfs_hash_with_nblk_change(struct cfs_hash *hs)
-{
-	return hs->hs_flags & CFS_HASH_NBLK_CHANGE;
-}
-
-static inline int
-cfs_hash_is_exiting(struct cfs_hash *hs)
-{
-	/* cfs_hash_destroy is called */
-	return hs->hs_exiting;
-}
-
-static inline int
-cfs_hash_is_rehashing(struct cfs_hash *hs)
-{
-	/* rehash is launched */
-	return !!hs->hs_rehash_bits;
-}
-
-static inline int
-cfs_hash_is_iterating(struct cfs_hash *hs)
-{
-	/* someone is calling cfs_hash_for_each_* */
-	return hs->hs_iterating || hs->hs_iterators;
-}
-
-static inline int
-cfs_hash_bkt_size(struct cfs_hash *hs)
-{
-	return offsetof(struct cfs_hash_bucket, hsb_head[0]) +
-	       hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) +
-	       hs->hs_extra_bytes;
-}
-
-static inline unsigned
-cfs_hash_id(struct cfs_hash *hs, const void *key, unsigned int mask)
-{
-	return hs->hs_ops->hs_hash(hs, key, mask);
-}
-
-static inline void *
-cfs_hash_key(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	return hs->hs_ops->hs_key(hnode);
-}
-
-static inline void
-cfs_hash_keycpy(struct cfs_hash *hs, struct hlist_node *hnode, void *key)
-{
-	if (hs->hs_ops->hs_keycpy)
-		hs->hs_ops->hs_keycpy(hnode, key);
-}
-
-/**
- * Returns 1 on a match.
- */
-static inline int
-cfs_hash_keycmp(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
-	return hs->hs_ops->hs_keycmp(key, hnode);
-}
-
-static inline void *
-cfs_hash_object(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	return hs->hs_ops->hs_object(hnode);
-}
-
-static inline void
-cfs_hash_get(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	hs->hs_ops->hs_get(hs, hnode);
-}
-
-static inline void
-cfs_hash_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	hs->hs_ops->hs_put_locked(hs, hnode);
-}
-
-static inline void
-cfs_hash_put(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	hs->hs_ops->hs_put(hs, hnode);
-}
-
-static inline void
-cfs_hash_exit(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	if (hs->hs_ops->hs_exit)
-		hs->hs_ops->hs_exit(hs, hnode);
-}
-
-static inline void cfs_hash_lock(struct cfs_hash *hs, int excl)
-{
-	hs->hs_lops->hs_lock(&hs->hs_lock, excl);
-}
-
-static inline void cfs_hash_unlock(struct cfs_hash *hs, int excl)
-{
-	hs->hs_lops->hs_unlock(&hs->hs_lock, excl);
-}
-
-static inline int cfs_hash_dec_and_lock(struct cfs_hash *hs,
-					atomic_t *condition)
-{
-	LASSERT(cfs_hash_with_no_bktlock(hs));
-	return atomic_dec_and_lock(condition, &hs->hs_lock.spin);
-}
-
-static inline void cfs_hash_bd_lock(struct cfs_hash *hs,
-				    struct cfs_hash_bd *bd, int excl)
-{
-	hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl);
-}
-
-static inline void cfs_hash_bd_unlock(struct cfs_hash *hs,
-				      struct cfs_hash_bd *bd, int excl)
-{
-	hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl);
-}
-
-/**
- * operations on a cfs_hash bucket (bd: bucket descriptor);
- * they are normally for hash-tables without rehash
- */
-void cfs_hash_bd_get(struct cfs_hash *hs, const void *key,
-		     struct cfs_hash_bd *bd);
-
-static inline void
-cfs_hash_bd_get_and_lock(struct cfs_hash *hs, const void *key,
-			 struct cfs_hash_bd *bd, int excl)
-{
-	cfs_hash_bd_get(hs, key, bd);
-	cfs_hash_bd_lock(hs, bd, excl);
-}
-
-static inline unsigned
-cfs_hash_bd_index_get(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits);
-}
-
-static inline void
-cfs_hash_bd_index_set(struct cfs_hash *hs, unsigned int index,
-		      struct cfs_hash_bd *bd)
-{
-	bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits];
-	bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U);
-}
-
-static inline void *
-cfs_hash_bd_extra_get(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	return (void *)bd->bd_bucket +
-	       cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
-}
-
-static inline u32
-cfs_hash_bd_version_get(struct cfs_hash_bd *bd)
-{
-	/* caller must hold cfs_hash_bd_lock */
-	return bd->bd_bucket->hsb_version;
-}
-
-static inline u32
-cfs_hash_bd_count_get(struct cfs_hash_bd *bd)
-{
-	/* caller must hold cfs_hash_bd_lock */
-	return bd->bd_bucket->hsb_count;
-}
-
-static inline int
-cfs_hash_bd_depmax_get(struct cfs_hash_bd *bd)
-{
-	return bd->bd_bucket->hsb_depmax;
-}
-
-static inline int
-cfs_hash_bd_compare(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
-{
-	if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index)
-		return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index;
-
-	if (bd1->bd_offset != bd2->bd_offset)
-		return bd1->bd_offset - bd2->bd_offset;
-
-	return 0;
-}
-
-void cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			    struct hlist_node *hnode);
-void cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			    struct hlist_node *hnode);
-void cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
-			     struct cfs_hash_bd *bd_new,
-			     struct hlist_node *hnode);
-
-static inline int
-cfs_hash_bd_dec_and_lock(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			 atomic_t *condition)
-{
-	LASSERT(cfs_hash_with_spin_bktlock(hs));
-	return atomic_dec_and_lock(condition, &bd->bd_bucket->hsb_lock.spin);
-}
-
-static inline struct hlist_head *
-cfs_hash_bd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	return hs->hs_hops->hop_hhead(hs, bd);
-}
-
-struct hlist_node *
-cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			  const void *key);
-struct hlist_node *
-cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			const void *key);
-
-/**
- * operations on a cfs_hash bucket (bd: bucket descriptor);
- * they are safe for hash-tables with rehash
- */
-void cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
-			  struct cfs_hash_bd *bds);
-void cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			   int excl);
-void cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			     int excl);
-
-static inline void
-cfs_hash_dual_bd_get_and_lock(struct cfs_hash *hs, const void *key,
-			      struct cfs_hash_bd *bds, int excl)
-{
-	cfs_hash_dual_bd_get(hs, key, bds);
-	cfs_hash_dual_bd_lock(hs, bds, excl);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			       const void *key);
-struct hlist_node *
-cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				const void *key, struct hlist_node *hnode,
-				int insist_add);
-struct hlist_node *
-cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				const void *key, struct hlist_node *hnode);
-
-/* Hash init/cleanup functions */
-struct cfs_hash *
-cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
-		unsigned int bkt_bits, unsigned int extra_bytes,
-		unsigned int min_theta, unsigned int max_theta,
-		struct cfs_hash_ops *ops, unsigned int flags);
-
-struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs);
-void cfs_hash_putref(struct cfs_hash *hs);
-
-/* Hash addition functions */
-void cfs_hash_add(struct cfs_hash *hs, const void *key,
-		  struct hlist_node *hnode);
-int cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
-			struct hlist_node *hnode);
-void *cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
-			      struct hlist_node *hnode);
-
-/* Hash deletion functions */
-void *cfs_hash_del(struct cfs_hash *hs, const void *key,
-		   struct hlist_node *hnode);
-void *cfs_hash_del_key(struct cfs_hash *hs, const void *key);
-
-/* Hash lookup/for_each functions */
-#define CFS_HASH_LOOP_HOG       1024
-
-typedef int (*cfs_hash_for_each_cb_t)(struct cfs_hash *hs,
-				      struct cfs_hash_bd *bd,
-				      struct hlist_node *node,
-				      void *data);
-void *
-cfs_hash_lookup(struct cfs_hash *hs, const void *key);
-void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, void *data);
-void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
-		       void *data);
-int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
-			 void *data, int start);
-int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
-			void *data);
-void
-cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
-		      cfs_hash_for_each_cb_t cb, void *data);
-typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
-void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t cb, void *data);
-
-void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
-			cfs_hash_for_each_cb_t cb, void *data);
-int  cfs_hash_is_empty(struct cfs_hash *hs);
-u64 cfs_hash_size_get(struct cfs_hash *hs);
-
-/*
- * Rehash - Theta is calculated to be the average chained
- * hash depth assuming a perfectly uniform hash function.
- */
-void cfs_hash_rehash_cancel_locked(struct cfs_hash *hs);
-void cfs_hash_rehash_cancel(struct cfs_hash *hs);
-void cfs_hash_rehash(struct cfs_hash *hs, int do_rehash);
-void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
-			 void *new_key, struct hlist_node *hnode);
-
-#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1
-/* Validate hnode references the correct key */
-static inline void
-cfs_hash_key_validate(struct cfs_hash *hs, const void *key,
-		      struct hlist_node *hnode)
-{
-	LASSERT(cfs_hash_keycmp(hs, key, hnode));
-}
-
-/* Validate hnode is in the correct bucket */
-static inline void
-cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			 struct hlist_node *hnode)
-{
-	struct cfs_hash_bd bds[2];
-
-	cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds);
-	LASSERT(bds[0].bd_bucket == bd->bd_bucket ||
-		bds[1].bd_bucket == bd->bd_bucket);
-}
-
-#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */
-
-static inline void
-cfs_hash_key_validate(struct cfs_hash *hs, const void *key,
-		      struct hlist_node *hnode) {}
-
-static inline void
-cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			 struct hlist_node *hnode) {}
-
-#endif /* CFS_HASH_DEBUG_LEVEL */
-
-#define CFS_HASH_THETA_BITS	10
-#define CFS_HASH_MIN_THETA	BIT(CFS_HASH_THETA_BITS - 1)
-#define CFS_HASH_MAX_THETA	BIT(CFS_HASH_THETA_BITS + 1)
-
-/* Return integer component of theta */
-static inline int __cfs_hash_theta_int(int theta)
-{
-	return (theta >> CFS_HASH_THETA_BITS);
-}
-
-/* Return a fractional value between 0 and 999 */
-static inline int __cfs_hash_theta_frac(int theta)
-{
-	return ((theta * 1000) >> CFS_HASH_THETA_BITS) -
-	       (__cfs_hash_theta_int(theta) * 1000);
-}
-
-static inline int __cfs_hash_theta(struct cfs_hash *hs)
-{
-	return (atomic_read(&hs->hs_count) <<
-		CFS_HASH_THETA_BITS) >> hs->hs_cur_bits;
-}
-
-static inline void
-__cfs_hash_set_theta(struct cfs_hash *hs, int min, int max)
-{
-	LASSERT(min < max);
-	hs->hs_min_theta = (u16)min;
-	hs->hs_max_theta = (u16)max;
-}
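
Concretely, theta is a fixed-point value with CFS_HASH_THETA_BITS (10) fractional bits. A table holding 4096 items at hs_cur_bits = 10 (1024 hlist heads) gives theta = (4096 << 10) >> 10 = 4096, whose integer part is 4096 >> 10 = 4 and whose fractional part is 0: an average chain depth of 4.000. The defaults CFS_HASH_MIN_THETA (512) and CFS_HASH_MAX_THETA (2048) thus correspond to average depths of 0.5 and 2.0, the band outside which the table is considered for shrinking or growing.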
-
-/* Generic debug formatting routines mainly for proc handler */
-struct seq_file;
-void cfs_hash_debug_header(struct seq_file *m);
-void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m);
-
-/*
- * Generic djb2 hash algorithm for character arrays.
- */
-static inline unsigned
-cfs_hash_djb2_hash(const void *key, size_t size, unsigned int mask)
-{
-	unsigned int i, hash = 5381;
-
-	LASSERT(key);
-
-	for (i = 0; i < size; i++)
-		hash = hash * 33 + ((char *)key)[i];
-
-	return (hash & mask);
-}
-
-/*
- * Generic u32 hash algorithm.
- */
-static inline unsigned
-cfs_hash_u32_hash(const u32 key, unsigned int mask)
-{
-	return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask);
-}
-
-/*
- * Generic u64 hash algorithm.
- */
-static inline unsigned
-cfs_hash_u64_hash(const u64 key, unsigned int mask)
-{
-	return ((unsigned int)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
-}
-
-/** iterate over all buckets in @bds (array of struct cfs_hash_bd) */
-#define cfs_hash_for_each_bd(bds, n, i)	\
-	for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++)
-
-/** iterate over all buckets of @hs */
-#define cfs_hash_for_each_bucket(hs, bd, pos)			\
-	for (pos = 0;						\
-	     pos < CFS_HASH_NBKT(hs) &&				\
-	     ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++)
-
-/** iterate over all hlist of bucket @bd */
-#define cfs_hash_bd_for_each_hlist(hs, bd, hlist)		\
-	for ((bd)->bd_offset = 0;				\
-	     (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) &&	\
-	     (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL;	\
-	     (bd)->bd_offset++)
-
-/* !__LIBCFS__HASH_H__ */
-#endif
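
The callback shape for the iterators is easiest to see in a sketch. The names are hypothetical, and cfs_hash_size_get() already returns the item count; this is purely illustrative:

static int demo_count_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			 struct hlist_node *hnode, void *data)
{
	int *count = data;

	(*count)++;	/* called once per object in the table */
	return 0;
}

static int demo_count(struct cfs_hash *hs)
{
	int count = 0;

	cfs_hash_for_each(hs, demo_count_cb, &count);
	return count;
}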

+ 0 - 200
drivers/staging/lustre/include/linux/libcfs/libcfs_private.h

@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_private.h
- *
- * Various defines for libcfs.
- *
- */
-
-#ifndef __LIBCFS_PRIVATE_H__
-#define __LIBCFS_PRIVATE_H__
-
-#ifndef DEBUG_SUBSYSTEM
-# define DEBUG_SUBSYSTEM S_UNDEFINED
-#endif
-
-#define LASSERTF(cond, fmt, ...)					\
-do {									\
-	if (unlikely(!(cond))) {					\
-		LIBCFS_DEBUG_MSG_DATA_DECL(__msg_data, D_EMERG, NULL);	\
-		libcfs_debug_msg(&__msg_data,				\
-				 "ASSERTION( %s ) failed: " fmt, #cond,	\
-				 ## __VA_ARGS__);			\
-		lbug_with_loc(&__msg_data);				\
-	}								\
-} while (0)
-
-#define LASSERT(cond) LASSERTF(cond, "\n")
-
-#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
-/**
- * This is for more expensive checks that one doesn't want enabled all
- * the time. LINVRNT() has to be explicitly enabled by the
- * CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK option.
- */
-# define LINVRNT(exp) LASSERT(exp)
-#else
-# define LINVRNT(exp) ((void)sizeof !!(exp))
-#endif
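
At a call site the assertion macros read like printk-style checks; a minimal hypothetical sketch:

static void demo_check(int nob)
{
	/* Plain invariant check; triggers LBUG handling on failure. */
	LASSERT(nob >= 0);

	/* The formatted variant records context when it trips. */
	LASSERTF((nob & 7) == 0, "nob: %d is not 8-byte aligned\n", nob);
}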
-
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msg);
-
-#define LBUG()							  \
-do {								    \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL);	     \
-	lbug_with_loc(&msgdata);					\
-} while (0)
-
-/*
- * Use #define rather than inline, as lnet_cpt_table() might
- * not be defined yet
- */
-#define kmalloc_cpt(size, flags, cpt) \
-	kmalloc_node(size, flags,  cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kzalloc_cpt(size, flags, cpt) \
-	kmalloc_node(size, flags | __GFP_ZERO,				\
-		     cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kvmalloc_cpt(size, flags, cpt) \
-	kvmalloc_node(size, flags,					\
-		      cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kvzalloc_cpt(size, flags, cpt) \
-	kvmalloc_node(size, flags | __GFP_ZERO,				\
-		      cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
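A hedged sketch of the calling pattern, with a hypothetical per-CPT object; the allocation lands on the NUMA node that cfs_cpt_spread_node() picks for the given CPT:

struct demo_tx {
	struct list_head	dt_list;
	int			dt_cpt;
};

static struct demo_tx *demo_tx_alloc(int cpt)
{
	struct demo_tx *tx;

	/* Zeroed allocation placed close to the CPUs serving @cpt. */
	tx = kzalloc_cpt(sizeof(*tx), GFP_NOFS, cpt);
	if (tx)
		tx->dt_cpt = cpt;
	return tx;
}
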
-/******************************************************************************/
-
-void libcfs_debug_dumplog(void);
-int libcfs_debug_init(unsigned long bufsize);
-int libcfs_debug_cleanup(void);
-int libcfs_debug_clear_buffer(void);
-int libcfs_debug_mark_buffer(const char *text);
-
-/*
- * Allocate a variable-size array; the returned value is an array of
- * pointers. The caller specifies the length of the array via @count.
- */
-void *cfs_array_alloc(int count, unsigned int size);
-void  cfs_array_free(void *vars);
-
-#define LASSERT_ATOMIC_ENABLED	  (1)
-
-#if LASSERT_ATOMIC_ENABLED
-
-/** assert value of @a is equal to @v */
-#define LASSERT_ATOMIC_EQ(a, v)			\
-	LASSERTF(atomic_read(a) == v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is not equal to @v */
-#define LASSERT_ATOMIC_NE(a, v)		\
-	LASSERTF(atomic_read(a) != v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is less than @v */
-#define LASSERT_ATOMIC_LT(a, v)		\
-	LASSERTF(atomic_read(a) < v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is less than or equal to @v */
-#define LASSERT_ATOMIC_LE(a, v)		\
-	LASSERTF(atomic_read(a) <= v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than @v */
-#define LASSERT_ATOMIC_GT(a, v)		\
-	LASSERTF(atomic_read(a) > v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than or equal to @v */
-#define LASSERT_ATOMIC_GE(a, v)		\
-	LASSERTF(atomic_read(a) >= v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than @v1 and less than @v2 */
-#define LASSERT_ATOMIC_GT_LT(a, v1, v2)			 \
-do {							    \
-	int __v = atomic_read(a);			   \
-	LASSERTF(__v > v1 && __v < v2, "value: %d\n", __v);     \
-} while (0)
-
-/** assert value of @a is greater than @v1 and less than or equal to @v2 */
-#define LASSERT_ATOMIC_GT_LE(a, v1, v2)			 \
-do {							    \
-	int __v = atomic_read(a);			   \
-	LASSERTF(__v > v1 && __v <= v2, "value: %d\n", __v);    \
-} while (0)
-
-/** assert value of @a is greater than or equal to @v1 and less than @v2 */
-#define LASSERT_ATOMIC_GE_LT(a, v1, v2)			 \
-do {							    \
-	int __v = atomic_read(a);			   \
-	LASSERTF(__v >= v1 && __v < v2, "value: %d\n", __v);    \
-} while (0)
-
-/** assert value of @a is greater than or equal to @v1 and less than or equal to @v2 */
-#define LASSERT_ATOMIC_GE_LE(a, v1, v2)			 \
-do {							    \
-	int __v = atomic_read(a);			   \
-	LASSERTF(__v >= v1 && __v <= v2, "value: %d\n", __v);   \
-} while (0)
-
-#else /* !LASSERT_ATOMIC_ENABLED */
-
-#define LASSERT_ATOMIC_EQ(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_NE(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_LT(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_LE(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_GT(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_GE(a, v)		 do {} while (0)
-#define LASSERT_ATOMIC_GT_LT(a, v1, v2)	 do {} while (0)
-#define LASSERT_ATOMIC_GT_LE(a, v1, v2)	 do {} while (0)
-#define LASSERT_ATOMIC_GE_LT(a, v1, v2)	 do {} while (0)
-#define LASSERT_ATOMIC_GE_LE(a, v1, v2)	 do {} while (0)
-
-#endif /* LASSERT_ATOMIC_ENABLED */
-
-#define LASSERT_ATOMIC_ZERO(a)		  LASSERT_ATOMIC_EQ(a, 0)
-#define LASSERT_ATOMIC_POS(a)		   LASSERT_ATOMIC_GT(a, 0)
-
-/* implication */
-#define ergo(a, b) (!(a) || (b))
-/* logical equivalence */
-#define equi(a, b) (!!(a) == !!(b))
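
Both read naturally inside assertions; a minimal hypothetical sketch:

static void demo_invariants(int busy, int refs)
{
	/* Implication: if the object is busy, it must hold a ref. */
	LASSERT(ergo(busy, refs > 0));

	/* Equivalence: the two conditions hold (or fail) together. */
	LASSERT(equi(busy, refs > 0));
}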
-
-#ifndef HAVE_CFS_SIZE_ROUND
-static inline size_t cfs_size_round(int val)
-{
-	return round_up(val, 8);
-}
-
-#define HAVE_CFS_SIZE_ROUND
-#endif
-
-#endif

+ 0 - 102
drivers/staging/lustre/include/linux/libcfs/libcfs_string.h

@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_string.h
- *
- * Generic string manipulation functions.
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- */
-
-#ifndef __LIBCFS_STRING_H__
-#define __LIBCFS_STRING_H__
-
-#include <linux/mm.h>
-
-/* libcfs_string.c */
-/* Convert a text string to a bitmask */
-int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
-		 int *oldmask, int minmask, int allmask);
-/* trim leading and trailing space characters */
-char *cfs_firststr(char *str, size_t size);
-
-/**
- * Structure to represent length-delimited strings (not necessarily NUL-terminated).
- */
-struct cfs_lstr {
-	char		*ls_str;
-	int		ls_len;
-};
-
-/*
- * Structure to represent a \<range_expr\> token of the syntax.
- */
-struct cfs_range_expr {
-	/*
-	 * Link to cfs_expr_list::el_exprs.
-	 */
-	struct list_head	re_link;
-	u32		re_lo;
-	u32		re_hi;
-	u32		re_stride;
-};
-
-struct cfs_expr_list {
-	struct list_head	el_link;
-	struct list_head	el_exprs;
-};
-
-int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
-int cfs_str2num_check(char *str, int nob, unsigned int *num,
-		      unsigned int min, unsigned int max);
-int cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list);
-int cfs_expr_list_print(char *buffer, int count,
-			struct cfs_expr_list *expr_list);
-int cfs_expr_list_values(struct cfs_expr_list *expr_list,
-			 int max, u32 **values);
-static inline void
-cfs_expr_list_values_free(u32 *values, int num)
-{
-	/*
-	 * This array is allocated by kvmalloc(), so it shouldn't be freed
-	 * with OBD_FREE() by modules other than libcfs & LNet;
-	 * otherwise we would see a spurious memory leak
-	 */
-	kvfree(values);
-}
-
-void cfs_expr_list_free(struct cfs_expr_list *expr_list);
-int cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
-			struct cfs_expr_list **elpp);
-void cfs_expr_list_free_list(struct list_head *list);
-
-#endif
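
A hedged sketch of the expression-list API, assuming the bracketed lo-hi/stride syntax used elsewhere in libcfs (so "[0-7/2]" expands to {0, 2, 4, 6}):

static int demo_expr(void)
{
	struct cfs_expr_list *el;
	char expr[] = "[0-7/2]";
	int rc;

	rc = cfs_expr_list_parse(expr, sizeof(expr) - 1, 0, 1023, &el);
	if (rc)
		return rc;

	/* 4 is in {0, 2, 4, 6}, so this should match. */
	rc = cfs_expr_list_match(4, el) ? 0 : -EINVAL;

	cfs_expr_list_free(el);
	return rc;
}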

+ 0 - 212
drivers/staging/lustre/include/linux/lnet/api.h

@@ -1,212 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-
-#ifndef __LNET_API_H__
-#define __LNET_API_H__
-
-/** \defgroup lnet LNet
- *
- * The Lustre Networking subsystem.
- *
- * LNet is an asynchronous message-passing API that provides an unreliable,
- * connectionless service with no ordering guarantees. It supports OFA IB,
- * TCP/IP, and Cray interconnects, and routes between heterogeneous networks.
- *
- * @{
- */
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/** \defgroup lnet_init_fini Initialization and cleanup
- * LNet must be properly initialized before any LNet calls can be made.
- * @{
- */
-int LNetNIInit(lnet_pid_t requested_pid);
-int LNetNIFini(void);
-/** @} lnet_init_fini */
-
-/** \defgroup lnet_addr LNet addressing and basic types
- *
- * Addressing scheme and basic data types of LNet.
- *
- * The LNet API is memory-oriented, so LNet must be able to address not only
- * end-points but also memory regions within a process address space.
- * An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process
- * on a node. A portal represents an opening in the address space of a
- * process. Match bits are the criteria used to identify a region of memory
- * inside a portal, and an offset specifies a location within that region.
- *
- * LNet creates a table of portals for each process during initialization.
- * This table has MAX_PORTALS entries and its size can't be dynamically
- * changed. A portal stays empty until the owning process starts to add
- * memory regions to it. A portal is sometimes called an index because
- * it's an entry in the portals table of a process.
- *
- * \see LNetMEAttach
- * @{
- */
-int LNetGetId(unsigned int index, struct lnet_process_id *id);
-int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
-
-/** @} lnet_addr */
-
-/** \defgroup lnet_me Match entries
- *
- * A match entry (abbreviated as ME) describes a set of criteria to accept
- * incoming requests.
- *
- * A portal is essentially a match list plus a set of attributes. A match
- * list is a chain of MEs. Each ME includes a pointer to a memory descriptor
- * and a set of match criteria. The match criteria can be used to reject
- * incoming requests based on process ID or the match bits provided in the
- * request. MEs can be dynamically inserted into a match list by LNetMEAttach()
- * and LNetMEInsert(), and removed from its list by LNetMEUnlink().
- * @{
- */
-int LNetMEAttach(unsigned int      portal,
-		 struct lnet_process_id match_id_in,
-		 __u64		   match_bits_in,
-		 __u64		   ignore_bits_in,
-		 enum lnet_unlink unlink_in,
-		 enum lnet_ins_pos pos_in,
-		 struct lnet_handle_me *handle_out);
-
-int LNetMEInsert(struct lnet_handle_me current_in,
-		 struct lnet_process_id match_id_in,
-		 __u64		   match_bits_in,
-		 __u64		   ignore_bits_in,
-		 enum lnet_unlink unlink_in,
-		 enum lnet_ins_pos position_in,
-		 struct lnet_handle_me *handle_out);
-
-int LNetMEUnlink(struct lnet_handle_me current_in);
-/** @} lnet_me */
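
A hedged sketch of the attach call (LNET_NID_ANY, LNET_PID_ANY, LNET_UNLINK and LNET_INS_AFTER come from lnet-types.h; the wrapper itself is hypothetical):

static int demo_me_attach(unsigned int portal, __u64 match_bits,
			  struct lnet_handle_me *meh)
{
	struct lnet_process_id any = {
		.nid = LNET_NID_ANY,	/* accept from any node... */
		.pid = LNET_PID_ANY,	/* ...and any process */
	};

	/* Append an ME that matches @match_bits exactly (no ignore
	 * bits) and is unlinked automatically with its MD. */
	return LNetMEAttach(portal, any, match_bits, 0, LNET_UNLINK,
			    LNET_INS_AFTER, meh);
}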
-
-/** \defgroup lnet_md Memory descriptors
- *
- * A memory descriptor contains information about a region of a user's
- * memory (either in kernel or user space) and optionally points to an
- * event queue where information about the operations performed on the
- * memory descriptor is recorded. "Memory descriptor" is abbreviated as
- * MD and is used interchangeably with the memory region it describes.
- *
- * The LNet API provides two operations to create MDs: LNetMDAttach()
- * and LNetMDBind(); one operation to unlink and release the resources
- * associated with an MD: LNetMDUnlink().
- * @{
- */
-int LNetMDAttach(struct lnet_handle_me current_in,
-		 struct lnet_md md_in,
-		 enum lnet_unlink unlink_in,
-		 struct lnet_handle_md *md_handle_out);
-
-int LNetMDBind(struct lnet_md md_in,
-	       enum lnet_unlink unlink_in,
-	       struct lnet_handle_md *md_handle_out);
-
-int LNetMDUnlink(struct lnet_handle_md md_in);
-/** @} lnet_md */
-
-/** \defgroup lnet_eq Events and event queues
- *
- * Event queues (abbreviated as EQ) are used to log operations performed on
- * local MDs. In particular, they signal the completion of a data transmission
- * into or out of an MD. They can also be used to hold acknowledgments for
- * completed PUT operations and indicate when an MD has been unlinked. Multiple
- * MDs can share a single EQ. An EQ may have an optional event handler
- * associated with it. If an event handler exists, it will be run for each
- * event that is deposited into the EQ.
- *
- * In addition to the lnet_handle_eq, the LNet API defines two types
- * associated with events: ::lnet_event_kind defines the kinds of events
- * that can be stored in an EQ, and lnet_event is a structure that
- * holds the information associated with an event.
- *
- * There are five functions for dealing with EQs: LNetEQAlloc() is used to
- * create an EQ and allocate the resources needed, while LNetEQFree()
- * releases these resources and free the EQ. LNetEQGet() retrieves the next
- * event from an EQ, and LNetEQWait() can be used to block a process until
- * an EQ has at least one event. LNetEQPoll() can be used to test or wait
- * on multiple EQs.
- * @{
- */
-int LNetEQAlloc(unsigned int       count_in,
-		lnet_eq_handler_t  handler,
-		struct lnet_handle_eq *handle_out);
-
-int LNetEQFree(struct lnet_handle_eq eventq_in);
-
-int LNetEQPoll(struct lnet_handle_eq *eventqs_in,
-	       int		 neq_in,
-	       int		 timeout_ms,
-	       int		 interruptible,
-	       struct lnet_event *event_out,
-	       int		*which_eq_out);
-/** @} lnet_eq */
-
-/** \defgroup lnet_data Data movement operations
- *
- * The LNet API provides two data movement operations: LNetPut()
- * and LNetGet().
- * @{
- */
-int LNetPut(lnet_nid_t	      self,
-	    struct lnet_handle_md md_in,
-	    enum lnet_ack_req ack_req_in,
-	    struct lnet_process_id target_in,
-	    unsigned int      portal_in,
-	    __u64	      match_bits_in,
-	    unsigned int      offset_in,
-	    __u64	      hdr_data_in);
-
-int LNetGet(lnet_nid_t	      self,
-	    struct lnet_handle_md md_in,
-	    struct lnet_process_id target_in,
-	    unsigned int      portal_in,
-	    __u64	      match_bits_in,
-	    unsigned int      offset_in);
-/** @} lnet_data */
-
-/** \defgroup lnet_misc Miscellaneous operations.
- * Miscellaneous operations.
- * @{
- */
-int LNetSetLazyPortal(int portal);
-int LNetClearLazyPortal(int portal);
-int LNetCtl(unsigned int cmd, void *arg);
-void LNetDebugPeer(struct lnet_process_id id);
-
-/** @} lnet_misc */
-
-/** @} lnet */
-#endif
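
Putting the send side together: bind an MD over a local buffer, then PUT it at a target. A hedged sketch; the EQ and target are assumed to be set up elsewhere, and the portal and match bits are arbitrary demo values:

static int demo_put(lnet_nid_t self, struct lnet_process_id target,
		    struct lnet_handle_eq eqh, void *buf, unsigned int len)
{
	struct lnet_md md = {
		.start		= buf,
		.length		= len,
		.threshold	= 1,	/* one operation, then auto-unlink */
		.options	= 0,
		.user_ptr	= NULL,
		.eq_handle	= eqh,
	};
	struct lnet_handle_md mdh;
	int rc;

	rc = LNetMDBind(md, LNET_UNLINK, &mdh);
	if (rc)
		return rc;

	return LNetPut(self, mdh, LNET_ACK_REQ, target,
		       4 /* portal */, 0x17 /* match bits */, 0, 0);
}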

+ 0 - 652
drivers/staging/lustre/include/linux/lnet/lib-lnet.h

@@ -1,652 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lib-lnet.h
- */
-
-#ifndef __LNET_LIB_LNET_H__
-#define __LNET_LIB_LNET_H__
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <net/sock.h>
-
-#include <linux/lnet/api.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/lnetctl.h>
-#include <uapi/linux/lnet/nidstr.h>
-
-extern struct lnet the_lnet;	/* THE network */
-
-#if (BITS_PER_LONG == 32)
-/* 2 CPTs; allowing more CPTs could put us under memory pressure */
-#define LNET_CPT_MAX_BITS	1
-
-#else /* 64-bit system */
-/*
- * 256 CPTs for thousands of CPUs; allowing more CPTs would risk
- * exhausting the lh_cookie space.
- */
-#define LNET_CPT_MAX_BITS	8
-#endif /* BITS_PER_LONG == 32 */
-
-/* max allowed CPT number */
-#define LNET_CPT_MAX		(1 << LNET_CPT_MAX_BITS)
-
-#define LNET_CPT_NUMBER		(the_lnet.ln_cpt_number)
-#define LNET_CPT_BITS		(the_lnet.ln_cpt_bits)
-#define LNET_CPT_MASK		((1ULL << LNET_CPT_BITS) - 1)
-
-/** exclusive lock */
-#define LNET_LOCK_EX		CFS_PERCPT_LOCK_EX
-
-/* need both kernel and user-land acceptor */
-#define LNET_ACCEPTOR_MIN_RESERVED_PORT    512
-#define LNET_ACCEPTOR_MAX_RESERVED_PORT    1023
-
-static inline int lnet_is_route_alive(struct lnet_route *route)
-{
-	/* gateway is down */
-	if (!route->lr_gateway->lp_alive)
-		return 0;
-	/* no NI status, assume it's alive */
-	if ((route->lr_gateway->lp_ping_feats &
-	     LNET_PING_FEAT_NI_STATUS) == 0)
-		return 1;
-	/* has NI status, check # down NIs */
-	return route->lr_downis == 0;
-}
-
-static inline int lnet_is_wire_handle_none(struct lnet_handle_wire *wh)
-{
-	return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE &&
-		wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE);
-}
-
-static inline int lnet_md_exhausted(struct lnet_libmd *md)
-{
-	return (!md->md_threshold ||
-		((md->md_options & LNET_MD_MAX_SIZE) &&
-		 md->md_offset + md->md_max_size > md->md_length));
-}
-
-static inline int lnet_md_unlinkable(struct lnet_libmd *md)
-{
-	/*
-	 * Should unlink md when its refcount is 0 and either:
-	 *  - md has been flagged for deletion (by auto unlink or
-	 *    LNetM[DE]Unlink, in the latter case md may not be exhausted).
-	 *  - auto unlink is on and md is exhausted.
-	 */
-	if (md->md_refcount)
-		return 0;
-
-	if (md->md_flags & LNET_MD_FLAG_ZOMBIE)
-		return 1;
-
-	return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) &&
-		lnet_md_exhausted(md));
-}
-
-#define lnet_cpt_table()	(the_lnet.ln_cpt_table)
-#define lnet_cpt_current()	cfs_cpt_current(the_lnet.ln_cpt_table, 1)
-
-static inline int
-lnet_cpt_of_cookie(__u64 cookie)
-{
-	unsigned int cpt = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK;
-
-	/*
-	 * LNET_CPT_NUMBER doesn't have to be a power of 2, so an
-	 * invalid cookie can yield an out-of-range cpt
-	 */
-	return cpt < LNET_CPT_NUMBER ? cpt : cpt % LNET_CPT_NUMBER;
-}
-
-static inline void
-lnet_res_lock(int cpt)
-{
-	cfs_percpt_lock(the_lnet.ln_res_lock, cpt);
-}
-
-static inline void
-lnet_res_unlock(int cpt)
-{
-	cfs_percpt_unlock(the_lnet.ln_res_lock, cpt);
-}
-
-static inline int
-lnet_res_lock_current(void)
-{
-	int cpt = lnet_cpt_current();
-
-	lnet_res_lock(cpt);
-	return cpt;
-}
-
-static inline void
-lnet_net_lock(int cpt)
-{
-	cfs_percpt_lock(the_lnet.ln_net_lock, cpt);
-}
-
-static inline void
-lnet_net_unlock(int cpt)
-{
-	cfs_percpt_unlock(the_lnet.ln_net_lock, cpt);
-}
-
-static inline int
-lnet_net_lock_current(void)
-{
-	int cpt = lnet_cpt_current();
-
-	lnet_net_lock(cpt);
-	return cpt;
-}
-
-#define LNET_LOCK()		lnet_net_lock(LNET_LOCK_EX)
-#define LNET_UNLOCK()		lnet_net_unlock(LNET_LOCK_EX)
-
-#define lnet_ptl_lock(ptl)	spin_lock(&(ptl)->ptl_lock)
-#define lnet_ptl_unlock(ptl)	spin_unlock(&(ptl)->ptl_lock)
-#define lnet_eq_wait_lock()	spin_lock(&the_lnet.ln_eq_wait_lock)
-#define lnet_eq_wait_unlock()	spin_unlock(&the_lnet.ln_eq_wait_lock)
-#define lnet_ni_lock(ni)	spin_lock(&(ni)->ni_lock)
-#define lnet_ni_unlock(ni)	spin_unlock(&(ni)->ni_lock)
-
-#define MAX_PORTALS		64
-
-static inline struct lnet_libmd *
-lnet_md_alloc(struct lnet_md *umd)
-{
-	struct lnet_libmd *md;
-	unsigned int size;
-	unsigned int niov;
-
-	if (umd->options & LNET_MD_KIOV) {
-		niov = umd->length;
-		size = offsetof(struct lnet_libmd, md_iov.kiov[niov]);
-	} else {
-		niov = umd->options & LNET_MD_IOVEC ? umd->length : 1;
-		size = offsetof(struct lnet_libmd, md_iov.iov[niov]);
-	}
-
-	md = kzalloc(size, GFP_NOFS);
-
-	if (md) {
-		/* Set here in case of early free */
-		md->md_options = umd->options;
-		md->md_niov = niov;
-		INIT_LIST_HEAD(&md->md_list);
-	}
-
-	return md;
-}
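
The sizing above is the usual trailing-array idiom: the allocation covers the fixed header plus exactly @niov iov/kiov entries. A generic hedged sketch of the same pattern (the structure is hypothetical):

struct demo_vec {
	int		dv_niov;
	struct bio_vec	dv_kiov[0];	/* trailing array, sized at alloc */
};

static struct demo_vec *demo_vec_alloc(int niov)
{
	struct demo_vec *v;

	/* Header plus @niov trailing entries, as in lnet_md_alloc(). */
	v = kzalloc(offsetof(struct demo_vec, dv_kiov[niov]), GFP_NOFS);
	if (v)
		v->dv_niov = niov;
	return v;
}
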
-
-struct lnet_libhandle *lnet_res_lh_lookup(struct lnet_res_container *rec,
-					  __u64 cookie);
-void lnet_res_lh_initialize(struct lnet_res_container *rec,
-			    struct lnet_libhandle *lh);
-static inline void
-lnet_res_lh_invalidate(struct lnet_libhandle *lh)
-{
-	/* NB: cookie is still useful, don't reset it */
-	list_del(&lh->lh_hash_chain);
-}
-
-static inline void
-lnet_eq2handle(struct lnet_handle_eq *handle, struct lnet_eq *eq)
-{
-	if (!eq) {
-		LNetInvalidateEQHandle(handle);
-		return;
-	}
-
-	handle->cookie = eq->eq_lh.lh_cookie;
-}
-
-static inline struct lnet_eq *
-lnet_handle2eq(struct lnet_handle_eq *handle)
-{
-	struct lnet_libhandle *lh;
-
-	lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container, handle->cookie);
-	if (!lh)
-		return NULL;
-
-	return lh_entry(lh, struct lnet_eq, eq_lh);
-}
-
-static inline void
-lnet_md2handle(struct lnet_handle_md *handle, struct lnet_libmd *md)
-{
-	handle->cookie = md->md_lh.lh_cookie;
-}
-
-static inline struct lnet_libmd *
-lnet_handle2md(struct lnet_handle_md *handle)
-{
-	/* ALWAYS called with resource lock held */
-	struct lnet_libhandle *lh;
-	int cpt;
-
-	cpt = lnet_cpt_of_cookie(handle->cookie);
-	lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
-				handle->cookie);
-	if (!lh)
-		return NULL;
-
-	return lh_entry(lh, struct lnet_libmd, md_lh);
-}
-
-static inline struct lnet_libmd *
-lnet_wire_handle2md(struct lnet_handle_wire *wh)
-{
-	/* ALWAYS called with resource lock held */
-	struct lnet_libhandle *lh;
-	int cpt;
-
-	if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie)
-		return NULL;
-
-	cpt = lnet_cpt_of_cookie(wh->wh_object_cookie);
-	lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
-				wh->wh_object_cookie);
-	if (!lh)
-		return NULL;
-
-	return lh_entry(lh, struct lnet_libmd, md_lh);
-}
-
-static inline void
-lnet_me2handle(struct lnet_handle_me *handle, struct lnet_me *me)
-{
-	handle->cookie = me->me_lh.lh_cookie;
-}
-
-static inline struct lnet_me *
-lnet_handle2me(struct lnet_handle_me *handle)
-{
-	/* ALWAYS called with resource lock held */
-	struct lnet_libhandle *lh;
-	int cpt;
-
-	cpt = lnet_cpt_of_cookie(handle->cookie);
-	lh = lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt],
-				handle->cookie);
-	if (!lh)
-		return NULL;
-
-	return lh_entry(lh, struct lnet_me, me_lh);
-}
-
-static inline void
-lnet_peer_addref_locked(struct lnet_peer *lp)
-{
-	LASSERT(lp->lp_refcount > 0);
-	lp->lp_refcount++;
-}
-
-void lnet_destroy_peer_locked(struct lnet_peer *lp);
-
-static inline void
-lnet_peer_decref_locked(struct lnet_peer *lp)
-{
-	LASSERT(lp->lp_refcount > 0);
-	lp->lp_refcount--;
-	if (!lp->lp_refcount)
-		lnet_destroy_peer_locked(lp);
-}
-
-static inline int
-lnet_isrouter(struct lnet_peer *lp)
-{
-	return lp->lp_rtr_refcount ? 1 : 0;
-}
-
-static inline void
-lnet_ni_addref_locked(struct lnet_ni *ni, int cpt)
-{
-	LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
-	LASSERT(*ni->ni_refs[cpt] >= 0);
-
-	(*ni->ni_refs[cpt])++;
-}
-
-static inline void
-lnet_ni_addref(struct lnet_ni *ni)
-{
-	lnet_net_lock(0);
-	lnet_ni_addref_locked(ni, 0);
-	lnet_net_unlock(0);
-}
-
-static inline void
-lnet_ni_decref_locked(struct lnet_ni *ni, int cpt)
-{
-	LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
-	LASSERT(*ni->ni_refs[cpt] > 0);
-
-	(*ni->ni_refs[cpt])--;
-}
-
-static inline void
-lnet_ni_decref(struct lnet_ni *ni)
-{
-	lnet_net_lock(0);
-	lnet_ni_decref_locked(ni, 0);
-	lnet_net_unlock(0);
-}
-
-void lnet_ni_free(struct lnet_ni *ni);
-struct lnet_ni *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist);
-
-static inline int
-lnet_nid2peerhash(lnet_nid_t nid)
-{
-	return hash_long(nid, LNET_PEER_HASH_BITS);
-}
-
-static inline struct list_head *
-lnet_net2rnethash(__u32 net)
-{
-	return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) +
-		LNET_NETTYP(net)) &
-		((1U << the_lnet.ln_remote_nets_hbits) - 1)];
-}
-
-extern struct lnet_lnd the_lolnd;
-extern int avoid_asym_router_failure;
-
-int lnet_cpt_of_nid_locked(lnet_nid_t nid);
-int lnet_cpt_of_nid(lnet_nid_t nid);
-struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
-struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt);
-struct lnet_ni *lnet_net2ni(__u32 net);
-
-extern int portal_rotor;
-
-int lnet_lib_init(void);
-void lnet_lib_exit(void);
-
-int lnet_notify(struct lnet_ni *ni, lnet_nid_t peer, int alive,
-		unsigned long when);
-void lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive,
-			unsigned long when);
-int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid,
-		   unsigned int priority);
-int lnet_check_routes(void);
-int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
-void lnet_destroy_routes(void);
-int lnet_get_route(int idx, __u32 *net, __u32 *hops,
-		   lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
-int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
-
-void lnet_router_debugfs_init(void);
-void lnet_router_debugfs_fini(void);
-int  lnet_rtrpools_alloc(int im_a_router);
-void lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages);
-int lnet_rtrpools_adjust(int tiny, int small, int large);
-int lnet_rtrpools_enable(void);
-void lnet_rtrpools_disable(void);
-void lnet_rtrpools_free(int keep_pools);
-struct lnet_remotenet *lnet_find_net_locked(__u32 net);
-int lnet_dyn_add_ni(lnet_pid_t requested_pid,
-		    struct lnet_ioctl_config_data *conf);
-int lnet_dyn_del_ni(__u32 net);
-int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
-
-int lnet_islocalnid(lnet_nid_t nid);
-int lnet_islocalnet(__u32 net);
-
-void lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
-			unsigned int offset, unsigned int mlen);
-void lnet_msg_detach_md(struct lnet_msg *msg, int status);
-void lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev);
-void lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type);
-void lnet_msg_commit(struct lnet_msg *msg, int cpt);
-void lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status);
-
-void lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev);
-void lnet_prep_send(struct lnet_msg *msg, int type,
-		    struct lnet_process_id target, unsigned int offset,
-		    unsigned int len);
-int lnet_send(lnet_nid_t nid, struct lnet_msg *msg, lnet_nid_t rtr_nid);
-void lnet_return_tx_credits_locked(struct lnet_msg *msg);
-void lnet_return_rx_credits_locked(struct lnet_msg *msg);
-void lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp);
-void lnet_drop_routed_msgs_locked(struct list_head *list, int cpt);
-
-/* portals functions */
-/* portals attributes */
-static inline int
-lnet_ptl_is_lazy(struct lnet_portal *ptl)
-{
-	return !!(ptl->ptl_options & LNET_PTL_LAZY);
-}
-
-static inline int
-lnet_ptl_is_unique(struct lnet_portal *ptl)
-{
-	return !!(ptl->ptl_options & LNET_PTL_MATCH_UNIQUE);
-}
-
-static inline int
-lnet_ptl_is_wildcard(struct lnet_portal *ptl)
-{
-	return !!(ptl->ptl_options & LNET_PTL_MATCH_WILDCARD);
-}
-
-static inline void
-lnet_ptl_setopt(struct lnet_portal *ptl, int opt)
-{
-	ptl->ptl_options |= opt;
-}
-
-static inline void
-lnet_ptl_unsetopt(struct lnet_portal *ptl, int opt)
-{
-	ptl->ptl_options &= ~opt;
-}
-
-/* match-table functions */
-struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable,
-				     struct lnet_process_id id, __u64 mbits);
-struct lnet_match_table *lnet_mt_of_attach(unsigned int index,
-					   struct lnet_process_id id,
-					   __u64 mbits, __u64 ignore_bits,
-					   enum lnet_ins_pos pos);
-int lnet_mt_match_md(struct lnet_match_table *mtable,
-		     struct lnet_match_info *info, struct lnet_msg *msg);
-
-/* portals match/attach functions */
-void lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md,
-			struct list_head *matches, struct list_head *drops);
-void lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md);
-int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg);
-
-/* initialize and finalize portals */
-int lnet_portals_create(void);
-void lnet_portals_destroy(void);
-
-/* message functions */
-int lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr,
-	       lnet_nid_t fromnid, void *private, int rdma_req);
-int lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg);
-int lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg);
-
-void lnet_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-	       int delayed, unsigned int offset, unsigned int mlen,
-	       unsigned int rlen);
-void lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-		  int delayed, unsigned int offset,
-		  unsigned int mlen, unsigned int rlen);
-
-struct lnet_msg *lnet_create_reply_msg(struct lnet_ni *ni,
-				       struct lnet_msg *get_msg);
-void lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *msg,
-			    unsigned int len);
-
-void lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int rc);
-
-void lnet_drop_message(struct lnet_ni *ni, int cpt, void *private,
-		       unsigned int nob);
-void lnet_drop_delayed_msg_list(struct list_head *head, char *reason);
-void lnet_recv_delayed_msg_list(struct list_head *head);
-
-int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt);
-void lnet_msg_container_cleanup(struct lnet_msg_container *container);
-void lnet_msg_containers_destroy(void);
-int lnet_msg_containers_create(void);
-
-char *lnet_msgtyp2str(int type);
-void lnet_print_hdr(struct lnet_hdr *hdr);
-int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
-
-/** \addtogroup lnet_fault_simulation @{ */
-
-int lnet_fault_ctl(int cmd, struct libcfs_ioctl_data *data);
-int lnet_fault_init(void);
-void lnet_fault_fini(void);
-
-bool lnet_drop_rule_match(struct lnet_hdr *hdr);
-
-int lnet_delay_rule_add(struct lnet_fault_attr *attr);
-int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown);
-int lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
-			 struct lnet_fault_stat *stat);
-void lnet_delay_rule_reset(void);
-void lnet_delay_rule_check(void);
-bool lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg);
-
-/** @} lnet_fault_simulation */
-
-void lnet_counters_get(struct lnet_counters *counters);
-void lnet_counters_reset(void);
-
-unsigned int lnet_iov_nob(unsigned int niov, struct kvec *iov);
-int lnet_extract_iov(int dst_niov, struct kvec *dst,
-		     int src_niov, const struct kvec *src,
-		      unsigned int offset, unsigned int len);
-
-unsigned int lnet_kiov_nob(unsigned int niov, struct bio_vec *iov);
-int lnet_extract_kiov(int dst_niov, struct bio_vec *dst,
-		      int src_niov, const struct bio_vec *src,
-		      unsigned int offset, unsigned int len);
-
-void lnet_copy_iov2iter(struct iov_iter *to,
-			unsigned int nsiov, const struct kvec *siov,
-			unsigned int soffset, unsigned int nob);
-void lnet_copy_kiov2iter(struct iov_iter *to,
-			 unsigned int nkiov, const struct bio_vec *kiov,
-			 unsigned int kiovoffset, unsigned int nob);
-
-void lnet_me_unlink(struct lnet_me *me);
-
-void lnet_md_unlink(struct lnet_libmd *md);
-void lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd);
-
-void lnet_register_lnd(struct lnet_lnd *lnd);
-void lnet_unregister_lnd(struct lnet_lnd *lnd);
-
-int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-		 __u32 local_ip, __u32 peer_ip, int peer_port);
-void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
-				__u32 peer_ip, int port);
-int lnet_count_acceptor_nis(void);
-int lnet_acceptor_timeout(void);
-int lnet_acceptor_port(void);
-
-int lnet_acceptor_start(void);
-void lnet_acceptor_stop(void);
-
-int lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
-int lnet_ipif_enumerate(char ***names);
-void lnet_ipif_free_enumeration(char **names, int n);
-int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
-int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
-int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port);
-int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
-int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout);
-
-int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog);
-int lnet_sock_accept(struct socket **newsockp, struct socket *sock);
-int lnet_sock_connect(struct socket **sockp, int *fatal,
-		      __u32 local_ip, int local_port,
-		      __u32 peer_ip, int peer_port);
-void libcfs_sock_release(struct socket *sock);
-
-int lnet_peers_start_down(void);
-int lnet_peer_buffer_credits(struct lnet_ni *ni);
-
-int lnet_router_checker_start(void);
-void lnet_router_checker_stop(void);
-void lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net);
-void lnet_swap_pinginfo(struct lnet_ping_info *info);
-
-int lnet_parse_ip2nets(char **networksp, char *ip2nets);
-int lnet_parse_routes(char *route_str, int *im_a_router);
-int lnet_parse_networks(struct list_head *nilist, char *networks);
-int lnet_net_unique(__u32 net, struct list_head *nilist);
-
-int lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt);
-struct lnet_peer *lnet_find_peer_locked(struct lnet_peer_table *ptable,
-					lnet_nid_t nid);
-void lnet_peer_tables_cleanup(struct lnet_ni *ni);
-void lnet_peer_tables_destroy(void);
-int lnet_peer_tables_create(void);
-void lnet_debug_peer(lnet_nid_t nid);
-int lnet_get_peer_info(__u32 peer_index, __u64 *nid,
-		       char aliveness[LNET_MAX_STR_LEN],
-		       __u32 *cpt_iter, __u32 *refcount,
-		       __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
-		       __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
-		       __u32 *peer_tx_qnob);
-
-static inline void
-lnet_peer_set_alive(struct lnet_peer *lp)
-{
-	lp->lp_last_query = jiffies;
-	lp->lp_last_alive = jiffies;
-	if (!lp->lp_alive)
-		lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
-}
-
-#endif

+ 0 - 666
drivers/staging/lustre/include/linux/lnet/lib-types.h

@@ -1,666 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lib-types.h
- */
-
-#ifndef __LNET_LIB_TYPES_H__
-#define __LNET_LIB_TYPES_H__
-
-#include <linux/kthread.h>
-#include <linux/uio.h>
-#include <linux/types.h>
-#include <linux/completion.h>
-
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/lnetctl.h>
-
-/* Max payload size */
-#define LNET_MAX_PAYLOAD      CONFIG_LNET_MAX_PAYLOAD
-#if (LNET_MAX_PAYLOAD < LNET_MTU)
-# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
-#elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
-# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
-#endif
-
-/* forward refs */
-struct lnet_libmd;
-
-struct lnet_msg {
-	struct list_head	msg_activelist;
-	struct list_head	msg_list;	   /* Q for credits/MD */
-
-	struct lnet_process_id	msg_target;
-	/* where it is from; only used for building events */
-	lnet_nid_t		msg_from;
-	__u32			msg_type;
-
-	/* committed for sending */
-	unsigned int		msg_tx_committed:1;
-	/* CPT # this message committed for sending */
-	unsigned int		msg_tx_cpt:15;
-	/* committed for receiving */
-	unsigned int		msg_rx_committed:1;
-	/* CPT # this message committed for receiving */
-	unsigned int		msg_rx_cpt:15;
-	/* queued for tx credit */
-	unsigned int		msg_tx_delayed:1;
-	/* queued for RX buffer */
-	unsigned int		msg_rx_delayed:1;
-	/* ready for pending on RX delay list */
-	unsigned int		msg_rx_ready_delay:1;
-
-	unsigned int	msg_vmflush:1;		/* VM trying to free memory */
-	unsigned int	msg_target_is_router:1; /* sending to a router */
-	unsigned int	msg_routing:1;		/* being forwarded */
-	unsigned int	msg_ack:1;		/* ack on finalize (PUT) */
-	unsigned int	msg_sending:1;		/* outgoing message */
-	unsigned int	msg_receiving:1;	/* being received */
-	unsigned int	msg_txcredit:1;		/* taken an NI send credit */
-	unsigned int	msg_peertxcredit:1;	/* taken a peer send credit */
-	unsigned int	msg_rtrcredit:1;	/* taken a global router credit */
-	unsigned int	msg_peerrtrcredit:1;	/* taken a peer router credit */
-	unsigned int	msg_onactivelist:1;	/* on the activelist */
-	unsigned int	msg_rdma_get:1;
-
-	struct lnet_peer	*msg_txpeer;	 /* peer I'm sending to */
-	struct lnet_peer	*msg_rxpeer;	 /* peer I received from */
-
-	void			*msg_private;
-	struct lnet_libmd	*msg_md;
-
-	unsigned int		 msg_len;
-	unsigned int		 msg_wanted;
-	unsigned int		 msg_offset;
-	unsigned int		 msg_niov;
-	struct kvec		*msg_iov;
-	struct bio_vec		*msg_kiov;
-
-	struct lnet_event	 msg_ev;
-	struct lnet_hdr		 msg_hdr;
-};
-
-struct lnet_libhandle {
-	struct list_head	lh_hash_chain;
-	__u64			lh_cookie;
-};
-
-#define lh_entry(ptr, type, member) \
-	((type *)((char *)(ptr) - (char *)(&((type *)0)->member)))
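
lh_entry() is an open-coded container_of(): given a pointer to a member
embedded in an object, it recovers the enclosing object. A minimal sketch,
using a hypothetical wrapper type:

/* Hypothetical: recover the object wrapping a struct lnet_libhandle */
struct example_obj {
	int			eo_data;
	struct lnet_libhandle	eo_lh;
};

static inline struct example_obj *example_from_lh(struct lnet_libhandle *lh)
{
	return lh_entry(lh, struct example_obj, eo_lh);
}
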
-
-struct lnet_eq {
-	struct list_head	  eq_list;
-	struct lnet_libhandle	  eq_lh;
-	unsigned long		  eq_enq_seq;
-	unsigned long		  eq_deq_seq;
-	unsigned int		  eq_size;
-	lnet_eq_handler_t	  eq_callback;
-	struct lnet_event	 *eq_events;
-	int			**eq_refs;	/* percpt refcount for EQ */
-};
-
-struct lnet_me {
-	struct list_head	 me_list;
-	struct lnet_libhandle	 me_lh;
-	struct lnet_process_id	 me_match_id;
-	unsigned int		 me_portal;
-	unsigned int		 me_pos;	/* hash offset in mt_hash */
-	__u64			 me_match_bits;
-	__u64			 me_ignore_bits;
-	enum lnet_unlink	 me_unlink;
-	struct lnet_libmd	*me_md;
-};
-
-struct lnet_libmd {
-	struct list_head	 md_list;
-	struct lnet_libhandle	 md_lh;
-	struct lnet_me		*md_me;
-	char			*md_start;
-	unsigned int		 md_offset;
-	unsigned int		 md_length;
-	unsigned int		 md_max_size;
-	int			 md_threshold;
-	int			 md_refcount;
-	unsigned int		 md_options;
-	unsigned int		 md_flags;
-	void			*md_user_ptr;
-	struct lnet_eq		*md_eq;
-	unsigned int		 md_niov;	/* # frags */
-	union {
-		struct kvec	iov[LNET_MAX_IOV];
-		struct bio_vec	kiov[LNET_MAX_IOV];
-	} md_iov;
-};
-
-#define LNET_MD_FLAG_ZOMBIE		BIT(0)
-#define LNET_MD_FLAG_AUTO_UNLINK	BIT(1)
-#define LNET_MD_FLAG_ABORTED		BIT(2)
-
-struct lnet_test_peer {
-	/* info about peers we are trying to fail */
-	struct list_head	tp_list;	/* ln_test_peers */
-	lnet_nid_t		tp_nid;		/* matching nid */
-	unsigned int		tp_threshold;	/* # failures to simulate */
-};
-
-#define LNET_COOKIE_TYPE_MD	1
-#define LNET_COOKIE_TYPE_ME	2
-#define LNET_COOKIE_TYPE_EQ	3
-#define LNET_COOKIE_TYPE_BITS	2
-#define LNET_COOKIE_MASK	((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL)
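
The low LNET_COOKIE_TYPE_BITS of a handle cookie carry the object type, so
the type can be recovered with the mask. An illustrative decoder (the helper
name is an assumption, not in-tree code):

/* Illustrative: decode the object type packed into a cookie's low bits */
static inline int example_cookie_type(__u64 cookie)
{
	return (int)(cookie & LNET_COOKIE_MASK);	/* MD, ME or EQ */
}
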
-
-struct lnet_ni;			/* forward ref */
-
-struct lnet_lnd {
-	/* fields managed by portals */
-	struct list_head	lnd_list;	/* stash in the LND table */
-	int			lnd_refcount;	/* # active instances */
-
-	/* fields initialised by the LND */
-	__u32			lnd_type;
-
-	int  (*lnd_startup)(struct lnet_ni *ni);
-	void (*lnd_shutdown)(struct lnet_ni *ni);
-	int  (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg);
-
-	/*
-	 * In the data movement APIs below, payload buffers are described as
-	 * a set of 'niov' fragments which are...
-	 * EITHER
-	 *    in virtual memory (struct kvec *iov != NULL)
-	 * OR
-	 *    in pages (kernel only: struct bio_vec *kiov != NULL).
-	 * The LND may NOT overwrite these fragment descriptors.
-	 * An 'offset' may specify a byte offset within the set of fragments
-	 * to start from.
-	 */
-
-	/*
-	 * Start sending a preformatted message.  'private' is NULL for PUT and
-	 * GET messages; otherwise this is a response to an incoming message
-	 * and 'private' is the 'private' passed to lnet_parse().  Return
-	 * non-zero for immediate failure, otherwise complete later with
-	 * lnet_finalize()
-	 */
-	int (*lnd_send)(struct lnet_ni *ni, void *private,
-			struct lnet_msg *msg);
-
-	/*
-	 * Start receiving 'mlen' bytes of payload data, skipping the following
-	 * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to
-	 * lnet_parse().  Return non-zero for immediate failure, otherwise
-	 * complete later with lnet_finalize().  This also gives back a receive
-	 * credit if the LND does flow control.
-	 */
-	int (*lnd_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-			int delayed, struct iov_iter *to, unsigned int rlen);
-
-	/*
-	 * lnet_parse() has had to delay processing of this message
-	 * (e.g. waiting for a forwarding buffer or send credits).  Give the
-	 * LND a chance to free urgently needed resources.  If called, return 0
-	 * for success and do NOT give back a receive credit; that has to wait
-	 * until lnd_recv() gets called.  On failure return < 0 and
-	 * release resources; lnd_recv() will not be called.
-	 */
-	int (*lnd_eager_recv)(struct lnet_ni *ni, void *private,
-			      struct lnet_msg *msg, void **new_privatep);
-
-	/* notification of peer health */
-	void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
-
-	/* query of peer aliveness */
-	void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer,
-			  unsigned long *when);
-
-	/* accept a new connection */
-	int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock);
-};
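
To make the lnd_send() contract above concrete, here is a minimal
hypothetical sketch (not the in-tree loopback LND): the callback either
fails the send immediately with a non-zero return, or arranges for
lnet_finalize() to run once the message is done.

/*
 * Hypothetical sketch of the lnd_send() contract.  A real LND would queue
 * the message, move the payload fragments, and call lnet_finalize() from
 * its completion path rather than inline.
 */
static int example_lnd_send(struct lnet_ni *ni, void *private,
			    struct lnet_msg *msg)
{
	if (!msg)
		return -EINVAL;		/* immediate failure */

	/* "deliver" instantly; completion is reported via lnet_finalize() */
	lnet_finalize(ni, msg, 0);
	return 0;
}
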
-
-struct lnet_tx_queue {
-	int			tq_credits;	/* # tx credits free */
-	int			tq_credits_min;	/* lowest it's been */
-	int			tq_credits_max;	/* total # tx credits */
-	struct list_head	tq_delayed;	/* delayed TXs */
-};
-
-struct lnet_ni {
-	spinlock_t		  ni_lock;
-	struct list_head	  ni_list;	/* chain on ln_nis */
-	struct list_head	  ni_cptlist;	/* chain on ln_nis_cpt */
-	int			  ni_maxtxcredits; /* # tx credits  */
-	/* # per-peer send credits */
-	int			  ni_peertxcredits;
-	/* # per-peer router buffer credits */
-	int			  ni_peerrtrcredits;
-	/* seconds to consider peer dead */
-	int			  ni_peertimeout;
-	int			  ni_ncpts;	/* number of CPTs */
-	__u32			 *ni_cpts;	/* CPTs this NI is bound on */
-	lnet_nid_t		  ni_nid;	/* interface's NID */
-	void			 *ni_data;	/* instance-specific data */
-	struct lnet_lnd		 *ni_lnd;	/* procedural interface */
-	struct lnet_tx_queue	**ni_tx_queues;	/* percpt TX queues */
-	int			**ni_refs;	/* percpt reference count */
-	time64_t		  ni_last_alive;/* when I was last alive */
-	struct lnet_ni_status	 *ni_status;	/* my health status */
-	/* per NI LND tunables */
-	struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
-	/* equivalent interfaces to use */
-	char			 *ni_interfaces[LNET_MAX_INTERFACES];
-	/* original net namespace */
-	struct net		 *ni_net_ns;
-};
-
-#define LNET_PROTO_PING_MATCHBITS	0x8000000000000000LL
-
-/*
- * NB: the values of these features equal LNET_PROTO_PING_VERSION_x of
- * old LNet, so there shouldn't be any compatibility issue
- */
-#define LNET_PING_FEAT_INVAL		(0)		/* no feature */
-#define LNET_PING_FEAT_BASE		BIT(0)	/* just a ping */
-#define LNET_PING_FEAT_NI_STATUS	BIT(1)	/* return NI status */
-#define LNET_PING_FEAT_RTE_DISABLED	BIT(2)	/* routing is disabled */
-
-#define LNET_PING_FEAT_MASK		(LNET_PING_FEAT_BASE | \
-					 LNET_PING_FEAT_NI_STATUS)
-
-/* router checker data, per router */
-#define LNET_MAX_RTR_NIS   16
-#define LNET_PINGINFO_SIZE offsetof(struct lnet_ping_info, pi_ni[LNET_MAX_RTR_NIS])
-struct lnet_rc_data {
-	/* chain on the_lnet.ln_rcd_zombie or ln_rcd_deathrow */
-	struct list_head	 rcd_list;
-	struct lnet_handle_md	 rcd_mdh;	/* ping buffer MD */
-	struct lnet_peer	*rcd_gateway;	/* reference to gateway */
-	struct lnet_ping_info	*rcd_pinginfo;	/* ping buffer */
-};
-
-struct lnet_peer {
-	struct list_head	 lp_hashlist;	/* chain on peer hash */
-	struct list_head	 lp_txq;	/* messages blocking for
-						 * tx credits
-						 */
-	struct list_head	 lp_rtrq;	/* messages blocking for
-						 * router credits
-						 */
-	struct list_head	 lp_rtr_list;	/* chain on router list */
-	int			 lp_txcredits;	/* # tx credits available */
-	int			 lp_mintxcredits;  /* low water mark */
-	int			 lp_rtrcredits;	   /* # router credits */
-	int			 lp_minrtrcredits; /* low water mark */
-	unsigned int		 lp_alive:1;	   /* alive/dead? */
-	unsigned int		 lp_notify:1;	/* notification outstanding? */
-	unsigned int		 lp_notifylnd:1;/* outstanding notification
-						 * for LND?
-						 */
-	unsigned int		 lp_notifying:1; /* some thread is handling
-						  * notification
-						  */
-	unsigned int		 lp_ping_notsent;/* SEND event outstanding
-						  * from ping
-						  */
-	int			 lp_alive_count; /* # times router went
-						  * dead<->alive
-						  */
-	long			 lp_txqnob;	 /* bytes queued for sending */
-	unsigned long		 lp_timestamp;	 /* time of last aliveness
-						  * news
-						  */
-	unsigned long		 lp_ping_timestamp;/* time of last ping
-						    * attempt
-						    */
-	unsigned long		 lp_ping_deadline; /* != 0 if ping reply
-						    * expected
-						    */
-	unsigned long		 lp_last_alive;	/* when I was last alive */
-	unsigned long		 lp_last_query;	/* when lp_ni was queried
-						 * last time
-						 */
-	struct lnet_ni		*lp_ni;		/* interface peer is on */
-	lnet_nid_t		 lp_nid;	/* peer's NID */
-	int			 lp_refcount;	/* # refs */
-	int			 lp_cpt;	/* CPT this peer attached on */
-	/* # refs from lnet_route::lr_gateway */
-	int			 lp_rtr_refcount;
-	/* returned RC ping features */
-	unsigned int		 lp_ping_feats;
-	struct list_head	 lp_routes;	/* routers on this peer */
-	struct lnet_rc_data	*lp_rcd;	/* router checker state */
-};
-
-/* peer hash size */
-#define LNET_PEER_HASH_BITS	9
-#define LNET_PEER_HASH_SIZE	(1 << LNET_PEER_HASH_BITS)
-
-/* peer hash table */
-struct lnet_peer_table {
-	int			 pt_version;	/* /proc validity stamp */
-	int			 pt_number;	/* # peers extant */
-	/* # zombies to go to deathrow (and not there yet) */
-	int			 pt_zombies;
-	struct list_head	 pt_deathrow;	/* zombie peers */
-	struct list_head	*pt_hash;	/* NID->peer hash */
-};
-
-/*
- * peer aliveness is enabled only on routers for peers in a network where the
- * lnet_ni::ni_peertimeout has been set to a positive value
- */
-#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \
-					 (lp)->lp_ni->ni_peertimeout > 0)
-
-struct lnet_route {
-	struct list_head	 lr_list;	/* chain on net */
-	struct list_head	 lr_gwlist;	/* chain on gateway */
-	struct lnet_peer	*lr_gateway;	/* router node */
-	__u32			 lr_net;	/* remote network number */
-	int			 lr_seq;	/* sequence for round-robin */
-	unsigned int		 lr_downis;	/* number of down NIs */
-	__u32			 lr_hops;	/* how far I am */
-	unsigned int             lr_priority;	/* route priority */
-};
-
-#define LNET_REMOTE_NETS_HASH_DEFAULT	(1U << 7)
-#define LNET_REMOTE_NETS_HASH_MAX	(1U << 16)
-#define LNET_REMOTE_NETS_HASH_SIZE	(1 << the_lnet.ln_remote_nets_hbits)
-
-struct lnet_remotenet {
-	struct list_head	lrn_list;	/* chain on
-						 * ln_remote_nets_hash
-						 */
-	struct list_head	lrn_routes;	/* routes to me */
-	__u32			lrn_net;	/* my net number */
-};
-
-/** lnet message has credit and can be submitted to lnd for send/receive */
-#define LNET_CREDIT_OK		0
-/** lnet message is waiting for credit */
-#define LNET_CREDIT_WAIT	1
-
-struct lnet_rtrbufpool {
-	struct list_head	rbp_bufs;	/* my free buffer pool */
-	struct list_head	rbp_msgs;	/* messages blocking
-						 * for a buffer
-						 */
-	int			rbp_npages;	/* # pages in each buffer */
-	/* requested number of buffers */
-	int			rbp_req_nbuffers;
-	/* # buffers actually allocated */
-	int			rbp_nbuffers;
-	int			rbp_credits;	/* # free buffers /
-						 * blocked messages
-						 */
-	int			rbp_mincredits;	/* low water mark */
-};
-
-struct lnet_rtrbuf {
-	struct list_head	 rb_list;	/* chain on rbp_bufs */
-	struct lnet_rtrbufpool	*rb_pool;	/* owning pool */
-	struct bio_vec		 rb_kiov[0];	/* the buffer space */
-};
-
-#define LNET_PEER_HASHSIZE	503	/* prime! */
-
-#define LNET_TINY_BUF_IDX	0
-#define LNET_SMALL_BUF_IDX	1
-#define LNET_LARGE_BUF_IDX	2
-
-/* # different router buffer pools */
-#define LNET_NRBPOOLS		(LNET_LARGE_BUF_IDX + 1)
-
-enum lnet_match_flags {
-	/* Didn't match anything */
-	LNET_MATCHMD_NONE	= BIT(0),
-	/* Matched OK */
-	LNET_MATCHMD_OK		= BIT(1),
-	/* Must be discarded */
-	LNET_MATCHMD_DROP	= BIT(2),
-	/* match and buffer is exhausted */
-	LNET_MATCHMD_EXHAUSTED	= BIT(3),
-	/* match or drop */
-	LNET_MATCHMD_FINISH	= (LNET_MATCHMD_OK | LNET_MATCHMD_DROP),
-};
-
-/* Options for lnet_portal::ptl_options */
-#define LNET_PTL_LAZY		BIT(0)
-#define LNET_PTL_MATCH_UNIQUE	BIT(1)	/* unique match, for RDMA */
-#define LNET_PTL_MATCH_WILDCARD	BIT(2)	/* wildcard match, request portal */
-
-/* parameter for matching operations (GET, PUT) */
-struct lnet_match_info {
-	__u64			mi_mbits;
-	struct lnet_process_id	mi_id;
-	unsigned int		mi_opc;
-	unsigned int		mi_portal;
-	unsigned int		mi_rlength;
-	unsigned int		mi_roffset;
-};
-
-/* ME hash of RDMA portal */
-#define LNET_MT_HASH_BITS		8
-#define LNET_MT_HASH_SIZE		(1 << LNET_MT_HASH_BITS)
-#define LNET_MT_HASH_MASK		(LNET_MT_HASH_SIZE - 1)
-/*
- * we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash,
- * the last entry is reserved for MEs with ignore-bits
- */
-#define LNET_MT_HASH_IGNORE		LNET_MT_HASH_SIZE
-/*
- * __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which
- * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the
- * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE]
- */
-#define LNET_MT_BITS_U64		6	/* 2^6 bits */
-#define LNET_MT_EXHAUSTED_BITS		(LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
-#define LNET_MT_EXHAUSTED_BMAP		((1 << LNET_MT_EXHAUSTED_BITS) + 1)
-
-/* portal match table */
-struct lnet_match_table {
-	/* reserved for upcoming patches, CPU partition ID */
-	unsigned int		 mt_cpt;
-	unsigned int		 mt_portal;	/* portal index */
-	/*
-	 * A match table is set as "enabled" if there is a non-exhausted MD
-	 * attached to mt_mhash; only valid for wildcard portals
-	 */
-	unsigned int		 mt_enabled;
-	/* bitmap to flag whether MEs on mt_hash are exhausted or not */
-	__u64			 mt_exhausted[LNET_MT_EXHAUSTED_BMAP];
-	struct list_head	*mt_mhash;	/* matching hash */
-};
-
-/* these are only useful for wildcard portal */
-/* Turn off message rotor for wildcard portals */
-#define	LNET_PTL_ROTOR_OFF	0
-/* round-robin dispatch all PUT messages for wildcard portals */
-#define	LNET_PTL_ROTOR_ON	1
-/* round-robin dispatch routed PUT message for wildcard portals */
-#define	LNET_PTL_ROTOR_RR_RT	2
-/* dispatch routed PUT message by hashing source NID for wildcard portals */
-#define	LNET_PTL_ROTOR_HASH_RT	3
-
-struct lnet_portal {
-	spinlock_t		  ptl_lock;
-	unsigned int		  ptl_index;	/* portal ID, reserved */
-	/* flags on this portal: lazy, unique... */
-	unsigned int		  ptl_options;
-	/* list of messages which are stealing buffer */
-	struct list_head	  ptl_msg_stealing;
-	/* messages blocking for MD */
-	struct list_head	  ptl_msg_delayed;
-	/* Match table for each CPT */
-	struct lnet_match_table	**ptl_mtables;
-	/* spread rotor of incoming "PUT" */
-	unsigned int		  ptl_rotor;
-	/* # active entries for this portal */
-	int			  ptl_mt_nmaps;
-	/* array of active entries' cpu-partition-id */
-	int			  ptl_mt_maps[0];
-};
-
-#define LNET_LH_HASH_BITS	12
-#define LNET_LH_HASH_SIZE	(1ULL << LNET_LH_HASH_BITS)
-#define LNET_LH_HASH_MASK	(LNET_LH_HASH_SIZE - 1)
-
-/* resource container (ME, MD, EQ) */
-struct lnet_res_container {
-	unsigned int		 rec_type;	/* container type */
-	__u64			 rec_lh_cookie;	/* cookie generator */
-	struct list_head	 rec_active;	/* active resource list */
-	struct list_head	*rec_lh_hash;	/* handle hash */
-};
-
-/* message container */
-struct lnet_msg_container {
-	int			  msc_init;	/* initialized or not */
-	/* max # threads finalizing */
-	int			  msc_nfinalizers;
-	/* msgs waiting to complete finalizing */
-	struct list_head	  msc_finalizing;
-	struct list_head	  msc_active;	/* active message list */
-	/* threads doing finalization */
-	void			**msc_finalizers;
-};
-
-/* Router Checker states */
-#define LNET_RC_STATE_SHUTDOWN		0	/* not started */
-#define LNET_RC_STATE_RUNNING		1	/* started up OK */
-#define LNET_RC_STATE_STOPPING		2	/* telling thread to stop */
-
-struct lnet {
-	/* CPU partition table of LNet */
-	struct cfs_cpt_table		 *ln_cpt_table;
-	/* number of CPTs in ln_cpt_table */
-	unsigned int			  ln_cpt_number;
-	unsigned int			  ln_cpt_bits;
-
-	/* protect LNet resources (ME/MD/EQ) */
-	struct cfs_percpt_lock		 *ln_res_lock;
-	/* # portals */
-	int				  ln_nportals;
-	/* the vector of portals */
-	struct lnet_portal		**ln_portals;
-	/* percpt ME containers */
-	struct lnet_res_container	**ln_me_containers;
-	/* percpt MD container */
-	struct lnet_res_container	**ln_md_containers;
-
-	/* Event Queue container */
-	struct lnet_res_container	  ln_eq_container;
-	wait_queue_head_t		  ln_eq_waitq;
-	spinlock_t			  ln_eq_wait_lock;
-	unsigned int			  ln_remote_nets_hbits;
-
-	/* protect NI, peer table, credits, routers, rtrbuf... */
-	struct cfs_percpt_lock		 *ln_net_lock;
-	/* percpt message containers for active/finalizing/freed message */
-	struct lnet_msg_container	**ln_msg_containers;
-	struct lnet_counters		**ln_counters;
-	struct lnet_peer_table		**ln_peer_tables;
-	/* failure simulation */
-	struct list_head		  ln_test_peers;
-	struct list_head		  ln_drop_rules;
-	struct list_head		  ln_delay_rules;
-
-	struct list_head		  ln_nis;	/* LND instances */
-	/* NIs bound to specific CPT(s) */
-	struct list_head		  ln_nis_cpt;
-	/* dying LND instances */
-	struct list_head		  ln_nis_zombie;
-	struct lnet_ni			 *ln_loni;	/* the loopback NI */
-
-	/* remote networks with routes to them */
-	struct list_head		 *ln_remote_nets_hash;
-	/* validity stamp */
-	__u64				  ln_remote_nets_version;
-	/* list of all known routers */
-	struct list_head		  ln_routers;
-	/* validity stamp */
-	__u64				  ln_routers_version;
-	/* percpt router buffer pools */
-	struct lnet_rtrbufpool		**ln_rtrpools;
-
-	struct lnet_handle_md		  ln_ping_target_md;
-	struct lnet_handle_eq		  ln_ping_target_eq;
-	struct lnet_ping_info		 *ln_ping_info;
-
-	/* router checker startup/shutdown state */
-	int				  ln_rc_state;
-	/* router checker's event queue */
-	struct lnet_handle_eq		  ln_rc_eqh;
-	/* rcd still pending on net */
-	struct list_head		  ln_rcd_deathrow;
-	/* rcd ready for free */
-	struct list_head		  ln_rcd_zombie;
-	/* serialise startup/shutdown */
-	struct completion		  ln_rc_signal;
-
-	struct mutex			  ln_api_mutex;
-	struct mutex			  ln_lnd_mutex;
-	struct mutex			  ln_delay_mutex;
-	/* Have I called LNetNIInit myself? */
-	int				  ln_niinit_self;
-	/* LNetNIInit/LNetNIFini counter */
-	int				  ln_refcount;
-	/* shutdown in progress */
-	int				  ln_shutdown;
-
-	int				  ln_routing;	/* am I a router? */
-	lnet_pid_t			  ln_pid;	/* requested pid */
-	/* uniquely identifies this ni in this epoch */
-	__u64				  ln_interface_cookie;
-	/* registered LNDs */
-	struct list_head		  ln_lnds;
-
-	/* test protocol compatibility flags */
-	int				  ln_testprotocompat;
-
-	/*
-	 * 0 - load the NIs from the mod params
-	 * 1 - do not load the NIs from the mod params
-	 * Reverse logic to ensure that other calls to LNetNIInit
-	 * need no change
-	 */
-	bool				  ln_nis_from_mod_params;
-
-	/*
-	 * waitq for the router checker.  As long as there are no routes in
-	 * the list, the router checker will sleep on this queue.  When
-	 * routes are added the thread will wake up.
-	 */
-	wait_queue_head_t		  ln_rc_waitq;
-
-};
-
-#endif

+ 0 - 87
drivers/staging/lustre/include/linux/lnet/socklnd.h

@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/socklnd.h
- */
-#ifndef __LNET_LNET_SOCKLND_H__
-#define __LNET_LNET_SOCKLND_H__
-
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/socklnd.h>
-
-struct ksock_hello_msg {
-	__u32		kshm_magic;	/* magic number of socklnd message */
-	__u32		kshm_version;	/* version of socklnd message */
-	lnet_nid_t      kshm_src_nid;	/* sender's nid */
-	lnet_nid_t	kshm_dst_nid;	/* destination nid */
-	lnet_pid_t	kshm_src_pid;	/* sender's pid */
-	lnet_pid_t	kshm_dst_pid;	/* destination pid */
-	__u64		kshm_src_incarnation; /* sender's incarnation */
-	__u64		kshm_dst_incarnation; /* destination's incarnation */
-	__u32		kshm_ctype;	/* connection type */
-	__u32		kshm_nips;	/* # IP addrs */
-	__u32		kshm_ips[0];	/* IP addrs */
-} WIRE_ATTR;
-
-struct ksock_lnet_msg {
-	struct lnet_hdr	ksnm_hdr;	/* lnet hdr */
-
-	/*
-	 * ksnm_payload is removed because of the winnt compiler's limitation:
-	 * a zero-sized array can only be placed at the tail of [nested]
-	 * structure definitions. The lnet payload is stored just after
-	 * the body of struct ksock_lnet_msg
-	 */
-} WIRE_ATTR;
-
-struct ksock_msg {
-	__u32	ksm_type;		/* type of socklnd message */
-	__u32	ksm_csum;		/* checksum if != 0 */
-	__u64	ksm_zc_cookies[2];	/* Zero-Copy request/ACK cookie */
-	union {
-		struct ksock_lnet_msg lnetmsg; /* lnet message, it's empty if
-						* it's NOOP
-						*/
-	} WIRE_ATTR ksm_u;
-} WIRE_ATTR;
-
-#define KSOCK_MSG_NOOP	0xC0	/* ksm_u empty */
-#define KSOCK_MSG_LNET	0xC1	/* lnet msg */
-
-/*
- * We need to know this number to parse hello msg from ksocklnd in
- * other LND (usocklnd, for example)
- */
-#define KSOCK_PROTO_V2	2
-#define KSOCK_PROTO_V3	3
-
-#endif
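
Per the note above, another LND parsing a received socklnd hello would at
minimum check the magic and version fields. A hedged sketch, assuming the
caller has already handled byte-order conversion and supplies the expected
magic (e.g. LNET_PROTO_TCP_MAGIC from lnet-types.h):

/* Illustrative hello sanity check; endianness handling omitted */
static inline int example_hello_ok(const struct ksock_hello_msg *hello,
				   __u32 expected_magic)
{
	if (hello->kshm_magic != expected_magic)
		return 0;
	return hello->kshm_version == KSOCK_PROTO_V2 ||
	       hello->kshm_version == KSOCK_PROTO_V3;
}
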

+ 0 - 149
drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h

@@ -1,149 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_debug.h
- *
- * Debug messages and assertions
- *
- */
-
-#ifndef __UAPI_LIBCFS_DEBUG_H__
-#define __UAPI_LIBCFS_DEBUG_H__
-
-/**
- * Format for debug message headers
- */
-struct ptldebug_header {
-	__u32 ph_len;
-	__u32 ph_flags;
-	__u32 ph_subsys;
-	__u32 ph_mask;
-	__u16 ph_cpu_id;
-	__u16 ph_type;
-	/* time_t overflow in 2106 */
-	__u32 ph_sec;
-	__u64 ph_usec;
-	__u32 ph_stack;
-	__u32 ph_pid;
-	__u32 ph_extern_pid;
-	__u32 ph_line_num;
-} __attribute__((packed));
-
-#define PH_FLAG_FIRST_RECORD	1
-
-/* Debugging subsystems (32 bits, non-overlapping) */
-#define S_UNDEFINED     0x00000001
-#define S_MDC           0x00000002
-#define S_MDS           0x00000004
-#define S_OSC           0x00000008
-#define S_OST           0x00000010
-#define S_CLASS         0x00000020
-#define S_LOG           0x00000040
-#define S_LLITE         0x00000080
-#define S_RPC           0x00000100
-#define S_MGMT          0x00000200
-#define S_LNET          0x00000400
-#define S_LND           0x00000800 /* ALL LNDs */
-#define S_PINGER        0x00001000
-#define S_FILTER        0x00002000
-#define S_LIBCFS        0x00004000
-#define S_ECHO          0x00008000
-#define S_LDLM          0x00010000
-#define S_LOV           0x00020000
-#define S_LQUOTA        0x00040000
-#define S_OSD           0x00080000
-#define S_LFSCK         0x00100000
-#define S_SNAPSHOT      0x00200000
-/* unused */
-#define S_LMV           0x00800000 /* b_new_cmd */
-/* unused */
-#define S_SEC           0x02000000 /* upcall cache */
-#define S_GSS           0x04000000 /* b_new_cmd */
-/* unused */
-#define S_MGC           0x10000000
-#define S_MGS           0x20000000
-#define S_FID           0x40000000 /* b_new_cmd */
-#define S_FLD           0x80000000 /* b_new_cmd */
-
-#define LIBCFS_DEBUG_SUBSYS_NAMES {					\
-	"undefined", "mdc", "mds", "osc", "ost", "class", "log",	\
-	"llite", "rpc", "mgmt", "lnet", "lnd", "pinger", "filter",	\
-	"libcfs", "echo", "ldlm", "lov", "lquota", "osd", "lfsck",	\
-	"snapshot", "", "lmv", "", "sec", "gss", "", "mgc", "mgs",	\
-	"fid", "fld", NULL }
-
-/* Debugging masks (32 bits, non-overlapping) */
-#define D_TRACE         0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE         0x00000002
-#define D_SUPER         0x00000004
-#define D_EXT2          0x00000008 /* anything from ext2_debug */
-#define D_MALLOC        0x00000010 /* print malloc, free information */
-#define D_CACHE         0x00000020 /* cache-related items */
-#define D_INFO          0x00000040 /* general information */
-#define D_IOCTL         0x00000080 /* ioctl related information */
-#define D_NETERROR      0x00000100 /* network errors */
-#define D_NET           0x00000200 /* network communications */
-#define D_WARNING       0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS         0x00000800
-#define D_OTHER         0x00001000
-#define D_DENTRY        0x00002000
-#define D_NETTRACE      0x00004000
-#define D_PAGE          0x00008000 /* bulk page handling */
-#define D_DLMTRACE      0x00010000
-#define D_ERROR         0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG         0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA            0x00080000 /* recovery and failover */
-#define D_RPCTRACE      0x00100000 /* for distributed debugging */
-#define D_VFSTRACE      0x00200000
-#define D_READA         0x00400000 /* read-ahead */
-#define D_MMAP          0x00800000
-#define D_CONFIG        0x01000000
-#define D_CONSOLE       0x02000000
-#define D_QUOTA         0x04000000
-#define D_SEC           0x08000000
-#define D_LFSCK         0x10000000 /* For both OI scrub and LFSCK */
-#define D_HSM           0x20000000
-#define D_SNAPSHOT      0x40000000 /* snapshot */
-#define D_LAYOUT        0x80000000
-
-#define LIBCFS_DEBUG_MASKS_NAMES {					\
-	"trace", "inode", "super", "ext2", "malloc", "cache", "info",	\
-	"ioctl", "neterror", "net", "warning", "buffs", "other",	\
-	"dentry", "nettrace", "page", "dlmtrace", "error", "emerg",	\
-	"ha", "rpctrace", "vfstrace", "reada", "mmap", "config",	\
-	"console", "quota", "sec", "lfsck", "hsm", "snapshot", "layout",\
-	NULL }
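
The mask bits and the name table line up by index, so translating a mask
into names is a simple scan. A userspace sketch (printf-based, illustrative
only):

#include <stdio.h>

/* Illustrative: print the names of the debug bits set in a mask */
static void example_print_debug_mask(unsigned int mask)
{
	static const char *names[] = LIBCFS_DEBUG_MASKS_NAMES;
	int i;

	for (i = 0; names[i]; i++)
		if (mask & (1U << i))
			printf("%s ", names[i]);
	printf("\n");
}
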
-
-#define D_CANTMASK   (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
-
-#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
-
-#endif	/* __UAPI_LIBCFS_DEBUG_H__ */

+ 0 - 141
drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h

@@ -1,141 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_ioctl.h
- *
- * Low-level ioctl data structures. Kernel ioctl functions are declared
- * here; user space functions are in libcfs/util/ioctl.h.
- *
- */
-
-#ifndef __LIBCFS_IOCTL_H__
-#define __LIBCFS_IOCTL_H__
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define LIBCFS_IOCTL_VERSION	0x0001000a
-#define LIBCFS_IOCTL_VERSION2	0x0001000b
-
-struct libcfs_ioctl_hdr {
-	__u32 ioc_len;
-	__u32 ioc_version;
-};
-
-/** max size to copy from userspace */
-#define LIBCFS_IOC_DATA_MAX	(128 * 1024)
-
-struct libcfs_ioctl_data {
-	struct libcfs_ioctl_hdr ioc_hdr;
-
-	__u64 ioc_nid;
-	__u64 ioc_u64[1];
-
-	__u32 ioc_flags;
-	__u32 ioc_count;
-	__u32 ioc_net;
-	__u32 ioc_u32[7];
-
-	__u32 ioc_inllen1;
-	char *ioc_inlbuf1;
-	__u32 ioc_inllen2;
-	char *ioc_inlbuf2;
-
-	__u32 ioc_plen1; /* buffers in userspace */
-	void __user *ioc_pbuf1;
-	__u32 ioc_plen2; /* buffers in userspace */
-	void __user *ioc_pbuf2;
-
-	char ioc_bulk[0];
-};
-
-struct libcfs_debug_ioctl_data {
-	struct libcfs_ioctl_hdr hdr;
-	unsigned int subs;
-	unsigned int debug;
-};
-
-/* 'f' ioctls are defined in lustre_ioctl.h and lustre_user.h except for: */
-#define LIBCFS_IOC_DEBUG_MASK		   _IOWR('f', 250, long)
-#define IOCTL_LIBCFS_TYPE		   long
-
-#define IOC_LIBCFS_TYPE			   ('e')
-#define IOC_LIBCFS_MIN_NR		   30
-/* libcfs ioctls */
-/* IOC_LIBCFS_PANIC obsolete in 2.8.0, was _IOWR('e', 30, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_CLEAR_DEBUG		   _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_MARK_DEBUG		   _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
-/* IOC_LIBCFS_MEMHOG obsolete in 2.8.0, was _IOWR('e', 36, IOCTL_LIBCFS_TYPE) */
-/* lnet ioctls */
-#define IOC_LIBCFS_GET_NI		   _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_FAIL_NID		   _IOWR('e', 51, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_NOTIFY_ROUTER	   _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_UNCONFIGURE		   _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
-/*	 IOC_LIBCFS_PORTALS_COMPATIBILITY  _IOWR('e', 57, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_LNET_DIST		   _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CONFIGURE		   _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_TESTPROTOCOMPAT	   _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PING			   _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
-/*	IOC_LIBCFS_DEBUG_PEER		   _IOWR('e', 62, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_LNETST		   _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_LNET_FAULT		   _IOWR('e', 64, IOCTL_LIBCFS_TYPE)
-/* lnd ioctls */
-#define IOC_LIBCFS_REGISTER_MYNID	   _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CLOSE_CONNECTION	   _IOWR('e', 71, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PUSH_CONNECTION	   _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_CONN		   _IOWR('e', 73, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_PEER		   _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_ADD_PEER		   _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_PEER		   _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
-/* ioctl 77 is free for use */
-#define IOC_LIBCFS_ADD_INTERFACE	   _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_INTERFACE	   _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_INTERFACE	   _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
-
-/*
- * DLC Specific IOCTL numbers.
- * In order to maintain backward compatibility with any possible external
- * tools which might be accessing the IOCTL numbers, a new group of IOCTL
- * numbers has been allocated.
- */
-#define IOCTL_CONFIG_SIZE		struct lnet_ioctl_config_data
-#define IOC_LIBCFS_ADD_ROUTE		_IOWR(IOC_LIBCFS_TYPE, 81, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_DEL_ROUTE		_IOWR(IOC_LIBCFS_TYPE, 82, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_ROUTE		_IOWR(IOC_LIBCFS_TYPE, 83, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_ADD_NET		_IOWR(IOC_LIBCFS_TYPE, 84, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_DEL_NET		_IOWR(IOC_LIBCFS_TYPE, 85, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_NET		_IOWR(IOC_LIBCFS_TYPE, 86, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_CONFIG_RTR		_IOWR(IOC_LIBCFS_TYPE, 87, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_ADD_BUF		_IOWR(IOC_LIBCFS_TYPE, 88, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_BUF		_IOWR(IOC_LIBCFS_TYPE, 89, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_PEER_INFO	_IOWR(IOC_LIBCFS_TYPE, 90, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_LNET_STATS	_IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_MAX_NR		91
-
-#endif /* __LIBCFS_IOCTL_H__ */
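
These ioctls are issued against the libcfs character device with a properly
sized and versioned header. A hedged userspace sketch of a ping; the
"/dev/lnet" path follows common lctl usage and is an assumption here, as is
leaving the remaining fields zeroed:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Hedged sketch: ping a NID via IOC_LIBCFS_PING; device path assumed */
static int example_ping(__u64 nid)
{
	struct libcfs_ioctl_data data;
	int fd, rc;

	fd = open("/dev/lnet", O_RDWR);
	if (fd < 0)
		return -1;

	memset(&data, 0, sizeof(data));
	data.ioc_hdr.ioc_len = sizeof(data);
	data.ioc_hdr.ioc_version = LIBCFS_IOCTL_VERSION;
	data.ioc_nid = nid;

	rc = ioctl(fd, IOC_LIBCFS_PING, &data);
	close(fd);
	return rc;
}
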

+ 0 - 150
drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h

@@ -1,150 +0,0 @@
-/*
- * LGPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library.
- *
- * LGPL HEADER END
- *
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- */
-/*
- * Author: Amir Shehata <amir.shehata@intel.com>
- */
-
-#ifndef LNET_DLC_H
-#define LNET_DLC_H
-
-#include <uapi/linux/lnet/libcfs_ioctl.h>
-#include <uapi/linux/lnet/lnet-types.h>
-
-#define MAX_NUM_SHOW_ENTRIES	32
-#define LNET_MAX_STR_LEN	128
-#define LNET_MAX_SHOW_NUM_CPT	128
-#define LNET_UNDEFINED_HOPS	((__u32)(-1))
-
-struct lnet_ioctl_config_lnd_cmn_tunables {
-	__u32 lct_version;
-	__u32 lct_peer_timeout;
-	__u32 lct_peer_tx_credits;
-	__u32 lct_peer_rtr_credits;
-	__u32 lct_max_tx_credits;
-};
-
-struct lnet_ioctl_config_o2iblnd_tunables {
-	__u32 lnd_version;
-	__u32 lnd_peercredits_hiw;
-	__u32 lnd_map_on_demand;
-	__u32 lnd_concurrent_sends;
-	__u32 lnd_fmr_pool_size;
-	__u32 lnd_fmr_flush_trigger;
-	__u32 lnd_fmr_cache;
-	__u16 lnd_conns_per_peer;
-	__u16 pad;
-};
-
-struct lnet_ioctl_config_lnd_tunables {
-	struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
-	union {
-		struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
-	} lt_tun_u;
-};
-
-struct lnet_ioctl_net_config {
-	char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN];
-	__u32 ni_status;
-	__u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT];
-	char cfg_bulk[0];
-};
-
-#define LNET_TINY_BUF_IDX	0
-#define LNET_SMALL_BUF_IDX	1
-#define LNET_LARGE_BUF_IDX	2
-
-/* # different router buffer pools */
-#define LNET_NRBPOOLS		(LNET_LARGE_BUF_IDX + 1)
-
-struct lnet_ioctl_pool_cfg {
-	struct {
-		__u32 pl_npages;
-		__u32 pl_nbuffers;
-		__u32 pl_credits;
-		__u32 pl_mincredits;
-	} pl_pools[LNET_NRBPOOLS];
-	__u32 pl_routing;
-};
-
-struct lnet_ioctl_config_data {
-	struct libcfs_ioctl_hdr cfg_hdr;
-
-	__u32 cfg_net;
-	__u32 cfg_count;
-	__u64 cfg_nid;
-	__u32 cfg_ncpts;
-
-	union {
-		struct {
-			__u32 rtr_hop;
-			__u32 rtr_priority;
-			__u32 rtr_flags;
-		} cfg_route;
-		struct {
-			char net_intf[LNET_MAX_STR_LEN];
-			__s32 net_peer_timeout;
-			__s32 net_peer_tx_credits;
-			__s32 net_peer_rtr_credits;
-			__s32 net_max_tx_credits;
-			__u32 net_cksum_algo;
-			__u32 net_interface_count;
-		} cfg_net;
-		struct {
-			__u32 buf_enable;
-			__s32 buf_tiny;
-			__s32 buf_small;
-			__s32 buf_large;
-		} cfg_buffers;
-	} cfg_config_u;
-
-	char cfg_bulk[0];
-};
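
The union is interpreted according to the ioctl it accompanies. For
example, a hypothetical helper filling the cfg_route arm for
IOC_LIBCFS_ADD_ROUTE might look like this (the one-hop default is an
assumption for illustration):

#include <string.h>

/* Illustrative: describe a one-hop route to remote_net via gw_nid */
static inline void example_fill_route(struct lnet_ioctl_config_data *data,
				      __u32 remote_net, __u64 gw_nid)
{
	memset(data, 0, sizeof(*data));
	data->cfg_hdr.ioc_len = sizeof(*data);
	data->cfg_hdr.ioc_version = LIBCFS_IOCTL_VERSION;
	data->cfg_net = remote_net;
	data->cfg_nid = gw_nid;
	data->cfg_config_u.cfg_route.rtr_hop = 1;
	data->cfg_config_u.cfg_route.rtr_priority = 0;
}
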
-
-struct lnet_ioctl_peer {
-	struct libcfs_ioctl_hdr pr_hdr;
-	__u32 pr_count;
-	__u32 pr_pad;
-	__u64 pr_nid;
-
-	union {
-		struct {
-			char cr_aliveness[LNET_MAX_STR_LEN];
-			__u32 cr_refcount;
-			__u32 cr_ni_peer_tx_credits;
-			__u32 cr_peer_tx_credits;
-			__u32 cr_peer_rtr_credits;
-			__u32 cr_peer_min_rtr_credits;
-			__u32 cr_peer_tx_qnob;
-			__u32 cr_ncpt;
-		} pr_peer_credits;
-	} pr_lnd_u;
-};
-
-struct lnet_ioctl_lnet_stats {
-	struct libcfs_ioctl_hdr st_hdr;
-	struct lnet_counters st_cntrs;
-};
-
-#endif /* LNET_DLC_H */

+ 0 - 669
drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h

@@ -1,669 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-
-#ifndef __LNET_TYPES_H__
-#define __LNET_TYPES_H__
-
-#include <linux/types.h>
-#include <linux/bvec.h>
-
-/** \addtogroup lnet
- * @{
- */
-
-#define LNET_VERSION		"0.6.0"
-
-/** \addtogroup lnet_addr
- * @{
- */
-
-/** Portal reserved for LNet's own use.
- * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments.
- */
-#define LNET_RESERVED_PORTAL	0
-
-/**
- * Address of an end-point in an LNet network.
- *
- * A node can have multiple end-points and hence multiple addresses.
- * An LNet network can be a simple network (e.g. tcp0) or a network of
- * LNet networks connected by LNet routers. Therefore an end-point address
- * has two parts: network ID, and address within a network.
- *
- * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID.
- */
-typedef __u64 lnet_nid_t;
-/**
- * ID of a process in a node. Shortened as PID to distinguish from
- * lnet_process_id, the global process ID.
- */
-typedef __u32 lnet_pid_t;
-
-/** wildcard NID that matches any end-point address */
-#define LNET_NID_ANY	((lnet_nid_t)(-1))
-/** wildcard PID that matches any lnet_pid_t */
-#define LNET_PID_ANY	((lnet_pid_t)(-1))
-
-#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
-#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
-#define LNET_PID_LUSTRE	  12345
-
-#define LNET_TIME_FOREVER (-1)
-
-/* how an LNET NID encodes net:address */
-/** extract the address part of an lnet_nid_t */
-static inline __u32 LNET_NIDADDR(lnet_nid_t nid)
-{
-	return nid & 0xffffffff;
-}
-
-static inline __u32 LNET_NIDNET(lnet_nid_t nid)
-{
-	return (nid >> 32) & 0xffffffff;
-}
-
-static inline lnet_nid_t LNET_MKNID(__u32 net, __u32 addr)
-{
-	return (((__u64)net) << 32) | addr;
-}
-
-static inline __u32 LNET_NETNUM(__u32 net)
-{
-	return net & 0xffff;
-}
-
-static inline __u32 LNET_NETTYP(__u32 net)
-{
-	return (net >> 16) & 0xffff;
-}
-
-static inline __u32 LNET_MKNET(__u32 type, __u32 num)
-{
-	return (type << 16) | num;
-}
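
These helpers compose and decompose NIDs with pure bit arithmetic, no
lookups involved. A round-trip sketch; the network type and address values
below are placeholders, not real LND type numbers:

/* Illustrative round trip through the NID packing helpers */
static inline void example_nid_roundtrip(void)
{
	__u32 net = LNET_MKNET(2, 0);	/* type 2, network number 0 */
	lnet_nid_t nid = LNET_MKNID(net, 0x0a000001);

	/* LNET_NIDNET(nid) == net; LNET_NIDADDR(nid) == 0x0a000001 */
	(void)nid;
}
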
-
-#define WIRE_ATTR	__packed
-
-/* Packed version of lnet_process_id to transfer via network */
-struct lnet_process_id_packed {
-	/* node id / process id */
-	lnet_nid_t	nid;
-	lnet_pid_t	pid;
-} WIRE_ATTR;
-
-/*
- * The wire handle's interface cookie only matches one network interface in
- * one epoch (i.e. new cookie when the interface restarts or the node
- * reboots).  The object cookie only matches one object on that interface
- * during that object's lifetime (i.e. no cookie re-use).
- */
-struct lnet_handle_wire {
-	__u64	wh_interface_cookie;
-	__u64	wh_object_cookie;
-} WIRE_ATTR;
-
-enum lnet_msg_type {
-	LNET_MSG_ACK = 0,
-	LNET_MSG_PUT,
-	LNET_MSG_GET,
-	LNET_MSG_REPLY,
-	LNET_MSG_HELLO,
-};
-
-/*
- * The variant fields of the portals message header are aligned on an 8
- * byte boundary in the message header.  Note that all types used in these
- * wire structs MUST be fixed size and the smaller types are placed at the
- * end.
- */
-struct lnet_ack {
-	struct lnet_handle_wire	dst_wmd;
-	__u64			match_bits;
-	__u32			mlength;
-} WIRE_ATTR;
-
-struct lnet_put {
-	struct lnet_handle_wire	ack_wmd;
-	__u64			match_bits;
-	__u64			hdr_data;
-	__u32			ptl_index;
-	__u32			offset;
-} WIRE_ATTR;
-
-struct lnet_get {
-	struct lnet_handle_wire	return_wmd;
-	__u64			match_bits;
-	__u32			ptl_index;
-	__u32			src_offset;
-	__u32			sink_length;
-} WIRE_ATTR;
-
-struct lnet_reply {
-	struct lnet_handle_wire	dst_wmd;
-} WIRE_ATTR;
-
-struct lnet_hello {
-	__u64			incarnation;
-	__u32			type;
-} WIRE_ATTR;
-
-struct lnet_hdr {
-	lnet_nid_t	dest_nid;
-	lnet_nid_t	src_nid;
-	lnet_pid_t	dest_pid;
-	lnet_pid_t	src_pid;
-	__u32		type;		/* enum lnet_msg_type */
-	__u32		payload_length;	/* payload data to follow */
-	/*<------__u64 aligned------->*/
-	union {
-		struct lnet_ack		ack;
-		struct lnet_put		put;
-		struct lnet_get		get;
-		struct lnet_reply	reply;
-		struct lnet_hello	hello;
-	} msg;
-} WIRE_ATTR;
-
-/*
- * A HELLO message contains a magic number and protocol version
- * code in the header's dest_nid, the peer's NID in the src_nid, and
- * LNET_MSG_HELLO in the type field.  All other common fields are zero
- * (including payload_size; i.e. no payload).
- * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
- * running the same protocol and to find out its NID. These LNDs should
- * exchange HELLO messages when a connection is first established.  Individual
- * LNDs can put whatever else they fancy in struct lnet_hdr::msg.
- */
-struct lnet_magicversion {
-	__u32	magic;		/* LNET_PROTO_TCP_MAGIC */
-	__u16	version_major;	/* increment on incompatible change */
-	__u16	version_minor;	/* increment on compatible change */
-} WIRE_ATTR;
-
-/* PROTO MAGIC for LNDs */
-#define LNET_PROTO_IB_MAGIC		0x0be91b91
-#define LNET_PROTO_GNI_MAGIC		0xb00fbabe /* ask Kim */
-#define LNET_PROTO_TCP_MAGIC		0xeebc0ded
-#define LNET_PROTO_ACCEPTOR_MAGIC	0xacce7100
-#define LNET_PROTO_PING_MAGIC		0x70696E67 /* 'ping' */
-
-/* Placeholder for a future "unified" protocol across all LNDs */
-/*
- * Current LNDs that receive a request with this magic will respond with a
- * "stub" reply using their current protocol
- */
-#define LNET_PROTO_MAGIC		0x45726963 /* 'Eric' */
-
-#define LNET_PROTO_TCP_VERSION_MAJOR	1
-#define LNET_PROTO_TCP_VERSION_MINOR	0
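
Following the HELLO description above, a byte-stream LND could stamp an
outgoing hello header as in the sketch below. The helper name is
illustrative and endianness conversion is deliberately omitted:

#include <string.h>

/* Illustrative: pack magic/version into dest_nid per the HELLO scheme */
static inline void example_pack_hello(struct lnet_hdr *hdr, lnet_nid_t my_nid)
{
	struct lnet_magicversion *hmv;

	memset(hdr, 0, sizeof(*hdr));
	hmv = (struct lnet_magicversion *)&hdr->dest_nid;
	hmv->magic = LNET_PROTO_TCP_MAGIC;
	hmv->version_major = LNET_PROTO_TCP_VERSION_MAJOR;
	hmv->version_minor = LNET_PROTO_TCP_VERSION_MINOR;
	hdr->src_nid = my_nid;
	hdr->type = LNET_MSG_HELLO;	/* no payload: payload_length stays 0 */
}
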
-
-/* Acceptor connection request */
-struct lnet_acceptor_connreq {
-	__u32	acr_magic;		/* PTL_ACCEPTOR_PROTO_MAGIC */
-	__u32	acr_version;		/* protocol version */
-	__u64	acr_nid;		/* target NID */
-} WIRE_ATTR;
-
-#define LNET_PROTO_ACCEPTOR_VERSION	1
-
-struct lnet_ni_status {
-	lnet_nid_t	ns_nid;
-	__u32		ns_status;
-	__u32		ns_unused;
-} WIRE_ATTR;
-
-struct lnet_ping_info {
-	__u32			pi_magic;
-	__u32			pi_features;
-	lnet_pid_t		pi_pid;
-	__u32			pi_nnis;
-	struct lnet_ni_status	pi_ni[0];
-} WIRE_ATTR;
-
-struct lnet_counters {
-	__u32	msgs_alloc;
-	__u32	msgs_max;
-	__u32	errors;
-	__u32	send_count;
-	__u32	recv_count;
-	__u32	route_count;
-	__u32	drop_count;
-	__u64	send_length;
-	__u64	recv_length;
-	__u64	route_length;
-	__u64	drop_length;
-} WIRE_ATTR;
-
-#define LNET_NI_STATUS_UP      0x15aac0de
-#define LNET_NI_STATUS_DOWN    0xdeadface
-#define LNET_NI_STATUS_INVALID 0x00000000
-
-#define LNET_MAX_INTERFACES    16
-
-/**
- * Objects maintained by the LNet are accessed through handles. Handle types
- * have names of the form lnet_handle_xx, where xx is one of the two letter
- * object type codes ('eq' for event queue, 'md' for memory descriptor, and
- * 'me' for match entry). Each type of object is given a unique handle type
- * to enhance type checking.
- */
-#define LNET_WIRE_HANDLE_COOKIE_NONE   (-1)
-
-struct lnet_handle_eq {
-	u64	cookie;
-};
-
-/**
- * Invalidate eq handle @h.
- */
-static inline void LNetInvalidateEQHandle(struct lnet_handle_eq *h)
-{
-	h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
-}
-
-/**
- * Check whether eq handle @h is invalid.
- *
- * @return 1 if handle is invalid, 0 if valid.
- */
-static inline int LNetEQHandleIsInvalid(struct lnet_handle_eq h)
-{
-	return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie);
-}
-
-struct lnet_handle_md {
-	u64	cookie;
-};
-
-/**
- * Invalidate md handle @h.
- */
-static inline void LNetInvalidateMDHandle(struct lnet_handle_md *h)
-{
-	h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
-}
-
-/**
- * Check whether md handle @h is invalid.
- *
- * @return 1 if handle is invalid, 0 if valid.
- */
-static inline int LNetMDHandleIsInvalid(struct lnet_handle_md h)
-{
-	return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie);
-}
-
-struct lnet_handle_me {
-	u64	cookie;
-};
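
Handles start out in an undefined state, so the invalidate/check helpers
above are typically paired, as in this small sketch:

/* Illustrative: initialize an MD handle to the invalid state */
static inline void example_md_handle_init(struct lnet_handle_md *mdh)
{
	LNetInvalidateMDHandle(mdh);
	/* LNetMDHandleIsInvalid(*mdh) now returns 1 */
}
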
-
-/**
- * Global process ID.
- */
-struct lnet_process_id {
-	/** node id */
-	lnet_nid_t nid;
-	/** process id */
-	lnet_pid_t pid;
-};
-/** @} lnet_addr */
-
-/** \addtogroup lnet_me
- * @{
- */
-
-/**
- * Specifies whether the match entry or memory descriptor should be unlinked
- * automatically (LNET_UNLINK) or not (LNET_RETAIN).
- */
-enum lnet_unlink {
-	LNET_RETAIN = 0,
-	LNET_UNLINK
-};
-
-/**
- * Values of the type lnet_ins_pos are used to control where a new match
- * entry is inserted. The value LNET_INS_BEFORE is used to insert the new
- * entry before the current entry or before the head of the list. The value
- * LNET_INS_AFTER is used to insert the new entry after the current entry
- * or after the last item in the list.
- */
-enum lnet_ins_pos {
-	/** insert ME before current position or head of the list */
-	LNET_INS_BEFORE,
-	/** insert ME after current position or tail of the list */
-	LNET_INS_AFTER,
-	/** attach ME at tail of local CPU partition ME list */
-	LNET_INS_LOCAL
-};
-
-/** @} lnet_me */
-
-/** \addtogroup lnet_md
- * @{
- */
-
-/**
- * Defines the visible parts of a memory descriptor. Values of this type
- * are used to initialize memory descriptors.
- */
-struct lnet_md {
-	/**
-	 * Specify the memory region associated with the memory descriptor.
-	 * If the options field has:
-	 * - LNET_MD_KIOV bit set: The start field points to the starting
-	 * address of an array of struct bio_vec and the length field specifies
-	 * the number of entries in the array. The length can't be bigger
-	 * than LNET_MAX_IOV. The struct bio_vec is used to describe page-based
-	 * fragments that are not necessarily mapped in virtual memory.
-	 * - LNET_MD_IOVEC bit set: The start field points to the starting
-	 * address of an array of struct iovec and the length field specifies
-	 * the number of entries in the array. The length can't be bigger
-	 * than LNET_MAX_IOV. The struct iovec is used to describe fragments
-	 * that have virtual addresses.
-	 * - Otherwise: The memory region is contiguous. The start field
-	 * specifies the starting address for the memory region and the
-	 * length field specifies its length.
-	 *
-	 * When the memory region is fragmented, all fragments but the first
-	 * one must start on page boundary, and all but the last must end on
-	 * page boundary.
-	 */
-	void		*start;
-	unsigned int	 length;
-	/**
-	 * Specifies the maximum number of operations that can be performed
-	 * on the memory descriptor. An operation is any action that could
-	 * possibly generate an event. In the usual case, the threshold value
-	 * is decremented for each operation on the MD. When the threshold
-	 * drops to zero, the MD becomes inactive and does not respond to
-	 * operations. A threshold value of LNET_MD_THRESH_INF indicates that
-	 * there is no bound on the number of operations that may be applied
-	 * to a MD.
-	 */
-	int		 threshold;
-	/**
-	 * Specifies the largest incoming request that the memory descriptor
-	 * should respond to. When the unused portion of a MD (length -
-	 * local offset) falls below this value, the MD becomes inactive and
-	 * does not respond to further operations. This value is only used
-	 * if the LNET_MD_MAX_SIZE option is set.
-	 */
-	int		 max_size;
-	/**
-	 * Specifies the behavior of the memory descriptor. A bitwise OR
-	 * of the following values can be used:
-	 * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD.
-	 * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD.
-	 * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory
-	 *   region is provided by the incoming request. By default, the
-	 *   offset is maintained locally. When maintained locally, the
-	 *   offset is incremented by the length of the request so that
-	 *   the next operation (PUT or GET) will access the next part of
-	 *   the memory region. Note that only one offset variable exists
-	 *   per memory descriptor. If both PUT and GET operations are
-	 *   performed on a memory descriptor, the offset is updated each time.
-	 * - LNET_MD_TRUNCATE: The length provided in the incoming request can
-	 *   be reduced to match the memory available in the region (determined
-	 *   by subtracting the offset from the length of the memory region).
-	 *   By default, if the length in the incoming operation is greater
-	 *   than the amount of memory available, the operation is rejected.
-	 * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for
-	 *   incoming PUT operations, even if requested. By default,
-	 *   acknowledgments are sent for PUT operations that request an
-	 *   acknowledgment. Acknowledgments are never sent for GET operations.
-	 *   The data sent in the REPLY serves as an implicit acknowledgment.
-	 * - LNET_MD_KIOV: The start and length fields specify an array of
-	 *   struct bio_vec.
-	 * - LNET_MD_IOVEC: The start and length fields specify an array of
-	 *   struct iovec.
-	 * - LNET_MD_MAX_SIZE: The max_size field is valid.
-	 *
-	 * Note:
-	 * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather
-	 *   capability for memory descriptors. They can't be both set.
-	 * - When LNET_MD_MAX_SIZE is set, the total length of the memory
-	 *   region (i.e. sum of all fragment lengths) must not be less than
-	 *   \a max_size.
-	 */
-	unsigned int	 options;
-	/**
-	 * A user-specified value that is associated with the memory
-	 * descriptor. The value does not need to be a pointer, but must fit
-	 * in the space used by a pointer. This value is recorded in events
-	 * associated with operations on this MD.
-	 */
-	void		*user_ptr;
-	/**
-	 * A handle for the event queue used to log the operations performed on
-	 * the memory region. If this argument is a NULL handle (i.e. nullified
- * by LNetInvalidateEQHandle()), operations performed on this memory
-	 * descriptor are not logged.
-	 */
-	struct lnet_handle_eq eq_handle;
-};
-
-/*
- * Max Transfer Unit (minimum supported everywhere).
- * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
- * these limits are system wide and not interface-local.
- */
-#define LNET_MTU_BITS	20
-#define LNET_MTU	(1 << LNET_MTU_BITS)
-
-/** limit on the number of fragments in discontiguous MDs */
-#define LNET_MAX_IOV	256
-
-/**
- * Options for the MD structure. See lnet_md::options.
- */
-#define LNET_MD_OP_PUT		(1 << 0)
-/** See lnet_md::options. */
-#define LNET_MD_OP_GET		(1 << 1)
-/** See lnet_md::options. */
-#define LNET_MD_MANAGE_REMOTE	(1 << 2)
-/* unused			(1 << 3) */
-/** See lnet_md::options. */
-#define LNET_MD_TRUNCATE	(1 << 4)
-/** See lnet_md::options. */
-#define LNET_MD_ACK_DISABLE	(1 << 5)
-/** See lnet_md::options. */
-#define LNET_MD_IOVEC		(1 << 6)
-/** See lnet_md::options. */
-#define LNET_MD_MAX_SIZE	(1 << 7)
-/** See lnet_md::options. */
-#define LNET_MD_KIOV		(1 << 8)
-
-/* For compatibility with Cray Portals */
-#define LNET_MD_PHYS		0
-
-/** Infinite threshold on MD operations. See lnet_md::threshold */
-#define LNET_MD_THRESH_INF	(-1)
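/*
 * Editor's sketch (not part of the original header): initializing a
 * struct lnet_md for a plain contiguous buffer that accepts exactly one
 * incoming PUT.  "buf", "len" and "eqh" are hypothetical caller values.
 */
static inline void example_md_init(struct lnet_md *md, void *buf,
				   unsigned int len,
				   struct lnet_handle_eq eqh)
{
	md->start     = buf;		/* contiguous region, no KIOV/IOVEC */
	md->length    = len;
	md->threshold = 1;		/* one operation, then inactive */
	md->max_size  = 0;		/* ignored: LNET_MD_MAX_SIZE not set */
	md->options   = LNET_MD_OP_PUT;	/* only PUTs may land here */
	md->user_ptr  = NULL;		/* nothing extra to carry in events */
	md->eq_handle = eqh;		/* log operations to this EQ */
}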
-
-/** @} lnet_md */
-
-/** \addtogroup lnet_eq
- * @{
- */
-
-/**
- * Six types of events can be logged in an event queue.
- */
-enum lnet_event_kind {
-	/** An incoming GET operation has completed on the MD. */
-	LNET_EVENT_GET		= 1,
-	/**
-	 * An incoming PUT operation has completed on the MD. The
-	 * underlying layers will not alter the memory (on behalf of this
-	 * operation) once this event has been logged.
-	 */
-	LNET_EVENT_PUT,
-	/**
-	 * A REPLY operation has completed. This event is logged after the
-	 * data (if any) from the REPLY has been written into the MD.
-	 */
-	LNET_EVENT_REPLY,
-	/** An acknowledgment has been received. */
-	LNET_EVENT_ACK,
-	/**
-	 * An outgoing send (PUT or GET) operation has completed. This event
-	 * is logged after the entire buffer has been sent and it is safe for
-	 * the caller to reuse the buffer.
-	 *
-	 * Note:
-	 * - LNET_EVENT_SEND doesn't guarantee message delivery: it can
-	 *   fire even before the message has been put out on the wire.
-	 * - It's unsafe to assume that in an outgoing GET operation
-	 *   the LNET_EVENT_SEND event would happen before the
-	 *   LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and
-	 *   LNET_EVENT_ACK events in an outgoing PUT operation.
-	 */
-	LNET_EVENT_SEND,
-	/**
-	 * A MD has been unlinked. Note that LNetMDUnlink() does not
-	 * necessarily trigger an LNET_EVENT_UNLINK event.
-	 * \see LNetMDUnlink
-	 */
-	LNET_EVENT_UNLINK,
-};
-
-#define LNET_SEQ_GT(a, b)      (((signed long)((a) - (b))) > 0)
-
-/**
- * Information about an event on a MD.
- */
-struct lnet_event {
-	/** The identifier (nid, pid) of the target. */
-	struct lnet_process_id	target;
-	/** The identifier (nid, pid) of the initiator. */
-	struct lnet_process_id	initiator;
-	/**
-	 * The NID of the immediate sender. If the request has been forwarded
-	 * by routers, this is the NID of the last hop; otherwise it's the
-	 * same as the initiator.
-	 */
-	lnet_nid_t		sender;
-	/** Indicates the type of the event. */
-	enum lnet_event_kind	type;
-	/** The portal table index specified in the request */
-	unsigned int		pt_index;
-	/** A copy of the match bits specified in the request. */
-	__u64			match_bits;
-	/** The length (in bytes) specified in the request. */
-	unsigned int		rlength;
-	/**
-	 * The length (in bytes) of the data that was manipulated by the
-	 * operation. For truncated operations, the manipulated length will be
-	 * the number of bytes specified by the MD (possibly with an offset,
-	 * see lnet_md). For all other operations, the manipulated length
-	 * will be the length of the requested operation, i.e. rlength.
-	 */
-	unsigned int		mlength;
-	/**
-	 * The handle to the MD associated with the event. The handle may be
-	 * invalid if the MD has been unlinked.
-	 */
-	struct lnet_handle_md	md_handle;
-	/**
-	 * A snapshot of the state of the MD immediately after the event has
-	 * been processed. In particular, the threshold field in md will
-	 * reflect the value of the threshold after the operation occurred.
-	 */
-	struct lnet_md		md;
-	/**
-	 * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
-	 * \see LNetPut
-	 */
-	__u64			hdr_data;
-	/**
-	 * Indicates the completion status of the operation. It's 0 for
-	 * successful operations, otherwise it's an error code.
-	 */
-	int			status;
-	/**
-	 * Indicates whether the MD has been unlinked. Note that:
-	 * - An event with unlinked set is the last event on the MD.
-	 * - This field is also set for an explicit LNET_EVENT_UNLINK event.
-	 * \see LNetMDUnlink
-	 */
-	int			unlinked;
-	/**
-	 * The displacement (in bytes) into the memory region that the
-	 * operation used. The offset can be determined by the operation for
-	 * a remotely managed MD or by the local MD.
-	 * \see lnet_md::options
-	 */
-	unsigned int		offset;
-	/**
-	 * The sequence number for this event. Sequence numbers are unique
-	 * to each event.
-	 */
-	volatile unsigned long	sequence;
-};
-
-/**
- * Event queue handler function type.
- *
- * The EQ handler runs for each event that is deposited into the EQ. The
- * handler is supplied with a pointer to the event that triggered the
- * handler invocation.
- *
- * The handler must not block, must be reentrant, and must not call any LNet
- * API functions. It should return as quickly as possible.
- */
-typedef void (*lnet_eq_handler_t)(struct lnet_event *event);
-#define LNET_EQ_HANDLER_NONE NULL
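/*
 * Editor's sketch of a handler matching the type above: it must not block,
 * must be reentrant and may not call back into the LNet API, so it only
 * inspects the event.  The "example_" name is hypothetical.
 */
static void example_eq_handler(struct lnet_event *event)
{
	if (event->status != 0)
		return;			/* the operation failed */

	switch (event->type) {
	case LNET_EVENT_PUT:
		/* event->mlength bytes landed at event->offset */
		break;
	case LNET_EVENT_SEND:
		/* buffer is reusable, but delivery is not guaranteed yet */
		break;
	default:
		break;
	}

	if (event->unlinked) {
		/* last event on this MD; its resources can be reclaimed */
	}
}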
-/** @} lnet_eq */
-
-/** \addtogroup lnet_data
- * @{
- */
-
-/**
- * Specifies whether an acknowledgment should be sent by the target when the
- * PUT
- * operation completes (i.e., when the data has been written to a MD of the
- * target process).
- *
- * \see lnet_md::options for the discussion on LNET_MD_ACK_DISABLE by which
- * acknowledgments can be disabled for a MD.
- */
-enum lnet_ack_req {
-	/** Request an acknowledgment */
-	LNET_ACK_REQ,
-	/** Request that no acknowledgment should be generated. */
-	LNET_NOACK_REQ
-};
-/** @} lnet_data */
-
-/** @} lnet */
-#endif

+ 0 - 123
drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h

@@ -1,123 +0,0 @@
-/*
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- * header for lnet ioctl
- */
-#ifndef _LNETCTL_H_
-#define _LNETCTL_H_
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/** \addtogroup lnet_fault_simulation
- * @{
- */
-
-enum {
-	LNET_CTL_DROP_ADD,
-	LNET_CTL_DROP_DEL,
-	LNET_CTL_DROP_RESET,
-	LNET_CTL_DROP_LIST,
-	LNET_CTL_DELAY_ADD,
-	LNET_CTL_DELAY_DEL,
-	LNET_CTL_DELAY_RESET,
-	LNET_CTL_DELAY_LIST,
-};
-
-#define LNET_ACK_BIT		(1 << 0)
-#define LNET_PUT_BIT		(1 << 1)
-#define LNET_GET_BIT		(1 << 2)
-#define LNET_REPLY_BIT		(1 << 3)
-
-/** ioctl parameter for LNet fault simulation */
-struct lnet_fault_attr {
-	/**
-	 * source NID of drop rule
-	 * LNET_NID_ANY is wildcard for all sources
-	 * 255.255.255.255@net is wildcard for all addresses from @net
-	 */
-	lnet_nid_t			fa_src;
-	/** destination NID of drop rule, see \a fa_src for details */
-	lnet_nid_t			fa_dst;
-	/**
-	 * Portal mask to drop, -1 means all portals, for example:
-	 * fa_ptl_mask = (1 << LDLM_CB_REQUEST_PORTAL) |
-	 *		 (1 << LDLM_CANCEL_REQUEST_PORTAL)
-	 *
-	 * If it is non-zero then only PUT and GET will be filtered, otherwise
-	 * there is no portal filter, all matched messages will be checked.
-	 */
-	__u64				fa_ptl_mask;
-	/**
-	 * message types to drop, for example:
-	 * fa_msg_mask = LNET_ACK_BIT | LNET_PUT_BIT
-	 *
-	 * If it is non-zero then only specified message types are filtered,
-	 * otherwise all message types will be checked.
-	 */
-	__u32				fa_msg_mask;
-	union {
-		/** message drop simulation */
-		struct {
-			/** drop rate of this rule */
-			__u32			da_rate;
-			/**
-			 * time interval of message drop, it is exclusive
-			 * with da_rate
-			 */
-			__u32			da_interval;
-		} drop;
-		/** message latency simulation */
-		struct {
-			__u32			la_rate;
-			/**
-			 * time interval of message delay, it is exclusive
-			 * with la_rate
-			 */
-			__u32			la_interval;
-			/** latency to delay */
-			__u32			la_latency;
-		} delay;
-		__u64			space[8];
-	} u;
-};
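/*
 * Editor's sketch (assumes memset() from <linux/string.h> and LNET_NID_ANY
 * from lnet-types.h): composing a rule that drops roughly 1 in 10 PUTs and
 * GETs between any pair of NIDs.  Actually submitting it via
 * LNET_CTL_DROP_ADD is outside this header's scope.
 */
static inline void example_drop_rule(struct lnet_fault_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->fa_src = LNET_NID_ANY;		/* wildcard source */
	attr->fa_dst = LNET_NID_ANY;		/* wildcard destination */
	attr->fa_ptl_mask = 0;			/* no portal filtering */
	attr->fa_msg_mask = LNET_PUT_BIT | LNET_GET_BIT;
	attr->u.drop.da_rate = 10;		/* drop 1 message in 10 */
	attr->u.drop.da_interval = 0;		/* exclusive with da_rate */
}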
-
-/** fault simulation stats */
-struct lnet_fault_stat {
-	/** total # matched messages */
-	__u64				fs_count;
-	/** # dropped LNET_MSG_PUT by this rule */
-	__u64				fs_put;
-	/** # dropped LNET_MSG_ACK by this rule */
-	__u64				fs_ack;
-	/** # dropped LNET_MSG_GET by this rule */
-	__u64				fs_get;
-	/** # dropped LNET_MSG_REPLY by this rule */
-	__u64				fs_reply;
-	union {
-		struct {
-			/** total # dropped messages */
-			__u64			ds_dropped;
-		} drop;
-		struct {
-			/** total # delayed messages */
-			__u64			ls_delayed;
-		} delay;
-		__u64			space[8];
-	} u;
-};
-
-/** @} lnet_fault_simulation */
-
-#define LNET_DEV_ID 0
-#define LNET_DEV_PATH "/dev/lnet"
-
-#endif

+ 0 - 556
drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h

@@ -1,556 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lnetst.h
- *
- * Author: Liang Zhen <liang.zhen@intel.com>
- */
-
-#ifndef __LNET_ST_H__
-#define __LNET_ST_H__
-
-#include <linux/types.h>
-
-#define LST_FEAT_NONE		(0)
-#define LST_FEAT_BULK_LEN	(1 << 0)	/* enable variable page size */
-
-#define LST_FEATS_EMPTY		(LST_FEAT_NONE)
-#define LST_FEATS_MASK		(LST_FEAT_NONE | LST_FEAT_BULK_LEN)
-
-#define LST_NAME_SIZE		32	/* max name buffer length */
-
-#define LSTIO_DEBUG		0xC00	/* debug */
-#define LSTIO_SESSION_NEW	0xC01	/* create session */
-#define LSTIO_SESSION_END	0xC02	/* end session */
-#define LSTIO_SESSION_INFO	0xC03	/* query session */
-#define LSTIO_GROUP_ADD		0xC10	/* add group */
-#define LSTIO_GROUP_LIST	0xC11	/* list all groups in session */
-#define LSTIO_GROUP_INFO	0xC12	/* query default information of
-					 * specified group
-					 */
-#define LSTIO_GROUP_DEL		0xC13	/* delete group */
-#define LSTIO_NODES_ADD		0xC14	/* add nodes to specified group */
-#define LSTIO_GROUP_UPDATE      0xC15	/* update group */
-#define LSTIO_BATCH_ADD		0xC20	/* add batch */
-#define LSTIO_BATCH_START	0xC21	/* start batch */
-#define LSTIO_BATCH_STOP	0xC22	/* stop batch */
-#define LSTIO_BATCH_DEL		0xC23	/* delete batch */
-#define LSTIO_BATCH_LIST	0xC24	/* show all batches in the session */
-#define LSTIO_BATCH_INFO	0xC25	/* show detail of specified batch */
-#define LSTIO_TEST_ADD		0xC26	/* add test (to batch) */
-#define LSTIO_BATCH_QUERY	0xC27	/* query batch status */
-#define LSTIO_STAT_QUERY	0xC30	/* get stats */
-
-struct lst_sid {
-	lnet_nid_t	ses_nid;	/* nid of console node */
-	__u64		ses_stamp;	/* time stamp */
-};					/*** session id */
-
-extern struct lst_sid LST_INVALID_SID;
-
-struct lst_bid {
-	__u64	bat_id;		/* unique id in session */
-};				/*** batch id (group of tests) */
-
-/* Status of test node */
-#define LST_NODE_ACTIVE		0x1	/* node in this session */
-#define LST_NODE_BUSY		0x2	/* node is taken by other session */
-#define LST_NODE_DOWN		0x4	/* node is down */
-#define LST_NODE_UNKNOWN	0x8	/* node not in session */
-
-struct lstcon_node_ent {
-	struct lnet_process_id	nde_id;		/* id of node */
-	int			nde_state;	/* state of node */
-};				/*** node entry, for list_group command */
-
-struct lstcon_ndlist_ent {
-	int	nle_nnode;	/* # of nodes */
-	int	nle_nactive;	/* # of active nodes */
-	int	nle_nbusy;	/* # of busy nodes */
-	int	nle_ndown;	/* # of down nodes */
-	int	nle_nunknown;	/* # of unknown nodes */
-};				/*** node_list entry, for list_batch command */
-
-struct lstcon_test_ent {
-	int	tse_type;       /* test type */
-	int	tse_loop;       /* loop count */
-	int	tse_concur;     /* concurrency of test */
-};				/* test summary entry, for
-				 * list_batch command
-				 */
-
-struct lstcon_batch_ent {
-	int	bae_state;	/* batch status */
-	int	bae_timeout;	/* batch timeout */
-	int	bae_ntest;	/* # of tests in the batch */
-};				/* batch summary entry, for
-				 * list_batch command
-				 */
-
-struct lstcon_test_batch_ent {
-	struct lstcon_ndlist_ent   tbe_cli_nle;	/* client (group) node_list
-						 * entry
-						 */
-	struct lstcon_ndlist_ent   tbe_srv_nle;	/* server (group) node_list
-						 * entry
-						 */
-	union {
-		struct lstcon_test_ent	tbe_test; /* test entry */
-		struct lstcon_batch_ent tbe_batch;/* batch entry */
-	} u;
-};				/* test/batch verbose information entry,
-				 * for list_batch command
-				 */
-
-struct lstcon_rpc_ent {
-	struct list_head	rpe_link;	/* link chain */
-	struct lnet_process_id	rpe_peer;	/* peer's id */
-	struct timeval		rpe_stamp;	/* time stamp of RPC */
-	int			rpe_state;	/* peer's state */
-	int			rpe_rpc_errno;	/* RPC errno */
-
-	struct lst_sid		rpe_sid;	/* peer's session id */
-	int			rpe_fwk_errno;	/* framework errno */
-	int			rpe_priv[4];	/* private data */
-	char			rpe_payload[0];	/* private reply payload */
-};
-
-struct lstcon_trans_stat {
-	int	trs_rpc_stat[4];	/* RPC stats (0: total, 1: success,
-					 * 2: failure, 3: reserved), matching
-					 * the accessors below
-					 */
-	int	trs_rpc_errno;		/* RPC errno */
-	int	trs_fwk_stat[8];	/* framework stat */
-	int	trs_fwk_errno;		/* errno of the first remote error */
-	void	*trs_fwk_private;	/* private framework stat */
-};
-
-static inline int
-lstcon_rpc_stat_total(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0];
-}
-
-static inline int
-lstcon_rpc_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1];
-}
-
-static inline int
-lstcon_rpc_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2];
-}
-
-static inline int
-lstcon_sesop_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_sesop_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_sesqry_stat_active(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_sesqry_stat_busy(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_sesqry_stat_unknown(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
-}
-
-static inline int
-lstcon_tsbop_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_tsbop_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_tsbqry_stat_idle(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_tsbqry_stat_run(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_tsbqry_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
-}
-
-static inline int
-lstcon_statqry_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_statqry_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
-	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
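/*
 * Editor's note (sketch): each helper above doubles as counter and reader —
 * inc != 0 pre-increments the slot, inc == 0 just reads it back:
 */
static inline int example_trans_stat(struct lstcon_trans_stat *stat)
{
	lstcon_rpc_stat_total(stat, 1);		/* one more RPC issued... */
	lstcon_rpc_stat_failure(stat, 1);	/* ...and it failed */

	/* read-only queries: did anything succeed at all? */
	return lstcon_rpc_stat_success(stat, 0) > 0;
}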
-
-/* create a session */
-struct lstio_session_new_args {
-	int		 lstio_ses_key;		/* IN: local key */
-	int		 lstio_ses_timeout;	/* IN: session timeout */
-	int		 lstio_ses_force;	/* IN: force create? */
-	/** IN: session features */
-	unsigned int	 lstio_ses_feats;
-	struct lst_sid __user *lstio_ses_idp;	/* OUT: session id */
-	int		 lstio_ses_nmlen;	/* IN: name length */
-	char __user	 *lstio_ses_namep;	/* IN: session name */
-};
-
-/* query current session */
-struct lstio_session_info_args {
-	struct lst_sid __user	*lstio_ses_idp;		/* OUT: session id */
-	int __user		*lstio_ses_keyp;	/* OUT: local key */
-	/** OUT: session features */
-	unsigned int __user	*lstio_ses_featp;
-	struct lstcon_ndlist_ent __user *lstio_ses_ndinfo;/* OUT: */
-	int			 lstio_ses_nmlen;	/* IN: name length */
-	char __user		*lstio_ses_namep;	/* OUT: session name */
-};
-
-/* delete a session */
-struct lstio_session_end_args {
-	int			lstio_ses_key;	/* IN: session key */
-};
-
-#define LST_OPC_SESSION		1
-#define LST_OPC_GROUP		2
-#define LST_OPC_NODES		3
-#define LST_OPC_BATCHCLI	4
-#define LST_OPC_BATCHSRV	5
-
-struct lstio_debug_args {
-	int			 lstio_dbg_key;		/* IN: session key */
-	int			 lstio_dbg_type;	/* IN: debug
-							 * session|batch|
-							 * group|nodes list
-							 */
-	int			 lstio_dbg_flags;	/* IN: reserved debug
-							 * flags
-							 */
-	int			 lstio_dbg_timeout;	/* IN: timeout of
-							 * debug
-							 */
-	int			 lstio_dbg_nmlen;	/* IN: len of name */
-	char __user		*lstio_dbg_namep;	/* IN: name of
-							 * group|batch
-							 */
-	int			 lstio_dbg_count;	/* IN: # of test nodes
-							 * to debug
-							 */
-	struct lnet_process_id __user *lstio_dbg_idsp;	/* IN: id of test
-							 * nodes
-							 */
-	struct list_head __user	*lstio_dbg_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_group_add_args {
-	int		 lstio_grp_key;		/* IN: session key */
-	int		 lstio_grp_nmlen;	/* IN: name length */
-	char __user	*lstio_grp_namep;	/* IN: group name */
-};
-
-struct lstio_group_del_args {
-	int		 lstio_grp_key;		/* IN: session key */
-	int		 lstio_grp_nmlen;	/* IN: name length */
-	char __user	*lstio_grp_namep;	/* IN: group name */
-};
-
-#define LST_GROUP_CLEAN		1	/* remove inactive nodes in the group */
-#define LST_GROUP_REFRESH	2	/* refresh inactive nodes
-					 * in the group
-					 */
-#define LST_GROUP_RMND		3	/* delete nodes from the group */
-
-struct lstio_group_update_args {
-	int			 lstio_grp_key;		/* IN: session key */
-	int			 lstio_grp_opc;		/* IN: OPC */
-	int			 lstio_grp_args;	/* IN: arguments */
-	int			 lstio_grp_nmlen;	/* IN: name length */
-	char __user		*lstio_grp_namep;	/* IN: group name */
-	int			 lstio_grp_count;	/* IN: # of nodes id */
-	struct lnet_process_id __user *lstio_grp_idsp;	/* IN: array of nodes */
-	struct list_head __user	*lstio_grp_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_group_nodes_args {
-	int			 lstio_grp_key;		/* IN: session key */
-	int			 lstio_grp_nmlen;	/* IN: name length */
-	char __user		*lstio_grp_namep;	/* IN: group name */
-	int			 lstio_grp_count;	/* IN: # of nodes */
-	/** OUT: session features */
-	unsigned int __user	*lstio_grp_featp;
-	struct lnet_process_id __user *lstio_grp_idsp;	/* IN: nodes */
-	struct list_head __user	*lstio_grp_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_group_list_args {
-	int	 lstio_grp_key;		/* IN: session key */
-	int	 lstio_grp_idx;		/* IN: group idx */
-	int	 lstio_grp_nmlen;	/* IN: name len */
-	char __user *lstio_grp_namep;	/* OUT: name */
-};
-
-struct lstio_group_info_args {
-	int			 lstio_grp_key;		/* IN: session key */
-	int			 lstio_grp_nmlen;	/* IN: name len */
-	char __user		*lstio_grp_namep;	/* IN: name */
-	struct lstcon_ndlist_ent __user *lstio_grp_entp;/* OUT: description
-							 * of group
-							 */
-	int __user		*lstio_grp_idxp;	/* IN/OUT: node index */
-	int __user		*lstio_grp_ndentp;	/* IN/OUT: # of nodent */
-	struct lstcon_node_ent __user *lstio_grp_dentsp;/* OUT: nodent array */
-};
-
-#define LST_DEFAULT_BATCH	"batch"			/* default batch name */
-
-struct lstio_batch_add_args {
-	int	 lstio_bat_key;		/* IN: session key */
-	int	 lstio_bat_nmlen;	/* IN: name length */
-	char __user *lstio_bat_namep;	/* IN: batch name */
-};
-
-struct lstio_batch_del_args {
-	int	 lstio_bat_key;		/* IN: session key */
-	int	 lstio_bat_nmlen;	/* IN: name length */
-	char __user *lstio_bat_namep;	/* IN: batch name */
-};
-
-struct lstio_batch_run_args {
-	int			 lstio_bat_key;		/* IN: session key */
-	int			 lstio_bat_timeout;	/* IN: timeout for
-							 * the batch
-							 */
-	int			 lstio_bat_nmlen;	/* IN: name length */
-	char __user		*lstio_bat_namep;	/* IN: batch name */
-	struct list_head __user	*lstio_bat_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_batch_stop_args {
-	int			 lstio_bat_key;		/* IN: session key */
-	int			 lstio_bat_force;	/* IN: abort unfinished
-							 * test RPC
-							 */
-	int			 lstio_bat_nmlen;	/* IN: name length */
-	char __user		*lstio_bat_namep;	/* IN: batch name */
-	struct list_head __user	*lstio_bat_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_batch_query_args {
-	int			 lstio_bat_key;		/* IN: session key */
-	int			 lstio_bat_testidx;	/* IN: test index */
-	int			 lstio_bat_client;	/* IN: are we testing
-							 * the client?
-							 */
-	int			 lstio_bat_timeout;	/* IN: timeout for
-							 * waiting
-							 */
-	int			 lstio_bat_nmlen;	/* IN: name length */
-	char __user		*lstio_bat_namep;	/* IN: batch name */
-	struct list_head __user	*lstio_bat_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-struct lstio_batch_list_args {
-	int	 lstio_bat_key;		/* IN: session key */
-	int	 lstio_bat_idx;		/* IN: index */
-	int	 lstio_bat_nmlen;	/* IN: name length */
-	char __user *lstio_bat_namep;	/* IN: batch name */
-};
-
-struct lstio_batch_info_args {
-	int			 lstio_bat_key;		/* IN: session key */
-	int			 lstio_bat_nmlen;	/* IN: name length */
-	char __user		*lstio_bat_namep;	/* IN: name */
-	int			 lstio_bat_server;	/* IN: query server
-							 * or not
-							 */
-	int			 lstio_bat_testidx;	/* IN: test index */
-	struct lstcon_test_batch_ent __user *lstio_bat_entp;/* OUT: batch ent */
-
-	int __user		*lstio_bat_idxp;	/* IN/OUT: index of node */
-	int __user		*lstio_bat_ndentp;	/* IN/OUT: # of nodent */
-	struct lstcon_node_ent __user *lstio_bat_dentsp;/* array of nodent */
-};
-
-/* add stat in session */
-struct lstio_stat_args {
-	int			 lstio_sta_key;		/* IN: session key */
-	int			 lstio_sta_timeout;	/* IN: timeout for
-							 * stat request
-							 */
-	int			 lstio_sta_nmlen;	/* IN: group name
-							 * length
-							 */
-	char __user		*lstio_sta_namep;	/* IN: group name */
-	int			 lstio_sta_count;	/* IN: # of pid */
-	struct lnet_process_id __user *lstio_sta_idsp;	/* IN: pid */
-	struct list_head __user	*lstio_sta_resultp;	/* OUT: list head of
-							 * result buffer
-							 */
-};
-
-enum lst_test_type {
-	LST_TEST_BULK	= 1,
-	LST_TEST_PING	= 2
-};
-
-/* create a test in a batch */
-#define LST_MAX_CONCUR	1024	/* Max concurrency of test */
-
-struct lstio_test_args {
-	int		  lstio_tes_key;	/* IN: session key */
-	int		  lstio_tes_bat_nmlen;	/* IN: batch name len */
-	char __user	 *lstio_tes_bat_name;	/* IN: batch name */
-	int		  lstio_tes_type;	/* IN: test type */
-	int		  lstio_tes_oneside;	/* IN: one sided test */
-	int		  lstio_tes_loop;	/* IN: loop count */
-	int		  lstio_tes_concur;	/* IN: concurrency */
-
-	int		  lstio_tes_dist;	/* IN: node distribution in
-						 * destination groups
-						 */
-	int		  lstio_tes_span;	/* IN: node span in
-						 * destination groups
-						 */
-	int		  lstio_tes_sgrp_nmlen;	/* IN: source group
-						 * name length
-						 */
-	char __user	 *lstio_tes_sgrp_name;	/* IN: group name */
-	int		  lstio_tes_dgrp_nmlen;	/* IN: destination group
-						 * name length
-						 */
-	char __user	 *lstio_tes_dgrp_name;	/* IN: group name */
-
-	int		  lstio_tes_param_len;	/* IN: param buffer len */
-	void __user	 *lstio_tes_param;	/* IN: parameter for specified
-						 * test: lst_test_bulk_param,
-						 * lst_test_ping_param,
-						 * ... more
-						 */
-	int __user	 *lstio_tes_retp;	/* OUT: private returned
-						 * value
-						 */
-	struct list_head __user *lstio_tes_resultp;/* OUT: list head of
-						    * result buffer
-						    */
-};
-
-enum lst_brw_type {
-	LST_BRW_READ	= 1,
-	LST_BRW_WRITE	= 2
-};
-
-enum lst_brw_flags {
-	LST_BRW_CHECK_NONE	= 1,
-	LST_BRW_CHECK_SIMPLE	= 2,
-	LST_BRW_CHECK_FULL	= 3
-};
-
-struct lst_test_bulk_param {
-	int	blk_opc;	/* bulk operation code */
-	int	blk_size;       /* size (bytes) */
-	int	blk_time;       /* time of running the test */
-	int	blk_flags;      /* reserved flags */
-	int	blk_cli_off;	/* bulk offset on client */
-	int	blk_srv_off;	/* reserved: bulk offset on server */
-};
-
-struct lst_test_ping_param {
-	int	png_size;	/* size of ping message */
-	int	png_time;	/* time */
-	int	png_loop;	/* loop */
-	int	png_flags;	/* reserved flags */
-};
-
-struct srpc_counters {
-	__u32 errors;
-	__u32 rpcs_sent;
-	__u32 rpcs_rcvd;
-	__u32 rpcs_dropped;
-	__u32 rpcs_expired;
-	__u64 bulk_get;
-	__u64 bulk_put;
-} WIRE_ATTR;
-
-struct sfw_counters {
-	/** milliseconds since current session started */
-	__u32 running_ms;
-	__u32 active_batches;
-	__u32 zombie_sessions;
-	__u32 brw_errors;
-	__u32 ping_errors;
-} WIRE_ATTR;
-
-#endif

+ 0 - 119
drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h

@@ -1,119 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-#ifndef _LNET_NIDSTRINGS_H
-#define _LNET_NIDSTRINGS_H
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/**
- *  Lustre Network Driver types.
- */
-enum {
-	/*
-	 * Only add to these values (i.e. don't ever change or redefine them):
-	 * network addresses depend on them...
-	 */
-	QSWLND		= 1,
-	SOCKLND		= 2,
-	GMLND		= 3,
-	PTLLND		= 4,
-	O2IBLND		= 5,
-	CIBLND		= 6,
-	OPENIBLND	= 7,
-	IIBLND		= 8,
-	LOLND		= 9,
-	RALND		= 10,
-	VIBLND		= 11,
-	MXLND		= 12,
-	GNILND		= 13,
-	GNIIPLND	= 14,
-};
-
-struct list_head;
-
-#define LNET_NIDSTR_COUNT  1024    /* # of nidstrings */
-#define LNET_NIDSTR_SIZE   32      /* size of each one (see below for usage) */
-
-/* support decl needed by both kernel and user space */
-char *libcfs_next_nidstring(void);
-int libcfs_isknown_lnd(__u32 lnd);
-char *libcfs_lnd2modname(__u32 lnd);
-char *libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size);
-static inline char *libcfs_lnd2str(__u32 lnd)
-{
-	return libcfs_lnd2str_r(lnd, libcfs_next_nidstring(),
-				LNET_NIDSTR_SIZE);
-}
-
-int libcfs_str2lnd(const char *str);
-char *libcfs_net2str_r(__u32 net, char *buf, size_t buf_size);
-static inline char *libcfs_net2str(__u32 net)
-{
-	return libcfs_net2str_r(net, libcfs_next_nidstring(),
-				LNET_NIDSTR_SIZE);
-}
-
-char *libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size);
-static inline char *libcfs_nid2str(lnet_nid_t nid)
-{
-	return libcfs_nid2str_r(nid, libcfs_next_nidstring(),
-				LNET_NIDSTR_SIZE);
-}
-
-__u32 libcfs_str2net(const char *str);
-lnet_nid_t libcfs_str2nid(const char *str);
-int libcfs_str2anynid(lnet_nid_t *nid, const char *str);
-char *libcfs_id2str(struct lnet_process_id id);
-void cfs_free_nidlist(struct list_head *list);
-int cfs_parse_nidlist(char *str, int len, struct list_head *list);
-int cfs_print_nidlist(char *buffer, int count, struct list_head *list);
-int cfs_match_nid(lnet_nid_t nid, struct list_head *list);
-
-int cfs_ip_addr_parse(char *str, int len, struct list_head *list);
-int cfs_ip_addr_match(__u32 addr, struct list_head *list);
-bool cfs_nidrange_is_contiguous(struct list_head *nidlist);
-void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
-			       char *max_nid, size_t nidstr_length);
-
-struct netstrfns {
-	__u32	nf_type;
-	char	*nf_name;
-	char	*nf_modname;
-	void	(*nf_addr2str)(__u32 addr, char *str, size_t size);
-	int	(*nf_str2addr)(const char *str, int nob, __u32 *addr);
-	int	(*nf_parse_addrlist)(char *str, int len,
-				     struct list_head *list);
-	int	(*nf_print_addrlist)(char *buffer, int count,
-				     struct list_head *list);
-	int	(*nf_match_addr)(__u32 addr, struct list_head *list);
-	bool	(*nf_is_contiguous)(struct list_head *nidlist);
-	void	(*nf_min_max)(struct list_head *nidlist, __u32 *min_nid,
-			      __u32 *max_nid);
-};
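/*
 * Editor's sketch: the *_r variants above exist so callers can avoid the
 * rotating shared buffers behind the convenience wrappers; a
 * reentrancy-safe caller supplies its own storage.
 */
static inline void example_print_nid(lnet_nid_t nid)
{
	char buf[LNET_NIDSTR_SIZE];

	libcfs_nid2str_r(nid, buf, sizeof(buf));
	/* buf now holds a printable NID such as "192.168.0.1@tcp" */
}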
-
-#endif /* _LNET_NIDSTRINGS_H */

+ 0 - 44
drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h

@@ -1,44 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * #defines shared between socknal implementation and utilities
- */
-#ifndef __UAPI_LNET_SOCKLND_H__
-#define __UAPI_LNET_SOCKLND_H__
-
-#define SOCKLND_CONN_NONE     (-1)
-#define SOCKLND_CONN_ANY	0
-#define SOCKLND_CONN_CONTROL	1
-#define SOCKLND_CONN_BULK_IN	2
-#define SOCKLND_CONN_BULK_OUT	3
-#define SOCKLND_CONN_NTYPES	4
-
-#define SOCKLND_CONN_ACK	SOCKLND_CONN_BULK_IN
-
-#endif

+ 0 - 261
drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h

@@ -1,261 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _UAPI_LUSTRE_CFG_H_
-#define _UAPI_LUSTRE_CFG_H_
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <uapi/linux/lustre/lustre_user.h>
-
-/** \defgroup cfg cfg
- *
- * @{
- */
-
-/*
- * 1cf6
- * lcfG
- */
-#define LUSTRE_CFG_VERSION 0x1cf60001
-#define LUSTRE_CFG_MAX_BUFCOUNT 8
-
-#define LCFG_HDR_SIZE(count) \
-	__ALIGN_KERNEL(offsetof(struct lustre_cfg, lcfg_buflens[(count)]), 8)
-
-/** If the LCFG_REQUIRED bit is set in a configuration command,
- * then the client is required to understand this parameter
- * in order to mount the filesystem. If it does not understand
- * a REQUIRED command the client mount will fail.
- */
-#define LCFG_REQUIRED	0x0001000
-
-enum lcfg_command_type {
-	LCFG_ATTACH		  = 0x00cf001, /**< create a new obd instance */
-	LCFG_DETACH		  = 0x00cf002, /**< destroy obd instance */
-	LCFG_SETUP		  = 0x00cf003, /**< call type-specific setup */
-	LCFG_CLEANUP		  = 0x00cf004, /**< call type-specific cleanup
-						 */
-	LCFG_ADD_UUID		  = 0x00cf005, /**< add a nid to a niduuid */
-	LCFG_DEL_UUID		  = 0x00cf006, /**< remove a nid from
-						 *  a niduuid
-						 */
-	LCFG_MOUNTOPT		  = 0x00cf007, /**< create a profile
-						 * (mdc, osc)
-						 */
-	LCFG_DEL_MOUNTOPT	  = 0x00cf008, /**< destroy a profile */
-	LCFG_SET_TIMEOUT	  = 0x00cf009, /**< set obd_timeout */
-	LCFG_SET_UPCALL		  = 0x00cf00a, /**< deprecated */
-	LCFG_ADD_CONN		  = 0x00cf00b, /**< add a failover niduuid to
-						 *  an obd
-						 */
-	LCFG_DEL_CONN		  = 0x00cf00c, /**< remove a failover niduuid */
-	LCFG_LOV_ADD_OBD	  = 0x00cf00d, /**< add an osc to a lov */
-	LCFG_LOV_DEL_OBD	  = 0x00cf00e, /**< remove an osc from a lov */
-	LCFG_PARAM		  = 0x00cf00f, /**< set a proc parameter */
-	LCFG_MARKER		  = 0x00cf010, /**< metadata about next
-						 *  cfg rec
-						 */
-	LCFG_LOG_START		  = 0x00ce011, /**< mgc only, process a
-						 *  cfg log
-						 */
-	LCFG_LOG_END		  = 0x00ce012, /**< stop processing updates */
-	LCFG_LOV_ADD_INA	  = 0x00ce013, /**< like LOV_ADD_OBD,
-						 *  inactive
-						 */
-	LCFG_ADD_MDC		  = 0x00cf014, /**< add an mdc to a lmv */
-	LCFG_DEL_MDC		  = 0x00cf015, /**< remove an mdc from a lmv */
-	LCFG_SPTLRPC_CONF	  = 0x00ce016, /**< security */
-	LCFG_POOL_NEW		  = 0x00ce020, /**< create an ost pool name */
-	LCFG_POOL_ADD		  = 0x00ce021, /**< add an ost to a pool */
-	LCFG_POOL_REM		  = 0x00ce022, /**< remove an ost from a pool */
-	LCFG_POOL_DEL		  = 0x00ce023, /**< destroy an ost pool name */
-	LCFG_SET_LDLM_TIMEOUT	  = 0x00ce030, /**< set ldlm_timeout */
-	LCFG_PRE_CLEANUP	  = 0x00cf031, /**< call type-specific
-						 * pre-cleanup
-						 */
-	LCFG_SET_PARAM		  = 0x00ce032, /**< use set_param syntax to set
-						 * a proc parameter
-						 */
-};
-
-struct lustre_cfg_bufs {
-	void  *lcfg_buf[LUSTRE_CFG_MAX_BUFCOUNT];
-	__u32 lcfg_buflen[LUSTRE_CFG_MAX_BUFCOUNT];
-	__u32 lcfg_bufcount;
-};
-
-struct lustre_cfg {
-	__u32 lcfg_version;
-	__u32 lcfg_command;
-
-	__u32 lcfg_num;
-	__u32 lcfg_flags;
-	__u64 lcfg_nid;
-	__u32 lcfg_nal;		/* not used any more */
-
-	__u32 lcfg_bufcount;
-	__u32 lcfg_buflens[0];
-};
-
-enum cfg_record_type {
-	PORTALS_CFG_TYPE	= 1,
-	LUSTRE_CFG_TYPE		= 123,
-};
-
-#define LUSTRE_CFG_BUFLEN(lcfg, idx)					\
-	((lcfg)->lcfg_bufcount <= (idx) ? 0 : (lcfg)->lcfg_buflens[(idx)])
-
-static inline void lustre_cfg_bufs_set(struct lustre_cfg_bufs *bufs,
-				       __u32 index, void *buf, __u32 buflen)
-{
-	if (index >= LUSTRE_CFG_MAX_BUFCOUNT)
-		return;
-
-	if (!bufs)
-		return;
-
-	if (bufs->lcfg_bufcount <= index)
-		bufs->lcfg_bufcount = index + 1;
-
-	bufs->lcfg_buf[index] = buf;
-	bufs->lcfg_buflen[index] = buflen;
-}
-
-static inline void lustre_cfg_bufs_set_string(struct lustre_cfg_bufs *bufs,
-					      __u32 index, char *str)
-{
-	lustre_cfg_bufs_set(bufs, index, str, str ? strlen(str) + 1 : 0);
-}
-
-static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs,
-					 char *name)
-{
-	memset(bufs, 0, sizeof(*bufs));
-	if (name)
-		lustre_cfg_bufs_set_string(bufs, 0, name);
-}
-
-static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, __u32 index)
-{
-	__u32 i;
-	size_t offset;
-	__u32 bufcount;
-
-	if (!lcfg)
-		return NULL;
-
-	bufcount = lcfg->lcfg_bufcount;
-	if (index >= bufcount)
-		return NULL;
-
-	offset = LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
-	for (i = 0; i < index; i++)
-		offset += __ALIGN_KERNEL(lcfg->lcfg_buflens[i], 8);
-	return (char *)lcfg + offset;
-}
-
-static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
-					struct lustre_cfg *lcfg)
-{
-	__u32 i;
-
-	bufs->lcfg_bufcount = lcfg->lcfg_bufcount;
-	for (i = 0; i < bufs->lcfg_bufcount; i++) {
-		bufs->lcfg_buflen[i] = lcfg->lcfg_buflens[i];
-		bufs->lcfg_buf[i] = lustre_cfg_buf(lcfg, i);
-	}
-}
-
-static inline __u32 lustre_cfg_len(__u32 bufcount, __u32 *buflens)
-{
-	__u32 i;
-	__u32 len;
-
-	len = LCFG_HDR_SIZE(bufcount);
-	for (i = 0; i < bufcount; i++)
-		len += __ALIGN_KERNEL(buflens[i], 8);
-
-	return __ALIGN_KERNEL(len, 8);
-}
-
-static inline void lustre_cfg_init(struct lustre_cfg *lcfg, int cmd,
-				   struct lustre_cfg_bufs *bufs)
-{
-	char *ptr;
-	__u32 i;
-
-	lcfg->lcfg_version = LUSTRE_CFG_VERSION;
-	lcfg->lcfg_command = cmd;
-	lcfg->lcfg_bufcount = bufs->lcfg_bufcount;
-
-	ptr = (char *)lcfg + LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
-	for (i = 0; i < lcfg->lcfg_bufcount; i++) {
-		lcfg->lcfg_buflens[i] = bufs->lcfg_buflen[i];
-		if (bufs->lcfg_buf[i]) {
-			memcpy(ptr, bufs->lcfg_buf[i], bufs->lcfg_buflen[i]);
-			ptr += __ALIGN_KERNEL(bufs->lcfg_buflen[i], 8);
-		}
-	}
-}
-
-static inline int lustre_cfg_sanity_check(void *buf, size_t len)
-{
-	struct lustre_cfg *lcfg = (struct lustre_cfg *)buf;
-
-	if (!lcfg)
-		return -EINVAL;
-
-	/* check that the first bits of the struct are valid */
-	if (len < LCFG_HDR_SIZE(0))
-		return -EINVAL;
-
-	if (lcfg->lcfg_version != LUSTRE_CFG_VERSION)
-		return -EINVAL;
-
-	if (lcfg->lcfg_bufcount >= LUSTRE_CFG_MAX_BUFCOUNT)
-		return -EINVAL;
-
-	/* check that the buflens are valid */
-	if (len < LCFG_HDR_SIZE(lcfg->lcfg_bufcount))
-		return -EINVAL;
-
-	/* make sure all the pointers point inside the data */
-	if (len < lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens))
-		return -EINVAL;
-
-	return 0;
-}
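/*
 * Editor's sketch of the intended build sequence (reset -> set -> init).
 * Allocation is the caller's job: "cfgbuf" is assumed to be at least
 * lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen) bytes.
 */
static inline struct lustre_cfg *
example_build_cfg(void *cfgbuf, char *obdname, char *param)
{
	struct lustre_cfg_bufs bufs;
	struct lustre_cfg *lcfg = cfgbuf;

	lustre_cfg_bufs_reset(&bufs, obdname);		/* buf 0: device name */
	lustre_cfg_bufs_set_string(&bufs, 1, param);	/* buf 1: parameter */
	lustre_cfg_init(lcfg, LCFG_PARAM, &bufs);	/* pack into cfgbuf */
	return lcfg;
}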
-
-/** @} cfg */
-
-#endif /* _UAPI_LUSTRE_CFG_H_ */

+ 0 - 293
drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h

@@ -1,293 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2016 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * all fid manipulation functions go here
- *
- * FIDs are globally unique within a Lustre filesystem, and are made up
- * of three parts: sequence, Object ID, and version.
- *
- */
-#ifndef _UAPI_LUSTRE_FID_H_
-#define _UAPI_LUSTRE_FID_H_
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/** returns fid object sequence */
-static inline __u64 fid_seq(const struct lu_fid *fid)
-{
-	return fid->f_seq;
-}
-
-/** returns fid object id */
-static inline __u32 fid_oid(const struct lu_fid *fid)
-{
-	return fid->f_oid;
-}
-
-/** returns fid object version */
-static inline __u32 fid_ver(const struct lu_fid *fid)
-{
-	return fid->f_ver;
-}
-
-static inline void fid_zero(struct lu_fid *fid)
-{
-	memset(fid, 0, sizeof(*fid));
-}
-
-static inline __u64 fid_ver_oid(const struct lu_fid *fid)
-{
-	return (__u64)fid_ver(fid) << 32 | fid_oid(fid);
-}
-
-static inline bool fid_seq_is_mdt0(__u64 seq)
-{
-	return seq == FID_SEQ_OST_MDT0;
-}
-
-static inline bool fid_seq_is_mdt(__u64 seq)
-{
-	return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL;
-}
-
-static inline bool fid_seq_is_echo(__u64 seq)
-{
-	return seq == FID_SEQ_ECHO;
-}
-
-static inline bool fid_is_echo(const struct lu_fid *fid)
-{
-	return fid_seq_is_echo(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_llog(__u64 seq)
-{
-	return seq == FID_SEQ_LLOG;
-}
-
-static inline bool fid_is_llog(const struct lu_fid *fid)
-{
-	/* file with OID == 0 is not llog but contains last oid */
-	return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0;
-}
-
-static inline bool fid_seq_is_rsvd(__u64 seq)
-{
-	return seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD;
-}
-
-static inline bool fid_seq_is_special(__u64 seq)
-{
-	return seq == FID_SEQ_SPECIAL;
-}
-
-static inline bool fid_seq_is_local_file(__u64 seq)
-{
-	return seq == FID_SEQ_LOCAL_FILE ||
-	       seq == FID_SEQ_LOCAL_NAME;
-}
-
-static inline bool fid_seq_is_root(__u64 seq)
-{
-	return seq == FID_SEQ_ROOT;
-}
-
-static inline bool fid_seq_is_dot(__u64 seq)
-{
-	return seq == FID_SEQ_DOT_LUSTRE;
-}
-
-static inline bool fid_seq_is_default(__u64 seq)
-{
-	return seq == FID_SEQ_LOV_DEFAULT;
-}
-
-static inline bool fid_is_mdt0(const struct lu_fid *fid)
-{
-	return fid_seq_is_mdt0(fid_seq(fid));
-}
-
-/**
- * Check if a fid is igif or not.
- * \param fid the fid to be tested.
- * \return true if the fid is an igif; otherwise false.
- */
-static inline bool fid_seq_is_igif(__u64 seq)
-{
-	return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
-}
-
-static inline bool fid_is_igif(const struct lu_fid *fid)
-{
-	return fid_seq_is_igif(fid_seq(fid));
-}
-
-/**
- * Check if a fid is idif or not.
- * \param fid the fid to be tested.
- * \return true if the fid is an idif; otherwise false.
- */
-static inline bool fid_seq_is_idif(__u64 seq)
-{
-	return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
-}
-
-static inline bool fid_is_idif(const struct lu_fid *fid)
-{
-	return fid_seq_is_idif(fid_seq(fid));
-}
-
-static inline bool fid_is_local_file(const struct lu_fid *fid)
-{
-	return fid_seq_is_local_file(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_norm(__u64 seq)
-{
-	return (seq >= FID_SEQ_NORMAL);
-}
-
-static inline bool fid_is_norm(const struct lu_fid *fid)
-{
-	return fid_seq_is_norm(fid_seq(fid));
-}
-
-/* convert an OST objid into an IDIF FID SEQ number */
-static inline __u64 fid_idif_seq(__u64 id, __u32 ost_idx)
-{
-	return FID_SEQ_IDIF | (ost_idx << 16) | ((id >> 32) & 0xffff);
-}
-
-/* convert a packed IDIF FID into an OST objid */
-static inline __u64 fid_idif_id(__u64 seq, __u32 oid, __u32 ver)
-{
-	return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid;
-}
-
-static inline __u32 idif_ost_idx(__u64 seq)
-{
-	return (seq >> 16) & 0xffff;
-}
-
-/* extract ost index from IDIF FID */
-static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid)
-{
-	return idif_ost_idx(fid_seq(fid));
-}
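/*
 * Editor's sketch: round-tripping an OST object id through the IDIF packing
 * above.  fid_idif_seq() keeps the top 16 id bits plus the OST index in the
 * sequence; the low 32 id bits live in the oid, so the value survives as
 * long as it fits in 48 bits.
 */
static inline __u64 example_idif_roundtrip(__u64 objid, __u32 ost_idx)
{
	struct lu_fid fid;

	fid.f_seq = fid_idif_seq(objid, ost_idx);
	fid.f_oid = (__u32)objid;
	fid.f_ver = 0;
	/* equals objid whenever objid < (1ULL << 48) */
	return fid_idif_id(fid_seq(&fid), fid_oid(&fid), fid_ver(&fid));
}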
-
-/**
- * Get inode number from an igif.
- * \param fid an igif to get inode number from.
- * \return inode number for the igif.
- */
-static inline ino_t lu_igif_ino(const struct lu_fid *fid)
-{
-	return fid_seq(fid);
-}
-
-/**
- * Get inode generation from an igif.
- * \param fid an igif to get inode generation from.
- * \return inode generation for the igif.
- */
-static inline __u32 lu_igif_gen(const struct lu_fid *fid)
-{
-	return fid_oid(fid);
-}
-
-/**
- * Build igif from the inode number/generation.
- */
-static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen)
-{
-	fid->f_seq = ino;
-	fid->f_oid = gen;
-	fid->f_ver = 0;
-}
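/*
 * Editor's sketch: an igif round trip built from the helpers above — pack
 * (ino, gen), then read both values back through the accessors.
 */
static inline int example_igif_roundtrip(__u32 ino, __u32 gen)
{
	struct lu_fid fid;

	lu_igif_build(&fid, ino, gen);
	return lu_igif_ino(&fid) == ino && lu_igif_gen(&fid) == gen;
}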
-
-/*
- * Fids are transmitted across network (in the sender byte-ordering),
- * and stored on disk in big-endian order.
- */
-static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
-{
-	dst->f_seq = __cpu_to_le64(fid_seq(src));
-	dst->f_oid = __cpu_to_le32(fid_oid(src));
-	dst->f_ver = __cpu_to_le32(fid_ver(src));
-}
-
-static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
-{
-	dst->f_seq = __le64_to_cpu(fid_seq(src));
-	dst->f_oid = __le32_to_cpu(fid_oid(src));
-	dst->f_ver = __le32_to_cpu(fid_ver(src));
-}
-
-static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
-{
-	dst->f_seq = __cpu_to_be64(fid_seq(src));
-	dst->f_oid = __cpu_to_be32(fid_oid(src));
-	dst->f_ver = __cpu_to_be32(fid_ver(src));
-}
-
-static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
-{
-	dst->f_seq = __be64_to_cpu(fid_seq(src));
-	dst->f_oid = __be32_to_cpu(fid_oid(src));
-	dst->f_ver = __be32_to_cpu(fid_ver(src));
-}
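/*
 * Editor's sketch: per the comment above, a FID headed for disk is converted
 * to big-endian first.  Each field is read before it is written, so the
 * conversion may safely be done in place.
 */
static inline void example_fid_to_disk(struct lu_fid *fid)
{
	fid_cpu_to_be(fid, fid);
}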
-
-static inline bool fid_is_sane(const struct lu_fid *fid)
-{
-	return fid && ((fid_seq(fid) >= FID_SEQ_START && !fid_ver(fid)) ||
-			fid_is_igif(fid) || fid_is_idif(fid) ||
-			fid_seq_is_rsvd(fid_seq(fid)));
-}
-
-static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
-{
-	return !memcmp(f0, f1, sizeof(*f0));
-}
-
-static inline int lu_fid_cmp(const struct lu_fid *f0,
-			     const struct lu_fid *f1)
-{
-	if (fid_seq(f0) != fid_seq(f1))
-		return fid_seq(f0) > fid_seq(f1) ? 1 : -1;
-
-	if (fid_oid(f0) != fid_oid(f1))
-		return fid_oid(f0) > fid_oid(f1) ? 1 : -1;
-
-	if (fid_ver(f0) != fid_ver(f1))
-		return fid_ver(f0) > fid_ver(f1) ? 1 : -1;
-
-	return 0;
-}
-#endif

+ 0 - 72
drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h

@@ -1,72 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2014, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * FIEMAP data structures and flags. This header file will be used until
- * fiemap.h is available in the upstream kernel.
- *
- * Author: Kalpak Shah <kalpak.shah@sun.com>
- * Author: Andreas Dilger <adilger@sun.com>
- */
-
-#ifndef _LUSTRE_FIEMAP_H
-#define _LUSTRE_FIEMAP_H
-
-#include <stddef.h>
-#include <linux/fiemap.h>
-
-/* XXX: We use fiemap_extent::fe_reserved[0] */
-#define fe_device	fe_reserved[0]
-
-static inline size_t fiemap_count_to_size(size_t extent_count)
-{
-	return sizeof(struct fiemap) + extent_count *
-				       sizeof(struct fiemap_extent);
-}
-
-static inline unsigned int fiemap_size_to_count(size_t array_size)
-{
-	return (array_size - sizeof(struct fiemap)) /
-		sizeof(struct fiemap_extent);
-}
-
-#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
-
-#ifdef FIEMAP_FLAGS_COMPAT
-#undef FIEMAP_FLAGS_COMPAT
-#endif
-
-/* Lustre specific flags - use a high bit, don't conflict with upstream flag */
-#define FIEMAP_EXTENT_NO_DIRECT	 0x40000000 /* Data mapping undefined */
-#define FIEMAP_EXTENT_NET	 0x80000000 /* Data stored remotely.
-					     * Sets NO_DIRECT flag
-					     */
-
-#endif /* _LUSTRE_FIEMAP_H */

+ 0 - 2690
drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h

@@ -1,2690 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Lustre wire protocol definitions.
- */
-
-/** \defgroup lustreidl lustreidl
- *
- * Lustre wire protocol definitions.
- *
- * ALL structs passing over the wire should be declared here.  Structs
- * that are used in interfaces with userspace should go in lustre_user.h.
- *
- * All structs being declared here should be built from simple fixed-size
- * types (__u8, __u16, __u32, __u64) or be built from other types or
- * structs also declared in this file.  Similarly, all flags and magic
- * values in those structs should also be declared here.  This ensures
- * that the Lustre wire protocol is not influenced by external dependencies.
- *
- * The only other acceptable items in this file are VERY SIMPLE accessor
- * functions to avoid callers grubbing inside the structures. Nothing that
- * depends on external functions or definitions should be in here.
- *
- * Structs must be properly aligned to put 64-bit values on an 8-byte
- * boundary.  Any structs being added here must also be added to
- * utils/wirecheck.c and "make newwiretest" run to regenerate the
- * utils/wiretest.c sources.  This allows us to verify that wire structs
- * have the proper alignment/size on all architectures.
- *
- * DO NOT CHANGE any of the structs, flags, values declared here and used
- * in released Lustre versions.  Some structs may have padding fields that
- * can be used.  Some structs might allow addition at the end (verify this
- * in the code to ensure that new/old clients that see this larger struct
- * do not fail, otherwise you need to implement protocol compatibility).
- *
- * @{
- */
-
-#ifndef _LUSTRE_IDL_H_
-#define _LUSTRE_IDL_H_
-
-#include <asm/byteorder.h>
-#include <linux/types.h>
-
-#include <uapi/linux/lnet/lnet-types.h>
-/* Defn's shared with user-space. */
-#include <uapi/linux/lustre/lustre_user.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-
-/*
- *  GENERAL STUFF
- */
-/* FOO_REQUEST_PORTAL is for incoming requests on the FOO
- * FOO_REPLY_PORTAL   is for incoming replies on the FOO
- * FOO_BULK_PORTAL    is for incoming bulk on the FOO
- */
-
-/* Lustre service names follow the format:
- * service name + MDT + seq name
- */
-#define LUSTRE_MDT_MAXNAMELEN	80
-
-#define CONNMGR_REQUEST_PORTAL          1
-#define CONNMGR_REPLY_PORTAL            2
-/*#define OSC_REQUEST_PORTAL            3 */
-#define OSC_REPLY_PORTAL                4
-/*#define OSC_BULK_PORTAL               5 */
-#define OST_IO_PORTAL                   6
-#define OST_CREATE_PORTAL               7
-#define OST_BULK_PORTAL                 8
-/*#define MDC_REQUEST_PORTAL            9 */
-#define MDC_REPLY_PORTAL               10
-/*#define MDC_BULK_PORTAL              11 */
-#define MDS_REQUEST_PORTAL             12
-/*#define MDS_REPLY_PORTAL             13 */
-#define MDS_BULK_PORTAL                14
-#define LDLM_CB_REQUEST_PORTAL         15
-#define LDLM_CB_REPLY_PORTAL           16
-#define LDLM_CANCEL_REQUEST_PORTAL     17
-#define LDLM_CANCEL_REPLY_PORTAL       18
-/*#define PTLBD_REQUEST_PORTAL         19 */
-/*#define PTLBD_REPLY_PORTAL           20 */
-/*#define PTLBD_BULK_PORTAL            21 */
-#define MDS_SETATTR_PORTAL             22
-#define MDS_READPAGE_PORTAL            23
-#define OUT_PORTAL                     24
-
-#define MGC_REPLY_PORTAL               25
-#define MGS_REQUEST_PORTAL             26
-#define MGS_REPLY_PORTAL               27
-#define OST_REQUEST_PORTAL             28
-#define FLD_REQUEST_PORTAL             29
-#define SEQ_METADATA_PORTAL            30
-#define SEQ_DATA_PORTAL                31
-#define SEQ_CONTROLLER_PORTAL          32
-#define MGS_BULK_PORTAL                33
-
-/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com,
- *						n8851@cray.com
- */
-
-/* packet types */
-#define PTL_RPC_MSG_REQUEST 4711
-#define PTL_RPC_MSG_ERR     4712
-#define PTL_RPC_MSG_REPLY   4713
-
-/* DON'T use swabbed values of MAGIC as magic! */
-#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3
-#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B
-
-#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2
-
-#define PTLRPC_MSG_VERSION  0x00000003
-#define LUSTRE_VERSION_MASK 0xffff0000
-#define LUSTRE_OBD_VERSION  0x00010000
-#define LUSTRE_MDS_VERSION  0x00020000
-#define LUSTRE_OST_VERSION  0x00030000
-#define LUSTRE_DLM_VERSION  0x00040000
-#define LUSTRE_LOG_VERSION  0x00050000
-#define LUSTRE_MGS_VERSION  0x00060000
-
-/**
- * Describes a range of sequence numbers: lsr_start is included in the
- * range but lsr_end is not.
- * The same structure is used in the fld module, where the lsr_index
- * field holds the mdt id of the home mdt.
- */
-struct lu_seq_range {
-	__u64 lsr_start;
-	__u64 lsr_end;
-	__u32 lsr_index;
-	__u32 lsr_flags;
-};
-
-struct lu_seq_range_array {
-	__u32 lsra_count;
-	__u32 lsra_padding;
-	struct lu_seq_range lsra_lsr[0];
-};
-
-#define LU_SEQ_RANGE_MDT	0x0
-#define LU_SEQ_RANGE_OST	0x1
-#define LU_SEQ_RANGE_ANY	0x3
-
-#define LU_SEQ_RANGE_MASK	0x3
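Since lsr_start is inclusive and lsr_end exclusive, membership is a half-open interval test. A sketch of such a predicate (the helper name is illustrative; upstream Lustre keeps its range helpers in other headers):

	/* Does seq fall inside the half-open range [lsr_start, lsr_end)? */
	static inline bool lu_seq_range_contains(const struct lu_seq_range *r,
						 __u64 seq)
	{
		return seq >= r->lsr_start && seq < r->lsr_end;
	}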
-
-/** \defgroup lu_fid lu_fid
- * @{
- */
-
-/**
- * Flags for lustre_mdt_attrs::lma_compat and lustre_mdt_attrs::lma_incompat.
- * Deprecated since the HSM and SOM attributes are now stored in separate
- * on-disk xattrs.
- */
-enum lma_compat {
-	LMAC_HSM	= 0x00000001,
-/*	LMAC_SOM	= 0x00000002, obsolete since 2.8.0 */
-	LMAC_NOT_IN_OI	= 0x00000004, /* the object does NOT need OI mapping */
-	LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
-				       * under /O/<seq>/d<x>.
-				       */
-};
-
-/**
- * Masks for all features that should be supported by a Lustre version to
- * access a specific file.
- * This information is stored in lustre_mdt_attrs::lma_incompat.
- */
-enum lma_incompat {
-	LMAI_RELEASED		= 0x00000001, /* file is released */
-	LMAI_AGENT		= 0x00000002, /* agent inode */
-	LMAI_REMOTE_PARENT	= 0x00000004, /* the parent of the object
-					       * is on the remote MDT
-					       */
-};
-
-#define LMA_INCOMPAT_SUPP	(LMAI_AGENT | LMAI_REMOTE_PARENT)
-
-/**
- * fid constants
- */
-enum {
-	/** LASTID file has zero OID */
-	LUSTRE_FID_LASTID_OID = 0UL,
-	/** initial fid id value */
-	LUSTRE_FID_INIT_OID  = 1UL
-};
-
-/* The copytool uses a 32-bit bitmask field to encode archive IDs when
- * registering with the MDT through KUC.
- * archive num = 0 => all
- * archive num from 1 to 32
- */
-#define LL_HSM_MAX_ARCHIVE (sizeof(__u32) * 8)
-
-/**
- * Note that SEQ numbers below 12 correspond to ldiskfs inodes that are
- * reserved by the filesystem itself and so never appear as real inodes
- * in the IGIF namespace.  These reserved SEQ numbers can therefore be
- * used for other purposes without risking collisions with existing inodes.
- *
- * Different FID Format
- * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
- */
-enum fid_seq {
-	FID_SEQ_OST_MDT0	= 0,
-	FID_SEQ_LLOG		= 1, /* unnamed llogs */
-	FID_SEQ_ECHO		= 2,
-	FID_SEQ_OST_MDT1	= 3,
-	FID_SEQ_OST_MAX		= 9, /* Max MDT count before OST_on_FID */
-	FID_SEQ_LLOG_NAME	= 10, /* named llogs */
-	FID_SEQ_RSVD		= 11,
-	FID_SEQ_IGIF		= 12,
-	FID_SEQ_IGIF_MAX	= 0x0ffffffffULL,
-	FID_SEQ_IDIF		= 0x100000000ULL,
-	FID_SEQ_IDIF_MAX	= 0x1ffffffffULL,
-	/* Normal FID sequence starts from this value, i.e. 1<<33 */
-	FID_SEQ_START		= 0x200000000ULL,
-	/* sequence for local pre-defined FIDs listed in local_oid */
-	FID_SEQ_LOCAL_FILE	= 0x200000001ULL,
-	FID_SEQ_DOT_LUSTRE	= 0x200000002ULL,
-	/* sequence is used for local named objects FIDs generated
-	 * by local_object_storage library
-	 */
-	FID_SEQ_LOCAL_NAME	= 0x200000003ULL,
-	/* Because the current FLD only caches the fid sequence (not the
-	 * oid) on the client side, any FID that needs to be exposed to
-	 * clients must guarantee that all fids under its sequence are
-	 * located on a single MDT.
-	 */
-	FID_SEQ_SPECIAL		= 0x200000004ULL,
-	FID_SEQ_QUOTA		= 0x200000005ULL,
-	FID_SEQ_QUOTA_GLB	= 0x200000006ULL,
-	FID_SEQ_ROOT		= 0x200000007ULL,  /* Located on MDT0 */
-	FID_SEQ_NORMAL		= 0x200000400ULL,
-	FID_SEQ_LOV_DEFAULT	= 0xffffffffffffffffULL
-};
-
-#define OBIF_OID_MAX_BITS	   32
-#define OBIF_MAX_OID		(1ULL << OBIF_OID_MAX_BITS)
-#define OBIF_OID_MASK	       ((1ULL << OBIF_OID_MAX_BITS) - 1)
-#define IDIF_OID_MAX_BITS	   48
-#define IDIF_MAX_OID		(1ULL << IDIF_OID_MAX_BITS)
-#define IDIF_OID_MASK	       ((1ULL << IDIF_OID_MAX_BITS) - 1)
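These constants partition the sequence space, so a FID can be classified by its sequence alone. A minimal sketch of the kind of predicates this enables (illustrative bodies; upstream keeps similar helpers in lustre_fid.h):

	/* Classify a sequence number into a FID namespace. */
	static inline bool seq_is_igif(__u64 seq)
	{
		return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
	}

	static inline bool seq_is_idif(__u64 seq)
	{
		return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
	}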
-
-/** OID for FID_SEQ_SPECIAL */
-enum special_oid {
-	/* Big Filesystem Lock to serialize rename operations */
-	FID_OID_SPECIAL_BFL     = 1UL,
-};
-
-/** OID for FID_SEQ_DOT_LUSTRE */
-enum dot_lustre_oid {
-	FID_OID_DOT_LUSTRE  = 1UL,
-	FID_OID_DOT_LUSTRE_OBF = 2UL,
-};
-
-/** OID for FID_SEQ_ROOT */
-enum root_oid {
-	FID_OID_ROOT		= 1UL,
-	FID_OID_ECHO_ROOT	= 2UL,
-};
-
-/** @} lu_fid */
-
-/** \defgroup lu_dir lu_dir
- * @{
- */
-
-/**
- * Enumeration of possible directory entry attributes.
- *
- * Attributes follow the directory entry header in the order they appear
- * in this enumeration.
- */
-enum lu_dirent_attrs {
-	LUDA_FID		= 0x0001,
-	LUDA_TYPE		= 0x0002,
-	LUDA_64BITHASH		= 0x0004,
-};
-
-/**
- * Layout of readdir pages, as transmitted on wire.
- */
-struct lu_dirent {
-	/** valid if LUDA_FID is set. */
-	struct lu_fid lde_fid;
-	/** a unique entry identifier: a hash or an offset. */
-	__u64	 lde_hash;
-	/** total record length, including all attributes. */
-	__u16	 lde_reclen;
-	/** name length */
-	__u16	 lde_namelen;
-	/** optional variable size attributes following this entry.
-	 *  taken from enum lu_dirent_attrs.
-	 */
-	__u32	 lde_attrs;
-	/** name is followed by the attributes indicated in ->ldp_attrs, in
-	 *  their natural order. After the last attribute, padding bytes are
-	 *  added to make ->lde_reclen a multiple of 8.
-	 */
-	char	  lde_name[0];
-};
-
-/*
- * Definitions of optional directory entry attributes formats.
- *
- * Individual attributes do not have their length encoded in a generic way.
- * It is assumed that the consumer of an attribute knows its format. This
- * means that it is impossible to skip over an unknown attribute, except by
- * skipping over all remaining attributes (by using ->lde_reclen), which is
- * not too constraining, because new server versions will append new
- * attributes at the end of an entry.
- */
-
-/**
- * Fid directory attribute: a fid of an object referenced by the entry. This
- * will be almost always requested by the client and supplied by the server.
- *
- * Aligned to 8 bytes.
- */
-/* For compatibility with 1.8, let's keep the fid in the lu_dirent struct. */
-
-/**
- * File type.
- *
- * Aligned to 2 bytes.
- */
-struct luda_type {
-	__u16 lt_type;
-};
-
-#ifndef IFSHIFT
-#define IFSHIFT                 12
-#endif
-
-#ifndef IFTODT
-#define IFTODT(type)		(((type) & S_IFMT) >> IFSHIFT)
-#endif
-#ifndef DTTOIF
-#define DTTOIF(dirtype)		((dirtype) << IFSHIFT)
-#endif
-
-struct lu_dirpage {
-	__le64	    ldp_hash_start;
-	__le64	    ldp_hash_end;
-	__le32	    ldp_flags;
-	__le32	    ldp_pad0;
-	struct lu_dirent ldp_entries[0];
-};
-
-enum lu_dirpage_flags {
-	/**
-	 * dirpage contains no entry.
-	 */
-	LDF_EMPTY   = 1 << 0,
-	/**
-	 * last entry's lde_hash equals ldp_hash_end.
-	 */
-	LDF_COLLIDE = 1 << 1
-};
-
-static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
-{
-	if (__le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
-		return NULL;
-	else
-		return dp->ldp_entries;
-}
-
-static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
-{
-	struct lu_dirent *next;
-
-	if (__le16_to_cpu(ent->lde_reclen) != 0)
-		next = ((void *)ent) + __le16_to_cpu(ent->lde_reclen);
-	else
-		next = NULL;
-
-	return next;
-}
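Together these give the canonical way to walk a dirpage: start with lu_dirent_start(), advance with lu_dirent_next(), and stop on NULL (an empty page or a zero lde_reclen). A sketch, assuming a kernel context (walk_dirpage is an illustrative name):

	/* Visit every entry packed into one directory page. */
	static void walk_dirpage(struct lu_dirpage *dp)
	{
		struct lu_dirent *ent;

		for (ent = lu_dirent_start(dp); ent; ent = lu_dirent_next(ent))
			/* lde_name is not NUL-terminated; bound by lde_namelen */
			pr_info("%.*s\n", (int)__le16_to_cpu(ent->lde_namelen),
				ent->lde_name);
	}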
-
-static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
-{
-	size_t size;
-
-	if (attr & LUDA_TYPE) {
-		const size_t align = sizeof(struct luda_type) - 1;
-
-		size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
-		size += sizeof(struct luda_type);
-	} else {
-		size = sizeof(struct lu_dirent) + namelen;
-	}
-
-	return (size + 7) & ~7;
-}
-
-#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
-
-/**
- * MDS_READPAGE page size
- *
- * This is the directory page size packed in MDS_READPAGE RPC.
- * It differs from PAGE_SIZE because the client needs to access the
- * struct lu_dirpage header packed at the beginning of the "page", and
- * without a fixed page size there would be no way to find the lu_dirpage
- * headers when the client and server PAGE_SIZE differ.
- */
-#define LU_PAGE_SHIFT 12
-#define LU_PAGE_SIZE  (1UL << LU_PAGE_SHIFT)
-#define LU_PAGE_MASK  (~(LU_PAGE_SIZE - 1))
-
-#define LU_PAGE_COUNT (1 << (PAGE_SHIFT - LU_PAGE_SHIFT))
-
-/** @} lu_dir */
-
-struct lustre_handle {
-	__u64 cookie;
-};
-
-#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL
-
-static inline bool lustre_handle_is_used(const struct lustre_handle *lh)
-{
-	return lh->cookie != 0ull;
-}
-
-static inline bool lustre_handle_equal(const struct lustre_handle *lh1,
-				       const struct lustre_handle *lh2)
-{
-	return lh1->cookie == lh2->cookie;
-}
-
-static inline void lustre_handle_copy(struct lustre_handle *tgt,
-				      const struct lustre_handle *src)
-{
-	tgt->cookie = src->cookie;
-}
-
-/* flags for lm_flags */
-#define MSGHDR_AT_SUPPORT	       0x1
-#define MSGHDR_CKSUM_INCOMPAT18	 0x2
-
-#define lustre_msg lustre_msg_v2
-/* we depend on this structure to be 8-byte aligned */
-/* this type is only endian-adjusted in lustre_unpack_msg() */
-struct lustre_msg_v2 {
-	__u32 lm_bufcount;
-	__u32 lm_secflvr;
-	__u32 lm_magic;
-	__u32 lm_repsize;
-	__u32 lm_cksum;
-	__u32 lm_flags;
-	__u32 lm_padding_2;
-	__u32 lm_padding_3;
-	__u32 lm_buflens[0];
-};
-
-/* without gss, ptlrpc_body is put in the first buffer. */
-#define PTLRPC_NUM_VERSIONS     4
-
-struct ptlrpc_body_v3 {
-	struct lustre_handle pb_handle;
-	__u32 pb_type;
-	__u32 pb_version;
-	__u32 pb_opc;
-	__u32 pb_status;
-	__u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
-	__u16 pb_tag;      /* virtual slot idx for multiple modifying RPCs */
-	__u16 pb_padding0;
-	__u32 pb_padding1;
-	__u64 pb_last_committed;
-	__u64 pb_transno;
-	__u32 pb_flags;
-	__u32 pb_op_flags;
-	__u32 pb_conn_cnt;
-	__u32 pb_timeout;  /* for req, the deadline, for rep, the service est */
-	__u32 pb_service_time; /* for rep, actual service time */
-	__u32 pb_limit;
-	__u64 pb_slv;
-	/* VBR: pre-versions */
-	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
-	__u64 pb_mbits; /**< match bits for bulk request */
-	/* padding for future needs */
-	__u64 pb_padding64_0;
-	__u64 pb_padding64_1;
-	__u64 pb_padding64_2;
-	char  pb_jobid[LUSTRE_JOBID_SIZE];
-};
-
-#define ptlrpc_body     ptlrpc_body_v3
-
-struct ptlrpc_body_v2 {
-	struct lustre_handle pb_handle;
-	__u32 pb_type;
-	__u32 pb_version;
-	__u32 pb_opc;
-	__u32 pb_status;
-	__u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
-	__u16 pb_tag;      /* virtual slot idx for multiple modifying RPCs */
-	__u16 pb_padding0;
-	__u32 pb_padding1;
-	__u64 pb_last_committed;
-	__u64 pb_transno;
-	__u32 pb_flags;
-	__u32 pb_op_flags;
-	__u32 pb_conn_cnt;
-	__u32 pb_timeout;  /* for req, the deadline, for rep, the service est */
-	__u32 pb_service_time; /* for rep, actual service time, also used for
-				* net_latency of req
-				*/
-	__u32 pb_limit;
-	__u64 pb_slv;
-	/* VBR: pre-versions */
-	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
-	__u64 pb_mbits; /**< unused in V2 */
-	/* padding for future needs */
-	__u64 pb_padding64_0;
-	__u64 pb_padding64_1;
-	__u64 pb_padding64_2;
-};
-
-/* message body offset for lustre_msg_v2 */
-/* ptlrpc body offset in all request/reply messages */
-#define MSG_PTLRPC_BODY_OFF	     0
-
-/* normal request/reply message record offset */
-#define REQ_REC_OFF		     1
-#define REPLY_REC_OFF		   1
-
-/* ldlm request message body offset */
-#define DLM_LOCKREQ_OFF		 1 /* lockreq offset */
-#define DLM_REQ_REC_OFF		 2 /* normal dlm request record offset */
-
-/* ldlm intent lock message body offset */
-#define DLM_INTENT_IT_OFF	       2 /* intent lock it offset */
-#define DLM_INTENT_REC_OFF	      3 /* intent lock record offset */
-
-/* ldlm reply message body offset */
-#define DLM_LOCKREPLY_OFF	       1 /* lockrep offset */
-#define DLM_REPLY_REC_OFF	       2 /* reply record offset */
-
-/** only use in req->rq_{req,rep}_swab_mask */
-#define MSG_PTLRPC_HEADER_OFF	   31
-
-/* Flags that are operation-specific go in the top 16 bits. */
-#define MSG_OP_FLAG_MASK   0xffff0000
-#define MSG_OP_FLAG_SHIFT  16
-
-/* Flags that apply to all requests are in the bottom 16 bits */
-#define MSG_GEN_FLAG_MASK     0x0000ffff
-#define MSG_LAST_REPLAY	   0x0001
-#define MSG_RESENT		0x0002
-#define MSG_REPLAY		0x0004
-/* #define MSG_AT_SUPPORT	 0x0008
- * This was used in early prototypes of adaptive timeouts, and while there
- * shouldn't be any users of that code there also isn't a need for using
- * this bit. Defer usage until at least 1.10 to avoid potential conflict.
- */
-#define MSG_DELAY_REPLAY	  0x0010
-#define MSG_VERSION_REPLAY	0x0020
-#define MSG_REQ_REPLAY_DONE       0x0040
-#define MSG_LOCK_REPLAY_DONE      0x0080
-
-/*
- * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
- */
-
-#define MSG_CONNECT_RECOVERING  0x00000001
-#define MSG_CONNECT_RECONNECT   0x00000002
-#define MSG_CONNECT_REPLAYABLE  0x00000004
-/*#define MSG_CONNECT_PEER	0x8 */
-#define MSG_CONNECT_LIBCLIENT   0x00000010
-#define MSG_CONNECT_INITIAL     0x00000020
-#define MSG_CONNECT_ASYNC       0x00000040
-#define MSG_CONNECT_NEXT_VER    0x00000080 /* use next version of lustre_msg */
-#define MSG_CONNECT_TRANSNO     0x00000100 /* report transno */
-
-/* Connect flags */
-#define OBD_CONNECT_RDONLY		  0x1ULL /*client has read-only access*/
-#define OBD_CONNECT_INDEX		  0x2ULL /*connect specific LOV idx */
-#define OBD_CONNECT_MDS			  0x4ULL /*connect from MDT to OST */
-#define OBD_CONNECT_GRANT		  0x8ULL /*OSC gets grant at connect */
-#define OBD_CONNECT_SRVLOCK		 0x10ULL /*server takes locks for cli */
-#define OBD_CONNECT_VERSION		 0x20ULL /*Lustre versions in ocd */
-#define OBD_CONNECT_REQPORTAL		 0x40ULL /*Separate non-IO req portal */
-#define OBD_CONNECT_ACL			 0x80ULL /*access control lists */
-#define OBD_CONNECT_XATTR		0x100ULL /*client use extended attr */
-#define OBD_CONNECT_LARGE_ACL		0x200ULL /* more than 32 ACL entries */
-#define OBD_CONNECT_TRUNCLOCK		0x400ULL /*locks on server for punch */
-#define OBD_CONNECT_TRANSNO		0x800ULL /*replay sends init transno */
-#define OBD_CONNECT_IBITS	       0x1000ULL /*support for inodebits locks*/
-#define OBD_CONNECT_JOIN	       0x2000ULL /*files can be concatenated.
-						  *We no longer support JOIN
-						  *FILE; this flag is reserved
-						  *only to keep the bit from
-						  *being reused.
-						  */
-#define OBD_CONNECT_ATTRFID	       0x4000ULL /*Server can GetAttr By Fid*/
-#define OBD_CONNECT_NODEVOH	       0x8000ULL /*No open hndl on specl nodes*/
-#define OBD_CONNECT_RMT_CLIENT	      0x10000ULL /* Remote client, never used
-						  * in production. Removed in
-						  * 2.9. Keep this flag to
-						  * avoid reuse.
-						  */
-#define OBD_CONNECT_RMT_CLIENT_FORCE  0x20000ULL /* Remote client by force,
-						  * never used in production.
-						  * Removed in 2.9. Keep this
-						  * flag to avoid reuse
-						  */
-#define OBD_CONNECT_BRW_SIZE	      0x40000ULL /*Max bytes per rpc */
-#define OBD_CONNECT_QUOTA64	      0x80000ULL /*Not used since 2.4 */
-#define OBD_CONNECT_MDS_CAPA	     0x100000ULL /*MDS capability */
-#define OBD_CONNECT_OSS_CAPA	     0x200000ULL /*OSS capability */
-#define OBD_CONNECT_CANCELSET	     0x400000ULL /*Early batched cancels. */
-#define OBD_CONNECT_SOM		     0x800000ULL /*Size on MDS */
-#define OBD_CONNECT_AT		    0x1000000ULL /*client uses AT */
-#define OBD_CONNECT_LRU_RESIZE      0x2000000ULL /*LRU resize feature. */
-#define OBD_CONNECT_MDS_MDS	    0x4000000ULL /*MDS-MDS connection */
-#define OBD_CONNECT_REAL	    0x8000000ULL /* obsolete since 2.8 */
-#define OBD_CONNECT_CHANGE_QS      0x10000000ULL /*Not used since 2.4 */
-#define OBD_CONNECT_CKSUM	   0x20000000ULL /*support several cksum algos*/
-#define OBD_CONNECT_FID		   0x40000000ULL /*FID is supported by server */
-#define OBD_CONNECT_VBR		   0x80000000ULL /*version based recovery */
-#define OBD_CONNECT_LOV_V3	  0x100000000ULL /*client supports LOV v3 EA */
-#define OBD_CONNECT_GRANT_SHRINK  0x200000000ULL /* support grant shrink */
-#define OBD_CONNECT_SKIP_ORPHAN   0x400000000ULL /* don't reuse orphan objids */
-#define OBD_CONNECT_MAX_EASIZE    0x800000000ULL /* preserved for large EA */
-#define OBD_CONNECT_FULL20       0x1000000000ULL /* it is 2.0 client */
-#define OBD_CONNECT_LAYOUTLOCK   0x2000000000ULL /* client uses layout lock */
-#define OBD_CONNECT_64BITHASH    0x4000000000ULL /* client supports 64-bits
-						  * directory hash
-						  */
-#define OBD_CONNECT_MAXBYTES     0x8000000000ULL /* max stripe size */
-#define OBD_CONNECT_IMP_RECOV   0x10000000000ULL /* imp recovery support */
-#define OBD_CONNECT_JOBSTATS    0x20000000000ULL /* jobid in ptlrpc_body */
-#define OBD_CONNECT_UMASK       0x40000000000ULL /* create uses client umask */
-#define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS
-						  * RPC error properly
-						  */
-#define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
-						  * finer space reservation
-						  */
-#define OBD_CONNECT_FLOCK_OWNER 0x200000000000ULL /* for the fixed 1.8
-						   * policy and 2.x server
-						   */
-#define OBD_CONNECT_LVB_TYPE	0x400000000000ULL /* variable type of LVB */
-#define OBD_CONNECT_NANOSEC_TIME 0x800000000000ULL /* nanosecond timestamps */
-#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */
-#define OBD_CONNECT_SHORTIO     0x2000000000000ULL/* short io */
-#define OBD_CONNECT_PINGLESS	0x4000000000000ULL/* pings not required */
-#define OBD_CONNECT_FLOCK_DEAD	0x8000000000000ULL/* flock deadlock detection */
-#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/
-#define OBD_CONNECT_OPEN_BY_FID	0x20000000000000ULL	/* open by fid won't pack
-							 * name in request
-							 */
-#define OBD_CONNECT_LFSCK	0x40000000000000ULL/* support online LFSCK */
-#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
-#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify
-						       *  RPCs in parallel
-						       */
-#define OBD_CONNECT_DIR_STRIPE	 0x400000000000000ULL/* striped DNE dir */
-#define OBD_CONNECT_SUBTREE	 0x800000000000000ULL /* fileset mount */
-#define OBD_CONNECT_LOCK_AHEAD	 0x1000000000000000ULL /* lock ahead */
-/** bulk matchbits is sent within ptlrpc_body */
-#define OBD_CONNECT_BULK_MBITS	 0x2000000000000000ULL
-#define OBD_CONNECT_OBDOPACK	 0x4000000000000000ULL /* compact OUT obdo */
-#define OBD_CONNECT_FLAGS2	 0x8000000000000000ULL /* second flags word */
-
-/* XXX README XXX:
- * Please DO NOT add flag values here before first ensuring that this same
- * flag value is not in use on some other branch.  Please clear any such
- * changes with senior engineers before starting to use a new flag.  Then,
- * submit a small patch against EVERY branch that ONLY adds the new flag,
- * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the
- * flag to check_obd_connect_data(), and updates wiretests accordingly, so it
- * can be approved and landed easily to reserve the flag for future use.
- */
-
-/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS
- * connection.  It is a temporary bug fix for Imperative Recovery interop
- * between 2.2 and 2.3 x86/ppc nodes, and can be removed when interop for
- * 2.2 clients/servers is no longer needed.  LU-1252/LU-1644.
- */
-#define OBD_CONNECT_MNE_SWAB		 OBD_CONNECT_MDS_MDS
-
-#define OCD_HAS_FLAG(ocd, flg)  \
-	(!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
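The !! in OCD_HAS_FLAG() collapses the masked 64-bit value to 0 or 1, so the result is safe to assign to a narrower type or use directly in a condition. A usage sketch (the surrounding function is illustrative):

	/* Did the peer negotiate grant shrinking at connect time? */
	static bool peer_supports_grant_shrink(const struct obd_connect_data *ocd)
	{
		return OCD_HAS_FLAG(ocd, GRANT_SHRINK);
	}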
-
-/* Features required for this version of the client to work with server */
-#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
-				 OBD_CONNECT_FULL20)
-
-/* This structure is used for both request and reply.
- *
- * If we eventually have separate connect data for different types, which we
- * almost certainly will, then perhaps we stick a union in here.
- */
-struct obd_connect_data {
-	__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
-	__u32 ocd_version;	 /* lustre release version number */
-	__u32 ocd_grant;	 /* initial cache grant amount (bytes) */
-	__u32 ocd_index;	 /* LOV index to connect to */
-	__u32 ocd_brw_size;	 /* Maximum BRW size in bytes */
-	__u64 ocd_ibits_known;   /* inode bits this client understands */
-	__u8  ocd_blocksize;     /* log2 of the backend filesystem blocksize */
-	__u8  ocd_inodespace;    /* log2 of the per-inode space consumption */
-	__u16 ocd_grant_extent;  /* per-extent grant overhead, in 1K blocks */
-	__u32 ocd_unused;	 /* also fix lustre_swab_connect */
-	__u64 ocd_transno;       /* first transno from client to be replayed */
-	__u32 ocd_group;	 /* MDS group on OST */
-	__u32 ocd_cksum_types;   /* supported checksum algorithms */
-	__u32 ocd_max_easize;    /* How big LOV EA can be on MDS */
-	__u32 ocd_instance;      /* instance # of this target */
-	__u64 ocd_maxbytes;      /* Maximum stripe size in bytes */
-	/* Fields after ocd_maxbytes are only accessible by the receiver
-	 * if the corresponding flag in ocd_connect_flags is set. Accessing
-	 * any field after ocd_maxbytes on the receiver without a valid flag
-	 * may result in out-of-bounds memory access and a kernel oops.
-	 */
-	__u16 ocd_maxmodrpcs;	/* Maximum modify RPCs in parallel */
-	__u16 padding0;		/* added 2.1.0. also fix lustre_swab_connect */
-	__u32 padding1;		/* added 2.1.0. also fix lustre_swab_connect */
-	__u64 ocd_connect_flags2;
-	__u64 padding3;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding4;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding5;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding6;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding7;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding8;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding9;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingA;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingB;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingC;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingD;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingE;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 paddingF;	  /* added 2.1.0. also fix lustre_swab_connect */
-};
-
-/* XXX README XXX:
- * Please DO NOT use any fields here before first ensuring that this same
- * field is not in use on some other branch.  Please clear any such changes
- * with senior engineers before starting to use a new field.  Then, submit
- * a small patch against EVERY branch that ONLY adds the new field along with
- * the matching OBD_CONNECT flag, so that can be approved and landed easily to
- * reserve the flag for future use.
- */
-
-/*
- * Supported checksum algorithms. Up to 32 checksum types are supported.
- * (32-bit mask stored in obd_connect_data::ocd_cksum_types)
- * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new
- * algorithm and also the OBD_FL_CKSUM* flags.
- */
-enum cksum_type {
-	OBD_CKSUM_CRC32  = 0x00000001,
-	OBD_CKSUM_ADLER  = 0x00000002,
-	OBD_CKSUM_CRC32C = 0x00000004,
-};
-
-/*
- *   OST requests: OBDO & OBD request records
- */
-
-/* opcodes */
-enum ost_cmd {
-	OST_REPLY      =  0,       /* reply ? */
-	OST_GETATTR    =  1,
-	OST_SETATTR    =  2,
-	OST_READ       =  3,
-	OST_WRITE      =  4,
-	OST_CREATE     =  5,
-	OST_DESTROY    =  6,
-	OST_GET_INFO   =  7,
-	OST_CONNECT    =  8,
-	OST_DISCONNECT =  9,
-	OST_PUNCH      = 10,
-	OST_OPEN       = 11,
-	OST_CLOSE      = 12,
-	OST_STATFS     = 13,
-	OST_SYNC       = 16,
-	OST_SET_INFO   = 17,
-	OST_QUOTACHECK = 18, /* not used since 2.4 */
-	OST_QUOTACTL   = 19,
-	OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
-	OST_LAST_OPC
-};
-#define OST_FIRST_OPC  OST_REPLY
-
-enum obdo_flags {
-	OBD_FL_INLINEDATA   = 0x00000001,
-	OBD_FL_OBDMDEXISTS  = 0x00000002,
-	OBD_FL_DELORPHAN    = 0x00000004, /* if set in o_flags delete orphans */
-	OBD_FL_NORPC	    = 0x00000008, /* set in o_flags do in OSC not OST */
-	OBD_FL_IDONLY       = 0x00000010, /* set in o_flags only adjust obj id*/
-	OBD_FL_RECREATE_OBJS = 0x00000020, /* recreate missing obj */
-	OBD_FL_DEBUG_CHECK  = 0x00000040, /* echo client/server debug check */
-	OBD_FL_NO_USRQUOTA  = 0x00000100, /* the object's owner is over quota */
-	OBD_FL_NO_GRPQUOTA  = 0x00000200, /* the object's group is over quota */
-	OBD_FL_CREATE_CROW  = 0x00000400, /* object should be created on write */
-	OBD_FL_SRVLOCK      = 0x00000800, /* delegate DLM locking to server */
-	OBD_FL_CKSUM_CRC32  = 0x00001000, /* CRC32 checksum type */
-	OBD_FL_CKSUM_ADLER  = 0x00002000, /* ADLER checksum type */
-	OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */
-	OBD_FL_CKSUM_RSVD2  = 0x00008000, /* for future cksum types */
-	OBD_FL_CKSUM_RSVD3  = 0x00010000, /* for future cksum types */
-	OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */
-	OBD_FL_MMAP	    = 0x00040000, /* object is mmapped on the client.
-					   * XXX: obsolete - reserved for old
-					   * clients prior to 2.2
-					   */
-	OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
-	OBD_FL_NOSPC_BLK    = 0x00100000, /* no more block space on OST */
-	OBD_FL_FLUSH	    = 0x00200000, /* flush pages on the OST */
-	OBD_FL_SHORT_IO	    = 0x00400000, /* short io request */
-
-	/* Note that while these checksum values are currently separate bits,
-	 * in 2.x we can actually allow all values from 1-31 if we wanted.
-	 */
-	OBD_FL_CKSUM_ALL    = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER |
-			      OBD_FL_CKSUM_CRC32C,
-
-	/* mask for local-only flag, which won't be sent over network */
-	OBD_FL_LOCAL_MASK   = 0xF0000000,
-};
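The OBD_FL_CKSUM_* bits above mirror the enum cksum_type bits declared earlier, just relocated into the o_flags word. A sketch of that correspondence (the helper name is illustrative; upstream keeps the real conversion in obd_cksum.h):

	/* Map a single cksum_type bit to its obdo flag equivalent. */
	static inline enum obdo_flags cksum_type_to_obd_flag(enum cksum_type t)
	{
		switch (t) {
		case OBD_CKSUM_CRC32:
			return OBD_FL_CKSUM_CRC32;
		case OBD_CKSUM_ADLER:
			return OBD_FL_CKSUM_ADLER;
		case OBD_CKSUM_CRC32C:
			return OBD_FL_CKSUM_CRC32C;
		}
		return OBD_FL_CKSUM_ADLER;	/* assumed fallback */
	}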
-
-/*
- * All LOV EA magics should have the same postfix.  If some new version of
- * Lustre introduces a new LOV EA magic, then after a downgrade to an old
- * Lustre, even though the old system does not recognize the new magic, it
- * can still distinguish the corrupted cases by checking the magic's postfix.
- */
-#define LOV_MAGIC_MAGIC 0x0BD0
-#define LOV_MAGIC_MASK  0xFFFF
-
-#define LOV_MAGIC_V1		(0x0BD10000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_JOIN_V1	(0x0BD20000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_V3		(0x0BD30000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_MIGRATE	(0x0BD40000 | LOV_MAGIC_MAGIC)
-/* reserved for specifying OSTs */
-#define LOV_MAGIC_SPECIFIC	(0x0BD50000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC		LOV_MAGIC_V1
-
-/*
- * magic for fully defined striping
- * the idea is that we should have different magics for striping "hints"
- * (struct lov_user_md_v[13]) and defined ready-to-use striping (struct
- * lov_mds_md_v[13]). at the moment the magics are used in the wire protocol,
- * so we can't just change them without lengthy preparation, but we still
- * need a mechanism to allow LOD to differentiate hint versus ready striping.
- * so, at the moment we do a trick: MDT knows what to expect from a request
- * depending on the case (replay uses ready striping, non-replay req uses
- * hints), so MDT replaces the magic with the appropriate one and now LOD
- * can easily understand what's inside -bzzz
- */
-#define LOV_MAGIC_V1_DEF  0x0CD10BD0
-#define LOV_MAGIC_V3_DEF  0x0CD30BD0
-
-#define lov_pattern(pattern)		(pattern & ~LOV_PATTERN_F_MASK)
-#define lov_pattern_flags(pattern)	(pattern & LOV_PATTERN_F_MASK)
-
-#define lov_ost_data lov_ost_data_v1
-struct lov_ost_data_v1 {	  /* per-stripe data structure (little-endian)*/
-	struct ost_id l_ost_oi;	  /* OST object ID */
-	__u32 l_ost_gen;	  /* generation of this l_ost_idx */
-	__u32 l_ost_idx;	  /* OST index in LOV (lov_tgt_desc->tgts) */
-};
-
-#define lov_mds_md lov_mds_md_v1
-struct lov_mds_md_v1 {	    /* LOV EA mds/wire data (little-endian) */
-	__u32 lmm_magic;	  /* magic number = LOV_MAGIC_V1 */
-	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-	struct ost_id	lmm_oi;	  /* LOV object ID */
-	__u32 lmm_stripe_size;    /* size of stripe in bytes */
-	/* lmm_stripe_count used to be __u32 */
-	__u16 lmm_stripe_count;   /* num stripes in use for this object */
-	__u16 lmm_layout_gen;     /* layout generation number */
-	struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-};
-
-#define MAX_MD_SIZE							\
-	(sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
-#define MIN_MD_SIZE							\
-	(sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
-
-#define XATTR_NAME_ACL_ACCESS   "system.posix_acl_access"
-#define XATTR_NAME_ACL_DEFAULT  "system.posix_acl_default"
-#define XATTR_USER_PREFIX       "user."
-#define XATTR_TRUSTED_PREFIX    "trusted."
-#define XATTR_SECURITY_PREFIX   "security."
-#define XATTR_LUSTRE_PREFIX     "lustre."
-
-#define XATTR_NAME_LOV	  "trusted.lov"
-#define XATTR_NAME_LMA	  "trusted.lma"
-#define XATTR_NAME_LMV	  "trusted.lmv"
-#define XATTR_NAME_DEFAULT_LMV	"trusted.dmv"
-#define XATTR_NAME_LINK	 "trusted.link"
-#define XATTR_NAME_FID	  "trusted.fid"
-#define XATTR_NAME_VERSION      "trusted.version"
-#define XATTR_NAME_SOM		"trusted.som"
-#define XATTR_NAME_HSM		"trusted.hsm"
-#define XATTR_NAME_LFSCK_NAMESPACE "trusted.lfsck_namespace"
-
-struct lov_mds_md_v3 {	    /* LOV EA mds/wire data (little-endian) */
-	__u32 lmm_magic;	  /* magic number = LOV_MAGIC_V3 */
-	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-	struct ost_id	lmm_oi;	  /* LOV object ID */
-	__u32 lmm_stripe_size;    /* size of stripe in bytes */
-	/* lmm_stripe_count used to be __u32 */
-	__u16 lmm_stripe_count;   /* num stripes in use for this object */
-	__u16 lmm_layout_gen;     /* layout generation number */
-	char  lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* must be 32bit aligned */
-	struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-};
-
-static inline __u32 lov_mds_md_size(__u16 stripes, __u32 lmm_magic)
-{
-	if (lmm_magic == LOV_MAGIC_V3)
-		return sizeof(struct lov_mds_md_v3) +
-				stripes * sizeof(struct lov_ost_data_v1);
-	else
-		return sizeof(struct lov_mds_md_v1) +
-				stripes * sizeof(struct lov_ost_data_v1);
-}
-
-static inline __u32
-lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
-{
-	switch (lmm_magic) {
-	case LOV_MAGIC_V1: {
-		struct lov_mds_md_v1 lmm;
-
-		if (buf_size < sizeof(lmm))
-			return 0;
-
-		return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]);
-	}
-	case LOV_MAGIC_V3: {
-		struct lov_mds_md_v3 lmm;
-
-		if (buf_size < sizeof(lmm))
-			return 0;
-
-		return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]);
-	}
-	default:
-		return 0;
-	}
-}
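The two sizing helpers are complementary: lov_mds_md_max_stripe_count() answers "how many stripes fit in this buffer", lov_mds_md_size() answers "how many bytes do N stripes need". A sketch of a buffer check built on them (lov_ea_fits is an illustrative name):

	/* Can buf_size bytes hold a LOV EA with the given stripe count? */
	static inline bool lov_ea_fits(size_t buf_size, __u16 stripes,
				       __u32 magic)
	{
		return stripes <= lov_mds_md_max_stripe_count(buf_size, magic);
	}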
-
-#define OBD_MD_FLID	   (0x00000001ULL) /* object ID */
-#define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
-#define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
-#define OBD_MD_FLCTIME     (0x00000008ULL) /* change time */
-#define OBD_MD_FLSIZE      (0x00000010ULL) /* size */
-#define OBD_MD_FLBLOCKS    (0x00000020ULL) /* allocated blocks count */
-#define OBD_MD_FLBLKSZ     (0x00000040ULL) /* block size */
-#define OBD_MD_FLMODE      (0x00000080ULL) /* access bits (mode & ~S_IFMT) */
-#define OBD_MD_FLTYPE      (0x00000100ULL) /* object type (mode & S_IFMT) */
-#define OBD_MD_FLUID       (0x00000200ULL) /* user ID */
-#define OBD_MD_FLGID       (0x00000400ULL) /* group ID */
-#define OBD_MD_FLFLAGS     (0x00000800ULL) /* flags word */
-#define OBD_MD_FLNLINK     (0x00002000ULL) /* link count */
-#define OBD_MD_FLGENER     (0x00004000ULL) /* generation number */
-/*#define OBD_MD_FLINLINE    (0x00008000ULL)  inline data. used until 1.6.5 */
-#define OBD_MD_FLRDEV      (0x00010000ULL) /* device number */
-#define OBD_MD_FLEASIZE    (0x00020000ULL) /* extended attribute data */
-#define OBD_MD_LINKNAME    (0x00040000ULL) /* symbolic link target */
-#define OBD_MD_FLHANDLE    (0x00080000ULL) /* file/lock handle */
-#define OBD_MD_FLCKSUM     (0x00100000ULL) /* bulk data checksum */
-#define OBD_MD_FLQOS       (0x00200000ULL) /* quality of service stats */
-/*#define OBD_MD_FLOSCOPQ    (0x00400000ULL) osc opaque data, never used */
-/*	OBD_MD_FLCOOKIE    (0x00800000ULL) obsolete in 2.8 */
-#define OBD_MD_FLGROUP     (0x01000000ULL) /* group */
-#define OBD_MD_FLFID       (0x02000000ULL) /* ->ost write inline fid */
-#define OBD_MD_FLEPOCH     (0x04000000ULL) /* ->ost write with ioepoch */
-					   /* ->mds if epoch opens or closes
-					    */
-#define OBD_MD_FLGRANT     (0x08000000ULL) /* ost preallocation space grant */
-#define OBD_MD_FLDIREA     (0x10000000ULL) /* dir's extended attribute data */
-#define OBD_MD_FLUSRQUOTA  (0x20000000ULL) /* over quota flags sent from ost */
-#define OBD_MD_FLGRPQUOTA  (0x40000000ULL) /* over quota flags sent from ost */
-#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */
-
-#define OBD_MD_MDS	   (0x0000000100000000ULL) /* where an inode lives */
-#define OBD_MD_REINT       (0x0000000200000000ULL) /* reintegrate oa */
-#define OBD_MD_MEA	   (0x0000000400000000ULL) /* CMD split EA  */
-#define OBD_MD_TSTATE      (0x0000000800000000ULL) /* transient state field */
-
-#define OBD_MD_FLXATTR       (0x0000001000000000ULL) /* xattr */
-#define OBD_MD_FLXATTRLS     (0x0000002000000000ULL) /* xattr list */
-#define OBD_MD_FLXATTRRM     (0x0000004000000000ULL) /* xattr remove */
-#define OBD_MD_FLACL	     (0x0000008000000000ULL) /* ACL */
-/*	OBD_MD_FLRMTPERM     (0x0000010000000000ULL) remote perm, obsolete */
-#define OBD_MD_FLMDSCAPA     (0x0000020000000000ULL) /* MDS capability */
-#define OBD_MD_FLOSSCAPA     (0x0000040000000000ULL) /* OSS capability */
-#define OBD_MD_FLCKSPLIT     (0x0000080000000000ULL) /* Check split on server */
-#define OBD_MD_FLCROSSREF    (0x0000100000000000ULL) /* Cross-ref case */
-#define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes
-						      * under lock; for xattr
-						      * requests means the
-						      * client holds the lock
-						      */
-#define OBD_MD_FLOBJCOUNT    (0x0000400000000000ULL) /* for multiple destroy */
-
-/*	OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */
-/*	OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */
-/*	OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */
-/*	OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
-
-#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
-#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
-							    * executed
-							    */
-
-#define OBD_MD_DEFAULT_MEA   (0x0040000000000000ULL) /* default MEA */
-
-#define OBD_MD_FLGETATTR (OBD_MD_FLID    | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
-			  OBD_MD_FLCTIME | OBD_MD_FLSIZE  | OBD_MD_FLBLKSZ | \
-			  OBD_MD_FLMODE  | OBD_MD_FLTYPE  | OBD_MD_FLUID   | \
-			  OBD_MD_FLGID   | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \
-			  OBD_MD_FLGENER | OBD_MD_FLRDEV  | OBD_MD_FLGROUP)
-
-#define OBD_MD_FLXATTRALL (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS)
-
-/* don't forget obdo_fid which is way down at the bottom so it can
- * come after the definition of llog_cookie
- */
-
-enum hss_valid {
-	HSS_SETMASK	= 0x01,
-	HSS_CLEARMASK	= 0x02,
-	HSS_ARCHIVE_ID	= 0x04,
-};
-
-struct hsm_state_set {
-	__u32	hss_valid;
-	__u32	hss_archive_id;
-	__u64	hss_setmask;
-	__u64	hss_clearmask;
-};
-
-/* ost_body.data values for OST_BRW */
-
-#define OBD_BRW_READ		0x01
-#define OBD_BRW_WRITE		0x02
-#define OBD_BRW_RWMASK		(OBD_BRW_READ | OBD_BRW_WRITE)
-#define OBD_BRW_SYNC		0x08 /* this page is a part of synchronous
-				      * transfer and is not accounted in
-				      * the grant.
-				      */
-#define OBD_BRW_CHECK		0x10
-#define OBD_BRW_FROM_GRANT      0x20 /* the osc manages this under llite */
-#define OBD_BRW_GRANTED		0x40 /* the ost manages this */
-#define OBD_BRW_NOCACHE		0x80 /* this page is a part of non-cached IO */
-#define OBD_BRW_NOQUOTA	       0x100
-#define OBD_BRW_SRVLOCK	       0x200 /* Client holds no lock over this page */
-#define OBD_BRW_ASYNC	       0x400 /* Server may delay commit to disk */
-#define OBD_BRW_MEMALLOC       0x800 /* Client runs in the "kswapd" context */
-#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
-#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
-#define OBD_BRW_SOFT_SYNC     0x4000 /* This flag notifies the server
-				      * that the client is running low on
-				      * space for unstable pages; asking
-				      * it to sync quickly
-				      */
-
-#define OBD_OBJECT_EOF	LUSTRE_EOF
-
-#define OST_MIN_PRECREATE 32
-#define OST_MAX_PRECREATE 20000
-
-struct obd_ioobj {
-	struct ost_id	ioo_oid;	/* object ID, if multi-obj BRW */
-	__u32		ioo_max_brw;	/* low 16 bits were o_mode before 2.4,
-					 * now (PTLRPC_BULK_OPS_COUNT - 1) in
-					 * high 16 bits in 2.4 and later
-					 */
-	__u32		ioo_bufcnt;	/* number of niobufs for this object */
-};
-
-/*
- * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in
- * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS.
- * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits.
- */
-#define IOOBJ_MAX_BRW_BITS	16
-#define ioobj_max_brw_get(ioo)	(((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1)
-#define ioobj_max_brw_set(ioo, num)					\
-do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0)
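Because the macros store num - 1 in the high 16 bits, the encoding round-trips exactly for counts from 1 to 65536. A small demonstration sketch (assert.h assumed; kernel code would not use assert()):

	#include <assert.h>

	static void ioobj_max_brw_demo(void)
	{
		struct obd_ioobj ioo = { };

		ioobj_max_brw_set(&ioo, 4);	/* stores (4 - 1) << 16 */
		assert(ioobj_max_brw_get(&ioo) == 4);
	}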
-
-/* multiple of 8 bytes => can array */
-struct niobuf_remote {
-	__u64	rnb_offset;
-	__u32	rnb_len;
-	__u32	rnb_flags;
-};
-
-/* lock value block communicated between the filter and llite */
-
-/* OST_LVB_ERR_INIT is needed because the return code rc is negative:
- * ((OST_LVB_ERR_MASK + rc) & OST_LVB_ERR_MASK) != OST_LVB_ERR_MASK, so
- * errors are encoded relative to OST_LVB_ERR_INIT instead, whose extra
- * high bit keeps the result within the range matched by the mask.
- */
-#define OST_LVB_ERR_INIT 0xffbadbad80000000ULL
-#define OST_LVB_ERR_MASK 0xffbadbad00000000ULL
-#define OST_LVB_IS_ERR(blocks)					  \
-	((blocks & OST_LVB_ERR_MASK) == OST_LVB_ERR_MASK)
-#define OST_LVB_SET_ERR(blocks, rc)				     \
-	do { blocks = OST_LVB_ERR_INIT + rc; } while (0)
-#define OST_LVB_GET_ERR(blocks)    (int)(blocks - OST_LVB_ERR_INIT)
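Worked through with rc == -28 (-ENOSPC): OST_LVB_SET_ERR() stores 0xffbadbad80000000 - 28 = 0xffbadbad7fffffe4, which still matches OST_LVB_ERR_MASK because the extra 0x80000000 in OST_LVB_ERR_INIT absorbs the negative rc. A demonstration sketch (assert.h assumed):

	#include <assert.h>

	static void ost_lvb_err_demo(void)
	{
		__u64 blocks;

		OST_LVB_SET_ERR(blocks, -28);
		assert(OST_LVB_IS_ERR(blocks));
		assert(OST_LVB_GET_ERR(blocks) == -28);
	}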
-
-struct ost_lvb_v1 {
-	__u64		lvb_size;
-	__s64		lvb_mtime;
-	__s64		lvb_atime;
-	__s64		lvb_ctime;
-	__u64		lvb_blocks;
-};
-
-struct ost_lvb {
-	__u64		lvb_size;
-	__s64		lvb_mtime;
-	__s64		lvb_atime;
-	__s64		lvb_ctime;
-	__u64		lvb_blocks;
-	__u32		lvb_mtime_ns;
-	__u32		lvb_atime_ns;
-	__u32		lvb_ctime_ns;
-	__u32		lvb_padding;
-};
-
-/*
- *   lquota data structures
- */
-
-/* The lquota_id structure is a union of all the possible identifier types that
- * can be used with quota, this includes:
- * - 64-bit user ID
- * - 64-bit group ID
- * - a FID which can be used for per-directory quota in the future
- */
-union lquota_id {
-	struct lu_fid	qid_fid; /* FID for per-directory quota */
-	__u64		qid_uid; /* user identifier */
-	__u64		qid_gid; /* group identifier */
-};
-
-/* quotactl management */
-struct obd_quotactl {
-	__u32			qc_cmd;
-	__u32			qc_type; /* see Q_* flag below */
-	__u32			qc_id;
-	__u32			qc_stat;
-	struct obd_dqinfo	qc_dqinfo;
-	struct obd_dqblk	qc_dqblk;
-};
-
-#define Q_COPY(out, in, member) (out)->member = (in)->member
-
-#define QCTL_COPY(out, in)		\
-do {					\
-	Q_COPY(out, in, qc_cmd);	\
-	Q_COPY(out, in, qc_type);	\
-	Q_COPY(out, in, qc_id);		\
-	Q_COPY(out, in, qc_stat);	\
-	Q_COPY(out, in, qc_dqinfo);	\
-	Q_COPY(out, in, qc_dqblk);	\
-} while (0)
-
-/* Data structures associated with the quota locks */
-
-/* Glimpse descriptor used for the index & per-ID quota locks */
-struct ldlm_gl_lquota_desc {
-	union lquota_id	gl_id;    /* quota ID subject to the glimpse */
-	__u64		gl_flags; /* see LQUOTA_FL* below */
-	__u64		gl_ver;   /* new index version */
-	__u64		gl_hardlimit; /* new hardlimit or qunit value */
-	__u64		gl_softlimit; /* new softlimit */
-	__u64		gl_time;
-	__u64		gl_pad2;
-};
-
-/* quota glimpse flags */
-#define LQUOTA_FL_EDQUOT 0x1 /* user/group out of quota space on QMT */
-
-/* LVB used with quota (global and per-ID) locks */
-struct lquota_lvb {
-	__u64	lvb_flags;	/* see LQUOTA_FL* above */
-	__u64	lvb_id_may_rel; /* space that might be released later */
-	__u64	lvb_id_rel;     /* space released by the slave for this ID */
-	__u64	lvb_id_qunit;   /* current qunit value */
-	__u64	lvb_pad1;
-};
-
-/* op codes */
-enum quota_cmd {
-	QUOTA_DQACQ	= 601,
-	QUOTA_DQREL	= 602,
-	QUOTA_LAST_OPC
-};
-#define QUOTA_FIRST_OPC	QUOTA_DQACQ
-
-/*
- *   MDS REQ RECORDS
- */
-
-/* opcodes */
-enum mds_cmd {
-	MDS_GETATTR		= 33,
-	MDS_GETATTR_NAME	= 34,
-	MDS_CLOSE		= 35,
-	MDS_REINT		= 36,
-	MDS_READPAGE		= 37,
-	MDS_CONNECT		= 38,
-	MDS_DISCONNECT		= 39,
-	MDS_GETSTATUS		= 40,
-	MDS_STATFS		= 41,
-	MDS_PIN			= 42, /* obsolete, never used in a release */
-	MDS_UNPIN		= 43, /* obsolete, never used in a release */
-	MDS_SYNC		= 44,
-	MDS_DONE_WRITING	= 45, /* obsolete since 2.8.0 */
-	MDS_SET_INFO		= 46,
-	MDS_QUOTACHECK		= 47, /* not used since 2.4 */
-	MDS_QUOTACTL		= 48,
-	MDS_GETXATTR		= 49,
-	MDS_SETXATTR		= 50, /* obsolete, now it's MDS_REINT op */
-	MDS_WRITEPAGE		= 51,
-	MDS_IS_SUBDIR		= 52, /* obsolete, never used in a release */
-	MDS_GET_INFO		= 53,
-	MDS_HSM_STATE_GET	= 54,
-	MDS_HSM_STATE_SET	= 55,
-	MDS_HSM_ACTION		= 56,
-	MDS_HSM_PROGRESS	= 57,
-	MDS_HSM_REQUEST		= 58,
-	MDS_HSM_CT_REGISTER	= 59,
-	MDS_HSM_CT_UNREGISTER	= 60,
-	MDS_SWAP_LAYOUTS	= 61,
-	MDS_LAST_OPC
-};
-
-#define MDS_FIRST_OPC    MDS_GETATTR
-
-/*
- * Do not exceed 63
- */
-
-enum mdt_reint_cmd {
-	REINT_SETATTR  = 1,
-	REINT_CREATE   = 2,
-	REINT_LINK     = 3,
-	REINT_UNLINK   = 4,
-	REINT_RENAME   = 5,
-	REINT_OPEN     = 6,
-	REINT_SETXATTR = 7,
-	REINT_RMENTRY  = 8,
-	REINT_MIGRATE  = 9,
-	REINT_MAX
-};
-
-/* the disposition of the intent outlines what was executed */
-#define DISP_IT_EXECD	0x00000001
-#define DISP_LOOKUP_EXECD    0x00000002
-#define DISP_LOOKUP_NEG      0x00000004
-#define DISP_LOOKUP_POS      0x00000008
-#define DISP_OPEN_CREATE     0x00000010
-#define DISP_OPEN_OPEN       0x00000020
-#define DISP_ENQ_COMPLETE    0x00400000		/* obsolete and unused */
-#define DISP_ENQ_OPEN_REF    0x00800000
-#define DISP_ENQ_CREATE_REF  0x01000000
-#define DISP_OPEN_LOCK       0x02000000
-#define DISP_OPEN_LEASE      0x04000000
-#define DISP_OPEN_STRIPE     0x08000000
-#define DISP_OPEN_DENY		0x10000000
-
-/* INODE LOCK PARTS */
-#define MDS_INODELOCK_LOOKUP 0x000001	/* For namespace, dentry etc, and also
-					 * was used to protect permission (mode,
-					 * owner, group etc) before 2.4.
-					 */
-#define MDS_INODELOCK_UPDATE 0x000002	/* size, links, timestamps */
-#define MDS_INODELOCK_OPEN   0x000004	/* For opened files */
-#define MDS_INODELOCK_LAYOUT 0x000008	/* for layout */
-
-/* The PERM bit was added in 2.4; it is used to protect permission (mode,
- * owner, group, acl, etc.) separately from the LOOKUP lock, because for
- * remote directories (in DNE) these locks will be granted by different
- * MDTs (different ldlm namespaces).
- *
- * For a local directory, the MDT always grants UPDATE_LOCK|PERM_LOCK
- * together.  For a remote directory, the master MDT, where the remote
- * directory is, grants UPDATE_LOCK|PERM_LOCK, and the remote MDT, where
- * the name entry is, grants LOOKUP_LOCK.
- */
-#define MDS_INODELOCK_PERM   0x000010
-#define MDS_INODELOCK_XATTR  0x000020	/* extended attributes */
-
-#define MDS_INODELOCK_MAXSHIFT 5
-/* This FULL lock is useful to take on unlink sort of operations */
-#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1)
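With MDS_INODELOCK_MAXSHIFT == 5, FULL expands to (1 << 6) - 1 == 0x3f, i.e. exactly the OR of the six bits defined above. A compile-time check sketch (C11 _Static_assert; kernel code would use BUILD_BUG_ON instead):

	_Static_assert(MDS_INODELOCK_FULL ==
		       (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
			MDS_INODELOCK_OPEN   | MDS_INODELOCK_LAYOUT |
			MDS_INODELOCK_PERM   | MDS_INODELOCK_XATTR),
		       "MDS_INODELOCK_FULL must cover every defined lock bit");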
-
-/* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
- * but was moved into name[1] along with the OID to avoid consuming the
- * name[2,3] fields that need to be used for the quota id (also a FID).
- */
-enum {
-	LUSTRE_RES_ID_SEQ_OFF = 0,
-	LUSTRE_RES_ID_VER_OID_OFF = 1,
-	LUSTRE_RES_ID_WAS_VER_OFF = 2, /* see note above */
-	LUSTRE_RES_ID_QUOTA_SEQ_OFF = 2,
-	LUSTRE_RES_ID_QUOTA_VER_OID_OFF = 3,
-	LUSTRE_RES_ID_HSH_OFF = 3
-};
-
-#define MDS_STATUS_CONN 1
-#define MDS_STATUS_LOV 2
-
-/* these should be identical to their EXT4_*_FL counterparts; they are
- * redefined here only to avoid dragging in fs/ext4/ext4.h
- */
-#define LUSTRE_SYNC_FL	 0x00000008 /* Synchronous updates */
-#define LUSTRE_IMMUTABLE_FL    0x00000010 /* Immutable file */
-#define LUSTRE_APPEND_FL       0x00000020 /* writes to file may only append */
-#define LUSTRE_NODUMP_FL	0x00000040 /* do not dump file */
-#define LUSTRE_NOATIME_FL      0x00000080 /* do not update atime */
-#define LUSTRE_INDEX_FL		0x00001000 /* hash-indexed directory */
-#define LUSTRE_DIRSYNC_FL      0x00010000 /* dirsync behaviour (dir only) */
-#define LUSTRE_TOPDIR_FL	0x00020000 /* Top of directory hierarchies*/
-#define LUSTRE_DIRECTIO_FL	0x00100000 /* Use direct i/o */
-#define LUSTRE_INLINE_DATA_FL	0x10000000 /* Inode has inline data. */
-
-/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
- * for the client inode i_flags.  The LUSTRE_*_FL are the Lustre wire
- * protocol equivalents of LDISKFS_*_FL values stored on disk, while
- * the S_* flags are kernel-internal values that change between kernel
- * versions.  These flags are set/cleared via FSFILT_IOC_{GET,SET}_FLAGS.
- * See b=16526 for a full history.
- */
-static inline int ll_ext_to_inode_flags(int flags)
-{
-	return (((flags & LUSTRE_SYNC_FL)      ? S_SYNC      : 0) |
-		((flags & LUSTRE_NOATIME_FL)   ? S_NOATIME   : 0) |
-		((flags & LUSTRE_APPEND_FL)    ? S_APPEND    : 0) |
-		((flags & LUSTRE_DIRSYNC_FL)   ? S_DIRSYNC   : 0) |
-		((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0));
-}
-
-static inline int ll_inode_to_ext_flags(int iflags)
-{
-	return (((iflags & S_SYNC)      ? LUSTRE_SYNC_FL      : 0) |
-		((iflags & S_NOATIME)   ? LUSTRE_NOATIME_FL   : 0) |
-		((iflags & S_APPEND)    ? LUSTRE_APPEND_FL    : 0) |
-		((iflags & S_DIRSYNC)   ? LUSTRE_DIRSYNC_FL   : 0) |
-		((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0));
-}
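Only the five flags mapped in these two helpers survive a round-trip; wire flags with no S_* counterpart here (LUSTRE_NODUMP_FL, for instance) are silently dropped by ll_ext_to_inode_flags(). A sketch of the invariant for the mapped subset (assert.h assumed):

	#include <assert.h>

	static void lustre_flags_roundtrip_demo(void)
	{
		int wire = LUSTRE_SYNC_FL | LUSTRE_APPEND_FL;

		assert(ll_inode_to_ext_flags(ll_ext_to_inode_flags(wire)) == wire);
	}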
-
-/* 64 possible states */
-enum md_transient_state {
-	MS_RESTORE	= (1 << 0),	/* restore is running */
-};
-
-struct mdt_body {
-	struct lu_fid mbo_fid1;
-	struct lu_fid mbo_fid2;
-	struct lustre_handle mbo_handle;
-	__u64	mbo_valid;
-	__u64	mbo_size;	/* Offset, in the case of MDS_READPAGE */
-	__s64	mbo_mtime;
-	__s64	mbo_atime;
-	__s64	mbo_ctime;
-	__u64	mbo_blocks;	/* XID, in the case of MDS_READPAGE */
-	__u64	mbo_ioepoch;
-	__u64	mbo_t_state;	/* transient file state defined in
-				 * enum md_transient_state
-				 * was "ino" until 2.4.0
-				 */
-	__u32	mbo_fsuid;
-	__u32	mbo_fsgid;
-	__u32	mbo_capability;
-	__u32	mbo_mode;
-	__u32	mbo_uid;
-	__u32	mbo_gid;
-	__u32	mbo_flags;	/* LUSTRE_*_FL file attributes */
-	__u32	mbo_rdev;
-	__u32	mbo_nlink;	/* #bytes to read in the case of MDS_READPAGE */
-	__u32	mbo_unused2;	/* was "generation" until 2.4.0 */
-	__u32	mbo_suppgid;
-	__u32	mbo_eadatasize;
-	__u32	mbo_aclsize;
-	__u32	mbo_max_mdsize;
-	__u32	mbo_unused3;	/* was max_cookiesize until 2.8 */
-	__u32	mbo_uid_h;	/* high 32-bits of uid, for FUID */
-	__u32	mbo_gid_h;	/* high 32-bits of gid, for FUID */
-	__u32	mbo_padding_5;	/* also fix lustre_swab_mdt_body */
-	__u64	mbo_padding_6;
-	__u64	mbo_padding_7;
-	__u64	mbo_padding_8;
-	__u64	mbo_padding_9;
-	__u64	mbo_padding_10;
-}; /* 216 */
-
-struct mdt_ioepoch {
-	struct lustre_handle mio_handle;
-	__u64 mio_unused1; /* was ioepoch */
-	__u32 mio_unused2; /* was flags */
-	__u32 mio_padding;
-};
-
-/* permissions for md_perm.mp_perm */
-enum {
-	CFS_SETUID_PERM = 0x01,
-	CFS_SETGID_PERM = 0x02,
-	CFS_SETGRP_PERM = 0x04,
-};
-
-struct mdt_rec_setattr {
-	__u32	   sa_opcode;
-	__u32	   sa_cap;
-	__u32	   sa_fsuid;
-	__u32	   sa_fsuid_h;
-	__u32	   sa_fsgid;
-	__u32	   sa_fsgid_h;
-	__u32	   sa_suppgid;
-	__u32	   sa_suppgid_h;
-	__u32	   sa_padding_1;
-	__u32	   sa_padding_1_h;
-	struct lu_fid   sa_fid;
-	__u64	   sa_valid;
-	__u32	   sa_uid;
-	__u32	   sa_gid;
-	__u64	   sa_size;
-	__u64	   sa_blocks;
-	__s64	   sa_mtime;
-	__s64	   sa_atime;
-	__s64	   sa_ctime;
-	__u32	   sa_attr_flags;
-	__u32	   sa_mode;
-	__u32	   sa_bias;      /* some operation flags */
-	__u32	   sa_padding_3;
-	__u32	   sa_padding_4;
-	__u32	   sa_padding_5;
-};
-
-/*
- * Attribute flags used in mdt_rec_setattr::sa_valid.
- * The kernel's #defines for ATTR_* should not be used over the network
- * since the client and MDS may run different kernels (see bug 13828)
- * Therefore, we should only use MDS_ATTR_* attributes for sa_valid.
- */
-#define MDS_ATTR_MODE	       0x1ULL /* = 1 */
-#define MDS_ATTR_UID	       0x2ULL /* = 2 */
-#define MDS_ATTR_GID	       0x4ULL /* = 4 */
-#define MDS_ATTR_SIZE	       0x8ULL /* = 8 */
-#define MDS_ATTR_ATIME	      0x10ULL /* = 16 */
-#define MDS_ATTR_MTIME	      0x20ULL /* = 32 */
-#define MDS_ATTR_CTIME	      0x40ULL /* = 64 */
-#define MDS_ATTR_ATIME_SET    0x80ULL /* = 128 */
-#define MDS_ATTR_MTIME_SET   0x100ULL /* = 256 */
-#define MDS_ATTR_FORCE       0x200ULL /* = 512, not a change itself, but forces the change through */
-#define MDS_ATTR_ATTR_FLAG   0x400ULL /* = 1024 */
-#define MDS_ATTR_KILL_SUID   0x800ULL /* = 2048 */
-#define MDS_ATTR_KILL_SGID  0x1000ULL /* = 4096 */
-#define MDS_ATTR_CTIME_SET  0x2000ULL /* = 8192 */
-#define MDS_ATTR_FROM_OPEN  0x4000ULL /* = 16384, called from open path,
-				       * ie O_TRUNC
-				       */
-#define MDS_ATTR_BLOCKS     0x8000ULL /* = 32768 */
-
-#define MDS_FMODE_CLOSED	 00000000
-#define MDS_FMODE_EXEC	   00000004
-/*	MDS_FMODE_EPOCH		01000000 obsolete since 2.8.0 */
-/*	MDS_FMODE_TRUNC		02000000 obsolete since 2.8.0 */
-/*	MDS_FMODE_SOM		04000000 obsolete since 2.8.0 */
-
-#define MDS_OPEN_CREATED	 00000010
-#define MDS_OPEN_CROSS	   00000020
-
-#define MDS_OPEN_CREAT	   00000100
-#define MDS_OPEN_EXCL	    00000200
-#define MDS_OPEN_TRUNC	   00001000
-#define MDS_OPEN_APPEND	  00002000
-#define MDS_OPEN_SYNC	    00010000
-#define MDS_OPEN_DIRECTORY       00200000
-
-#define MDS_OPEN_BY_FID		040000000 /* open_by_fid for known object */
-#define MDS_OPEN_DELAY_CREATE  0100000000 /* delay initial object create */
-#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
-#define MDS_OPEN_JOIN_FILE     0400000000 /* open for join file.
-					   * We no longer support JOIN FILE;
-					   * this flag is reserved only to
-					   * keep the bit from being
-					   * reused.
-					   */
-
-#define MDS_OPEN_LOCK	      04000000000 /* This open requires open lock */
-#define MDS_OPEN_HAS_EA      010000000000 /* specify object create pattern */
-#define MDS_OPEN_HAS_OBJS    020000000000 /* just set the EA, the objects already exist */
-#define MDS_OPEN_NORESTORE  0100000000000ULL /* Do not restore file at open */
-#define MDS_OPEN_NEWSTRIPE  0200000000000ULL /* New stripe needed (restripe or
-					      * hsm restore)
-					      */
-#define MDS_OPEN_VOLATILE   0400000000000ULL /* File is volatile = created
-					      * unlinked
-					      */
-#define MDS_OPEN_LEASE	   01000000000000ULL /* Open the file and grant lease
-					      * delegation, succeed if it's not
-					      * being opened with conflict mode.
-					      */
-#define MDS_OPEN_RELEASE   02000000000000ULL /* Open the file for HSM release */
-
-#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |	\
-			      MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |	\
-			      MDS_OPEN_BY_FID | MDS_OPEN_LEASE |	\
-			      MDS_OPEN_RELEASE)
-
-enum mds_op_bias {
-	MDS_CHECK_SPLIT		= 1 << 0,
-	MDS_CROSS_REF		= 1 << 1,
-	MDS_VTX_BYPASS		= 1 << 2,
-	MDS_PERM_BYPASS		= 1 << 3,
-/*	MDS_SOM			= 1 << 4, obsolete since 2.8.0 */
-	MDS_QUOTA_IGNORE	= 1 << 5,
-	MDS_CLOSE_CLEANUP	= 1 << 6,
-	MDS_KEEP_ORPHAN		= 1 << 7,
-	MDS_RECOV_OPEN		= 1 << 8,
-	MDS_DATA_MODIFIED	= 1 << 9,
-	MDS_CREATE_VOLATILE	= 1 << 10,
-	MDS_OWNEROVERRIDE	= 1 << 11,
-	MDS_HSM_RELEASE		= 1 << 12,
-	MDS_RENAME_MIGRATE	= 1 << 13,
-	MDS_CLOSE_LAYOUT_SWAP	= 1 << 14,
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_create {
-	__u32	   cr_opcode;
-	__u32	   cr_cap;
-	__u32	   cr_fsuid;
-	__u32	   cr_fsuid_h;
-	__u32	   cr_fsgid;
-	__u32	   cr_fsgid_h;
-	__u32	   cr_suppgid1;
-	__u32	   cr_suppgid1_h;
-	__u32	   cr_suppgid2;
-	__u32	   cr_suppgid2_h;
-	struct lu_fid   cr_fid1;
-	struct lu_fid   cr_fid2;
-	struct lustre_handle cr_old_handle; /* handle in case of open replay */
-	__s64	   cr_time;
-	__u64	   cr_rdev;
-	__u64	   cr_ioepoch;
-	__u64	   cr_padding_1;   /* rr_blocks */
-	__u32	   cr_mode;
-	__u32	   cr_bias;
-	/* the set/get_mrc_cr_flags() helpers are needed to access the
-	 * 64-bit cr_flags [cr_flags_l, cr_flags_h]; the split was done to
-	 * extend the cr_flags size without breaking 1.8 compatibility
-	 */
-	__u32	   cr_flags_l;     /* for use with open, low  32 bits  */
-	__u32	   cr_flags_h;     /* for use with open, high 32 bits */
-	__u32	   cr_umask;       /* umask for create */
-	__u32	   cr_padding_4;   /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_link {
-	__u32	   lk_opcode;
-	__u32	   lk_cap;
-	__u32	   lk_fsuid;
-	__u32	   lk_fsuid_h;
-	__u32	   lk_fsgid;
-	__u32	   lk_fsgid_h;
-	__u32	   lk_suppgid1;
-	__u32	   lk_suppgid1_h;
-	__u32	   lk_suppgid2;
-	__u32	   lk_suppgid2_h;
-	struct lu_fid   lk_fid1;
-	struct lu_fid   lk_fid2;
-	__s64	   lk_time;
-	__u64	   lk_padding_1;   /* rr_atime */
-	__u64	   lk_padding_2;   /* rr_ctime */
-	__u64	   lk_padding_3;   /* rr_size */
-	__u64	   lk_padding_4;   /* rr_blocks */
-	__u32	   lk_bias;
-	__u32	   lk_padding_5;   /* rr_mode */
-	__u32	   lk_padding_6;   /* rr_flags */
-	__u32	   lk_padding_7;   /* rr_padding_2 */
-	__u32	   lk_padding_8;   /* rr_padding_3 */
-	__u32	   lk_padding_9;   /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_unlink {
-	__u32	   ul_opcode;
-	__u32	   ul_cap;
-	__u32	   ul_fsuid;
-	__u32	   ul_fsuid_h;
-	__u32	   ul_fsgid;
-	__u32	   ul_fsgid_h;
-	__u32	   ul_suppgid1;
-	__u32	   ul_suppgid1_h;
-	__u32	   ul_suppgid2;
-	__u32	   ul_suppgid2_h;
-	struct lu_fid   ul_fid1;
-	struct lu_fid   ul_fid2;
-	__s64	   ul_time;
-	__u64	   ul_padding_2;   /* rr_atime */
-	__u64	   ul_padding_3;   /* rr_ctime */
-	__u64	   ul_padding_4;   /* rr_size */
-	__u64	   ul_padding_5;   /* rr_blocks */
-	__u32	   ul_bias;
-	__u32	   ul_mode;
-	__u32	   ul_padding_6;   /* rr_flags */
-	__u32	   ul_padding_7;   /* rr_padding_2 */
-	__u32	   ul_padding_8;   /* rr_padding_3 */
-	__u32	   ul_padding_9;   /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_rename {
-	__u32	   rn_opcode;
-	__u32	   rn_cap;
-	__u32	   rn_fsuid;
-	__u32	   rn_fsuid_h;
-	__u32	   rn_fsgid;
-	__u32	   rn_fsgid_h;
-	__u32	   rn_suppgid1;
-	__u32	   rn_suppgid1_h;
-	__u32	   rn_suppgid2;
-	__u32	   rn_suppgid2_h;
-	struct lu_fid   rn_fid1;
-	struct lu_fid   rn_fid2;
-	__s64	   rn_time;
-	__u64	   rn_padding_1;   /* rr_atime */
-	__u64	   rn_padding_2;   /* rr_ctime */
-	__u64	   rn_padding_3;   /* rr_size */
-	__u64	   rn_padding_4;   /* rr_blocks */
-	__u32	   rn_bias;	/* some operation flags */
-	__u32	   rn_mode;	/* cross-ref rename has mode */
-	__u32	   rn_padding_5;   /* rr_flags */
-	__u32	   rn_padding_6;   /* rr_padding_2 */
-	__u32	   rn_padding_7;   /* rr_padding_3 */
-	__u32	   rn_padding_8;   /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_setxattr {
-	__u32	   sx_opcode;
-	__u32	   sx_cap;
-	__u32	   sx_fsuid;
-	__u32	   sx_fsuid_h;
-	__u32	   sx_fsgid;
-	__u32	   sx_fsgid_h;
-	__u32	   sx_suppgid1;
-	__u32	   sx_suppgid1_h;
-	__u32	   sx_suppgid2;
-	__u32	   sx_suppgid2_h;
-	struct lu_fid   sx_fid;
-	__u64	   sx_padding_1;   /* These three are rr_fid2 */
-	__u32	   sx_padding_2;
-	__u32	   sx_padding_3;
-	__u64	   sx_valid;
-	__s64	   sx_time;
-	__u64	   sx_padding_5;   /* rr_ctime */
-	__u64	   sx_padding_6;   /* rr_size */
-	__u64	   sx_padding_7;   /* rr_blocks */
-	__u32	   sx_size;
-	__u32	   sx_flags;
-	__u32	   sx_padding_8;   /* rr_flags */
-	__u32	   sx_padding_9;   /* rr_padding_2 */
-	__u32	   sx_padding_10;  /* rr_padding_3 */
-	__u32	   sx_padding_11;  /* rr_padding_4 */
-};
-
-/*
- * mdt_rec_reint is the template for all mdt_reint_xxx structures.
- * Do NOT change the size of the various members, otherwise the values
- * will be corrupted by lustre_swab_mdt_rec_reint().
- *
- * If you add new members in other mdt_reint_xxx structures and need to use the
- * rr_padding_x fields, then update lustre_swab_mdt_rec_reint() also.
- */
-struct mdt_rec_reint {
-	__u32	   rr_opcode;
-	__u32	   rr_cap;
-	__u32	   rr_fsuid;
-	__u32	   rr_fsuid_h;
-	__u32	   rr_fsgid;
-	__u32	   rr_fsgid_h;
-	__u32	   rr_suppgid1;
-	__u32	   rr_suppgid1_h;
-	__u32	   rr_suppgid2;
-	__u32	   rr_suppgid2_h;
-	struct lu_fid   rr_fid1;
-	struct lu_fid   rr_fid2;
-	__s64	   rr_mtime;
-	__s64	   rr_atime;
-	__s64	   rr_ctime;
-	__u64	   rr_size;
-	__u64	   rr_blocks;
-	__u32	   rr_bias;
-	__u32	   rr_mode;
-	__u32	   rr_flags;
-	__u32	   rr_flags_h;
-	__u32	   rr_umask;
-	__u32	   rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
-};
-
-/* lmv structures */
-struct lmv_desc {
-	__u32 ld_tgt_count;		/* how many MDS's */
-	__u32 ld_active_tgt_count;	 /* how many active */
-	__u32 ld_default_stripe_count;     /* how many objects are used */
-	__u32 ld_pattern;		  /* default hash pattern */
-	__u64 ld_default_hash_size;
-	__u64 ld_padding_1;		/* also fix lustre_swab_lmv_desc */
-	__u32 ld_padding_2;		/* also fix lustre_swab_lmv_desc */
-	__u32 ld_qos_maxage;	       /* in seconds */
-	__u32 ld_padding_3;		/* also fix lustre_swab_lmv_desc */
-	__u32 ld_padding_4;		/* also fix lustre_swab_lmv_desc */
-	struct obd_uuid ld_uuid;
-};
-
-/* LMV layout EA, and it will be stored both in master and slave object */
-struct lmv_mds_md_v1 {
-	__u32 lmv_magic;
-	__u32 lmv_stripe_count;
-	__u32 lmv_master_mdt_index;	/* On master object, it is master
-					 * MDT index, on slave object, it
-					 * is stripe index of the slave obj
-					 */
-	__u32 lmv_hash_type;		/* dir stripe policy, i.e. indicates
-					 * which hash function is to be used.
-					 * Note: only the lower 16 bits are
-					 * used for now. The higher 16 bits
-					 * will be used to mark the object
-					 * status, e.g. migrating or dead.
-					 */
-	__u32 lmv_layout_version;	/* Used for directory restriping */
-	__u32 lmv_padding1;
-	__u64 lmv_padding2;
-	__u64 lmv_padding3;
-	char lmv_pool_name[LOV_MAXPOOLNAME + 1];/* pool name */
-	struct lu_fid lmv_stripe_fids[0];	/* FIDs for each stripe */
-};
-
-#define LMV_MAGIC_V1	 0x0CD20CD0	/* normal stripe lmv magic */
-#define LMV_MAGIC	 LMV_MAGIC_V1
-
-/* #define LMV_USER_MAGIC 0x0CD30CD0 */
-#define LMV_MAGIC_STRIPE 0x0CD40CD0	/* magic for dir sub_stripe */
-
-/*
- * Right now only the lower part (0-16 bits) of lmv_hash_type is being used,
- * and the higher part will be the flag to indicate the status of the object,
- * for example that the object is being migrated. The hash function
- * might be interpreted differently with different flags.
- */
-#define LMV_HASH_TYPE_MASK		0x0000ffff
-
-#define LMV_HASH_FLAG_MIGRATION		0x80000000
-#define LMV_HASH_FLAG_DEAD		0x40000000
-
-/**
- * The FNV-1a hash algorithm is as follows:
- *     hash = FNV_offset_basis
- *     for each octet_of_data to be hashed
- *             hash = hash XOR octet_of_data
- *             hash = hash × FNV_prime
- *     return hash
- * http://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1a_hash
- *
- * http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
- * FNV_prime is 2^40 + 2^8 + 0xb3 = 0x100000001b3ULL
- **/
-#define LUSTRE_FNV_1A_64_PRIME		0x100000001b3ULL
-#define LUSTRE_FNV_1A_64_OFFSET_BIAS	0xcbf29ce484222325ULL
-static inline __u64 lustre_hash_fnv_1a_64(const void *buf, size_t size)
-{
-	__u64 hash = LUSTRE_FNV_1A_64_OFFSET_BIAS;
-	const unsigned char *p = buf;
-	size_t i;
-
-	for (i = 0; i < size; i++) {
-		hash ^= p[i];
-		hash *= LUSTRE_FNV_1A_64_PRIME;
-	}
-
-	return hash;
-}
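A plausible consumer of this hash is striped-directory name mapping: hash the entry name, then fold the result onto a stripe index. A sketch under that assumption (hypothetical helper; the real index selection also has to honor the status flags above):

static inline __u32 lmv_fnv_name_to_stripe(const char *name, size_t namelen,
					   __u32 stripe_count)
{
	/* hash the entry name, then map onto [0, stripe_count) */
	return (__u32)(lustre_hash_fnv_1a_64(name, namelen) % stripe_count);
}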
-
-union lmv_mds_md {
-	__u32			lmv_magic;
-	struct lmv_mds_md_v1	lmv_md_v1;
-	struct lmv_user_md	lmv_user_md;
-};
-
-static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
-{
-	ssize_t len = -EINVAL;
-
-	switch (lmm_magic) {
-	case LMV_MAGIC_V1: {
-		struct lmv_mds_md_v1 *lmm1;
-
-		len = sizeof(*lmm1);
-		len += stripe_count * sizeof(lmm1->lmv_stripe_fids[0]);
-		break;
-	}
-	default:
-		break;
-	}
-	return len;
-}
-
-static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm)
-{
-	switch (__le32_to_cpu(lmm->lmv_magic)) {
-	case LMV_MAGIC_V1:
-		return __le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count);
-	case LMV_USER_MAGIC:
-		return __le32_to_cpu(lmm->lmv_user_md.lum_stripe_count);
-	default:
-		return -EINVAL;
-	}
-}
-
-enum fld_rpc_opc {
-	FLD_QUERY	= 900,
-	FLD_READ	= 901,
-	FLD_LAST_OPC,
-	FLD_FIRST_OPC	= FLD_QUERY
-};
-
-enum seq_rpc_opc {
-	SEQ_QUERY		       = 700,
-	SEQ_LAST_OPC,
-	SEQ_FIRST_OPC		   = SEQ_QUERY
-};
-
-enum seq_op {
-	SEQ_ALLOC_SUPER = 0,
-	SEQ_ALLOC_META = 1
-};
-
-enum fld_op {
-	FLD_CREATE = 0,
-	FLD_DELETE = 1,
-	FLD_LOOKUP = 2,
-};
-
-/*
- *  LOV data structures
- */
-
-#define LOV_MAX_UUID_BUFFER_SIZE  8192
-/* The size of the buffer the lov/mdc reserves for the
- * array of UUIDs returned by the MDS.  With the current
- * protocol, this will limit the max number of OSTs per LOV
- */
-
-#define LOV_DESC_MAGIC 0xB0CCDE5C
-#define LOV_DESC_QOS_MAXAGE_DEFAULT 5  /* Seconds */
-#define LOV_DESC_STRIPE_SIZE_DEFAULT (1 << LNET_MTU_BITS)
-
-/* LOV settings descriptor (should only contain static info) */
-struct lov_desc {
-	__u32 ld_tgt_count;		/* how many OBD's */
-	__u32 ld_active_tgt_count;	/* how many active */
-	__u32 ld_default_stripe_count;  /* how many objects are used */
-	__u32 ld_pattern;		/* default PATTERN_RAID0 */
-	__u64 ld_default_stripe_size;   /* in bytes */
-	__u64 ld_default_stripe_offset; /* in bytes */
-	__u32 ld_padding_0;		/* unused */
-	__u32 ld_qos_maxage;		/* in seconds */
-	__u32 ld_padding_1;		/* also fix lustre_swab_lov_desc */
-	__u32 ld_padding_2;		/* also fix lustre_swab_lov_desc */
-	struct obd_uuid ld_uuid;
-};
-
-#define ld_magic ld_active_tgt_count       /* for swabbing from llogs */
-
-/*
- *   LDLM requests:
- */
-/* opcodes -- MUST be distinct from OST/MDS opcodes */
-enum ldlm_cmd {
-	LDLM_ENQUEUE     = 101,
-	LDLM_CONVERT     = 102,
-	LDLM_CANCEL      = 103,
-	LDLM_BL_CALLBACK = 104,
-	LDLM_CP_CALLBACK = 105,
-	LDLM_GL_CALLBACK = 106,
-	LDLM_SET_INFO    = 107,
-	LDLM_LAST_OPC
-};
-#define LDLM_FIRST_OPC LDLM_ENQUEUE
-
-#define RES_NAME_SIZE 4
-struct ldlm_res_id {
-	__u64 name[RES_NAME_SIZE];
-};
-
-#define DLDLMRES	"[%#llx:%#llx:%#llx].%llx"
-#define PLDLMRES(res)	(res)->lr_name.name[0], (res)->lr_name.name[1], \
-			(res)->lr_name.name[2], (res)->lr_name.name[3]
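DLDLMRES and PLDLMRES pair a format string with its argument list, so a full resource name can be printed in one expression. A sketch, assuming any struct exposing an lr_name member (such as ldlm_resource_desc below) and using plain printk to stay self-contained:

static inline void ldlm_resource_desc_dump(const struct ldlm_resource_desc *d)
{
	/* four __u64 name words, formatted by the DLDLMRES template */
	printk(KERN_DEBUG "resource: " DLDLMRES "\n", PLDLMRES(d));
}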
-
-/* lock types */
-enum ldlm_mode {
-	LCK_MINMODE = 0,
-	LCK_EX      = 1,
-	LCK_PW      = 2,
-	LCK_PR      = 4,
-	LCK_CW      = 8,
-	LCK_CR      = 16,
-	LCK_NL      = 32,
-	LCK_GROUP   = 64,
-	LCK_COS     = 128,
-	LCK_MAXMODE
-};
-
-#define LCK_MODE_NUM    8
-
-enum ldlm_type {
-	LDLM_PLAIN     = 10,
-	LDLM_EXTENT    = 11,
-	LDLM_FLOCK     = 12,
-	LDLM_IBITS     = 13,
-	LDLM_MAX_TYPE
-};
-
-#define LDLM_MIN_TYPE LDLM_PLAIN
-
-struct ldlm_extent {
-	__u64 start;
-	__u64 end;
-	__u64 gid;
-};
-
-struct ldlm_inodebits {
-	__u64 bits;
-};
-
-struct ldlm_flock_wire {
-	__u64 lfw_start;
-	__u64 lfw_end;
-	__u64 lfw_owner;
-	__u32 lfw_padding;
-	__u32 lfw_pid;
-};
-
-/* it's important that the fields of the ldlm_extent structure match
- * the first fields of the ldlm_flock structure because there is only
- * one ldlm_swab routine to process the ldlm_policy_data_t union. if
- * this ever changes we will need to swab the union differently based
- * on the resource type.
- */
-
-union ldlm_wire_policy_data {
-	struct ldlm_extent l_extent;
-	struct ldlm_flock_wire l_flock;
-	struct ldlm_inodebits l_inodebits;
-};
-
-union ldlm_gl_desc {
-	struct ldlm_gl_lquota_desc	lquota_desc;
-};
-
-enum ldlm_intent_flags {
-	IT_OPEN		= 0x00000001,
-	IT_CREAT	= 0x00000002,
-	IT_OPEN_CREAT	= 0x00000003,
-	IT_READDIR	= 0x00000004,
-	IT_GETATTR	= 0x00000008,
-	IT_LOOKUP	= 0x00000010,
-	IT_UNLINK	= 0x00000020,
-	IT_TRUNC	= 0x00000040,
-	IT_GETXATTR	= 0x00000080,
-	IT_EXEC		= 0x00000100,
-	IT_PIN		= 0x00000200,
-	IT_LAYOUT	= 0x00000400,
-	IT_QUOTA_DQACQ	= 0x00000800,
-	IT_QUOTA_CONN	= 0x00001000,
-	IT_SETXATTR	= 0x00002000,
-};
-
-struct ldlm_intent {
-	__u64 opc;
-};
-
-struct ldlm_resource_desc {
-	enum ldlm_type lr_type;
-	__u32 lr_padding;       /* also fix lustre_swab_ldlm_resource_desc */
-	struct ldlm_res_id lr_name;
-};
-
-struct ldlm_lock_desc {
-	struct ldlm_resource_desc l_resource;
-	enum ldlm_mode l_req_mode;
-	enum ldlm_mode l_granted_mode;
-	union ldlm_wire_policy_data l_policy_data;
-};
-
-#define LDLM_LOCKREQ_HANDLES 2
-#define LDLM_ENQUEUE_CANCEL_OFF 1
-
-struct ldlm_request {
-	__u32 lock_flags;
-	__u32 lock_count;
-	struct ldlm_lock_desc lock_desc;
-	struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
-};
-
-struct ldlm_reply {
-	__u32 lock_flags;
-	__u32 lock_padding;     /* also fix lustre_swab_ldlm_reply */
-	struct ldlm_lock_desc lock_desc;
-	struct lustre_handle lock_handle;
-	__u64  lock_policy_res1;
-	__u64  lock_policy_res2;
-};
-
-#define ldlm_flags_to_wire(flags)    ((__u32)(flags))
-#define ldlm_flags_from_wire(flags)  ((__u64)(flags))
-
-/*
- * Opcodes for mountconf (mgs and mgc)
- */
-enum mgs_cmd {
-	MGS_CONNECT = 250,
-	MGS_DISCONNECT,
-	MGS_EXCEPTION,	 /* node died, etc. */
-	MGS_TARGET_REG,	/* whenever target starts up */
-	MGS_TARGET_DEL,
-	MGS_SET_INFO,
-	MGS_CONFIG_READ,
-	MGS_LAST_OPC
-};
-#define MGS_FIRST_OPC MGS_CONNECT
-
-#define MGS_PARAM_MAXLEN 1024
-#define KEY_SET_INFO "set_info"
-
-struct mgs_send_param {
-	char	     mgs_param[MGS_PARAM_MAXLEN];
-};
-
-/* We pass this info to the MGS so it can write config logs */
-#define MTI_NAME_MAXLEN  64
-#define MTI_PARAM_MAXLEN 4096
-#define MTI_NIDS_MAX     32
-struct mgs_target_info {
-	__u32	    mti_lustre_ver;
-	__u32	    mti_stripe_index;
-	__u32	    mti_config_ver;
-	__u32	    mti_flags;
-	__u32	    mti_nid_count;
-	__u32	    mti_instance; /* Running instance of target */
-	char	     mti_fsname[MTI_NAME_MAXLEN];
-	char	     mti_svname[MTI_NAME_MAXLEN];
-	char	     mti_uuid[sizeof(struct obd_uuid)];
-	__u64	    mti_nids[MTI_NIDS_MAX];     /* host nids (lnet_nid_t)*/
-	char	     mti_params[MTI_PARAM_MAXLEN];
-};
-
-struct mgs_nidtbl_entry {
-	__u64	   mne_version;    /* table version of this entry */
-	__u32	   mne_instance;   /* target instance # */
-	__u32	   mne_index;      /* target index */
-	__u32	   mne_length;     /* length of this entry, in bytes */
-	__u8	    mne_type;       /* target type LDD_F_SV_TYPE_OST/MDT */
-	__u8	    mne_nid_type;   /* type of NID (mbz), for IPv6 */
-	__u8	    mne_nid_size;   /* size of each NID, in bytes */
-	__u8	    mne_nid_count;  /* # of NIDs in buffer */
-	union {
-		lnet_nid_t nids[0];     /* variable size buffer for NIDs. */
-	} u;
-};
-
-struct mgs_config_body {
-	char     mcb_name[MTI_NAME_MAXLEN]; /* logname */
-	__u64    mcb_offset;    /* next index of config log to request */
-	__u16    mcb_type;      /* type of log: CONFIG_T_[CONFIG|RECOVER] */
-	__u8     mcb_reserved;
-	__u8     mcb_bits;      /* bits unit size of config log */
-	__u32    mcb_units;     /* # of units for bulk transfer */
-};
-
-struct mgs_config_res {
-	__u64    mcr_offset;    /* index of last config log */
-	__u64    mcr_size;      /* size of the log */
-};
-
-/* Config marker flags (in config log) */
-#define CM_START	0x01
-#define CM_END		0x02
-#define CM_SKIP		0x04
-#define CM_UPGRADE146	0x08
-#define CM_EXCLUDE	0x10
-#define CM_START_SKIP (CM_START | CM_SKIP)
-
-struct cfg_marker {
-	__u32	     cm_step;       /* aka config version */
-	__u32	     cm_flags;
-	__u32	     cm_vers;       /* lustre release version number */
-	__u32	     cm_padding;    /* 64 bit align */
-	__s64	     cm_createtime; /* when this record was first created */
-	__s64	     cm_canceltime; /* when this record is no longer valid */
-	char	      cm_tgtname[MTI_NAME_MAXLEN];
-	char	      cm_comment[MTI_NAME_MAXLEN];
-};
-
-/*
- * Opcodes for multiple servers.
- */
-
-enum obd_cmd {
-	OBD_PING = 400,
-	OBD_LOG_CANCEL,
-	OBD_QC_CALLBACK, /* not used since 2.4 */
-	OBD_IDX_READ,
-	OBD_LAST_OPC
-};
-#define OBD_FIRST_OPC OBD_PING
-
-/**
- * llog contexts indices.
- *
- * There is a compatibility problem with the indices below: they are not
- * contiguous and must keep their numbers for compatibility reasons.
- * See LU-5218 for details.
- */
-enum llog_ctxt_id {
-	LLOG_CONFIG_ORIG_CTXT  =  0,
-	LLOG_CONFIG_REPL_CTXT = 1,
-	LLOG_MDS_OST_ORIG_CTXT = 2,
-	LLOG_MDS_OST_REPL_CTXT = 3, /* kept just to avoid re-assignment */
-	LLOG_SIZE_ORIG_CTXT = 4,
-	LLOG_SIZE_REPL_CTXT = 5,
-	LLOG_TEST_ORIG_CTXT = 8,
-	LLOG_TEST_REPL_CTXT = 9, /* kept just to avoid re-assignment */
-	LLOG_CHANGELOG_ORIG_CTXT = 12, /**< changelog generation on mdd */
-	LLOG_CHANGELOG_REPL_CTXT = 13, /**< changelog access on clients */
-	/* for multiple changelog consumers */
-	LLOG_CHANGELOG_USER_ORIG_CTXT = 14,
-	LLOG_AGENT_ORIG_CTXT = 15, /**< agent requests generation on cdt */
-	LLOG_MAX_CTXTS
-};
-
-/** Identifier for a single log object */
-struct llog_logid {
-	struct ost_id		lgl_oi;
-	__u32		   lgl_ogen;
-} __packed;
-
-/** Records written to the CATALOGS list */
-#define CATLIST "CATALOGS"
-struct llog_catid {
-	struct llog_logid       lci_logid;
-	__u32		   lci_padding1;
-	__u32		   lci_padding2;
-	__u32		   lci_padding3;
-} __packed;
-
-/* Log data record types - there is no specific reason that these need to
- * be related to the RPC opcodes, but no reason not to (may be handy later?)
- */
-#define LLOG_OP_MAGIC 0x10600000
-#define LLOG_OP_MASK  0xfff00000
-
-enum llog_op_type {
-	LLOG_PAD_MAGIC		= LLOG_OP_MAGIC | 0x00000,
-	OST_SZ_REC		= LLOG_OP_MAGIC | 0x00f00,
-	/* OST_RAID1_REC	= LLOG_OP_MAGIC | 0x01000, never used */
-	MDS_UNLINK_REC		= LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) |
-				  REINT_UNLINK, /* obsolete after 2.5.0 */
-	MDS_UNLINK64_REC	= LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
-				  REINT_UNLINK,
-	/* MDS_SETATTR_REC	= LLOG_OP_MAGIC | 0x12401, obsolete 1.8.0 */
-	MDS_SETATTR64_REC	= LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
-				  REINT_SETATTR,
-	OBD_CFG_REC		= LLOG_OP_MAGIC | 0x20000,
-	/* PTL_CFG_REC		= LLOG_OP_MAGIC | 0x30000, obsolete 1.4.0 */
-	LLOG_GEN_REC		= LLOG_OP_MAGIC | 0x40000,
-	/* LLOG_JOIN_REC	= LLOG_OP_MAGIC | 0x50000, obsolete  1.8.0 */
-	CHANGELOG_REC		= LLOG_OP_MAGIC | 0x60000,
-	CHANGELOG_USER_REC	= LLOG_OP_MAGIC | 0x70000,
-	HSM_AGENT_REC		= LLOG_OP_MAGIC | 0x80000,
-	LLOG_HDR_MAGIC		= LLOG_OP_MAGIC | 0x45539,
-	LLOG_LOGID_MAGIC	= LLOG_OP_MAGIC | 0x4553b,
-};
-
-#define LLOG_REC_HDR_NEEDS_SWABBING(r) \
-	(((r)->lrh_type & __swab32(LLOG_OP_MASK)) == __swab32(LLOG_OP_MAGIC))
-
-/** Log record header - stored in little endian order.
- * Each record must start with this struct, end with a llog_rec_tail,
- * and be a multiple of 256 bits in size.
- */
-struct llog_rec_hdr {
-	__u32	lrh_len;
-	__u32	lrh_index;
-	__u32	lrh_type;
-	__u32	lrh_id;
-};
-
-struct llog_rec_tail {
-	__u32	lrt_len;
-	__u32	lrt_index;
-};
-
-/* The record data follows just after the header */
-#define REC_DATA(ptr)						\
-	((void *)((char *)ptr + sizeof(struct llog_rec_hdr)))
-
-#define REC_DATA_LEN(rec)					\
-	(rec->lrh_len - sizeof(struct llog_rec_hdr) -		\
-	 sizeof(struct llog_rec_tail))
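Since lrh_len covers the header, payload, and tail of each record, iterating the records in one log chunk is a matter of hopping by lrh_len. A minimal sketch, assuming buf holds chunk_size bytes of well-formed records:

static inline void llog_walk_records(void *buf, __u32 chunk_size,
				     void (*cb)(struct llog_rec_hdr *rec))
{
	char *p = buf;
	char *end = p + chunk_size;

	while (p + sizeof(struct llog_rec_hdr) <= end) {
		struct llog_rec_hdr *rec = (struct llog_rec_hdr *)p;

		if (!rec->lrh_len || p + rec->lrh_len > end)
			break;	/* zero-length or truncated record: stop */
		cb(rec);	/* payload: REC_DATA(rec), REC_DATA_LEN(rec) */
		p += rec->lrh_len;	/* lrh_len covers hdr + data + tail */
	}
}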
-
-struct llog_logid_rec {
-	struct llog_rec_hdr	lid_hdr;
-	struct llog_logid	lid_id;
-	__u32			lid_padding1;
-	__u64			lid_padding2;
-	__u64			lid_padding3;
-	struct llog_rec_tail	lid_tail;
-} __packed;
-
-struct llog_unlink_rec {
-	struct llog_rec_hdr	lur_hdr;
-	__u64			lur_oid;
-	__u32			lur_oseq;
-	__u32			lur_count;
-	struct llog_rec_tail	lur_tail;
-} __packed;
-
-struct llog_unlink64_rec {
-	struct llog_rec_hdr	lur_hdr;
-	struct lu_fid		lur_fid;
-	__u32			lur_count; /* to destroy the lost precreated */
-	__u32			lur_padding1;
-	__u64			lur_padding2;
-	__u64			lur_padding3;
-	struct llog_rec_tail    lur_tail;
-} __packed;
-
-struct llog_setattr64_rec {
-	struct llog_rec_hdr	lsr_hdr;
-	struct ost_id		lsr_oi;
-	__u32			lsr_uid;
-	__u32			lsr_uid_h;
-	__u32			lsr_gid;
-	__u32			lsr_gid_h;
-	__u64			lsr_valid;
-	struct llog_rec_tail    lsr_tail;
-} __packed;
-
-struct llog_size_change_rec {
-	struct llog_rec_hdr	lsc_hdr;
-	struct ll_fid		lsc_fid;
-	__u32			lsc_ioepoch;
-	__u32			lsc_padding1;
-	__u64			lsc_padding2;
-	__u64			lsc_padding3;
-	struct llog_rec_tail	lsc_tail;
-} __packed;
-
-/* changelog llog name, needed by client replicators */
-#define CHANGELOG_CATALOG "changelog_catalog"
-
-struct changelog_setinfo {
-	__u64 cs_recno;
-	__u32 cs_id;
-} __packed;
-
-/** changelog record */
-struct llog_changelog_rec {
-	struct llog_rec_hdr	cr_hdr;
-	struct changelog_rec	cr;		/**< Variable length field */
-	struct llog_rec_tail	cr_do_not_use;	/**< for sizeof only */
-} __packed;
-
-struct llog_changelog_user_rec {
-	struct llog_rec_hdr   cur_hdr;
-	__u32		 cur_id;
-	__u32		 cur_padding;
-	__u64		 cur_endrec;
-	struct llog_rec_tail  cur_tail;
-} __packed;
-
-enum agent_req_status {
-	ARS_WAITING,
-	ARS_STARTED,
-	ARS_FAILED,
-	ARS_CANCELED,
-	ARS_SUCCEED,
-};
-
-static inline const char *agent_req_status2name(const enum agent_req_status ars)
-{
-	switch (ars) {
-	case ARS_WAITING:
-		return "WAITING";
-	case ARS_STARTED:
-		return "STARTED";
-	case ARS_FAILED:
-		return "FAILED";
-	case ARS_CANCELED:
-		return "CANCELED";
-	case ARS_SUCCEED:
-		return "SUCCEED";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-struct llog_agent_req_rec {
-	struct llog_rec_hdr	arr_hdr;	/**< record header */
-	__u32			arr_status;	/**< status of the request */
-						/* must match enum
-						 * agent_req_status
-						 */
-	__u32			arr_archive_id;	/**< backend archive number */
-	__u64			arr_flags;	/**< req flags */
-	__u64			arr_compound_id;/**< compound cookie */
-	__u64			arr_req_create;	/**< req. creation time */
-	__u64			arr_req_change;	/**< req. status change time */
-	struct hsm_action_item	arr_hai;	/**< req. to the agent */
-	struct llog_rec_tail	arr_tail;   /**< record tail, for sizeof only */
-} __packed;
-
-/* Old llog gen for compatibility */
-struct llog_gen {
-	__u64 mnt_cnt;
-	__u64 conn_cnt;
-} __packed;
-
-struct llog_gen_rec {
-	struct llog_rec_hdr	lgr_hdr;
-	struct llog_gen		lgr_gen;
-	__u64			padding1;
-	__u64			padding2;
-	__u64			padding3;
-	struct llog_rec_tail	lgr_tail;
-};
-
-/* flags for the logs */
-enum llog_flag {
-	LLOG_F_ZAP_WHEN_EMPTY	= 0x1,
-	LLOG_F_IS_CAT		= 0x2,
-	LLOG_F_IS_PLAIN		= 0x4,
-	LLOG_F_EXT_JOBID        = 0x8,
-	LLOG_F_IS_FIXSIZE	= 0x10,
-
-	/*
-	 * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from
-	 * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here,
-	 * because the catlog record is usually fixed size, but its plain
-	 * log record can be variable
-	 */
-	LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID,
-};
-
-/* On-disk header structure of each log object, stored in little endian order */
-#define LLOG_MIN_CHUNK_SIZE	8192
-#define LLOG_HEADER_SIZE	(96)	/* sizeof(llog_log_hdr) +
-					 * sizeof(llh_tail) - sizeof(llh_bitmap)
-					 */
-#define LLOG_BITMAP_BYTES	(LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE)
-#define LLOG_MIN_REC_SIZE	(24)	/* round(llog_rec_hdr + llog_rec_tail) */
-
-struct llog_log_hdr {
-	struct llog_rec_hdr     llh_hdr;
-	__s64		   llh_timestamp;
-	__u32		   llh_count;
-	__u32		   llh_bitmap_offset;
-	__u32		   llh_size;
-	__u32		   llh_flags;
-	__u32		   llh_cat_idx;
-	/* for a catalog the first plain slot is next to it */
-	struct obd_uuid	 llh_tgtuuid;
-	__u32		   llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23];
-	/* These fields must always be at the end of the llog_log_hdr.
-	 * Note: llh_bitmap size is variable because the llog chunk size could
-	 * be bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192
-	 * bytes, and the real size is stored in llh_hdr.lrh_len, which means
-	 * llh_tail should only be referred to via LLOG_HDR_TAIL().
-	 * But this structure is also used by the client/server llog interface
-	 * (see llog_client.c), so it is kept in its original form to avoid
-	 * compatibility issues.
-	 */
-	__u32		   llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)];
-	struct llog_rec_tail    llh_tail;
-} __packed;
-
-#undef LLOG_HEADER_SIZE
-#undef LLOG_BITMAP_BYTES
-
-#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len -	\
-					   llh->llh_bitmap_offset -	\
-					   sizeof(llh->llh_tail)) * 8)
-#define LLOG_HDR_BITMAP(llh)	(__u32 *)((char *)(llh) +		\
-					  (llh)->llh_bitmap_offset)
-#define LLOG_HDR_TAIL(llh)	((struct llog_rec_tail *)((char *)llh + \
-							 llh->llh_hdr.lrh_len - \
-							 sizeof(llh->llh_tail)))
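The bitmap lives at a chunk-size-dependent offset and is stored little-endian, hence the accessors above. A sketch of checking whether a record index is still live, assuming test_bit_le() from <linux/bitops.h>:

static inline int llog_rec_index_is_set(struct llog_log_hdr *llh, __u32 index)
{
	/* bitmap size depends on the actual chunk size, so bound-check */
	if (index >= LLOG_HDR_BITMAP_SIZE(llh))
		return 0;
	return test_bit_le(index, LLOG_HDR_BITMAP(llh));
}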
-
-/** log cookies are used to reference a specific log file and a record
- * therein
- */
-struct llog_cookie {
-	struct llog_logid       lgc_lgl;
-	__u32		   lgc_subsys;
-	__u32		   lgc_index;
-	__u32		   lgc_padding;
-} __packed;
-
-/** llog protocol */
-enum llogd_rpc_ops {
-	LLOG_ORIGIN_HANDLE_CREATE       = 501,
-	LLOG_ORIGIN_HANDLE_NEXT_BLOCK   = 502,
-	LLOG_ORIGIN_HANDLE_READ_HEADER  = 503,
-	LLOG_ORIGIN_HANDLE_WRITE_REC    = 504,
-	LLOG_ORIGIN_HANDLE_CLOSE	= 505,
-	LLOG_ORIGIN_CONNECT		= 506,
-	LLOG_CATINFO			= 507,  /* deprecated */
-	LLOG_ORIGIN_HANDLE_PREV_BLOCK   = 508,
-	LLOG_ORIGIN_HANDLE_DESTROY      = 509,  /* for destroy llog object*/
-	LLOG_LAST_OPC,
-	LLOG_FIRST_OPC		  = LLOG_ORIGIN_HANDLE_CREATE
-};
-
-struct llogd_body {
-	struct llog_logid  lgd_logid;
-	__u32 lgd_ctxt_idx;
-	__u32 lgd_llh_flags;
-	__u32 lgd_index;
-	__u32 lgd_saved_index;
-	__u32 lgd_len;
-	__u64 lgd_cur_offset;
-} __packed;
-
-struct llogd_conn_body {
-	struct llog_gen	 lgdc_gen;
-	struct llog_logid       lgdc_logid;
-	__u32		   lgdc_ctxt_idx;
-} __packed;
-
-/* Note: 64-bit types are 64-bit aligned in structure */
-struct obdo {
-	__u64		o_valid;	/* hot fields in this obdo */
-	struct ost_id	o_oi;
-	__u64		o_parent_seq;
-	__u64		o_size;	 /* o_size-o_blocks == ost_lvb */
-	__s64		o_mtime;
-	__s64		o_atime;
-	__s64		o_ctime;
-	__u64		o_blocks;       /* brw: cli sent cached bytes */
-	__u64		o_grant;
-
-	/* 32-bit fields start here: keep an even number of them via padding */
-	__u32		o_blksize;      /* optimal IO blocksize */
-	__u32		o_mode;	 /* brw: cli sent cache remain */
-	__u32		o_uid;
-	__u32		o_gid;
-	__u32		o_flags;
-	__u32		o_nlink;	/* brw: checksum */
-	__u32		o_parent_oid;
-	__u32		o_misc;		/* brw: o_dropped */
-
-	__u64		   o_ioepoch;      /* epoch in ost writes */
-	__u32		   o_stripe_idx;   /* holds stripe idx */
-	__u32		   o_parent_ver;
-	struct lustre_handle    o_handle;  /* brw: lock handle to prolong locks
-					    */
-	struct llog_cookie      o_lcookie; /* destroy: unlink cookie from MDS,
-					    * obsolete in 2.8, reused in OSP
-					    */
-	__u32			o_uid_h;
-	__u32			o_gid_h;
-
-	__u64			o_data_version; /* getattr: sum of iversion for
-						 * each stripe.
-						 * brw: grant space consumed on
-						 * the client for the write
-						 */
-	__u64			o_padding_4;
-	__u64			o_padding_5;
-	__u64			o_padding_6;
-};
-
-#define o_dirty   o_blocks
-#define o_undirty o_mode
-#define o_dropped o_misc
-#define o_cksum   o_nlink
-#define o_grant_used o_data_version
-
-/* request structure for OST's */
-struct ost_body {
-	struct  obdo oa;
-};
-
-/* Key for FIEMAP to be used in get_info calls */
-struct ll_fiemap_info_key {
-	char		lfik_name[8];
-	struct obdo	lfik_oa;
-	struct fiemap	lfik_fiemap;
-};
-
-/* security opcodes */
-enum sec_cmd {
-	SEC_CTX_INIT	    = 801,
-	SEC_CTX_INIT_CONT       = 802,
-	SEC_CTX_FINI	    = 803,
-	SEC_LAST_OPC,
-	SEC_FIRST_OPC	   = SEC_CTX_INIT
-};
-
-/*
- * capa related definitions
- */
-#define CAPA_HMAC_MAX_LEN       64
-#define CAPA_HMAC_KEY_MAX_LEN   56
-
-/* NB: take care when changing the sequence of elements in this struct,
- * because the offset info is used in find_capa()
- */
-struct lustre_capa {
-	struct lu_fid   lc_fid;	 /** fid */
-	__u64	   lc_opc;	 /** operations allowed */
-	__u64	   lc_uid;	 /** file owner */
-	__u64	   lc_gid;	 /** file group */
-	__u32	   lc_flags;       /** HMAC algorithm & flags */
-	__u32	   lc_keyid;       /** key# used for the capability */
-	__u32	   lc_timeout;     /** capa timeout value (sec) */
-/* FIXME: y2038 time_t overflow: */
-	__u32	   lc_expiry;      /** expiry time (sec) */
-	__u8	    lc_hmac[CAPA_HMAC_MAX_LEN];   /** HMAC */
-} __packed;
-
-/** lustre_capa::lc_opc */
-enum {
-	CAPA_OPC_BODY_WRITE   = 1 << 0,  /**< write object data */
-	CAPA_OPC_BODY_READ    = 1 << 1,  /**< read object data */
-	CAPA_OPC_INDEX_LOOKUP = 1 << 2,  /**< lookup object fid */
-	CAPA_OPC_INDEX_INSERT = 1 << 3,  /**< insert object fid */
-	CAPA_OPC_INDEX_DELETE = 1 << 4,  /**< delete object fid */
-	CAPA_OPC_OSS_WRITE    = 1 << 5,  /**< write oss object data */
-	CAPA_OPC_OSS_READ     = 1 << 6,  /**< read oss object data */
-	CAPA_OPC_OSS_TRUNC    = 1 << 7,  /**< truncate oss object */
-	CAPA_OPC_OSS_DESTROY  = 1 << 8,  /**< destroy oss object */
-	CAPA_OPC_META_WRITE   = 1 << 9,  /**< write object meta data */
-	CAPA_OPC_META_READ    = 1 << 10, /**< read object meta data */
-};
-
-#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
-#define CAPA_OPC_MDS_ONLY						   \
-	(CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \
-	 CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
-#define CAPA_OPC_OSS_ONLY						   \
-	(CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC |      \
-	 CAPA_OPC_OSS_DESTROY)
-#define CAPA_OPC_MDS_DEFAULT	(~CAPA_OPC_OSS_ONLY)
-#define CAPA_OPC_OSS_DEFAULT	(~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY))
-
-struct lustre_capa_key {
-	__u64   lk_seq;       /**< mds# */
-	__u32   lk_keyid;     /**< key# */
-	__u32   lk_padding;
-	__u8    lk_key[CAPA_HMAC_KEY_MAX_LEN];    /**< key */
-} __packed;
-
-/** The link ea holds 1 \a link_ea_entry for each hardlink */
-#define LINK_EA_MAGIC 0x11EAF1DFUL
-struct link_ea_header {
-	__u32 leh_magic;
-	__u32 leh_reccount;
-	__u64 leh_len;      /* total size */
-	__u32 leh_overflow_time;
-	__u32 leh_padding;
-};
-
-/** Hardlink data is name and parent fid.
- * Stored in this crazy struct for maximum packing and endian-neutrality
- */
-struct link_ea_entry {
-	/** __u16 stored big-endian, unaligned */
-	unsigned char      lee_reclen[2];
-	unsigned char      lee_parent_fid[sizeof(struct lu_fid)];
-	char	       lee_name[0];
-} __packed;
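lee_reclen is a 16-bit length stored byte-by-byte so the entry stays packed and endian-neutral. A minimal encode/decode sketch matching that layout (helper names are illustrative, not the verbatim Lustre accessors):

static inline __u16 lee_reclen_get(const struct link_ea_entry *lee)
{
	return ((__u16)lee->lee_reclen[0] << 8) | lee->lee_reclen[1];
}

static inline void lee_reclen_set(struct link_ea_entry *lee, __u16 reclen)
{
	lee->lee_reclen[0] = reclen >> 8;	/* big-endian: high byte first */
	lee->lee_reclen[1] = reclen & 0xff;
}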
-
-/** fid2path request/reply structure */
-struct getinfo_fid2path {
-	struct lu_fid   gf_fid;
-	__u64	   gf_recno;
-	__u32	   gf_linkno;
-	__u32	   gf_pathlen;
-	char	    gf_path[0];
-} __packed;
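Because gf_path is a flexible array, callers allocate the header plus however much path they can accept and record that room in gf_pathlen before issuing the request. A userspace-flavored sketch (<stdlib.h> assumed, error handling elided):

static inline struct getinfo_fid2path *fid2path_alloc(__u32 pathlen)
{
	struct getinfo_fid2path *gf = malloc(sizeof(*gf) + pathlen);

	if (gf)
		gf->gf_pathlen = pathlen;	/* room available in gf_path */
	return gf;
}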
-
-/** path2parent request/reply structures */
-struct getparent {
-	struct lu_fid	gp_fid;		/**< parent FID */
-	__u32		gp_linkno;	/**< hardlink number */
-	__u32		gp_name_size;	/**< size of the name field */
-	char		gp_name[0];	/**< zero-terminated link name */
-} __packed;
-
-enum {
-	LAYOUT_INTENT_ACCESS    = 0,
-	LAYOUT_INTENT_READ      = 1,
-	LAYOUT_INTENT_WRITE     = 2,
-	LAYOUT_INTENT_GLIMPSE   = 3,
-	LAYOUT_INTENT_TRUNC     = 4,
-	LAYOUT_INTENT_RELEASE   = 5,
-	LAYOUT_INTENT_RESTORE   = 6
-};
-
-/* enqueue layout lock with intent */
-struct layout_intent {
-	__u32 li_opc; /* intent operation for enqueue, read, write etc */
-	__u32 li_flags;
-	__u64 li_start;
-	__u64 li_end;
-};
-
-/**
- * On the wire version of hsm_progress structure.
- *
- * Contains the userspace hsm_progress and some internal fields.
- */
-struct hsm_progress_kernel {
-	/* Fields taken from struct hsm_progress */
-	struct lu_fid		hpk_fid;
-	__u64			hpk_cookie;
-	struct hsm_extent	hpk_extent;
-	__u16			hpk_flags;
-	__u16			hpk_errval; /* positive val */
-	__u32			hpk_padding1;
-	/* Additional fields */
-	__u64			hpk_data_version;
-	__u64			hpk_padding2;
-} __packed;
-
-/** layout swap request structure
- * fid1 and fid2 are in mdt_body
- */
-struct mdc_swap_layouts {
-	__u64	   msl_flags;
-} __packed;
-
-struct close_data {
-	struct lustre_handle	cd_handle;
-	struct lu_fid		cd_fid;
-	__u64			cd_data_version;
-	__u64			cd_reserved[8];
-};
-
-#endif
-/** @} lustreidl */

+ 0 - 229
drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h

@@ -1,229 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-#ifndef _UAPI_LUSTRE_IOCTL_H_
-#define _UAPI_LUSTRE_IOCTL_H_
-
-#include <linux/ioctl.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-#if !defined(__KERNEL__) && !defined(LUSTRE_UTILS)
-# error This file is for Lustre internal use only.
-#endif
-
-enum md_echo_cmd {
-	ECHO_MD_CREATE		= 1, /* Open/Create file on MDT */
-	ECHO_MD_MKDIR		= 2, /* Mkdir on MDT */
-	ECHO_MD_DESTROY		= 3, /* Unlink file on MDT */
-	ECHO_MD_RMDIR		= 4, /* Rmdir on MDT */
-	ECHO_MD_LOOKUP		= 5, /* Lookup on MDT */
-	ECHO_MD_GETATTR		= 6, /* Getattr on MDT */
-	ECHO_MD_SETATTR		= 7, /* Setattr on MDT */
-	ECHO_MD_ALLOC_FID	= 8, /* Get FIDs from MDT */
-};
-
-#define OBD_DEV_ID 1
-#define OBD_DEV_NAME "obd"
-#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
-
-#define OBD_IOCTL_VERSION	0x00010004
-#define OBD_DEV_BY_DEVNAME	0xffffd0de
-
-struct obd_ioctl_data {
-	__u32		ioc_len;
-	__u32		ioc_version;
-
-	union {
-		__u64	ioc_cookie;
-		__u64	ioc_u64_1;
-	};
-	union {
-		__u32	ioc_conn1;
-		__u32	ioc_u32_1;
-	};
-	union {
-		__u32	ioc_conn2;
-		__u32	ioc_u32_2;
-	};
-
-	struct obdo	ioc_obdo1;
-	struct obdo	ioc_obdo2;
-
-	__u64		ioc_count;
-	__u64		ioc_offset;
-	__u32		ioc_dev;
-	__u32		ioc_command;
-
-	__u64		ioc_nid;
-	__u32		ioc_nal;
-	__u32		ioc_type;
-
-	/* buffers the kernel will treat as user pointers */
-	__u32		ioc_plen1;
-	char __user    *ioc_pbuf1;
-	__u32		ioc_plen2;
-	char __user    *ioc_pbuf2;
-
-	/* inline buffers for various arguments */
-	__u32		ioc_inllen1;
-	char	       *ioc_inlbuf1;
-	__u32		ioc_inllen2;
-	char	       *ioc_inlbuf2;
-	__u32		ioc_inllen3;
-	char	       *ioc_inlbuf3;
-	__u32		ioc_inllen4;
-	char	       *ioc_inlbuf4;
-
-	char		ioc_bulk[0];
-};
-
-struct obd_ioctl_hdr {
-	__u32		ioc_len;
-	__u32		ioc_version;
-};
-
-static inline __u32 obd_ioctl_packlen(struct obd_ioctl_data *data)
-{
-	__u32 len = __ALIGN_KERNEL(sizeof(*data), 8);
-
-	len += __ALIGN_KERNEL(data->ioc_inllen1, 8);
-	len += __ALIGN_KERNEL(data->ioc_inllen2, 8);
-	len += __ALIGN_KERNEL(data->ioc_inllen3, 8);
-	len += __ALIGN_KERNEL(data->ioc_inllen4, 8);
-
-	return len;
-}
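So the packed length is the 8-byte-aligned header plus each inline buffer rounded up to 8 bytes. A worked sketch with two inline buffers of 10 and 20 bytes:

static inline __u32 obd_ioctl_len_example(void)
{
	struct obd_ioctl_data data = { 0 };

	data.ioc_inllen1 = 10;	/* __ALIGN_KERNEL rounds this to 16 */
	data.ioc_inllen2 = 20;	/* ...and this to 24 */
	return obd_ioctl_packlen(&data);  /* ALIGN(sizeof(data), 8) + 16 + 24 */
}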
-
-/*
- * OBD_IOC_DATA_TYPE is only for compatibility reasons with older
- * Linux Lustre user tools. New ioctls should NOT use this macro as
- * the ioctl "size". Instead the ioctl should get a "size" argument
- * which is the actual data type used by the ioctl, to ensure the
- * ioctl interface is versioned correctly.
- */
-#define OBD_IOC_DATA_TYPE	long
-
-/*	IOC_LDLM_TEST		_IOWR('f', 40, long) */
-/*	IOC_LDLM_DUMP		_IOWR('f', 41, long) */
-/*	IOC_LDLM_REGRESS_START	_IOWR('f', 42, long) */
-/*	IOC_LDLM_REGRESS_STOP	_IOWR('f', 43, long) */
-
-#define OBD_IOC_CREATE		_IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY		_IOW('f', 104, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_PREALLOCATE	_IOWR('f', 105, OBD_IOC_DATA_TYPE) */
-
-#define OBD_IOC_SETATTR		_IOW('f', 107, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETATTR		_IOWR('f', 108, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_READ		_IOWR('f', 109, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_WRITE		_IOWR('f', 110, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_STATFS		_IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC		_IOW('f', 114, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_READ2		_IOWR('f', 115, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_FORMAT		_IOWR('f', 116, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_PARTITION	_IOWR('f', 117, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_COPY		_IOWR('f', 120, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_MIGR		_IOWR('f', 121, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_PUNCH		_IOWR('f', 122, OBD_IOC_DATA_TYPE) */
-
-/*	OBD_IOC_MODULE_DEBUG	_IOWR('f', 124, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_BRW_READ	_IOWR('f', 125, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_BRW_WRITE	_IOWR('f', 126, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_NAME2DEV	_IOWR('f', 127, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_UUID2DEV	_IOWR('f', 130, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETNAME		_IOWR('f', 131, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETMDNAME	_IOR('f', 131, char[MAX_OBD_NAME])
-#define OBD_IOC_GETDTNAME	OBD_IOC_GETNAME
-#define OBD_IOC_LOV_GET_CONFIG	_IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER	_IOW('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET	_IOW('f', 136, OBD_IOC_DATA_TYPE)
-
-/*	OBD_IOC_DEC_FS_USE_COUNT _IO('f', 139) */
-#define OBD_IOC_NO_TRANSNO	_IOW('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY	_IOW('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY	_IOR('f', 142, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_ROOT_SQUASH	_IOWR('f', 143, OBD_IOC_DATA_TYPE) */
-#define OBD_GET_VERSION		_IOWR('f', 144, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_GSS_SUPPORT	_IOWR('f', 145, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_CLOSE_UUID	_IOWR('f', 147, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_CHANGELOG_SEND	_IOW('f', 148, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETDEVICE	_IOWR('f', 149, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_FID2PATH	_IOWR('f', 150, OBD_IOC_DATA_TYPE)
-/*	lustre/lustre_user.h	151-153 */
-/*	OBD_IOC_LOV_SETSTRIPE	154 LL_IOC_LOV_SETSTRIPE */
-/*	OBD_IOC_LOV_GETSTRIPE	155 LL_IOC_LOV_GETSTRIPE */
-/*	OBD_IOC_LOV_SETEA	156 LL_IOC_LOV_SETEA */
-/*	lustre/lustre_user.h	157-159 */
-/*	OBD_IOC_QUOTACHECK	_IOW('f', 160, int) */
-/*	OBD_IOC_POLL_QUOTACHECK	_IOR('f', 161, struct if_quotacheck *) */
-#define OBD_IOC_QUOTACTL	_IOWR('f', 162, struct if_quotactl)
-/*	lustre/lustre_user.h	163-176 */
-#define OBD_IOC_CHANGELOG_REG	_IOW('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG	_IOW('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR	_IOW('f', 179, struct obd_ioctl_data)
-/*	OBD_IOC_RECORD		_IOWR('f', 180, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_ENDRECORD	_IOWR('f', 181, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_PARSE		_IOWR('f', 182, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_DORECORD	_IOWR('f', 183, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_PROCESS_CFG	_IOWR('f', 184, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_DUMP_LOG	_IOWR('f', 185, OBD_IOC_DATA_TYPE) */
-/*	OBD_IOC_CLEAR_LOG	_IOWR('f', 186, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_PARAM		_IOW('f', 187, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_POOL		_IOWR('f', 188, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_REPLACE_NIDS	_IOWR('f', 189, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CATLOGLIST	_IOWR('f', 190, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_INFO	_IOWR('f', 191, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_PRINT	_IOWR('f', 192, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CANCEL	_IOWR('f', 193, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_REMOVE	_IOWR('f', 194, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CHECK	_IOWR('f', 195, OBD_IOC_DATA_TYPE)
-/*	OBD_IOC_LLOG_CATINFO	_IOWR('f', 196, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_NODEMAP		_IOWR('f', 197, OBD_IOC_DATA_TYPE)
-
-/*	ECHO_IOC_GET_STRIPE	_IOWR('f', 200, OBD_IOC_DATA_TYPE) */
-/*	ECHO_IOC_SET_STRIPE	_IOWR('f', 201, OBD_IOC_DATA_TYPE) */
-/*	ECHO_IOC_ENQUEUE	_IOWR('f', 202, OBD_IOC_DATA_TYPE) */
-/*	ECHO_IOC_CANCEL		_IOWR('f', 203, OBD_IOC_DATA_TYPE) */
-
-#define OBD_IOC_GET_OBJ_VERSION	_IOR('f', 210, OBD_IOC_DATA_TYPE)
-
-/*	lustre/lustre_user.h	212-217 */
-#define OBD_IOC_GET_MNTOPT	_IOW('f', 220, mntopt_t)
-#define OBD_IOC_ECHO_MD		_IOR('f', 221, struct obd_ioctl_data)
-#define OBD_IOC_ECHO_ALLOC_SEQ	_IOWR('f', 222, struct obd_ioctl_data)
-#define OBD_IOC_START_LFSCK	_IOWR('f', 230, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_STOP_LFSCK	_IOW('f', 231, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_QUERY_LFSCK	_IOR('f', 232, struct obd_ioctl_data)
-/*	lustre/lustre_user.h	240-249 */
-/*	LIBCFS_IOC_DEBUG_MASK	250 */
-
-#define IOC_OSC_SET_ACTIVE	_IOWR('h', 21, void *)
-
-#endif /* _UAPI_LUSTRE_IOCTL_H_ */

+ 0 - 94
drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h

@@ -1,94 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- * Kernel <-> userspace communication routines.
- * The definitions below are used in the kernel and userspace.
- */
-
-#ifndef __UAPI_LUSTRE_KERNELCOMM_H__
-#define __UAPI_LUSTRE_KERNELCOMM_H__
-
-#include <linux/types.h>
-
-/* KUC message header.
- * All current and future KUC messages should use this header.
- * To avoid having to include Lustre headers from libcfs, define this here.
- */
-struct kuc_hdr {
-	__u16 kuc_magic;
-	/* Each new Lustre feature should use a different transport */
-	__u8  kuc_transport;
-	__u8  kuc_flags;
-	/* Message type or opcode, transport-specific */
-	__u16 kuc_msgtype;
-	/* Including header */
-	__u16 kuc_msglen;
-} __aligned(sizeof(__u64));
-
-#define KUC_CHANGELOG_MSG_MAXSIZE (sizeof(struct kuc_hdr) + CR_MAXSIZE)
-
-#define KUC_MAGIC		0x191C	/* Lustre9etLinC */
-
-/* kuc_msgtype values are defined in each transport */
-enum kuc_transport_type {
-	KUC_TRANSPORT_GENERIC	= 1,
-	KUC_TRANSPORT_HSM	= 2,
-	KUC_TRANSPORT_CHANGELOG	= 3,
-};
-
-enum kuc_generic_message_type {
-	KUC_MSG_SHUTDOWN	= 1,
-};
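Every KUC message leads with kuc_hdr, so even a payload-free control message is just a filled-in header. A sketch building the generic shutdown message (kuc_flags left zero by assumption):

static inline void kuc_shutdown_hdr_init(struct kuc_hdr *kh)
{
	kh->kuc_magic = KUC_MAGIC;
	kh->kuc_transport = KUC_TRANSPORT_GENERIC;
	kh->kuc_flags = 0;			/* assumed: no flags needed */
	kh->kuc_msgtype = KUC_MSG_SHUTDOWN;
	kh->kuc_msglen = sizeof(*kh);		/* header-only message */
}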
-
-/* KUC Broadcast Groups. This determines which userspace process hears which
- * messages.  Multiple transports may be used within a group, or multiple
- * groups may use the same transport.  Broadcast groups need not be used
- * if e.g. a UID is specified instead; use group 0 to signify unicast.
- */
-#define KUC_GRP_HSM	0x02
-#define KUC_GRP_MAX	KUC_GRP_HSM
-
-#define LK_FLG_STOP 0x01
-#define LK_NOFD -1U
-
-/* kernelcomm control structure, passed from userspace to kernel */
-struct lustre_kernelcomm {
-	__u32 lk_wfd;
-	__u32 lk_rfd;
-	__u32 lk_uid;
-	__u32 lk_group;
-	__u32 lk_data;
-	__u32 lk_flags;
-} __packed;
-
-#endif	/* __UAPI_LUSTRE_KERNELCOMM_H__ */

+ 0 - 236
drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h

@@ -1,236 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2015 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * Define ost_id  associated functions
- */
-
-#ifndef _UAPI_LUSTRE_OSTID_H_
-#define _UAPI_LUSTRE_OSTID_H_
-
-#include <linux/errno.h>
-#include <uapi/linux/lustre/lustre_fid.h>
-
-static inline __u64 lmm_oi_id(const struct ost_id *oi)
-{
-	return oi->oi.oi_id;
-}
-
-static inline __u64 lmm_oi_seq(const struct ost_id *oi)
-{
-	return oi->oi.oi_seq;
-}
-
-static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
-{
-	oi->oi.oi_seq = seq;
-}
-
-static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
-{
-	oi->oi.oi_id = oid;
-}
-
-static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
-				    const struct ost_id *src_oi)
-{
-	dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id);
-	dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq);
-}
-
-static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
-				    const struct ost_id *src_oi)
-{
-	dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id);
-	dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq);
-}
-
-/* extract OST sequence (group) from a wire ost_id (id/seq) pair */
-static inline __u64 ostid_seq(const struct ost_id *ostid)
-{
-	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
-		return FID_SEQ_OST_MDT0;
-
-	if (fid_seq_is_default(ostid->oi.oi_seq))
-		return FID_SEQ_LOV_DEFAULT;
-
-	if (fid_is_idif(&ostid->oi_fid))
-		return FID_SEQ_OST_MDT0;
-
-	return fid_seq(&ostid->oi_fid);
-}
-
-/* extract OST objid from a wire ost_id (id/seq) pair */
-static inline __u64 ostid_id(const struct ost_id *ostid)
-{
-	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
-		return ostid->oi.oi_id & IDIF_OID_MASK;
-
-	if (fid_seq_is_default(ostid->oi.oi_seq))
-		return ostid->oi.oi_id;
-
-	if (fid_is_idif(&ostid->oi_fid))
-		return fid_idif_id(fid_seq(&ostid->oi_fid),
-				   fid_oid(&ostid->oi_fid), 0);
-
-	return fid_oid(&ostid->oi_fid);
-}
-
-static inline void ostid_set_seq(struct ost_id *oi, __u64 seq)
-{
-	if (fid_seq_is_mdt0(seq) || fid_seq_is_default(seq)) {
-		oi->oi.oi_seq = seq;
-	} else {
-		oi->oi_fid.f_seq = seq;
-		/*
-		 * Note: if f_oid + f_ver is zero, we need to init it
-		 * to 1; otherwise ostid_seq() will treat this
-		 * as an old ostid (oi_seq == 0)
-		 */
-		if (!oi->oi_fid.f_oid && !oi->oi_fid.f_ver)
-			oi->oi_fid.f_oid = LUSTRE_FID_INIT_OID;
-	}
-}
-
-static inline void ostid_set_seq_mdt0(struct ost_id *oi)
-{
-	ostid_set_seq(oi, FID_SEQ_OST_MDT0);
-}
-
-static inline void ostid_set_seq_echo(struct ost_id *oi)
-{
-	ostid_set_seq(oi, FID_SEQ_ECHO);
-}
-
-static inline void ostid_set_seq_llog(struct ost_id *oi)
-{
-	ostid_set_seq(oi, FID_SEQ_LLOG);
-}
-
-static inline void ostid_cpu_to_le(const struct ost_id *src_oi,
-				   struct ost_id *dst_oi)
-{
-	if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) {
-		dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id);
-		dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq);
-	} else {
-		fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid);
-	}
-}
-
-static inline void ostid_le_to_cpu(const struct ost_id *src_oi,
-				   struct ost_id *dst_oi)
-{
-	if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) {
-		dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id);
-		dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq);
-	} else {
-		fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid);
-	}
-}
-
-/**
- * Sigh, because pre-2.4 uses
- * struct lov_mds_md_v1 {
- *	........
- *	__u64 lmm_object_id;
- *	__u64 lmm_object_seq;
- *      ......
- *      }
- * to identify the LOV (MDT) object, and lmm_object_seq will be a
- * normal FID sequence, which makes it hard to fold these conversions
- * into ostid_to_fid(), so we do the lmm_oi/fid conversion separately.
- *
- * We can tell the lmm_oi by this way,
- * 1.8: lmm_object_id = {inode}, lmm_object_gr = 0
- * 2.1: lmm_object_id = {oid < 128k}, lmm_object_seq = FID_SEQ_NORMAL
- * 2.4: lmm_oi.f_seq = FID_SEQ_NORMAL, lmm_oi.f_oid = {oid < 128k},
- *      lmm_oi.f_ver = 0
- *
- * But currently lmm_oi/lsm_oi does not have any "real" usages,
- * except for printing some information, and the user can always
- * get the real FID from the LMA. Besides, this multiple-case check
- * might make swabbing more complicated, so we keep using id/seq for lmm_oi.
- */
-
-static inline void fid_to_lmm_oi(const struct lu_fid *fid,
-				 struct ost_id *oi)
-{
-	oi->oi.oi_id = fid_oid(fid);
-	oi->oi.oi_seq = fid_seq(fid);
-}
-
-/**
- * Unpack an OST object id/seq (group) into a FID.  This is needed for
- * converting all obdo, lmm, lsm, etc. 64-bit id/seq pairs into proper
- * FIDs.  Note that if an id/seq is already in FID/IDIF format it will
- * be passed through unchanged.  Only legacy OST objects in "group 0"
- * will be mapped into the IDIF namespace so that they can fit into the
- * struct lu_fid fields without loss.
- */
-static inline int ostid_to_fid(struct lu_fid *fid, const struct ost_id *ostid,
-			       __u32 ost_idx)
-{
-	__u64 seq = ostid_seq(ostid);
-
-	if (ost_idx > 0xffff)
-		return -EBADF;
-
-	if (fid_seq_is_mdt0(seq)) {
-		__u64 oid = ostid_id(ostid);
-
-		/* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
-		 * that we map into the IDIF namespace.  It allows up to 2^48
-		 * objects per OST, as this is the object namespace that has
-		 * been in production for years.  This can handle create rates
-		 * of 1M objects/s/OST for 9 years, or combinations thereof.
-		 */
-		if (oid >= IDIF_MAX_OID)
-			return -EBADF;
-
-		fid->f_seq = fid_idif_seq(oid, ost_idx);
-		/* truncate to 32 bits by assignment */
-		fid->f_oid = oid;
-		/* in theory, not currently used */
-		fid->f_ver = oid >> 48;
-	} else if (!fid_seq_is_default(seq)) {
-		/* This is either an IDIF object, which identifies objects
-		 * across all OSTs, or a regular FID.  The IDIF namespace
-		 * maps legacy OST objects into the FID namespace.  In both
-		 * cases, we just pass the FID through, no conversion needed.
-		 */
-		if (ostid->oi_fid.f_ver)
-			return -EBADF;
-
-		*fid = ostid->oi_fid;
-	}
-
-	return 0;
-}
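A minimal usage sketch for the legacy "group 0" branch above, built only from the helpers in this file:

static inline int ostid_to_fid_example(struct lu_fid *fid)
{
	struct ost_id oi = { };

	ostid_set_seq_mdt0(&oi);	/* legacy namespace: FID_SEQ_OST_MDT0 */
	oi.oi.oi_id = 12345;		/* object id within the OST */
	return ostid_to_fid(fid, &oi, 7 /* ost_idx */);
}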
-#endif /* _UAPI_LUSTRE_OSTID_H_ */

+ 0 - 94
drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h

@@ -1,94 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * User-settable parameter keys
- *
- * Author: Nathan Rutman <nathan@clusterfs.com>
- */
-
-#ifndef _UAPI_LUSTRE_PARAM_H_
-#define _UAPI_LUSTRE_PARAM_H_
-
-/** \defgroup param param
- *
- * @{
- */
-
-/****************** User-settable parameter keys *********************/
-/* e.g.
- *	tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda
- *	lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0
- *		    ... testfs-MDT0000.lov.stripesize=4M
- *		    ... testfs-OST0000.ost.client_cache_seconds=15
- *		    ... testfs.sys.timeout=<secs>
- *		    ... testfs.llite.max_read_ahead_mb=16
- */
-
-/* System global or special params not handled in obd's proc.
- * See mgs_write_log_sys()
- */
-#define PARAM_TIMEOUT		"timeout="	   /* global */
-#define PARAM_LDLM_TIMEOUT	"ldlm_timeout="	   /* global */
-#define PARAM_AT_MIN		"at_min="	   /* global */
-#define PARAM_AT_MAX		"at_max="	   /* global */
-#define PARAM_AT_EXTRA		"at_extra="	   /* global */
-#define PARAM_AT_EARLY_MARGIN	"at_early_margin=" /* global */
-#define PARAM_AT_HISTORY	"at_history="	   /* global */
-#define PARAM_JOBID_VAR		"jobid_var="	   /* global */
-#define PARAM_MGSNODE		"mgsnode="	   /* only at mounttime */
-#define PARAM_FAILNODE		"failover.node="   /* add failover nid */
-#define PARAM_FAILMODE		"failover.mode="   /* initial mount only */
-#define PARAM_ACTIVE		"active="	   /* activate/deactivate */
-#define PARAM_NETWORK		"network="	   /* bind on nid */
-#define PARAM_ID_UPCALL		"identity_upcall=" /* identity upcall */
-
-/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
-#define PARAM_OST		"ost."
-#define PARAM_OSD		"osd."
-#define PARAM_OSC		"osc."
-#define PARAM_MDT		"mdt."
-#define PARAM_HSM		"mdt.hsm."
-#define PARAM_MDD		"mdd."
-#define PARAM_MDC		"mdc."
-#define PARAM_LLITE		"llite."
-#define PARAM_LOV		"lov."
-#define PARAM_LOD		"lod."
-#define PARAM_OSP		"osp."
-#define PARAM_SYS		"sys."		/* global */
-#define PARAM_SRPC		"srpc."
-#define PARAM_SRPC_FLVR		"srpc.flavor."
-#define PARAM_SRPC_UDESC	"srpc.udesc.cli2mdt"
-#define PARAM_SEC		"security."
-#define PARAM_QUOTA		"quota."	/* global */
-
-/** @} param */
-
-#endif /* _UAPI_LUSTRE_PARAM_H_ */

+ 0 - 1327
drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h

@@ -1,1327 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre/lustre_user.h
- *
- * Lustre public user-space interface definitions.
- */
-
-#ifndef _LUSTRE_USER_H
-#define _LUSTRE_USER_H
-
-/** \defgroup lustreuser lustreuser
- *
- * @{
- */
-
-#ifdef __KERNEL__
-# include <linux/fs.h>
-# include <linux/quota.h>
-# include <linux/sched/signal.h>
-# include <linux/string.h> /* snprintf() */
-# include <linux/version.h>
-#else /* !__KERNEL__ */
-# define NEED_QUOTA_DEFS
-# include <stdio.h> /* snprintf() */
-# include <string.h>
-# include <sys/quota.h>
-# include <sys/stat.h>
-#endif /* __KERNEL__ */
-#include <uapi/linux/lustre/lustre_fiemap.h>
-
-/*
- * We always need to use the 64-bit version because the structure
- * is shared across the entire cluster, where 32-bit and 64-bit
- * machines coexist.
- */
-#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
-typedef struct stat64   lstat_t;
-#define lstat_f  lstat64
-#define fstat_f		fstat64
-#define fstatat_f	fstatat64
-#else
-typedef struct stat     lstat_t;
-#define lstat_f  lstat
-#define fstat_f		fstat
-#define fstatat_f	fstatat
-#endif
-
-#define HAVE_LOV_USER_MDS_DATA
-
-#define LUSTRE_EOF 0xffffffffffffffffULL
-
-/* for statfs() */
-#define LL_SUPER_MAGIC 0x0BD00BD0
-
-#ifndef FSFILT_IOC_GETFLAGS
-#define FSFILT_IOC_GETFLAGS	       _IOR('f', 1, long)
-#define FSFILT_IOC_SETFLAGS	       _IOW('f', 2, long)
-#define FSFILT_IOC_GETVERSION	     _IOR('f', 3, long)
-#define FSFILT_IOC_SETVERSION	     _IOW('f', 4, long)
-#define FSFILT_IOC_GETVERSION_OLD	 _IOR('v', 1, long)
-#define FSFILT_IOC_SETVERSION_OLD	 _IOW('v', 2, long)
-#endif
-
-/* FIEMAP flags supported by Lustre */
-#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
-
-enum obd_statfs_state {
-	OS_STATE_DEGRADED       = 0x00000001, /**< RAID degraded/rebuilding */
-	OS_STATE_READONLY       = 0x00000002, /**< filesystem is read-only */
-	OS_STATE_RDONLY_1       = 0x00000004, /**< obsolete 1.6, was EROFS=30 */
-	OS_STATE_RDONLY_2       = 0x00000008, /**< obsolete 1.6, was EROFS=30 */
-	OS_STATE_RDONLY_3       = 0x00000010, /**< obsolete 1.6, was EROFS=30 */
-};
-
-struct obd_statfs {
-	__u64	   os_type;
-	__u64	   os_blocks;
-	__u64	   os_bfree;
-	__u64	   os_bavail;
-	__u64	   os_files;
-	__u64	   os_ffree;
-	__u8	    os_fsid[40];
-	__u32	   os_bsize;
-	__u32	   os_namelen;
-	__u64	   os_maxbytes;
-	__u32	   os_state;       /**< obd_statfs_state OS_STATE_* flag */
-	__u32	   os_fprecreated; /* objs available now to the caller */
-				   /* used in QoS code to find preferred OSTs */
-	__u32	   os_spare2;
-	__u32	   os_spare3;
-	__u32	   os_spare4;
-	__u32	   os_spare5;
-	__u32	   os_spare6;
-	__u32	   os_spare7;
-	__u32	   os_spare8;
-	__u32	   os_spare9;
-};
-
-/**
- * File IDentifier.
- *
- * FID is a cluster-wide unique identifier of a file or an object (stripe).
- * FIDs are never reused.
- **/
-struct lu_fid {
-       /**
-	* FID sequence. Sequence is a unit of migration: all files (objects)
-	* with FIDs from a given sequence are stored on the same server.
-	* Lustre should support 2^64 objects, so even if each sequence
-	* has only a single object we can still enumerate 2^64 objects.
-	**/
-	__u64 f_seq;
-	/* FID number within sequence. */
-	__u32 f_oid;
-	/**
-	 * FID version, used to distinguish different versions (in the sense
-	 * of snapshots, etc.) of the same file system object. Not currently
-	 * used.
-	 **/
-	__u32 f_ver;
-};
-
-static inline bool fid_is_zero(const struct lu_fid *fid)
-{
-	return !fid->f_seq && !fid->f_oid;
-}
-
-struct filter_fid {
-	struct lu_fid	ff_parent;  /* ff_parent.f_ver == file stripe number */
-};
-
-/* keep this one for compatibility */
-struct filter_fid_old {
-	struct lu_fid	ff_parent;
-	__u64		ff_objid;
-	__u64		ff_seq;
-};
-
-/* Userspace should treat lu_fid as opaque, and only use the following methods
- * to print or parse them.  Other functions (e.g. compare, swab) could be moved
- * here from lustre_idl.h if needed.
- */
-struct lu_fid;
-
-/**
- * Struct for object attributes, kept in the inode's extended attributes (EA).
- * Introduced in the 2.0 release (see b15993 for details).
- * Added to all objects since Lustre 2.4, as it contains the self FID.
- */
-struct lustre_mdt_attrs {
-	/**
-	 * Bitfield for supported data in this structure. From enum lma_compat.
-	 * lma_self_fid and lma_flags are always available.
-	 */
-	__u32   lma_compat;
-	/**
-	 * Per-file incompat feature list. Lustre version should support all
-	 * flags set in this field. The supported feature mask is available in
-	 * LMA_INCOMPAT_SUPP.
-	 */
-	__u32   lma_incompat;
-	/** FID of this inode */
-	struct lu_fid  lma_self_fid;
-};
-
-/**
- * Prior to 2.4, the LMA structure also included SOM attributes, which have
- * since been moved to a dedicated xattr.
- * lma_flags was also removed because of the lma_compat/incompat fields.
- */
-#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
-
-/**
- * OST object IDentifier.
- */
-struct ost_id {
-	union {
-		struct {
-			__u64	oi_id;
-			__u64	oi_seq;
-		} oi;
-		struct lu_fid oi_fid;
-	};
-};
-
-#define DOSTID "%#llx:%llu"
-#define POSTID(oi) ostid_seq(oi), ostid_id(oi)
-
-/*
- * The ioctl naming rules:
- * LL_*     - works on the currently opened filehandle instead of parent dir
- * *_OBD_*  - gets data for both OSC or MDC (LOV, LMV indirectly)
- * *_MDC_*  - gets/sets data related to MDC
- * *_LOV_*  - gets/sets data related to OSC/LOV
- * *FILE*   - called on parent dir and passes in a filename
- * *STRIPE* - set/get lov_user_md
- * *INFO    - set/get lov_user_mds_data
- */
-/*	lustre_ioctl.h			101-150 */
-#define LL_IOC_GETFLAGS		 _IOR('f', 151, long)
-#define LL_IOC_SETFLAGS		 _IOW('f', 152, long)
-#define LL_IOC_CLRFLAGS		 _IOW('f', 153, long)
-#define LL_IOC_LOV_SETSTRIPE	    _IOW('f', 154, long)
-#define LL_IOC_LOV_GETSTRIPE	    _IOW('f', 155, long)
-#define LL_IOC_LOV_SETEA		_IOW('f', 156, long)
-/*	LL_IOC_RECREATE_OBJ		157 obsolete */
-/*	LL_IOC_RECREATE_FID		158 obsolete */
-#define LL_IOC_GROUP_LOCK	       _IOW('f', 158, long)
-#define LL_IOC_GROUP_UNLOCK	     _IOW('f', 159, long)
-/* #define LL_IOC_QUOTACHECK		160 OBD_IOC_QUOTACHECK */
-/* #define LL_IOC_POLL_QUOTACHECK	161 OBD_IOC_POLL_QUOTACHECK */
-/* #define LL_IOC_QUOTACTL		162 OBD_IOC_QUOTACTL */
-#define IOC_OBD_STATFS		  _IOWR('f', 164, struct obd_statfs *)
-/*	IOC_LOV_GETINFO			165 obsolete */
-#define LL_IOC_FLUSHCTX		 _IOW('f', 166, long)
-/* LL_IOC_RMTACL			167 obsolete */
-#define LL_IOC_GETOBDCOUNT	      _IOR('f', 168, long)
-#define LL_IOC_LLOOP_ATTACH	     _IOWR('f', 169, long)
-#define LL_IOC_LLOOP_DETACH	     _IOWR('f', 170, long)
-#define LL_IOC_LLOOP_INFO	       _IOWR('f', 171, struct lu_fid)
-#define LL_IOC_LLOOP_DETACH_BYDEV       _IOWR('f', 172, long)
-#define LL_IOC_PATH2FID		 _IOR('f', 173, long)
-#define LL_IOC_GET_CONNECT_FLAGS	_IOWR('f', 174, __u64 *)
-#define LL_IOC_GET_MDTIDX	       _IOR('f', 175, int)
-
-/*	lustre_ioctl.h			177-210 */
-#define LL_IOC_HSM_STATE_GET		_IOR('f', 211, struct hsm_user_state)
-#define LL_IOC_HSM_STATE_SET		_IOW('f', 212, struct hsm_state_set)
-#define LL_IOC_HSM_CT_START		_IOW('f', 213, struct lustre_kernelcomm)
-#define LL_IOC_HSM_COPY_START		_IOW('f', 214, struct hsm_copy *)
-#define LL_IOC_HSM_COPY_END		_IOW('f', 215, struct hsm_copy *)
-#define LL_IOC_HSM_PROGRESS		_IOW('f', 216, struct hsm_user_request)
-#define LL_IOC_HSM_REQUEST		_IOW('f', 217, struct hsm_user_request)
-#define LL_IOC_DATA_VERSION		_IOR('f', 218, struct ioc_data_version)
-#define LL_IOC_LOV_SWAP_LAYOUTS		_IOW('f', 219, \
-						struct lustre_swap_layouts)
-#define LL_IOC_HSM_ACTION		_IOR('f', 220, \
-						struct hsm_current_action)
-/* see <lustre_lib.h> for ioctl numbers 221-232 */
-
-#define LL_IOC_LMV_SETSTRIPE	    _IOWR('f', 240, struct lmv_user_md)
-#define LL_IOC_LMV_GETSTRIPE	    _IOWR('f', 241, struct lmv_user_md)
-#define LL_IOC_SET_LEASE		_IOWR('f', 243, long)
-#define LL_IOC_GET_LEASE		_IO('f', 244)
-#define LL_IOC_HSM_IMPORT		_IOWR('f', 245, struct hsm_user_import)
-#define LL_IOC_LMV_SET_DEFAULT_STRIPE	_IOWR('f', 246, struct lmv_user_md)
-#define LL_IOC_MIGRATE			_IOR('f', 247, int)
-#define LL_IOC_FID2MDTIDX		_IOWR('f', 248, struct lu_fid)
-#define LL_IOC_GETPARENT		_IOWR('f', 249, struct getparent)
-
-/* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
-enum ll_lease_type {
-	LL_LEASE_RDLCK	= 0x1,
-	LL_LEASE_WRLCK	= 0x2,
-	LL_LEASE_UNLCK	= 0x4,
-};
-
-#define LL_STATFS_LMV	   1
-#define LL_STATFS_LOV	   2
-#define LL_STATFS_NODELAY	4
-
-#define IOC_MDC_TYPE	    'i'
-#define IOC_MDC_LOOKUP	  _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_GETFILESTRIPE   _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
-#define IOC_MDC_GETFILEINFO     _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
-#define LL_IOC_MDC_GETINFO      _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
-
-#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
-
-/* Define O_LOV_DELAY_CREATE as a mask of flags that are not useful for
- * regular files, are unlikely to be used in practice, and are not harmful if
- * used incorrectly.  O_NOCTTY and FASYNC are only meaningful for character
- * devices and are safe for use on new files (See LU-812, LU-4209).
- */
-#define O_LOV_DELAY_CREATE	(O_NOCTTY | FASYNC)
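-
-/*
- * Usage sketch (hypothetical "path" and a previously filled lov_user_md
- * "lum"): create a file whose layout is set explicitly before first use:
- *
- *	int fd = open(path, O_CREAT | O_WRONLY | O_LOV_DELAY_CREATE, 0644);
- *
- *	if (fd >= 0)
- *		ioctl(fd, LL_IOC_LOV_SETSTRIPE, lum);
- */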
-
-#define LL_FILE_IGNORE_LOCK     0x00000001
-#define LL_FILE_GROUP_LOCKED    0x00000002
-#define LL_FILE_READAHEA	0x00000004
-#define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
-#define LL_FILE_LOCKLESS_IO     0x00000010 /* server-side locks with cio */
-#define LL_FILE_RMTACL	  0x00000020
-
-#define LOV_USER_MAGIC_V1	0x0BD10BD0
-#define LOV_USER_MAGIC		LOV_USER_MAGIC_V1
-#define LOV_USER_MAGIC_JOIN_V1	0x0BD20BD0
-#define LOV_USER_MAGIC_V3	0x0BD30BD0
-/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
-#define LOV_USER_MAGIC_SPECIFIC	0x0BD50BD0	/* for specific OSTs */
-
-#define LMV_USER_MAGIC    0x0CD30CD0    /*default lmv magic*/
-
-#define LOV_PATTERN_RAID0	0x001
-#define LOV_PATTERN_RAID1	0x002
-#define LOV_PATTERN_FIRST	0x100
-#define LOV_PATTERN_CMOBD	0x200
-
-#define LOV_PATTERN_F_MASK	0xffff0000
-#define LOV_PATTERN_F_HOLE	0x40000000 /* there is hole in LOV EA */
-#define LOV_PATTERN_F_RELEASED	0x80000000 /* HSM released file */
-
-#define LOV_MAXPOOLNAME 15
-#define LOV_POOLNAMEF "%.15s"
-
-#define LOV_MIN_STRIPE_BITS 16   /* maximum PAGE_SIZE (ia64), power of 2 */
-#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
-#define LOV_MAX_STRIPE_COUNT_OLD 160
-/* This calculation is crafted so that an input of 4096 will result in 160,
- * which in turn is equal to the old maximal stripe count.
- * XXX: In fact this is too simplified for now; what it also needs is an
- * ea_type argument to know exactly how much space each stripe consumes.
- *
- * The limit of 12 pages is somewhat arbitrary, but is a reasonably large
- * allocation that is sufficient for the current generation of systems.
- *
- * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1),
- * i.e. (12 * 4096 - 256) / 24 = 2037, rounded down to 2000.
- */
-#define LOV_MAX_STRIPE_COUNT 2000  /* ((12 * 4096 - 256) / 24) */
-#define LOV_ALL_STRIPES       0xffff /* only valid for directories */
-#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
-
-#define XATTR_LUSTRE_PREFIX	"lustre."
-#define XATTR_LUSTRE_LOV	"lustre.lov"
-
-#define lov_user_ost_data lov_user_ost_data_v1
-struct lov_user_ost_data_v1 {     /* per-stripe data structure */
-	struct ost_id l_ost_oi;	  /* OST object ID */
-	__u32 l_ost_gen;	  /* generation of this OST index */
-	__u32 l_ost_idx;	  /* OST index in LOV */
-} __packed;
-
-#define lov_user_md lov_user_md_v1
-struct lov_user_md_v1 {	   /* LOV EA user data (host-endian) */
-	__u32 lmm_magic;	  /* magic number = LOV_USER_MAGIC_V1 */
-	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-	struct ost_id lmm_oi;	  /* LOV object ID */
-	__u32 lmm_stripe_size;    /* size of stripe in bytes */
-	__u16 lmm_stripe_count;   /* num stripes in use for this object */
-	union {
-		__u16 lmm_stripe_offset;  /* starting stripe offset in
-					   * lmm_objects, use when writing
-					   */
-		__u16 lmm_layout_gen;     /* layout generation number
-					   * used when reading
-					   */
-	};
-	struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-} __attribute__((packed,  __may_alias__));
-
-struct lov_user_md_v3 {	   /* LOV EA user data (host-endian) */
-	__u32 lmm_magic;	  /* magic number = LOV_USER_MAGIC_V3 */
-	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-	struct ost_id lmm_oi;	  /* LOV object ID */
-	__u32 lmm_stripe_size;    /* size of stripe in bytes */
-	__u16 lmm_stripe_count;   /* num stripes in use for this object */
-	union {
-		__u16 lmm_stripe_offset;  /* starting stripe offset in
-					   * lmm_objects, use when writing
-					   */
-		__u16 lmm_layout_gen;     /* layout generation number
-					   * used when reading
-					   */
-	};
-	char  lmm_pool_name[LOV_MAXPOOLNAME + 1];   /* pool name */
-	struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-} __packed;
-
-static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
-{
-	if (lmm_magic == LOV_USER_MAGIC_V1)
-		return sizeof(struct lov_user_md_v1) +
-				stripes * sizeof(struct lov_user_ost_data_v1);
-	return sizeof(struct lov_user_md_v3) +
-	       stripes * sizeof(struct lov_user_ost_data_v1);
-}
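-
-/*
- * Usage sketch (hypothetical open file descriptor "fd"): sizing a buffer
- * before fetching striping information with LL_IOC_LOV_GETSTRIPE.  The
- * caller fills lmm_magic and lmm_stripe_count so the kernel knows how
- * much room the buffer has:
- *
- *	struct lov_user_md_v1 *lum;
- *
- *	lum = malloc(lov_user_md_size(LOV_MAX_STRIPE_COUNT,
- *				      LOV_USER_MAGIC_V1));
- *	if (lum) {
- *		lum->lmm_magic = LOV_USER_MAGIC_V1;
- *		lum->lmm_stripe_count = LOV_MAX_STRIPE_COUNT;
- *		ioctl(fd, LL_IOC_LOV_GETSTRIPE, lum);
- *	}
- */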
-
-/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
- * use this.  It is unsafe to #define those values in this header as it
- * is possible the application has already #included <sys/stat.h>.
- */
-#ifdef HAVE_LOV_USER_MDS_DATA
-#define lov_user_mds_data lov_user_mds_data_v1
-struct lov_user_mds_data_v1 {
-	lstat_t lmd_st;		 /* MDS stat struct */
-	struct lov_user_md_v1 lmd_lmm;  /* LOV EA V1 user data */
-} __packed;
-
-struct lov_user_mds_data_v3 {
-	lstat_t lmd_st;		 /* MDS stat struct */
-	struct lov_user_md_v3 lmd_lmm;  /* LOV EA V3 user data */
-} __packed;
-#endif
-
-struct lmv_user_mds_data {
-	struct lu_fid	lum_fid;
-	__u32		lum_padding;
-	__u32		lum_mds;
-};
-
-enum lmv_hash_type {
-	LMV_HASH_TYPE_UNKNOWN	= 0,	/* 0 is reserved for testing purposes */
-	LMV_HASH_TYPE_ALL_CHARS = 1,
-	LMV_HASH_TYPE_FNV_1A_64 = 2,
-};
-
-#define LMV_HASH_NAME_ALL_CHARS		"all_char"
-#define LMV_HASH_NAME_FNV_1A_64		"fnv_1a_64"
-
-/*
- * Derived the same way as LOV_MAX_STRIPE_COUNT above:
- * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data)
- */
-#define LMV_MAX_STRIPE_COUNT 2000  /* ((12 * 4096 - 256) / 24) */
-#define lmv_user_md lmv_user_md_v1
-struct lmv_user_md_v1 {
-	__u32	lum_magic;	 /* must be the first field */
-	__u32	lum_stripe_count;  /* dirstripe count */
-	__u32	lum_stripe_offset; /* MDT idx for default dirstripe */
-	__u32	lum_hash_type;     /* Dir stripe policy */
-	__u32	lum_type;	  /* LMV type: default or normal */
-	__u32	lum_padding1;
-	__u32	lum_padding2;
-	__u32	lum_padding3;
-	char	lum_pool_name[LOV_MAXPOOLNAME + 1];
-	struct	lmv_user_mds_data  lum_objects[0];
-} __packed;
-
-static inline int lmv_user_md_size(int stripes, int lmm_magic)
-{
-	return sizeof(struct lmv_user_md) +
-		      stripes * sizeof(struct lmv_user_mds_data);
-}
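-
-/*
- * e.g. a sketch (hypothetical stripe count "n"): sizing a buffer before
- * fetching directory striping with LL_IOC_LMV_GETSTRIPE; the caller still
- * has to fill lum_magic and lum_stripe_count before the ioctl:
- *
- *	struct lmv_user_md *lum = malloc(lmv_user_md_size(n, LMV_USER_MAGIC));
- */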
-
-struct ll_recreate_obj {
-	__u64 lrc_id;
-	__u32 lrc_ost_idx;
-};
-
-struct ll_fid {
-	__u64 id;	 /* holds object id */
-	__u32 generation; /* holds object generation */
-	__u32 f_type;     /* holds object type or stripe idx when passing it to
-			   * OST for saving into EA.
-			   */
-};
-
-#define UUID_MAX	40
-struct obd_uuid {
-	char uuid[UUID_MAX];
-};
-
-static inline bool obd_uuid_equals(const struct obd_uuid *u1,
-				   const struct obd_uuid *u2)
-{
-	return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0;
-}
-
-static inline int obd_uuid_empty(struct obd_uuid *uuid)
-{
-	return uuid->uuid[0] == '\0';
-}
-
-static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
-{
-	strncpy((char *)uuid->uuid, tmp, sizeof(*uuid));
-	uuid->uuid[sizeof(*uuid) - 1] = '\0';
-}
-
-/* For printf's only, make sure uuid is terminated */
-static inline char *obd_uuid2str(const struct obd_uuid *uuid)
-{
-	if (!uuid)
-		return NULL;
-
-	if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
-		/* Obviously not safe, but for printfs, no real harm done...
-		 * we're always null-terminated, even in a race.
-		 */
-		static char temp[sizeof(*uuid)];
-
-		memcpy(temp, uuid->uuid, sizeof(*uuid) - 1);
-		temp[sizeof(*uuid) - 1] = '\0';
-		return temp;
-	}
-	return (char *)(uuid->uuid);
-}
-
-/* Extract fsname from uuid (or target name) of a target
- * e.g. (myfs-OST0007_UUID -> myfs)
- * see also deuuidify.
- */
-static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
-{
-	char *p;
-
-	strncpy(buf, uuid, buflen - 1);
-	buf[buflen - 1] = '\0';
-	p = strrchr(buf, '-');
-	if (p)
-		*p = '\0';
-}
-
-/* printf display format
- * usage: printf("file FID is "DFID"\n", PFID(fid));
- */
-#define FID_NOBRACE_LEN 40
-#define FID_LEN (FID_NOBRACE_LEN + 2)
-#define DFID_NOBRACE "%#llx:0x%x:0x%x"
-#define DFID "[" DFID_NOBRACE "]"
-#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
-
-/* scanf input parse format for fids in DFID_NOBRACE format
- * Need to strip '[' from DFID format first or use "["SFID"]" at caller.
- * usage: sscanf(fidstr, SFID, RFID(&fid));
- */
-#define SFID "0x%llx:0x%x:0x%x"
-#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
-
-/********* Quotas **********/
-
-#define Q_QUOTACHECK   0x800100 /* deprecated as of 2.4 */
-#define Q_INITQUOTA    0x800101 /* deprecated as of 2.4  */
-#define Q_GETOINFO     0x800102 /* get obd quota info */
-#define Q_GETOQUOTA    0x800103 /* get obd quotas */
-#define Q_FINVALIDATE  0x800104 /* deprecated as of 2.4 */
-
-/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
-#define LUSTRE_Q_QUOTAON    0x800002	/* deprecated as of 2.4 */
-#define LUSTRE_Q_QUOTAOFF   0x800003	/* deprecated as of 2.4 */
-#define LUSTRE_Q_GETINFO    0x800005     /* get information about quota files */
-#define LUSTRE_Q_SETINFO    0x800006     /* set information about quota files */
-#define LUSTRE_Q_GETQUOTA   0x800007     /* get user quota structure */
-#define LUSTRE_Q_SETQUOTA   0x800008     /* set user quota structure */
-/* lustre-specific control commands */
-#define LUSTRE_Q_INVALIDATE  0x80000b	/* deprecated as of 2.4 */
-#define LUSTRE_Q_FINVALIDATE 0x80000c	/* deprecated as of 2.4 */
-
-#define UGQUOTA 2       /* set both USRQUOTA and GRPQUOTA */
-
-#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
-
-/* permission */
-#define N_PERMS_MAX      64
-
-struct perm_downcall_data {
-	__u64 pdd_nid;
-	__u32 pdd_perm;
-	__u32 pdd_padding;
-};
-
-struct identity_downcall_data {
-	__u32			    idd_magic;
-	__u32			    idd_err;
-	__u32			    idd_uid;
-	__u32			    idd_gid;
-	__u32			    idd_nperms;
-	__u32			    idd_ngroups;
-	struct perm_downcall_data idd_perms[N_PERMS_MAX];
-	__u32			    idd_groups[0];
-};
-
-/* lustre volatile file support
- * file name header: ".^L^S^T^R:VOLATILE"
- */
-#define LUSTRE_VOLATILE_HDR	".\x0c\x13\x14\x12:VOLATILE"
-#define LUSTRE_VOLATILE_HDR_LEN	14
-/* hdr + MDT index */
-#define LUSTRE_VOLATILE_IDX	LUSTRE_VOLATILE_HDR":%.4X:"
-
-enum lustre_quota_version {
-	LUSTRE_QUOTA_V2 = 1
-};
-
-/* XXX: same as if_dqinfo struct in kernel */
-struct obd_dqinfo {
-	__u64 dqi_bgrace;
-	__u64 dqi_igrace;
-	__u32 dqi_flags;
-	__u32 dqi_valid;
-};
-
-/* XXX: same as if_dqblk struct in kernel, plus one padding */
-struct obd_dqblk {
-	__u64 dqb_bhardlimit;
-	__u64 dqb_bsoftlimit;
-	__u64 dqb_curspace;
-	__u64 dqb_ihardlimit;
-	__u64 dqb_isoftlimit;
-	__u64 dqb_curinodes;
-	__u64 dqb_btime;
-	__u64 dqb_itime;
-	__u32 dqb_valid;
-	__u32 dqb_padding;
-};
-
-enum {
-	QC_GENERAL      = 0,
-	QC_MDTIDX       = 1,
-	QC_OSTIDX       = 2,
-	QC_UUID	 = 3
-};
-
-struct if_quotactl {
-	__u32		   qc_cmd;
-	__u32		   qc_type;
-	__u32		   qc_id;
-	__u32		   qc_stat;
-	__u32		   qc_valid;
-	__u32		   qc_idx;
-	struct obd_dqinfo       qc_dqinfo;
-	struct obd_dqblk	qc_dqblk;
-	char		    obd_type[16];
-	struct obd_uuid	 obd_uuid;
-};
-
-/* swap layout flags */
-#define SWAP_LAYOUTS_CHECK_DV1		(1 << 0)
-#define SWAP_LAYOUTS_CHECK_DV2		(1 << 1)
-#define SWAP_LAYOUTS_KEEP_MTIME		(1 << 2)
-#define SWAP_LAYOUTS_KEEP_ATIME		(1 << 3)
-#define SWAP_LAYOUTS_CLOSE		(1 << 4)
-
-/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
-#define SWAP_LAYOUTS_MDS_HSM		(1 << 31)
-struct lustre_swap_layouts {
-	__u64	sl_flags;
-	__u32	sl_fd;
-	__u32	sl_gid;
-	__u64	sl_dv1;
-	__u64	sl_dv2;
-};
-
-/********* Changelogs **********/
-/** Changelog record types */
-enum changelog_rec_type {
-	CL_MARK     = 0,
-	CL_CREATE   = 1,  /* namespace */
-	CL_MKDIR    = 2,  /* namespace */
-	CL_HARDLINK = 3,  /* namespace */
-	CL_SOFTLINK = 4,  /* namespace */
-	CL_MKNOD    = 5,  /* namespace */
-	CL_UNLINK   = 6,  /* namespace */
-	CL_RMDIR    = 7,  /* namespace */
-	CL_RENAME   = 8,  /* namespace */
-	CL_EXT      = 9,  /* namespace extended record (2nd half of rename) */
-	CL_OPEN     = 10, /* not currently used */
-	CL_CLOSE    = 11, /* may be written to log only with mtime change */
-	CL_LAYOUT   = 12, /* file layout/striping modified */
-	CL_TRUNC    = 13,
-	CL_SETATTR  = 14,
-	CL_XATTR    = 15,
-	CL_HSM      = 16, /* HSM specific events, see flags */
-	CL_MTIME    = 17, /* Precedence: setattr > mtime > ctime > atime */
-	CL_CTIME    = 18,
-	CL_ATIME    = 19,
-	CL_LAST
-};
-
-static inline const char *changelog_type2str(int type)
-{
-	static const char *changelog_str[] = {
-		"MARK",  "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
-		"RMDIR", "RENME", "RNMTO", "OPEN",  "CLOSE", "LYOUT", "TRUNC",
-		"SATTR", "XATTR", "HSM",   "MTIME", "CTIME", "ATIME",
-	};
-
-	if (type >= 0 && type < CL_LAST)
-		return changelog_str[type];
-	return NULL;
-}
-
-/* per-record flags */
-#define CLF_FLAGSHIFT   12
-#define CLF_FLAGMASK    ((1U << CLF_FLAGSHIFT) - 1)
-#define CLF_VERMASK     (~CLF_FLAGMASK)
-enum changelog_rec_flags {
-	CLF_VERSION	= 0x1000,
-	CLF_RENAME	= 0x2000,
-	CLF_JOBID	= 0x4000,
-	CLF_SUPPORTED	= CLF_VERSION | CLF_RENAME | CLF_JOBID
-};
-
-/* Anything under the flagmask may be per-type (if desired) */
-/* Flags for unlink */
-#define CLF_UNLINK_LAST       0x0001 /* Unlink of last hardlink */
-#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
-				     /* HSM cleaning needed */
-/* Flags for rename */
-#define CLF_RENAME_LAST		0x0001	/* rename unlinks the last hardlink
-					 * of the target
-					 */
-#define CLF_RENAME_LAST_EXISTS	0x0002	/* rename unlinks the last hardlink of
-					 * a target that has an archive in the
-					 * backend
-					 */
-
-/* Flags for HSM */
-/* 12b used (from high weight to low weight):
- * 2b for flags
- * 3b for event
- * 7b for error code
- */
-#define CLF_HSM_ERR_L	0 /* HSM return code, 7 bits */
-#define CLF_HSM_ERR_H	6
-#define CLF_HSM_EVENT_L      7 /* HSM event, 3 bits, see enum hsm_event */
-#define CLF_HSM_EVENT_H      9
-#define CLF_HSM_FLAG_L      10 /* HSM flags, 2 bits, 1 used, 1 spare */
-#define CLF_HSM_FLAG_H      11
-#define CLF_HSM_SPARE_L     12 /* 4 spare bits */
-#define CLF_HSM_SPARE_H     15
-#define CLF_HSM_LAST	15
-
-/* Remove bits higher than _h, then extract the value
- * between _h and _l by shifting the lower weight to bit 0.
- */
-#define CLF_GET_BITS(_b, _h, _l) (((_b << (CLF_HSM_LAST - _h)) & 0xFFFF) \
-				   >> (CLF_HSM_LAST - _h + _l))
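-
-/*
- * For example, extracting the HSM event bits (_h = CLF_HSM_EVENT_H = 9,
- * _l = CLF_HSM_EVENT_L = 7) from a flags value that carries HE_RELEASE (3)
- * in the event field (0x3 << 7 = 0x180):
- *
- *	((0x180 << 6) & 0xFFFF) >> 13 == 3, i.e. HE_RELEASE
- */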
-
-#define CLF_HSM_SUCCESS      0x00
-#define CLF_HSM_MAXERROR     0x7E
-#define CLF_HSM_ERROVERFLOW  0x7F
-
-#define CLF_HSM_DIRTY	1 /* file is dirty after HSM request end */
-
-/* 3-bit field => 8 values allowed */
-enum hsm_event {
-	HE_ARCHIVE      = 0,
-	HE_RESTORE      = 1,
-	HE_CANCEL       = 2,
-	HE_RELEASE      = 3,
-	HE_REMOVE       = 4,
-	HE_STATE	= 5,
-	HE_SPARE1       = 6,
-	HE_SPARE2       = 7,
-};
-
-static inline enum hsm_event hsm_get_cl_event(__u16 flags)
-{
-	return CLF_GET_BITS(flags, CLF_HSM_EVENT_H, CLF_HSM_EVENT_L);
-}
-
-static inline void hsm_set_cl_event(int *flags, enum hsm_event he)
-{
-	*flags |= (he << CLF_HSM_EVENT_L);
-}
-
-static inline __u16 hsm_get_cl_flags(int flags)
-{
-	return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
-}
-
-static inline void hsm_set_cl_flags(int *flags, int bits)
-{
-	*flags |= (bits << CLF_HSM_FLAG_L);
-}
-
-static inline int hsm_get_cl_error(int flags)
-{
-	return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
-}
-
-static inline void hsm_set_cl_error(int *flags, int error)
-{
-	*flags |= (error << CLF_HSM_ERR_L);
-}
-
-enum changelog_send_flag {
-	/* Not yet implemented */
-	CHANGELOG_FLAG_FOLLOW	= 0x01,
-	/*
-	 * Blocking IO makes sense in case of slow user parsing of the records,
-	 * but it also prevents us from cleaning up if the records are not
-	 * consumed.
-	 */
-	CHANGELOG_FLAG_BLOCK	= 0x02,
-	/* Pack jobid into the changelog records if available. */
-	CHANGELOG_FLAG_JOBID	= 0x04,
-};
-
-#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \
-				  changelog_rec_offset(CLF_SUPPORTED))
-
-/* 31 usable bytes of string + a null terminator. */
-#define LUSTRE_JOBID_SIZE	32
-
-/*
- * This is the minimal changelog record. It can contain extensions
- * such as rename fields or process jobid. Its exact content is described
- * by the cr_flags.
- *
- * Extensions are packed in the same order as their corresponding flags.
- */
-struct changelog_rec {
-	__u16		 cr_namelen;
-	__u16		 cr_flags; /**< \a changelog_rec_flags */
-	__u32		 cr_type;  /**< \a changelog_rec_type */
-	__u64		 cr_index; /**< changelog record number */
-	__u64		 cr_prev;  /**< last index for this target fid */
-	__u64		 cr_time;
-	union {
-		struct lu_fid    cr_tfid;	/**< target fid */
-		__u32	 cr_markerflags; /**< CL_MARK flags */
-	};
-	struct lu_fid	    cr_pfid;	/**< parent fid */
-} __packed;
-
-/* Changelog extension for RENAME. */
-struct changelog_ext_rename {
-	struct lu_fid	cr_sfid;	/**< source fid, or zero */
-	struct lu_fid	cr_spfid;	/**< source parent fid, or zero */
-};
-
-/* Changelog extension to include JOBID. */
-struct changelog_ext_jobid {
-	char	cr_jobid[LUSTRE_JOBID_SIZE];	/**< zero-terminated string. */
-};
-
-static inline size_t changelog_rec_offset(enum changelog_rec_flags crf)
-{
-	size_t size = sizeof(struct changelog_rec);
-
-	if (crf & CLF_RENAME)
-		size += sizeof(struct changelog_ext_rename);
-
-	if (crf & CLF_JOBID)
-		size += sizeof(struct changelog_ext_jobid);
-
-	return size;
-}
-
-static inline size_t changelog_rec_size(struct changelog_rec *rec)
-{
-	return changelog_rec_offset(rec->cr_flags);
-}
-
-static inline size_t changelog_rec_varsize(struct changelog_rec *rec)
-{
-	return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen;
-}
-
-static inline
-struct changelog_ext_rename *changelog_rec_rename(struct changelog_rec *rec)
-{
-	enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION;
-
-	return (struct changelog_ext_rename *)((char *)rec +
-					       changelog_rec_offset(crf));
-}
-
-/* The jobid follows the rename extension, if present */
-static inline
-struct changelog_ext_jobid *changelog_rec_jobid(struct changelog_rec *rec)
-{
-	enum changelog_rec_flags crf = rec->cr_flags &
-				       (CLF_VERSION | CLF_RENAME);
-
-	return (struct changelog_ext_jobid *)((char *)rec +
-					      changelog_rec_offset(crf));
-}
-
-/* The name follows the rename and jobid extensions, if present */
-static inline char *changelog_rec_name(struct changelog_rec *rec)
-{
-	return (char *)rec + changelog_rec_offset(rec->cr_flags &
-						  CLF_SUPPORTED);
-}
-
-static inline size_t changelog_rec_snamelen(struct changelog_rec *rec)
-{
-	return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1;
-}
-
-static inline char *changelog_rec_sname(struct changelog_rec *rec)
-{
-	char *cr_name = changelog_rec_name(rec);
-
-	return cr_name + strlen(cr_name) + 1;
-}
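-
-/*
- * Usage sketch (hypothetical record pointer "rec"): the optional extension
- * fields are accessed through the helpers above, in the order they are
- * packed:
- *
- *	struct lu_fid sfid;
- *
- *	if (rec->cr_flags & CLF_RENAME)
- *		sfid = changelog_rec_rename(rec)->cr_sfid;
- *	if (rec->cr_flags & CLF_JOBID)
- *		printf("job: %s\n", changelog_rec_jobid(rec)->cr_jobid);
- *	printf("name: %.*s\n", rec->cr_namelen, changelog_rec_name(rec));
- */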
-
-/**
- * Remap a record to the desired format as specified by the crf flags.
- * The record must be big enough to contain the final remapped version.
- * Superfluous extension fields are removed and missing ones are added
- * and zeroed. The flags of the record are updated accordingly.
- *
- * The jobid and rename extensions can be added to a record, typically to
- * match the format an application expects. In this case, the newly added
- * fields will be zeroed.
- * The jobid field can be removed, to guarantee compatibility with older
- * clients that don't expect this field in the records they process.
- *
- * The following assumptions are being made:
- *	- CLF_RENAME will not be removed
- *	- CLF_JOBID will not be added without CLF_RENAME being added too
- *
- * @param[in,out]  rec		The record to remap.
- * @param[in]	   crf_wanted	Flags describing the desired extensions.
- */
-static inline void changelog_remap_rec(struct changelog_rec *rec,
-				       enum changelog_rec_flags crf_wanted)
-{
-	char *jid_mov, *rnm_mov;
-
-	crf_wanted &= CLF_SUPPORTED;
-
-	if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted)
-		return;
-
-	/* First move the variable-length name field */
-	memmove((char *)rec + changelog_rec_offset(crf_wanted),
-		changelog_rec_name(rec), rec->cr_namelen);
-
-	/* Locations of jobid and rename extensions in the remapped record */
-	jid_mov = (char *)rec +
-		  changelog_rec_offset(crf_wanted & ~CLF_JOBID);
-	rnm_mov = (char *)rec +
-		  changelog_rec_offset(crf_wanted & ~(CLF_JOBID | CLF_RENAME));
-
-	/* Move the extension fields to the desired positions */
-	if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID))
-		memmove(jid_mov, changelog_rec_jobid(rec),
-			sizeof(struct changelog_ext_jobid));
-
-	if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME))
-		memmove(rnm_mov, changelog_rec_rename(rec),
-			sizeof(struct changelog_ext_rename));
-
-	/* Clear newly added fields */
-	if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID))
-		memset(jid_mov, 0, sizeof(struct changelog_ext_jobid));
-
-	if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME))
-		memset(rnm_mov, 0, sizeof(struct changelog_ext_rename));
-
-	/* Update the record's flags accordingly */
-	rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted;
-}
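-
-/*
- * For example (a sketch), a reader could strip the jobid extension from a
- * record before handing it to an application built without jobid support:
- *
- *	changelog_remap_rec(rec, CLF_VERSION | CLF_RENAME);
- */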
-
-struct ioc_changelog {
-	__u64 icc_recno;
-	__u32 icc_mdtindex;
-	__u32 icc_id;
-	__u32 icc_flags;
-};
-
-enum changelog_message_type {
-	CL_RECORD = 10, /* message is a changelog_rec */
-	CL_EOF    = 11, /* at end of current changelog */
-};
-
-/********* Misc **********/
-
-struct ioc_data_version {
-	__u64 idv_version;
-	__u64 idv_flags;     /* See LL_DV_xxx */
-};
-
-#define LL_DV_RD_FLUSH	(1 << 0) /* Flush dirty pages from clients */
-#define LL_DV_WR_FLUSH	(1 << 1) /* Flush all caching pages from clients */
-
-#ifndef offsetof
-# define offsetof(typ, memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-#define dot_lustre_name ".lustre"
-
-/********* HSM **********/
-
-/** HSM per-file state
- * See HSM_FLAGS below.
- */
-enum hsm_states {
-	HS_NONE		= 0x00000000,
-	HS_EXISTS	= 0x00000001,
-	HS_DIRTY	= 0x00000002,
-	HS_RELEASED	= 0x00000004,
-	HS_ARCHIVED	= 0x00000008,
-	HS_NORELEASE	= 0x00000010,
-	HS_NOARCHIVE	= 0x00000020,
-	HS_LOST		= 0x00000040,
-};
-
-/* HSM user-settable flags. */
-#define HSM_USER_MASK   (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
-
-/* Other HSM flags. */
-#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED)
-
-/*
- * All HSM-related possible flags that could be applied to a file.
- * This should be kept in sync with hsm_states.
- */
-#define HSM_FLAGS_MASK  (HSM_USER_MASK | HSM_STATUS_MASK)
-
-/**
- * HSM request progress state
- */
-enum hsm_progress_states {
-	HPS_WAITING	= 1,
-	HPS_RUNNING	= 2,
-	HPS_DONE	= 3,
-};
-
-#define HPS_NONE	0
-
-static inline char *hsm_progress_state2name(enum hsm_progress_states s)
-{
-	switch  (s) {
-	case HPS_WAITING:	return "waiting";
-	case HPS_RUNNING:	return "running";
-	case HPS_DONE:		return "done";
-	default:		return "unknown";
-	}
-}
-
-struct hsm_extent {
-	__u64 offset;
-	__u64 length;
-} __packed;
-
-/**
- * Current HSM states of a Lustre file.
- *
- * This structure is mainly meant to be sent to user space. It describes the
- * current HSM flags and in-progress action.
- */
-struct hsm_user_state {
-	/** Current HSM states, from enum hsm_states. */
-	__u32			hus_states;
-	__u32			hus_archive_id;
-	/** The action currently in progress, if there is one */
-	__u32			hus_in_progress_state;
-	__u32			hus_in_progress_action;
-	struct hsm_extent	hus_in_progress_location;
-	char			hus_extended_info[];
-};
-
-struct hsm_state_set_ioc {
-	struct lu_fid	hssi_fid;
-	__u64		hssi_setmask;
-	__u64		hssi_clearmask;
-};
-
-/*
- * This structure describes the current in-progress action for a file.
- * It is returned to user space and sent over the wire.
- */
-struct hsm_current_action {
-	/** The action currently in progress, if there is one */
-	/* state is one of hsm_progress_states */
-	__u32			hca_state;
-	/* action is one of hsm_user_action */
-	__u32			hca_action;
-	struct hsm_extent	hca_location;
-};
-
-/***** HSM user requests ******/
-/* User-generated (lfs/ioctl) request types */
-enum hsm_user_action {
-	HUA_NONE    =  1, /* no action (noop) */
-	HUA_ARCHIVE = 10, /* copy to hsm */
-	HUA_RESTORE = 11, /* prestage */
-	HUA_RELEASE = 12, /* drop ost objects */
-	HUA_REMOVE  = 13, /* remove from archive */
-	HUA_CANCEL  = 14  /* cancel a request */
-};
-
-static inline char *hsm_user_action2name(enum hsm_user_action  a)
-{
-	switch  (a) {
-	case HUA_NONE:    return "NOOP";
-	case HUA_ARCHIVE: return "ARCHIVE";
-	case HUA_RESTORE: return "RESTORE";
-	case HUA_RELEASE: return "RELEASE";
-	case HUA_REMOVE:  return "REMOVE";
-	case HUA_CANCEL:  return "CANCEL";
-	default:	  return "UNKNOWN";
-	}
-}
-
-/*
- * List of hr_flags (bit field)
- */
-#define HSM_FORCE_ACTION 0x0001
-/* used by CT, cannot be set by user */
-#define HSM_GHOST_COPY   0x0002
-
-/**
- * Contains the fixed part of struct hsm_user_request.
- */
-struct hsm_request {
-	__u32 hr_action;	/* enum hsm_user_action */
-	__u32 hr_archive_id;	/* archive id, used only with HUA_ARCHIVE */
-	__u64 hr_flags;		/* request flags */
-	__u32 hr_itemcount;	/* item count in hur_user_item vector */
-	__u32 hr_data_len;
-};
-
-struct hsm_user_item {
-	struct lu_fid	hui_fid;
-	struct hsm_extent hui_extent;
-} __packed;
-
-struct hsm_user_request {
-	struct hsm_request	hur_request;
-	struct hsm_user_item	hur_user_item[0];
-	/* extra data blob at end of struct (after all
-	 * hur_user_items), only use helpers to access it
-	 */
-} __packed;
-
-/** Return pointer to data field in a hsm user request */
-static inline void *hur_data(struct hsm_user_request *hur)
-{
-	return &hur->hur_user_item[hur->hur_request.hr_itemcount];
-}
-
-/**
- * Compute the current length of the provided hsm_user_request.  This returns -1
- * instead of an errno because ssize_t is defined to be only [ -1, SSIZE_MAX ]
- *
- * return -1 on bounds check error.
- */
-static inline ssize_t hur_len(struct hsm_user_request *hur)
-{
-	__u64	size;
-
-	/* can't overflow a __u64 since hr_itemcount is only __u32 */
-	size = offsetof(struct hsm_user_request, hur_user_item[0]) +
-		(__u64)hur->hur_request.hr_itemcount *
-		sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len;
-
-	if (size != (ssize_t)size)
-		return -1;
-
-	return size;
-}
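-
-/*
- * Allocation sketch (hypothetical item count "n", no trailing data blob);
- * the layout mirrors the formula used by hur_len() above:
- *
- *	struct hsm_user_request *hur;
- *
- *	hur = malloc(offsetof(struct hsm_user_request, hur_user_item[n]));
- *	if (hur) {
- *		hur->hur_request.hr_itemcount = n;
- *		hur->hur_request.hr_data_len = 0;
- *	}
- */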
-
-/****** HSM RPCs to copytool *****/
-/* Message types the copytool may receive */
-enum hsm_message_type {
-	HMT_ACTION_LIST = 100, /* message is a hsm_action_list */
-};
-
-/* Actions the copytool may be instructed to take for a given action_item */
-enum hsm_copytool_action {
-	HSMA_NONE    = 10, /* no action */
-	HSMA_ARCHIVE = 20, /* arbitrary offset */
-	HSMA_RESTORE = 21,
-	HSMA_REMOVE  = 22,
-	HSMA_CANCEL  = 23
-};
-
-static inline char *hsm_copytool_action2name(enum hsm_copytool_action  a)
-{
-	switch  (a) {
-	case HSMA_NONE:    return "NOOP";
-	case HSMA_ARCHIVE: return "ARCHIVE";
-	case HSMA_RESTORE: return "RESTORE";
-	case HSMA_REMOVE:  return "REMOVE";
-	case HSMA_CANCEL:  return "CANCEL";
-	default:	   return "UNKNOWN";
-	}
-}
-
-/* Copytool item action description */
-struct hsm_action_item {
-	__u32      hai_len;     /* valid size of this struct */
-	__u32      hai_action;  /* hsm_copytool_action, but use known size */
-	struct lu_fid hai_fid;     /* Lustre FID to operate on */
-	struct lu_fid hai_dfid;    /* fid used for data access */
-	struct hsm_extent hai_extent;  /* byte range to operate on */
-	__u64      hai_cookie;  /* action cookie from coordinator */
-	__u64      hai_gid;     /* grouplock id */
-	char       hai_data[0]; /* variable length */
-} __packed;
-
-/*
- * Helper function that prints in hex the first bytes of the
- * hai opaque data field.
- * \param hai [IN] record to print
- * \param buffer [OUT] output buffer
- * \param len [IN] max buffer len
- * \retval buffer
- */
-static inline char *hai_dump_data_field(struct hsm_action_item *hai,
-					char *buffer, size_t len)
-{
-	int i, data_len;
-	char *ptr;
-
-	ptr = buffer;
-	data_len = hai->hai_len - sizeof(*hai);
-	for (i = 0; (i < data_len) && (len > 2); i++) {
-		snprintf(ptr, 3, "%02X", (unsigned char)hai->hai_data[i]);
-		ptr += 2;
-		len -= 2;
-	}
-
-	*ptr = '\0';
-
-	return buffer;
-}
-
-/* Copytool action list */
-#define HAL_VERSION 1
-#define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */
-struct hsm_action_list {
-	__u32 hal_version;
-	__u32 hal_count;       /* number of hai's to follow */
-	__u64 hal_compound_id; /* returned by coordinator */
-	__u64 hal_flags;
-	__u32 hal_archive_id; /* which archive backend */
-	__u32 padding1;
-	char  hal_fsname[0];   /* null-terminated */
-	/* struct hsm_action_item[hal_count] follows, aligned on 8-byte
-	 * boundaries. See hai_first
-	 */
-} __packed;
-
-#ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round(int val)
-{
-	return (val + 7) & (~0x7);
-}
-
-#define HAVE_CFS_SIZE_ROUND
-#endif
-
-/* Return pointer to first hai in action list */
-static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
-{
-	return (struct hsm_action_item *)(hal->hal_fsname +
-					  cfs_size_round(strlen(hal->hal_fsname) + 1));
-}
-
-/* Return pointer to next hai */
-static inline struct hsm_action_item *hai_next(struct hsm_action_item *hai)
-{
-	return (struct hsm_action_item *)((char *)hai +
-					  cfs_size_round(hai->hai_len));
-}
-
-/* Return size of an hsm_action_list */
-static inline int hal_size(struct hsm_action_list *hal)
-{
-	int i, sz;
-	struct hsm_action_item *hai;
-
-	sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
-	hai = hai_first(hal);
-	for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
-		sz += cfs_size_round(hai->hai_len);
-
-	return sz;
-}
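-
-/*
- * A copytool would typically walk an action list the same way hal_size()
- * does (a sketch; "hal" arrives in an HMT_ACTION_LIST message and
- * process_item() is a hypothetical handler):
- *
- *	struct hsm_action_item *hai = hai_first(hal);
- *	int i;
- *
- *	for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
- *		process_item(hai);
- */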
-
-/* HSM file import
- * describes the attributes to be set on an imported file
- */
-struct hsm_user_import {
-	__u64		hui_size;
-	__u64		hui_atime;
-	__u64		hui_mtime;
-	__u32		hui_atime_ns;
-	__u32		hui_mtime_ns;
-	__u32		hui_uid;
-	__u32		hui_gid;
-	__u32		hui_mode;
-	__u32		hui_archive_id;
-};
-
-/* Copytool progress reporting */
-#define HP_FLAG_COMPLETED 0x01
-#define HP_FLAG_RETRY     0x02
-
-struct hsm_progress {
-	struct lu_fid		hp_fid;
-	__u64			hp_cookie;
-	struct hsm_extent	hp_extent;
-	__u16			hp_flags;
-	__u16			hp_errval; /* positive val */
-	__u32			padding;
-};
-
-struct hsm_copy {
-	__u64			hc_data_version;
-	__u16			hc_flags;
-	__u16			hc_errval; /* positive val */
-	__u32			padding;
-	struct hsm_action_item	hc_hai;
-};
-
-/** @} lustreuser */
-
-#endif /* _LUSTRE_USER_H */

+ 0 - 27
drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h

@@ -1,27 +0,0 @@
-#ifndef _LUSTRE_VER_H_
-#define _LUSTRE_VER_H_
-
-#define LUSTRE_MAJOR 2
-#define LUSTRE_MINOR 6
-#define LUSTRE_PATCH 99
-#define LUSTRE_FIX 0
-#define LUSTRE_VERSION_STRING "2.6.99"
-
-#define OBD_OCD_VERSION(major, minor, patch, fix)			\
-	(((major) << 24) + ((minor) << 16) + ((patch) << 8) + (fix))
-
-#define OBD_OCD_VERSION_MAJOR(version)	((int)((version) >> 24) & 255)
-#define OBD_OCD_VERSION_MINOR(version)	((int)((version) >> 16) & 255)
-#define OBD_OCD_VERSION_PATCH(version)	((int)((version) >>  8) & 255)
-#define OBD_OCD_VERSION_FIX(version)	((int)((version) >>  0) & 255)
-
-#define LUSTRE_VERSION_CODE						\
-	OBD_OCD_VERSION(LUSTRE_MAJOR, LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX)
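-
-/*
- * For example, with the values above LUSTRE_VERSION_CODE expands to
- * (2 << 24) + (6 << 16) + (99 << 8) + 0 == 0x02066300.
- */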
-
-/*
- * If the Lustre version of a client and the servers it connects to differs
- * by more than this amount, the client will issue a warning.
- */
-#define LUSTRE_VERSION_OFFSET_WARN OBD_OCD_VERSION(0, 4, 0, 0)
-
-#endif

+ 0 - 46
drivers/staging/lustre/lnet/Kconfig

@@ -1,46 +0,0 @@
-config LNET
-	tristate "Lustre networking subsystem (LNet)"
-	depends on INET
-	help
-	  The Lustre network layer, also known as LNet, is a networking
-	  abstraction API that was initially created to allow the Lustre
-	  filesystem to use very different networks, such as TCP and IB verbs,
-	  in a uniform way. In the case of Lustre routers only the LNet layer
-	  is required. Lately, other projects have also been looking into
-	  using LNet as their networking API.
-
-config LNET_MAX_PAYLOAD
-	int "Lustre lnet max transfer payload (default 1MB)"
-	depends on LNET
-	default "1048576"
-	help
-	  This option defines the maximum size of payload in bytes that lnet
-	  can put into its transport.
-
-	  If unsure, use default.
-
-config LNET_SELFTEST
-	tristate "Lustre networking self testing"
-	depends on LNET
-	help
-	  Choose Y here if you want to do LNet self testing. To compile this
-	  as a module, choose M here: the module will be called lnet_selftest.
-
-	  If unsure, say N.
-
-	  See also http://wiki.lustre.org/
-
-config LNET_XPRT_IB
-	tristate "LNET infiniband support"
-	depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS
-	default LNET && INFINIBAND
-	help
-	  This option allows LNet users to use InfiniBand as an
-	  RDMA-enabled transport.
-
-	  To compile this as a kernel module, choose M here and it will be
-	  called ko2iblnd.
-
-	  If unsure, say N.

+ 0 - 1
drivers/staging/lustre/lnet/Makefile

@@ -1 +0,0 @@
-obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/

+ 0 - 1
drivers/staging/lustre/lnet/klnds/Makefile

@@ -1 +0,0 @@
-obj-$(CONFIG_LNET) += o2iblnd/  socklnd/

+ 0 - 5
drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile

@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o
-ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o

+ 0 - 2958
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c

@@ -1,2958 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <asm/div64.h>
-#include <asm/page.h>
-#include "o2iblnd.h"
-
-static struct lnet_lnd the_o2iblnd;
-
-struct kib_data kiblnd_data;
-
-static __u32 kiblnd_cksum(void *ptr, int nob)
-{
-	char *c = ptr;
-	__u32 sum = 0;
-
-	while (nob-- > 0)
-		sum = ((sum << 1) | (sum >> 31)) + *c++;
-
-	/* ensure I don't return 0 (== no checksum) */
-	return !sum ? 1 : sum;
-}
-
-static char *kiblnd_msgtype2str(int type)
-{
-	switch (type) {
-	case IBLND_MSG_CONNREQ:
-		return "CONNREQ";
-
-	case IBLND_MSG_CONNACK:
-		return "CONNACK";
-
-	case IBLND_MSG_NOOP:
-		return "NOOP";
-
-	case IBLND_MSG_IMMEDIATE:
-		return "IMMEDIATE";
-
-	case IBLND_MSG_PUT_REQ:
-		return "PUT_REQ";
-
-	case IBLND_MSG_PUT_NAK:
-		return "PUT_NAK";
-
-	case IBLND_MSG_PUT_ACK:
-		return "PUT_ACK";
-
-	case IBLND_MSG_PUT_DONE:
-		return "PUT_DONE";
-
-	case IBLND_MSG_GET_REQ:
-		return "GET_REQ";
-
-	case IBLND_MSG_GET_DONE:
-		return "GET_DONE";
-
-	default:
-		return "???";
-	}
-}
-
-static int kiblnd_msgtype2size(int type)
-{
-	const int hdr_size = offsetof(struct kib_msg, ibm_u);
-
-	switch (type) {
-	case IBLND_MSG_CONNREQ:
-	case IBLND_MSG_CONNACK:
-		return hdr_size + sizeof(struct kib_connparams);
-
-	case IBLND_MSG_NOOP:
-		return hdr_size;
-
-	case IBLND_MSG_IMMEDIATE:
-		return offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[0]);
-
-	case IBLND_MSG_PUT_REQ:
-		return hdr_size + sizeof(struct kib_putreq_msg);
-
-	case IBLND_MSG_PUT_ACK:
-		return hdr_size + sizeof(struct kib_putack_msg);
-
-	case IBLND_MSG_GET_REQ:
-		return hdr_size + sizeof(struct kib_get_msg);
-
-	case IBLND_MSG_PUT_NAK:
-	case IBLND_MSG_PUT_DONE:
-	case IBLND_MSG_GET_DONE:
-		return hdr_size + sizeof(struct kib_completion_msg);
-	default:
-		return -1;
-	}
-}
-
-static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
-{
-	struct kib_rdma_desc *rd;
-	int msg_size;
-	int nob;
-	int n;
-	int i;
-
-	LASSERT(msg->ibm_type == IBLND_MSG_GET_REQ ||
-		msg->ibm_type == IBLND_MSG_PUT_ACK);
-
-	rd = msg->ibm_type == IBLND_MSG_GET_REQ ?
-			      &msg->ibm_u.get.ibgm_rd :
-			      &msg->ibm_u.putack.ibpam_rd;
-
-	if (flip) {
-		__swab32s(&rd->rd_key);
-		__swab32s(&rd->rd_nfrags);
-	}
-
-	n = rd->rd_nfrags;
-
-	nob = offsetof(struct kib_msg, ibm_u) +
-	      kiblnd_rd_msg_size(rd, msg->ibm_type, n);
-
-	if (msg->ibm_nob < nob) {
-		CERROR("Short %s: %d(%d)\n",
-		       kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob);
-		return 1;
-	}
-
-	msg_size = kiblnd_rd_size(rd);
-	if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
-		CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
-		       msg_size, LNET_MAX_PAYLOAD);
-		return 1;
-	}
-
-	if (!flip)
-		return 0;
-
-	for (i = 0; i < n; i++) {
-		__swab32s(&rd->rd_frags[i].rf_nob);
-		__swab64s(&rd->rd_frags[i].rf_addr);
-	}
-
-	return 0;
-}
-
-void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
-		     int credits, lnet_nid_t dstnid, __u64 dststamp)
-{
-	struct kib_net *net = ni->ni_data;
-
-	/*
-	 * CAVEAT EMPTOR! all message fields not set here should have been
-	 * initialised previously.
-	 */
-	msg->ibm_magic    = IBLND_MSG_MAGIC;
-	msg->ibm_version  = version;
-	/*   ibm_type */
-	msg->ibm_credits  = credits;
-	/*   ibm_nob */
-	msg->ibm_cksum    = 0;
-	msg->ibm_srcnid   = ni->ni_nid;
-	msg->ibm_srcstamp = net->ibn_incarnation;
-	msg->ibm_dstnid   = dstnid;
-	msg->ibm_dststamp = dststamp;
-
-	if (*kiblnd_tunables.kib_cksum) {
-		/* NB ibm_cksum zero while computing cksum */
-		msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
-	}
-}
-
-int kiblnd_unpack_msg(struct kib_msg *msg, int nob)
-{
-	const int hdr_size = offsetof(struct kib_msg, ibm_u);
-	__u32 msg_cksum;
-	__u16 version;
-	int msg_nob;
-	int flip;
-
-	/* 6 bytes are enough to have received magic + version */
-	if (nob < 6) {
-		CERROR("Short message: %d\n", nob);
-		return -EPROTO;
-	}
-
-	if (msg->ibm_magic == IBLND_MSG_MAGIC) {
-		flip = 0;
-	} else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
-		flip = 1;
-	} else {
-		CERROR("Bad magic: %08x\n", msg->ibm_magic);
-		return -EPROTO;
-	}
-
-	version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
-	if (version != IBLND_MSG_VERSION &&
-	    version != IBLND_MSG_VERSION_1) {
-		CERROR("Bad version: %x\n", version);
-		return -EPROTO;
-	}
-
-	if (nob < hdr_size) {
-		CERROR("Short message: %d\n", nob);
-		return -EPROTO;
-	}
-
-	msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
-	if (msg_nob > nob) {
-		CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
-		return -EPROTO;
-	}
-
-	/*
-	 * checksum must be computed with ibm_cksum zero and BEFORE anything
-	 * gets flipped
-	 */
-	msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
-	msg->ibm_cksum = 0;
-	if (msg_cksum &&
-	    msg_cksum != kiblnd_cksum(msg, msg_nob)) {
-		CERROR("Bad checksum\n");
-		return -EPROTO;
-	}
-
-	msg->ibm_cksum = msg_cksum;
-
-	if (flip) {
-		/* leave magic unflipped as a clue to peer endianness */
-		msg->ibm_version = version;
-		BUILD_BUG_ON(sizeof(msg->ibm_type) != 1);
-		BUILD_BUG_ON(sizeof(msg->ibm_credits) != 1);
-		msg->ibm_nob     = msg_nob;
-		__swab64s(&msg->ibm_srcnid);
-		__swab64s(&msg->ibm_srcstamp);
-		__swab64s(&msg->ibm_dstnid);
-		__swab64s(&msg->ibm_dststamp);
-	}
-
-	if (msg->ibm_srcnid == LNET_NID_ANY) {
-		CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
-		return -EPROTO;
-	}
-
-	if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) {
-		CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type),
-		       msg_nob, kiblnd_msgtype2size(msg->ibm_type));
-		return -EPROTO;
-	}
-
-	switch (msg->ibm_type) {
-	default:
-		CERROR("Unknown message type %x\n", msg->ibm_type);
-		return -EPROTO;
-
-	case IBLND_MSG_NOOP:
-	case IBLND_MSG_IMMEDIATE:
-	case IBLND_MSG_PUT_REQ:
-		break;
-
-	case IBLND_MSG_PUT_ACK:
-	case IBLND_MSG_GET_REQ:
-		if (kiblnd_unpack_rd(msg, flip))
-			return -EPROTO;
-		break;
-
-	case IBLND_MSG_PUT_NAK:
-	case IBLND_MSG_PUT_DONE:
-	case IBLND_MSG_GET_DONE:
-		if (flip)
-			__swab32s(&msg->ibm_u.completion.ibcm_status);
-		break;
-
-	case IBLND_MSG_CONNREQ:
-	case IBLND_MSG_CONNACK:
-		if (flip) {
-			__swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
-			__swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
-			__swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
-		}
-		break;
-	}
-	return 0;
-}
-
-int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp,
-		       lnet_nid_t nid)
-{
-	struct kib_peer *peer;
-	struct kib_net *net = ni->ni_data;
-	int cpt = lnet_cpt_of_nid(nid);
-	unsigned long flags;
-
-	LASSERT(net);
-	LASSERT(nid != LNET_NID_ANY);
-
-	peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt);
-	if (!peer) {
-		CERROR("Cannot allocate peer\n");
-		return -ENOMEM;
-	}
-
-	peer->ibp_ni = ni;
-	peer->ibp_nid = nid;
-	peer->ibp_error = 0;
-	peer->ibp_last_alive = 0;
-	peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
-	peer->ibp_queue_depth = ni->ni_peertxcredits;
-	atomic_set(&peer->ibp_refcount, 1);  /* 1 ref for caller */
-
-	INIT_LIST_HEAD(&peer->ibp_list);     /* not in the peer table yet */
-	INIT_LIST_HEAD(&peer->ibp_conns);
-	INIT_LIST_HEAD(&peer->ibp_tx_queue);
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	/* always called with a ref on ni, which prevents ni being shutdown */
-	LASSERT(!net->ibn_shutdown);
-
-	/* npeers only grows with the global lock held */
-	atomic_inc(&net->ibn_npeers);
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	*peerp = peer;
-	return 0;
-}
-
-void kiblnd_destroy_peer(struct kib_peer *peer)
-{
-	struct kib_net *net = peer->ibp_ni->ni_data;
-
-	LASSERT(net);
-	LASSERT(!atomic_read(&peer->ibp_refcount));
-	LASSERT(!kiblnd_peer_active(peer));
-	LASSERT(kiblnd_peer_idle(peer));
-	LASSERT(list_empty(&peer->ibp_tx_queue));
-
-	kfree(peer);
-
-	/*
-	 * NB a peer's connections keep a reference on their peer until
-	 * they are destroyed, so we can be assured that _all_ state to do
-	 * with this peer has been cleaned up when its refcount drops to
-	 * zero.
-	 */
-	atomic_dec(&net->ibn_npeers);
-}
-
-struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid)
-{
-	/*
-	 * the caller is responsible for accounting the additional reference
-	 * that this creates
-	 */
-	struct list_head *peer_list = kiblnd_nid2peerlist(nid);
-	struct list_head *tmp;
-	struct kib_peer *peer;
-
-	list_for_each(tmp, peer_list) {
-		peer = list_entry(tmp, struct kib_peer, ibp_list);
-		LASSERT(!kiblnd_peer_idle(peer));
-
-		if (peer->ibp_nid != nid)
-			continue;
-
-		CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n",
-		       peer, libcfs_nid2str(nid),
-		       atomic_read(&peer->ibp_refcount),
-		       peer->ibp_version);
-		return peer;
-	}
-	return NULL;
-}
-
-void kiblnd_unlink_peer_locked(struct kib_peer *peer)
-{
-	LASSERT(list_empty(&peer->ibp_conns));
-
-	LASSERT(kiblnd_peer_active(peer));
-	list_del_init(&peer->ibp_list);
-	/* lose peerlist's ref */
-	kiblnd_peer_decref(peer);
-}
-
-static int kiblnd_get_peer_info(struct lnet_ni *ni, int index,
-				lnet_nid_t *nidp, int *count)
-{
-	struct kib_peer *peer;
-	struct list_head *ptmp;
-	int i;
-	unsigned long flags;
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
-		list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
-			peer = list_entry(ptmp, struct kib_peer, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
-
-			if (peer->ibp_ni != ni)
-				continue;
-
-			if (index-- > 0)
-				continue;
-
-			*nidp = peer->ibp_nid;
-			*count = atomic_read(&peer->ibp_refcount);
-
-			read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
-					       flags);
-			return 0;
-		}
-	}
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-	return -ENOENT;
-}
-
-static void kiblnd_del_peer_locked(struct kib_peer *peer)
-{
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-	struct kib_conn *conn;
-
-	if (list_empty(&peer->ibp_conns)) {
-		kiblnd_unlink_peer_locked(peer);
-	} else {
-		list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
-			conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
-			kiblnd_close_conn_locked(conn, 0);
-		}
-		/* NB closing peer's last conn unlinked it. */
-	}
-	/*
-	 * NB peer now unlinked; might even be freed if the peer table had the
-	 * last ref on it.
-	 */
-}
-
-static int kiblnd_del_peer(struct lnet_ni *ni, lnet_nid_t nid)
-{
-	LIST_HEAD(zombies);
-	struct list_head *ptmp;
-	struct list_head *pnxt;
-	struct kib_peer *peer;
-	int lo;
-	int hi;
-	int i;
-	unsigned long flags;
-	int rc = -ENOENT;
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	if (nid != LNET_NID_ANY) {
-		lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
-		hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
-	} else {
-		lo = 0;
-		hi = kiblnd_data.kib_peer_hash_size - 1;
-	}
-
-	for (i = lo; i <= hi; i++) {
-		list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
-			peer = list_entry(ptmp, struct kib_peer, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
-
-			if (peer->ibp_ni != ni)
-				continue;
-
-			if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
-				continue;
-
-			if (!list_empty(&peer->ibp_tx_queue)) {
-				LASSERT(list_empty(&peer->ibp_conns));
-
-				list_splice_init(&peer->ibp_tx_queue,
-						 &zombies);
-			}
-
-			kiblnd_del_peer_locked(peer);
-			rc = 0;	 /* matched something */
-		}
-	}
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	kiblnd_txlist_done(ni, &zombies, -EIO);
-
-	return rc;
-}
-
-static struct kib_conn *kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index)
-{
-	struct kib_peer *peer;
-	struct list_head *ptmp;
-	struct kib_conn *conn;
-	struct list_head *ctmp;
-	int i;
-	unsigned long flags;
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
-		list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
-			peer = list_entry(ptmp, struct kib_peer, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
-
-			if (peer->ibp_ni != ni)
-				continue;
-
-			list_for_each(ctmp, &peer->ibp_conns) {
-				if (index-- > 0)
-					continue;
-
-				conn = list_entry(ctmp, struct kib_conn,
-						  ibc_list);
-				kiblnd_conn_addref(conn);
-				read_unlock_irqrestore(
-					&kiblnd_data.kib_global_lock,
-					flags);
-				return conn;
-			}
-		}
-	}
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-	return NULL;
-}
-
-int kiblnd_translate_mtu(int value)
-{
-	switch (value) {
-	default:
-		return -1;
-	case 0:
-		return 0;
-	case 256:
-		return IB_MTU_256;
-	case 512:
-		return IB_MTU_512;
-	case 1024:
-		return IB_MTU_1024;
-	case 2048:
-		return IB_MTU_2048;
-	case 4096:
-		return IB_MTU_4096;
-	}
-}
-
-static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
-{
-	int mtu;
-
-	/* XXX There is no path record for iWARP, set by netdev->change_mtu? */
-	if (!cmid->route.path_rec)
-		return;
-
-	mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu);
-	LASSERT(mtu >= 0);
-	if (mtu)
-		cmid->route.path_rec->mtu = mtu;
-}
-
-static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt)
-{
-	cpumask_var_t *mask;
-	int vectors;
-	int off;
-	int i;
-	lnet_nid_t nid = conn->ibc_peer->ibp_nid;
-
-	vectors = conn->ibc_cmid->device->num_comp_vectors;
-	if (vectors <= 1)
-		return 0;
-
-	mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt);
-	if (!mask)
-		return 0;
-
-	/* hash NID to CPU id in this partition... */
-	off = do_div(nid, cpumask_weight(*mask));
-	for_each_cpu(i, *mask) {
-		if (!off--)
-			return i % vectors;
-	}
-
-	LBUG();
-	return 1;
-}
-
-struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
-				    int state, int version)
-{
-	/*
-	 * CAVEAT EMPTOR:
-	 * If the new conn is created successfully it takes over the caller's
-	 * ref on 'peer'.  It also "owns" 'cmid' and destroys it when it itself
-	 * is destroyed.  On failure, the caller's ref on 'peer' remains and
-	 * she must dispose of 'cmid'.  (Actually I'd block forever if I tried
-	 * to destroy 'cmid' here since I'm called from the CM which still has
-	 * its ref on 'cmid').
-	 */
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	struct kib_net *net = peer->ibp_ni->ni_data;
-	struct kib_dev *dev;
-	struct ib_qp_init_attr *init_qp_attr;
-	struct kib_sched_info *sched;
-	struct ib_cq_init_attr cq_attr = {};
-	struct kib_conn *conn;
-	struct ib_cq *cq;
-	unsigned long flags;
-	int cpt;
-	int rc;
-	int i;
-
-	LASSERT(net);
-	LASSERT(!in_interrupt());
-
-	dev = net->ibn_dev;
-
-	cpt = lnet_cpt_of_nid(peer->ibp_nid);
-	sched = kiblnd_data.kib_scheds[cpt];
-
-	LASSERT(sched->ibs_nthreads > 0);
-
-	init_qp_attr = kzalloc_cpt(sizeof(*init_qp_attr), GFP_NOFS, cpt);
-	if (!init_qp_attr) {
-		CERROR("Can't allocate qp_attr for %s\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		goto failed_0;
-	}
-
-	conn = kzalloc_cpt(sizeof(*conn), GFP_NOFS, cpt);
-	if (!conn) {
-		CERROR("Can't allocate connection for %s\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		goto failed_1;
-	}
-
-	conn->ibc_state = IBLND_CONN_INIT;
-	conn->ibc_version = version;
-	conn->ibc_peer = peer;		  /* I take the caller's ref */
-	cmid->context = conn;		   /* for future CM callbacks */
-	conn->ibc_cmid = cmid;
-	conn->ibc_max_frags = peer->ibp_max_frags;
-	conn->ibc_queue_depth = peer->ibp_queue_depth;
-
-	INIT_LIST_HEAD(&conn->ibc_early_rxs);
-	INIT_LIST_HEAD(&conn->ibc_tx_noops);
-	INIT_LIST_HEAD(&conn->ibc_tx_queue);
-	INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
-	INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
-	INIT_LIST_HEAD(&conn->ibc_active_txs);
-	spin_lock_init(&conn->ibc_lock);
-
-	conn->ibc_connvars = kzalloc_cpt(sizeof(*conn->ibc_connvars), GFP_NOFS, cpt);
-	if (!conn->ibc_connvars) {
-		CERROR("Can't allocate in-progress connection state\n");
-		goto failed_2;
-	}
-
-	write_lock_irqsave(glock, flags);
-	if (dev->ibd_failover) {
-		write_unlock_irqrestore(glock, flags);
-		CERROR("%s: failover in progress\n", dev->ibd_ifname);
-		goto failed_2;
-	}
-
-	if (dev->ibd_hdev->ibh_ibdev != cmid->device) {
-		/* wakeup failover thread and teardown connection */
-		if (kiblnd_dev_can_failover(dev)) {
-			list_add_tail(&dev->ibd_fail_list,
-				      &kiblnd_data.kib_failed_devs);
-			wake_up(&kiblnd_data.kib_failover_waitq);
-		}
-
-		write_unlock_irqrestore(glock, flags);
-		CERROR("cmid HCA(%s), kib_dev(%s) need failover\n",
-		       cmid->device->name, dev->ibd_ifname);
-		goto failed_2;
-	}
-
-	kiblnd_hdev_addref_locked(dev->ibd_hdev);
-	conn->ibc_hdev = dev->ibd_hdev;
-
-	kiblnd_setup_mtu_locked(cmid);
-
-	write_unlock_irqrestore(glock, flags);
-
-	conn->ibc_rxs = kzalloc_cpt(IBLND_RX_MSGS(conn) * sizeof(struct kib_rx),
-				    GFP_NOFS, cpt);
-	if (!conn->ibc_rxs) {
-		CERROR("Cannot allocate RX buffers\n");
-		goto failed_2;
-	}
-
-	rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
-				IBLND_RX_MSG_PAGES(conn));
-	if (rc)
-		goto failed_2;
-
-	kiblnd_map_rx_descs(conn);
-
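-	/*
-	 * size the CQ to cover every send and receive WR this connection
-	 * can have outstanding at once, so completions can't be lost to
-	 * CQ overflow
-	 */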
-	cq_attr.cqe = IBLND_CQ_ENTRIES(conn);
-	cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt);
-	cq = ib_create_cq(cmid->device,
-			  kiblnd_cq_completion, kiblnd_cq_event, conn,
-			  &cq_attr);
-	if (IS_ERR(cq)) {
-		CERROR("Failed to create CQ with %d CQEs: %ld\n",
-		       IBLND_CQ_ENTRIES(conn), PTR_ERR(cq));
-		goto failed_2;
-	}
-
-	conn->ibc_cq = cq;
-
-	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-	if (rc) {
-		CERROR("Can't request completion notification: %d\n", rc);
-		goto failed_2;
-	}
-
-	init_qp_attr->event_handler = kiblnd_qp_event;
-	init_qp_attr->qp_context = conn;
-	init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn);
-	init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
-	init_qp_attr->cap.max_send_sge = 1;
-	init_qp_attr->cap.max_recv_sge = 1;
-	init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	init_qp_attr->qp_type = IB_QPT_RC;
-	init_qp_attr->send_cq = cq;
-	init_qp_attr->recv_cq = cq;
-
-	conn->ibc_sched = sched;
-
-	rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr);
-	if (rc) {
-		CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n",
-		       rc, init_qp_attr->cap.max_send_wr,
-		       init_qp_attr->cap.max_recv_wr);
-		goto failed_2;
-	}
-
-	kfree(init_qp_attr);
-
-	/* 1 ref for caller and each rxmsg */
-	atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn));
-	conn->ibc_nrx = IBLND_RX_MSGS(conn);
-
-	/* post receives */
-	for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
-		rc = kiblnd_post_rx(&conn->ibc_rxs[i],
-				    IBLND_POSTRX_NO_CREDIT);
-		if (rc) {
-			CERROR("Can't post rxmsg: %d\n", rc);
-
-			/* Make posted receives complete */
-			kiblnd_abort_receives(conn);
-
-			/*
-			 * correct the # of posted buffers
-			 * NB: locking is needed now that I'm racing with completions
-			 */
-			spin_lock_irqsave(&sched->ibs_lock, flags);
-			conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i;
-			spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-			/*
-			 * cmid will be destroyed by the CM (OFED) after cm_callback
-			 * returns, so we can't refer to it anymore
-			 * (from kiblnd_connd()->kiblnd_destroy_conn)
-			 */
-			rdma_destroy_qp(conn->ibc_cmid);
-			conn->ibc_cmid = NULL;
-
-			/* Drop my own and unused rxbuffer refcounts */
-			while (i++ <= IBLND_RX_MSGS(conn))
-				kiblnd_conn_decref(conn);
-
-			return NULL;
-		}
-	}
-
-	/* Init successful! */
-	LASSERT(state == IBLND_CONN_ACTIVE_CONNECT ||
-		state == IBLND_CONN_PASSIVE_WAIT);
-	conn->ibc_state = state;
-
-	/* 1 more conn */
-	atomic_inc(&net->ibn_nconns);
-	return conn;
-
- failed_2:
-	kiblnd_destroy_conn(conn);
-	kfree(conn);
- failed_1:
-	kfree(init_qp_attr);
- failed_0:
-	return NULL;
-}
-
-void kiblnd_destroy_conn(struct kib_conn *conn)
-{
-	struct rdma_cm_id *cmid = conn->ibc_cmid;
-	struct kib_peer *peer = conn->ibc_peer;
-	int rc;
-
-	LASSERT(!in_interrupt());
-	LASSERT(!atomic_read(&conn->ibc_refcount));
-	LASSERT(list_empty(&conn->ibc_early_rxs));
-	LASSERT(list_empty(&conn->ibc_tx_noops));
-	LASSERT(list_empty(&conn->ibc_tx_queue));
-	LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd));
-	LASSERT(list_empty(&conn->ibc_tx_queue_nocred));
-	LASSERT(list_empty(&conn->ibc_active_txs));
-	LASSERT(!conn->ibc_noops_posted);
-	LASSERT(!conn->ibc_nsends_posted);
-
-	switch (conn->ibc_state) {
-	default:
-		/* conn must be completely disengaged from the network */
-		LBUG();
-
-	case IBLND_CONN_DISCONNECTED:
-		/* connvars should have been freed already */
-		LASSERT(!conn->ibc_connvars);
-		break;
-
-	case IBLND_CONN_INIT:
-		break;
-	}
-
-	/* conn->ibc_cmid might be destroyed by CM already */
-	if (cmid && cmid->qp)
-		rdma_destroy_qp(cmid);
-
-	if (conn->ibc_cq) {
-		rc = ib_destroy_cq(conn->ibc_cq);
-		if (rc)
-			CWARN("Error destroying CQ: %d\n", rc);
-	}
-
-	if (conn->ibc_rx_pages)
-		kiblnd_unmap_rx_descs(conn);
-
-	kfree(conn->ibc_rxs);
-	kfree(conn->ibc_connvars);
-
-	if (conn->ibc_hdev)
-		kiblnd_hdev_decref(conn->ibc_hdev);
-
-	/* See CAVEAT EMPTOR above in kiblnd_create_conn */
-	if (conn->ibc_state != IBLND_CONN_INIT) {
-		struct kib_net *net = peer->ibp_ni->ni_data;
-
-		kiblnd_peer_decref(peer);
-		rdma_destroy_id(cmid);
-		atomic_dec(&net->ibn_nconns);
-	}
-}
-
-int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why)
-{
-	struct kib_conn *conn;
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-	int count = 0;
-
-	list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
-		conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
-		CDEBUG(D_NET, "Closing conn -> %s, version: %x, reason: %d\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       conn->ibc_version, why);
-
-		kiblnd_close_conn_locked(conn, why);
-		count++;
-	}
-
-	return count;
-}
-
-int kiblnd_close_stale_conns_locked(struct kib_peer *peer,
-				    int version, __u64 incarnation)
-{
-	struct kib_conn *conn;
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-	int count = 0;
-
-	list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
-		conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
-		if (conn->ibc_version     == version &&
-		    conn->ibc_incarnation == incarnation)
-			continue;
-
-		CDEBUG(D_NET,
-		       "Closing stale conn -> %s version: %x, incarnation:%#llx(%x, %#llx)\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       conn->ibc_version, conn->ibc_incarnation,
-		       version, incarnation);
-
-		kiblnd_close_conn_locked(conn, -ESTALE);
-		count++;
-	}
-
-	return count;
-}
-
-static int kiblnd_close_matching_conns(struct lnet_ni *ni, lnet_nid_t nid)
-{
-	struct kib_peer *peer;
-	struct list_head *ptmp;
-	struct list_head *pnxt;
-	int lo;
-	int hi;
-	int i;
-	unsigned long flags;
-	int count = 0;
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
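-	/*
-	 * a specific NID hashes to exactly one peer list; a wildcard
-	 * means scanning every hash bucket
-	 */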
-	if (nid != LNET_NID_ANY) {
-		lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
-		hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
-	} else {
-		lo = 0;
-		hi = kiblnd_data.kib_peer_hash_size - 1;
-	}
-
-	for (i = lo; i <= hi; i++) {
-		list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
-			peer = list_entry(ptmp, struct kib_peer, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
-
-			if (peer->ibp_ni != ni)
-				continue;
-
-			if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
-				continue;
-
-			count += kiblnd_close_peer_conns_locked(peer, 0);
-		}
-	}
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	/* wildcards always succeed */
-	if (nid == LNET_NID_ANY)
-		return 0;
-
-	return !count ? -ENOENT : 0;
-}
-
-static int kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
-{
-	struct libcfs_ioctl_data *data = arg;
-	int rc = -EINVAL;
-
-	switch (cmd) {
-	case IOC_LIBCFS_GET_PEER: {
-		lnet_nid_t nid = 0;
-		int count = 0;
-
-		rc = kiblnd_get_peer_info(ni, data->ioc_count,
-					  &nid, &count);
-		data->ioc_nid   = nid;
-		data->ioc_count = count;
-		break;
-	}
-
-	case IOC_LIBCFS_DEL_PEER: {
-		rc = kiblnd_del_peer(ni, data->ioc_nid);
-		break;
-	}
-	case IOC_LIBCFS_GET_CONN: {
-		struct kib_conn *conn;
-
-		rc = 0;
-		conn = kiblnd_get_conn_by_idx(ni, data->ioc_count);
-		if (!conn) {
-			rc = -ENOENT;
-			break;
-		}
-
-		LASSERT(conn->ibc_cmid);
-		data->ioc_nid = conn->ibc_peer->ibp_nid;
-		if (!conn->ibc_cmid->route.path_rec)
-			data->ioc_u32[0] = 0; /* iWarp has no path MTU */
-		else
-			data->ioc_u32[0] =
-			ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
-		kiblnd_conn_decref(conn);
-		break;
-	}
-	case IOC_LIBCFS_CLOSE_CONNECTION: {
-		rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
-		break;
-	}
-
-	default:
-		break;
-	}
-
-	return rc;
-}
-
-static void kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid,
-			 unsigned long *when)
-{
-	unsigned long last_alive = 0;
-	unsigned long now = jiffies;
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	struct kib_peer *peer;
-	unsigned long flags;
-
-	read_lock_irqsave(glock, flags);
-
-	peer = kiblnd_find_peer_locked(nid);
-	if (peer)
-		last_alive = peer->ibp_last_alive;
-
-	read_unlock_irqrestore(glock, flags);
-
-	if (last_alive)
-		*when = last_alive;
-
-	/*
-	 * peer is not persistent in hash, trigger peer creation
-	 * and connection establishment with a NULL tx
-	 */
-	if (!peer)
-		kiblnd_launch_tx(ni, NULL, nid);
-
-	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n",
-	       libcfs_nid2str(nid), peer,
-	       last_alive ? (now - last_alive) / HZ : -1);
-}
-
-static void kiblnd_free_pages(struct kib_pages *p)
-{
-	int npages = p->ibp_npages;
-	int i;
-
-	for (i = 0; i < npages; i++) {
-		if (p->ibp_pages[i])
-			__free_page(p->ibp_pages[i]);
-	}
-
-	kfree(p);
-}
-
-int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages)
-{
-	struct kib_pages *p;
-	int i;
-
-	p = kzalloc_cpt(offsetof(struct kib_pages, ibp_pages[npages]),
-			GFP_NOFS, cpt);
-	if (!p) {
-		CERROR("Can't allocate descriptor for %d pages\n", npages);
-		return -ENOMEM;
-	}
-
-	p->ibp_npages = npages;
-
-	for (i = 0; i < npages; i++) {
-		p->ibp_pages[i] = alloc_pages_node(
-				    cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				    GFP_NOFS, 0);
-		if (!p->ibp_pages[i]) {
-			CERROR("Can't allocate page %d of %d\n", i, npages);
-			kiblnd_free_pages(p);
-			return -ENOMEM;
-		}
-	}
-
-	*pp = p;
-	return 0;
-}
-
-void kiblnd_unmap_rx_descs(struct kib_conn *conn)
-{
-	struct kib_rx *rx;
-	int i;
-
-	LASSERT(conn->ibc_rxs);
-	LASSERT(conn->ibc_hdev);
-
-	for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
-		rx = &conn->ibc_rxs[i];
-
-		LASSERT(rx->rx_nob >= 0); /* not posted */
-
-		kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev,
-					KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
-							  rx->rx_msgaddr),
-					IBLND_MSG_SIZE, DMA_FROM_DEVICE);
-	}
-
-	kiblnd_free_pages(conn->ibc_rx_pages);
-
-	conn->ibc_rx_pages = NULL;
-}
-
-void kiblnd_map_rx_descs(struct kib_conn *conn)
-{
-	struct kib_rx *rx;
-	struct page *pg;
-	int pg_off;
-	int ipg;
-	int i;
-
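-	/*
-	 * lay the RX message buffers out back to back in the RX pages;
-	 * IBLND_MSG_SIZE divides PAGE_SIZE, so a buffer never straddles
-	 * a page boundary
-	 */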
-	for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) {
-		pg = conn->ibc_rx_pages->ibp_pages[ipg];
-		rx = &conn->ibc_rxs[i];
-
-		rx->rx_conn = conn;
-		rx->rx_msg = (struct kib_msg *)(((char *)page_address(pg)) + pg_off);
-
-		rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev,
-						       rx->rx_msg,
-						       IBLND_MSG_SIZE,
-						       DMA_FROM_DEVICE);
-		LASSERT(!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev,
-						  rx->rx_msgaddr));
-		KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr);
-
-		CDEBUG(D_NET, "rx %d: %p %#llx(%#llx)\n",
-		       i, rx->rx_msg, rx->rx_msgaddr,
-		       (__u64)(page_to_phys(pg) + pg_off));
-
-		pg_off += IBLND_MSG_SIZE;
-		LASSERT(pg_off <= PAGE_SIZE);
-
-		if (pg_off == PAGE_SIZE) {
-			pg_off = 0;
-			ipg++;
-			LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn));
-		}
-	}
-}
-
-static void kiblnd_unmap_tx_pool(struct kib_tx_pool *tpo)
-{
-	struct kib_hca_dev *hdev = tpo->tpo_hdev;
-	struct kib_tx *tx;
-	int i;
-
-	LASSERT(!tpo->tpo_pool.po_allocated);
-
-	if (!hdev)
-		return;
-
-	for (i = 0; i < tpo->tpo_pool.po_size; i++) {
-		tx = &tpo->tpo_tx_descs[i];
-		kiblnd_dma_unmap_single(hdev->ibh_ibdev,
-					KIBLND_UNMAP_ADDR(tx, tx_msgunmap,
-							  tx->tx_msgaddr),
-					IBLND_MSG_SIZE, DMA_TO_DEVICE);
-	}
-
-	kiblnd_hdev_decref(hdev);
-	tpo->tpo_hdev = NULL;
-}
-
-static struct kib_hca_dev *kiblnd_current_hdev(struct kib_dev *dev)
-{
-	struct kib_hca_dev *hdev;
-	unsigned long flags;
-	int i = 0;
-
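-	/*
-	 * block (polling every ~10ms) until any failover in progress
-	 * completes, then take a ref on the now-stable HCA descriptor
-	 */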
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	while (dev->ibd_failover) {
-		read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-		if (!(i++ % 50))
-			CDEBUG(D_NET, "%s: waiting for failover\n",
-			       dev->ibd_ifname);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ / 100);
-
-		read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	}
-
-	kiblnd_hdev_addref_locked(dev->ibd_hdev);
-	hdev = dev->ibd_hdev;
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	return hdev;
-}
-
-static void kiblnd_map_tx_pool(struct kib_tx_pool *tpo)
-{
-	struct kib_pages *txpgs = tpo->tpo_tx_pages;
-	struct kib_pool *pool = &tpo->tpo_pool;
-	struct kib_net *net = pool->po_owner->ps_net;
-	struct kib_dev *dev;
-	struct page *page;
-	struct kib_tx *tx;
-	int page_offset;
-	int ipage;
-	int i;
-
-	LASSERT(net);
-
-	dev = net->ibn_dev;
-
-	/* pre-mapped messages are not bigger than 1 page */
-	BUILD_BUG_ON(IBLND_MSG_SIZE > PAGE_SIZE);
-
-	/* No fancy arithmetic when we do the buffer calculations */
-	BUILD_BUG_ON(PAGE_SIZE % IBLND_MSG_SIZE);
-
-	tpo->tpo_hdev = kiblnd_current_hdev(dev);
-
-	for (ipage = page_offset = i = 0; i < pool->po_size; i++) {
-		page = txpgs->ibp_pages[ipage];
-		tx = &tpo->tpo_tx_descs[i];
-
-		tx->tx_msg = (struct kib_msg *)(((char *)page_address(page)) +
-					   page_offset);
-
-		tx->tx_msgaddr = kiblnd_dma_map_single(
-			tpo->tpo_hdev->ibh_ibdev, tx->tx_msg,
-			IBLND_MSG_SIZE, DMA_TO_DEVICE);
-		LASSERT(!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev,
-						  tx->tx_msgaddr));
-		KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr);
-
-		list_add(&tx->tx_list, &pool->po_free_list);
-
-		page_offset += IBLND_MSG_SIZE;
-		LASSERT(page_offset <= PAGE_SIZE);
-
-		if (page_offset == PAGE_SIZE) {
-			page_offset = 0;
-			ipage++;
-			LASSERT(ipage <= txpgs->ibp_npages);
-		}
-	}
-}
-
-static void kiblnd_destroy_fmr_pool(struct kib_fmr_pool *fpo)
-{
-	LASSERT(!fpo->fpo_map_count);
-
-	if (fpo->fpo_is_fmr) {
-		if (fpo->fmr.fpo_fmr_pool)
-			ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
-	} else {
-		struct kib_fast_reg_descriptor *frd, *tmp;
-		int i = 0;
-
-		list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
-					 frd_list) {
-			list_del(&frd->frd_list);
-			ib_dereg_mr(frd->frd_mr);
-			kfree(frd);
-			i++;
-		}
-		if (i < fpo->fast_reg.fpo_pool_size)
-			CERROR("FastReg pool still has %d regions registered\n",
-			       fpo->fast_reg.fpo_pool_size - i);
-	}
-
-	if (fpo->fpo_hdev)
-		kiblnd_hdev_decref(fpo->fpo_hdev);
-
-	kfree(fpo);
-}
-
-static void kiblnd_destroy_fmr_pool_list(struct list_head *head)
-{
-	struct kib_fmr_pool *fpo, *tmp;
-
-	list_for_each_entry_safe(fpo, tmp, head, fpo_list) {
-		list_del(&fpo->fpo_list);
-		kiblnd_destroy_fmr_pool(fpo);
-	}
-}
-
-static int
-kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
-		     int ncpts)
-{
-	int size = tunables->lnd_fmr_pool_size / ncpts;
-
-	return max(IBLND_FMR_POOL, size);
-}
-
-static int
-kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
-			 int ncpts)
-{
-	int size = tunables->lnd_fmr_flush_trigger / ncpts;
-
-	return max(IBLND_FMR_POOL_FLUSH, size);
-}
-
-static int kiblnd_alloc_fmr_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo)
-{
-	struct ib_fmr_pool_param param = {
-		.max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE,
-		.page_shift        = PAGE_SHIFT,
-		.access            = (IB_ACCESS_LOCAL_WRITE |
-				      IB_ACCESS_REMOTE_WRITE),
-		.pool_size         = fps->fps_pool_size,
-		.dirty_watermark   = fps->fps_flush_trigger,
-		.flush_function    = NULL,
-		.flush_arg         = NULL,
-		.cache             = !!fps->fps_cache };
-	int rc = 0;
-
-	fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
-						   &param);
-	if (IS_ERR(fpo->fmr.fpo_fmr_pool)) {
-		rc = PTR_ERR(fpo->fmr.fpo_fmr_pool);
-		if (rc != -ENOSYS)
-			CERROR("Failed to create FMR pool: %d\n", rc);
-		else
-			CERROR("FMRs are not supported\n");
-	}
-
-	return rc;
-}
-
-static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo)
-{
-	struct kib_fast_reg_descriptor *frd, *tmp;
-	int i, rc;
-
-	INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list);
-	fpo->fast_reg.fpo_pool_size = 0;
-	for (i = 0; i < fps->fps_pool_size; i++) {
-		frd = kzalloc_cpt(sizeof(*frd), GFP_NOFS, fps->fps_cpt);
-		if (!frd) {
-			CERROR("Failed to allocate a new fast_reg descriptor\n");
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd,
-					  IB_MR_TYPE_MEM_REG,
-					  LNET_MAX_PAYLOAD / PAGE_SIZE);
-		if (IS_ERR(frd->frd_mr)) {
-			rc = PTR_ERR(frd->frd_mr);
-			CERROR("ib_alloc_mr failed: %d\n", rc);
-			frd->frd_mr = NULL;
-			goto out_middle;
-		}
-
-		frd->frd_valid = true;
-
-		list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
-		fpo->fast_reg.fpo_pool_size++;
-	}
-
-	return 0;
-
-out_middle:
-	if (frd->frd_mr)
-		ib_dereg_mr(frd->frd_mr);
-	kfree(frd);
-
-out:
-	list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
-				 frd_list) {
-		list_del(&frd->frd_list);
-		ib_dereg_mr(frd->frd_mr);
-		kfree(frd);
-	}
-
-	return rc;
-}
-
-static int kiblnd_create_fmr_pool(struct kib_fmr_poolset *fps,
-				  struct kib_fmr_pool **pp_fpo)
-{
-	struct kib_dev *dev = fps->fps_net->ibn_dev;
-	struct ib_device_attr *dev_attr;
-	struct kib_fmr_pool *fpo;
-	int rc;
-
-	fpo = kzalloc_cpt(sizeof(*fpo), GFP_NOFS, fps->fps_cpt);
-	if (!fpo)
-		return -ENOMEM;
-
-	fpo->fpo_hdev = kiblnd_current_hdev(dev);
-	dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs;
-
-	/* Check for FMR or FastReg support */
-	fpo->fpo_is_fmr = 0;
-	if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr &&
-	    fpo->fpo_hdev->ibh_ibdev->dealloc_fmr &&
-	    fpo->fpo_hdev->ibh_ibdev->map_phys_fmr &&
-	    fpo->fpo_hdev->ibh_ibdev->unmap_fmr) {
-		LCONSOLE_INFO("Using FMR for registration\n");
-		fpo->fpo_is_fmr = 1;
-	} else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-		LCONSOLE_INFO("Using FastReg for registration\n");
-	} else {
-		rc = -ENOSYS;
-		LCONSOLE_ERROR_MSG(rc, "IB device supports neither FMRs nor FastRegs, can't register memory\n");
-		goto out_fpo;
-	}
-
-	if (fpo->fpo_is_fmr)
-		rc = kiblnd_alloc_fmr_pool(fps, fpo);
-	else
-		rc = kiblnd_alloc_freg_pool(fps, fpo);
-	if (rc)
-		goto out_fpo;
-
-	fpo->fpo_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
-	fpo->fpo_owner = fps;
-	*pp_fpo = fpo;
-
-	return 0;
-
-out_fpo:
-	kiblnd_hdev_decref(fpo->fpo_hdev);
-	kfree(fpo);
-	return rc;
-}
-
-static void kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps,
-				    struct list_head *zombies)
-{
-	if (!fps->fps_net) /* initialized? */
-		return;
-
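-	/*
-	 * fail every pool: idle ones go straight onto the caller's
-	 * 'zombies' list for destruction, busy ones park on the failed
-	 * list until their last user drops them
-	 */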
-	spin_lock(&fps->fps_lock);
-
-	while (!list_empty(&fps->fps_pool_list)) {
-		struct kib_fmr_pool *fpo = list_entry(fps->fps_pool_list.next,
-						 struct kib_fmr_pool, fpo_list);
-		fpo->fpo_failed = 1;
-		list_del(&fpo->fpo_list);
-		if (!fpo->fpo_map_count)
-			list_add(&fpo->fpo_list, zombies);
-		else
-			list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
-	}
-
-	spin_unlock(&fps->fps_lock);
-}
-
-static void kiblnd_fini_fmr_poolset(struct kib_fmr_poolset *fps)
-{
-	if (fps->fps_net) { /* initialized? */
-		kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list);
-		kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list);
-	}
-}
-
-static int
-kiblnd_init_fmr_poolset(struct kib_fmr_poolset *fps, int cpt, int ncpts,
-			struct kib_net *net,
-			struct lnet_ioctl_config_o2iblnd_tunables *tunables)
-{
-	struct kib_fmr_pool *fpo;
-	int rc;
-
-	memset(fps, 0, sizeof(*fps));
-
-	fps->fps_net = net;
-	fps->fps_cpt = cpt;
-
-	fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts);
-	fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts);
-	fps->fps_cache = tunables->lnd_fmr_cache;
-
-	spin_lock_init(&fps->fps_lock);
-	INIT_LIST_HEAD(&fps->fps_pool_list);
-	INIT_LIST_HEAD(&fps->fps_failed_pool_list);
-
-	rc = kiblnd_create_fmr_pool(fps, &fpo);
-	if (!rc)
-		list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
-
-	return rc;
-}
-
-static int kiblnd_fmr_pool_is_idle(struct kib_fmr_pool *fpo, unsigned long now)
-{
-	if (fpo->fpo_map_count) /* still in use */
-		return 0;
-	if (fpo->fpo_failed)
-		return 1;
-	return time_after_eq(now, fpo->fpo_deadline);
-}
-
-static int
-kiblnd_map_tx_pages(struct kib_tx *tx, struct kib_rdma_desc *rd)
-{
-	__u64 *pages = tx->tx_pages;
-	struct kib_hca_dev *hdev;
-	int npages;
-	int size;
-	int i;
-
-	hdev = tx->tx_pool->tpo_hdev;
-
-	for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
-		for (size = 0; size <  rd->rd_frags[i].rf_nob;
-		     size += hdev->ibh_page_size) {
-			pages[npages++] = (rd->rd_frags[i].rf_addr &
-					   hdev->ibh_page_mask) + size;
-		}
-	}
-
-	return npages;
-}
-
-void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status)
-{
-	LIST_HEAD(zombies);
-	struct kib_fmr_pool *fpo = fmr->fmr_pool;
-	struct kib_fmr_poolset *fps;
-	unsigned long now = jiffies;
-	struct kib_fmr_pool *tmp;
-	int rc;
-
-	if (!fpo)
-		return;
-
-	fps = fpo->fpo_owner;
-	if (fpo->fpo_is_fmr) {
-		if (fmr->fmr_pfmr) {
-			rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
-			LASSERT(!rc);
-			fmr->fmr_pfmr = NULL;
-		}
-
-		if (status) {
-			rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
-			LASSERT(!rc);
-		}
-	} else {
-		struct kib_fast_reg_descriptor *frd = fmr->fmr_frd;
-
-		if (frd) {
-			frd->frd_valid = false;
-			spin_lock(&fps->fps_lock);
-			list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
-			spin_unlock(&fps->fps_lock);
-			fmr->fmr_frd = NULL;
-		}
-	}
-	fmr->fmr_pool = NULL;
-
-	spin_lock(&fps->fps_lock);
-	fpo->fpo_map_count--;  /* decref the pool */
-
-	list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) {
-		/* the first pool is persistent */
-		if (fps->fps_pool_list.next == &fpo->fpo_list)
-			continue;
-
-		if (kiblnd_fmr_pool_is_idle(fpo, now)) {
-			list_move(&fpo->fpo_list, &zombies);
-			fps->fps_version++;
-		}
-	}
-	spin_unlock(&fps->fps_lock);
-
-	if (!list_empty(&zombies))
-		kiblnd_destroy_fmr_pool_list(&zombies);
-}
-
-int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
-			struct kib_rdma_desc *rd, __u32 nob, __u64 iov,
-			struct kib_fmr *fmr)
-{
-	__u64 *pages = tx->tx_pages;
-	bool is_rx = (rd != tx->tx_rd);
-	bool tx_pages_mapped = false;
-	struct kib_fmr_pool *fpo;
-	int npages = 0;
-	__u64 version;
-	int rc;
-
- again:
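-	/*
-	 * try every pool in the set; on -EAGAIN grow the set (one thread
-	 * at a time) and retry until the mapping succeeds or fails hard
-	 */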
-	spin_lock(&fps->fps_lock);
-	version = fps->fps_version;
-	list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
-		fpo->fpo_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
-		fpo->fpo_map_count++;
-
-		if (fpo->fpo_is_fmr) {
-			struct ib_pool_fmr *pfmr;
-
-			spin_unlock(&fps->fps_lock);
-
-			if (!tx_pages_mapped) {
-				npages = kiblnd_map_tx_pages(tx, rd);
-				tx_pages_mapped = 1;
-			}
-
-			pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
-						    pages, npages, iov);
-			if (likely(!IS_ERR(pfmr))) {
-				fmr->fmr_key = is_rx ? pfmr->fmr->rkey :
-						       pfmr->fmr->lkey;
-				fmr->fmr_frd = NULL;
-				fmr->fmr_pfmr = pfmr;
-				fmr->fmr_pool = fpo;
-				return 0;
-			}
-			rc = PTR_ERR(pfmr);
-		} else {
-			if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
-				struct kib_fast_reg_descriptor *frd;
-				struct ib_reg_wr *wr;
-				struct ib_mr *mr;
-				int n;
-
-				frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
-						       struct kib_fast_reg_descriptor,
-						       frd_list);
-				list_del(&frd->frd_list);
-				spin_unlock(&fps->fps_lock);
-
-				mr = frd->frd_mr;
-
-				if (!frd->frd_valid) {
-					__u32 key = is_rx ? mr->rkey : mr->lkey;
-					struct ib_send_wr *inv_wr;
-
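-					/*
-					 * the MR was used before: prepare
-					 * a LOCAL_INV WR so the stale
-					 * registration is invalidated
-					 * before the MR is reused
-					 */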
-					inv_wr = &frd->frd_inv_wr;
-					memset(inv_wr, 0, sizeof(*inv_wr));
-					inv_wr->opcode = IB_WR_LOCAL_INV;
-					inv_wr->wr_id = IBLND_WID_MR;
-					inv_wr->ex.invalidate_rkey = key;
-
-					/* Bump the key */
-					key = ib_inc_rkey(key);
-					ib_update_fast_reg_key(mr, key);
-				}
-
-				n = ib_map_mr_sg(mr, tx->tx_frags,
-						 tx->tx_nfrags, NULL, PAGE_SIZE);
-				if (unlikely(n != tx->tx_nfrags)) {
-					CERROR("Failed to map mr %d/%d elements\n",
-					       n, tx->tx_nfrags);
-					return n < 0 ? n : -EINVAL;
-				}
-
-				mr->iova = iov;
-
-				/* Prepare FastReg WR */
-				wr = &frd->frd_fastreg_wr;
-				memset(wr, 0, sizeof(*wr));
-				wr->wr.opcode = IB_WR_REG_MR;
-				wr->wr.wr_id = IBLND_WID_MR;
-				wr->wr.num_sge = 0;
-				wr->wr.send_flags = 0;
-				wr->mr = mr;
-				wr->key = is_rx ? mr->rkey : mr->lkey;
-				wr->access = (IB_ACCESS_LOCAL_WRITE |
-					      IB_ACCESS_REMOTE_WRITE);
-
-				fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
-				fmr->fmr_frd = frd;
-				fmr->fmr_pfmr = NULL;
-				fmr->fmr_pool = fpo;
-				return 0;
-			}
-			spin_unlock(&fps->fps_lock);
-			rc = -EAGAIN;
-		}
-
-		spin_lock(&fps->fps_lock);
-		fpo->fpo_map_count--;
-		if (rc != -EAGAIN) {
-			spin_unlock(&fps->fps_lock);
-			return rc;
-		}
-
-		/* EAGAIN and ... */
-		if (version != fps->fps_version) {
-			spin_unlock(&fps->fps_lock);
-			goto again;
-		}
-	}
-
-	if (fps->fps_increasing) {
-		spin_unlock(&fps->fps_lock);
-		CDEBUG(D_NET, "Another thread is allocating new FMR pool, waiting for it to complete\n");
-		schedule();
-		goto again;
-	}
-
-	if (time_before(jiffies, fps->fps_next_retry)) {
-		/* someone failed recently */
-		spin_unlock(&fps->fps_lock);
-		return -EAGAIN;
-	}
-
-	fps->fps_increasing = 1;
-	spin_unlock(&fps->fps_lock);
-
-	CDEBUG(D_NET, "Allocate new FMR pool\n");
-	rc = kiblnd_create_fmr_pool(fps, &fpo);
-	spin_lock(&fps->fps_lock);
-	fps->fps_increasing = 0;
-	if (!rc) {
-		fps->fps_version++;
-		list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
-	} else {
-		fps->fps_next_retry = jiffies + IBLND_POOL_RETRY * HZ;
-	}
-	spin_unlock(&fps->fps_lock);
-
-	goto again;
-}
-
-static void kiblnd_fini_pool(struct kib_pool *pool)
-{
-	LASSERT(list_empty(&pool->po_free_list));
-	LASSERT(!pool->po_allocated);
-
-	CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name);
-}
-
-static void kiblnd_init_pool(struct kib_poolset *ps, struct kib_pool *pool, int size)
-{
-	CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name);
-
-	memset(pool, 0, sizeof(*pool));
-	INIT_LIST_HEAD(&pool->po_free_list);
-	pool->po_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
-	pool->po_owner    = ps;
-	pool->po_size     = size;
-}
-
-static void kiblnd_destroy_pool_list(struct list_head *head)
-{
-	struct kib_pool *pool;
-
-	while (!list_empty(head)) {
-		pool = list_entry(head->next, struct kib_pool, po_list);
-		list_del(&pool->po_list);
-
-		LASSERT(pool->po_owner);
-		pool->po_owner->ps_pool_destroy(pool);
-	}
-}
-
-static void kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies)
-{
-	if (!ps->ps_net) /* initialized? */
-		return;
-
-	spin_lock(&ps->ps_lock);
-	while (!list_empty(&ps->ps_pool_list)) {
-		struct kib_pool *po = list_entry(ps->ps_pool_list.next,
-					    struct kib_pool, po_list);
-		po->po_failed = 1;
-		list_del(&po->po_list);
-		if (!po->po_allocated)
-			list_add(&po->po_list, zombies);
-		else
-			list_add(&po->po_list, &ps->ps_failed_pool_list);
-	}
-	spin_unlock(&ps->ps_lock);
-}
-
-static void kiblnd_fini_poolset(struct kib_poolset *ps)
-{
-	if (ps->ps_net) { /* initialized? */
-		kiblnd_destroy_pool_list(&ps->ps_failed_pool_list);
-		kiblnd_destroy_pool_list(&ps->ps_pool_list);
-	}
-}
-
-static int kiblnd_init_poolset(struct kib_poolset *ps, int cpt,
-			       struct kib_net *net, char *name, int size,
-			       kib_ps_pool_create_t po_create,
-			       kib_ps_pool_destroy_t po_destroy,
-			       kib_ps_node_init_t nd_init,
-			       kib_ps_node_fini_t nd_fini)
-{
-	struct kib_pool *pool;
-	int rc;
-
-	memset(ps, 0, sizeof(*ps));
-
-	ps->ps_cpt          = cpt;
-	ps->ps_net          = net;
-	ps->ps_pool_create  = po_create;
-	ps->ps_pool_destroy = po_destroy;
-	ps->ps_node_init    = nd_init;
-	ps->ps_node_fini    = nd_fini;
-	ps->ps_pool_size    = size;
-	if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
-	    >= sizeof(ps->ps_name))
-		return -E2BIG;
-	spin_lock_init(&ps->ps_lock);
-	INIT_LIST_HEAD(&ps->ps_pool_list);
-	INIT_LIST_HEAD(&ps->ps_failed_pool_list);
-
-	rc = ps->ps_pool_create(ps, size, &pool);
-	if (!rc)
-		list_add(&pool->po_list, &ps->ps_pool_list);
-	else
-		CERROR("Failed to create the first pool for %s\n", ps->ps_name);
-
-	return rc;
-}
-
-static int kiblnd_pool_is_idle(struct kib_pool *pool, unsigned long now)
-{
-	if (pool->po_allocated) /* still in use */
-		return 0;
-	if (pool->po_failed)
-		return 1;
-	return time_after_eq(now, pool->po_deadline);
-}
-
-void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node)
-{
-	LIST_HEAD(zombies);
-	struct kib_poolset *ps = pool->po_owner;
-	struct kib_pool *tmp;
-	unsigned long now = jiffies;
-
-	spin_lock(&ps->ps_lock);
-
-	if (ps->ps_node_fini)
-		ps->ps_node_fini(pool, node);
-
-	LASSERT(pool->po_allocated > 0);
-	list_add(node, &pool->po_free_list);
-	pool->po_allocated--;
-
-	list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) {
-		/* the first pool is persistent */
-		if (ps->ps_pool_list.next == &pool->po_list)
-			continue;
-
-		if (kiblnd_pool_is_idle(pool, now))
-			list_move(&pool->po_list, &zombies);
-	}
-	spin_unlock(&ps->ps_lock);
-
-	if (!list_empty(&zombies))
-		kiblnd_destroy_pool_list(&zombies);
-}
-
-struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps)
-{
-	struct list_head *node;
-	struct kib_pool *pool;
-	unsigned int interval = 1;
-	unsigned long time_before;
-	unsigned int trips = 0;
-	int rc;
-
- again:
-	spin_lock(&ps->ps_lock);
-	list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
-		if (list_empty(&pool->po_free_list))
-			continue;
-
-		pool->po_allocated++;
-		pool->po_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
-		node = pool->po_free_list.next;
-		list_del(node);
-
-		if (ps->ps_node_init) {
-			/* still hold the lock */
-			ps->ps_node_init(pool, node);
-		}
-		spin_unlock(&ps->ps_lock);
-		return node;
-	}
-
-	/* no available tx pool and ... */
-	if (ps->ps_increasing) {
-		/* another thread is allocating a new pool */
-		spin_unlock(&ps->ps_lock);
-		trips++;
-		CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting %d jiffies for it to complete. trips = %d\n",
-		       ps->ps_name, interval, trips);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(interval);
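-		/* exponential backoff, capped at roughly one second */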
-		if (interval < HZ)
-			interval *= 2;
-
-		goto again;
-	}
-
-	if (time_before(jiffies, ps->ps_next_retry)) {
-		/* someone failed recently */
-		spin_unlock(&ps->ps_lock);
-		return NULL;
-	}
-
-	ps->ps_increasing = 1;
-	spin_unlock(&ps->ps_lock);
-
-	CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);
-	time_before = jiffies;
-	rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);
-	CDEBUG(D_NET, "ps_pool_create took %lu jiffies to complete\n",
-	       jiffies - time_before);
-
-	spin_lock(&ps->ps_lock);
-	ps->ps_increasing = 0;
-	if (!rc) {
-		list_add_tail(&pool->po_list, &ps->ps_pool_list);
-	} else {
-		ps->ps_next_retry = jiffies + IBLND_POOL_RETRY * HZ;
-		CERROR("Can't allocate new %s pool: out of memory\n",
-		       ps->ps_name);
-	}
-	spin_unlock(&ps->ps_lock);
-
-	goto again;
-}
-
-static void kiblnd_destroy_tx_pool(struct kib_pool *pool)
-{
-	struct kib_tx_pool *tpo = container_of(pool, struct kib_tx_pool, tpo_pool);
-	int i;
-
-	LASSERT(!pool->po_allocated);
-
-	if (tpo->tpo_tx_pages) {
-		kiblnd_unmap_tx_pool(tpo);
-		kiblnd_free_pages(tpo->tpo_tx_pages);
-	}
-
-	if (!tpo->tpo_tx_descs)
-		goto out;
-
-	for (i = 0; i < pool->po_size; i++) {
-		struct kib_tx *tx = &tpo->tpo_tx_descs[i];
-
-		list_del(&tx->tx_list);
-		kfree(tx->tx_pages);
-		kfree(tx->tx_frags);
-		kfree(tx->tx_wrq);
-		kfree(tx->tx_sge);
-		kfree(tx->tx_rd);
-	}
-
-	kfree(tpo->tpo_tx_descs);
-out:
-	kiblnd_fini_pool(pool);
-	kfree(tpo);
-}
-
-static int kiblnd_tx_pool_size(int ncpts)
-{
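-	/*
-	 * spread the configured tx descriptors across CPTs, but never
-	 * let a single pool fall below IBLND_TX_POOL entries
-	 */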
-	int ntx = *kiblnd_tunables.kib_ntx / ncpts;
-
-	return max(IBLND_TX_POOL, ntx);
-}
-
-static int kiblnd_create_tx_pool(struct kib_poolset *ps, int size,
-				 struct kib_pool **pp_po)
-{
-	int i;
-	int npg;
-	struct kib_pool *pool;
-	struct kib_tx_pool *tpo;
-
-	tpo = kzalloc_cpt(sizeof(*tpo), GFP_NOFS, ps->ps_cpt);
-	if (!tpo) {
-		CERROR("Failed to allocate TX pool\n");
-		return -ENOMEM;
-	}
-
-	pool = &tpo->tpo_pool;
-	kiblnd_init_pool(ps, pool, size);
-	tpo->tpo_tx_descs = NULL;
-	tpo->tpo_tx_pages = NULL;
-
-	npg = DIV_ROUND_UP(size * IBLND_MSG_SIZE, PAGE_SIZE);
-	if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg)) {
-		CERROR("Can't allocate tx pages: %d\n", npg);
-		kfree(tpo);
-		return -ENOMEM;
-	}
-
-	tpo->tpo_tx_descs = kzalloc_cpt(size * sizeof(struct kib_tx),
-					GFP_NOFS, ps->ps_cpt);
-	if (!tpo->tpo_tx_descs) {
-		CERROR("Can't allocate %d tx descriptors\n", size);
-		ps->ps_pool_destroy(pool);
-		return -ENOMEM;
-	}
-
-	memset(tpo->tpo_tx_descs, 0, size * sizeof(struct kib_tx));
-
-	for (i = 0; i < size; i++) {
-		struct kib_tx *tx = &tpo->tpo_tx_descs[i];
-
-		tx->tx_pool = tpo;
-		if (ps->ps_net->ibn_fmr_ps) {
-			tx->tx_pages = kzalloc_cpt(LNET_MAX_IOV * sizeof(*tx->tx_pages),
-						   GFP_NOFS, ps->ps_cpt);
-			if (!tx->tx_pages)
-				break;
-		}
-
-		tx->tx_frags = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
-					   sizeof(*tx->tx_frags),
-					   GFP_NOFS, ps->ps_cpt);
-		if (!tx->tx_frags)
-			break;
-
-		sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1);
-
-		tx->tx_wrq = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
-					 sizeof(*tx->tx_wrq),
-					 GFP_NOFS, ps->ps_cpt);
-		if (!tx->tx_wrq)
-			break;
-
-		tx->tx_sge = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
-					 sizeof(*tx->tx_sge),
-					 GFP_NOFS, ps->ps_cpt);
-		if (!tx->tx_sge)
-			break;
-
-		tx->tx_rd = kzalloc_cpt(offsetof(struct kib_rdma_desc,
-						 rd_frags[IBLND_MAX_RDMA_FRAGS]),
-					GFP_NOFS, ps->ps_cpt);
-		if (!tx->tx_rd)
-			break;
-	}
-
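-	/*
-	 * the pool is usable only if every descriptor was fully set up;
-	 * any allocation failure above broke out of the loop with
-	 * i < size
-	 */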
-	if (i == size) {
-		kiblnd_map_tx_pool(tpo);
-		*pp_po = pool;
-		return 0;
-	}
-
-	ps->ps_pool_destroy(pool);
-	return -ENOMEM;
-}
-
-static void kiblnd_tx_init(struct kib_pool *pool, struct list_head *node)
-{
-	struct kib_tx_poolset *tps = container_of(pool->po_owner,
-						  struct kib_tx_poolset,
-						  tps_poolset);
-	struct kib_tx *tx = list_entry(node, struct kib_tx, tx_list);
-
-	tx->tx_cookie = tps->tps_next_tx_cookie++;
-}
-
-static void kiblnd_net_fini_pools(struct kib_net *net)
-{
-	int i;
-
-	cfs_cpt_for_each(i, lnet_cpt_table()) {
-		struct kib_tx_poolset *tps;
-		struct kib_fmr_poolset *fps;
-
-		if (net->ibn_tx_ps) {
-			tps = net->ibn_tx_ps[i];
-			kiblnd_fini_poolset(&tps->tps_poolset);
-		}
-
-		if (net->ibn_fmr_ps) {
-			fps = net->ibn_fmr_ps[i];
-			kiblnd_fini_fmr_poolset(fps);
-		}
-	}
-
-	if (net->ibn_tx_ps) {
-		cfs_percpt_free(net->ibn_tx_ps);
-		net->ibn_tx_ps = NULL;
-	}
-
-	if (net->ibn_fmr_ps) {
-		cfs_percpt_free(net->ibn_fmr_ps);
-		net->ibn_fmr_ps = NULL;
-	}
-}
-
-static int kiblnd_net_init_pools(struct kib_net *net, struct lnet_ni *ni,
-				 __u32 *cpts, int ncpts)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-	int cpt;
-	int rc;
-	int i;
-
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
-	if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
-		CERROR("Can't set fmr pool size (%d) < ntx / 4 (%d)\n",
-		       tunables->lnd_fmr_pool_size,
-		       *kiblnd_tunables.kib_ntx / 4);
-		rc = -EINVAL;
-		goto failed;
-	}
-
-	/*
-	 * TX pool must be created later than FMR, see LU-2268
-	 * for details
-	 */
-	LASSERT(!net->ibn_tx_ps);
-
-	/*
-	 * premapping can fail if ibd_nmr > 1, so we always create an
-	 * FMR pool and fall back to map-on-demand if premapping fails
-	 *
-	 * cfs_percpt_alloc creates an array of struct kib_fmr_poolset.
-	 * The number of struct kib_fmr_poolsets created is equal to the
-	 * number of CPTs that exist, i.e. net->ibn_fmr_ps[cpt].
-	 */
-	net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
-					   sizeof(struct kib_fmr_poolset));
-	if (!net->ibn_fmr_ps) {
-		CERROR("Failed to allocate FMR pool array\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	for (i = 0; i < ncpts; i++) {
-		cpt = !cpts ? i : cpts[i];
-		rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts,
-					     net, tunables);
-		if (rc) {
-			CERROR("Can't initialize FMR pool for CPT %d: %d\n",
-			       cpt, rc);
-			goto failed;
-		}
-	}
-
-	if (i > 0)
-		LASSERT(i == ncpts);
-
-	/*
-	 * cfs_percpt_alloc creates an array of struct kib_tx_poolset.
-	 * The number of struct kib_tx_poolsets created is equal to the
-	 * number of CPTs that exist, i.e. net->ibn_tx_ps[cpt].
-	 */
-	net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
-					  sizeof(struct kib_tx_poolset));
-	if (!net->ibn_tx_ps) {
-		CERROR("Failed to allocate tx pool array\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	for (i = 0; i < ncpts; i++) {
-		cpt = !cpts ? i : cpts[i];
-		rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset,
-					 cpt, net, "TX",
-					 kiblnd_tx_pool_size(ncpts),
-					 kiblnd_create_tx_pool,
-					 kiblnd_destroy_tx_pool,
-					 kiblnd_tx_init, NULL);
-		if (rc) {
-			CERROR("Can't initialize TX pool for CPT %d: %d\n",
-			       cpt, rc);
-			goto failed;
-		}
-	}
-
-	return 0;
- failed:
-	kiblnd_net_fini_pools(net);
-	LASSERT(rc);
-	return rc;
-}
-
-static int kiblnd_hdev_get_attr(struct kib_hca_dev *hdev)
-{
-	/*
-	 * It's safe to assume a HCA can handle a page size
-	 * matching that of the native system
-	 */
-	hdev->ibh_page_shift = PAGE_SHIFT;
-	hdev->ibh_page_size  = 1 << PAGE_SHIFT;
-	hdev->ibh_page_mask  = ~((__u64)hdev->ibh_page_size - 1);
-
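-	/*
-	 * only a device advertising an unlimited (all-ones) max MR size
-	 * is accepted; anything else is rejected below
-	 */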
-	hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
-	if (hdev->ibh_mr_size == ~0ULL) {
-		hdev->ibh_mr_shift = 64;
-		return 0;
-	}
-
-	CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size);
-	return -EINVAL;
-}
-
-void kiblnd_hdev_destroy(struct kib_hca_dev *hdev)
-{
-	if (hdev->ibh_pd)
-		ib_dealloc_pd(hdev->ibh_pd);
-
-	if (hdev->ibh_cmid)
-		rdma_destroy_id(hdev->ibh_cmid);
-
-	kfree(hdev);
-}
-
-/* DUMMY */
-static int kiblnd_dummy_callback(struct rdma_cm_id *cmid,
-				 struct rdma_cm_event *event)
-{
-	return 0;
-}
-
-static int kiblnd_dev_need_failover(struct kib_dev *dev)
-{
-	struct rdma_cm_id *cmid;
-	struct sockaddr_in srcaddr;
-	struct sockaddr_in dstaddr;
-	int rc;
-
-	if (!dev->ibd_hdev || /* initializing */
-	    !dev->ibd_hdev->ibh_cmid || /* listener is dead */
-	    *kiblnd_tunables.kib_dev_failover > 1) /* debugging */
-		return 1;
-
-	/*
-	 * XXX: it's UGLY, but I don't have a better way to detect
-	 * ib-bonding HCA failover because:
-	 *
-	 * a. no reliable CM event for HCA failover...
-	 * b. no OFED API to get ib_device for current net_device...
-	 *
-	 * We have only two choices at this point:
-	 *
-	 * a. rdma_bind_addr(), it will conflict with listener cmid
-	 * b. rdma_resolve_addr() to zero addr
-	 */
-	cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
-				     IB_QPT_RC);
-	if (IS_ERR(cmid)) {
-		rc = PTR_ERR(cmid);
-		CERROR("Failed to create cmid for failover: %d\n", rc);
-		return rc;
-	}
-
-	memset(&srcaddr, 0, sizeof(srcaddr));
-	srcaddr.sin_family = AF_INET;
-	srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-
-	memset(&dstaddr, 0, sizeof(dstaddr));
-	dstaddr.sin_family = AF_INET;
-	rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr,
-			       (struct sockaddr *)&dstaddr, 1);
-	if (rc || !cmid->device) {
-		CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
-		       dev->ibd_ifname, &dev->ibd_ifip,
-		       cmid->device, rc);
-		rdma_destroy_id(cmid);
-		return rc;
-	}
-
-	rc = dev->ibd_hdev->ibh_ibdev != cmid->device; /* true for failover */
-	rdma_destroy_id(cmid);
-
-	return rc;
-}
-
-int kiblnd_dev_failover(struct kib_dev *dev)
-{
-	LIST_HEAD(zombie_tpo);
-	LIST_HEAD(zombie_ppo);
-	LIST_HEAD(zombie_fpo);
-	struct rdma_cm_id *cmid  = NULL;
-	struct kib_hca_dev *hdev  = NULL;
-	struct ib_pd *pd;
-	struct kib_net *net;
-	struct sockaddr_in addr;
-	unsigned long flags;
-	int rc = 0;
-	int i;
-
-	LASSERT(*kiblnd_tunables.kib_dev_failover > 1 ||
-		dev->ibd_can_failover || !dev->ibd_hdev);
-
-	rc = kiblnd_dev_need_failover(dev);
-	if (rc <= 0)
-		goto out;
-
-	if (dev->ibd_hdev &&
-	    dev->ibd_hdev->ibh_cmid) {
-		/*
-		 * XXX it's not good to close the old listener here,
-		 * because we might then fail to create a new one.
-		 * But we have to close it now, otherwise rdma_bind_addr
-		 * will return EADDRINUSE... What a pain!
-		 */
-		write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-		cmid = dev->ibd_hdev->ibh_cmid;
-		/*
-		 * make the next call to kiblnd_dev_need_failover()
-		 * return 1 for me
-		 */
-		dev->ibd_hdev->ibh_cmid  = NULL;
-		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-		rdma_destroy_id(cmid);
-	}
-
-	cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
-				     IB_QPT_RC);
-	if (IS_ERR(cmid)) {
-		rc = PTR_ERR(cmid);
-		CERROR("Failed to create cmid for failover: %d\n", rc);
-		goto out;
-	}
-
-	memset(&addr, 0, sizeof(addr));
-	addr.sin_family      = AF_INET;
-	addr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-	addr.sin_port	= htons(*kiblnd_tunables.kib_service);
-
-	/* Bind to failover device or port */
-	rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr);
-	if (rc || !cmid->device) {
-		CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
-		       dev->ibd_ifname, &dev->ibd_ifip,
-		       cmid->device, rc);
-		rdma_destroy_id(cmid);
-		goto out;
-	}
-
-	hdev = kzalloc(sizeof(*hdev), GFP_NOFS);
-	if (!hdev) {
-		CERROR("Failed to allocate kib_hca_dev\n");
-		rdma_destroy_id(cmid);
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	atomic_set(&hdev->ibh_ref, 1);
-	hdev->ibh_dev   = dev;
-	hdev->ibh_cmid  = cmid;
-	hdev->ibh_ibdev = cmid->device;
-
-	pd = ib_alloc_pd(cmid->device, 0);
-	if (IS_ERR(pd)) {
-		rc = PTR_ERR(pd);
-		CERROR("Can't allocate PD: %d\n", rc);
-		goto out;
-	}
-
-	hdev->ibh_pd = pd;
-
-	rc = rdma_listen(cmid, 0);
-	if (rc) {
-		CERROR("Can't start new listener: %d\n", rc);
-		goto out;
-	}
-
-	rc = kiblnd_hdev_get_attr(hdev);
-	if (rc) {
-		CERROR("Can't get device attributes: %d\n", rc);
-		goto out;
-	}
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	swap(dev->ibd_hdev, hdev); /* take over the refcount */
-
-	list_for_each_entry(net, &dev->ibd_nets, ibn_list) {
-		cfs_cpt_for_each(i, lnet_cpt_table()) {
-			kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset,
-					    &zombie_tpo);
-
-			if (net->ibn_fmr_ps)
-				kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i],
-							&zombie_fpo);
-		}
-	}
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- out:
-	if (!list_empty(&zombie_tpo))
-		kiblnd_destroy_pool_list(&zombie_tpo);
-	if (!list_empty(&zombie_ppo))
-		kiblnd_destroy_pool_list(&zombie_ppo);
-	if (!list_empty(&zombie_fpo))
-		kiblnd_destroy_fmr_pool_list(&zombie_fpo);
-	if (hdev)
-		kiblnd_hdev_decref(hdev);
-
-	if (rc)
-		dev->ibd_failed_failover++;
-	else
-		dev->ibd_failed_failover = 0;
-
-	return rc;
-}
-
-void kiblnd_destroy_dev(struct kib_dev *dev)
-{
-	LASSERT(!dev->ibd_nnets);
-	LASSERT(list_empty(&dev->ibd_nets));
-
-	list_del(&dev->ibd_fail_list);
-	list_del(&dev->ibd_list);
-
-	if (dev->ibd_hdev)
-		kiblnd_hdev_decref(dev->ibd_hdev);
-
-	kfree(dev);
-}
-
-static struct kib_dev *kiblnd_create_dev(char *ifname)
-{
-	struct net_device *netdev;
-	struct kib_dev *dev;
-	__u32 netmask;
-	__u32 ip;
-	int up;
-	int rc;
-
-	rc = lnet_ipif_query(ifname, &up, &ip, &netmask);
-	if (rc) {
-		CERROR("Can't query IPoIB interface %s: %d\n",
-		       ifname, rc);
-		return NULL;
-	}
-
-	if (!up) {
-		CERROR("Can't query IPoIB interface %s: it's down\n", ifname);
-		return NULL;
-	}
-
-	dev = kzalloc(sizeof(*dev), GFP_NOFS);
-	if (!dev)
-		return NULL;
-
-	netdev = dev_get_by_name(&init_net, ifname);
-	if (!netdev) {
-		dev->ibd_can_failover = 0;
-	} else {
-		dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER);
-		dev_put(netdev);
-	}
-
-	INIT_LIST_HEAD(&dev->ibd_nets);
-	INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */
-	INIT_LIST_HEAD(&dev->ibd_fail_list);
-	dev->ibd_ifip = ip;
-	strcpy(&dev->ibd_ifname[0], ifname);
-
-	/* initialize the device */
-	rc = kiblnd_dev_failover(dev);
-	if (rc) {
-		CERROR("Can't initialize device: %d\n", rc);
-		kfree(dev);
-		return NULL;
-	}
-
-	list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs);
-	return dev;
-}
-
-static void kiblnd_base_shutdown(void)
-{
-	struct kib_sched_info *sched;
-	int i;
-
-	LASSERT(list_empty(&kiblnd_data.kib_devs));
-
-	switch (kiblnd_data.kib_init) {
-	default:
-		LBUG();
-
-	case IBLND_INIT_ALL:
-	case IBLND_INIT_DATA:
-		LASSERT(kiblnd_data.kib_peers);
-		for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
-			LASSERT(list_empty(&kiblnd_data.kib_peers[i]));
-		LASSERT(list_empty(&kiblnd_data.kib_connd_zombies));
-		LASSERT(list_empty(&kiblnd_data.kib_connd_conns));
-		LASSERT(list_empty(&kiblnd_data.kib_reconn_list));
-		LASSERT(list_empty(&kiblnd_data.kib_reconn_wait));
-
-		/* flag threads to terminate; wake and wait for them to die */
-		kiblnd_data.kib_shutdown = 1;
-
-		/*
-		 * NB: we really want to stop scheduler threads net by net
-		 * instead of for the whole module; this should be improved
-		 * with dynamic LNet configuration
-		 */
-		cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds)
-			wake_up_all(&sched->ibs_waitq);
-
-		wake_up_all(&kiblnd_data.kib_connd_waitq);
-		wake_up_all(&kiblnd_data.kib_failover_waitq);
-
-		i = 2;
-		while (atomic_read(&kiblnd_data.kib_nthreads)) {
-			i++;
-			/* power of 2 ? */
-			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-			       "Waiting for %d threads to terminate\n",
-			       atomic_read(&kiblnd_data.kib_nthreads));
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ);
-		}
-
-		/* fall through */
-
-	case IBLND_INIT_NOTHING:
-		break;
-	}
-
-	kvfree(kiblnd_data.kib_peers);
-
-	if (kiblnd_data.kib_scheds)
-		cfs_percpt_free(kiblnd_data.kib_scheds);
-
-	kiblnd_data.kib_init = IBLND_INIT_NOTHING;
-	module_put(THIS_MODULE);
-}
-
-static void kiblnd_shutdown(struct lnet_ni *ni)
-{
-	struct kib_net *net = ni->ni_data;
-	rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-	int i;
-	unsigned long flags;
-
-	LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
-
-	if (!net)
-		goto out;
-
-	write_lock_irqsave(g_lock, flags);
-	net->ibn_shutdown = 1;
-	write_unlock_irqrestore(g_lock, flags);
-
-	switch (net->ibn_init) {
-	default:
-		LBUG();
-
-	case IBLND_INIT_ALL:
-		/* nuke all existing peers within this net */
-		kiblnd_del_peer(ni, LNET_NID_ANY);
-
-		/* Wait for all peer state to clean up */
-		i = 2;
-		while (atomic_read(&net->ibn_npeers)) {
-			i++;
-			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
-			       "%s: waiting for %d peers to disconnect\n",
-			       libcfs_nid2str(ni->ni_nid),
-			       atomic_read(&net->ibn_npeers));
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ);
-		}
-
-		kiblnd_net_fini_pools(net);
-
-		write_lock_irqsave(g_lock, flags);
-		LASSERT(net->ibn_dev->ibd_nnets > 0);
-		net->ibn_dev->ibd_nnets--;
-		list_del(&net->ibn_list);
-		write_unlock_irqrestore(g_lock, flags);
-
-		/* fall through */
-
-	case IBLND_INIT_NOTHING:
-		LASSERT(!atomic_read(&net->ibn_nconns));
-
-		if (net->ibn_dev && !net->ibn_dev->ibd_nnets)
-			kiblnd_destroy_dev(net->ibn_dev);
-
-		break;
-	}
-
-	net->ibn_init = IBLND_INIT_NOTHING;
-	ni->ni_data = NULL;
-
-	kfree(net);
-
-out:
-	if (list_empty(&kiblnd_data.kib_devs))
-		kiblnd_base_shutdown();
-}
-
-static int kiblnd_base_startup(void)
-{
-	struct kib_sched_info *sched;
-	int rc;
-	int i;
-
-	LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING);
-
-	try_module_get(THIS_MODULE);
-	/* zero pointers, flags etc */
-	memset(&kiblnd_data, 0, sizeof(kiblnd_data));
-
-	rwlock_init(&kiblnd_data.kib_global_lock);
-
-	INIT_LIST_HEAD(&kiblnd_data.kib_devs);
-	INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs);
-
-	kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE;
-	kiblnd_data.kib_peers = kvmalloc_array(kiblnd_data.kib_peer_hash_size,
-					       sizeof(struct list_head),
-					       GFP_KERNEL);
-	if (!kiblnd_data.kib_peers)
-		goto failed;
-	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
-		INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]);
-
-	spin_lock_init(&kiblnd_data.kib_connd_lock);
-	INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
-	INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
-	INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
-	INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);
-
-	init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
-	init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
-
-	kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(),
-						  sizeof(*sched));
-	if (!kiblnd_data.kib_scheds)
-		goto failed;
-
-	cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) {
-		int nthrs;
-
-		spin_lock_init(&sched->ibs_lock);
-		INIT_LIST_HEAD(&sched->ibs_conns);
-		init_waitqueue_head(&sched->ibs_waitq);
-
-		nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
-		if (*kiblnd_tunables.kib_nscheds > 0) {
-			nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds);
-		} else {
-			/*
-			 * cap at half of the CPUs; the other half is
-			 * reserved for upper-layer modules
-			 */
-			nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
-		}
-
-		sched->ibs_nthreads_max = nthrs;
-		sched->ibs_cpt = i;
-	}
-
-	kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR;
-
-	/* lists/ptrs/locks initialised */
-	kiblnd_data.kib_init = IBLND_INIT_DATA;
-	/*****************************************************/
-
-	rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd");
-	if (rc) {
-		CERROR("Can't spawn o2iblnd connd: %d\n", rc);
-		goto failed;
-	}
-
-	if (*kiblnd_tunables.kib_dev_failover)
-		rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
-					 "kiblnd_failover");
-
-	if (rc) {
-		CERROR("Can't spawn o2iblnd failover thread: %d\n", rc);
-		goto failed;
-	}
-
-	/* flag everything initialised */
-	kiblnd_data.kib_init = IBLND_INIT_ALL;
-	/*****************************************************/
-
-	return 0;
-
- failed:
-	kiblnd_base_shutdown();
-	return -ENETDOWN;
-}
-
-static int kiblnd_start_schedulers(struct kib_sched_info *sched)
-{
-	int rc = 0;
-	int nthrs;
-	int i;
-
-	if (!sched->ibs_nthreads) {
-		if (*kiblnd_tunables.kib_nscheds > 0) {
-			nthrs = sched->ibs_nthreads_max;
-		} else {
-			nthrs = cfs_cpt_weight(lnet_cpt_table(),
-					       sched->ibs_cpt);
-			nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
-			nthrs = min(IBLND_N_SCHED_HIGH, nthrs);
-		}
-	} else {
-		LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max);
-		/* add one thread if there is a new interface */
-		nthrs = sched->ibs_nthreads < sched->ibs_nthreads_max;
-	}
-
-	for (i = 0; i < nthrs; i++) {
-		long id;
-		char name[20];
-
-		id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
-		snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld",
-			 KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
-		rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name);
-		if (!rc)
-			continue;
-
-		CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
-		       sched->ibs_cpt, sched->ibs_nthreads + i, rc);
-		break;
-	}
-
-	sched->ibs_nthreads += i;
-	return rc;
-}
-
-static int kiblnd_dev_start_threads(struct kib_dev *dev, int newdev, __u32 *cpts,
-				    int ncpts)
-{
-	int cpt;
-	int rc;
-	int i;
-
-	for (i = 0; i < ncpts; i++) {
-		struct kib_sched_info *sched;
-
-		cpt = !cpts ? i : cpts[i];
-		sched = kiblnd_data.kib_scheds[cpt];
-
-		if (!newdev && sched->ibs_nthreads > 0)
-			continue;
-
-		rc = kiblnd_start_schedulers(kiblnd_data.kib_scheds[cpt]);
-		if (rc) {
-			CERROR("Failed to start scheduler threads for %s\n",
-			       dev->ibd_ifname);
-			return rc;
-		}
-	}
-	return 0;
-}
-
-static struct kib_dev *kiblnd_dev_search(char *ifname)
-{
-	struct kib_dev *alias = NULL;
-	struct kib_dev *dev;
-	char *colon;
-	char *colon2;
-
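-	/*
-	 * an exact match wins; failing that, remember the first device
-	 * whose base name (up to any ':') matches, since o2iblnd can run
-	 * over an aliased interface
-	 */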
-	colon = strchr(ifname, ':');
-	list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
-		if (!strcmp(&dev->ibd_ifname[0], ifname))
-			return dev;
-
-		if (alias)
-			continue;
-
-		colon2 = strchr(dev->ibd_ifname, ':');
-		if (colon)
-			*colon = 0;
-		if (colon2)
-			*colon2 = 0;
-
-		if (!strcmp(&dev->ibd_ifname[0], ifname))
-			alias = dev;
-
-		if (colon)
-			*colon = ':';
-		if (colon2)
-			*colon2 = ':';
-	}
-	return alias;
-}
-
-static int kiblnd_startup(struct lnet_ni *ni)
-{
-	char *ifname;
-	struct kib_dev *ibdev = NULL;
-	struct kib_net *net;
-	struct timespec64 tv;
-	unsigned long flags;
-	int rc;
-	int newdev;
-
-	LASSERT(ni->ni_lnd == &the_o2iblnd);
-
-	if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
-		rc = kiblnd_base_startup();
-		if (rc)
-			return rc;
-	}
-
-	net = kzalloc(sizeof(*net), GFP_NOFS);
-	ni->ni_data = net;
-	if (!net)
-		goto net_failed;
-
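-	/*
-	 * stamp this net with the wall-clock time in microseconds; stale
-	 * connections from an earlier incarnation are detected by
-	 * comparing this value
-	 */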
-	ktime_get_real_ts64(&tv);
-	net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC +
-			       tv.tv_nsec / NSEC_PER_USEC;
-
-	rc = kiblnd_tunables_setup(ni);
-	if (rc)
-		goto net_failed;
-
-	if (ni->ni_interfaces[0]) {
-		/* Use the IPoIB interface specified in 'networks=' */
-
-		BUILD_BUG_ON(LNET_MAX_INTERFACES <= 1);
-		if (ni->ni_interfaces[1]) {
-			CERROR("Multiple interfaces not supported\n");
-			goto failed;
-		}
-
-		ifname = ni->ni_interfaces[0];
-	} else {
-		ifname = *kiblnd_tunables.kib_default_ipif;
-	}
-
-	if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
-		CERROR("IPoIB interface name too long: %s\n", ifname);
-		goto failed;
-	}
-
-	ibdev = kiblnd_dev_search(ifname);
-
-	newdev = !ibdev;
-	/* hmm...create kib_dev even for alias */
-	if (!ibdev || strcmp(&ibdev->ibd_ifname[0], ifname))
-		ibdev = kiblnd_create_dev(ifname);
-
-	if (!ibdev)
-		goto failed;
-
-	net->ibn_dev = ibdev;
-	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
-
-	rc = kiblnd_dev_start_threads(ibdev, newdev,
-				      ni->ni_cpts, ni->ni_ncpts);
-	if (rc)
-		goto failed;
-
-	rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts);
-	if (rc) {
-		CERROR("Failed to initialize NI pools: %d\n", rc);
-		goto failed;
-	}
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	ibdev->ibd_nnets++;
-	list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	net->ibn_init = IBLND_INIT_ALL;
-
-	return 0;
-
-failed:
-	if (!net->ibn_dev && ibdev)
-		kiblnd_destroy_dev(ibdev);
-
-net_failed:
-	kiblnd_shutdown(ni);
-
-	CDEBUG(D_NET, "%s failed\n", __func__);
-	return -ENETDOWN;
-}
-
-static struct lnet_lnd the_o2iblnd = {
-	.lnd_type	= O2IBLND,
-	.lnd_startup	= kiblnd_startup,
-	.lnd_shutdown	= kiblnd_shutdown,
-	.lnd_ctl	= kiblnd_ctl,
-	.lnd_query	= kiblnd_query,
-	.lnd_send	= kiblnd_send,
-	.lnd_recv	= kiblnd_recv,
-};
-
-static void __exit ko2iblnd_exit(void)
-{
-	lnet_unregister_lnd(&the_o2iblnd);
-}
-
-static int __init ko2iblnd_init(void)
-{
-	int rc;
-
-	BUILD_BUG_ON(sizeof(struct kib_msg) > IBLND_MSG_SIZE);
-	BUILD_BUG_ON(offsetof(struct kib_msg,
-			  ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
-			  > IBLND_MSG_SIZE);
-	BUILD_BUG_ON(offsetof(struct kib_msg,
-			  ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
-			  > IBLND_MSG_SIZE);
-
-	kiblnd_tunables_init();
-
-	rc = libcfs_setup();
-	if (rc)
-		return rc;
-
-	lnet_register_lnd(&the_o2iblnd);
-
-	return 0;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("OpenIB gen2 LNet Network Driver");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(ko2iblnd_init);
-module_exit(ko2iblnd_exit);

+ 0 - 1048
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h

@@ -1,1048 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd.h
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-#include <linux/uaccess.h>
-
-#include <linux/io.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/pci.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#include <rdma/rdma_cm.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_fmr_pool.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <linux/lnet/lib-lnet.h>
-
-#define IBLND_PEER_HASH_SIZE		101	/* # peer lists */
-/* # scheduler loops before reschedule */
-#define IBLND_RESCHED			100
-
-#define IBLND_N_SCHED			2
-#define IBLND_N_SCHED_HIGH		4
-
-struct kib_tunables {
-	int *kib_dev_failover;           /* HCA failover */
-	unsigned int *kib_service;       /* IB service number */
-	int *kib_min_reconnect_interval; /* first failed connection retry... */
-	int *kib_max_reconnect_interval; /* exponentially increasing to this */
-	int *kib_cksum;                  /* checksum struct kib_msg? */
-	int *kib_timeout;                /* comms timeout (seconds) */
-	int *kib_keepalive;              /* keepalive timeout (seconds) */
-	int *kib_ntx;                    /* # tx descs */
-	char **kib_default_ipif;         /* default IPoIB interface */
-	int *kib_retry_count;
-	int *kib_rnr_retry_count;
-	int *kib_ib_mtu;                 /* IB MTU */
-	int *kib_require_priv_port;      /* accept only privileged ports */
-	int *kib_use_priv_port; /* use privileged port for active connect */
-	int *kib_nscheds;                /* # threads on each CPT */
-};
-
-extern struct kib_tunables  kiblnd_tunables;
-
-#define IBLND_MSG_QUEUE_SIZE_V1   8 /* V1 only : # messages/RDMAs in-flight */
-#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when to eagerly return credits */
-
-#define IBLND_CREDITS_DEFAULT     8 /* default # of peer credits */
-#define IBLND_CREDITS_MAX	  ((typeof(((struct kib_msg *)0)->ibm_credits)) - 1)  /* Max # of peer credits */
-
-/* when to eagerly return credits */
-#define IBLND_CREDITS_HIGHWATER(t, v)	((v) == IBLND_MSG_VERSION_1 ? \
-					IBLND_CREDIT_HIGHWATER_V1 : \
-					t->lnd_peercredits_hiw)
-
-#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
-							       cb, dev, \
-							       ps, qpt)
-
-/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
-#define IBLND_OOB_CAPABLE(v)       ((v) != IBLND_MSG_VERSION_1)
-#define IBLND_OOB_MSGS(v)	   (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-
-#define IBLND_FRAG_SHIFT	(PAGE_SHIFT - 12)	/* frag size on wire is in 4K units */
-#define IBLND_MSG_SIZE		(4 << 10)		/* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS	(LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */
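
IBLND_FRAG_SHIFT bridges the host page size and the 4 KiB units the wire protocol counts fragments in: with 4 KiB pages the shift is 0, with 64 KiB pages it is 4. A small sketch of the conversion, with the page shift passed in explicitly since userspace has no PAGE_SHIFT:

#include <stdio.h>

#define WIRE_UNIT_SHIFT 12		/* wire frags are in 4 KiB units */

static unsigned int pages_to_wire_frags(unsigned int npages,
					unsigned int page_shift)
{
	unsigned int frag_shift = page_shift - WIRE_UNIT_SHIFT;

	return npages << frag_shift;	/* one page covers 2^shift wire frags */
}

int main(void)
{
	printf("4K pages:  %u\n", pages_to_wire_frags(16, 12));	/* 16  */
	printf("64K pages: %u\n", pages_to_wire_frags(16, 16));	/* 256 */
	return 0;
}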
-
-/************************/
-/* derived constants... */
-/* Pools (shared by connections on each CPT) */
-/* These pools can grow at runtime, so there is no need to give a very large value */
-#define IBLND_TX_POOL			256
-#define IBLND_FMR_POOL			256
-#define IBLND_FMR_POOL_FLUSH		192
-
-#define IBLND_RX_MSGS(c)	\
-	((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version))
-#define IBLND_RX_MSG_BYTES(c)	(IBLND_RX_MSGS(c) * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES(c)	\
-	((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE)
-
-/* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS(c)	IBLND_RX_MSGS(c)
-#define IBLND_SEND_WRS(c)	\
-	(((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \
-	  kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni))
-#define IBLND_CQ_ENTRIES(c)	(IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
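
Worked through under assumed version-2 defaults (queue depth 8, 256 max frags from a 1 MiB LNET_MAX_PAYLOAD, 4 KiB pages, 8 concurrent sends), these macros size a connection's completion queue as follows:

#include <stdio.h>

int main(void)
{
	/* assumed example values, not authoritative defaults */
	int queue_depth      = 8;	/* negotiated peer credits	 */
	int oob_msgs         = 2;	/* IBLND_OOB_MSGS for version 2	 */
	int max_frags        = 256;	/* 1 MiB payload / 4 KiB frags	 */
	int frag_shift       = 0;	/* PAGE_SHIFT - 12 with 4K pages */
	int concurrent_sends = 8;

	int rx_msgs    = queue_depth * 2 + oob_msgs;
	int send_wrs   = ((max_frags + 1) << frag_shift) * concurrent_sends;
	int cq_entries = rx_msgs + send_wrs;

	printf("recv WRs %d, send WRs %d, CQ entries %d\n",
	       rx_msgs, send_wrs, cq_entries);	/* 18, 2056, 2074 */
	return 0;
}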
-
-struct kib_hca_dev;
-
-/* o2iblnd can run over aliased interface */
-#ifdef IFALIASZ
-#define KIB_IFNAME_SIZE	      IFALIASZ
-#else
-#define KIB_IFNAME_SIZE	      256
-#endif
-
-struct kib_dev {
-	struct list_head   ibd_list;            /* chain on kib_devs */
-	struct list_head   ibd_fail_list;       /* chain on kib_failed_devs */
-	__u32              ibd_ifip;            /* IPoIB interface IP */
-
-	/* IPoIB interface name */
-	char               ibd_ifname[KIB_IFNAME_SIZE];
-	int                ibd_nnets;           /* # nets extant */
-
-	unsigned long      ibd_next_failover;
-	int                ibd_failed_failover; /* # failover failures */
-	unsigned int       ibd_failover;        /* failover in progress */
-	unsigned int ibd_can_failover; /* IPoIB interface is a bonding master */
-	struct list_head   ibd_nets;
-	struct kib_hca_dev *ibd_hdev;
-};
-
-struct kib_hca_dev {
-	struct rdma_cm_id  *ibh_cmid;           /* listener cmid */
-	struct ib_device   *ibh_ibdev;          /* IB device */
-	int                ibh_page_shift;      /* page shift of current HCA */
-	int                ibh_page_size;       /* page size of current HCA */
-	__u64              ibh_page_mask;       /* page mask of current HCA */
-	int                ibh_mr_shift;        /* bits shift of max MR size */
-	__u64              ibh_mr_size;         /* size of MR */
-	struct ib_pd       *ibh_pd;             /* PD */
-	struct kib_dev	   *ibh_dev;		/* owner */
-	atomic_t           ibh_ref;             /* refcount */
-};
-
-/** # of seconds to keep pool alive */
-#define IBLND_POOL_DEADLINE     300
-/** # of seconds to retry if allocation failed */
-#define IBLND_POOL_RETRY	1
-
-struct kib_pages {
-	int                ibp_npages;          /* # pages */
-	struct page        *ibp_pages[0];       /* page array */
-};
-
-struct kib_pool;
-struct kib_poolset;
-
-typedef int  (*kib_ps_pool_create_t)(struct kib_poolset *ps,
-				     int inc, struct kib_pool **pp_po);
-typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po);
-typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node);
-typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node);
-
-struct kib_net;
-
-#define IBLND_POOL_NAME_LEN     32
-
-struct kib_poolset {
-	spinlock_t            ps_lock;            /* serialize */
-	struct kib_net        *ps_net;            /* network it belongs to */
-	char                  ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */
-	struct list_head      ps_pool_list;       /* list of pools */
-	struct list_head      ps_failed_pool_list;/* failed pool list */
-	unsigned long         ps_next_retry;      /* timestamp for retry if */
-						  /* allocation failed */
-	int                   ps_increasing;      /* is allocating new pool */
-	int                   ps_pool_size;       /* new pool size */
-	int                   ps_cpt;             /* CPT id */
-
-	kib_ps_pool_create_t  ps_pool_create;     /* create a new pool */
-	kib_ps_pool_destroy_t ps_pool_destroy;    /* destroy a pool */
-	kib_ps_node_init_t    ps_node_init; /* initialize new allocated node */
-	kib_ps_node_fini_t    ps_node_fini;       /* finalize node */
-};
-
-struct kib_pool {
-	struct list_head      po_list;       /* chain on pool list */
-	struct list_head      po_free_list;  /* pre-allocated node */
-	struct kib_poolset	*po_owner;	/* pool_set of this pool */
-	unsigned long         po_deadline;   /* deadline of this pool */
-	int                   po_allocated;  /* # of elements in use */
-	int                   po_failed;     /* pool is created on failed HCA */
-	int                   po_size;       /* # of pre-allocated elements */
-};
-
-struct kib_tx_poolset {
-	struct kib_poolset	tps_poolset;		/* pool-set */
-	__u64                 tps_next_tx_cookie; /* cookie of TX */
-};
-
-struct kib_tx_pool {
-	struct kib_pool		 tpo_pool;	/* pool */
-	struct kib_hca_dev	*tpo_hdev;	/* device for this pool */
-	struct kib_tx		*tpo_tx_descs;	/* all the tx descriptors */
-	struct kib_pages	*tpo_tx_pages;	/* premapped tx msg pages */
-};
-
-struct kib_fmr_poolset {
-	spinlock_t            fps_lock;            /* serialize */
-	struct kib_net        *fps_net;            /* IB network */
-	struct list_head      fps_pool_list;       /* FMR pool list */
-	struct list_head      fps_failed_pool_list;/* FMR pool list */
-	__u64                 fps_version;         /* validity stamp */
-	int                   fps_cpt;             /* CPT id */
-	int                   fps_pool_size;
-	int                   fps_flush_trigger;
-	int		      fps_cache;
-	int                   fps_increasing;      /* is allocating new pool */
-	unsigned long         fps_next_retry;      /* timestamp for retry if */
-						   /* allocation failed */
-};
-
-struct kib_fast_reg_descriptor { /* For fast registration */
-	struct list_head		 frd_list;
-	struct ib_send_wr		 frd_inv_wr;
-	struct ib_reg_wr		 frd_fastreg_wr;
-	struct ib_mr			*frd_mr;
-	bool				 frd_valid;
-};
-
-struct kib_fmr_pool {
-	struct list_head	 fpo_list;	/* chain on pool list */
-	struct kib_hca_dev	*fpo_hdev;	/* device for this pool */
-	struct kib_fmr_poolset	*fpo_owner;	/* owner of this pool */
-	union {
-		struct {
-			struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
-		} fmr;
-		struct { /* For fast registration */
-			struct list_head    fpo_pool_list;
-			int		    fpo_pool_size;
-		} fast_reg;
-	};
-	unsigned long         fpo_deadline;        /* deadline of this pool */
-	int                   fpo_failed;          /* fmr pool is failed */
-	int                   fpo_map_count;       /* # of mapped FMR */
-	int		      fpo_is_fmr;
-};
-
-struct kib_fmr {
-	struct kib_fmr_pool		*fmr_pool;	/* pool of FMR */
-	struct ib_pool_fmr		*fmr_pfmr;	/* IB pool fmr */
-	struct kib_fast_reg_descriptor	*fmr_frd;
-	u32				 fmr_key;
-};
-
-struct kib_net {
-	struct list_head      ibn_list;       /* chain on struct kib_dev::ibd_nets */
-	__u64                 ibn_incarnation; /* my epoch */
-	int                   ibn_init;       /* initialisation state */
-	int                   ibn_shutdown;   /* shutting down? */
-
-	atomic_t              ibn_npeers;     /* # peers extant */
-	atomic_t              ibn_nconns;     /* # connections extant */
-
-	struct kib_tx_poolset	**ibn_tx_ps;	/* tx pool-set */
-	struct kib_fmr_poolset	**ibn_fmr_ps;	/* fmr pool-set */
-
-	struct kib_dev		*ibn_dev;	/* underlying IB device */
-};
-
-#define KIB_THREAD_SHIFT		16
-#define KIB_THREAD_ID(cpt, tid)		((cpt) << KIB_THREAD_SHIFT | (tid))
-#define KIB_THREAD_CPT(id)		((id) >> KIB_THREAD_SHIFT)
-#define KIB_THREAD_TID(id)		((id) & ((1UL << KIB_THREAD_SHIFT) - 1))
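
KIB_THREAD_ID packs a CPT (CPU partition) number and a per-partition thread index into one integer, and the two accessor macros below it undo the packing. A standalone round-trip check of the same scheme:

#include <assert.h>
#include <stdio.h>

#define THREAD_SHIFT 16
#define THREAD_ID(cpt, tid)	(((cpt) << THREAD_SHIFT) | (tid))
#define THREAD_CPT(id)		((id) >> THREAD_SHIFT)
#define THREAD_TID(id)		((id) & ((1UL << THREAD_SHIFT) - 1))

int main(void)
{
	unsigned long id = THREAD_ID(3UL, 42UL);

	assert(THREAD_CPT(id) == 3 && THREAD_TID(id) == 42);
	printf("id %#lx -> cpt %lu tid %lu\n",
	       id, THREAD_CPT(id), THREAD_TID(id));
	return 0;
}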
-
-struct kib_sched_info {
-	spinlock_t         ibs_lock;     /* serialise */
-	wait_queue_head_t  ibs_waitq;    /* schedulers sleep here */
-	struct list_head   ibs_conns;    /* conns to check for rx completions */
-	int                ibs_nthreads; /* number of scheduler threads */
-	int                ibs_nthreads_max; /* max allowed scheduler threads */
-	int                ibs_cpt;      /* CPT id */
-};
-
-struct kib_data {
-	int               kib_init;           /* initialisation state */
-	int               kib_shutdown;       /* shut down? */
-	struct list_head  kib_devs;           /* IB devices extant */
-	struct list_head  kib_failed_devs;    /* list head of failed devices */
-	wait_queue_head_t kib_failover_waitq; /* schedulers sleep here */
-	atomic_t kib_nthreads;                /* # live threads */
-	rwlock_t kib_global_lock;    /* stabilize net/dev/peer/conn ops */
-	struct list_head *kib_peers; /* hash table of all my known peers */
-	int  kib_peer_hash_size;     /* size of kib_peers */
-	void *kib_connd; /* the connd task (serialisation assertions) */
-	struct list_head kib_connd_conns;   /* connections to setup/teardown */
-	struct list_head kib_connd_zombies; /* connections with zero refcount */
-	/* connections to reconnect */
-	struct list_head	kib_reconn_list;
-	/* peers wait for reconnection */
-	struct list_head	kib_reconn_wait;
-	/**
-	 * The time (in seconds) at which peers are pulled off
-	 * \a kib_reconn_wait for reconnection.
-	 */
-	time64_t		kib_reconn_sec;
-
-	wait_queue_head_t kib_connd_waitq;  /* connection daemon sleeps here */
-	spinlock_t kib_connd_lock;          /* serialise */
-	struct ib_qp_attr kib_error_qpa;    /* QP->ERROR */
-	struct kib_sched_info **kib_scheds; /* percpt data for schedulers */
-};
-
-#define IBLND_INIT_NOTHING 0
-#define IBLND_INIT_DATA    1
-#define IBLND_INIT_ALL     2
-
-/************************************************************************
- * IB Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-struct kib_connparams {
-	__u16        ibcp_queue_depth;
-	__u16        ibcp_max_frags;
-	__u32        ibcp_max_msg_size;
-} WIRE_ATTR;
-
-struct kib_immediate_msg {
-	struct lnet_hdr	ibim_hdr;        /* portals header */
-	char         ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR;
-
-struct kib_rdma_frag {
-	__u32        rf_nob;          /* # bytes this frag */
-	__u64        rf_addr;         /* CAVEAT EMPTOR: misaligned!! */
-} WIRE_ATTR;
-
-struct kib_rdma_desc {
-	__u32           rd_key;       /* local/remote key */
-	__u32           rd_nfrags;    /* # fragments */
-	struct kib_rdma_frag	rd_frags[0];	/* buffer frags */
-} WIRE_ATTR;
-
-struct kib_putreq_msg {
-	struct lnet_hdr	ibprm_hdr;    /* portals header */
-	__u64           ibprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR;
-
-struct kib_putack_msg {
-	__u64           ibpam_src_cookie; /* reflected completion cookie */
-	__u64           ibpam_dst_cookie; /* opaque completion cookie */
-	struct kib_rdma_desc ibpam_rd;         /* sender's sink buffer */
-} WIRE_ATTR;
-
-struct kib_get_msg {
-	struct lnet_hdr ibgm_hdr;     /* portals header */
-	__u64           ibgm_cookie;  /* opaque completion cookie */
-	struct kib_rdma_desc ibgm_rd;      /* rdma descriptor */
-} WIRE_ATTR;
-
-struct kib_completion_msg {
-	__u64           ibcm_cookie;  /* opaque completion cookie */
-	__s32           ibcm_status;  /* < 0 failure; >= 0 length */
-} WIRE_ATTR;
-
-struct kib_msg {
-	/* First 2 fields fixed FOR ALL TIME */
-	__u32           ibm_magic;    /* I'm an ibnal message */
-	__u16           ibm_version;  /* this is my version number */
-
-	__u8            ibm_type;     /* msg type */
-	__u8            ibm_credits;  /* returned credits */
-	__u32           ibm_nob;      /* # bytes in whole message */
-	__u32           ibm_cksum;    /* checksum (0 == no checksum) */
-	__u64           ibm_srcnid;   /* sender's NID */
-	__u64           ibm_srcstamp; /* sender's incarnation */
-	__u64           ibm_dstnid;   /* destination's NID */
-	__u64           ibm_dststamp; /* destination's incarnation */
-
-	union {
-		struct kib_connparams		connparams;
-		struct kib_immediate_msg	immediate;
-		struct kib_putreq_msg		putreq;
-		struct kib_putack_msg		putack;
-		struct kib_get_msg		get;
-		struct kib_completion_msg	completion;
-	} WIRE_ATTR ibm_u;
-} WIRE_ATTR;
-
-#define IBLND_MSG_MAGIC     LNET_PROTO_IB_MAGIC /* unique magic */
-
-#define IBLND_MSG_VERSION_1 0x11
-#define IBLND_MSG_VERSION_2 0x12
-#define IBLND_MSG_VERSION   IBLND_MSG_VERSION_2
-
-#define IBLND_MSG_CONNREQ   0xc0	/* connection request */
-#define IBLND_MSG_CONNACK   0xc1	/* connection acknowledge */
-#define IBLND_MSG_NOOP      0xd0	/* nothing (just credits) */
-#define IBLND_MSG_IMMEDIATE 0xd1	/* immediate */
-#define IBLND_MSG_PUT_REQ   0xd2	/* putreq (src->sink) */
-#define IBLND_MSG_PUT_NAK   0xd3	/* completion (sink->src) */
-#define IBLND_MSG_PUT_ACK   0xd4	/* putack (sink->src) */
-#define IBLND_MSG_PUT_DONE  0xd5	/* completion (src->sink) */
-#define IBLND_MSG_GET_REQ   0xd6	/* getreq (sink->src) */
-#define IBLND_MSG_GET_DONE  0xd7	/* completion (src->sink: all OK) */
-
-struct kib_rej {
-	__u32            ibr_magic;       /* sender's magic */
-	__u16            ibr_version;     /* sender's version */
-	__u8             ibr_why;         /* reject reason */
-	__u8             ibr_padding;     /* padding */
-	__u64            ibr_incarnation; /* incarnation of peer */
-	struct kib_connparams ibr_cp;          /* connection parameters */
-} WIRE_ATTR;
-
-/* connection rejection reasons */
-#define IBLND_REJECT_CONN_RACE      1 /* You lost connection race */
-#define IBLND_REJECT_NO_RESOURCES   2 /* Out of memory/conns etc */
-#define IBLND_REJECT_FATAL          3 /* Anything else */
-#define IBLND_REJECT_CONN_UNCOMPAT  4 /* incompatible version peer */
-#define IBLND_REJECT_CONN_STALE     5 /* stale peer */
-/* peer's rdma frags don't match mine */
-#define IBLND_REJECT_RDMA_FRAGS	    6
-/* peer's msg queue size doesn't match mine */
-#define IBLND_REJECT_MSG_QUEUE_SIZE 7
-
-/***********************************************************************/
-
-struct kib_rx {					/* receive message */
-	struct list_head       rx_list;       /* queue for attention */
-	struct kib_conn        *rx_conn;      /* owning conn */
-	int                    rx_nob; /* # bytes received (-1 while posted) */
-	enum ib_wc_status      rx_status;     /* completion status */
-	struct kib_msg		*rx_msg;	/* message buffer (host vaddr) */
-	__u64                  rx_msgaddr;    /* message buffer (I/O addr) */
-	DECLARE_PCI_UNMAP_ADDR(rx_msgunmap);  /* for dma_unmap_single() */
-	struct ib_recv_wr      rx_wrq;        /* receive work item... */
-	struct ib_sge          rx_sge;        /* ...and its memory */
-};
-
-#define IBLND_POSTRX_DONT_POST    0 /* don't post */
-#define IBLND_POSTRX_NO_CREDIT    1 /* post: no credits */
-#define IBLND_POSTRX_PEER_CREDIT  2 /* post: give peer back 1 credit */
-#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give self back 1 reserved credit */
-
-struct kib_tx {					/* transmit message */
-	struct list_head      tx_list; /* queue on idle_txs, ibc_tx_queue, etc. */
-	struct kib_tx_pool	*tx_pool;	/* pool I'm from */
-	struct kib_conn       *tx_conn;       /* owning conn */
-	short                 tx_sending;     /* # tx callbacks outstanding */
-	short                 tx_queued;      /* queued for sending */
-	short                 tx_waiting;     /* waiting for peer */
-	int                   tx_status;      /* LNET completion status */
-	unsigned long         tx_deadline;    /* completion deadline */
-	__u64                 tx_cookie;      /* completion cookie */
-	struct lnet_msg		*tx_lntmsg[2];	/* lnet msgs to finalize on completion */
-	struct kib_msg	      *tx_msg;        /* message buffer (host vaddr) */
-	__u64                 tx_msgaddr;     /* message buffer (I/O addr) */
-	DECLARE_PCI_UNMAP_ADDR(tx_msgunmap);  /* for dma_unmap_single() */
-	int                   tx_nwrq;        /* # send work items */
-	struct ib_rdma_wr     *tx_wrq;        /* send work items... */
-	struct ib_sge         *tx_sge;        /* ...and their memory */
-	struct kib_rdma_desc  *tx_rd;         /* rdma descriptor */
-	int                   tx_nfrags;      /* # entries in... */
-	struct scatterlist    *tx_frags;      /* dma_map_sg descriptor */
-	__u64                 *tx_pages;      /* rdma phys page addrs */
-	struct kib_fmr        fmr;	      /* FMR */
-	int                   tx_dmadir;      /* dma direction */
-};
-
-struct kib_connvars {
-	struct kib_msg cv_msg; /* connection-in-progress variables */
-};
-
-struct kib_conn {
-	struct kib_sched_info *ibc_sched;      /* scheduler information */
-	struct kib_peer       *ibc_peer;       /* owning peer */
-	struct kib_hca_dev         *ibc_hdev;       /* HCA bound on */
-	struct list_head ibc_list;             /* stash on peer's conn list */
-	struct list_head      ibc_sched_list;  /* schedule for attention */
-	__u16                 ibc_version;     /* version of connection */
-	/* reconnect later */
-	__u16			ibc_reconnect:1;
-	__u64                 ibc_incarnation; /* which instance of the peer */
-	atomic_t              ibc_refcount;    /* # users */
-	int                   ibc_state;       /* what's happening */
-	int                   ibc_nsends_posted; /* # uncompleted sends */
-	int                   ibc_noops_posted;  /* # uncompleted NOOPs */
-	int                   ibc_credits;     /* # credits I have */
-	int                   ibc_outstanding_credits; /* # credits to return */
-	int                   ibc_reserved_credits; /* # ACK/DONE msg credits */
-	int                   ibc_comms_error; /* set on comms error */
-	/* connections queue depth */
-	__u16		      ibc_queue_depth;
-	/* connections max frags */
-	__u16		      ibc_max_frags;
-	unsigned int          ibc_nrx:16;      /* receive buffers owned */
-	unsigned int          ibc_scheduled:1; /* scheduled for attention */
-	unsigned int          ibc_ready:1;     /* CQ callback fired */
-	unsigned long         ibc_last_send;   /* time of last send */
-	struct list_head      ibc_connd_list;  /* link chain for */
-					       /* kiblnd_check_conns only */
-	struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */
-	struct list_head ibc_tx_noops;         /* IBLND_MSG_NOOPs for */
-					       /* IBLND_MSG_VERSION_1 */
-	struct list_head ibc_tx_queue;         /* sends that need a credit */
-	struct list_head ibc_tx_queue_nocred;  /* sends that don't need a */
-					       /* credit */
-	struct list_head ibc_tx_queue_rsrvd;   /* sends that need to */
-					       /* reserve an ACK/DONE msg */
-	struct list_head ibc_active_txs; /* active tx awaiting completion */
-	spinlock_t            ibc_lock;        /* serialise */
-	struct kib_rx              *ibc_rxs;        /* the rx descs */
-	struct kib_pages           *ibc_rx_pages;   /* premapped rx msg pages */
-
-	struct rdma_cm_id     *ibc_cmid;       /* CM id */
-	struct ib_cq          *ibc_cq;         /* completion queue */
-
-	struct kib_connvars	*ibc_connvars;	/* in-progress connection state */
-};
-
-#define IBLND_CONN_INIT           0	 /* being initialised */
-#define IBLND_CONN_ACTIVE_CONNECT 1	 /* active sending req */
-#define IBLND_CONN_PASSIVE_WAIT   2	 /* passive waiting for rtu */
-#define IBLND_CONN_ESTABLISHED    3	 /* connection established */
-#define IBLND_CONN_CLOSING        4	 /* being closed */
-#define IBLND_CONN_DISCONNECTED   5	 /* disconnected */
-
-struct kib_peer {
-	struct list_head ibp_list;        /* stash on global peer list */
-	lnet_nid_t       ibp_nid;         /* who's on the other end(s) */
-	struct lnet_ni	*ibp_ni;         /* LNet interface */
-	struct list_head ibp_conns;       /* all active connections */
-	struct kib_conn	*ibp_next_conn;  /* next connection to send on for
-					  * round robin */
-	struct list_head ibp_tx_queue;    /* msgs waiting for a conn */
-	__u64            ibp_incarnation; /* incarnation of peer */
-	/* when (in jiffies) I was last alive */
-	unsigned long		ibp_last_alive;
-	/* # users */
-	atomic_t		ibp_refcount;
-	/* version of peer */
-	__u16			ibp_version;
-	/* current passive connection attempts */
-	unsigned short		ibp_accepting;
-	/* current active connection attempts */
-	unsigned short		ibp_connecting;
-	/* reconnect this peer later */
-	unsigned char		ibp_reconnecting;
-	/* counter of how many times we triggered a conn race */
-	unsigned char		ibp_races;
-	/* # consecutive reconnection attempts to this peer */
-	unsigned int		ibp_reconnected;
-	/* errno on closing this peer */
-	int              ibp_error;
-	/* max map_on_demand */
-	__u16		 ibp_max_frags;
-	/* max_peer_credits */
-	__u16		 ibp_queue_depth;
-};
-
-extern struct kib_data kiblnd_data;
-
-void kiblnd_hdev_destroy(struct kib_hca_dev *hdev);
-
-int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
-
-/* max # of fragments configured by user */
-static inline int
-kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-	int mod;
-
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-	mod = tunables->lnd_map_on_demand;
-	return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT;
-}
-
-static inline int
-kiblnd_rdma_frags(int version, struct lnet_ni *ni)
-{
-	return version == IBLND_MSG_VERSION_1 ?
-			  (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) :
-			  kiblnd_cfg_rdma_frags(ni);
-}
-
-static inline int
-kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-	int concurrent_sends;
-
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-	concurrent_sends = tunables->lnd_concurrent_sends;
-
-	if (version == IBLND_MSG_VERSION_1) {
-		if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
-			return IBLND_MSG_QUEUE_SIZE_V1 * 2;
-
-		if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
-			return IBLND_MSG_QUEUE_SIZE_V1 / 2;
-	}
-
-	return concurrent_sends;
-}
-
-static inline void
-kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev)
-{
-	LASSERT(atomic_read(&hdev->ibh_ref) > 0);
-	atomic_inc(&hdev->ibh_ref);
-}
-
-static inline void
-kiblnd_hdev_decref(struct kib_hca_dev *hdev)
-{
-	LASSERT(atomic_read(&hdev->ibh_ref) > 0);
-	if (atomic_dec_and_test(&hdev->ibh_ref))
-		kiblnd_hdev_destroy(hdev);
-}
-
-static inline int
-kiblnd_dev_can_failover(struct kib_dev *dev)
-{
-	if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */
-		return 0;
-
-	if (!*kiblnd_tunables.kib_dev_failover) /* disabled */
-		return 0;
-
-	if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */
-		return 1;
-
-	return dev->ibd_can_failover;
-}
-
-#define kiblnd_conn_addref(conn)				\
-do {							    \
-	CDEBUG(D_NET, "conn[%p] (%d)++\n",		      \
-	       (conn), atomic_read(&(conn)->ibc_refcount)); \
-	atomic_inc(&(conn)->ibc_refcount);		  \
-} while (0)
-
-#define kiblnd_conn_decref(conn)					\
-do {									\
-	unsigned long flags;						\
-									\
-	CDEBUG(D_NET, "conn[%p] (%d)--\n",				\
-	       (conn), atomic_read(&(conn)->ibc_refcount));		\
-	LASSERT_ATOMIC_POS(&(conn)->ibc_refcount);			\
-	if (atomic_dec_and_test(&(conn)->ibc_refcount)) {		\
-		spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);	\
-		list_add_tail(&(conn)->ibc_list,			\
-				  &kiblnd_data.kib_connd_zombies);	\
-		wake_up(&kiblnd_data.kib_connd_waitq);		\
-		spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
-	}								\
-} while (0)
-
-#define kiblnd_peer_addref(peer)				\
-do {							    \
-	CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n",		\
-	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
-	       atomic_read(&(peer)->ibp_refcount));	\
-	atomic_inc(&(peer)->ibp_refcount);		  \
-} while (0)
-
-#define kiblnd_peer_decref(peer)				\
-do {							    \
-	CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n",		\
-	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
-	       atomic_read(&(peer)->ibp_refcount));	\
-	LASSERT_ATOMIC_POS(&(peer)->ibp_refcount);	      \
-	if (atomic_dec_and_test(&(peer)->ibp_refcount))     \
-		kiblnd_destroy_peer(peer);		      \
-} while (0)
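
kiblnd_conn_decref() above shows a common kernel pattern: the holder of the last reference does not tear the object down inline but queues it on a zombie list for the connd thread, so destruction happens in a safe context. A minimal userspace sketch of just the drop-to-zero detection, using C11 atomics (the deferred-free list is elided here):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct conn {
	atomic_int refcount;
};

static void conn_decref(struct conn *c)
{
	/* fetch_sub returns the old value: 1 means we dropped the last ref */
	if (atomic_fetch_sub(&c->refcount, 1) == 1) {
		/* real code would queue c on a zombie list and wake connd */
		free(c);
		puts("last ref dropped, conn destroyed");
	}
}

int main(void)
{
	struct conn *c = malloc(sizeof(*c));

	atomic_init(&c->refcount, 2);
	conn_decref(c);	/* still one reference left */
	conn_decref(c);	/* destroys */
	return 0;
}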
-
-static inline bool
-kiblnd_peer_connecting(struct kib_peer *peer)
-{
-	return peer->ibp_connecting ||
-	       peer->ibp_reconnecting ||
-	       peer->ibp_accepting;
-}
-
-static inline bool
-kiblnd_peer_idle(struct kib_peer *peer)
-{
-	return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
-}
-
-static inline struct list_head *
-kiblnd_nid2peerlist(lnet_nid_t nid)
-{
-	unsigned int hash =
-		((unsigned int)nid) % kiblnd_data.kib_peer_hash_size;
-
-	return &kiblnd_data.kib_peers[hash];
-}
-
-static inline int
-kiblnd_peer_active(struct kib_peer *peer)
-{
-	/* Am I in the peer hash table? */
-	return !list_empty(&peer->ibp_list);
-}
-
-static inline struct kib_conn *
-kiblnd_get_conn_locked(struct kib_peer *peer)
-{
-	struct list_head *next;
-
-	LASSERT(!list_empty(&peer->ibp_conns));
-
-	/* Advance to next connection, be sure to skip the head node */
-	if (!peer->ibp_next_conn ||
-	    peer->ibp_next_conn->ibc_list.next == &peer->ibp_conns)
-		next = peer->ibp_conns.next;
-	else
-		next = peer->ibp_next_conn->ibc_list.next;
-	peer->ibp_next_conn = list_entry(next, struct kib_conn, ibc_list);
-
-	return peer->ibp_next_conn;
-}
-
-static inline int
-kiblnd_send_keepalive(struct kib_conn *conn)
-{
-	return (*kiblnd_tunables.kib_keepalive > 0) &&
-		time_after(jiffies, conn->ibc_last_send +
-			   msecs_to_jiffies(*kiblnd_tunables.kib_keepalive *
-					    MSEC_PER_SEC));
-}
-
-static inline int
-kiblnd_need_noop(struct kib_conn *conn)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-
-	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
-	if (conn->ibc_outstanding_credits <
-	    IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
-	    !kiblnd_send_keepalive(conn))
-		return 0; /* No need to send NOOP */
-
-	if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
-		if (!list_empty(&conn->ibc_tx_queue_nocred))
-			return 0; /* NOOP can be piggybacked */
-
-		/* No tx to piggyback NOOP onto or no credit to send a tx */
-		return (list_empty(&conn->ibc_tx_queue) ||
-			!conn->ibc_credits);
-	}
-
-	if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
-	    !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
-	    !conn->ibc_credits)		    /* no credit */
-		return 0;
-
-	if (conn->ibc_credits == 1 &&      /* last credit reserved for */
-	    !conn->ibc_outstanding_credits) /* giving back credits */
-		return 0;
-
-	/* No tx to piggyback NOOP onto or no credit to send a tx */
-	return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
-}
-
-static inline void
-kiblnd_abort_receives(struct kib_conn *conn)
-{
-	ib_modify_qp(conn->ibc_cmid->qp,
-		     &kiblnd_data.kib_error_qpa, IB_QP_STATE);
-}
-
-static inline const char *
-kiblnd_queue2str(struct kib_conn *conn, struct list_head *q)
-{
-	if (q == &conn->ibc_tx_queue)
-		return "tx_queue";
-
-	if (q == &conn->ibc_tx_queue_rsrvd)
-		return "tx_queue_rsrvd";
-
-	if (q == &conn->ibc_tx_queue_nocred)
-		return "tx_queue_nocred";
-
-	if (q == &conn->ibc_active_txs)
-		return "active_txs";
-
-	LBUG();
-	return NULL;
-}
-
-/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */
-/* lowest bits of the work request id to stash the work item type. */
-
-#define IBLND_WID_INVAL	0
-#define IBLND_WID_TX	1
-#define IBLND_WID_RX	2
-#define IBLND_WID_RDMA	3
-#define IBLND_WID_MR	4
-#define IBLND_WID_MASK	7UL
-
-static inline __u64
-kiblnd_ptr2wreqid(void *ptr, int type)
-{
-	unsigned long lptr = (unsigned long)ptr;
-
-	LASSERT(!(lptr & IBLND_WID_MASK));
-	LASSERT(!(type & ~IBLND_WID_MASK));
-	return (__u64)(lptr | type);
-}
-
-static inline void *
-kiblnd_wreqid2ptr(__u64 wreqid)
-{
-	return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
-}
-
-static inline int
-kiblnd_wreqid2type(__u64 wreqid)
-{
-	return wreqid & IBLND_WID_MASK;
-}
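
Because tx and rx descriptors are at least 8-byte aligned, their addresses have three low bits free; kiblnd_ptr2wreqid() stashes the work-item type there so a single 64-bit wr_id identifies both the object and the kind of work that completed. A standalone round trip of the same pointer-tagging trick:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define WID_TX   1		/* stand-in for IBLND_WID_TX   */
#define WID_MASK 7UL		/* stand-in for IBLND_WID_MASK */

static uint64_t ptr2wreqid(void *ptr, int type)
{
	uintptr_t lptr = (uintptr_t)ptr;

	assert(!(lptr & WID_MASK));	/* needs 8-byte alignment */
	return (uint64_t)lptr | (unsigned int)type;
}

static void *wreqid2ptr(uint64_t wreqid)
{
	return (void *)(uintptr_t)(wreqid & ~(uint64_t)WID_MASK);
}

int main(void)
{
	_Alignas(8) long long obj = 0;
	uint64_t id = ptr2wreqid(&obj, WID_TX);

	assert(wreqid2ptr(id) == (void *)&obj);
	assert((id & WID_MASK) == WID_TX);
	printf("tagged wr_id %#llx\n", (unsigned long long)id);
	return 0;
}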
-
-static inline void
-kiblnd_set_conn_state(struct kib_conn *conn, int state)
-{
-	conn->ibc_state = state;
-	mb();
-}
-
-static inline void
-kiblnd_init_msg(struct kib_msg *msg, int type, int body_nob)
-{
-	msg->ibm_type = type;
-	msg->ibm_nob  = offsetof(struct kib_msg, ibm_u) + body_nob;
-}
-
-static inline int
-kiblnd_rd_size(struct kib_rdma_desc *rd)
-{
-	int   i;
-	int   size;
-
-	for (i = size = 0; i < rd->rd_nfrags; i++)
-		size += rd->rd_frags[i].rf_nob;
-
-	return size;
-}
-
-static inline __u64
-kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index)
-{
-	return rd->rd_frags[index].rf_addr;
-}
-
-static inline __u32
-kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
-{
-	return rd->rd_frags[index].rf_nob;
-}
-
-static inline __u32
-kiblnd_rd_frag_key(struct kib_rdma_desc *rd, int index)
-{
-	return rd->rd_key;
-}
-
-static inline int
-kiblnd_rd_consume_frag(struct kib_rdma_desc *rd, int index, __u32 nob)
-{
-	if (nob < rd->rd_frags[index].rf_nob) {
-		rd->rd_frags[index].rf_addr += nob;
-		rd->rd_frags[index].rf_nob  -= nob;
-	} else {
-		index++;
-	}
-
-	return index;
-}
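
kiblnd_rd_consume_frag() advances through a descriptor as an RDMA transfer drains it: a partially consumed fragment is shrunk in place, a fully consumed one bumps the index to the next fragment. A sketch driving a simplified copy of it over a two-fragment descriptor (types reduced from the header above):

#include <stdio.h>

struct frag { unsigned int nob; unsigned long long addr; };

/* simplified copy of kiblnd_rd_consume_frag() */
static int consume_frag(struct frag *frags, int index, unsigned int nob)
{
	if (nob < frags[index].nob) {
		frags[index].addr += nob;
		frags[index].nob  -= nob;
	} else {
		index++;
	}
	return index;
}

int main(void)
{
	struct frag frags[2] = { { 4096, 0x1000 }, { 4096, 0x8000 } };
	int idx = 0;

	idx = consume_frag(frags, idx, 1024);	/* partial: frag 0 shrinks */
	printf("idx %d, frag0 %u bytes @ %#llx\n",
	       idx, frags[0].nob, frags[0].addr);	/* 0, 3072 @ 0x1400 */
	idx = consume_frag(frags, idx, 3072);	/* exact: advance to frag 1 */
	printf("idx %d\n", idx);			/* 1 */
	return 0;
}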
-
-static inline int
-kiblnd_rd_msg_size(struct kib_rdma_desc *rd, int msgtype, int n)
-{
-	LASSERT(msgtype == IBLND_MSG_GET_REQ ||
-		msgtype == IBLND_MSG_PUT_ACK);
-
-	return msgtype == IBLND_MSG_GET_REQ ?
-	       offsetof(struct kib_get_msg, ibgm_rd.rd_frags[n]) :
-	       offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[n]);
-}
-
-static inline __u64
-kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-	return ib_dma_mapping_error(dev, dma_addr);
-}
-
-static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
-					  void *msg, size_t size,
-					  enum dma_data_direction direction)
-{
-	return ib_dma_map_single(dev, msg, size, direction);
-}
-
-static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
-					   __u64 addr, size_t size,
-					  enum dma_data_direction direction)
-{
-	ib_dma_unmap_single(dev, addr, size, direction);
-}
-
-#define KIBLND_UNMAP_ADDR_SET(p, m, a)  do {} while (0)
-#define KIBLND_UNMAP_ADDR(p, m, a)      (a)
-
-static inline int kiblnd_dma_map_sg(struct ib_device *dev,
-				    struct scatterlist *sg, int nents,
-				    enum dma_data_direction direction)
-{
-	return ib_dma_map_sg(dev, sg, nents, direction);
-}
-
-static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
-				       struct scatterlist *sg, int nents,
-				       enum dma_data_direction direction)
-{
-	ib_dma_unmap_sg(dev, sg, nents, direction);
-}
-
-static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
-					  struct scatterlist *sg)
-{
-	return ib_sg_dma_address(dev, sg);
-}
-
-static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
-					     struct scatterlist *sg)
-{
-	return ib_sg_dma_len(dev, sg);
-}
-
-/* XXX We use KIBLND_CONN_PARAM(e) as a writable buffer; that's not strictly */
-/* right because OFED 1.2 defines it as const, so to use it we have to add a */
-/* (void *) cast to overcome the "const" */
-
-#define KIBLND_CONN_PARAM(e)     ((e)->param.conn.private_data)
-#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-
-void kiblnd_map_rx_descs(struct kib_conn *conn);
-void kiblnd_unmap_rx_descs(struct kib_conn *conn);
-void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node);
-struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps);
-
-int  kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
-			 struct kib_rdma_desc *rd, __u32 nob, __u64 iov,
-			 struct kib_fmr *fmr);
-void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status);
-
-int kiblnd_tunables_setup(struct lnet_ni *ni);
-void kiblnd_tunables_init(void);
-
-int  kiblnd_connd(void *arg);
-int  kiblnd_scheduler(void *arg);
-int  kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name);
-int  kiblnd_failover_thread(void *arg);
-
-int  kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages);
-
-int  kiblnd_cm_callback(struct rdma_cm_id *cmid,
-			struct rdma_cm_event *event);
-int  kiblnd_translate_mtu(int value);
-
-int  kiblnd_dev_failover(struct kib_dev *dev);
-int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp,
-		       lnet_nid_t nid);
-void kiblnd_destroy_peer(struct kib_peer *peer);
-bool kiblnd_reconnect_peer(struct kib_peer *peer);
-void kiblnd_destroy_dev(struct kib_dev *dev);
-void kiblnd_unlink_peer_locked(struct kib_peer *peer);
-struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid);
-int  kiblnd_close_stale_conns_locked(struct kib_peer *peer,
-				     int version, __u64 incarnation);
-int  kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why);
-
-struct kib_conn *kiblnd_create_conn(struct kib_peer *peer,
-				    struct rdma_cm_id *cmid,
-				    int state, int version);
-void kiblnd_destroy_conn(struct kib_conn *conn);
-void kiblnd_close_conn(struct kib_conn *conn, int error);
-void kiblnd_close_conn_locked(struct kib_conn *conn, int error);
-
-void kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid);
-void kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist,
-			int status);
-
-void kiblnd_qp_event(struct ib_event *event, void *arg);
-void kiblnd_cq_event(struct ib_event *event, void *arg);
-void kiblnd_cq_completion(struct ib_cq *cq, void *arg);
-
-void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
-		     int credits, lnet_nid_t dstnid, __u64 dststamp);
-int  kiblnd_unpack_msg(struct kib_msg *msg, int nob);
-int  kiblnd_post_rx(struct kib_rx *rx, int credit);
-
-int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
-int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-		int delayed, struct iov_iter *to, unsigned int rlen);

+ 0 - 3763
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c

@@ -1,3763 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd_cb.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <linux/highmem.h>
-#include "o2iblnd.h"
-
-#define MAX_CONN_RACES_BEFORE_ABORT 20
-
-static void kiblnd_peer_alive(struct kib_peer *peer);
-static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error);
-static void kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx,
-			       int type, int body_nob);
-static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
-			    int resid, struct kib_rdma_desc *dstrd,
-			    __u64 dstcookie);
-static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn);
-static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn);
-static void kiblnd_unmap_tx(struct kib_tx *tx);
-static void kiblnd_check_sends_locked(struct kib_conn *conn);
-
-static void
-kiblnd_tx_done(struct lnet_ni *ni, struct kib_tx *tx)
-{
-	struct lnet_msg *lntmsg[2];
-	struct kib_net *net = ni->ni_data;
-	int rc;
-	int i;
-
-	LASSERT(net);
-	LASSERT(!in_interrupt());
-	LASSERT(!tx->tx_queued);	       /* mustn't be queued for sending */
-	LASSERT(!tx->tx_sending);	  /* mustn't be awaiting sent callback */
-	LASSERT(!tx->tx_waiting);	      /* mustn't be awaiting peer response */
-	LASSERT(tx->tx_pool);
-
-	kiblnd_unmap_tx(tx);
-
-	/* tx may have up to 2 lnet msgs to finalise */
-	lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
-	lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
-	rc = tx->tx_status;
-
-	if (tx->tx_conn) {
-		LASSERT(ni == tx->tx_conn->ibc_peer->ibp_ni);
-
-		kiblnd_conn_decref(tx->tx_conn);
-		tx->tx_conn = NULL;
-	}
-
-	tx->tx_nwrq = 0;
-	tx->tx_status = 0;
-
-	kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
-
-	/* delay finalize until my descs have been freed */
-	for (i = 0; i < 2; i++) {
-		if (!lntmsg[i])
-			continue;
-
-		lnet_finalize(ni, lntmsg[i], rc);
-	}
-}
-
-void
-kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int status)
-{
-	struct kib_tx *tx;
-
-	while (!list_empty(txlist)) {
-		tx = list_entry(txlist->next, struct kib_tx, tx_list);
-
-		list_del(&tx->tx_list);
-		/* complete now */
-		tx->tx_waiting = 0;
-		tx->tx_status = status;
-		kiblnd_tx_done(ni, tx);
-	}
-}
-
-static struct kib_tx *
-kiblnd_get_idle_tx(struct lnet_ni *ni, lnet_nid_t target)
-{
-	struct kib_net *net = (struct kib_net *)ni->ni_data;
-	struct list_head *node;
-	struct kib_tx *tx;
-	struct kib_tx_poolset *tps;
-
-	tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
-	node = kiblnd_pool_alloc_node(&tps->tps_poolset);
-	if (!node)
-		return NULL;
-	tx = list_entry(node, struct kib_tx, tx_list);
-
-	LASSERT(!tx->tx_nwrq);
-	LASSERT(!tx->tx_queued);
-	LASSERT(!tx->tx_sending);
-	LASSERT(!tx->tx_waiting);
-	LASSERT(!tx->tx_status);
-	LASSERT(!tx->tx_conn);
-	LASSERT(!tx->tx_lntmsg[0]);
-	LASSERT(!tx->tx_lntmsg[1]);
-	LASSERT(!tx->tx_nfrags);
-
-	return tx;
-}
-
-static void
-kiblnd_drop_rx(struct kib_rx *rx)
-{
-	struct kib_conn *conn = rx->rx_conn;
-	struct kib_sched_info *sched = conn->ibc_sched;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sched->ibs_lock, flags);
-	LASSERT(conn->ibc_nrx > 0);
-	conn->ibc_nrx--;
-	spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-	kiblnd_conn_decref(conn);
-}
-
-int
-kiblnd_post_rx(struct kib_rx *rx, int credit)
-{
-	struct kib_conn *conn = rx->rx_conn;
-	struct kib_net *net = conn->ibc_peer->ibp_ni->ni_data;
-	struct ib_recv_wr *bad_wrq = NULL;
-	int rc;
-
-	LASSERT(net);
-	LASSERT(!in_interrupt());
-	LASSERT(credit == IBLND_POSTRX_NO_CREDIT ||
-		credit == IBLND_POSTRX_PEER_CREDIT ||
-		credit == IBLND_POSTRX_RSRVD_CREDIT);
-
-	rx->rx_sge.lkey   = conn->ibc_hdev->ibh_pd->local_dma_lkey;
-	rx->rx_sge.addr   = rx->rx_msgaddr;
-	rx->rx_sge.length = IBLND_MSG_SIZE;
-
-	rx->rx_wrq.next    = NULL;
-	rx->rx_wrq.sg_list = &rx->rx_sge;
-	rx->rx_wrq.num_sge = 1;
-	rx->rx_wrq.wr_id   = kiblnd_ptr2wreqid(rx, IBLND_WID_RX);
-
-	LASSERT(conn->ibc_state >= IBLND_CONN_INIT);
-	LASSERT(rx->rx_nob >= 0);	      /* not posted */
-
-	if (conn->ibc_state > IBLND_CONN_ESTABLISHED) {
-		kiblnd_drop_rx(rx);	     /* No more posts for this rx */
-		return 0;
-	}
-
-	rx->rx_nob = -1;			/* flag posted */
-
-	/* NB: need an extra reference after ib_post_recv because we don't
-	 * own this rx (and rx::rx_conn) anymore, LU-5678.
-	 */
-	kiblnd_conn_addref(conn);
-	rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq);
-	if (unlikely(rc)) {
-		CERROR("Can't post rx for %s: %d, bad_wrq: %p\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq);
-		rx->rx_nob = 0;
-	}
-
-	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */
-		goto out;
-
-	if (unlikely(rc)) {
-		kiblnd_close_conn(conn, rc);
-		kiblnd_drop_rx(rx);	     /* No more posts for this rx */
-		goto out;
-	}
-
-	if (credit == IBLND_POSTRX_NO_CREDIT)
-		goto out;
-
-	spin_lock(&conn->ibc_lock);
-	if (credit == IBLND_POSTRX_PEER_CREDIT)
-		conn->ibc_outstanding_credits++;
-	else
-		conn->ibc_reserved_credits++;
-	kiblnd_check_sends_locked(conn);
-	spin_unlock(&conn->ibc_lock);
-
-out:
-	kiblnd_conn_decref(conn);
-	return rc;
-}
-
-static struct kib_tx *
-kiblnd_find_waiting_tx_locked(struct kib_conn *conn, int txtype, __u64 cookie)
-{
-	struct list_head *tmp;
-
-	list_for_each(tmp, &conn->ibc_active_txs) {
-		struct kib_tx *tx = list_entry(tmp, struct kib_tx, tx_list);
-
-		LASSERT(!tx->tx_queued);
-		LASSERT(tx->tx_sending || tx->tx_waiting);
-
-		if (tx->tx_cookie != cookie)
-			continue;
-
-		if (tx->tx_waiting &&
-		    tx->tx_msg->ibm_type == txtype)
-			return tx;
-
-		CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
-		      tx->tx_waiting ? "" : "NOT ",
-		      tx->tx_msg->ibm_type, txtype);
-	}
-	return NULL;
-}
-
-static void
-kiblnd_handle_completion(struct kib_conn *conn, int txtype, int status, __u64 cookie)
-{
-	struct kib_tx *tx;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	int idle;
-
-	spin_lock(&conn->ibc_lock);
-
-	tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie);
-	if (!tx) {
-		spin_unlock(&conn->ibc_lock);
-
-		CWARN("Unmatched completion type %x cookie %#llx from %s\n",
-		      txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		kiblnd_close_conn(conn, -EPROTO);
-		return;
-	}
-
-	if (!tx->tx_status) {	       /* success so far */
-		if (status < 0) /* failed? */
-			tx->tx_status = status;
-		else if (txtype == IBLND_MSG_GET_REQ)
-			lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status);
-	}
-
-	tx->tx_waiting = 0;
-
-	idle = !tx->tx_queued && !tx->tx_sending;
-	if (idle)
-		list_del(&tx->tx_list);
-
-	spin_unlock(&conn->ibc_lock);
-
-	if (idle)
-		kiblnd_tx_done(ni, tx);
-}
-
-static void
-kiblnd_send_completion(struct kib_conn *conn, int type, int status, __u64 cookie)
-{
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	struct kib_tx *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
-
-	if (!tx) {
-		CERROR("Can't get tx for completion %x for %s\n",
-		       type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		return;
-	}
-
-	tx->tx_msg->ibm_u.completion.ibcm_status = status;
-	tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
-	kiblnd_init_tx_msg(ni, tx, type, sizeof(struct kib_completion_msg));
-
-	kiblnd_queue_tx(tx, conn);
-}
-
-static void
-kiblnd_handle_rx(struct kib_rx *rx)
-{
-	struct kib_msg *msg = rx->rx_msg;
-	struct kib_conn *conn = rx->rx_conn;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	int credits = msg->ibm_credits;
-	struct kib_tx *tx;
-	int rc = 0;
-	int rc2;
-	int post_credit;
-
-	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
-	CDEBUG(D_NET, "Received %x[%d] from %s\n",
-	       msg->ibm_type, credits,
-	       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
-	if (credits) {
-		/* Have I received credits that will let me send? */
-		spin_lock(&conn->ibc_lock);
-
-		if (conn->ibc_credits + credits >
-		    conn->ibc_queue_depth) {
-			rc2 = conn->ibc_credits;
-			spin_unlock(&conn->ibc_lock);
-
-			CERROR("Bad credits from %s: %d + %d > %d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       rc2, credits, conn->ibc_queue_depth);
-
-			kiblnd_close_conn(conn, -EPROTO);
-			kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
-			return;
-		}
-
-		conn->ibc_credits += credits;
-
-		/* This ensures the credit taken by NOOP can be returned */
-		if (msg->ibm_type == IBLND_MSG_NOOP &&
-		    !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
-			conn->ibc_outstanding_credits++;
-
-		kiblnd_check_sends_locked(conn);
-		spin_unlock(&conn->ibc_lock);
-	}
-
-	switch (msg->ibm_type) {
-	default:
-		CERROR("Bad IBLND message type %x from %s\n",
-		       msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		post_credit = IBLND_POSTRX_NO_CREDIT;
-		rc = -EPROTO;
-		break;
-
-	case IBLND_MSG_NOOP:
-		if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
-			post_credit = IBLND_POSTRX_NO_CREDIT;
-			break;
-		}
-
-		if (credits) /* credit already posted */
-			post_credit = IBLND_POSTRX_NO_CREDIT;
-		else	      /* a keepalive NOOP */
-			post_credit = IBLND_POSTRX_PEER_CREDIT;
-		break;
-
-	case IBLND_MSG_IMMEDIATE:
-		post_credit = IBLND_POSTRX_DONT_POST;
-		rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr,
-				msg->ibm_srcnid, rx, 0);
-		if (rc < 0)		     /* repost on error */
-			post_credit = IBLND_POSTRX_PEER_CREDIT;
-		break;
-
-	case IBLND_MSG_PUT_REQ:
-		post_credit = IBLND_POSTRX_DONT_POST;
-		rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr,
-				msg->ibm_srcnid, rx, 1);
-		if (rc < 0)		     /* repost on error */
-			post_credit = IBLND_POSTRX_PEER_CREDIT;
-		break;
-
-	case IBLND_MSG_PUT_NAK:
-		CWARN("PUT_NACK from %s\n",
-		      libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-		kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ,
-					 msg->ibm_u.completion.ibcm_status,
-					 msg->ibm_u.completion.ibcm_cookie);
-		break;
-
-	case IBLND_MSG_PUT_ACK:
-		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-
-		spin_lock(&conn->ibc_lock);
-		tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ,
-						   msg->ibm_u.putack.ibpam_src_cookie);
-		if (tx)
-			list_del(&tx->tx_list);
-		spin_unlock(&conn->ibc_lock);
-
-		if (!tx) {
-			CERROR("Unmatched PUT_ACK from %s\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			rc = -EPROTO;
-			break;
-		}
-
-		LASSERT(tx->tx_waiting);
-		/*
-		 * CAVEAT EMPTOR: I could be racing with tx_complete, but...
-		 * (a) I can overwrite tx_msg since my peer has received it!
-		 * (b) tx_waiting set tells tx_complete() it's not done.
-		 */
-		tx->tx_nwrq = 0;		/* overwrite PUT_REQ */
-
-		rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE,
-				       kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd),
-				       &msg->ibm_u.putack.ibpam_rd,
-				       msg->ibm_u.putack.ibpam_dst_cookie);
-		if (rc2 < 0)
-			CERROR("Can't setup rdma for PUT to %s: %d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
-
-		spin_lock(&conn->ibc_lock);
-		tx->tx_waiting = 0;	/* clear waiting and queue atomically */
-		kiblnd_queue_tx_locked(tx, conn);
-		spin_unlock(&conn->ibc_lock);
-		break;
-
-	case IBLND_MSG_PUT_DONE:
-		post_credit = IBLND_POSTRX_PEER_CREDIT;
-		kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK,
-					 msg->ibm_u.completion.ibcm_status,
-					 msg->ibm_u.completion.ibcm_cookie);
-		break;
-
-	case IBLND_MSG_GET_REQ:
-		post_credit = IBLND_POSTRX_DONT_POST;
-		rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr,
-				msg->ibm_srcnid, rx, 1);
-		if (rc < 0)		     /* repost on error */
-			post_credit = IBLND_POSTRX_PEER_CREDIT;
-		break;
-
-	case IBLND_MSG_GET_DONE:
-		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-		kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ,
-					 msg->ibm_u.completion.ibcm_status,
-					 msg->ibm_u.completion.ibcm_cookie);
-		break;
-	}
-
-	if (rc < 0)			     /* protocol error */
-		kiblnd_close_conn(conn, rc);
-
-	if (post_credit != IBLND_POSTRX_DONT_POST)
-		kiblnd_post_rx(rx, post_credit);
-}
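
The credit check at the top of kiblnd_handle_rx() is the receive half of the flow-control protocol: a peer can never return more credits than the negotiated queue depth, so ibc_credits + credits exceeding it can only mean a protocol violation, hence the -EPROTO close. A small sketch of that accounting under assumed values:

#include <stdio.h>

static int credits = 6;		/* credits I currently hold	*/
static int queue_depth = 8;	/* negotiated at connect time	*/

/* returns 0 on success, -1 on protocol error (mirrors -EPROTO above) */
static int return_credits(int returned)
{
	if (credits + returned > queue_depth) {
		fprintf(stderr, "bad credits: %d + %d > %d\n",
			credits, returned, queue_depth);
		return -1;
	}
	credits += returned;
	return 0;
}

int main(void)
{
	return_credits(2);	/* fine: 6 + 2 == 8	      */
	return_credits(1);	/* protocol error: 8 + 1 > 8  */
	return 0;
}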
-
-static void
-kiblnd_rx_complete(struct kib_rx *rx, int status, int nob)
-{
-	struct kib_msg *msg = rx->rx_msg;
-	struct kib_conn *conn = rx->rx_conn;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	struct kib_net *net = ni->ni_data;
-	int rc;
-	int err = -EIO;
-
-	LASSERT(net);
-	LASSERT(rx->rx_nob < 0);	       /* was posted */
-	rx->rx_nob = 0;			 /* isn't now */
-
-	if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
-		goto ignore;
-
-	if (status != IB_WC_SUCCESS) {
-		CNETERR("Rx from %s failed: %d\n",
-			libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
-		goto failed;
-	}
-
-	LASSERT(nob >= 0);
-	rx->rx_nob = nob;
-
-	rc = kiblnd_unpack_msg(msg, rx->rx_nob);
-	if (rc) {
-		CERROR("Error %d unpacking rx from %s\n",
-		       rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		goto failed;
-	}
-
-	if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
-	    msg->ibm_dstnid != ni->ni_nid ||
-	    msg->ibm_srcstamp != conn->ibc_incarnation ||
-	    msg->ibm_dststamp != net->ibn_incarnation) {
-		CERROR("Stale rx from %s\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		err = -ESTALE;
-		goto failed;
-	}
-
-	/* set time last known alive */
-	kiblnd_peer_alive(conn->ibc_peer);
-
-	/* racing with connection establishment/teardown! */
-
-	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
-		rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-		unsigned long flags;
-
-		write_lock_irqsave(g_lock, flags);
-		/* must check holding global lock to eliminate race */
-		if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
-			list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
-			write_unlock_irqrestore(g_lock, flags);
-			return;
-		}
-		write_unlock_irqrestore(g_lock, flags);
-	}
-	kiblnd_handle_rx(rx);
-	return;
-
- failed:
-	CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
-	kiblnd_close_conn(conn, err);
- ignore:
-	kiblnd_drop_rx(rx);		     /* Don't re-post rx. */
-}
-
-static struct page *
-kiblnd_kvaddr_to_page(unsigned long vaddr)
-{
-	struct page *page;
-
-	if (is_vmalloc_addr((void *)vaddr)) {
-		page = vmalloc_to_page((void *)vaddr);
-		LASSERT(page);
-		return page;
-	}
-#ifdef CONFIG_HIGHMEM
-	if (vaddr >= PKMAP_BASE &&
-	    vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
-		/* Highmem pages are only used for bulk (kiov) I/O, so a */
-		/* kernel virtual address should never be in highmem */
-		CERROR("find page for address in highmem\n");
-		LBUG();
-	}
-#endif
-	page = virt_to_page(vaddr);
-	LASSERT(page);
-	return page;
-}
-
-static int
-kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx, struct kib_rdma_desc *rd, __u32 nob)
-{
-	struct kib_hca_dev *hdev;
-	struct kib_fmr_poolset *fps;
-	int cpt;
-	int rc;
-
-	LASSERT(tx->tx_pool);
-	LASSERT(tx->tx_pool->tpo_pool.po_owner);
-
-	hdev = tx->tx_pool->tpo_hdev;
-	cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
-
-	fps = net->ibn_fmr_ps[cpt];
-	rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr);
-	if (rc) {
-		CERROR("Can't map %u bytes: %d\n", nob, rc);
-		return rc;
-	}
-
-	/*
-	 * If rd is not tx_rd, it's going to get sent to a peer, who will need
-	 * the rkey
-	 */
-	rd->rd_key = tx->fmr.fmr_key;
-	rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
-	rd->rd_frags[0].rf_nob = nob;
-	rd->rd_nfrags = 1;
-
-	return 0;
-}
-
-static void kiblnd_unmap_tx(struct kib_tx *tx)
-{
-	if (tx->fmr.fmr_pfmr || tx->fmr.fmr_frd)
-		kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status);
-
-	if (tx->tx_nfrags) {
-		kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
-				    tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
-		tx->tx_nfrags = 0;
-	}
-}
-
-static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
-			 struct kib_rdma_desc *rd, int nfrags)
-{
-	struct kib_net *net = ni->ni_data;
-	struct kib_hca_dev *hdev = net->ibn_dev->ibd_hdev;
-	__u32 nob;
-	int i;
-
-	/*
-	 * If rd is not tx_rd, it's going to get sent to a peer and I'm the
-	 * RDMA sink
-	 */
-	tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	tx->tx_nfrags = nfrags;
-
-	rd->rd_nfrags = kiblnd_dma_map_sg(hdev->ibh_ibdev, tx->tx_frags,
-					  tx->tx_nfrags, tx->tx_dmadir);
-
-	for (i = 0, nob = 0; i < rd->rd_nfrags; i++) {
-		rd->rd_frags[i].rf_nob  = kiblnd_sg_dma_len(
-			hdev->ibh_ibdev, &tx->tx_frags[i]);
-		rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address(
-			hdev->ibh_ibdev, &tx->tx_frags[i]);
-		nob += rd->rd_frags[i].rf_nob;
-	}
-
-	if (net->ibn_fmr_ps)
-		return kiblnd_fmr_map_tx(net, tx, rd, nob);
-
-	return -EINVAL;
-}
-
-static int
-kiblnd_setup_rd_iov(struct lnet_ni *ni, struct kib_tx *tx,
-		    struct kib_rdma_desc *rd, unsigned int niov,
-		    const struct kvec *iov, int offset, int nob)
-{
-	struct kib_net *net = ni->ni_data;
-	struct page *page;
-	struct scatterlist *sg;
-	unsigned long vaddr;
-	int fragnob;
-	int page_offset;
-
-	LASSERT(nob > 0);
-	LASSERT(niov > 0);
-	LASSERT(net);
-
-	while (offset >= iov->iov_len) {
-		offset -= iov->iov_len;
-		niov--;
-		iov++;
-		LASSERT(niov > 0);
-	}
-
-	sg = tx->tx_frags;
-	do {
-		LASSERT(niov > 0);
-
-		vaddr = ((unsigned long)iov->iov_base) + offset;
-		page_offset = vaddr & (PAGE_SIZE - 1);
-		page = kiblnd_kvaddr_to_page(vaddr);
-		if (!page) {
-			CERROR("Can't find page\n");
-			return -EFAULT;
-		}
-
-		fragnob = min((int)(iov->iov_len - offset), nob);
-		fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
-		sg_set_page(sg, page, fragnob, page_offset);
-		sg = sg_next(sg);
-		if (!sg) {
-			CERROR("lacking enough sg entries to map tx\n");
-			return -EFAULT;
-		}
-
-		if (offset + fragnob < iov->iov_len) {
-			offset += fragnob;
-		} else {
-			offset = 0;
-			iov++;
-			niov--;
-		}
-		nob -= fragnob;
-	} while (nob > 0);
-
-	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
-
-static int
-kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
-		     struct kib_rdma_desc *rd, int nkiov,
-		     const struct bio_vec *kiov, int offset, int nob)
-{
-	struct kib_net *net = ni->ni_data;
-	struct scatterlist *sg;
-	int fragnob;
-
-	CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
-	LASSERT(nob > 0);
-	LASSERT(nkiov > 0);
-	LASSERT(net);
-
-	while (offset >= kiov->bv_len) {
-		offset -= kiov->bv_len;
-		nkiov--;
-		kiov++;
-		LASSERT(nkiov > 0);
-	}
-
-	sg = tx->tx_frags;
-	do {
-		LASSERT(nkiov > 0);
-
-		fragnob = min((int)(kiov->bv_len - offset), nob);
-
-		sg_set_page(sg, kiov->bv_page, fragnob,
-			    kiov->bv_offset + offset);
-		sg = sg_next(sg);
-		if (!sg) {
-			CERROR("lacking enough sg entries to map tx\n");
-			return -EFAULT;
-		}
-
-		offset = 0;
-		kiov++;
-		nkiov--;
-		nob -= fragnob;
-	} while (nob > 0);
-
-	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
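
Both setup routines above walk an I/O vector, clipping each scatterlist fragment to the bytes remaining (and, for kvec addresses, to a page boundary as well). A standalone sketch of the clipping loop over a plain array of segment lengths:

#include <stdio.h>

/* print the fragment sizes for `nob` bytes starting `offset` into segs[] */
static void walk(const int *segs, int nsegs, int offset, int nob)
{
	while (offset >= *segs) {	/* skip fully-consumed leading segs */
		offset -= *segs++;
		nsegs--;
	}
	while (nob > 0 && nsegs > 0) {
		int fragnob = *segs - offset;

		if (fragnob > nob)
			fragnob = nob;	/* clip the final fragment */
		printf("frag %d bytes\n", fragnob);
		nob -= fragnob;
		offset = 0;
		segs++;
		nsegs--;
	}
}

int main(void)
{
	int segs[] = { 4096, 4096, 2048 };

	walk(segs, 3, 1000, 8000);	/* 3096 + 4096 + 808 = 8000 */
	return 0;
}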
-
-static int
-kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
-	__must_hold(&conn->ibc_lock)
-{
-	struct kib_msg *msg = tx->tx_msg;
-	struct kib_peer *peer = conn->ibc_peer;
-	struct lnet_ni *ni = peer->ibp_ni;
-	int ver = conn->ibc_version;
-	int rc;
-	int done;
-
-	LASSERT(tx->tx_queued);
-	/* We rely on this for QP sizing */
-	LASSERT(tx->tx_nwrq > 0);
-
-	LASSERT(!credit || credit == 1);
-	LASSERT(conn->ibc_outstanding_credits >= 0);
-	LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth);
-	LASSERT(conn->ibc_credits >= 0);
-	LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
-
-	if (conn->ibc_nsends_posted == kiblnd_concurrent_sends(ver, ni)) {
-		/* tx completions outstanding... */
-		CDEBUG(D_NET, "%s: posted enough\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		return -EAGAIN;
-	}
-
-	if (credit && !conn->ibc_credits) {   /* no credits */
-		CDEBUG(D_NET, "%s: no credits\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		return -EAGAIN;
-	}
-
-	if (credit && !IBLND_OOB_CAPABLE(ver) &&
-	    conn->ibc_credits == 1 &&   /* last credit reserved */
-	    msg->ibm_type != IBLND_MSG_NOOP) {      /* for NOOP */
-		CDEBUG(D_NET, "%s: not using last credit\n",
-		       libcfs_nid2str(peer->ibp_nid));
-		return -EAGAIN;
-	}
-
-	/* NB don't drop ibc_lock before bumping tx_sending */
-	list_del(&tx->tx_list);
-	tx->tx_queued = 0;
-
-	if (msg->ibm_type == IBLND_MSG_NOOP &&
-	    (!kiblnd_need_noop(conn) ||     /* redundant NOOP */
-	     (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */
-	      conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
-		/*
-		 * OK to drop when posted enough NOOPs, since
-		 * kiblnd_check_sends_locked will queue NOOP again when
-		 * posted NOOPs complete
-		 */
-		spin_unlock(&conn->ibc_lock);
-		kiblnd_tx_done(peer->ibp_ni, tx);
-		spin_lock(&conn->ibc_lock);
-		CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       conn->ibc_noops_posted);
-		return 0;
-	}
-
-	kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits,
-			peer->ibp_nid, conn->ibc_incarnation);
-
-	conn->ibc_credits -= credit;
-	conn->ibc_outstanding_credits = 0;
-	conn->ibc_nsends_posted++;
-	if (msg->ibm_type == IBLND_MSG_NOOP)
-		conn->ibc_noops_posted++;
-
-	/*
-	 * CAVEAT EMPTOR!  This tx could be the PUT_DONE of an RDMA
-	 * PUT.  If so, it was first queued here as a PUT_REQ, sent and
-	 * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
-	 * and then re-queued here.  It's (just) possible that
-	 * tx_sending is non-zero if we've not done the tx_complete()
-	 * from the first send; hence the ++ rather than = below.
-	 */
-	tx->tx_sending++;
-	list_add(&tx->tx_list, &conn->ibc_active_txs);
-
-	/* I'm still holding ibc_lock! */
-	if (conn->ibc_state != IBLND_CONN_ESTABLISHED) {
-		rc = -ECONNABORTED;
-	} else if (tx->tx_pool->tpo_pool.po_failed ||
-		 conn->ibc_hdev != tx->tx_pool->tpo_hdev) {
-		/* close_conn will launch failover */
-		rc = -ENETDOWN;
-	} else {
-		struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd;
-		struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
-		struct ib_send_wr *wrq = &tx->tx_wrq[0].wr;
-
-		if (frd) {
-			if (!frd->frd_valid) {
-				wrq = &frd->frd_inv_wr;
-				wrq->next = &frd->frd_fastreg_wr.wr;
-			} else {
-				wrq = &frd->frd_fastreg_wr.wr;
-			}
-			frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr;
-		}
-
-		LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
-			 "bad wr_id %llx, opc %d, flags %d, peer: %s\n",
-			 bad->wr_id, bad->opcode, bad->send_flags,
-			 libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		bad = NULL;
-		rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad);
-	}
-
-	conn->ibc_last_send = jiffies;
-
-	if (!rc)
-		return 0;
-
-	/*
-	 * NB credits are transferred in the actual
-	 * message, which can only be the last work item
-	 */
-	conn->ibc_credits += credit;
-	conn->ibc_outstanding_credits += msg->ibm_credits;
-	conn->ibc_nsends_posted--;
-	if (msg->ibm_type == IBLND_MSG_NOOP)
-		conn->ibc_noops_posted--;
-
-	tx->tx_status = rc;
-	tx->tx_waiting = 0;
-	tx->tx_sending--;
-
-	done = !tx->tx_sending;
-	if (done)
-		list_del(&tx->tx_list);
-
-	spin_unlock(&conn->ibc_lock);
-
-	if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
-		CERROR("Error %d posting transmit to %s\n",
-		       rc, libcfs_nid2str(peer->ibp_nid));
-	else
-		CDEBUG(D_NET, "Error %d posting transmit to %s\n",
-		       rc, libcfs_nid2str(peer->ibp_nid));
-
-	kiblnd_close_conn(conn, rc);
-
-	if (done)
-		kiblnd_tx_done(peer->ibp_ni, tx);
-
-	spin_lock(&conn->ibc_lock);
-
-	return -EIO;
-}
-
-static void
-kiblnd_check_sends_locked(struct kib_conn *conn)
-{
-	int ver = conn->ibc_version;
-	struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-	struct kib_tx *tx;
-
-	/* Don't send anything until after the connection is established */
-	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
-		CDEBUG(D_NET, "%s too soon\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		return;
-	}
-
-	LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
-	LASSERT(!IBLND_OOB_CAPABLE(ver) ||
-		conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
-	LASSERT(conn->ibc_reserved_credits >= 0);
-
-	while (conn->ibc_reserved_credits > 0 &&
-	       !list_empty(&conn->ibc_tx_queue_rsrvd)) {
-		tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
-				struct kib_tx, tx_list);
-		list_del(&tx->tx_list);
-		list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
-		conn->ibc_reserved_credits--;
-	}
-
-	if (kiblnd_need_noop(conn)) {
-		spin_unlock(&conn->ibc_lock);
-
-		tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
-		if (tx)
-			kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0);
-
-		spin_lock(&conn->ibc_lock);
-		if (tx)
-			kiblnd_queue_tx_locked(tx, conn);
-	}
-
-	for (;;) {
-		int credit;
-
-		if (!list_empty(&conn->ibc_tx_queue_nocred)) {
-			credit = 0;
-			tx = list_entry(conn->ibc_tx_queue_nocred.next,
-					struct kib_tx, tx_list);
-		} else if (!list_empty(&conn->ibc_tx_noops)) {
-			LASSERT(!IBLND_OOB_CAPABLE(ver));
-			credit = 1;
-			tx = list_entry(conn->ibc_tx_noops.next,
-					struct kib_tx, tx_list);
-		} else if (!list_empty(&conn->ibc_tx_queue)) {
-			credit = 1;
-			tx = list_entry(conn->ibc_tx_queue.next,
-					struct kib_tx, tx_list);
-		} else {
-			break;
-		}
-
-		if (kiblnd_post_tx_locked(conn, tx, credit))
-			break;
-	}
-}
-
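-/*
- * Send completion callback.  A tx may still be waiting for a reply
- * (tx_waiting) or have been re-queued as a PUT_DONE (tx_queued), so
- * it is only finalised once it is neither sending, waiting nor
- * queued; whichever path clears the last of these frees it.
- */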
-static void
-kiblnd_tx_complete(struct kib_tx *tx, int status)
-{
-	int failed = (status != IB_WC_SUCCESS);
-	struct kib_conn *conn = tx->tx_conn;
-	int idle;
-
-	LASSERT(tx->tx_sending > 0);
-
-	if (failed) {
-		if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
-			CNETERR("Tx -> %s cookie %#llx sending %d waiting %d: failed %d\n",
-				libcfs_nid2str(conn->ibc_peer->ibp_nid),
-				tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
-				status);
-
-		kiblnd_close_conn(conn, -EIO);
-	} else {
-		kiblnd_peer_alive(conn->ibc_peer);
-	}
-
-	spin_lock(&conn->ibc_lock);
-
-	/*
-	 * I could be racing with rdma completion.  Whoever makes 'tx' idle
-	 * gets to free it, which also drops its ref on 'conn'.
-	 */
-	tx->tx_sending--;
-	conn->ibc_nsends_posted--;
-	if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP)
-		conn->ibc_noops_posted--;
-
-	if (failed) {
-		tx->tx_waiting = 0;	     /* don't wait for peer */
-		tx->tx_status = -EIO;
-	}
-
-	idle = !tx->tx_sending &&	 /* This is the final callback */
-	       !tx->tx_waiting &&	       /* Not waiting for peer */
-	       !tx->tx_queued;		  /* Not re-queued (PUT_DONE) */
-	if (idle)
-		list_del(&tx->tx_list);
-
-	kiblnd_check_sends_locked(conn);
-	spin_unlock(&conn->ibc_lock);
-
-	if (idle)
-		kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-}
-
-static void
-kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
-		   int body_nob)
-{
-	struct kib_hca_dev *hdev = tx->tx_pool->tpo_hdev;
-	struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
-	struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
-	int nob = offsetof(struct kib_msg, ibm_u) + body_nob;
-
-	LASSERT(tx->tx_nwrq >= 0);
-	LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
-	LASSERT(nob <= IBLND_MSG_SIZE);
-
-	kiblnd_init_msg(tx->tx_msg, type, body_nob);
-
-	sge->lkey   = hdev->ibh_pd->local_dma_lkey;
-	sge->addr   = tx->tx_msgaddr;
-	sge->length = nob;
-
-	memset(wrq, 0, sizeof(*wrq));
-
-	wrq->wr.next       = NULL;
-	wrq->wr.wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
-	wrq->wr.sg_list    = sge;
-	wrq->wr.num_sge    = 1;
-	wrq->wr.opcode     = IB_WR_SEND;
-	wrq->wr.send_flags = IB_SEND_SIGNALED;
-
-	tx->tx_nwrq++;
-}
-
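-/*
- * Build the work-request chain for a GET_DONE/PUT_DONE transfer:
- * one IB_WR_RDMA_WRITE per overlap of a source and destination
- * fragment, bounded by IBLND_MAX_RDMA_FRAGS.  The completion
- * message itself is appended by kiblnd_init_tx_msg() as the final,
- * signalled send.
- */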
-static int
-kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
-		 int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie)
-{
-	struct kib_msg *ibmsg = tx->tx_msg;
-	struct kib_rdma_desc *srcrd = tx->tx_rd;
-	struct ib_sge *sge = &tx->tx_sge[0];
-	struct ib_rdma_wr *wrq, *next;
-	int rc  = resid;
-	int srcidx = 0;
-	int dstidx = 0;
-	int wrknob;
-
-	LASSERT(!in_interrupt());
-	LASSERT(!tx->tx_nwrq);
-	LASSERT(type == IBLND_MSG_GET_DONE ||
-		type == IBLND_MSG_PUT_DONE);
-
-	if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) {
-		CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-		       conn->ibc_max_frags << PAGE_SHIFT,
-		       kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd));
-		rc = -EMSGSIZE;
-		goto too_big;
-	}
-
-	while (resid > 0) {
-		if (srcidx >= srcrd->rd_nfrags) {
-			CERROR("Src buffer exhausted: %d frags\n", srcidx);
-			rc = -EPROTO;
-			break;
-		}
-
-		if (dstidx == dstrd->rd_nfrags) {
-			CERROR("Dst buffer exhausted: %d frags\n", dstidx);
-			rc = -EPROTO;
-			break;
-		}
-
-		if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) {
-			CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       IBLND_MAX_RDMA_FRAGS,
-			       srcidx, srcrd->rd_nfrags,
-			       dstidx, dstrd->rd_nfrags);
-			rc = -EMSGSIZE;
-			break;
-		}
-
-		wrknob = min3(kiblnd_rd_frag_size(srcrd, srcidx),
-			      kiblnd_rd_frag_size(dstrd, dstidx),
-			      (__u32)resid);
-
-		sge = &tx->tx_sge[tx->tx_nwrq];
-		sge->addr   = kiblnd_rd_frag_addr(srcrd, srcidx);
-		sge->lkey   = kiblnd_rd_frag_key(srcrd, srcidx);
-		sge->length = wrknob;
-
-		wrq = &tx->tx_wrq[tx->tx_nwrq];
-		next = wrq + 1;
-
-		wrq->wr.next       = &next->wr;
-		wrq->wr.wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
-		wrq->wr.sg_list    = sge;
-		wrq->wr.num_sge    = 1;
-		wrq->wr.opcode     = IB_WR_RDMA_WRITE;
-		wrq->wr.send_flags = 0;
-
-		wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
-		wrq->rkey        = kiblnd_rd_frag_key(dstrd, dstidx);
-
-		srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
-		dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
-
-		resid -= wrknob;
-
-		tx->tx_nwrq++;
-		wrq++;
-		sge++;
-	}
-too_big:
-	if (rc < 0)			     /* no RDMA if completing with failure */
-		tx->tx_nwrq = 0;
-
-	ibmsg->ibm_u.completion.ibcm_status = rc;
-	ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
-	kiblnd_init_tx_msg(conn->ibc_peer->ibp_ni, tx,
-			   type, sizeof(struct kib_completion_msg));
-
-	return rc;
-}
-
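-/*
- * Pick the queue for a tx.  PUT_REQ/GET_REQ wait for a reserved
- * credit; completion messages (ACK/NAK/DONE) go on the no-credit
- * queue and are posted with credit == 0 (see
- * kiblnd_check_sends_locked), so a reply never stalls waiting for
- * send credits.
- */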
-static void
-kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn)
-{
-	struct list_head *q;
-
-	LASSERT(tx->tx_nwrq > 0);	      /* work items set up */
-	LASSERT(!tx->tx_queued);	       /* not queued for sending already */
-	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
-	tx->tx_queued = 1;
-	tx->tx_deadline = jiffies +
-			  msecs_to_jiffies(*kiblnd_tunables.kib_timeout *
-					   MSEC_PER_SEC);
-
-	if (!tx->tx_conn) {
-		kiblnd_conn_addref(conn);
-		tx->tx_conn = conn;
-		LASSERT(tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE);
-	} else {
-		/* PUT_DONE first attached to conn as a PUT_REQ */
-		LASSERT(tx->tx_conn == conn);
-		LASSERT(tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE);
-	}
-
-	switch (tx->tx_msg->ibm_type) {
-	default:
-		LBUG();
-
-	case IBLND_MSG_PUT_REQ:
-	case IBLND_MSG_GET_REQ:
-		q = &conn->ibc_tx_queue_rsrvd;
-		break;
-
-	case IBLND_MSG_PUT_NAK:
-	case IBLND_MSG_PUT_ACK:
-	case IBLND_MSG_PUT_DONE:
-	case IBLND_MSG_GET_DONE:
-		q = &conn->ibc_tx_queue_nocred;
-		break;
-
-	case IBLND_MSG_NOOP:
-		if (IBLND_OOB_CAPABLE(conn->ibc_version))
-			q = &conn->ibc_tx_queue_nocred;
-		else
-			q = &conn->ibc_tx_noops;
-		break;
-
-	case IBLND_MSG_IMMEDIATE:
-		q = &conn->ibc_tx_queue;
-		break;
-	}
-
-	list_add_tail(&tx->tx_list, q);
-}
-
-static void
-kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
-{
-	spin_lock(&conn->ibc_lock);
-	kiblnd_queue_tx_locked(tx, conn);
-	kiblnd_check_sends_locked(conn);
-	spin_unlock(&conn->ibc_lock);
-}
-
-static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
-			       struct sockaddr_in *srcaddr,
-			       struct sockaddr_in *dstaddr,
-			       int timeout_ms)
-{
-	unsigned short port;
-	int rc;
-
-	/* allow the port to be reused */
-	rc = rdma_set_reuseaddr(cmid, 1);
-	if (rc) {
-		CERROR("Unable to set reuse on cmid: %d\n", rc);
-		return rc;
-	}
-
-	/* look for a free privileged port */
-	for (port = PROT_SOCK - 1; port > 0; port--) {
-		srcaddr->sin_port = htons(port);
-		rc = rdma_resolve_addr(cmid,
-				       (struct sockaddr *)srcaddr,
-				       (struct sockaddr *)dstaddr,
-				       timeout_ms);
-		if (!rc) {
-			CDEBUG(D_NET, "bound to port %hu\n", port);
-			return 0;
-		} else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) {
-			CDEBUG(D_NET, "bind to port %hu failed: %d\n",
-			       port, rc);
-		} else {
-			return rc;
-		}
-	}
-
-	CERROR("Failed to bind to a free privileged port\n");
-	return rc;
-}
-
-static void
-kiblnd_connect_peer(struct kib_peer *peer)
-{
-	struct rdma_cm_id *cmid;
-	struct kib_dev *dev;
-	struct kib_net *net = peer->ibp_ni->ni_data;
-	struct sockaddr_in srcaddr;
-	struct sockaddr_in dstaddr;
-	int rc;
-
-	LASSERT(net);
-	LASSERT(peer->ibp_connecting > 0);
-
-	cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
-				     IB_QPT_RC);
-
-	if (IS_ERR(cmid)) {
-		CERROR("Can't create CMID for %s: %ld\n",
-		       libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid));
-		rc = PTR_ERR(cmid);
-		goto failed;
-	}
-
-	dev = net->ibn_dev;
-	memset(&srcaddr, 0, sizeof(srcaddr));
-	srcaddr.sin_family = AF_INET;
-	srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-
-	memset(&dstaddr, 0, sizeof(dstaddr));
-	dstaddr.sin_family = AF_INET;
-	dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
-	dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
-
-	kiblnd_peer_addref(peer);	       /* cmid's ref */
-
-	if (*kiblnd_tunables.kib_use_priv_port) {
-		rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
-					 *kiblnd_tunables.kib_timeout * 1000);
-	} else {
-		rc = rdma_resolve_addr(cmid,
-				       (struct sockaddr *)&srcaddr,
-				       (struct sockaddr *)&dstaddr,
-				       *kiblnd_tunables.kib_timeout * 1000);
-	}
-	if (rc) {
-		/* Can't initiate address resolution */
-		CERROR("Can't resolve addr for %s: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), rc);
-		goto failed2;
-	}
-
-	return;
-
- failed2:
-	kiblnd_peer_connect_failed(peer, 1, rc);
-	kiblnd_peer_decref(peer);	       /* cmid's ref */
-	rdma_destroy_id(cmid);
-	return;
- failed:
-	kiblnd_peer_connect_failed(peer, 1, rc);
-}
-
-bool
-kiblnd_reconnect_peer(struct kib_peer *peer)
-{
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	char *reason = NULL;
-	struct list_head txs;
-	unsigned long flags;
-
-	INIT_LIST_HEAD(&txs);
-
-	write_lock_irqsave(glock, flags);
-	if (!peer->ibp_reconnecting) {
-		if (peer->ibp_accepting)
-			reason = "accepting";
-		else if (peer->ibp_connecting)
-			reason = "connecting";
-		else if (!list_empty(&peer->ibp_conns))
-			reason = "connected";
-		else /* connected then closed */
-			reason = "closed";
-
-		goto no_reconnect;
-	}
-
-	LASSERT(!peer->ibp_accepting && !peer->ibp_connecting &&
-		list_empty(&peer->ibp_conns));
-	peer->ibp_reconnecting--;
-
-	if (!kiblnd_peer_active(peer)) {
-		list_splice_init(&peer->ibp_tx_queue, &txs);
-		reason = "unlinked";
-		goto no_reconnect;
-	}
-
-	peer->ibp_connecting++;
-	peer->ibp_reconnected++;
-	write_unlock_irqrestore(glock, flags);
-
-	kiblnd_connect_peer(peer);
-	return true;
-
-no_reconnect:
-	write_unlock_irqrestore(glock, flags);
-
-	CWARN("Abort reconnection of %s: %s\n",
-	      libcfs_nid2str(peer->ibp_nid), reason);
-	kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED);
-	return false;
-}
-
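-/*
- * Attach a tx to a connection to 'nid', creating the peer and
- * initiating the connection if necessary.  The common case (peer
- * already connected) runs under the read lock; queueing on a
- * connecting peer takes the write lock; a brand new peer is
- * allocated unlocked and then rechecked (peer2) in case another
- * thread created it first.
- */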
-void
-kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
-{
-	struct kib_peer *peer;
-	struct kib_peer *peer2;
-	struct kib_conn *conn;
-	rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-	unsigned long flags;
-	int rc;
-	int i;
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-
-	/*
-	 * If I get here, I've committed to send, so I complete the tx with
-	 * failure on any problems
-	 */
-	LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */
-	LASSERT(!tx || tx->tx_nwrq > 0);     /* work items have been set up */
-
-	/*
-	 * First time, just use a read lock since I expect to find my peer
-	 * connected
-	 */
-	read_lock_irqsave(g_lock, flags);
-
-	peer = kiblnd_find_peer_locked(nid);
-	if (peer && !list_empty(&peer->ibp_conns)) {
-		/* Found a peer with an established connection */
-		conn = kiblnd_get_conn_locked(peer);
-		kiblnd_conn_addref(conn); /* 1 ref for me... */
-
-		read_unlock_irqrestore(g_lock, flags);
-
-		if (tx)
-			kiblnd_queue_tx(tx, conn);
-		kiblnd_conn_decref(conn); /* ...to here */
-		return;
-	}
-
-	read_unlock(g_lock);
-	/* Re-try with a write lock */
-	write_lock(g_lock);
-
-	peer = kiblnd_find_peer_locked(nid);
-	if (peer) {
-		if (list_empty(&peer->ibp_conns)) {
-			/* found a peer, but it's still connecting... */
-			LASSERT(kiblnd_peer_connecting(peer));
-			if (tx)
-				list_add_tail(&tx->tx_list,
-					      &peer->ibp_tx_queue);
-			write_unlock_irqrestore(g_lock, flags);
-		} else {
-			conn = kiblnd_get_conn_locked(peer);
-			kiblnd_conn_addref(conn); /* 1 ref for me... */
-
-			write_unlock_irqrestore(g_lock, flags);
-
-			if (tx)
-				kiblnd_queue_tx(tx, conn);
-			kiblnd_conn_decref(conn); /* ...to here */
-		}
-		return;
-	}
-
-	write_unlock_irqrestore(g_lock, flags);
-
-	/* Allocate a peer ready to add to the peer table and retry */
-	rc = kiblnd_create_peer(ni, &peer, nid);
-	if (rc) {
-		CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
-		if (tx) {
-			tx->tx_status = -EHOSTUNREACH;
-			tx->tx_waiting = 0;
-			kiblnd_tx_done(ni, tx);
-		}
-		return;
-	}
-
-	write_lock_irqsave(g_lock, flags);
-
-	peer2 = kiblnd_find_peer_locked(nid);
-	if (peer2) {
-		if (list_empty(&peer2->ibp_conns)) {
-			/* found a peer, but it's still connecting... */
-			LASSERT(kiblnd_peer_connecting(peer2));
-			if (tx)
-				list_add_tail(&tx->tx_list,
-					      &peer2->ibp_tx_queue);
-			write_unlock_irqrestore(g_lock, flags);
-		} else {
-			conn = kiblnd_get_conn_locked(peer2);
-			kiblnd_conn_addref(conn); /* 1 ref for me... */
-
-			write_unlock_irqrestore(g_lock, flags);
-
-			if (tx)
-				kiblnd_queue_tx(tx, conn);
-			kiblnd_conn_decref(conn); /* ...to here */
-		}
-
-		kiblnd_peer_decref(peer);
-		return;
-	}
-
-	/* Brand new peer */
-	LASSERT(!peer->ibp_connecting);
-	tunables = &peer->ibp_ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-	peer->ibp_connecting = tunables->lnd_conns_per_peer;
-
-	/* always called with a ref on ni, which prevents ni being shutdown */
-	LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown);
-
-	if (tx)
-		list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
-
-	kiblnd_peer_addref(peer);
-	list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
-	write_unlock_irqrestore(g_lock, flags);
-
-	for (i = 0; i < tunables->lnd_conns_per_peer; i++)
-		kiblnd_connect_peer(peer);
-	kiblnd_peer_decref(peer);
-}
-
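-/*
- * LND send entrypoint.  ACKs are always IMMEDIATE.  GETs to routers
- * and payloads that fit within IBLND_MSG_SIZE fall through to the
- * IMMEDIATE path, which copies the payload inline; anything larger
- * sets up an RDMA descriptor and waits for GET_DONE or
- * PUT_{ACK,NAK} respectively.
- */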
-int
-kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
-	struct lnet_hdr *hdr = &lntmsg->msg_hdr;
-	int type = lntmsg->msg_type;
-	struct lnet_process_id target = lntmsg->msg_target;
-	int target_is_router = lntmsg->msg_target_is_router;
-	int routing = lntmsg->msg_routing;
-	unsigned int payload_niov = lntmsg->msg_niov;
-	struct kvec *payload_iov = lntmsg->msg_iov;
-	struct bio_vec *payload_kiov = lntmsg->msg_kiov;
-	unsigned int payload_offset = lntmsg->msg_offset;
-	unsigned int payload_nob = lntmsg->msg_len;
-	struct iov_iter from;
-	struct kib_msg *ibmsg;
-	struct kib_rdma_desc  *rd;
-	struct kib_tx *tx;
-	int nob;
-	int rc;
-
-	/* NB 'private' differs depending on what we're sending... */
-
-	CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
-	       payload_nob, payload_niov, libcfs_id2str(target));
-
-	LASSERT(!payload_nob || payload_niov > 0);
-	LASSERT(payload_niov <= LNET_MAX_IOV);
-
-	/* Thread context */
-	LASSERT(!in_interrupt());
-	/* payload is either all vaddrs or all pages */
-	LASSERT(!(payload_kiov && payload_iov));
-
-	if (payload_kiov)
-		iov_iter_bvec(&from, ITER_BVEC | WRITE,
-			      payload_kiov, payload_niov,
-			      payload_nob + payload_offset);
-	else
-		iov_iter_kvec(&from, ITER_KVEC | WRITE,
-			      payload_iov, payload_niov,
-			      payload_nob + payload_offset);
-
-	iov_iter_advance(&from, payload_offset);
-
-	switch (type) {
-	default:
-		LBUG();
-		return -EIO;
-
-	case LNET_MSG_ACK:
-		LASSERT(!payload_nob);
-		break;
-
-	case LNET_MSG_GET:
-		if (routing || target_is_router)
-			break;		  /* send IMMEDIATE */
-
-		/* is the REPLY message too small for RDMA? */
-		nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
-		if (nob <= IBLND_MSG_SIZE)
-			break;		  /* send IMMEDIATE */
-
-		tx = kiblnd_get_idle_tx(ni, target.nid);
-		if (!tx) {
-			CERROR("Can't allocate txd for GET to %s\n",
-			       libcfs_nid2str(target.nid));
-			return -ENOMEM;
-		}
-
-		ibmsg = tx->tx_msg;
-		rd = &ibmsg->ibm_u.get.ibgm_rd;
-		if (!(lntmsg->msg_md->md_options & LNET_MD_KIOV))
-			rc = kiblnd_setup_rd_iov(ni, tx, rd,
-						 lntmsg->msg_md->md_niov,
-						 lntmsg->msg_md->md_iov.iov,
-						 0, lntmsg->msg_md->md_length);
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, rd,
-						  lntmsg->msg_md->md_niov,
-						  lntmsg->msg_md->md_iov.kiov,
-						  0, lntmsg->msg_md->md_length);
-		if (rc) {
-			CERROR("Can't setup GET sink for %s: %d\n",
-			       libcfs_nid2str(target.nid), rc);
-			kiblnd_tx_done(ni, tx);
-			return -EIO;
-		}
-
-		nob = offsetof(struct kib_get_msg, ibgm_rd.rd_frags[rd->rd_nfrags]);
-		ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
-		ibmsg->ibm_u.get.ibgm_hdr = *hdr;
-
-		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob);
-
-		tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
-		if (!tx->tx_lntmsg[1]) {
-			CERROR("Can't create reply for GET -> %s\n",
-			       libcfs_nid2str(target.nid));
-			kiblnd_tx_done(ni, tx);
-			return -EIO;
-		}
-
-		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg[0,1] on completion */
-		tx->tx_waiting = 1;	     /* waiting for GET_DONE */
-		kiblnd_launch_tx(ni, tx, target.nid);
-		return 0;
-
-	case LNET_MSG_REPLY:
-	case LNET_MSG_PUT:
-		/* Is the payload small enough not to need RDMA? */
-		nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]);
-		if (nob <= IBLND_MSG_SIZE)
-			break;		  /* send IMMEDIATE */
-
-		tx = kiblnd_get_idle_tx(ni, target.nid);
-		if (!tx) {
-			CERROR("Can't allocate %s txd for %s\n",
-			       type == LNET_MSG_PUT ? "PUT" : "REPLY",
-			       libcfs_nid2str(target.nid));
-			return -ENOMEM;
-		}
-
-		if (!payload_kiov)
-			rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
-						 payload_niov, payload_iov,
-						 payload_offset, payload_nob);
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
-						  payload_niov, payload_kiov,
-						  payload_offset, payload_nob);
-		if (rc) {
-			CERROR("Can't setup PUT src for %s: %d\n",
-			       libcfs_nid2str(target.nid), rc);
-			kiblnd_tx_done(ni, tx);
-			return -EIO;
-		}
-
-		ibmsg = tx->tx_msg;
-		ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
-		ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
-		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(struct kib_putreq_msg));
-
-		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg on completion */
-		tx->tx_waiting = 1;	     /* waiting for PUT_{ACK,NAK} */
-		kiblnd_launch_tx(ni, tx, target.nid);
-		return 0;
-	}
-
-	/* send IMMEDIATE */
-
-	LASSERT(offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob])
-		 <= IBLND_MSG_SIZE);
-
-	tx = kiblnd_get_idle_tx(ni, target.nid);
-	if (!tx) {
-		CERROR("Can't send %d to %s: tx descs exhausted\n",
-		       type, libcfs_nid2str(target.nid));
-		return -ENOMEM;
-	}
-
-	ibmsg = tx->tx_msg;
-	ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
-	rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob,
-			    &from);
-	if (rc != payload_nob) {
-		kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
-		return -EFAULT;
-	}
-
-	nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
-	kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
-
-	tx->tx_lntmsg[0] = lntmsg;	      /* finalise lntmsg on completion */
-	kiblnd_launch_tx(ni, tx, target.nid);
-	return 0;
-}
-
-static void
-kiblnd_reply(struct lnet_ni *ni, struct kib_rx *rx, struct lnet_msg *lntmsg)
-{
-	struct lnet_process_id target = lntmsg->msg_target;
-	unsigned int niov = lntmsg->msg_niov;
-	struct kvec *iov = lntmsg->msg_iov;
-	struct bio_vec *kiov = lntmsg->msg_kiov;
-	unsigned int offset = lntmsg->msg_offset;
-	unsigned int nob = lntmsg->msg_len;
-	struct kib_tx *tx;
-	int rc;
-
-	tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid);
-	if (!tx) {
-		CERROR("Can't get tx for REPLY to %s\n",
-		       libcfs_nid2str(target.nid));
-		goto failed_0;
-	}
-
-	if (!nob)
-		rc = 0;
-	else if (!kiov)
-		rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
-					 niov, iov, offset, nob);
-	else
-		rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
-					  niov, kiov, offset, nob);
-
-	if (rc) {
-		CERROR("Can't setup GET src for %s: %d\n",
-		       libcfs_nid2str(target.nid), rc);
-		goto failed_1;
-	}
-
-	rc = kiblnd_init_rdma(rx->rx_conn, tx,
-			      IBLND_MSG_GET_DONE, nob,
-			      &rx->rx_msg->ibm_u.get.ibgm_rd,
-			      rx->rx_msg->ibm_u.get.ibgm_cookie);
-	if (rc < 0) {
-		CERROR("Can't setup rdma for GET from %s: %d\n",
-		       libcfs_nid2str(target.nid), rc);
-		goto failed_1;
-	}
-
-	if (!nob) {
-		/* No RDMA: local completion may happen now! */
-		lnet_finalize(ni, lntmsg, 0);
-	} else {
-		/* RDMA: lnet_finalize(lntmsg) when it completes */
-		tx->tx_lntmsg[0] = lntmsg;
-	}
-
-	kiblnd_queue_tx(tx, rx->rx_conn);
-	return;
-
- failed_1:
-	kiblnd_tx_done(ni, tx);
- failed_0:
-	lnet_finalize(ni, lntmsg, -EIO);
-}
-
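-/*
- * LND receive entrypoint, called once LNet has matched the message.
- * IMMEDIATE payloads are copied straight into the iov.  A PUT_REQ
- * gets a sink set up and a PUT_ACK sent back, with the rx buffer
- * kept reserved for the PUT_DONE.  A GET_REQ either RDMAs the
- * matched reply via kiblnd_reply() or completes with -ENODATA.
- */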
-int
-kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-	    int delayed, struct iov_iter *to, unsigned int rlen)
-{
-	struct kib_rx *rx = private;
-	struct kib_msg *rxmsg = rx->rx_msg;
-	struct kib_conn *conn = rx->rx_conn;
-	struct kib_tx *tx;
-	int nob;
-	int post_credit = IBLND_POSTRX_PEER_CREDIT;
-	int rc = 0;
-
-	LASSERT(iov_iter_count(to) <= rlen);
-	LASSERT(!in_interrupt());
-	/* Either all pages or all vaddrs */
-
-	switch (rxmsg->ibm_type) {
-	default:
-		LBUG();
-
-	case IBLND_MSG_IMMEDIATE:
-		nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[rlen]);
-		if (nob > rx->rx_nob) {
-			CERROR("Immediate message from %s too big: %d(%d)\n",
-			       libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
-			       nob, rx->rx_nob);
-			rc = -EPROTO;
-			break;
-		}
-
-		rc = copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, rlen,
-				  to);
-		if (rc != rlen) {
-			rc = -EFAULT;
-			break;
-		}
-
-		rc = 0;
-		lnet_finalize(ni, lntmsg, 0);
-		break;
-
-	case IBLND_MSG_PUT_REQ: {
-		struct kib_msg	*txmsg;
-		struct kib_rdma_desc *rd;
-
-		if (!iov_iter_count(to)) {
-			lnet_finalize(ni, lntmsg, 0);
-			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
-					       rxmsg->ibm_u.putreq.ibprm_cookie);
-			break;
-		}
-
-		tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
-		if (!tx) {
-			CERROR("Can't allocate tx for %s\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			/* Not replying will break the connection */
-			rc = -ENOMEM;
-			break;
-		}
-
-		txmsg = tx->tx_msg;
-		rd = &txmsg->ibm_u.putack.ibpam_rd;
-		if (!(to->type & ITER_BVEC))
-			rc = kiblnd_setup_rd_iov(ni, tx, rd,
-						 to->nr_segs, to->kvec,
-						 to->iov_offset,
-						 iov_iter_count(to));
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, rd,
-						  to->nr_segs, to->bvec,
-						  to->iov_offset,
-						  iov_iter_count(to));
-		if (rc) {
-			CERROR("Can't setup PUT sink for %s: %d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
-			kiblnd_tx_done(ni, tx);
-			/* tell peer it's over */
-			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc,
-					       rxmsg->ibm_u.putreq.ibprm_cookie);
-			break;
-		}
-
-		nob = offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[rd->rd_nfrags]);
-		txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
-		txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
-
-		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob);
-
-		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg on completion */
-		tx->tx_waiting = 1;	     /* waiting for PUT_DONE */
-		kiblnd_queue_tx(tx, conn);
-
-		/* reposted buffer reserved for PUT_DONE */
-		post_credit = IBLND_POSTRX_NO_CREDIT;
-		break;
-		}
-
-	case IBLND_MSG_GET_REQ:
-		if (lntmsg) {
-			/* Optimized GET; RDMA lntmsg's payload */
-			kiblnd_reply(ni, rx, lntmsg);
-		} else {
-			/* GET didn't match anything */
-			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE,
-					       -ENODATA,
-					       rxmsg->ibm_u.get.ibgm_cookie);
-		}
-		break;
-	}
-
-	kiblnd_post_rx(rx, post_credit);
-	return rc;
-}
-
-int
-kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-	struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-
-	atomic_inc(&kiblnd_data.kib_nthreads);
-	return 0;
-}
-
-static void
-kiblnd_thread_fini(void)
-{
-	atomic_dec(&kiblnd_data.kib_nthreads);
-}
-
-static void
-kiblnd_peer_alive(struct kib_peer *peer)
-{
-	/* This is racy, but everyone's only writing jiffies */
-	peer->ibp_last_alive = jiffies;
-	mb();
-}
-
-static void
-kiblnd_peer_notify(struct kib_peer *peer)
-{
-	int error = 0;
-	unsigned long last_alive = 0;
-	unsigned long flags;
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	if (kiblnd_peer_idle(peer) && peer->ibp_error) {
-		error = peer->ibp_error;
-		peer->ibp_error = 0;
-
-		last_alive = peer->ibp_last_alive;
-	}
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	if (error)
-		lnet_notify(peer->ibp_ni,
-			    peer->ibp_nid, 0, last_alive);
-}
-
-void
-kiblnd_close_conn_locked(struct kib_conn *conn, int error)
-{
-	/*
-	 * This just does the immediate housekeeping. 'error' is zero for a
-	 * normal shutdown which can happen only after the connection has been
-	 * established.  If the connection is established, schedule the
-	 * connection to be finished off by the connd. Otherwise the connd is
-	 * already dealing with it (either to set it up or tear it down).
-	 * Caller holds kib_global_lock exclusively in irq context
-	 */
-	struct kib_peer *peer = conn->ibc_peer;
-	struct kib_dev *dev;
-	unsigned long flags;
-
-	LASSERT(error || conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
-	if (error && !conn->ibc_comms_error)
-		conn->ibc_comms_error = error;
-
-	if (conn->ibc_state != IBLND_CONN_ESTABLISHED)
-		return; /* already being handled  */
-
-	if (!error &&
-	    list_empty(&conn->ibc_tx_noops) &&
-	    list_empty(&conn->ibc_tx_queue) &&
-	    list_empty(&conn->ibc_tx_queue_rsrvd) &&
-	    list_empty(&conn->ibc_tx_queue_nocred) &&
-	    list_empty(&conn->ibc_active_txs)) {
-		CDEBUG(D_NET, "closing conn to %s\n",
-		       libcfs_nid2str(peer->ibp_nid));
-	} else {
-		CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
-			libcfs_nid2str(peer->ibp_nid), error,
-			list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
-			list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
-			list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
-			list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
-			list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
-	}
-
-	dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev;
-	if (peer->ibp_next_conn == conn)
-		/* clear next_conn so it won't be used */
-		peer->ibp_next_conn = NULL;
-	list_del(&conn->ibc_list);
-	/* connd (see below) takes over ibc_list's ref */
-
-	if (list_empty(&peer->ibp_conns) &&    /* no more conns */
-	    kiblnd_peer_active(peer)) {	 /* still in peer table */
-		kiblnd_unlink_peer_locked(peer);
-
-		/* set/clear error on last conn */
-		peer->ibp_error = conn->ibc_comms_error;
-	}
-
-	kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING);
-
-	if (error &&
-	    kiblnd_dev_can_failover(dev)) {
-		list_add_tail(&dev->ibd_fail_list,
-			      &kiblnd_data.kib_failed_devs);
-		wake_up(&kiblnd_data.kib_failover_waitq);
-	}
-
-	spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
-
-	list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_conns);
-	wake_up(&kiblnd_data.kib_connd_waitq);
-
-	spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
-}
-
-void
-kiblnd_close_conn(struct kib_conn *conn, int error)
-{
-	unsigned long flags;
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	kiblnd_close_conn_locked(conn, error);
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-static void
-kiblnd_handle_early_rxs(struct kib_conn *conn)
-{
-	unsigned long flags;
-	struct kib_rx *rx;
-
-	LASSERT(!in_interrupt());
-	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	while (!list_empty(&conn->ibc_early_rxs)) {
-		rx = list_entry(conn->ibc_early_rxs.next,
-				struct kib_rx, rx_list);
-		list_del(&rx->rx_list);
-		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-		kiblnd_handle_rx(rx);
-
-		write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	}
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
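-/*
- * Fail every tx on 'txs' with -ECONNABORTED.  A tx whose send is
- * still in flight (tx_sending) must stay on its list until the
- * completion fires; only already-idle txs move to the zombie list
- * and are finalised here.
- */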
-static void
-kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
-{
-	LIST_HEAD(zombies);
-	struct list_head *tmp;
-	struct list_head *nxt;
-	struct kib_tx *tx;
-
-	spin_lock(&conn->ibc_lock);
-
-	list_for_each_safe(tmp, nxt, txs) {
-		tx = list_entry(tmp, struct kib_tx, tx_list);
-
-		if (txs == &conn->ibc_active_txs) {
-			LASSERT(!tx->tx_queued);
-			LASSERT(tx->tx_waiting || tx->tx_sending);
-		} else {
-			LASSERT(tx->tx_queued);
-		}
-
-		tx->tx_status = -ECONNABORTED;
-		tx->tx_waiting = 0;
-
-		if (!tx->tx_sending) {
-			tx->tx_queued = 0;
-			list_del(&tx->tx_list);
-			list_add(&tx->tx_list, &zombies);
-		}
-	}
-
-	spin_unlock(&conn->ibc_lock);
-
-	kiblnd_txlist_done(conn->ibc_peer->ibp_ni, &zombies, -ECONNABORTED);
-}
-
-static void
-kiblnd_finalise_conn(struct kib_conn *conn)
-{
-	LASSERT(!in_interrupt());
-	LASSERT(conn->ibc_state > IBLND_CONN_INIT);
-
-	kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED);
-
-	/*
-	 * abort_receives moves QP state to IB_QPS_ERR.  This is only required
-	 * for connections that didn't get as far as being connected, because
-	 * rdma_disconnect() does this for free.
-	 */
-	kiblnd_abort_receives(conn);
-
-	/*
-	 * Complete all tx descs not waiting for sends to complete.
-	 * NB we should be safe from RDMA now that the QP has changed state
-	 */
-	kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
-	kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
-	kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
-	kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
-	kiblnd_abort_txs(conn, &conn->ibc_active_txs);
-
-	kiblnd_handle_early_rxs(conn);
-}
-
-static void
-kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error)
-{
-	LIST_HEAD(zombies);
-	unsigned long flags;
-
-	LASSERT(error);
-	LASSERT(!in_interrupt());
-
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	if (active) {
-		LASSERT(peer->ibp_connecting > 0);
-		peer->ibp_connecting--;
-	} else {
-		LASSERT(peer->ibp_accepting > 0);
-		peer->ibp_accepting--;
-	}
-
-	if (kiblnd_peer_connecting(peer)) {
-		/* another connection attempt under way... */
-		write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
-					flags);
-		return;
-	}
-
-	peer->ibp_reconnected = 0;
-	if (list_empty(&peer->ibp_conns)) {
-		/* Take peer's blocked transmits to complete with error */
-		list_add(&zombies, &peer->ibp_tx_queue);
-		list_del_init(&peer->ibp_tx_queue);
-
-		if (kiblnd_peer_active(peer))
-			kiblnd_unlink_peer_locked(peer);
-
-		peer->ibp_error = error;
-	} else {
-		/* Can't have blocked transmits if there are connections */
-		LASSERT(list_empty(&peer->ibp_tx_queue));
-	}
-
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	kiblnd_peer_notify(peer);
-
-	if (list_empty(&zombies))
-		return;
-
-	CNETERR("Deleting messages for %s: connection failed\n",
-		libcfs_nid2str(peer->ibp_nid));
-
-	kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH);
-}
-
-static void
-kiblnd_connreq_done(struct kib_conn *conn, int status)
-{
-	struct kib_peer *peer = conn->ibc_peer;
-	struct kib_tx *tx;
-	struct kib_tx *tmp;
-	struct list_head txs;
-	unsigned long flags;
-	int active;
-
-	active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
-	CDEBUG(D_NET, "%s: active(%d), version(%x), status(%d)\n",
-	       libcfs_nid2str(peer->ibp_nid), active,
-	       conn->ibc_version, status);
-
-	LASSERT(!in_interrupt());
-	LASSERT((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT &&
-		 peer->ibp_connecting > 0) ||
-		 (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
-		 peer->ibp_accepting > 0));
-
-	kfree(conn->ibc_connvars);
-	conn->ibc_connvars = NULL;
-
-	if (status) {
-		/* failed to establish connection */
-		kiblnd_peer_connect_failed(peer, active, status);
-		kiblnd_finalise_conn(conn);
-		return;
-	}
-
-	/* connection established */
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	conn->ibc_last_send = jiffies;
-	kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
-	kiblnd_peer_alive(peer);
-
-	/*
-	 * Add conn to peer's list and nuke any dangling conns from a different
-	 * peer instance...
-	 */
-	kiblnd_conn_addref(conn);	       /* +1 ref for ibc_list */
-	list_add(&conn->ibc_list, &peer->ibp_conns);
-	peer->ibp_reconnected = 0;
-	if (active)
-		peer->ibp_connecting--;
-	else
-		peer->ibp_accepting--;
-
-	if (!peer->ibp_version) {
-		peer->ibp_version     = conn->ibc_version;
-		peer->ibp_incarnation = conn->ibc_incarnation;
-	}
-
-	if (peer->ibp_version     != conn->ibc_version ||
-	    peer->ibp_incarnation != conn->ibc_incarnation) {
-		kiblnd_close_stale_conns_locked(peer, conn->ibc_version,
-						conn->ibc_incarnation);
-		peer->ibp_version     = conn->ibc_version;
-		peer->ibp_incarnation = conn->ibc_incarnation;
-	}
-
-	/* grab pending txs while I have the lock */
-	list_add(&txs, &peer->ibp_tx_queue);
-	list_del_init(&peer->ibp_tx_queue);
-
-	if (!kiblnd_peer_active(peer) ||	/* peer has been deleted */
-	    conn->ibc_comms_error) {       /* error has happened already */
-		struct lnet_ni *ni = peer->ibp_ni;
-
-		/* start to shut down connection */
-		kiblnd_close_conn_locked(conn, -ECONNABORTED);
-		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-		kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
-
-		return;
-	}
-
-	/*
-	 * +1 ref for myself, this connection is visible to other threads
-	 * now, refcount of peer:ibp_conns can be released by connection
-	 * close from either a different thread, or the calling of
-	 * kiblnd_check_sends_locked() below. See bz21911 for details.
-	 */
-	kiblnd_conn_addref(conn);
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	/* Schedule blocked txs
-	 * Note: if we are running with conns_per_peer > 1, these blocked
-	 * txs will all get scheduled to the first connection which gets
-	 * scheduled.  We won't be using round robin on this first batch.
-	 */
-	spin_lock(&conn->ibc_lock);
-	list_for_each_entry_safe(tx, tmp, &txs, tx_list) {
-		list_del(&tx->tx_list);
-
-		kiblnd_queue_tx_locked(tx, conn);
-	}
-	kiblnd_check_sends_locked(conn);
-	spin_unlock(&conn->ibc_lock);
-
-	/* schedule blocked rxs */
-	kiblnd_handle_early_rxs(conn);
-
-	kiblnd_conn_decref(conn);
-}
-
-static void
-kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej)
-{
-	int rc;
-
-	rc = rdma_reject(cmid, rej, sizeof(*rej));
-
-	if (rc)
-		CWARN("Error %d sending reject\n", rc);
-}
-
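-/*
- * Handle an incoming connection request.  Magic and version are
- * checked before anything else so that an incompatible peer is
- * rejected with a reason it can understand; the request must then
- * match this NI's NID, device and incarnation, and the peer's
- * queue depth and max_frags are validated before being adopted.
- */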
-static int
-kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
-{
-	rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-	struct kib_msg *reqmsg = priv;
-	struct kib_msg *ackmsg;
-	struct kib_dev *ibdev;
-	struct kib_peer *peer;
-	struct kib_peer *peer2;
-	struct kib_conn *conn;
-	struct lnet_ni *ni  = NULL;
-	struct kib_net *net = NULL;
-	lnet_nid_t nid;
-	struct rdma_conn_param cp;
-	struct kib_rej rej;
-	int version = IBLND_MSG_VERSION;
-	unsigned long flags;
-	int max_frags;
-	int rc;
-	struct sockaddr_in *peer_addr;
-
-	LASSERT(!in_interrupt());
-
-	/* cmid inherits 'context' from the corresponding listener id */
-	ibdev = (struct kib_dev *)cmid->context;
-	LASSERT(ibdev);
-
-	memset(&rej, 0, sizeof(rej));
-	rej.ibr_magic = IBLND_MSG_MAGIC;
-	rej.ibr_why = IBLND_REJECT_FATAL;
-	rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
-	peer_addr = (struct sockaddr_in *)&cmid->route.addr.dst_addr;
-	if (*kiblnd_tunables.kib_require_priv_port &&
-	    ntohs(peer_addr->sin_port) >= PROT_SOCK) {
-		__u32 ip = ntohl(peer_addr->sin_addr.s_addr);
-
-		CERROR("Peer's port (%pI4h:%hu) is not privileged\n",
-		       &ip, ntohs(peer_addr->sin_port));
-		goto failed;
-	}
-
-	if (priv_nob < offsetof(struct kib_msg, ibm_type)) {
-		CERROR("Short connection request\n");
-		goto failed;
-	}
-
-	/*
-	 * Future protocol version compatibility support!  If the
-	 * o2iblnd-specific protocol changes, or when LNET unifies
-	 * protocols over all LNDs, the initial connection will
-	 * negotiate a protocol version.  I trap this here to avoid
-	 * console errors; the reject tells the peer which protocol I
-	 * speak.
-	 */
-	if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
-	    reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
-		goto failed;
-	if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
-	    reqmsg->ibm_version != IBLND_MSG_VERSION &&
-	    reqmsg->ibm_version != IBLND_MSG_VERSION_1)
-		goto failed;
-	if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
-	    reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
-	    reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
-		goto failed;
-
-	rc = kiblnd_unpack_msg(reqmsg, priv_nob);
-	if (rc) {
-		CERROR("Can't parse connection request: %d\n", rc);
-		goto failed;
-	}
-
-	nid = reqmsg->ibm_srcnid;
-	ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
-
-	if (ni) {
-		net = (struct kib_net *)ni->ni_data;
-		rej.ibr_incarnation = net->ibn_incarnation;
-	}
-
-	if (!ni ||			 /* no matching net */
-	    ni->ni_nid != reqmsg->ibm_dstnid ||   /* right NET, wrong NID! */
-	    net->ibn_dev != ibdev) {	      /* wrong device */
-		CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n",
-		       libcfs_nid2str(nid),
-		       !ni ? "NA" : libcfs_nid2str(ni->ni_nid),
-		       ibdev->ibd_ifname, ibdev->ibd_nnets,
-		       &ibdev->ibd_ifip,
-		       libcfs_nid2str(reqmsg->ibm_dstnid));
-
-		goto failed;
-	}
-
-	/* check the timestamp as soon as possible */
-	if (reqmsg->ibm_dststamp &&
-	    reqmsg->ibm_dststamp != net->ibn_incarnation) {
-		CWARN("Stale connection request\n");
-		rej.ibr_why = IBLND_REJECT_CONN_STALE;
-		goto failed;
-	}
-
-	/* I can accept peer's version */
-	version = reqmsg->ibm_version;
-
-	if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
-		CERROR("Unexpected connreq msg type: %x from %s\n",
-		       reqmsg->ibm_type, libcfs_nid2str(nid));
-		goto failed;
-	}
-
-	if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
-	    kiblnd_msg_queue_size(version, ni)) {
-		CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n",
-		       libcfs_nid2str(nid),
-		       reqmsg->ibm_u.connparams.ibcp_queue_depth,
-		       kiblnd_msg_queue_size(version, ni));
-
-		if (version == IBLND_MSG_VERSION)
-			rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
-
-		goto failed;
-	}
-
-	max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
-	if (max_frags > kiblnd_rdma_frags(version, ni)) {
-		CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
-		      libcfs_nid2str(nid), version, max_frags,
-		      kiblnd_rdma_frags(version, ni));
-
-		if (version >= IBLND_MSG_VERSION)
-			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
-
-		goto failed;
-	} else if (max_frags < kiblnd_rdma_frags(version, ni) &&
-		   !net->ibn_fmr_ps) {
-		CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
-		      libcfs_nid2str(nid), version, max_frags,
-		      kiblnd_rdma_frags(version, ni));
-
-		if (version == IBLND_MSG_VERSION)
-			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
-
-		goto failed;
-	}
-
-	if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
-		CERROR("Can't accept %s: message size %d too big (%d max)\n",
-		       libcfs_nid2str(nid),
-		       reqmsg->ibm_u.connparams.ibcp_max_msg_size,
-		       IBLND_MSG_SIZE);
-		goto failed;
-	}
-
-	/* assume 'nid' is a new peer; create  */
-	rc = kiblnd_create_peer(ni, &peer, nid);
-	if (rc) {
-		CERROR("Can't create peer for %s\n", libcfs_nid2str(nid));
-		rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
-		goto failed;
-	}
-
-	/* We have validated the peer's parameters so use those */
-	peer->ibp_max_frags = max_frags;
-	peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
-
-	write_lock_irqsave(g_lock, flags);
-
-	peer2 = kiblnd_find_peer_locked(nid);
-	if (peer2) {
-		if (!peer2->ibp_version) {
-			peer2->ibp_version     = version;
-			peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
-		}
-
-		/* not the guy I've talked with */
-		if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
-		    peer2->ibp_version     != version) {
-			kiblnd_close_peer_conns_locked(peer2, -ESTALE);
-
-			if (kiblnd_peer_active(peer2)) {
-				peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
-				peer2->ibp_version = version;
-			}
-			write_unlock_irqrestore(g_lock, flags);
-
-			CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n",
-			      libcfs_nid2str(nid), peer2->ibp_version, version,
-			      peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
-
-			kiblnd_peer_decref(peer);
-			rej.ibr_why = IBLND_REJECT_CONN_STALE;
-			goto failed;
-		}
-
-		/*
-		 * Tie-break connection race in favour of the higher NID.
-		 * If we keep running into a race condition multiple times,
-		 * we have to assume that the connection attempt with the
-		 * higher NID is stuck in a connecting state and will never
-		 * recover.  As such, we pass through this if-block and let
-		 * the lower NID connection win so we can move forward.
-		 */
-		if (peer2->ibp_connecting &&
-		    nid < ni->ni_nid && peer2->ibp_races <
-		    MAX_CONN_RACES_BEFORE_ABORT) {
-			peer2->ibp_races++;
-			write_unlock_irqrestore(g_lock, flags);
-
-			CDEBUG(D_NET, "Conn race %s\n",
-			       libcfs_nid2str(peer2->ibp_nid));
-
-			kiblnd_peer_decref(peer);
-			rej.ibr_why = IBLND_REJECT_CONN_RACE;
-			goto failed;
-		}
-		if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
-			CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
-				libcfs_nid2str(peer2->ibp_nid),
-				MAX_CONN_RACES_BEFORE_ABORT);
-		/*
-		 * A passive connection is allowed even when this peer is
-		 * waiting for reconnection.
-		 */
-		peer2->ibp_reconnecting = 0;
-		peer2->ibp_races = 0;
-		peer2->ibp_accepting++;
-		kiblnd_peer_addref(peer2);
-
-		/*
-		 * We raced with kiblnd_launch_tx (active connect) to create
-		 * this peer, so copy the validated parameters now that we
-		 * know the peer's limits.
-		 */
-		peer2->ibp_max_frags = peer->ibp_max_frags;
-		peer2->ibp_queue_depth = peer->ibp_queue_depth;
-
-		write_unlock_irqrestore(g_lock, flags);
-		kiblnd_peer_decref(peer);
-		peer = peer2;
-	} else {
-		/* Brand new peer */
-		LASSERT(!peer->ibp_accepting);
-		LASSERT(!peer->ibp_version &&
-			!peer->ibp_incarnation);
-
-		peer->ibp_accepting   = 1;
-		peer->ibp_version     = version;
-		peer->ibp_incarnation = reqmsg->ibm_srcstamp;
-
-		/* I have a ref on ni that prevents it being shutdown */
-		LASSERT(!net->ibn_shutdown);
-
-		kiblnd_peer_addref(peer);
-		list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
-		write_unlock_irqrestore(g_lock, flags);
-	}
-
-	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT,
-				  version);
-	if (!conn) {
-		kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
-		kiblnd_peer_decref(peer);
-		rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
-		goto failed;
-	}
-
-	/*
-	 * conn now "owns" cmid, so I return success from here on to ensure the
-	 * CM callback doesn't destroy cmid.
-	 */
-	conn->ibc_incarnation      = reqmsg->ibm_srcstamp;
-	conn->ibc_credits          = conn->ibc_queue_depth;
-	conn->ibc_reserved_credits = conn->ibc_queue_depth;
-	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
-		IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn));
-
-	ackmsg = &conn->ibc_connvars->cv_msg;
-	memset(ackmsg, 0, sizeof(*ackmsg));
-
-	kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
-			sizeof(ackmsg->ibm_u.connparams));
-	ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
-	ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
-	ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
-	kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
-
-	memset(&cp, 0, sizeof(cp));
-	cp.private_data	= ackmsg;
-	cp.private_data_len = ackmsg->ibm_nob;
-	cp.responder_resources = 0;	     /* No atomic ops or RDMA reads */
-	cp.initiator_depth = 0;
-	cp.flow_control	= 1;
-	cp.retry_count = *kiblnd_tunables.kib_retry_count;
-	cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
-
-	CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
-
-	rc = rdma_accept(cmid, &cp);
-	if (rc) {
-		CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
-		rej.ibr_version = version;
-		rej.ibr_why     = IBLND_REJECT_FATAL;
-
-		kiblnd_reject(cmid, &rej);
-		kiblnd_connreq_done(conn, rc);
-		kiblnd_conn_decref(conn);
-	}
-
-	lnet_ni_decref(ni);
-	return 0;
-
- failed:
-	if (ni) {
-		rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
-		rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
-		lnet_ni_decref(ni);
-	}
-
-	rej.ibr_version             = version;
-	kiblnd_reject(cmid, &rej);
-
-	return -ECONNREFUSED;
-}
-
-static void
-kiblnd_check_reconnect(struct kib_conn *conn, int version,
-		       __u64 incarnation, int why, struct kib_connparams *cp)
-{
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	struct kib_peer *peer = conn->ibc_peer;
-	char *reason;
-	int msg_size = IBLND_MSG_SIZE;
-	int frag_num = -1;
-	int queue_dep = -1;
-	bool reconnect;
-	unsigned long flags;
-
-	LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-	LASSERT(peer->ibp_connecting > 0);     /* 'conn' at least */
-
-	if (cp) {
-		msg_size = cp->ibcp_max_msg_size;
-		frag_num	= cp->ibcp_max_frags << IBLND_FRAG_SHIFT;
-		queue_dep = cp->ibcp_queue_depth;
-	}
-
-	write_lock_irqsave(glock, flags);
-	/*
-	 * retry connection if it's still needed and no other connection
-	 * attempts (active or passive) are in progress
-	 * NB: reconnect is still needed even when ibp_tx_queue is
-	 * empty if ibp_version != version because reconnect may be
-	 * initiated by kiblnd_query()
-	 */
-	reconnect = (!list_empty(&peer->ibp_tx_queue) ||
-		     peer->ibp_version != version) &&
-		    peer->ibp_connecting &&
-		    !peer->ibp_accepting;
-	if (!reconnect) {
-		reason = "no need";
-		goto out;
-	}
-
-	switch (why) {
-	default:
-		reason = "Unknown";
-		break;
-
-	case IBLND_REJECT_RDMA_FRAGS: {
-		struct lnet_ioctl_config_lnd_tunables *tunables;
-
-		if (!cp) {
-			reason = "can't negotiate max frags";
-			goto out;
-		}
-		tunables = peer->ibp_ni->ni_lnd_tunables;
-		if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
-			reason = "map_on_demand must be enabled";
-			goto out;
-		}
-		if (conn->ibc_max_frags <= frag_num) {
-			reason = "unsupported max frags";
-			goto out;
-		}
-
-		peer->ibp_max_frags = frag_num;
-		reason = "rdma fragments";
-		break;
-	}
-	case IBLND_REJECT_MSG_QUEUE_SIZE:
-		if (!cp) {
-			reason = "can't negotiate queue depth";
-			goto out;
-		}
-		if (conn->ibc_queue_depth <= queue_dep) {
-			reason = "unsupported queue depth";
-			goto out;
-		}
-
-		peer->ibp_queue_depth = queue_dep;
-		reason = "queue depth";
-		break;
-
-	case IBLND_REJECT_CONN_STALE:
-		reason = "stale";
-		break;
-
-	case IBLND_REJECT_CONN_RACE:
-		reason = "conn race";
-		break;
-
-	case IBLND_REJECT_CONN_UNCOMPAT:
-		reason = "version negotiation";
-		break;
-	}
-
-	conn->ibc_reconnect = 1;
-	peer->ibp_reconnecting++;
-	peer->ibp_version = version;
-	if (incarnation)
-		peer->ibp_incarnation = incarnation;
-out:
-	write_unlock_irqrestore(glock, flags);
-
-	CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n",
-		libcfs_nid2str(peer->ibp_nid),
-		reconnect ? "reconnect" : "don't reconnect",
-		reason, IBLND_MSG_VERSION, version, msg_size,
-		conn->ibc_queue_depth, queue_dep,
-		conn->ibc_max_frags, frag_num);
-	/*
-	 * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer
-	 * while destroying the zombie
-	 */
-}
-
-static void
-kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
-{
-	struct kib_peer *peer = conn->ibc_peer;
-
-	LASSERT(!in_interrupt());
-	LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
-	switch (reason) {
-	case IB_CM_REJ_STALE_CONN:
-		kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0,
-				       IBLND_REJECT_CONN_STALE, NULL);
-		break;
-
-	case IB_CM_REJ_INVALID_SERVICE_ID:
-		CNETERR("%s rejected: no listener at %d\n",
-			libcfs_nid2str(peer->ibp_nid),
-			*kiblnd_tunables.kib_service);
-		break;
-
-	case IB_CM_REJ_CONSUMER_DEFINED:
-		if (priv_nob >= offsetof(struct kib_rej, ibr_padding)) {
-			struct kib_rej *rej = priv;
-			struct kib_connparams *cp = NULL;
-			int flip = 0;
-			__u64 incarnation = -1;
-
-			/* NB. the default incarnation is -1 because:
-			 * a) V1 ignores the dst incarnation in the connreq.
-			 * b) V2 provides an incarnation when rejecting me,
-			 *    overwriting the -1.
-			 *
-			 * If I connect to a V1 peer with the V2 protocol, it
-			 * rejects me and then upgrades to V2; knowing nothing
-			 * of the upgrade, I retry with V1.  The upgraded V2
-			 * peer can then tell I'm talking to the old version
-			 * and reject me (incarnation is -1).
-			 */
-
-			if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) ||
-			    rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) {
-				__swab32s(&rej->ibr_magic);
-				__swab16s(&rej->ibr_version);
-				flip = 1;
-			}
-
-			if (priv_nob >= sizeof(struct kib_rej) &&
-			    rej->ibr_version > IBLND_MSG_VERSION_1) {
-				/*
-				 * priv_nob is always 148 (the define of
-				 * IB_CM_REJ_PRIVATE_DATA_SIZE) in current
-				 * versions of OFED, so we still need to
-				 * check the version.
-				 */
-				cp = &rej->ibr_cp;
-
-				if (flip) {
-					__swab64s(&rej->ibr_incarnation);
-					__swab16s(&cp->ibcp_queue_depth);
-					__swab16s(&cp->ibcp_max_frags);
-					__swab32s(&cp->ibcp_max_msg_size);
-				}
-
-				incarnation = rej->ibr_incarnation;
-			}
-
-			if (rej->ibr_magic != IBLND_MSG_MAGIC &&
-			    rej->ibr_magic != LNET_PROTO_MAGIC) {
-				CERROR("%s rejected: consumer defined fatal error\n",
-				       libcfs_nid2str(peer->ibp_nid));
-				break;
-			}
-
-			if (rej->ibr_version != IBLND_MSG_VERSION &&
-			    rej->ibr_version != IBLND_MSG_VERSION_1) {
-				CERROR("%s rejected: o2iblnd version %x error\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       rej->ibr_version);
-				break;
-			}
-
-			if (rej->ibr_why     == IBLND_REJECT_FATAL &&
-			    rej->ibr_version == IBLND_MSG_VERSION_1) {
-				CDEBUG(D_NET, "rejected by old version peer %s: %x\n",
-				       libcfs_nid2str(peer->ibp_nid), rej->ibr_version);
-
-				if (conn->ibc_version != IBLND_MSG_VERSION_1)
-					rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT;
-			}
-
-			switch (rej->ibr_why) {
-			case IBLND_REJECT_CONN_RACE:
-			case IBLND_REJECT_CONN_STALE:
-			case IBLND_REJECT_CONN_UNCOMPAT:
-			case IBLND_REJECT_MSG_QUEUE_SIZE:
-			case IBLND_REJECT_RDMA_FRAGS:
-				kiblnd_check_reconnect(conn, rej->ibr_version,
-						       incarnation,
-						       rej->ibr_why, cp);
-				break;
-
-			case IBLND_REJECT_NO_RESOURCES:
-				CERROR("%s rejected: o2iblnd no resources\n",
-				       libcfs_nid2str(peer->ibp_nid));
-				break;
-
-			case IBLND_REJECT_FATAL:
-				CERROR("%s rejected: o2iblnd fatal error\n",
-				       libcfs_nid2str(peer->ibp_nid));
-				break;
-
-			default:
-				CERROR("%s rejected: o2iblnd reason %d\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       rej->ibr_why);
-				break;
-			}
-			break;
-		}
-		/* fall through */
-	default:
-		CNETERR("%s rejected: reason %d, size %d\n",
-			libcfs_nid2str(peer->ibp_nid), reason, priv_nob);
-		break;
-	}
-
-	kiblnd_connreq_done(conn, -ECONNREFUSED);
-}
-
-static void
-kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
-{
-	struct kib_peer *peer = conn->ibc_peer;
-	struct lnet_ni *ni = peer->ibp_ni;
-	struct kib_net *net = ni->ni_data;
-	struct kib_msg *msg = priv;
-	int ver = conn->ibc_version;
-	int rc = kiblnd_unpack_msg(msg, priv_nob);
-	unsigned long flags;
-
-	LASSERT(net);
-
-	if (rc) {
-		CERROR("Can't unpack connack from %s: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), rc);
-		goto failed;
-	}
-
-	if (msg->ibm_type != IBLND_MSG_CONNACK) {
-		CERROR("Unexpected message %d from %s\n",
-		       msg->ibm_type, libcfs_nid2str(peer->ibp_nid));
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	if (ver != msg->ibm_version) {
-		CERROR("%s replied with version %x which differs from requested version %x\n",
-		       libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver);
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	if (msg->ibm_u.connparams.ibcp_queue_depth >
-	    conn->ibc_queue_depth) {
-		CERROR("%s has incompatible queue depth %d (<=%d wanted)\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       msg->ibm_u.connparams.ibcp_queue_depth,
-		       conn->ibc_queue_depth);
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) >
-	    conn->ibc_max_frags) {
-		CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT,
-		       conn->ibc_max_frags);
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
-		CERROR("%s max message size %d too big (%d max)\n",
-		       libcfs_nid2str(peer->ibp_nid),
-		       msg->ibm_u.connparams.ibcp_max_msg_size,
-		       IBLND_MSG_SIZE);
-		rc = -EPROTO;
-		goto failed;
-	}
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	if (msg->ibm_dstnid == ni->ni_nid &&
-	    msg->ibm_dststamp == net->ibn_incarnation)
-		rc = 0;
-	else
-		rc = -ESTALE;
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	if (rc) {
-		CERROR("Bad connection reply from %s, rc = %d, version: %x max_frags: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), rc,
-		       msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags);
-		goto failed;
-	}
-
-	conn->ibc_incarnation = msg->ibm_srcstamp;
-	conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
-	conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
-	conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
-	conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
-	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
-		IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
-
-	kiblnd_connreq_done(conn, 0);
-	return;
-
- failed:
-	/*
-	 * NB My QP has already established itself, so I handle anything going
-	 * wrong here by setting ibc_comms_error.
-	 * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then
-	 * immediately tears it down.
-	 */
-	LASSERT(rc);
-	conn->ibc_comms_error = rc;
-	kiblnd_connreq_done(conn, 0);
-}
-
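A side note on the connack checks above: ibcp_max_frags travels on the wire scaled by IBLND_FRAG_SHIFT, which is why both the bounds check and the final assignment shift it back down. My reading is that the shift normalizes fragment counts to 4 KiB wire units across hosts with different page sizes; a standalone sketch under that assumption (all values illustrative):

#include <stdio.h>

/*
 * Sketch of the max_frags wire scaling in kiblnd_check_connreply().
 * ASSUMPTION: frag_shift = page_shift - 12, so the wire unit is 4 KiB
 * regardless of the local page size.
 */
int main(void)
{
	const int page_shift = 16;		/* e.g. a 64 KiB-page host */
	const int frag_shift = page_shift - 12;	/* = 4 */
	unsigned int local_frags = 16;
	unsigned int wire = local_frags << frag_shift;	/* encode: 256 */
	unsigned int back = wire >> frag_shift;		/* decode: 16 */

	printf("wire=%u decoded=%u\n", wire, back);
	return 0;
}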
-static int
-kiblnd_active_connect(struct rdma_cm_id *cmid)
-{
-	struct kib_peer *peer = (struct kib_peer *)cmid->context;
-	struct kib_conn *conn;
-	struct kib_msg *msg;
-	struct rdma_conn_param cp;
-	int version;
-	__u64 incarnation;
-	unsigned long flags;
-	int rc;
-
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	incarnation = peer->ibp_incarnation;
-	version = !peer->ibp_version ? IBLND_MSG_VERSION :
-				       peer->ibp_version;
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
-				  version);
-	if (!conn) {
-		kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
-		kiblnd_peer_decref(peer); /* lose cmid's ref */
-		return -ENOMEM;
-	}
-
-	/*
-	 * conn "owns" cmid now, so I return success from here on to ensure the
-	 * CM callback doesn't destroy cmid. conn also takes over cmid's ref
-	 * on peer
-	 */
-	msg = &conn->ibc_connvars->cv_msg;
-
-	memset(msg, 0, sizeof(*msg));
-	kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
-	msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
-	msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
-	msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
-	kiblnd_pack_msg(peer->ibp_ni, msg, version,
-			0, peer->ibp_nid, incarnation);
-
-	memset(&cp, 0, sizeof(cp));
-	cp.private_data	= msg;
-	cp.private_data_len    = msg->ibm_nob;
-	cp.responder_resources = 0;	     /* No atomic ops or RDMA reads */
-	cp.initiator_depth     = 0;
-	cp.flow_control        = 1;
-	cp.retry_count         = *kiblnd_tunables.kib_retry_count;
-	cp.rnr_retry_count     = *kiblnd_tunables.kib_rnr_retry_count;
-
-	LASSERT(cmid->context == (void *)conn);
-	LASSERT(conn->ibc_cmid == cmid);
-
-	rc = rdma_connect(cmid, &cp);
-	if (rc) {
-		CERROR("Can't connect to %s: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), rc);
-		kiblnd_connreq_done(conn, rc);
-		kiblnd_conn_decref(conn);
-	}
-
-	return 0;
-}
-
-int
-kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
-{
-	struct kib_peer *peer;
-	struct kib_conn *conn;
-	int rc;
-
-	switch (event->event) {
-	default:
-		CERROR("Unexpected event: %d, status: %d\n",
-		       event->event, event->status);
-		LBUG();
-
-	case RDMA_CM_EVENT_CONNECT_REQUEST:
-		/* destroy cmid on failure */
-		rc = kiblnd_passive_connect(cmid,
-					    (void *)KIBLND_CONN_PARAM(event),
-					    KIBLND_CONN_PARAM_LEN(event));
-		CDEBUG(D_NET, "connreq: %d\n", rc);
-		return rc;
-
-	case RDMA_CM_EVENT_ADDR_ERROR:
-		peer = (struct kib_peer *)cmid->context;
-		CNETERR("%s: ADDR ERROR %d\n",
-			libcfs_nid2str(peer->ibp_nid), event->status);
-		kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
-		kiblnd_peer_decref(peer);
-		return -EHOSTUNREACH;      /* rc destroys cmid */
-
-	case RDMA_CM_EVENT_ADDR_RESOLVED:
-		peer = (struct kib_peer *)cmid->context;
-
-		CDEBUG(D_NET, "%s Addr resolved: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), event->status);
-
-		if (event->status) {
-			CNETERR("Can't resolve address for %s: %d\n",
-				libcfs_nid2str(peer->ibp_nid), event->status);
-			rc = event->status;
-		} else {
-			rc = rdma_resolve_route(
-				cmid, *kiblnd_tunables.kib_timeout * 1000);
-			if (!rc) {
-				struct kib_net *net = peer->ibp_ni->ni_data;
-				struct kib_dev *dev = net->ibn_dev;
-
-				CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       dev->ibd_ifname,
-				       &dev->ibd_ifip, cmid->device->name);
-
-				return 0;
-			}
-
-			/* Can't initiate route resolution */
-			CERROR("Can't resolve route for %s: %d\n",
-			       libcfs_nid2str(peer->ibp_nid), rc);
-		}
-		kiblnd_peer_connect_failed(peer, 1, rc);
-		kiblnd_peer_decref(peer);
-		return rc;		      /* rc destroys cmid */
-
-	case RDMA_CM_EVENT_ROUTE_ERROR:
-		peer = (struct kib_peer *)cmid->context;
-		CNETERR("%s: ROUTE ERROR %d\n",
-			libcfs_nid2str(peer->ibp_nid), event->status);
-		kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
-		kiblnd_peer_decref(peer);
-		return -EHOSTUNREACH;	   /* rc destroys cmid */
-
-	case RDMA_CM_EVENT_ROUTE_RESOLVED:
-		peer = (struct kib_peer *)cmid->context;
-		CDEBUG(D_NET, "%s Route resolved: %d\n",
-		       libcfs_nid2str(peer->ibp_nid), event->status);
-
-		if (!event->status)
-			return kiblnd_active_connect(cmid);
-
-		CNETERR("Can't resolve route for %s: %d\n",
-			libcfs_nid2str(peer->ibp_nid), event->status);
-		kiblnd_peer_connect_failed(peer, 1, event->status);
-		kiblnd_peer_decref(peer);
-		return event->status;	   /* rc destroys cmid */
-
-	case RDMA_CM_EVENT_UNREACHABLE:
-		conn = (struct kib_conn *)cmid->context;
-		LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
-			conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
-		CNETERR("%s: UNREACHABLE %d\n",
-			libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
-		kiblnd_connreq_done(conn, -ENETDOWN);
-		kiblnd_conn_decref(conn);
-		return 0;
-
-	case RDMA_CM_EVENT_CONNECT_ERROR:
-		conn = (struct kib_conn *)cmid->context;
-		LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
-			conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
-		CNETERR("%s: CONNECT ERROR %d\n",
-			libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
-		kiblnd_connreq_done(conn, -ENOTCONN);
-		kiblnd_conn_decref(conn);
-		return 0;
-
-	case RDMA_CM_EVENT_REJECTED:
-		conn = (struct kib_conn *)cmid->context;
-		switch (conn->ibc_state) {
-		default:
-			LBUG();
-
-		case IBLND_CONN_PASSIVE_WAIT:
-			CERROR("%s: REJECTED %d\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       event->status);
-			kiblnd_connreq_done(conn, -ECONNRESET);
-			break;
-
-		case IBLND_CONN_ACTIVE_CONNECT:
-			kiblnd_rejected(conn, event->status,
-					(void *)KIBLND_CONN_PARAM(event),
-					KIBLND_CONN_PARAM_LEN(event));
-			break;
-		}
-		kiblnd_conn_decref(conn);
-		return 0;
-
-	case RDMA_CM_EVENT_ESTABLISHED:
-		conn = (struct kib_conn *)cmid->context;
-		switch (conn->ibc_state) {
-		default:
-			LBUG();
-
-		case IBLND_CONN_PASSIVE_WAIT:
-			CDEBUG(D_NET, "ESTABLISHED (passive): %s\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			kiblnd_connreq_done(conn, 0);
-			break;
-
-		case IBLND_CONN_ACTIVE_CONNECT:
-			CDEBUG(D_NET, "ESTABLISHED(active): %s\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			kiblnd_check_connreply(conn,
-					       (void *)KIBLND_CONN_PARAM(event),
-					       KIBLND_CONN_PARAM_LEN(event));
-			break;
-		}
-		/* net keeps its ref on conn! */
-		return 0;
-
-	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		CDEBUG(D_NET, "Ignore TIMEWAIT_EXIT event\n");
-		return 0;
-	case RDMA_CM_EVENT_DISCONNECTED:
-		conn = (struct kib_conn *)cmid->context;
-		if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
-			CERROR("%s DISCONNECTED\n",
-			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-			kiblnd_connreq_done(conn, -ECONNRESET);
-		} else {
-			kiblnd_close_conn(conn, 0);
-		}
-		kiblnd_conn_decref(conn);
-		cmid->context = NULL;
-		return 0;
-
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		LCONSOLE_ERROR_MSG(0x131,
-				   "Received notification of device removal\n"
-				   "Please shut down LNET to allow this to proceed\n");
-		/*
-		 * Can't remove network from underneath LNET for now, so I have
-		 * to ignore this
-		 */
-		return 0;
-
-	case RDMA_CM_EVENT_ADDR_CHANGE:
-		LCONSOLE_INFO("Physical link changed (e.g. HCA/port)\n");
-		return 0;
-	}
-}
-
-static int
-kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs)
-{
-	struct kib_tx *tx;
-	struct list_head *ttmp;
-
-	list_for_each(ttmp, txs) {
-		tx = list_entry(ttmp, struct kib_tx, tx_list);
-
-		if (txs != &conn->ibc_active_txs) {
-			LASSERT(tx->tx_queued);
-		} else {
-			LASSERT(!tx->tx_queued);
-			LASSERT(tx->tx_waiting || tx->tx_sending);
-		}
-
-		if (time_after_eq(jiffies, tx->tx_deadline)) {
-			CERROR("Timed out tx: %s, %lu seconds\n",
-			       kiblnd_queue2str(conn, txs),
-			       (jiffies - tx->tx_deadline) / HZ);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int
-kiblnd_conn_timed_out_locked(struct kib_conn *conn)
-{
-	return  kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) ||
-		kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) ||
-		kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) ||
-		kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) ||
-		kiblnd_check_txs_locked(conn, &conn->ibc_active_txs);
-}
-
-static void
-kiblnd_check_conns(int idx)
-{
-	LIST_HEAD(closes);
-	LIST_HEAD(checksends);
-	struct list_head *peers = &kiblnd_data.kib_peers[idx];
-	struct list_head *ptmp;
-	struct kib_peer *peer;
-	struct kib_conn *conn;
-	struct kib_conn *temp;
-	struct kib_conn *tmp;
-	struct list_head *ctmp;
-	unsigned long flags;
-
-	/*
-	 * NB. We expect to have a look at all the peers and not find any
-	 * RDMAs to time out, so we just use a shared lock while we
-	 * take a look...
-	 */
-	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
-	list_for_each(ptmp, peers) {
-		peer = list_entry(ptmp, struct kib_peer, ibp_list);
-
-		list_for_each(ctmp, &peer->ibp_conns) {
-			int timedout;
-			int sendnoop;
-
-			conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
-			LASSERT(conn->ibc_state == IBLND_CONN_ESTABLISHED);
-
-			spin_lock(&conn->ibc_lock);
-
-			sendnoop = kiblnd_need_noop(conn);
-			timedout = kiblnd_conn_timed_out_locked(conn);
-			if (!sendnoop && !timedout) {
-				spin_unlock(&conn->ibc_lock);
-				continue;
-			}
-
-			if (timedout) {
-				CERROR("Timed out RDMA with %s (%lu): c: %u, oc: %u, rc: %u\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       (jiffies - peer->ibp_last_alive) / HZ,
-				       conn->ibc_credits,
-				       conn->ibc_outstanding_credits,
-				       conn->ibc_reserved_credits);
-				list_add(&conn->ibc_connd_list, &closes);
-			} else {
-				list_add(&conn->ibc_connd_list, &checksends);
-			}
-			/* +ref for 'closes' or 'checksends' */
-			kiblnd_conn_addref(conn);
-
-			spin_unlock(&conn->ibc_lock);
-		}
-	}
-
-	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	/*
-	 * Handle timeout by closing the whole
-	 * connection. We can only be sure RDMA activity
-	 * has ceased once the QP has been modified.
-	 */
-	list_for_each_entry_safe(conn, tmp, &closes, ibc_connd_list) {
-		list_del(&conn->ibc_connd_list);
-		kiblnd_close_conn(conn, -ETIMEDOUT);
-		kiblnd_conn_decref(conn);
-	}
-
-	/*
-	 * In case we have enough credits to return via a
-	 * NOOP, but there were no non-blocking tx descs
-	 * free to do it last time...
-	 */
-	list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
-		list_del(&conn->ibc_connd_list);
-
-		spin_lock(&conn->ibc_lock);
-		kiblnd_check_sends_locked(conn);
-		spin_unlock(&conn->ibc_lock);
-
-		kiblnd_conn_decref(conn);
-	}
-}
-
-static void
-kiblnd_disconnect_conn(struct kib_conn *conn)
-{
-	LASSERT(!in_interrupt());
-	LASSERT(current == kiblnd_data.kib_connd);
-	LASSERT(conn->ibc_state == IBLND_CONN_CLOSING);
-
-	rdma_disconnect(conn->ibc_cmid);
-	kiblnd_finalise_conn(conn);
-
-	kiblnd_peer_notify(conn->ibc_peer);
-}
-
-/**
- * High-water mark for reconnections to the same peer; reconnection attempts
- * should be delayed after more than KIB_RECONN_HIGH_RACE tries.
- */
-#define KIB_RECONN_HIGH_RACE	10
-/**
- * Allow connd to take a break and handle other things after consecutive
- * reconnection attempts.
- */
-#define KIB_RECONN_BREAK	100
-
-int
-kiblnd_connd(void *arg)
-{
-	spinlock_t *lock = &kiblnd_data.kib_connd_lock;
-	wait_queue_entry_t wait;
-	unsigned long flags;
-	struct kib_conn *conn;
-	int timeout;
-	int i;
-	int dropped_lock;
-	int peer_index = 0;
-	unsigned long deadline = jiffies;
-
-	init_waitqueue_entry(&wait, current);
-	kiblnd_data.kib_connd = current;
-
-	spin_lock_irqsave(lock, flags);
-
-	while (!kiblnd_data.kib_shutdown) {
-		int reconn = 0;
-
-		dropped_lock = 0;
-
-		if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
-			struct kib_peer *peer = NULL;
-
-			conn = list_entry(kiblnd_data.kib_connd_zombies.next,
-					  struct kib_conn, ibc_list);
-			list_del(&conn->ibc_list);
-			if (conn->ibc_reconnect) {
-				peer = conn->ibc_peer;
-				kiblnd_peer_addref(peer);
-			}
-
-			spin_unlock_irqrestore(lock, flags);
-			dropped_lock = 1;
-
-			kiblnd_destroy_conn(conn);
-
-			spin_lock_irqsave(lock, flags);
-			if (!peer) {
-				kfree(conn);
-				continue;
-			}
-
-			conn->ibc_peer = peer;
-			if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
-				list_add_tail(&conn->ibc_list,
-					      &kiblnd_data.kib_reconn_list);
-			else
-				list_add_tail(&conn->ibc_list,
-					      &kiblnd_data.kib_reconn_wait);
-		}
-
-		if (!list_empty(&kiblnd_data.kib_connd_conns)) {
-			conn = list_entry(kiblnd_data.kib_connd_conns.next,
-					  struct kib_conn, ibc_list);
-			list_del(&conn->ibc_list);
-
-			spin_unlock_irqrestore(lock, flags);
-			dropped_lock = 1;
-
-			kiblnd_disconnect_conn(conn);
-			kiblnd_conn_decref(conn);
-
-			spin_lock_irqsave(lock, flags);
-		}
-
-		while (reconn < KIB_RECONN_BREAK) {
-			if (kiblnd_data.kib_reconn_sec !=
-			    ktime_get_real_seconds()) {
-				kiblnd_data.kib_reconn_sec = ktime_get_real_seconds();
-				list_splice_init(&kiblnd_data.kib_reconn_wait,
-						 &kiblnd_data.kib_reconn_list);
-			}
-
-			if (list_empty(&kiblnd_data.kib_reconn_list))
-				break;
-
-			conn = list_entry(kiblnd_data.kib_reconn_list.next,
-					  struct kib_conn, ibc_list);
-			list_del(&conn->ibc_list);
-
-			spin_unlock_irqrestore(lock, flags);
-			dropped_lock = 1;
-
-			reconn += kiblnd_reconnect_peer(conn->ibc_peer);
-			kiblnd_peer_decref(conn->ibc_peer);
-			kfree(conn);
-
-			spin_lock_irqsave(lock, flags);
-		}
-
-		/* careful with the jiffy wrap... */
-		timeout = (int)(deadline - jiffies);
-		if (timeout <= 0) {
-			const int n = 4;
-			const int p = 1;
-			int chunk = kiblnd_data.kib_peer_hash_size;
-
-			spin_unlock_irqrestore(lock, flags);
-			dropped_lock = 1;
-
-			/*
-			 * Time to check for RDMA timeouts on a few more
-			 * peers: I do checks every 'p' seconds on a
-			 * proportion of the peer table and I need to check
-			 * every connection 'n' times within a timeout
-			 * interval, to ensure I detect a timeout on any
-			 * connection within (n+1)/n times the timeout
-			 * interval.
-			 */
-			if (*kiblnd_tunables.kib_timeout > n * p)
-				chunk = (chunk * n * p) /
-					*kiblnd_tunables.kib_timeout;
-			if (!chunk)
-				chunk = 1;
-
-			for (i = 0; i < chunk; i++) {
-				kiblnd_check_conns(peer_index);
-				peer_index = (peer_index + 1) %
-					     kiblnd_data.kib_peer_hash_size;
-			}
-
-			deadline += msecs_to_jiffies(p * MSEC_PER_SEC);
-			spin_lock_irqsave(lock, flags);
-		}
-
-		if (dropped_lock)
-			continue;
-
-		/* Nothing to do for 'timeout'  */
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
-		spin_unlock_irqrestore(lock, flags);
-
-		schedule_timeout(timeout);
-
-		remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
-		spin_lock_irqsave(lock, flags);
-	}
-
-	spin_unlock_irqrestore(lock, flags);
-
-	kiblnd_thread_fini();
-	return 0;
-}
-
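For scale, here is the scan-sizing arithmetic from kiblnd_connd() above worked through with the module defaults (timeout = 50 s from o2iblnd_modparams.c below; the 101-bucket peer hash size is an assumption for illustration):

#include <stdio.h>

/* Worked instance of the connd chunk computation: n = 4 sweeps per
 * timeout interval, p = 1 s between scans, hash_size assumed 101.
 */
int main(void)
{
	const int hash_size = 101, timeout = 50, n = 4, p = 1;
	int chunk = hash_size;

	if (timeout > n * p)
		chunk = (hash_size * n * p) / timeout;	/* = 8 buckets/scan */
	if (!chunk)
		chunk = 1;

	/* 8 buckets/s -> full sweep every ~13 s, i.e. each connection is
	 * examined roughly four times per 50 s timeout window.
	 */
	printf("chunk=%d, sweep=~%d s\n", chunk, (hash_size + chunk - 1) / chunk);
	return 0;
}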
-void
-kiblnd_qp_event(struct ib_event *event, void *arg)
-{
-	struct kib_conn *conn = arg;
-
-	switch (event->event) {
-	case IB_EVENT_COMM_EST:
-		CDEBUG(D_NET, "%s established\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		/*
-		 * We received a packet but the connection isn't established
-		 * yet; the handshake packet was probably lost, so tell the
-		 * CM to force the connection into the established state.
-		 */
-		rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
-		return;
-
-	default:
-		CERROR("%s: Async QP event type %d\n",
-		       libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
-		return;
-	}
-}
-
-static void
-kiblnd_complete(struct ib_wc *wc)
-{
-	switch (kiblnd_wreqid2type(wc->wr_id)) {
-	default:
-		LBUG();
-
-	case IBLND_WID_MR:
-		if (wc->status != IB_WC_SUCCESS &&
-		    wc->status != IB_WC_WR_FLUSH_ERR)
-			CNETERR("FastReg failed: %d\n", wc->status);
-		break;
-
-	case IBLND_WID_RDMA:
-		/*
-		 * We only get RDMA completion notification if it fails.  All
-		 * subsequent work items, including the final SEND, will fail
-		 * too.  However we can't print out any more info about the
-		 * failing RDMA because 'tx' might be back on the idle list or
-		 * even reused already if we didn't manage to post all our work
-		 * items
-		 */
-		CNETERR("RDMA (tx: %p) failed: %d\n",
-			kiblnd_wreqid2ptr(wc->wr_id), wc->status);
-		return;
-
-	case IBLND_WID_TX:
-		kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status);
-		return;
-
-	case IBLND_WID_RX:
-		kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status,
-				   wc->byte_len);
-		return;
-	}
-}
-
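The kiblnd_wreqid2type()/kiblnd_wreqid2ptr() pair that kiblnd_complete() above decodes are tag-in-pointer helpers: the 64-bit ib_wc.wr_id carries both an object pointer and a small event type. A generic sketch of the technique (names and the 3-bit mask are assumptions, not the driver's header):

#include <stdint.h>

/* Tag-in-pointer packing: objects are at least 8-byte aligned, so the
 * low three bits of the pointer are free to hold one of up to eight
 * work-request types.
 */
#define WID_MASK 0x7ULL

static inline uint64_t wreqid_pack(void *ptr, unsigned int type)
{
	return (uint64_t)(uintptr_t)ptr | (type & WID_MASK);
}

static inline unsigned int wreqid2type(uint64_t wr_id)
{
	return (unsigned int)(wr_id & WID_MASK);
}

static inline void *wreqid2ptr(uint64_t wr_id)
{
	return (void *)(uintptr_t)(wr_id & ~WID_MASK);
}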
-void
-kiblnd_cq_completion(struct ib_cq *cq, void *arg)
-{
-	/*
-	 * NB I'm not allowed to schedule this conn once its refcount has
-	 * reached 0.  Since fundamentally I'm racing with scheduler threads
-	 * consuming my CQ I could be called after all completions have
-	 * occurred.  But in this case, !ibc_nrx && !ibc_nsends_posted
-	 * and this CQ is about to be destroyed so I NOOP.
-	 */
-	struct kib_conn *conn = arg;
-	struct kib_sched_info *sched = conn->ibc_sched;
-	unsigned long flags;
-
-	LASSERT(cq == conn->ibc_cq);
-
-	spin_lock_irqsave(&sched->ibs_lock, flags);
-
-	conn->ibc_ready = 1;
-
-	if (!conn->ibc_scheduled &&
-	    (conn->ibc_nrx > 0 ||
-	     conn->ibc_nsends_posted > 0)) {
-		kiblnd_conn_addref(conn); /* +1 ref for sched_conns */
-		conn->ibc_scheduled = 1;
-		list_add_tail(&conn->ibc_sched_list, &sched->ibs_conns);
-
-		if (waitqueue_active(&sched->ibs_waitq))
-			wake_up(&sched->ibs_waitq);
-	}
-
-	spin_unlock_irqrestore(&sched->ibs_lock, flags);
-}
-
-void
-kiblnd_cq_event(struct ib_event *event, void *arg)
-{
-	struct kib_conn *conn = arg;
-
-	CERROR("%s: async CQ event type %d\n",
-	       libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
-}
-
-int
-kiblnd_scheduler(void *arg)
-{
-	long id = (long)arg;
-	struct kib_sched_info *sched;
-	struct kib_conn *conn;
-	wait_queue_entry_t wait;
-	unsigned long flags;
-	struct ib_wc wc;
-	int did_something;
-	int busy_loops = 0;
-	int rc;
-
-	init_waitqueue_entry(&wait, current);
-
-	sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)];
-
-	rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt);
-	if (rc) {
-		CWARN("Unable to bind on CPU partition %d; please verify that all CPUs are healthy and reload modules if necessary, otherwise your system might be at risk of low performance\n",
-		      sched->ibs_cpt);
-	}
-
-	spin_lock_irqsave(&sched->ibs_lock, flags);
-
-	while (!kiblnd_data.kib_shutdown) {
-		if (busy_loops++ >= IBLND_RESCHED) {
-			spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-			cond_resched();
-			busy_loops = 0;
-
-			spin_lock_irqsave(&sched->ibs_lock, flags);
-		}
-
-		did_something = 0;
-
-		if (!list_empty(&sched->ibs_conns)) {
-			conn = list_entry(sched->ibs_conns.next, struct kib_conn,
-					  ibc_sched_list);
-			/* take over kib_sched_conns' ref on conn... */
-			LASSERT(conn->ibc_scheduled);
-			list_del(&conn->ibc_sched_list);
-			conn->ibc_ready = 0;
-
-			spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-			wc.wr_id = IBLND_WID_INVAL;
-
-			rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
-			if (!rc) {
-				rc = ib_req_notify_cq(conn->ibc_cq,
-						      IB_CQ_NEXT_COMP);
-				if (rc < 0) {
-					CWARN("%s: ib_req_notify_cq failed: %d, closing connection\n",
-					      libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
-					kiblnd_close_conn(conn, -EIO);
-					kiblnd_conn_decref(conn);
-					spin_lock_irqsave(&sched->ibs_lock,
-							  flags);
-					continue;
-				}
-
-				rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
-			}
-
-			if (unlikely(rc > 0 && wc.wr_id == IBLND_WID_INVAL)) {
-				LCONSOLE_ERROR("ib_poll_cq (rc: %d) returned invalid wr_id, opcode %d, status: %d, vendor_err: %d, conn: %s status: %d\nplease upgrade firmware and OFED or contact vendor.\n",
-					       rc, wc.opcode, wc.status,
-					       wc.vendor_err,
-					       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-					       conn->ibc_state);
-				rc = -EINVAL;
-			}
-
-			if (rc < 0) {
-				CWARN("%s: ib_poll_cq failed: %d, closing connection\n",
-				      libcfs_nid2str(conn->ibc_peer->ibp_nid),
-				      rc);
-				kiblnd_close_conn(conn, -EIO);
-				kiblnd_conn_decref(conn);
-				spin_lock_irqsave(&sched->ibs_lock, flags);
-				continue;
-			}
-
-			spin_lock_irqsave(&sched->ibs_lock, flags);
-
-			if (rc || conn->ibc_ready) {
-				/*
-				 * There may be another completion waiting; get
-				 * another scheduler to check while I handle
-				 * this one...
-				 */
-				/* +1 ref for sched_conns */
-				kiblnd_conn_addref(conn);
-				list_add_tail(&conn->ibc_sched_list,
-					      &sched->ibs_conns);
-				if (waitqueue_active(&sched->ibs_waitq))
-					wake_up(&sched->ibs_waitq);
-			} else {
-				conn->ibc_scheduled = 0;
-			}
-
-			if (rc) {
-				spin_unlock_irqrestore(&sched->ibs_lock, flags);
-				kiblnd_complete(&wc);
-
-				spin_lock_irqsave(&sched->ibs_lock, flags);
-			}
-
-			kiblnd_conn_decref(conn); /* ...drop my ref from above */
-			did_something = 1;
-		}
-
-		if (did_something)
-			continue;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue_exclusive(&sched->ibs_waitq, &wait);
-		spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-		schedule();
-		busy_loops = 0;
-
-		remove_wait_queue(&sched->ibs_waitq, &wait);
-		spin_lock_irqsave(&sched->ibs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
-	kiblnd_thread_fini();
-	return 0;
-}
-
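One detail of kiblnd_scheduler() above deserves a callout: ib_req_notify_cq() only arms notification for future completions, so a completion landing between an empty poll and the re-arm would never raise an event. Hence the second ib_poll_cq() after arming. A condensed sketch of just that pattern (kernel context; error handling reduced to a return code the caller would treat as fatal):

#include <rdma/ib_verbs.h>

/* Drain-then-rearm: poll, and if the CQ looks empty, arm it and poll
 * once more to close the race with a completion that arrived between
 * the two calls.
 */
static int poll_one(struct ib_cq *cq, struct ib_wc *wc)
{
	int rc = ib_poll_cq(cq, 1, wc);

	if (!rc) {
		rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
		if (rc < 0)
			return rc;	/* caller closes the connection */
		rc = ib_poll_cq(cq, 1, wc);
	}
	return rc;	/* > 0 means *wc holds one completion to dispatch */
}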
-int
-kiblnd_failover_thread(void *arg)
-{
-	rwlock_t *glock = &kiblnd_data.kib_global_lock;
-	struct kib_dev *dev;
-	wait_queue_entry_t wait;
-	unsigned long flags;
-	int rc;
-
-	LASSERT(*kiblnd_tunables.kib_dev_failover);
-
-	init_waitqueue_entry(&wait, current);
-	write_lock_irqsave(glock, flags);
-
-	while (!kiblnd_data.kib_shutdown) {
-		int do_failover = 0;
-		int long_sleep;
-
-		list_for_each_entry(dev, &kiblnd_data.kib_failed_devs,
-				    ibd_fail_list) {
-			if (time_before(jiffies,
-					dev->ibd_next_failover))
-				continue;
-			do_failover = 1;
-			break;
-		}
-
-		if (do_failover) {
-			list_del_init(&dev->ibd_fail_list);
-			dev->ibd_failover = 1;
-			write_unlock_irqrestore(glock, flags);
-
-			rc = kiblnd_dev_failover(dev);
-
-			write_lock_irqsave(glock, flags);
-
-			LASSERT(dev->ibd_failover);
-			dev->ibd_failover = 0;
-			if (rc >= 0) { /* Device is OK or failover succeeded */
-				dev->ibd_next_failover = jiffies + 3 * HZ;
-				continue;
-			}
-
-			/* failed to failover, retry later */
-			dev->ibd_next_failover =
-				jiffies + min(dev->ibd_failed_failover, 10) * HZ;
-			if (kiblnd_dev_can_failover(dev)) {
-				list_add_tail(&dev->ibd_fail_list,
-					      &kiblnd_data.kib_failed_devs);
-			}
-
-			continue;
-		}
-
-		/* long sleep if no more pending failover */
-		long_sleep = list_empty(&kiblnd_data.kib_failed_devs);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
-		write_unlock_irqrestore(glock, flags);
-
-		rc = schedule_timeout(long_sleep ? 10 * HZ :
-						   HZ);
-		remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
-		write_lock_irqsave(glock, flags);
-
-		if (!long_sleep || rc)
-			continue;
-
-		/*
-		 * After a long sleep, routinely check all active devices.
-		 * We need a check like this because, with no active
-		 * connection on a dev and no local SENDs, we may keep
-		 * listening on the wrong HCA forever after a bonding failover.
-		 */
-		list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
-			if (kiblnd_dev_can_failover(dev)) {
-				list_add_tail(&dev->ibd_fail_list,
-					      &kiblnd_data.kib_failed_devs);
-			}
-		}
-	}
-
-	write_unlock_irqrestore(glock, flags);
-
-	kiblnd_thread_fini();
-	return 0;
-}

+ 0 - 296
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c

@@ -1,296 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd_modparams.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "o2iblnd.h"
-
-static int service = 987;
-module_param(service, int, 0444);
-MODULE_PARM_DESC(service, "service number (within RDMA_PS_TCP)");
-
-static int cksum;
-module_param(cksum, int, 0644);
-MODULE_PARM_DESC(cksum, "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-module_param(timeout, int, 0644);
-MODULE_PARM_DESC(timeout, "timeout (seconds)");
-
-/*
- * Number of threads in each scheduler pool (which is per-CPT);
- * if set to zero, a reasonable value is estimated from the number of CPUs.
- */
-static int nscheds;
-module_param(nscheds, int, 0444);
-MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool");
-
-static unsigned int conns_per_peer = 1;
-module_param(conns_per_peer, uint, 0444);
-MODULE_PARM_DESC(conns_per_peer, "number of connections per peer");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int ntx = 512;
-module_param(ntx, int, 0444);
-MODULE_PARM_DESC(ntx, "# of message descriptors allocated for each pool");
-
-/* NB: this value is shared by all CPTs */
-static int credits = 256;
-module_param(credits, int, 0444);
-MODULE_PARM_DESC(credits, "# concurrent sends");
-
-static int peer_credits = 8;
-module_param(peer_credits, int, 0444);
-MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
-
-static int peer_credits_hiw;
-module_param(peer_credits_hiw, int, 0444);
-MODULE_PARM_DESC(peer_credits_hiw, "high-water mark at which to eagerly return credits");
-
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
-
-static int peer_timeout = 180;
-module_param(peer_timeout, int, 0444);
-MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)");
-
-static char *ipif_name = "ib0";
-module_param(ipif_name, charp, 0444);
-MODULE_PARM_DESC(ipif_name, "IPoIB interface name");
-
-static int retry_count = 5;
-module_param(retry_count, int, 0644);
-MODULE_PARM_DESC(retry_count, "Retransmissions when no ACK received");
-
-static int rnr_retry_count = 6;
-module_param(rnr_retry_count, int, 0644);
-MODULE_PARM_DESC(rnr_retry_count, "RNR retransmissions");
-
-static int keepalive = 100;
-module_param(keepalive, int, 0644);
-MODULE_PARM_DESC(keepalive, "Idle time in seconds before sending a keepalive");
-
-static int ib_mtu;
-module_param(ib_mtu, int, 0444);
-MODULE_PARM_DESC(ib_mtu, "IB MTU 256/512/1024/2048/4096");
-
-static int concurrent_sends;
-module_param(concurrent_sends, int, 0444);
-MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing");
-
-#define IBLND_DEFAULT_MAP_ON_DEMAND IBLND_MAX_RDMA_FRAGS
-static int map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
-module_param(map_on_demand, int, 0444);
-MODULE_PARM_DESC(map_on_demand, "map on demand");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int fmr_pool_size = 512;
-module_param(fmr_pool_size, int, 0444);
-MODULE_PARM_DESC(fmr_pool_size, "size of fmr pool on each CPT (>= ntx / 4)");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int fmr_flush_trigger = 384;
-module_param(fmr_flush_trigger, int, 0444);
-MODULE_PARM_DESC(fmr_flush_trigger, "# dirty FMRs that triggers pool flush");
-
-static int fmr_cache = 1;
-module_param(fmr_cache, int, 0444);
-MODULE_PARM_DESC(fmr_cache, "non-zero to enable FMR caching");
-
-/*
- * 0: disable failover
- * 1: enable failover if necessary
- * 2: force to failover (for debug)
- */
-static int dev_failover;
-module_param(dev_failover, int, 0444);
-MODULE_PARM_DESC(dev_failover, "HCA failover for bonding (0 off, 1 on, other values reserved)");
-
-static int require_privileged_port;
-module_param(require_privileged_port, int, 0644);
-MODULE_PARM_DESC(require_privileged_port, "require privileged port when accepting connection");
-
-static int use_privileged_port = 1;
-module_param(use_privileged_port, int, 0644);
-MODULE_PARM_DESC(use_privileged_port, "use privileged port when initiating connection");
-
-struct kib_tunables kiblnd_tunables = {
-	.kib_dev_failover      = &dev_failover,
-	.kib_service           = &service,
-	.kib_cksum             = &cksum,
-	.kib_timeout           = &timeout,
-	.kib_keepalive         = &keepalive,
-	.kib_ntx               = &ntx,
-	.kib_default_ipif      = &ipif_name,
-	.kib_retry_count       = &retry_count,
-	.kib_rnr_retry_count   = &rnr_retry_count,
-	.kib_ib_mtu            = &ib_mtu,
-	.kib_require_priv_port = &require_privileged_port,
-	.kib_use_priv_port     = &use_privileged_port,
-	.kib_nscheds           = &nscheds
-};
-
-static struct lnet_ioctl_config_o2iblnd_tunables default_tunables;
-
-/* # messages/RDMAs in-flight */
-int kiblnd_msg_queue_size(int version, struct lnet_ni *ni)
-{
-	if (version == IBLND_MSG_VERSION_1)
-		return IBLND_MSG_QUEUE_SIZE_V1;
-	else if (ni)
-		return ni->ni_peertxcredits;
-	else
-		return peer_credits;
-}
-
-int kiblnd_tunables_setup(struct lnet_ni *ni)
-{
-	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-
-	/*
-	 * if no tunables were specified, set up the tunables with
-	 * their defaults
-	 */
-	if (!ni->ni_lnd_tunables) {
-		ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
-					      GFP_NOFS);
-		if (!ni->ni_lnd_tunables)
-			return -ENOMEM;
-
-		memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
-		       &default_tunables, sizeof(*tunables));
-	}
-	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
-	/* Current API version */
-	tunables->lnd_version = 0;
-
-	if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
-		CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
-		       *kiblnd_tunables.kib_ib_mtu);
-		return -EINVAL;
-	}
-
-	if (!ni->ni_peertimeout)
-		ni->ni_peertimeout = peer_timeout;
-
-	if (!ni->ni_maxtxcredits)
-		ni->ni_maxtxcredits = credits;
-
-	if (!ni->ni_peertxcredits)
-		ni->ni_peertxcredits = peer_credits;
-
-	if (!ni->ni_peerrtrcredits)
-		ni->ni_peerrtrcredits = peer_buffer_credits;
-
-	if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
-		ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
-
-	if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
-		ni->ni_peertxcredits = IBLND_CREDITS_MAX;
-
-	if (ni->ni_peertxcredits > credits)
-		ni->ni_peertxcredits = credits;
-
-	if (!tunables->lnd_peercredits_hiw)
-		tunables->lnd_peercredits_hiw = peer_credits_hiw;
-
-	if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
-		tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
-
-	if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
-		tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
-
-	if (tunables->lnd_map_on_demand <= 0 ||
-	    tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
-		/* Use the default */
-		CWARN("Invalid map_on_demand (%d), expected 1 - %d; using default of %d\n",
-		      tunables->lnd_map_on_demand,
-		      IBLND_MAX_RDMA_FRAGS, IBLND_DEFAULT_MAP_ON_DEMAND);
-		tunables->lnd_map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
-	}
-
-	if (tunables->lnd_map_on_demand == 1) {
-		/* it makes no sense to create a map for a single fragment */
-		tunables->lnd_map_on_demand = 2;
-	}
-
-	if (!tunables->lnd_concurrent_sends) {
-		if (tunables->lnd_map_on_demand > 0 &&
-		    tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
-			tunables->lnd_concurrent_sends =
-						ni->ni_peertxcredits * 2;
-		} else {
-			tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
-		}
-	}
-
-	if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
-		tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
-
-	if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
-		tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
-
-	if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
-		CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n",
-		      tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
-	}
-
-	if (!tunables->lnd_fmr_pool_size)
-		tunables->lnd_fmr_pool_size = fmr_pool_size;
-	if (!tunables->lnd_fmr_flush_trigger)
-		tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
-	if (!tunables->lnd_fmr_cache)
-		tunables->lnd_fmr_cache = fmr_cache;
-	if (!tunables->lnd_conns_per_peer) {
-		tunables->lnd_conns_per_peer = (conns_per_peer) ?
-			conns_per_peer : 1;
-	}
-
-	return 0;
-}
-
-void kiblnd_tunables_init(void)
-{
-	default_tunables.lnd_version = 0;
-	default_tunables.lnd_peercredits_hiw = peer_credits_hiw;
-	default_tunables.lnd_map_on_demand = map_on_demand;
-	default_tunables.lnd_concurrent_sends = concurrent_sends;
-	default_tunables.lnd_fmr_pool_size = fmr_pool_size;
-	default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
-	default_tunables.lnd_fmr_cache = fmr_cache;
-	default_tunables.lnd_conns_per_peer = conns_per_peer;
-}
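To make the high-water clamping in kiblnd_tunables_setup() above concrete, here it is run with the module defaults (peer_credits = 8, peer_credits_hiw = 0; this assumes IBLND_CREDITS_DEFAULT does not raise ni_peertxcredits above 8):

#include <stdio.h>

/* Worked instance of the peer_credits_hiw clamping: an unset value is
 * forced into [peertxcredits / 2, peertxcredits - 1].
 */
int main(void)
{
	int peertxcredits = 8;
	int hiw = 0;				/* module default: unset */

	if (hiw < peertxcredits / 2)
		hiw = peertxcredits / 2;	/* floor: 4 */
	if (hiw >= peertxcredits)
		hiw = peertxcredits - 1;	/* ceiling: 7 */

	printf("peer_credits_hiw = %d\n", hiw);	/* -> 4 */
	return 0;
}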

+ 0 - 6
drivers/staging/lustre/lnet/klnds/socklnd/Makefile

@@ -1,6 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += ksocklnd.o
-
-ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib.o

+ 0 - 2921
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c

@@ -1,2921 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/socklnd/socklnd.c
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "socklnd.h"
-
-static struct lnet_lnd the_ksocklnd;
-struct ksock_nal_data ksocknal_data;
-
-static struct ksock_interface *
-ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip)
-{
-	struct ksock_net *net = ni->ni_data;
-	int i;
-	struct ksock_interface *iface;
-
-	for (i = 0; i < net->ksnn_ninterfaces; i++) {
-		LASSERT(i < LNET_MAX_INTERFACES);
-		iface = &net->ksnn_interfaces[i];
-
-		if (iface->ksni_ipaddr == ip)
-			return iface;
-	}
-
-	return NULL;
-}
-
-static struct ksock_route *
-ksocknal_create_route(__u32 ipaddr, int port)
-{
-	struct ksock_route *route;
-
-	route = kzalloc(sizeof(*route), GFP_NOFS);
-	if (!route)
-		return NULL;
-
-	atomic_set(&route->ksnr_refcount, 1);
-	route->ksnr_peer = NULL;
-	route->ksnr_retry_interval = 0;	 /* OK to connect at any time */
-	route->ksnr_ipaddr = ipaddr;
-	route->ksnr_port = port;
-	route->ksnr_scheduled = 0;
-	route->ksnr_connecting = 0;
-	route->ksnr_connected = 0;
-	route->ksnr_deleted = 0;
-	route->ksnr_conn_count = 0;
-	route->ksnr_share_count = 0;
-
-	return route;
-}
-
-void
-ksocknal_destroy_route(struct ksock_route *route)
-{
-	LASSERT(!atomic_read(&route->ksnr_refcount));
-
-	if (route->ksnr_peer)
-		ksocknal_peer_decref(route->ksnr_peer);
-
-	kfree(route);
-}
-
-static int
-ksocknal_create_peer(struct ksock_peer **peerp, struct lnet_ni *ni,
-		     struct lnet_process_id id)
-{
-	int cpt = lnet_cpt_of_nid(id.nid);
-	struct ksock_net *net = ni->ni_data;
-	struct ksock_peer *peer;
-
-	LASSERT(id.nid != LNET_NID_ANY);
-	LASSERT(id.pid != LNET_PID_ANY);
-	LASSERT(!in_interrupt());
-
-	peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt);
-	if (!peer)
-		return -ENOMEM;
-
-	peer->ksnp_ni = ni;
-	peer->ksnp_id = id;
-	atomic_set(&peer->ksnp_refcount, 1);   /* 1 ref for caller */
-	peer->ksnp_closing = 0;
-	peer->ksnp_accepting = 0;
-	peer->ksnp_proto = NULL;
-	peer->ksnp_last_alive = 0;
-	peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
-	INIT_LIST_HEAD(&peer->ksnp_conns);
-	INIT_LIST_HEAD(&peer->ksnp_routes);
-	INIT_LIST_HEAD(&peer->ksnp_tx_queue);
-	INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
-	spin_lock_init(&peer->ksnp_lock);
-
-	spin_lock_bh(&net->ksnn_lock);
-
-	if (net->ksnn_shutdown) {
-		spin_unlock_bh(&net->ksnn_lock);
-
-		kfree(peer);
-		CERROR("Can't create peer: network shutdown\n");
-		return -ESHUTDOWN;
-	}
-
-	net->ksnn_npeers++;
-
-	spin_unlock_bh(&net->ksnn_lock);
-
-	*peerp = peer;
-	return 0;
-}
-
-void
-ksocknal_destroy_peer(struct ksock_peer *peer)
-{
-	struct ksock_net *net = peer->ksnp_ni->ni_data;
-
-	CDEBUG(D_NET, "peer %s %p deleted\n",
-	       libcfs_id2str(peer->ksnp_id), peer);
-
-	LASSERT(!atomic_read(&peer->ksnp_refcount));
-	LASSERT(!peer->ksnp_accepting);
-	LASSERT(list_empty(&peer->ksnp_conns));
-	LASSERT(list_empty(&peer->ksnp_routes));
-	LASSERT(list_empty(&peer->ksnp_tx_queue));
-	LASSERT(list_empty(&peer->ksnp_zc_req_list));
-
-	kfree(peer);
-
-	/*
-	 * NB a peer's connections and routes keep a reference on their peer
-	 * until they are destroyed, so we can be assured that _all_ state to
-	 * do with this peer has been cleaned up when its refcount drops to
-	 * zero.
-	 */
-	spin_lock_bh(&net->ksnn_lock);
-	net->ksnn_npeers--;
-	spin_unlock_bh(&net->ksnn_lock);
-}
-
-struct ksock_peer *
-ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
-{
-	struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
-	struct ksock_peer *peer;
-
-	list_for_each_entry(peer, peer_list, ksnp_list) {
-		LASSERT(!peer->ksnp_closing);
-
-		if (peer->ksnp_ni != ni)
-			continue;
-
-		if (peer->ksnp_id.nid != id.nid ||
-		    peer->ksnp_id.pid != id.pid)
-			continue;
-
-		CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
-		       peer, libcfs_id2str(id),
-		       atomic_read(&peer->ksnp_refcount));
-		return peer;
-	}
-	return NULL;
-}
-
-struct ksock_peer *
-ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id)
-{
-	struct ksock_peer *peer;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-	peer = ksocknal_find_peer_locked(ni, id);
-	if (peer)			/* +1 ref for caller? */
-		ksocknal_peer_addref(peer);
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	return peer;
-}
-
-static void
-ksocknal_unlink_peer_locked(struct ksock_peer *peer)
-{
-	int i;
-	__u32 ip;
-	struct ksock_interface *iface;
-
-	for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
-		LASSERT(i < LNET_MAX_INTERFACES);
-		ip = peer->ksnp_passive_ips[i];
-
-		iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
-		/*
-		 * All IPs in peer->ksnp_passive_ips[] come from the
-		 * interface list, therefore the call must succeed.
-		 */
-		LASSERT(iface);
-
-		CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
-		       peer, iface, iface->ksni_nroutes);
-		iface->ksni_npeers--;
-	}
-
-	LASSERT(list_empty(&peer->ksnp_conns));
-	LASSERT(list_empty(&peer->ksnp_routes));
-	LASSERT(!peer->ksnp_closing);
-	peer->ksnp_closing = 1;
-	list_del(&peer->ksnp_list);
-	/* lose peerlist's ref */
-	ksocknal_peer_decref(peer);
-}
-
-static int
-ksocknal_get_peer_info(struct lnet_ni *ni, int index,
-		       struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip,
-		       int *port, int *conn_count, int *share_count)
-{
-	struct ksock_peer *peer;
-	struct list_head *ptmp;
-	struct ksock_route *route;
-	struct list_head *rtmp;
-	int i;
-	int j;
-	int rc = -ENOENT;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-		list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
-			if (peer->ksnp_ni != ni)
-				continue;
-
-			if (!peer->ksnp_n_passive_ips &&
-			    list_empty(&peer->ksnp_routes)) {
-				if (index-- > 0)
-					continue;
-
-				*id = peer->ksnp_id;
-				*myip = 0;
-				*peer_ip = 0;
-				*port = 0;
-				*conn_count = 0;
-				*share_count = 0;
-				rc = 0;
-				goto out;
-			}
-
-			for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
-				if (index-- > 0)
-					continue;
-
-				*id = peer->ksnp_id;
-				*myip = peer->ksnp_passive_ips[j];
-				*peer_ip = 0;
-				*port = 0;
-				*conn_count = 0;
-				*share_count = 0;
-				rc = 0;
-				goto out;
-			}
-
-			list_for_each(rtmp, &peer->ksnp_routes) {
-				if (index-- > 0)
-					continue;
-
-				route = list_entry(rtmp, struct ksock_route,
-						   ksnr_list);
-
-				*id = peer->ksnp_id;
-				*myip = route->ksnr_myipaddr;
-				*peer_ip = route->ksnr_ipaddr;
-				*port = route->ksnr_port;
-				*conn_count = route->ksnr_conn_count;
-				*share_count = route->ksnr_share_count;
-				rc = 0;
-				goto out;
-			}
-		}
-	}
- out:
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-	return rc;
-}
-
-static void
-ksocknal_associate_route_conn_locked(struct ksock_route *route,
-				     struct ksock_conn *conn)
-{
-	struct ksock_peer *peer = route->ksnr_peer;
-	int type = conn->ksnc_type;
-	struct ksock_interface *iface;
-
-	conn->ksnc_route = route;
-	ksocknal_route_addref(route);
-
-	if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
-		if (!route->ksnr_myipaddr) {
-			/* route wasn't bound locally yet (the initial route) */
-			CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
-			       libcfs_id2str(peer->ksnp_id),
-			       &route->ksnr_ipaddr,
-			       &conn->ksnc_myipaddr);
-		} else {
-			CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n",
-			       libcfs_id2str(peer->ksnp_id),
-			       &route->ksnr_ipaddr,
-			       &route->ksnr_myipaddr,
-			       &conn->ksnc_myipaddr);
-
-			iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
-						  route->ksnr_myipaddr);
-			if (iface)
-				iface->ksni_nroutes--;
-		}
-		route->ksnr_myipaddr = conn->ksnc_myipaddr;
-		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
-					  route->ksnr_myipaddr);
-		if (iface)
-			iface->ksni_nroutes++;
-	}
-
-	route->ksnr_connected |= (1 << type);
-	route->ksnr_conn_count++;
-
-	/*
-	 * Successful connection => further attempts can
-	 * proceed immediately
-	 */
-	route->ksnr_retry_interval = 0;
-}
-
-static void
-ksocknal_add_route_locked(struct ksock_peer *peer, struct ksock_route *route)
-{
-	struct list_head *tmp;
-	struct ksock_conn *conn;
-	struct ksock_route *route2;
-
-	LASSERT(!peer->ksnp_closing);
-	LASSERT(!route->ksnr_peer);
-	LASSERT(!route->ksnr_scheduled);
-	LASSERT(!route->ksnr_connecting);
-	LASSERT(!route->ksnr_connected);
-
-	/* LASSERT(unique) */
-	list_for_each(tmp, &peer->ksnp_routes) {
-		route2 = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
-			CERROR("Duplicate route %s %pI4h\n",
-			       libcfs_id2str(peer->ksnp_id),
-			       &route->ksnr_ipaddr);
-			LBUG();
-		}
-	}
-
-	route->ksnr_peer = peer;
-	ksocknal_peer_addref(peer);
-	/* peer's routelist takes over my ref on 'route' */
-	list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-
-	list_for_each(tmp, &peer->ksnp_conns) {
-		conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-		if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
-			continue;
-
-		ksocknal_associate_route_conn_locked(route, conn);
-		/* keep going (typed routes) */
-	}
-}
-
-static void
-ksocknal_del_route_locked(struct ksock_route *route)
-{
-	struct ksock_peer *peer = route->ksnr_peer;
-	struct ksock_interface *iface;
-	struct ksock_conn *conn;
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-
-	LASSERT(!route->ksnr_deleted);
-
-	/* Close associated conns */
-	list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
-		conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
-		if (conn->ksnc_route != route)
-			continue;
-
-		ksocknal_close_conn_locked(conn, 0);
-	}
-
-	if (route->ksnr_myipaddr) {
-		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
-					  route->ksnr_myipaddr);
-		if (iface)
-			iface->ksni_nroutes--;
-	}
-
-	route->ksnr_deleted = 1;
-	list_del(&route->ksnr_list);
-	ksocknal_route_decref(route);	     /* drop peer's ref */
-
-	if (list_empty(&peer->ksnp_routes) &&
-	    list_empty(&peer->ksnp_conns)) {
-		/*
-		 * I've just removed the last route to a peer with no active
-		 * connections
-		 */
-		ksocknal_unlink_peer_locked(peer);
-	}
-}
-
-int
-ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
-		  int port)
-{
-	struct ksock_peer *peer;
-	struct ksock_peer *peer2;
-	struct ksock_route *route;
-	struct ksock_route *route2;
-	int rc;
-
-	if (id.nid == LNET_NID_ANY ||
-	    id.pid == LNET_PID_ANY)
-		return -EINVAL;
-
-	/* Have a brand new peer ready... */
-	rc = ksocknal_create_peer(&peer, ni, id);
-	if (rc)
-		return rc;
-
-	route = ksocknal_create_route(ipaddr, port);
-	if (!route) {
-		ksocknal_peer_decref(peer);
-		return -ENOMEM;
-	}
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	/* always called with a ref on ni, so shutdown can't have started */
-	LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
-
-	peer2 = ksocknal_find_peer_locked(ni, id);
-	if (peer2) {
-		ksocknal_peer_decref(peer);
-		peer = peer2;
-	} else {
-		/* peer table takes my ref on peer */
-		list_add_tail(&peer->ksnp_list,
-			      ksocknal_nid2peerlist(id.nid));
-	}
-
-	list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
-		if (route2->ksnr_ipaddr == ipaddr) {
-			/* Route already exists, use the old one */
-			ksocknal_route_decref(route);
-			route2->ksnr_share_count++;
-			goto out;
-		}
-	}
-	/* Route doesn't already exist, add the new one */
-	ksocknal_add_route_locked(peer, route);
-	route->ksnr_share_count++;
-out:
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	return 0;
-}
-
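ksocknal_add_peer() above is a textbook instance of optimistic allocation: the peer and route are created before taking the write lock (allocation can sleep), and the fresh peer is simply dropped if a racing thread already inserted one. A generic userspace sketch of the same shape (the one-slot table and pthread lock are placeholders):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;
static void *table_slot;	/* stand-in for the peer hash table */

static int add_entry(size_t size)
{
	void *fresh = malloc(size);	/* may block: done unlocked */

	if (!fresh)
		return -1;

	pthread_mutex_lock(&tbl_lock);
	if (table_slot) {
		/* lost the race: keep the existing entry, free ours */
		pthread_mutex_unlock(&tbl_lock);
		free(fresh);
		return 0;
	}
	table_slot = fresh;		/* table takes over our reference */
	pthread_mutex_unlock(&tbl_lock);
	return 0;
}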
-static void
-ksocknal_del_peer_locked(struct ksock_peer *peer, __u32 ip)
-{
-	struct ksock_conn *conn;
-	struct ksock_route *route;
-	struct list_head *tmp;
-	struct list_head *nxt;
-	int nshared;
-
-	LASSERT(!peer->ksnp_closing);
-
-	/* Extra ref prevents peer disappearing until I'm done with it */
-	ksocknal_peer_addref(peer);
-
-	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		/* no match */
-		if (!(!ip || route->ksnr_ipaddr == ip))
-			continue;
-
-		route->ksnr_share_count = 0;
-		/* This deletes associated conns too */
-		ksocknal_del_route_locked(route);
-	}
-
-	nshared = 0;
-	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-		nshared += route->ksnr_share_count;
-	}
-
-	if (!nshared) {
-		/*
-		 * remove everything else if there are no explicit entries
-		 * left
-		 */
-		list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-			route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-			/* we should only be removing auto-entries */
-			LASSERT(!route->ksnr_share_count);
-			ksocknal_del_route_locked(route);
-		}
-
-		list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
-			conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-			ksocknal_close_conn_locked(conn, 0);
-		}
-	}
-
-	ksocknal_peer_decref(peer);
-	/* NB peer unlinks itself when last conn/route is removed */
-}
-
-static int
-ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip)
-{
-	LIST_HEAD(zombies);
-	struct list_head *ptmp;
-	struct list_head *pnxt;
-	struct ksock_peer *peer;
-	int lo;
-	int hi;
-	int i;
-	int rc = -ENOENT;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	if (id.nid != LNET_NID_ANY) {
-		lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
-		hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
-	} else {
-		lo = 0;
-		hi = ksocknal_data.ksnd_peer_hash_size - 1;
-	}
-
-	for (i = lo; i <= hi; i++) {
-		list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
-			if (peer->ksnp_ni != ni)
-				continue;
-
-			if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
-			      (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
-				continue;
-
-			ksocknal_peer_addref(peer);     /* a ref for me... */
-
-			ksocknal_del_peer_locked(peer, ip);
-
-			if (peer->ksnp_closing &&
-			    !list_empty(&peer->ksnp_tx_queue)) {
-				LASSERT(list_empty(&peer->ksnp_conns));
-				LASSERT(list_empty(&peer->ksnp_routes));
-
-				list_splice_init(&peer->ksnp_tx_queue,
-						 &zombies);
-			}
-
-			ksocknal_peer_decref(peer);     /* ...till here */
-
-			rc = 0;		 /* matched! */
-		}
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	ksocknal_txlist_done(ni, &zombies, 1);
-
-	return rc;
-}
-
-static struct ksock_conn *
-ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index)
-{
-	struct ksock_peer *peer;
-	struct list_head *ptmp;
-	struct ksock_conn *conn;
-	struct list_head *ctmp;
-	int i;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-		list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
-			LASSERT(!peer->ksnp_closing);
-
-			if (peer->ksnp_ni != ni)
-				continue;
-
-			list_for_each(ctmp, &peer->ksnp_conns) {
-				if (index-- > 0)
-					continue;
-
-				conn = list_entry(ctmp, struct ksock_conn,
-						  ksnc_list);
-				ksocknal_conn_addref(conn);
-				read_unlock(&ksocknal_data.ksnd_global_lock);
-				return conn;
-			}
-		}
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-	return NULL;
-}
-
-static struct ksock_sched *
-ksocknal_choose_scheduler_locked(unsigned int cpt)
-{
-	struct ksock_sched_info	*info = ksocknal_data.ksnd_sched_info[cpt];
-	struct ksock_sched *sched;
-	int i;
-
-	LASSERT(info->ksi_nthreads > 0);
-
-	sched = &info->ksi_scheds[0];
-	/*
-	 * NB: it's safe so far, but info->ksi_nthreads could be changed
-	 * NB: this is safe for now, but info->ksi_nthreads could change
-	 * at runtime once we have dynamic LNet configuration; we will
-	 * need to take care of this then.
-	for (i = 1; i < info->ksi_nthreads; i++) {
-		if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
-			sched = &info->ksi_scheds[i];
-	}
-
-	return sched;
-}
-
-static int
-ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs)
-{
-	struct ksock_net *net = ni->ni_data;
-	int i;
-	int nip;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	nip = net->ksnn_ninterfaces;
-	LASSERT(nip <= LNET_MAX_INTERFACES);
-
-	/*
-	 * Only offer interfaces for additional connections if I have
-	 * more than one.
-	 */
-	if (nip < 2) {
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-		return 0;
-	}
-
-	for (i = 0; i < nip; i++) {
-		ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
-		LASSERT(ipaddrs[i]);
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-	return nip;
-}
-
-static int
-ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips)
-{
-	int best_netmatch = 0;
-	int best_xor      = 0;
-	int best	  = -1;
-	int this_xor;
-	int this_netmatch;
-	int i;
-
-	for (i = 0; i < nips; i++) {
-		if (!ips[i])
-			continue;
-
-		this_xor = ips[i] ^ iface->ksni_ipaddr;
-		this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0;
-
-		if (!(best < 0 ||
-		      best_netmatch < this_netmatch ||
-		      (best_netmatch == this_netmatch &&
-		       best_xor > this_xor)))
-			continue;
-
-		best = i;
-		best_netmatch = this_netmatch;
-		best_xor = this_xor;
-	}
-
-	LASSERT(best >= 0);
-	return best;
-}
-
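The ranking rule in ksocknal_match_peerip() above is: prefer peer IPs on the interface's own subnet, and break ties with the smallest XOR distance to the interface address. A standalone run with illustrative addresses:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t iface_ip = 0xc0a8010a;	/* 192.168.1.10 */
	const uint32_t netmask  = 0xffffff00;	/* /24 */
	const uint32_t ips[3] = {
		0xc0a80205,	/* 192.168.2.5: off-subnet */
		0xc0a80177,	/* 192.168.1.119: on-subnet, xor = 125 */
		0xc0a80114,	/* 192.168.1.20: on-subnet, xor = 30 */
	};
	int best = -1, best_net = 0;
	uint32_t best_xor = 0;

	for (int i = 0; i < 3; i++) {
		uint32_t x = ips[i] ^ iface_ip;
		int net = !(x & netmask);

		if (best < 0 || best_net < net ||
		    (best_net == net && best_xor > x)) {
			best = i;
			best_net = net;
			best_xor = x;
		}
	}
	printf("best = index %d\n", best);	/* -> 2 (192.168.1.20) */
	return 0;
}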
-static int
-ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips)
-{
-	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
-	struct ksock_net *net = peer->ksnp_ni->ni_data;
-	struct ksock_interface *iface;
-	struct ksock_interface *best_iface;
-	int n_ips;
-	int i;
-	int j;
-	int k;
-	__u32 ip;
-	__u32 xor;
-	int this_netmatch;
-	int best_netmatch;
-	int best_npeers;
-
-	/*
-	 * CAVEAT EMPTOR: We do all our interface matching with an
-	 * exclusive hold of global lock at IRQ priority.  We're only
-	 * expecting to be dealing with small numbers of interfaces, so the
-	 * O(n**3)-ness shouldn't matter
-	 */
-	/*
-	 * Also note that I'm not going to return more than n_peerips
-	 * interfaces, even if I have more myself
-	 */
-	write_lock_bh(global_lock);
-
-	LASSERT(n_peerips <= LNET_MAX_INTERFACES);
-	LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
-	/*
-	 * Only match interfaces for additional connections
-	 * if I have > 1 interface
-	 */
-	n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
-		min(n_peerips, net->ksnn_ninterfaces);
-
-	for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
-		/*	      ^ yes really... */
-
-		/*
-		 * If we have any new interfaces, first tick off all the
-		 * peer IPs that match old interfaces, then choose new
-		 * interfaces to match the remaining peer IPs.
-		 * We don't forget interfaces we've stopped using; we might
-		 * start using them again...
-		 */
-		if (i < peer->ksnp_n_passive_ips) {
-			/* Old interface. */
-			ip = peer->ksnp_passive_ips[i];
-			best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
-
-			/* peer passive ips are kept up to date */
-			LASSERT(best_iface);
-		} else {
-			/* choose a new interface */
-			LASSERT(i == peer->ksnp_n_passive_ips);
-
-			best_iface = NULL;
-			best_netmatch = 0;
-			best_npeers = 0;
-
-			for (j = 0; j < net->ksnn_ninterfaces; j++) {
-				iface = &net->ksnn_interfaces[j];
-				ip = iface->ksni_ipaddr;
-
-				for (k = 0; k < peer->ksnp_n_passive_ips; k++)
-					if (peer->ksnp_passive_ips[k] == ip)
-						break;
-
-				if (k < peer->ksnp_n_passive_ips) /* using it already */
-					continue;
-
-				k = ksocknal_match_peerip(iface, peerips,
-							  n_peerips);
-				xor = ip ^ peerips[k];
-				this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0;
-
-				if (!(!best_iface ||
-				      best_netmatch < this_netmatch ||
-				      (best_netmatch == this_netmatch &&
-				       best_npeers > iface->ksni_npeers)))
-					continue;
-
-				best_iface = iface;
-				best_netmatch = this_netmatch;
-				best_npeers = iface->ksni_npeers;
-			}
-
-			LASSERT(best_iface);
-
-			best_iface->ksni_npeers++;
-			ip = best_iface->ksni_ipaddr;
-			peer->ksnp_passive_ips[i] = ip;
-			peer->ksnp_n_passive_ips = i + 1;
-		}
-
-		/* mark the best matching peer IP used */
-		j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
-		peerips[j] = 0;
-	}
-
-	/* Overwrite input peer IP addresses */
-	memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
-
-	write_unlock_bh(global_lock);
-
-	return n_ips;
-}
-
-static void
-ksocknal_create_routes(struct ksock_peer *peer, int port,
-		       __u32 *peer_ipaddrs, int npeer_ipaddrs)
-{
-	struct ksock_route *newroute = NULL;
-	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
-	struct lnet_ni *ni = peer->ksnp_ni;
-	struct ksock_net *net = ni->ni_data;
-	struct list_head *rtmp;
-	struct ksock_route *route;
-	struct ksock_interface *iface;
-	struct ksock_interface *best_iface;
-	int best_netmatch;
-	int this_netmatch;
-	int best_nroutes;
-	int i;
-	int j;
-
-	/*
-	 * CAVEAT EMPTOR: We do all our interface matching with an
-	 * exclusive hold of global lock at IRQ priority.  We're only
-	 * expecting to be dealing with small numbers of interfaces, so the
-	 * O(n**3)-ness here shouldn't matter
-	 */
-	write_lock_bh(global_lock);
-
-	if (net->ksnn_ninterfaces < 2) {
-		/*
-		 * Only create additional connections
-		 * if I have > 1 interface
-		 */
-		write_unlock_bh(global_lock);
-		return;
-	}
-
-	LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES);
-
-	for (i = 0; i < npeer_ipaddrs; i++) {
-		if (newroute) {
-			newroute->ksnr_ipaddr = peer_ipaddrs[i];
-		} else {
-			write_unlock_bh(global_lock);
-
-			newroute = ksocknal_create_route(peer_ipaddrs[i], port);
-			if (!newroute)
-				return;
-
-			write_lock_bh(global_lock);
-		}
-
-		if (peer->ksnp_closing) {
-			/* peer got closed under me */
-			break;
-		}
-
-		/* Already got a route? */
-		route = NULL;
-		list_for_each(rtmp, &peer->ksnp_routes) {
-			route = list_entry(rtmp, struct ksock_route, ksnr_list);
-
-			if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
-				break;
-
-			route = NULL;
-		}
-		if (route)
-			continue;
-
-		best_iface = NULL;
-		best_nroutes = 0;
-		best_netmatch = 0;
-
-		LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
-		/* Select interface to connect from */
-		for (j = 0; j < net->ksnn_ninterfaces; j++) {
-			iface = &net->ksnn_interfaces[j];
-
-			/* Using this interface already? */
-			list_for_each(rtmp, &peer->ksnp_routes) {
-				route = list_entry(rtmp, struct ksock_route,
-						   ksnr_list);
-
-				if (route->ksnr_myipaddr == iface->ksni_ipaddr)
-					break;
-
-				route = NULL;
-			}
-			if (route)
-				continue;
-
-			this_netmatch = (!((iface->ksni_ipaddr ^
-					   newroute->ksnr_ipaddr) &
-					   iface->ksni_netmask)) ? 1 : 0;
-
-			if (!(!best_iface ||
-			      best_netmatch < this_netmatch ||
-			      (best_netmatch == this_netmatch &&
-			       best_nroutes > iface->ksni_nroutes)))
-				continue;
-
-			best_iface = iface;
-			best_netmatch = this_netmatch;
-			best_nroutes = iface->ksni_nroutes;
-		}
-
-		if (!best_iface)
-			continue;
-
-		newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
-		best_iface->ksni_nroutes++;
-
-		ksocknal_add_route_locked(peer, newroute);
-		newroute = NULL;
-	}
-
-	write_unlock_bh(global_lock);
-	if (newroute)
-		ksocknal_route_decref(newroute);
-}
-
-int
-ksocknal_accept(struct lnet_ni *ni, struct socket *sock)
-{
-	struct ksock_connreq *cr;
-	int rc;
-	__u32 peer_ip;
-	int peer_port;
-
-	rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
-	LASSERT(!rc);		      /* we succeeded before */
-
-	cr = kzalloc(sizeof(*cr), GFP_NOFS);
-	if (!cr) {
-		LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n",
-				   &peer_ip);
-		return -ENOMEM;
-	}
-
-	lnet_ni_addref(ni);
-	cr->ksncr_ni   = ni;
-	cr->ksncr_sock = sock;
-
-	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-
-	list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
-	wake_up(&ksocknal_data.ksnd_connd_waitq);
-
-	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-	return 0;
-}
-
-static int
-ksocknal_connecting(struct ksock_peer *peer, __u32 ipaddr)
-{
-	struct ksock_route *route;
-
-	list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
-		if (route->ksnr_ipaddr == ipaddr)
-			return route->ksnr_connecting;
-	}
-	return 0;
-}
-
-int
-ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
-		     struct socket *sock, int type)
-{
-	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
-	LIST_HEAD(zombies);
-	struct lnet_process_id peerid;
-	struct list_head *tmp;
-	__u64 incarnation;
-	struct ksock_conn *conn;
-	struct ksock_conn *conn2;
-	struct ksock_peer *peer = NULL;
-	struct ksock_peer *peer2;
-	struct ksock_sched *sched;
-	struct ksock_hello_msg *hello;
-	int cpt;
-	struct ksock_tx *tx;
-	struct ksock_tx *txtmp;
-	int rc;
-	int active;
-	char *warn = NULL;
-
-	active = !!route;
-
-	LASSERT(active == (type != SOCKLND_CONN_NONE));
-
-	conn = kzalloc(sizeof(*conn), GFP_NOFS);
-	if (!conn) {
-		rc = -ENOMEM;
-		goto failed_0;
-	}
-
-	conn->ksnc_peer = NULL;
-	conn->ksnc_route = NULL;
-	conn->ksnc_sock = sock;
-	/*
-	 * 2 ref, 1 for conn, another extra ref prevents socket
-	 * being closed before establishment of connection
-	 */
-	atomic_set(&conn->ksnc_sock_refcount, 2);
-	conn->ksnc_type = type;
-	ksocknal_lib_save_callback(sock, conn);
-	atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
-
-	conn->ksnc_rx_ready = 0;
-	conn->ksnc_rx_scheduled = 0;
-
-	INIT_LIST_HEAD(&conn->ksnc_tx_queue);
-	conn->ksnc_tx_ready = 0;
-	conn->ksnc_tx_scheduled = 0;
-	conn->ksnc_tx_carrier = NULL;
-	atomic_set(&conn->ksnc_tx_nob, 0);
-
-	hello = kvzalloc(offsetof(struct ksock_hello_msg,
-				  kshm_ips[LNET_MAX_INTERFACES]),
-			 GFP_KERNEL);
-	if (!hello) {
-		rc = -ENOMEM;
-		goto failed_1;
-	}
-
-	/* stash conn's local and remote addrs */
-	rc = ksocknal_lib_get_conn_addrs(conn);
-	if (rc)
-		goto failed_1;
-
-	/*
-	 * Find out/confirm peer's NID and connection type and get the
-	 * vector of interfaces she's willing to let me connect to.
-	 * Passive connections use the listener timeout since the peer sends
-	 * eagerly
-	 */
-	if (active) {
-		peer = route->ksnr_peer;
-		LASSERT(ni == peer->ksnp_ni);
-
-		/* Active connection sends HELLO eagerly */
-		hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
-		peerid = peer->ksnp_id;
-
-		write_lock_bh(global_lock);
-		conn->ksnc_proto = peer->ksnp_proto;
-		write_unlock_bh(global_lock);
-
-		if (!conn->ksnc_proto) {
-			conn->ksnc_proto = &ksocknal_protocol_v3x;
-#if SOCKNAL_VERSION_DEBUG
-			if (*ksocknal_tunables.ksnd_protocol == 2)
-				conn->ksnc_proto = &ksocknal_protocol_v2x;
-			else if (*ksocknal_tunables.ksnd_protocol == 1)
-				conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
-		}
-
-		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
-		if (rc)
-			goto failed_1;
-	} else {
-		peerid.nid = LNET_NID_ANY;
-		peerid.pid = LNET_PID_ANY;
-
-		/* Passive, get protocol from peer */
-		conn->ksnc_proto = NULL;
-	}
-
-	rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation);
-	if (rc < 0)
-		goto failed_1;
-
-	LASSERT(!rc || active);
-	LASSERT(conn->ksnc_proto);
-	LASSERT(peerid.nid != LNET_NID_ANY);
-
-	cpt = lnet_cpt_of_nid(peerid.nid);
-
-	if (active) {
-		ksocknal_peer_addref(peer);
-		write_lock_bh(global_lock);
-	} else {
-		rc = ksocknal_create_peer(&peer, ni, peerid);
-		if (rc)
-			goto failed_1;
-
-		write_lock_bh(global_lock);
-
-		/* called with a ref on ni, so shutdown can't have started */
-		LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
-
-		peer2 = ksocknal_find_peer_locked(ni, peerid);
-		if (!peer2) {
-			/*
-			 * NB this puts an "empty" peer in the peer
-			 * table (which takes my ref)
-			 */
-			list_add_tail(&peer->ksnp_list,
-				      ksocknal_nid2peerlist(peerid.nid));
-		} else {
-			ksocknal_peer_decref(peer);
-			peer = peer2;
-		}
-
-		/* +1 ref for me */
-		ksocknal_peer_addref(peer);
-		peer->ksnp_accepting++;
-
-		/*
-		 * Am I already connecting to this guy?  Resolve in
-		 * favour of higher NID...
-		 */
-		if (peerid.nid < ni->ni_nid &&
-		    ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
-			rc = EALREADY;
-			warn = "connection race resolution";
-			goto failed_2;
-		}
-	}
-
-	if (peer->ksnp_closing ||
-	    (active && route->ksnr_deleted)) {
-		/* peer/route got closed under me */
-		rc = -ESTALE;
-		warn = "peer/route removed";
-		goto failed_2;
-	}
-
-	if (!peer->ksnp_proto) {
-		/*
-		 * Never connected before.
-		 * NB recv_hello may have returned EPROTO to signal my peer
-		 * wants a different protocol than the one I asked for.
-		 */
-		LASSERT(list_empty(&peer->ksnp_conns));
-
-		peer->ksnp_proto = conn->ksnc_proto;
-		peer->ksnp_incarnation = incarnation;
-	}
-
-	if (peer->ksnp_proto != conn->ksnc_proto ||
-	    peer->ksnp_incarnation != incarnation) {
-		/* Peer rebooted or I've got the wrong protocol version */
-		ksocknal_close_peer_conns_locked(peer, 0, 0);
-
-		peer->ksnp_proto = NULL;
-		rc = ESTALE;
-		warn = peer->ksnp_incarnation != incarnation ?
-		       "peer rebooted" :
-		       "wrong proto version";
-		goto failed_2;
-	}
-
-	switch (rc) {
-	default:
-		LBUG();
-	case 0:
-		break;
-	case EALREADY:
-		warn = "lost conn race";
-		goto failed_2;
-	case EPROTO:
-		warn = "retry with different protocol version";
-		goto failed_2;
-	}
-
-	/*
-	 * Refuse to duplicate an existing connection, unless this is a
-	 * loopback connection
-	 */
-	if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
-		list_for_each(tmp, &peer->ksnp_conns) {
-			conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-			if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
-			    conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
-			    conn2->ksnc_type != conn->ksnc_type)
-				continue;
-
-			/*
-			 * Reply on a passive connection attempt so the peer
-			 * realises we're connected.
-			 */
-			LASSERT(!rc);
-			if (!active)
-				rc = EALREADY;
-
-			warn = "duplicate";
-			goto failed_2;
-		}
-	}
-
-	/*
-	 * If the connection created by this route didn't bind to the IP
-	 * address the route connected to, the connection/route matching
-	 * code below probably isn't going to work.
-	 */
-	if (active &&
-	    route->ksnr_ipaddr != conn->ksnc_ipaddr) {
-		CERROR("Route %s %pI4h connected to %pI4h\n",
-		       libcfs_id2str(peer->ksnp_id),
-		       &route->ksnr_ipaddr,
-		       &conn->ksnc_ipaddr);
-	}
-
-	/*
-	 * Search for a route corresponding to the new connection and
-	 * create an association.  This allows incoming connections created
-	 * by routes in my peer to match my own route entries so I don't
-	 * continually create duplicate routes.
-	 */
-	list_for_each(tmp, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
-			continue;
-
-		ksocknal_associate_route_conn_locked(route, conn);
-		break;
-	}
-
-	conn->ksnc_peer = peer;		 /* conn takes my ref on peer */
-	peer->ksnp_last_alive = jiffies;
-	peer->ksnp_send_keepalive = 0;
-	peer->ksnp_error = 0;
-
-	sched = ksocknal_choose_scheduler_locked(cpt);
-	sched->kss_nconns++;
-	conn->ksnc_scheduler = sched;
-
-	conn->ksnc_tx_last_post = jiffies;
-	/* Set the deadline for the outgoing HELLO to drain */
-	conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
-	conn->ksnc_tx_deadline = jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-	mb();   /* order with adding to peer's conn list */
-
-	list_add(&conn->ksnc_list, &peer->ksnp_conns);
-	ksocknal_conn_addref(conn);
-
-	ksocknal_new_packet(conn, 0);
-
-	conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
-
-	/* Take packets blocking for this connection. */
-	list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
-		int match = conn->ksnc_proto->pro_match_tx(conn, tx,
-							   tx->tx_nonblk);
-
-		if (match == SOCKNAL_MATCH_NO)
-			continue;
-
-		list_del(&tx->tx_list);
-		ksocknal_queue_tx_locked(tx, conn);
-	}
-
-	write_unlock_bh(global_lock);
-
-	/*
-	 * We've now got a new connection.  Any errors from here on are just
-	 * like "normal" comms errors and we close the connection normally.
-	 * NB (a) we still have to send the reply HELLO for passive
-	 *	connections,
-	 *    (b) normal I/O on the conn is blocked until I setup and call the
-	 *	socket callbacks.
-	 */
-	CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n",
-	       libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
-	       &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
-	       conn->ksnc_port, incarnation, cpt,
-	       (int)(sched - &sched->kss_info->ksi_scheds[0]));
-
-	if (active) {
-		/* additional routes after interface exchange? */
-		ksocknal_create_routes(peer, conn->ksnc_port,
-				       hello->kshm_ips, hello->kshm_nips);
-	} else {
-		hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
-						       hello->kshm_nips);
-		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
-	}
-
-	kvfree(hello);
-
-	/*
-	 * setup the socket AFTER I've received hello (it disables
-	 * SO_LINGER).  I might call back to the acceptor who may want
-	 * to send a protocol version response and then close the
-	 * socket; this ensures the socket only tears down after the
-	 * response has been sent.
-	 */
-	if (!rc)
-		rc = ksocknal_lib_setup_sock(sock);
-
-	write_lock_bh(global_lock);
-
-	/* NB my callbacks block while I hold ksnd_global_lock */
-	ksocknal_lib_set_callback(sock, conn);
-
-	if (!active)
-		peer->ksnp_accepting--;
-
-	write_unlock_bh(global_lock);
-
-	if (rc) {
-		write_lock_bh(global_lock);
-		if (!conn->ksnc_closing) {
-			/* could be closed by another thread */
-			ksocknal_close_conn_locked(conn, rc);
-		}
-		write_unlock_bh(global_lock);
-	} else if (!ksocknal_connsock_addref(conn)) {
-		/* Allow I/O to proceed. */
-		ksocknal_read_callback(conn);
-		ksocknal_write_callback(conn);
-		ksocknal_connsock_decref(conn);
-	}
-
-	ksocknal_connsock_decref(conn);
-	ksocknal_conn_decref(conn);
-	return rc;
-
- failed_2:
-	if (!peer->ksnp_closing &&
-	    list_empty(&peer->ksnp_conns) &&
-	    list_empty(&peer->ksnp_routes)) {
-		list_add(&zombies, &peer->ksnp_tx_queue);
-		list_del_init(&peer->ksnp_tx_queue);
-		ksocknal_unlink_peer_locked(peer);
-	}
-
-	write_unlock_bh(global_lock);
-
-	if (warn) {
-		if (rc < 0)
-			CERROR("Not creating conn %s type %d: %s\n",
-			       libcfs_id2str(peerid), conn->ksnc_type, warn);
-		else
-			CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
-			       libcfs_id2str(peerid), conn->ksnc_type, warn);
-	}
-
-	if (!active) {
-		if (rc > 0) {
-			/*
-			 * Request retry by replying with CONN_NONE;
-			 * ksnc_proto has been set already.
-			 */
-			conn->ksnc_type = SOCKLND_CONN_NONE;
-			hello->kshm_nips = 0;
-			ksocknal_send_hello(ni, conn, peerid.nid, hello);
-		}
-
-		write_lock_bh(global_lock);
-		peer->ksnp_accepting--;
-		write_unlock_bh(global_lock);
-	}
-
-	ksocknal_txlist_done(ni, &zombies, 1);
-	ksocknal_peer_decref(peer);
-
-failed_1:
-	kvfree(hello);
-
-	kfree(conn);
-
-failed_0:
-	sock_release(sock);
-	return rc;
-}
-
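One detail worth calling out from the function above is the connection-race rule: when two nodes dial each other simultaneously, the passive side refuses the incoming attempt only if the dialler has the lower NID and a connect of its own is already in flight, so the higher-NID node's active connection survives. A hedged, standalone restatement of the predicate (NID values illustrative):

#include <stdint.h>
#include <stdio.h>

/* Sketch of the race tie-break in ksocknal_create_conn(): on the passive
 * side, yield (EALREADY) iff the peer's NID is lower than ours and we are
 * still connecting to that peer ourselves, so exactly one of the two
 * simultaneous connections wins.
 */
static int passive_side_yields(uint64_t peer_nid, uint64_t my_nid,
                               int i_am_connecting_to_peer)
{
	return peer_nid < my_nid && i_am_connecting_to_peer;
}

int main(void)
{
	printf("%d\n", passive_side_yields(0x10, 0x20, 1)); /* 1: my connect wins */
	printf("%d\n", passive_side_yields(0x30, 0x20, 1)); /* 0: peer's connect wins */
	return 0;
}
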
-void
-ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
-{
-	/*
-	 * This just does the immediate housekeeping, and queues the
-	 * connection for the reaper to terminate.
-	 * Caller holds ksnd_global_lock exclusively in irq context
-	 */
-	struct ksock_peer *peer = conn->ksnc_peer;
-	struct ksock_route *route;
-	struct ksock_conn *conn2;
-	struct list_head *tmp;
-
-	LASSERT(!peer->ksnp_error);
-	LASSERT(!conn->ksnc_closing);
-	conn->ksnc_closing = 1;
-
-	/* ksnd_deathrow_conns takes over peer's ref */
-	list_del(&conn->ksnc_list);
-
-	route = conn->ksnc_route;
-	if (route) {
-		/* dissociate conn from route... */
-		LASSERT(!route->ksnr_deleted);
-		LASSERT(route->ksnr_connected & (1 << conn->ksnc_type));
-
-		conn2 = NULL;
-		list_for_each(tmp, &peer->ksnp_conns) {
-			conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-			if (conn2->ksnc_route == route &&
-			    conn2->ksnc_type == conn->ksnc_type)
-				break;
-
-			conn2 = NULL;
-		}
-		if (!conn2)
-			route->ksnr_connected &= ~(1 << conn->ksnc_type);
-
-		conn->ksnc_route = NULL;
-
-		ksocknal_route_decref(route);     /* drop conn's ref on route */
-	}
-
-	if (list_empty(&peer->ksnp_conns)) {
-		/* No more connections to this peer */
-
-		if (!list_empty(&peer->ksnp_tx_queue)) {
-			struct ksock_tx *tx;
-
-			LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
-
-			/*
-			 * throw them to the last connection...,
-			 * these TXs will be sent to /dev/null by the scheduler
-			 */
-			list_for_each_entry(tx, &peer->ksnp_tx_queue,
-					    tx_list)
-				ksocknal_tx_prep(conn, tx);
-
-			spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
-			list_splice_init(&peer->ksnp_tx_queue,
-					 &conn->ksnc_tx_queue);
-			spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
-		}
-
-		peer->ksnp_proto = NULL;  /* renegotiate protocol version */
-		peer->ksnp_error = error; /* stash last conn close reason */
-
-		if (list_empty(&peer->ksnp_routes)) {
-			/*
-			 * I've just closed last conn belonging to a
-			 * peer with no routes to it
-			 */
-			ksocknal_unlink_peer_locked(peer);
-		}
-	}
-
-	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-	list_add_tail(&conn->ksnc_list,
-		      &ksocknal_data.ksnd_deathrow_conns);
-	wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
-	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-}
-
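The `ksnr_connected & (1 << conn->ksnc_type)` bookkeeping above keeps one bit per connection type on each route, set while at least one conn of that type exists. A tiny illustration of the idiom (the type values here are made up, not the SOCKLND_CONN_* constants):

#include <stdio.h>

/* Sketch of per-route connection-type tracking: one bit per type, set on
 * establish, cleared when the last conn of that type closes.
 */
enum { CONN_CONTROL = 1, CONN_BULK_IN = 2, CONN_BULK_OUT = 3 };

int main(void)
{
	unsigned int connected = 0;

	connected |= 1 << CONN_CONTROL;		/* control conn up */
	connected |= 1 << CONN_BULK_IN;		/* bulk-in conn up */
	connected &= ~(1 << CONN_BULK_IN);	/* last bulk-in closed */

	printf("control still connected: %d\n",
	       !!(connected & (1 << CONN_CONTROL)));
	return 0;
}
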
-void
-ksocknal_peer_failed(struct ksock_peer *peer)
-{
-	int notify = 0;
-	unsigned long last_alive = 0;
-
-	/*
-	 * There has been a connection failure or comms error; but I'll only
-	 * tell LNET I think the peer is dead if it's to another kernel and
-	 * there are no connections or connection attempts in existence.
-	 */
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) &&
-	    list_empty(&peer->ksnp_conns) &&
-	    !peer->ksnp_accepting &&
-	    !ksocknal_find_connecting_route_locked(peer)) {
-		notify = 1;
-		last_alive = peer->ksnp_last_alive;
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	if (notify)
-		lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
-			    last_alive);
-}
-
-void
-ksocknal_finalize_zcreq(struct ksock_conn *conn)
-{
-	struct ksock_peer *peer = conn->ksnc_peer;
-	struct ksock_tx *tx;
-	struct ksock_tx *temp;
-	struct ksock_tx *tmp;
-	LIST_HEAD(zlist);
-
-	/*
-	 * NB safe to finalize TXs because closing of socket will
-	 * abort all buffered data
-	 */
-	LASSERT(!conn->ksnc_sock);
-
-	spin_lock(&peer->ksnp_lock);
-
-	list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
-		if (tx->tx_conn != conn)
-			continue;
-
-		LASSERT(tx->tx_msg.ksm_zc_cookies[0]);
-
-		tx->tx_msg.ksm_zc_cookies[0] = 0;
-		tx->tx_zc_aborted = 1; /* mark it as not-acked */
-		list_del(&tx->tx_zc_list);
-		list_add(&tx->tx_zc_list, &zlist);
-	}
-
-	spin_unlock(&peer->ksnp_lock);
-
-	list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
-		list_del(&tx->tx_zc_list);
-		ksocknal_tx_decref(tx);
-	}
-}
-
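ksocknal_finalize_zcreq() above is a textbook instance of a useful pattern: unlink the victims onto a private list while holding the spinlock, then drop the lock before doing the per-entry teardown. A generic userspace sketch of the pattern (all names hypothetical):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Move the entries to reap onto a private list under the lock, then free
 * them with the lock dropped, so teardown never runs in the critical
 * section.
 */
struct node { int victim; struct node *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *shared_list;

static void reap_victims(void)
{
	struct node *zlist = NULL, **pp, *n;

	pthread_mutex_lock(&lock);
	pp = &shared_list;
	while ((n = *pp)) {
		if (n->victim) {
			*pp = n->next;		/* unlink under lock */
			n->next = zlist;
			zlist = n;
		} else {
			pp = &n->next;
		}
	}
	pthread_mutex_unlock(&lock);

	while ((n = zlist)) {			/* release outside lock */
		zlist = n->next;
		free(n);
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++) {
		struct node *n = malloc(sizeof(*n));

		if (!n)
			return 1;
		n->victim = i & 1;
		n->next = shared_list;
		shared_list = n;
	}
	reap_victims();
	return 0;
}
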
-void
-ksocknal_terminate_conn(struct ksock_conn *conn)
-{
-	/*
-	 * This gets called by the reaper (guaranteed thread context) to
-	 * disengage the socket from its callbacks and close it.
-	 * ksnc_refcount will eventually hit zero, and then the reaper will
-	 * destroy it.
-	 */
-	struct ksock_peer *peer = conn->ksnc_peer;
-	struct ksock_sched *sched = conn->ksnc_scheduler;
-	int failed = 0;
-
-	LASSERT(conn->ksnc_closing);
-
-	/* wake up the scheduler to "send" all remaining packets to /dev/null */
-	spin_lock_bh(&sched->kss_lock);
-
-	/* a closing conn is always ready to tx */
-	conn->ksnc_tx_ready = 1;
-
-	if (!conn->ksnc_tx_scheduled &&
-	    !list_empty(&conn->ksnc_tx_queue)) {
-		list_add_tail(&conn->ksnc_tx_list,
-			      &sched->kss_tx_conns);
-		conn->ksnc_tx_scheduled = 1;
-		/* extra ref for scheduler */
-		ksocknal_conn_addref(conn);
-
-		wake_up(&sched->kss_waitq);
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-
-	/* serialise with callbacks */
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
-
-	/*
-	 * OK, so this conn may not be completely disengaged from its
-	 * scheduler yet, but it _has_ committed to terminate...
-	 */
-	conn->ksnc_scheduler->kss_nconns--;
-
-	if (peer->ksnp_error) {
-		/* peer's last conn closed in error */
-		LASSERT(list_empty(&peer->ksnp_conns));
-		failed = 1;
-		peer->ksnp_error = 0;     /* avoid multiple notifications */
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	if (failed)
-		ksocknal_peer_failed(peer);
-
-	/*
-	 * The socket is closed on the final put; either here, or in
-	 * ksocknal_{send,recv}msg().  Since we set up the linger2 option
-	 * when the connection was established, this will close the socket
-	 * immediately, aborting anything buffered in it. Any hung
-	 * zero-copy transmits will therefore complete in finite time.
-	 */
-	ksocknal_connsock_decref(conn);
-}
-
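The closing comment above depends on linger behaviour making the final close abortive rather than draining. The kernel path configures this internally; in plain BSD-socket terms the equivalent abortive close looks like this sketch:

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

/* Userspace sketch of an abortive close: with SO_LINGER enabled and a zero
 * timeout, close() discards unsent buffered data and resets the connection
 * instead of draining it, so a hung peer cannot stall teardown.
 */
static int abortive_close(int fd)
{
	struct linger lg = { .l_onoff = 1, .l_linger = 0 };

	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg)) < 0) {
		perror("setsockopt(SO_LINGER)");
		return -1;
	}
	return close(fd);	/* sends RST, frees buffered data */
}
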
-void
-ksocknal_queue_zombie_conn(struct ksock_conn *conn)
-{
-	/* Queue the conn for the reaper to destroy */
-
-	LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
-	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-	list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
-	wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
-	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_destroy_conn(struct ksock_conn *conn)
-{
-	unsigned long last_rcv;
-
-	/* Final coup-de-grace of the reaper */
-	CDEBUG(D_NET, "connection %p\n", conn);
-
-	LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
-	LASSERT(!atomic_read(&conn->ksnc_sock_refcount));
-	LASSERT(!conn->ksnc_sock);
-	LASSERT(!conn->ksnc_route);
-	LASSERT(!conn->ksnc_tx_scheduled);
-	LASSERT(!conn->ksnc_rx_scheduled);
-	LASSERT(list_empty(&conn->ksnc_tx_queue));
-
-	/* complete current receive if any */
-	switch (conn->ksnc_rx_state) {
-	case SOCKNAL_RX_LNET_PAYLOAD:
-		last_rcv = conn->ksnc_rx_deadline -
-			   *ksocknal_tunables.ksnd_timeout * HZ;
-		CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n",
-		       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
-		       &conn->ksnc_ipaddr, conn->ksnc_port,
-		       iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left,
-		       (jiffies - last_rcv) / HZ);
-		lnet_finalize(conn->ksnc_peer->ksnp_ni,
-			      conn->ksnc_cookie, -EIO);
-		break;
-	case SOCKNAL_RX_LNET_HEADER:
-		if (conn->ksnc_rx_started)
-			CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       &conn->ksnc_ipaddr, conn->ksnc_port,
-			       conn->ksnc_proto->pro_version);
-		break;
-	case SOCKNAL_RX_KSM_HEADER:
-		if (conn->ksnc_rx_started)
-			CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       &conn->ksnc_ipaddr, conn->ksnc_port,
-			       conn->ksnc_proto->pro_version);
-		break;
-	case SOCKNAL_RX_SLOP:
-		if (conn->ksnc_rx_started)
-			CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       &conn->ksnc_ipaddr, conn->ksnc_port);
-		break;
-	default:
-		LBUG();
-		break;
-	}
-
-	ksocknal_peer_decref(conn->ksnc_peer);
-
-	kfree(conn);
-}
-
-int
-ksocknal_close_peer_conns_locked(struct ksock_peer *peer, __u32 ipaddr, int why)
-{
-	struct ksock_conn *conn;
-	struct list_head *ctmp;
-	struct list_head *cnxt;
-	int count = 0;
-
-	list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
-		conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
-		if (!ipaddr || conn->ksnc_ipaddr == ipaddr) {
-			count++;
-			ksocknal_close_conn_locked(conn, why);
-		}
-	}
-
-	return count;
-}
-
-int
-ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why)
-{
-	struct ksock_peer *peer = conn->ksnc_peer;
-	__u32 ipaddr = conn->ksnc_ipaddr;
-	int count;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	count = ksocknal_close_peer_conns_locked(peer, ipaddr, why);
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	return count;
-}
-
-int
-ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr)
-{
-	struct ksock_peer *peer;
-	struct list_head *ptmp;
-	struct list_head *pnxt;
-	int lo;
-	int hi;
-	int i;
-	int count = 0;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	if (id.nid != LNET_NID_ANY) {
-		lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
-		hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
-	} else {
-		lo = 0;
-		hi = ksocknal_data.ksnd_peer_hash_size - 1;
-	}
-
-	for (i = lo; i <= hi; i++) {
-		list_for_each_safe(ptmp, pnxt,
-				   &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
-			if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
-			      (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
-				continue;
-
-			count += ksocknal_close_peer_conns_locked(peer, ipaddr,
-								  0);
-		}
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	/* wildcards always succeed */
-	if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr)
-		return 0;
-
-	if (!count)
-		return -ENOENT;
-	else
-		return 0;
-}
-
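The wildcard semantics above (an ANY nid/pid matches everything, and a wildcarded request reports success even when nothing was closed) reduce to a small predicate. A sketch with illustrative wildcard encodings:

#include <stdint.h>
#include <stdio.h>

#define NID_ANY ((uint64_t)-1)	/* illustrative stand-in for LNET_NID_ANY */
#define PID_ANY ((uint32_t)-1)	/* and for LNET_PID_ANY */

/* An ANY field matches every value, so a fully wildcarded id selects all
 * peers; that is why the caller treats wildcards as always succeeding.
 */
static int id_matches(uint64_t want_nid, uint32_t want_pid,
                      uint64_t nid, uint32_t pid)
{
	return (want_nid == NID_ANY || want_nid == nid) &&
	       (want_pid == PID_ANY || want_pid == pid);
}

int main(void)
{
	printf("%d\n", id_matches(NID_ANY, 42, 7, 42));  /* 1 */
	printf("%d\n", id_matches(9, PID_ANY, 7, 42));   /* 0 */
	return 0;
}
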
-void
-ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive)
-{
-	/*
-	 * The router is telling me she's been notified of a change in
-	 * gateway state....
-	 */
-	struct lnet_process_id id = {0};
-
-	id.nid = gw_nid;
-	id.pid = LNET_PID_ANY;
-
-	CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
-	       alive ? "up" : "down");
-
-	if (!alive) {
-		/* If the gateway crashed, close all open connections... */
-		ksocknal_close_matching_conns(id, 0);
-		return;
-	}
-
-	/*
-	 * ...otherwise do nothing.  We can only establish new connections
-	 * if we have autoroutes, and these connect on demand.
-	 */
-}
-
-void
-ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when)
-{
-	int connect = 1;
-	unsigned long last_alive = 0;
-	unsigned long now = jiffies;
-	struct ksock_peer *peer = NULL;
-	rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
-	struct lnet_process_id id = {
-		.nid = nid,
-		.pid = LNET_PID_LUSTRE,
-	};
-
-	read_lock(glock);
-
-	peer = ksocknal_find_peer_locked(ni, id);
-	if (peer) {
-		struct ksock_conn *conn;
-		int bufnob;
-
-		list_for_each_entry(conn, &peer->ksnp_conns, ksnc_list) {
-			bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-
-			if (bufnob < conn->ksnc_tx_bufnob) {
-				/* something got ACKed */
-				conn->ksnc_tx_deadline =
-					jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-				peer->ksnp_last_alive = now;
-				conn->ksnc_tx_bufnob = bufnob;
-			}
-		}
-
-		last_alive = peer->ksnp_last_alive;
-		if (!ksocknal_find_connectable_route_locked(peer))
-			connect = 0;
-	}
-
-	read_unlock(glock);
-
-	if (last_alive)
-		*when = last_alive;
-
-	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
-	       libcfs_nid2str(nid), peer,
-	       last_alive ? (now - last_alive) / HZ : -1,
-	       connect);
-
-	if (!connect)
-		return;
-
-	ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
-
-	write_lock_bh(glock);
-
-	peer = ksocknal_find_peer_locked(ni, id);
-	if (peer)
-		ksocknal_launch_all_connections_locked(peer);
-
-	write_unlock_bh(glock);
-}
-
-static void
-ksocknal_push_peer(struct ksock_peer *peer)
-{
-	int index;
-	int i;
-	struct list_head *tmp;
-	struct ksock_conn *conn;
-
-	for (index = 0; ; index++) {
-		read_lock(&ksocknal_data.ksnd_global_lock);
-
-		i = 0;
-		conn = NULL;
-
-		list_for_each(tmp, &peer->ksnp_conns) {
-			if (i++ == index) {
-				conn = list_entry(tmp, struct ksock_conn,
-						  ksnc_list);
-				ksocknal_conn_addref(conn);
-				break;
-			}
-		}
-
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-
-		if (!conn)
-			break;
-
-		ksocknal_lib_push_conn(conn);
-		ksocknal_conn_decref(conn);
-	}
-}
-
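ksocknal_push_peer() above re-walks the conn list by index on every pass so it can drop the global lock while pushing each conn; the per-pass addref keeps the chosen conn alive across the unlocked call. A generic sketch of the idiom (userspace, names hypothetical):

#include <pthread.h>

/* Index-based iteration under a dropped lock: find the index-th element
 * while locked, pin it with a reference, unlock, operate, repeat.
 * Restarting from the head by index tolerates concurrent list mutation.
 */
struct obj { int refs; struct obj *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj *head;

static void slow_op(struct obj *o) { (void)o; /* e.g. push a conn */ }
static void put(struct obj *o) { o->refs--; }

static void walk_all(void)
{
	for (int index = 0; ; index++) {
		struct obj *o;
		int i = 0;

		pthread_mutex_lock(&lock);
		for (o = head; o; o = o->next)
			if (i++ == index) {
				o->refs++;	/* pin across unlock */
				break;
			}
		pthread_mutex_unlock(&lock);

		if (!o)
			break;
		slow_op(o);	/* safe: lock dropped, ref held */
		put(o);
	}
}

int main(void) { walk_all(); return 0; }
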
-static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
-{
-	struct list_head *start;
-	struct list_head *end;
-	struct list_head *tmp;
-	int rc = -ENOENT;
-	unsigned int hsize = ksocknal_data.ksnd_peer_hash_size;
-
-	if (id.nid == LNET_NID_ANY) {
-		start = &ksocknal_data.ksnd_peers[0];
-		end = &ksocknal_data.ksnd_peers[hsize - 1];
-	} else {
-		start = ksocknal_nid2peerlist(id.nid);
-		end = ksocknal_nid2peerlist(id.nid);
-	}
-
-	for (tmp = start; tmp <= end; tmp++) {
-		int peer_off; /* searching offset in peer hash table */
-
-		for (peer_off = 0; ; peer_off++) {
-			struct ksock_peer *peer;
-			int i = 0;
-
-			read_lock(&ksocknal_data.ksnd_global_lock);
-			list_for_each_entry(peer, tmp, ksnp_list) {
-				if (!((id.nid == LNET_NID_ANY ||
-				       id.nid == peer->ksnp_id.nid) &&
-				      (id.pid == LNET_PID_ANY ||
-				       id.pid == peer->ksnp_id.pid)))
-					continue;
-
-				if (i++ == peer_off) {
-					ksocknal_peer_addref(peer);
-					break;
-				}
-			}
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-
-			if (i <= peer_off) /* no match at this offset */
-				break;
-
-			rc = 0;
-			ksocknal_push_peer(peer);
-			ksocknal_peer_decref(peer);
-		}
-	}
-	return rc;
-}
-
-static int
-ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask)
-{
-	struct ksock_net *net = ni->ni_data;
-	struct ksock_interface *iface;
-	int rc;
-	int i;
-	int j;
-	struct list_head *ptmp;
-	struct ksock_peer *peer;
-	struct list_head *rtmp;
-	struct ksock_route *route;
-
-	if (!ipaddress || !netmask)
-		return -EINVAL;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	iface = ksocknal_ip2iface(ni, ipaddress);
-	if (iface) {
-		/* silently ignore dups */
-		rc = 0;
-	} else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
-		rc = -ENOSPC;
-	} else {
-		iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
-
-		iface->ksni_ipaddr = ipaddress;
-		iface->ksni_netmask = netmask;
-		iface->ksni_nroutes = 0;
-		iface->ksni_npeers = 0;
-
-		for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-			list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
-				peer = list_entry(ptmp, struct ksock_peer,
-						  ksnp_list);
-
-				for (j = 0; j < peer->ksnp_n_passive_ips; j++)
-					if (peer->ksnp_passive_ips[j] == ipaddress)
-						iface->ksni_npeers++;
-
-				list_for_each(rtmp, &peer->ksnp_routes) {
-					route = list_entry(rtmp, struct ksock_route,
-							   ksnr_list);
-
-					if (route->ksnr_myipaddr == ipaddress)
-						iface->ksni_nroutes++;
-				}
-			}
-		}
-
-		rc = 0;
-		/*
-		 * NB only new connections will pay attention to the
-		 * new interface!
-		 */
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	return rc;
-}
-
-static void
-ksocknal_peer_del_interface_locked(struct ksock_peer *peer, __u32 ipaddr)
-{
-	struct list_head *tmp;
-	struct list_head *nxt;
-	struct ksock_route *route;
-	struct ksock_conn *conn;
-	int i;
-	int j;
-
-	for (i = 0; i < peer->ksnp_n_passive_ips; i++)
-		if (peer->ksnp_passive_ips[i] == ipaddr) {
-			for (j = i + 1; j < peer->ksnp_n_passive_ips; j++)
-				peer->ksnp_passive_ips[j - 1] =
-					peer->ksnp_passive_ips[j];
-			peer->ksnp_n_passive_ips--;
-			break;
-		}
-
-	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		if (route->ksnr_myipaddr != ipaddr)
-			continue;
-
-		if (route->ksnr_share_count) {
-			/* Manually created; keep, but unbind */
-			route->ksnr_myipaddr = 0;
-		} else {
-			ksocknal_del_route_locked(route);
-		}
-	}
-
-	list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
-		conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
-		if (conn->ksnc_myipaddr == ipaddr)
-			ksocknal_close_conn_locked(conn, 0);
-	}
-}
-
-static int
-ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress)
-{
-	struct ksock_net *net = ni->ni_data;
-	int rc = -ENOENT;
-	struct list_head *tmp;
-	struct list_head *nxt;
-	struct ksock_peer *peer;
-	__u32 this_ip;
-	int i;
-	int j;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	for (i = 0; i < net->ksnn_ninterfaces; i++) {
-		this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
-
-		if (!(!ipaddress || ipaddress == this_ip))
-			continue;
-
-		rc = 0;
-
-		for (j = i + 1; j < net->ksnn_ninterfaces; j++)
-			net->ksnn_interfaces[j - 1] =
-				net->ksnn_interfaces[j];
-
-		net->ksnn_ninterfaces--;
-
-		for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
-			list_for_each_safe(tmp, nxt,
-					   &ksocknal_data.ksnd_peers[j]) {
-				peer = list_entry(tmp, struct ksock_peer, ksnp_list);
-
-				if (peer->ksnp_ni != ni)
-					continue;
-
-				ksocknal_peer_del_interface_locked(peer, this_ip);
-			}
-		}
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	return rc;
-}
-
-int
-ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
-{
-	struct lnet_process_id id = {0};
-	struct libcfs_ioctl_data *data = arg;
-	int rc;
-
-	switch (cmd) {
-	case IOC_LIBCFS_GET_INTERFACE: {
-		struct ksock_net       *net = ni->ni_data;
-		struct ksock_interface *iface;
-
-		read_lock(&ksocknal_data.ksnd_global_lock);
-
-		if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
-			rc = -ENOENT;
-		} else {
-			rc = 0;
-			iface = &net->ksnn_interfaces[data->ioc_count];
-
-			data->ioc_u32[0] = iface->ksni_ipaddr;
-			data->ioc_u32[1] = iface->ksni_netmask;
-			data->ioc_u32[2] = iface->ksni_npeers;
-			data->ioc_u32[3] = iface->ksni_nroutes;
-		}
-
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-		return rc;
-	}
-
-	case IOC_LIBCFS_ADD_INTERFACE:
-		return ksocknal_add_interface(ni,
-					      data->ioc_u32[0], /* IP address */
-					      data->ioc_u32[1]); /* net mask */
-
-	case IOC_LIBCFS_DEL_INTERFACE:
-		return ksocknal_del_interface(ni,
-					      data->ioc_u32[0]); /* IP address */
-
-	case IOC_LIBCFS_GET_PEER: {
-		__u32 myip = 0;
-		__u32 ip = 0;
-		int port = 0;
-		int conn_count = 0;
-		int share_count = 0;
-
-		rc = ksocknal_get_peer_info(ni, data->ioc_count,
-					    &id, &myip, &ip, &port,
-					    &conn_count,  &share_count);
-		if (rc)
-			return rc;
-
-		data->ioc_nid    = id.nid;
-		data->ioc_count  = share_count;
-		data->ioc_u32[0] = ip;
-		data->ioc_u32[1] = port;
-		data->ioc_u32[2] = myip;
-		data->ioc_u32[3] = conn_count;
-		data->ioc_u32[4] = id.pid;
-		return 0;
-	}
-
-	case IOC_LIBCFS_ADD_PEER:
-		id.nid = data->ioc_nid;
-		id.pid = LNET_PID_LUSTRE;
-		return ksocknal_add_peer(ni, id,
-					  data->ioc_u32[0], /* IP */
-					  data->ioc_u32[1]); /* port */
-
-	case IOC_LIBCFS_DEL_PEER:
-		id.nid = data->ioc_nid;
-		id.pid = LNET_PID_ANY;
-		return ksocknal_del_peer(ni, id,
-					  data->ioc_u32[0]); /* IP */
-
-	case IOC_LIBCFS_GET_CONN: {
-		int txmem;
-		int rxmem;
-		int nagle;
-		struct ksock_conn *conn;
-
-		conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
-		if (!conn)
-			return -ENOENT;
-
-		ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
-
-		data->ioc_count  = txmem;
-		data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
-		data->ioc_flags  = nagle;
-		data->ioc_u32[0] = conn->ksnc_ipaddr;
-		data->ioc_u32[1] = conn->ksnc_port;
-		data->ioc_u32[2] = conn->ksnc_myipaddr;
-		data->ioc_u32[3] = conn->ksnc_type;
-		data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
-		data->ioc_u32[5] = rxmem;
-		data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
-		ksocknal_conn_decref(conn);
-		return 0;
-	}
-
-	case IOC_LIBCFS_CLOSE_CONNECTION:
-		id.nid = data->ioc_nid;
-		id.pid = LNET_PID_ANY;
-		return ksocknal_close_matching_conns(id,
-						      data->ioc_u32[0]);
-
-	case IOC_LIBCFS_REGISTER_MYNID:
-		/* Ignore if this is a noop */
-		if (data->ioc_nid == ni->ni_nid)
-			return 0;
-
-		CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
-		       libcfs_nid2str(data->ioc_nid),
-		       libcfs_nid2str(ni->ni_nid));
-		return -EINVAL;
-
-	case IOC_LIBCFS_PUSH_CONNECTION:
-		id.nid = data->ioc_nid;
-		id.pid = LNET_PID_ANY;
-		return ksocknal_push(ni, id);
-
-	default:
-		return -EINVAL;
-	}
-	/* not reached */
-}
-
-static void
-ksocknal_free_buffers(void)
-{
-	LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs));
-
-	if (ksocknal_data.ksnd_sched_info) {
-		struct ksock_sched_info *info;
-		int i;
-
-		cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info)
-			kfree(info->ksi_scheds);
-		cfs_percpt_free(ksocknal_data.ksnd_sched_info);
-	}
-
-	kvfree(ksocknal_data.ksnd_peers);
-
-	spin_lock(&ksocknal_data.ksnd_tx_lock);
-
-	if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
-		struct list_head zlist;
-		struct ksock_tx *tx;
-		struct ksock_tx *temp;
-
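-		/* transplant the list: link zlist in, then unlink the old head */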
-		list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
-		list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
-		spin_unlock(&ksocknal_data.ksnd_tx_lock);
-
-		list_for_each_entry_safe(tx, temp, &zlist, tx_list) {
-			list_del(&tx->tx_list);
-			kfree(tx);
-		}
-	} else {
-		spin_unlock(&ksocknal_data.ksnd_tx_lock);
-	}
-}
-
-static void
-ksocknal_base_shutdown(void)
-{
-	struct ksock_sched_info *info;
-	struct ksock_sched *sched;
-	int i;
-	int j;
-
-	LASSERT(!ksocknal_data.ksnd_nnets);
-
-	switch (ksocknal_data.ksnd_init) {
-	default:
-		LASSERT(0);
-		/* fall through */
-	case SOCKNAL_INIT_ALL:
-	case SOCKNAL_INIT_DATA:
-		LASSERT(ksocknal_data.ksnd_peers);
-		for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
-			LASSERT(list_empty(&ksocknal_data.ksnd_peers[i]));
-
-		LASSERT(list_empty(&ksocknal_data.ksnd_nets));
-		LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
-		LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
-		LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
-		LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));
-
-		if (ksocknal_data.ksnd_sched_info) {
-			cfs_percpt_for_each(info, i,
-					    ksocknal_data.ksnd_sched_info) {
-				if (!info->ksi_scheds)
-					continue;
-
-				for (j = 0; j < info->ksi_nthreads_max; j++) {
-					sched = &info->ksi_scheds[j];
-					LASSERT(list_empty(
-						&sched->kss_tx_conns));
-					LASSERT(list_empty(
-						&sched->kss_rx_conns));
-					LASSERT(list_empty(
-						&sched->kss_zombie_noop_txs));
-					LASSERT(!sched->kss_nconns);
-				}
-			}
-		}
-
-		/* flag threads to terminate; wake and wait for them to die */
-		ksocknal_data.ksnd_shuttingdown = 1;
-		wake_up_all(&ksocknal_data.ksnd_connd_waitq);
-		wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
-
-		if (ksocknal_data.ksnd_sched_info) {
-			cfs_percpt_for_each(info, i,
-					    ksocknal_data.ksnd_sched_info) {
-				if (!info->ksi_scheds)
-					continue;
-
-				for (j = 0; j < info->ksi_nthreads_max; j++) {
-					sched = &info->ksi_scheds[j];
-					wake_up_all(&sched->kss_waitq);
-				}
-			}
-		}
-
-		i = 4;
-		read_lock(&ksocknal_data.ksnd_global_lock);
-		while (ksocknal_data.ksnd_nthreads) {
-			i++;
-			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-			       "waiting for %d threads to terminate\n",
-				ksocknal_data.ksnd_nthreads);
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ);
-			read_lock(&ksocknal_data.ksnd_global_lock);
-		}
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-
-		ksocknal_free_buffers();
-
-		ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
-		break;
-	}
-
-	module_put(THIS_MODULE);
-}
-
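The `(i & (-i)) == i` test in the wait loop above is a branch-free power-of-two check: `i & -i` isolates the lowest set bit, which equals `i` exactly when a single bit is set. It throttles the warning to exponentially spaced iterations rather than every pass:

#include <stdio.h>

/* Demonstrates the power-of-two log-throttling trick used in the shutdown
 * wait loop: the warning would fire only at counter values 1, 2, 4, 8, ...
 */
int main(void)
{
	for (int i = 1; i <= 32; i++)
		if ((i & (-i)) == i)
			printf("would warn at iteration %d\n", i);
	return 0;
}
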
-static __u64
-ksocknal_new_incarnation(void)
-{
-	/* The incarnation number is the time this module loaded and it
-	 * identifies this particular instance of the socknal.
-	 */
-	return ktime_get_ns();
-}
-
-static int
-ksocknal_base_startup(void)
-{
-	struct ksock_sched_info	*info;
-	int rc;
-	int i;
-
-	LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
-	LASSERT(!ksocknal_data.ksnd_nnets);
-
-	memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
-
-	ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
-	ksocknal_data.ksnd_peers = kvmalloc_array(ksocknal_data.ksnd_peer_hash_size,
-						  sizeof(struct list_head),
-						  GFP_KERNEL);
-	if (!ksocknal_data.ksnd_peers)
-		return -ENOMEM;
-
-	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
-		INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
-
-	rwlock_init(&ksocknal_data.ksnd_global_lock);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
-
-	spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
-	init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
-
-	spin_lock_init(&ksocknal_data.ksnd_connd_lock);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
-	init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
-
-	spin_lock_init(&ksocknal_data.ksnd_tx_lock);
-	INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
-
-	/* NB memset above zeros whole of ksocknal_data */
-
-	/* flag lists/ptrs/locks initialised */
-	ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
-	try_module_get(THIS_MODULE);
-
-	ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
-							 sizeof(*info));
-	if (!ksocknal_data.ksnd_sched_info)
-		goto failed;
-
-	cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
-		struct ksock_sched *sched;
-		int nthrs;
-
-		nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
-		if (*ksocknal_tunables.ksnd_nscheds > 0) {
-			nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
-		} else {
-			/*
-			 * cap at half of the CPUs; assume the other half is
-			 * reserved for upper-layer modules
-			 */
-			nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
-		}
-
-		info->ksi_nthreads_max = nthrs;
-		info->ksi_cpt = i;
-
-		info->ksi_scheds = kzalloc_cpt(info->ksi_nthreads_max * sizeof(*sched),
-					       GFP_NOFS, i);
-		if (!info->ksi_scheds)
-			goto failed;
-
-		for (; nthrs > 0; nthrs--) {
-			sched = &info->ksi_scheds[nthrs - 1];
-
-			sched->kss_info = info;
-			spin_lock_init(&sched->kss_lock);
-			INIT_LIST_HEAD(&sched->kss_rx_conns);
-			INIT_LIST_HEAD(&sched->kss_tx_conns);
-			INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
-			init_waitqueue_head(&sched->kss_waitq);
-		}
-	}
-
-	ksocknal_data.ksnd_connd_starting       = 0;
-	ksocknal_data.ksnd_connd_failed_stamp   = 0;
-	ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds();
-	/*
-	 * must have at least 2 connds to remain responsive to accepts while
-	 * connecting
-	 */
-	if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
-		*ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
-
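-	/* NB pointer (not value) assignment: ksnd_nconnds_max now aliases ksnd_nconnds */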
-	if (*ksocknal_tunables.ksnd_nconnds_max <
-	    *ksocknal_tunables.ksnd_nconnds) {
-		ksocknal_tunables.ksnd_nconnds_max =
-			ksocknal_tunables.ksnd_nconnds;
-	}
-
-	for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
-		char name[16];
-
-		spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-		ksocknal_data.ksnd_connd_starting++;
-		spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
-		snprintf(name, sizeof(name), "socknal_cd%02d", i);
-		rc = ksocknal_thread_start(ksocknal_connd,
-					   (void *)((uintptr_t)i), name);
-		if (rc) {
-			spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-			ksocknal_data.ksnd_connd_starting--;
-			spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-			CERROR("Can't spawn socknal connd: %d\n", rc);
-			goto failed;
-		}
-	}
-
-	rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
-	if (rc) {
-		CERROR("Can't spawn socknal reaper: %d\n", rc);
-		goto failed;
-	}
-
-	/* flag everything initialised */
-	ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
-
-	return 0;
-
- failed:
-	ksocknal_base_shutdown();
-	return -ENETDOWN;
-}
-
-static void
-ksocknal_debug_peerhash(struct lnet_ni *ni)
-{
-	struct ksock_peer *peer = NULL;
-	struct list_head *tmp;
-	int i;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-		list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
-			peer = list_entry(tmp, struct ksock_peer, ksnp_list);
-
-			if (peer->ksnp_ni == ni)
-				break;
-
-			peer = NULL;
-		}
-	}
-
-	if (peer) {
-		struct ksock_route *route;
-		struct ksock_conn  *conn;
-
-		CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n",
-		      libcfs_id2str(peer->ksnp_id),
-		      atomic_read(&peer->ksnp_refcount),
-		      peer->ksnp_sharecount, peer->ksnp_closing,
-		      peer->ksnp_accepting, peer->ksnp_error,
-		      peer->ksnp_zc_next_cookie,
-		      !list_empty(&peer->ksnp_tx_queue),
-		      !list_empty(&peer->ksnp_zc_req_list));
-
-		list_for_each(tmp, &peer->ksnp_routes) {
-			route = list_entry(tmp, struct ksock_route, ksnr_list);
-			CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
-			      atomic_read(&route->ksnr_refcount),
-			      route->ksnr_scheduled, route->ksnr_connecting,
-			      route->ksnr_connected, route->ksnr_deleted);
-		}
-
-		list_for_each(tmp, &peer->ksnp_conns) {
-			conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-			CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
-			      atomic_read(&conn->ksnc_conn_refcount),
-			      atomic_read(&conn->ksnc_sock_refcount),
-			      conn->ksnc_type, conn->ksnc_closing);
-		}
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_shutdown(struct lnet_ni *ni)
-{
-	struct ksock_net *net = ni->ni_data;
-	int i;
-	struct lnet_process_id anyid = {0};
-
-	anyid.nid = LNET_NID_ANY;
-	anyid.pid = LNET_PID_ANY;
-
-	LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
-	LASSERT(ksocknal_data.ksnd_nnets > 0);
-
-	spin_lock_bh(&net->ksnn_lock);
-	net->ksnn_shutdown = 1;		 /* prevent new peers */
-	spin_unlock_bh(&net->ksnn_lock);
-
-	/* Delete all peers */
-	ksocknal_del_peer(ni, anyid, 0);
-
-	/* Wait for all peer state to clean up */
-	i = 2;
-	spin_lock_bh(&net->ksnn_lock);
-	while (net->ksnn_npeers) {
-		spin_unlock_bh(&net->ksnn_lock);
-
-		i++;
-		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-		       "waiting for %d peers to disconnect\n",
-		       net->ksnn_npeers);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
-
-		ksocknal_debug_peerhash(ni);
-
-		spin_lock_bh(&net->ksnn_lock);
-	}
-	spin_unlock_bh(&net->ksnn_lock);
-
-	for (i = 0; i < net->ksnn_ninterfaces; i++) {
-		LASSERT(!net->ksnn_interfaces[i].ksni_npeers);
-		LASSERT(!net->ksnn_interfaces[i].ksni_nroutes);
-	}
-
-	list_del(&net->ksnn_list);
-	kfree(net);
-
-	ksocknal_data.ksnd_nnets--;
-	if (!ksocknal_data.ksnd_nnets)
-		ksocknal_base_shutdown();
-}
-
-static int
-ksocknal_enumerate_interfaces(struct ksock_net *net)
-{
-	char **names;
-	int i;
-	int j;
-	int rc;
-	int n;
-
-	n = lnet_ipif_enumerate(&names);
-	if (n <= 0) {
-		CERROR("Can't enumerate interfaces: %d\n", n);
-		return n;
-	}
-
-	for (i = j = 0; i < n; i++) {
-		int up;
-		__u32 ip;
-		__u32 mask;
-
-		if (!strcmp(names[i], "lo")) /* skip the loopback IF */
-			continue;
-
-		rc = lnet_ipif_query(names[i], &up, &ip, &mask);
-		if (rc) {
-			CWARN("Can't get interface %s info: %d\n",
-			      names[i], rc);
-			continue;
-		}
-
-		if (!up) {
-			CWARN("Ignoring interface %s (down)\n",
-			      names[i]);
-			continue;
-		}
-
-		if (j == LNET_MAX_INTERFACES) {
-			CWARN("Ignoring interface %s (too many interfaces)\n",
-			      names[i]);
-			continue;
-		}
-
-		net->ksnn_interfaces[j].ksni_ipaddr = ip;
-		net->ksnn_interfaces[j].ksni_netmask = mask;
-		strlcpy(net->ksnn_interfaces[j].ksni_name,
-			names[i], sizeof(net->ksnn_interfaces[j].ksni_name));
-		j++;
-	}
-
-	lnet_ipif_free_enumeration(names, n);
-
-	if (!j)
-		CERROR("Can't find any usable interfaces\n");
-
-	return j;
-}
-
-static int
-ksocknal_search_new_ipif(struct ksock_net *net)
-{
-	int new_ipif = 0;
-	int i;
-
-	for (i = 0; i < net->ksnn_ninterfaces; i++) {
-		char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
-		char *colon = strchr(ifnam, ':');
-		int found  = 0;
-		struct ksock_net *tmp;
-		int j;
-
-		if (colon) /* ignore alias device */
-			*colon = 0;
-
-		list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) {
-			for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
-				char *ifnam2 =
-					&tmp->ksnn_interfaces[j].ksni_name[0];
-				char *colon2 = strchr(ifnam2, ':');
-
-				if (colon2)
-					*colon2 = 0;
-
-				found = !strcmp(ifnam, ifnam2);
-				if (colon2)
-					*colon2 = ':';
-			}
-			if (found)
-				break;
-		}
-
-		new_ipif += !found;
-		if (colon)
-			*colon = ':';
-	}
-
-	return new_ipif;
-}
-
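Alias devices ("eth0:1") share their base interface, which is why the code above temporarily NUL-terminates at the colon before comparing names. The same comparison can be written without mutating the buffers; a small sketch (helper name invented here):

#include <stdio.h>
#include <string.h>

/* Compare two interface names ignoring any ":alias" suffix, mirroring the
 * colon-trimming in ksocknal_search_new_ipif() without writing into the
 * buffers.
 */
static int same_base_ifname(const char *a, const char *b)
{
	size_t la = strcspn(a, ":");
	size_t lb = strcspn(b, ":");

	return la == lb && !strncmp(a, b, la);
}

int main(void)
{
	printf("%d\n", same_base_ifname("eth0:1", "eth0"));	/* 1 */
	printf("%d\n", same_base_ifname("eth0", "eth1"));	/* 0 */
	return 0;
}
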
-static int
-ksocknal_start_schedulers(struct ksock_sched_info *info)
-{
-	int nthrs;
-	int rc = 0;
-	int i;
-
-	if (!info->ksi_nthreads) {
-		if (*ksocknal_tunables.ksnd_nscheds > 0) {
-			nthrs = info->ksi_nthreads_max;
-		} else {
-			nthrs = cfs_cpt_weight(lnet_cpt_table(),
-					       info->ksi_cpt);
-			nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
-			nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
-		}
-		nthrs = min(nthrs, info->ksi_nthreads_max);
-	} else {
-		LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
-		/* start up to two more threads if there is a new interface */
-		nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
-	}
-
-	for (i = 0; i < nthrs; i++) {
-		long id;
-		char name[20];
-		struct ksock_sched *sched;
-
-		id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
-		sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
-		snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
-			 info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
-
-		rc = ksocknal_thread_start(ksocknal_scheduler,
-					   (void *)id, name);
-		if (!rc)
-			continue;
-
-		CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
-		       info->ksi_cpt, info->ksi_nthreads + i, rc);
-		break;
-	}
-
-	info->ksi_nthreads += i;
-	return rc;
-}
-
-static int
-ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts)
-{
-	int newif = ksocknal_search_new_ipif(net);
-	int rc;
-	int i;
-
-	LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
-
-	for (i = 0; i < ncpts; i++) {
-		struct ksock_sched_info *info;
-		int cpt = !cpts ? i : cpts[i];
-
-		LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
-		info = ksocknal_data.ksnd_sched_info[cpt];
-
-		if (!newif && info->ksi_nthreads > 0)
-			continue;
-
-		rc = ksocknal_start_schedulers(info);
-		if (rc)
-			return rc;
-	}
-	return 0;
-}
-
-int
-ksocknal_startup(struct lnet_ni *ni)
-{
-	struct ksock_net *net;
-	int rc;
-	int i;
-
-	LASSERT(ni->ni_lnd == &the_ksocklnd);
-
-	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
-		rc = ksocknal_base_startup();
-		if (rc)
-			return rc;
-	}
-
-	net = kzalloc(sizeof(*net), GFP_NOFS);
-	if (!net)
-		goto fail_0;
-
-	spin_lock_init(&net->ksnn_lock);
-	net->ksnn_incarnation = ksocknal_new_incarnation();
-	ni->ni_data = net;
-	ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
-	ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
-	ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
-	ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
-
-	if (!ni->ni_interfaces[0]) {
-		rc = ksocknal_enumerate_interfaces(net);
-		if (rc <= 0)
-			goto fail_1;
-
-		net->ksnn_ninterfaces = 1;
-	} else {
-		for (i = 0; i < LNET_MAX_INTERFACES; i++) {
-			int up;
-
-			if (!ni->ni_interfaces[i])
-				break;
-
-			rc = lnet_ipif_query(ni->ni_interfaces[i], &up,
-					     &net->ksnn_interfaces[i].ksni_ipaddr,
-					     &net->ksnn_interfaces[i].ksni_netmask);
-
-			if (rc) {
-				CERROR("Can't get interface %s info: %d\n",
-				       ni->ni_interfaces[i], rc);
-				goto fail_1;
-			}
-
-			if (!up) {
-				CERROR("Interface %s is down\n",
-				       ni->ni_interfaces[i]);
-				goto fail_1;
-			}
-
-			strlcpy(net->ksnn_interfaces[i].ksni_name,
-				ni->ni_interfaces[i],
-				sizeof(net->ksnn_interfaces[i].ksni_name));
-		}
-		net->ksnn_ninterfaces = i;
-	}
-
-	/* call this before adding the net to ksocknal_data.ksnd_nets */
-	rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
-	if (rc)
-		goto fail_1;
-
-	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
-				net->ksnn_interfaces[0].ksni_ipaddr);
-	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
-
-	ksocknal_data.ksnd_nnets++;
-
-	return 0;
-
- fail_1:
-	kfree(net);
- fail_0:
-	if (!ksocknal_data.ksnd_nnets)
-		ksocknal_base_shutdown();
-
-	return -ENETDOWN;
-}
-
-static void __exit ksocklnd_exit(void)
-{
-	lnet_unregister_lnd(&the_ksocklnd);
-}
-
-static int __init ksocklnd_init(void)
-{
-	int rc;
-
-	/* check ksnr_connected/connecting field large enough */
-	BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4);
-	BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN);
-
-	/* initialize the_ksocklnd */
-	the_ksocklnd.lnd_type     = SOCKLND;
-	the_ksocklnd.lnd_startup  = ksocknal_startup;
-	the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
-	the_ksocklnd.lnd_ctl      = ksocknal_ctl;
-	the_ksocklnd.lnd_send     = ksocknal_send;
-	the_ksocklnd.lnd_recv     = ksocknal_recv;
-	the_ksocklnd.lnd_notify   = ksocknal_notify;
-	the_ksocklnd.lnd_query    = ksocknal_query;
-	the_ksocklnd.lnd_accept   = ksocknal_accept;
-
-	rc = ksocknal_tunables_init();
-	if (rc)
-		return rc;
-
-	rc = libcfs_setup();
-	if (rc)
-		return rc;
-
-	lnet_register_lnd(&the_ksocklnd);
-
-	return 0;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("TCP Socket LNet Network Driver");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(ksocklnd_init);
-module_exit(ksocklnd_exit);

+ 0 - 704
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h

@@ -1,704 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#ifndef _SOCKLND_SOCKLND_H_
-#define _SOCKLND_SOCKLND_H_
-
-#define DEBUG_PORTAL_ALLOC
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <linux/crc32.h>
-#include <linux/errno.h>
-#include <linux/if.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/syscalls.h>
-#include <linux/sysctl.h>
-#include <linux/uio.h>
-#include <linux/unistd.h>
-#include <asm/irq.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/lnet/socklnd.h>
-
-/* assume one thread for each connection type */
-#define SOCKNAL_NSCHEDS		3
-#define SOCKNAL_NSCHEDS_HIGH	(SOCKNAL_NSCHEDS << 1)
-
-#define SOCKNAL_PEER_HASH_SIZE  101   /* # peer lists */
-#define SOCKNAL_RESCHED         100   /* # scheduler loops before reschedule */
-#define SOCKNAL_INSANITY_RECONN 5000  /* # reconnections before connd looks stuck */
-#define SOCKNAL_ENOMEM_RETRY    1     /* jiffies between retries */
-
-#define SOCKNAL_SINGLE_FRAG_TX  0     /* disable multi-fragment sends */
-#define SOCKNAL_SINGLE_FRAG_RX  0     /* disable multi-fragment receives */
-
-#define SOCKNAL_VERSION_DEBUG   0     /* enable protocol version debugging */
-
-/*
- * risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
- * no risk if we're not running on a CONFIG_HIGHMEM platform.
- */
-#ifdef CONFIG_HIGHMEM
-# define SOCKNAL_RISK_KMAP_DEADLOCK  0
-#else
-# define SOCKNAL_RISK_KMAP_DEADLOCK  1
-#endif
-
-struct ksock_sched_info;
-
-struct ksock_sched {				/* per scheduler state */
-	spinlock_t              kss_lock;       /* serialise */
-	struct list_head        kss_rx_conns;   /* conn waiting to be read */
-	struct list_head        kss_tx_conns;   /* conn waiting to be written */
-	struct list_head        kss_zombie_noop_txs; /* zombie noop tx list */
-	wait_queue_head_t       kss_waitq;	/* where scheduler sleeps */
-	int                     kss_nconns;     /* # connections assigned to
-						 * this scheduler
-						 */
-	struct ksock_sched_info *kss_info;	/* owner of it */
-};
-
-struct ksock_sched_info {
-	int                     ksi_nthreads_max; /* max allowed threads */
-	int                     ksi_nthreads;     /* number of threads */
-	int                     ksi_cpt;          /* CPT id */
-	struct ksock_sched	*ksi_scheds;	  /* array of schedulers */
-};
-
-#define KSOCK_CPT_SHIFT           16
-#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid))
-#define KSOCK_THREAD_CPT(id)      ((id) >> KSOCK_CPT_SHIFT)
-#define KSOCK_THREAD_SID(id)      ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
-
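
The thread-ID macros above pack a CPU-partition (CPT) index into the high bits and the per-partition scheduler index into the low 16 bits of a single long. A rough userspace illustration of the packing (macro bodies copied from this header; the values are made up):

#include <stdio.h>

#define KSOCK_CPT_SHIFT           16
#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid))
#define KSOCK_THREAD_CPT(id)      ((id) >> KSOCK_CPT_SHIFT)
#define KSOCK_THREAD_SID(id)      ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))

int main(void)
{
	long id = KSOCK_THREAD_ID(2, 5);	/* CPT 2, scheduler 5 */

	/* prints "id=0x20005 cpt=2 sid=5" */
	printf("id=0x%lx cpt=%ld sid=%ld\n", id,
	       (long)KSOCK_THREAD_CPT(id), (long)KSOCK_THREAD_SID(id));
	return 0;
}

This is how ksocknal_scheduler() (in socklnd_cb.c below) recovers its ksock_sched_info and ksock_sched from the opaque thread argument.
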
-struct ksock_interface {			/* in-use interface */
-	__u32		ksni_ipaddr;		/* interface's IP address */
-	__u32		ksni_netmask;		/* interface's network mask */
-	int		ksni_nroutes;		/* # routes using (active) */
-	int		ksni_npeers;		/* # peers using (passive) */
-	char		ksni_name[IFNAMSIZ];	/* interface name */
-};
-
-struct ksock_tunables {
-	int          *ksnd_timeout;            /* "stuck" socket timeout
-						* (seconds)
-						*/
-	int          *ksnd_nscheds;            /* # scheduler threads in each
-						* pool while starting
-						*/
-	int          *ksnd_nconnds;            /* # connection daemons */
-	int          *ksnd_nconnds_max;        /* max # connection daemons */
-	int          *ksnd_min_reconnectms;    /* first connection retry after
-						* (ms)...
-						*/
-	int          *ksnd_max_reconnectms;    /* ...exponentially increasing to
-						* this
-						*/
-	int          *ksnd_eager_ack;          /* make TCP ack eagerly? */
-	int          *ksnd_typed_conns;        /* drive sockets by type? */
-	int          *ksnd_min_bulk;           /* smallest "large" message */
-	int          *ksnd_tx_buffer_size;     /* socket tx buffer size */
-	int          *ksnd_rx_buffer_size;     /* socket rx buffer size */
-	int          *ksnd_nagle;              /* enable NAGLE? */
-	int          *ksnd_round_robin;        /* round robin for multiple
-						* interfaces
-						*/
-	int          *ksnd_keepalive;          /* # secs for sending keepalive
-						* NOOP
-						*/
-	int          *ksnd_keepalive_idle;     /* # idle secs before 1st probe
-						*/
-	int          *ksnd_keepalive_count;    /* # probes */
-	int          *ksnd_keepalive_intvl;    /* time between probes */
-	int          *ksnd_credits;            /* # concurrent sends */
-	int          *ksnd_peertxcredits;      /* # concurrent sends to 1 peer
-						*/
-	int          *ksnd_peerrtrcredits;     /* # per-peer router buffer
-						* credits
-						*/
-	int          *ksnd_peertimeout;        /* seconds to consider peer dead
-						*/
-	int          *ksnd_enable_csum;        /* enable checksum */
-	int          *ksnd_inject_csum_error;  /* set non-zero to inject
-						* checksum error
-						*/
-	int          *ksnd_nonblk_zcack;       /* always send zc-ack on
-						* non-blocking connection
-						*/
-	unsigned int *ksnd_zc_min_payload;     /* minimum zero copy payload
-						* size
-						*/
-	int          *ksnd_zc_recv;            /* enable ZC receive (for
-						* Chelsio TOE)
-						*/
-	int          *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to
-						* enable ZC receive
-						*/
-};
-
-struct ksock_net {
-	__u64		  ksnn_incarnation;	/* my epoch */
-	spinlock_t	  ksnn_lock;		/* serialise */
-	struct list_head	  ksnn_list;		/* chain on global list */
-	int		  ksnn_npeers;		/* # peers */
-	int		  ksnn_shutdown;	/* shutting down? */
-	int		  ksnn_ninterfaces;	/* IP interfaces */
-	struct ksock_interface ksnn_interfaces[LNET_MAX_INTERFACES];
-};
-
-/** connd timeout */
-#define SOCKNAL_CONND_TIMEOUT  120
-/** reserved thread for accepting & creating new connd */
-#define SOCKNAL_CONND_RESV     1
-
-struct ksock_nal_data {
-	int                     ksnd_init;              /* initialisation state
-							 */
-	int                     ksnd_nnets;             /* # networks set up */
-	struct list_head        ksnd_nets;              /* list of nets */
-	rwlock_t                ksnd_global_lock;       /* stabilize peer/conn
-							 * ops
-							 */
-	struct list_head        *ksnd_peers;            /* hash table of all my
-							 * known peers
-							 */
-	int                     ksnd_peer_hash_size;    /* size of ksnd_peers */
-
-	int                     ksnd_nthreads;          /* # live threads */
-	int                     ksnd_shuttingdown;      /* tell threads to exit
-							 */
-	struct ksock_sched_info **ksnd_sched_info;      /* schedulers info */
-
-	atomic_t                ksnd_nactive_txs;       /* #active txs */
-
-	struct list_head        ksnd_deathrow_conns;    /* conns to close:
-							 * reaper_lock
-							 */
-	struct list_head        ksnd_zombie_conns;      /* conns to free:
-							 * reaper_lock
-							 */
-	struct list_head        ksnd_enomem_conns;      /* conns to retry:
-							 * reaper_lock
-							 */
-	wait_queue_head_t       ksnd_reaper_waitq;      /* reaper sleeps here */
-	unsigned long	        ksnd_reaper_waketime;   /* when reaper will wake
-							 */
-	spinlock_t              ksnd_reaper_lock;       /* serialise */
-
-	int                     ksnd_enomem_tx;         /* test ENOMEM sender */
-	int                     ksnd_stall_tx;          /* test sluggish sender
-							 */
-	int                     ksnd_stall_rx;          /* test sluggish
-							 * receiver
-							 */
-	struct list_head        ksnd_connd_connreqs;    /* incoming connection
-							 * requests
-							 */
-	struct list_head        ksnd_connd_routes;      /* routes waiting to be
-							 * connected
-							 */
-	wait_queue_head_t       ksnd_connd_waitq;       /* connds sleep here */
-	int                     ksnd_connd_connecting;  /* # connds connecting
-							 */
-	time64_t                ksnd_connd_failed_stamp;/* time stamp of the
-							 * last failed
-							 * connecting attempt
-							 */
-	time64_t                ksnd_connd_starting_stamp;/* time stamp of the
-							   * last starting connd
-							   */
-	unsigned int		ksnd_connd_starting;	/* # starting connd */
-	unsigned int		ksnd_connd_running;	/* # running connd */
-	spinlock_t              ksnd_connd_lock;        /* serialise */
-
-	struct list_head        ksnd_idle_noop_txs;     /* list head for freed
-							 * noop tx
-							 */
-	spinlock_t              ksnd_tx_lock;           /* serialise, g_lock
-							 * unsafe
-							 */
-};
-
-#define SOCKNAL_INIT_NOTHING 0
-#define SOCKNAL_INIT_DATA    1
-#define SOCKNAL_INIT_ALL     2
-
-/*
- * A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments (the first frag contains the portals header),
- * followed by 0 or more struct bio_vec fragments.
- *
- * On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header).  Once the header has been received, the payload is
- * received into either struct iovec or struct bio_vec fragments, depending on
- * what the header matched or whether the message needs forwarding.
- */
-struct ksock_conn;  /* forward ref */
-struct ksock_peer;  /* forward ref */
-struct ksock_route; /* forward ref */
-struct ksock_proto; /* forward ref */
-
-struct ksock_tx {			   /* transmit packet */
-	struct list_head  tx_list;         /* queue on conn for transmission etc
-					    */
-	struct list_head  tx_zc_list;      /* queue on peer for ZC request */
-	atomic_t          tx_refcount;     /* tx reference count */
-	int               tx_nob;          /* # packet bytes */
-	int               tx_resid;        /* residual bytes */
-	int               tx_niov;         /* # packet iovec frags */
-	struct kvec       *tx_iov;         /* packet iovec frags */
-	int               tx_nkiov;        /* # packet page frags */
-	unsigned short    tx_zc_aborted;   /* aborted ZC request */
-	unsigned short    tx_zc_capable:1; /* payload is large enough for ZC */
-	unsigned short    tx_zc_checked:1; /* Have I checked if I should ZC? */
-	unsigned short    tx_nonblk:1;     /* it's a non-blocking ACK */
-	struct bio_vec	  *tx_kiov;	   /* packet page frags */
-	struct ksock_conn *tx_conn;        /* owning conn */
-	struct lnet_msg        *tx_lnetmsg;     /* lnet message for lnet_finalize()
-					    */
-	unsigned long     tx_deadline;     /* when (in jiffies) tx times out */
-	struct ksock_msg       tx_msg;          /* socklnd message buffer */
-	int               tx_desc_size;    /* size of this descriptor */
-	union {
-		struct {
-			struct kvec iov;     /* virt hdr */
-			struct bio_vec kiov[0]; /* paged payload */
-		} paged;
-		struct {
-			struct kvec iov[1];  /* virt hdr + payload */
-		} virt;
-	} tx_frags;
-};
-
-#define KSOCK_NOOP_TX_SIZE (offsetof(struct ksock_tx, tx_frags.paged.kiov[0]))
-
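
KSOCK_NOOP_TX_SIZE uses offsetof() so a NOOP descriptor is allocated only up to the start of the paged-payload tail; LNET sends size the same struct for however many fragment descriptors they carry (see ksocknal_send() in socklnd_cb.c). A userspace sketch of that sizing idiom, with a simplified stand-in struct rather than the driver's types:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct frag { void *base; size_t len; };

struct tx {
	int desc_size;		/* size of this descriptor */
	struct frag frags[];	/* variable-length fragment tail */
};

/* allocate a descriptor with room for 'nfrags' trailing fragments */
static struct tx *tx_alloc(int nfrags)
{
	int size = offsetof(struct tx, frags[nfrags]);
	struct tx *tx = calloc(1, size);

	if (tx)
		tx->desc_size = size;
	return tx;
}

int main(void)
{
	struct tx *noop = tx_alloc(0);	/* like KSOCK_NOOP_TX_SIZE */
	struct tx *big = tx_alloc(16);	/* like a 16-frag LNET send */

	printf("noop=%d bytes, 16-frag=%d bytes\n",
	       noop->desc_size, big->desc_size);
	free(noop);
	free(big);
	return 0;
}
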
-/* network zero copy callback descriptor embedded in struct ksock_tx */
-
-#define SOCKNAL_RX_KSM_HEADER   1 /* reading ksock message header */
-#define SOCKNAL_RX_LNET_HEADER  2 /* reading lnet message header */
-#define SOCKNAL_RX_PARSE        3 /* Calling lnet_parse() */
-#define SOCKNAL_RX_PARSE_WAIT   4 /* waiting to be told to read the body */
-#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */
-#define SOCKNAL_RX_SLOP         6 /* skipping body */
-
-struct ksock_conn {
-	struct ksock_peer  *ksnc_peer;        /* owning peer */
-	struct ksock_route *ksnc_route;       /* owning route */
-	struct list_head   ksnc_list;         /* stash on peer's conn list */
-	struct socket      *ksnc_sock;        /* actual socket */
-	void               *ksnc_saved_data_ready;  /* socket's original
-						     * data_ready() callback
-						     */
-	void               *ksnc_saved_write_space; /* socket's original
-						     * write_space() callback
-						     */
-	atomic_t           ksnc_conn_refcount;/* conn refcount */
-	atomic_t           ksnc_sock_refcount;/* sock refcount */
-	struct ksock_sched *ksnc_scheduler;	/* who schedules this connection
-						 */
-	__u32              ksnc_myipaddr;     /* my IP */
-	__u32              ksnc_ipaddr;       /* peer's IP */
-	int                ksnc_port;         /* peer's port */
-	signed int         ksnc_type:3;       /* type of connection, should be
-					       * signed value
-					       */
-	unsigned int       ksnc_closing:1;    /* being shut down */
-	unsigned int       ksnc_flip:1;       /* byte-swap incoming? (V2.x only) */
-	unsigned int       ksnc_zc_capable:1; /* zero-copy capable */
-	struct ksock_proto *ksnc_proto;       /* protocol for the connection */
-
-	/* reader */
-	struct list_head   ksnc_rx_list;      /* where I enq waiting input or a
-					       * forwarding descriptor
-					       */
-	unsigned long      ksnc_rx_deadline;  /* when (in jiffies) receive times
-					       * out
-					       */
-	__u8               ksnc_rx_started;   /* started receiving a message */
-	__u8               ksnc_rx_ready;     /* data ready to read */
-	__u8               ksnc_rx_scheduled; /* being progressed */
-	__u8               ksnc_rx_state;     /* what is being read */
-	int                ksnc_rx_nob_left;  /* # bytes to next hdr/body */
-	struct iov_iter    ksnc_rx_to;		/* copy destination */
-	struct kvec        ksnc_rx_iov_space[LNET_MAX_IOV]; /* space for frag descriptors */
-	__u32              ksnc_rx_csum;      /* partial checksum for incoming
-					       * data
-					       */
-	void               *ksnc_cookie;      /* rx lnet_finalize passthru arg
-					       */
-	struct ksock_msg        ksnc_msg;          /* incoming message buffer:
-					       * V2.x message takes the
-					       * whole struct
-					       * V1.x message is a bare
-					       * struct lnet_hdr, it's stored in
-					       * ksnc_msg.ksm_u.lnetmsg
-					       */
-	/* WRITER */
-	struct list_head   ksnc_tx_list;      /* where I enq waiting for output
-					       * space
-					       */
-	struct list_head   ksnc_tx_queue;     /* packets waiting to be sent */
-	struct ksock_tx	  *ksnc_tx_carrier;   /* next TX that can carry a LNet
-					       * message or ZC-ACK
-					       */
-	unsigned long      ksnc_tx_deadline;  /* when (in jiffies) tx times out
-					       */
-	int                ksnc_tx_bufnob;    /* send buffer marker */
-	atomic_t           ksnc_tx_nob;       /* # bytes queued */
-	int		   ksnc_tx_ready;     /* write space */
-	int		   ksnc_tx_scheduled; /* being progressed */
-	unsigned long      ksnc_tx_last_post; /* time stamp of the last posted
-					       * TX
-					       */
-};
-
-struct ksock_route {
-	struct list_head  ksnr_list;           /* chain on peer route list */
-	struct list_head  ksnr_connd_list;     /* chain on ksnr_connd_routes */
-	struct ksock_peer *ksnr_peer;          /* owning peer */
-	atomic_t          ksnr_refcount;       /* # users */
-	unsigned long     ksnr_timeout;        /* when (in jiffies) reconnection
-						* can happen next
-						*/
-	long              ksnr_retry_interval; /* how long between retries */
-	__u32             ksnr_myipaddr;       /* my IP */
-	__u32             ksnr_ipaddr;         /* IP address to connect to */
-	int               ksnr_port;           /* port to connect to */
-	unsigned int      ksnr_scheduled:1;    /* scheduled for attention */
-	unsigned int      ksnr_connecting:1;   /* connection establishment in
-						* progress
-						*/
-	unsigned int      ksnr_connected:4;    /* connections established by
-						* type
-						*/
-	unsigned int      ksnr_deleted:1;      /* been removed from peer? */
-	unsigned int      ksnr_share_count;    /* created explicitly? */
-	int               ksnr_conn_count;     /* # conns established by this
-						* route
-						*/
-};
-
-#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */
-
-struct ksock_peer {
-	struct list_head   ksnp_list;           /* stash on global peer list */
-	unsigned long      ksnp_last_alive;     /* when (in jiffies) I was last
-						 * alive
-						 */
-	struct lnet_process_id  ksnp_id;	/* who's on the other end(s) */
-	atomic_t           ksnp_refcount;       /* # users */
-	int                ksnp_sharecount;     /* lconf usage counter */
-	int                ksnp_closing;        /* being closed */
-	int                ksnp_accepting;      /* # passive connections pending
-						 */
-	int                ksnp_error;          /* errno on closing last conn */
-	__u64              ksnp_zc_next_cookie; /* ZC completion cookie */
-	__u64              ksnp_incarnation;    /* latest known peer incarnation
-						 */
-	struct ksock_proto *ksnp_proto;         /* latest known peer protocol */
-	struct list_head   ksnp_conns;          /* all active connections */
-	struct list_head   ksnp_routes;         /* routes */
-	struct list_head   ksnp_tx_queue;       /* waiting packets */
-	spinlock_t         ksnp_lock;           /* serialize, g_lock unsafe */
-	struct list_head   ksnp_zc_req_list;    /* zero copy requests wait for
-						 * ACK
-						 */
-	unsigned long      ksnp_send_keepalive; /* time to send keepalive */
-	struct lnet_ni	   *ksnp_ni;		/* which network */
-	int                ksnp_n_passive_ips;  /* # of... */
-
-	/* preferred local interfaces */
-	__u32              ksnp_passive_ips[LNET_MAX_INTERFACES];
-};
-
-struct ksock_connreq {
-	struct list_head ksncr_list;  /* stash on ksnd_connd_connreqs */
-	struct lnet_ni	 *ksncr_ni;	/* chosen NI */
-	struct socket    *ksncr_sock; /* accepted socket */
-};
-
-extern struct ksock_nal_data ksocknal_data;
-extern struct ksock_tunables ksocknal_tunables;
-
-#define SOCKNAL_MATCH_NO  0 /* TX can't match type of connection */
-#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */
-#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not
-			     * preferred
-			     */
-
-struct ksock_proto {
-	/* version number of protocol */
-	int        pro_version;
-
-	/* handshake function */
-	int        (*pro_send_hello)(struct ksock_conn *, struct ksock_hello_msg *);
-
-	/* handshake function */
-	int        (*pro_recv_hello)(struct ksock_conn *, struct ksock_hello_msg *, int);
-
-	/* message pack */
-	void       (*pro_pack)(struct ksock_tx *);
-
-	/* message unpack */
-	void       (*pro_unpack)(struct ksock_msg *);
-
-	/* queue tx on the connection */
-	struct ksock_tx *(*pro_queue_tx_msg)(struct ksock_conn *, struct ksock_tx *);
-
-	/* queue ZC ack on the connection */
-	int        (*pro_queue_tx_zcack)(struct ksock_conn *, struct ksock_tx *, __u64);
-
-	/* handle ZC request */
-	int        (*pro_handle_zcreq)(struct ksock_conn *, __u64, int);
-
-	/* handle ZC ACK */
-	int        (*pro_handle_zcack)(struct ksock_conn *, __u64, __u64);
-
-	/*
-	 * msg type matches the connection type:
-	 * return value:
-	 *   return MATCH_NO  : no
-	 *   return MATCH_YES : matching type
-	 *   return MATCH_MAY : can be backup
-	 */
-	int        (*pro_match_tx)(struct ksock_conn *, struct ksock_tx *, int);
-};
-
-extern struct ksock_proto ksocknal_protocol_v1x;
-extern struct ksock_proto ksocknal_protocol_v2x;
-extern struct ksock_proto ksocknal_protocol_v3x;
-
-#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
-#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
-#define KSOCK_PROTO_V1       KSOCK_PROTO_V1_MAJOR
-
-#ifndef CPU_MASK_NONE
-#define CPU_MASK_NONE   0UL
-#endif
-
-static inline int
-ksocknal_route_mask(void)
-{
-	if (!*ksocknal_tunables.ksnd_typed_conns)
-		return (1 << SOCKLND_CONN_ANY);
-
-	return ((1 << SOCKLND_CONN_CONTROL) |
-		(1 << SOCKLND_CONN_BULK_IN) |
-		(1 << SOCKLND_CONN_BULK_OUT));
-}
-
-static inline struct list_head *
-ksocknal_nid2peerlist(lnet_nid_t nid)
-{
-	unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
-
-	return &ksocknal_data.ksnd_peers[hash];
-}
-
-static inline void
-ksocknal_conn_addref(struct ksock_conn *conn)
-{
-	LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
-	atomic_inc(&conn->ksnc_conn_refcount);
-}
-
-void ksocknal_queue_zombie_conn(struct ksock_conn *conn);
-void ksocknal_finalize_zcreq(struct ksock_conn *conn);
-
-static inline void
-ksocknal_conn_decref(struct ksock_conn *conn)
-{
-	LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
-	if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
-		ksocknal_queue_zombie_conn(conn);
-}
-
-static inline int
-ksocknal_connsock_addref(struct ksock_conn *conn)
-{
-	int rc = -ESHUTDOWN;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-	if (!conn->ksnc_closing) {
-		LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
-		atomic_inc(&conn->ksnc_sock_refcount);
-		rc = 0;
-	}
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	return rc;
-}
-
-static inline void
-ksocknal_connsock_decref(struct ksock_conn *conn)
-{
-	LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
-	if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
-		LASSERT(conn->ksnc_closing);
-		sock_release(conn->ksnc_sock);
-		conn->ksnc_sock = NULL;
-		ksocknal_finalize_zcreq(conn);
-	}
-}
-
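
ksocknal_connsock_addref() is a conditional get: a socket reference is handed out only while the connection is not yet closing, and the final ksocknal_connsock_decref() both releases the socket and finalizes any outstanding zero-copy requests. The shape of the pattern in plain userspace C (a bare counter stands in for the atomic_t and the global rwlock, so this sketches the logic, not the locking):

#include <stdio.h>

struct conn {
	int closing;
	int sock_refcount;
};

/* grant a socket ref only while the conn is still open */
static int connsock_addref(struct conn *c)
{
	if (c->closing)
		return -1;	/* -ESHUTDOWN in the driver */
	c->sock_refcount++;
	return 0;
}

static void connsock_decref(struct conn *c)
{
	if (--c->sock_refcount == 0)
		printf("last ref: release socket, finalize ZC reqs\n");
}

int main(void)
{
	struct conn c = { .closing = 0, .sock_refcount = 1 };

	if (connsock_addref(&c) == 0)	/* use the socket... */
		connsock_decref(&c);	/* ...then drop our ref */

	c.closing = 1;			/* shutdown path */
	connsock_decref(&c);		/* drops the initial ref */
	return 0;
}
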
-static inline void
-ksocknal_tx_addref(struct ksock_tx *tx)
-{
-	LASSERT(atomic_read(&tx->tx_refcount) > 0);
-	atomic_inc(&tx->tx_refcount);
-}
-
-void ksocknal_tx_prep(struct ksock_conn *, struct ksock_tx *tx);
-void ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx);
-
-static inline void
-ksocknal_tx_decref(struct ksock_tx *tx)
-{
-	LASSERT(atomic_read(&tx->tx_refcount) > 0);
-	if (atomic_dec_and_test(&tx->tx_refcount))
-		ksocknal_tx_done(NULL, tx);
-}
-
-static inline void
-ksocknal_route_addref(struct ksock_route *route)
-{
-	LASSERT(atomic_read(&route->ksnr_refcount) > 0);
-	atomic_inc(&route->ksnr_refcount);
-}
-
-void ksocknal_destroy_route(struct ksock_route *route);
-
-static inline void
-ksocknal_route_decref(struct ksock_route *route)
-{
-	LASSERT(atomic_read(&route->ksnr_refcount) > 0);
-	if (atomic_dec_and_test(&route->ksnr_refcount))
-		ksocknal_destroy_route(route);
-}
-
-static inline void
-ksocknal_peer_addref(struct ksock_peer *peer)
-{
-	LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
-	atomic_inc(&peer->ksnp_refcount);
-}
-
-void ksocknal_destroy_peer(struct ksock_peer *peer);
-
-static inline void
-ksocknal_peer_decref(struct ksock_peer *peer)
-{
-	LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
-	if (atomic_dec_and_test(&peer->ksnp_refcount))
-		ksocknal_destroy_peer(peer);
-}
-
-int ksocknal_startup(struct lnet_ni *ni);
-void ksocknal_shutdown(struct lnet_ni *ni);
-int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg);
-int ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
-int ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-		  int delayed, struct iov_iter *to, unsigned int rlen);
-int ksocknal_accept(struct lnet_ni *ni, struct socket *sock);
-
-int ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip,
-		      int port);
-struct ksock_peer *ksocknal_find_peer_locked(struct lnet_ni *ni,
-					     struct lnet_process_id id);
-struct ksock_peer *ksocknal_find_peer(struct lnet_ni *ni,
-				      struct lnet_process_id id);
-void ksocknal_peer_failed(struct ksock_peer *peer);
-int ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
-			 struct socket *sock, int type);
-void ksocknal_close_conn_locked(struct ksock_conn *conn, int why);
-void ksocknal_terminate_conn(struct ksock_conn *conn);
-void ksocknal_destroy_conn(struct ksock_conn *conn);
-int  ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
-				      __u32 ipaddr, int why);
-int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
-int ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr);
-struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
-					     struct ksock_tx *tx, int nonblk);
-
-int  ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
-			    struct lnet_process_id id);
-struct ksock_tx *ksocknal_alloc_tx(int type, int size);
-void ksocknal_free_tx(struct ksock_tx *tx);
-struct ksock_tx *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
-void ksocknal_next_tx_carrier(struct ksock_conn *conn);
-void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn);
-void ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error);
-void ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive);
-void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when);
-int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
-void ksocknal_thread_fini(void);
-void ksocknal_launch_all_connections_locked(struct ksock_peer *peer);
-struct ksock_route *ksocknal_find_connectable_route_locked(struct ksock_peer *peer);
-struct ksock_route *ksocknal_find_connecting_route_locked(struct ksock_peer *peer);
-int ksocknal_new_packet(struct ksock_conn *conn, int skip);
-int ksocknal_scheduler(void *arg);
-int ksocknal_connd(void *arg);
-int ksocknal_reaper(void *arg);
-int ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
-			lnet_nid_t peer_nid, struct ksock_hello_msg *hello);
-int ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
-			struct ksock_hello_msg *hello,
-			struct lnet_process_id *id,
-			__u64 *incarnation);
-void ksocknal_read_callback(struct ksock_conn *conn);
-void ksocknal_write_callback(struct ksock_conn *conn);
-
-int ksocknal_lib_zc_capable(struct ksock_conn *conn);
-void ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_set_callback(struct socket *sock,  struct ksock_conn *conn);
-void ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_push_conn(struct ksock_conn *conn);
-int ksocknal_lib_get_conn_addrs(struct ksock_conn *conn);
-int ksocknal_lib_setup_sock(struct socket *so);
-int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
-int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
-void ksocknal_lib_eager_ack(struct ksock_conn *conn);
-int ksocknal_lib_recv(struct ksock_conn *conn);
-int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
-				   int *rxmem, int *nagle);
-
-void ksocknal_read_callback(struct ksock_conn *conn);
-void ksocknal_write_callback(struct ksock_conn *conn);
-
-int ksocknal_tunables_init(void);
-
-void ksocknal_lib_csum_tx(struct ksock_tx *tx);
-
-int ksocknal_lib_memory_pressure(struct ksock_conn *conn);
-int ksocknal_lib_bind_thread_to_cpu(int id);
-
-#endif /* _SOCKLND_SOCKLND_H_ */

+ 0 - 2586
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c

@@ -1,2586 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#include <linux/sched/mm.h>
-#include "socklnd.h"
-
-struct ksock_tx *
-ksocknal_alloc_tx(int type, int size)
-{
-	struct ksock_tx *tx = NULL;
-
-	if (type == KSOCK_MSG_NOOP) {
-		LASSERT(size == KSOCK_NOOP_TX_SIZE);
-
-		/* searching for a noop tx in free list */
-		spin_lock(&ksocknal_data.ksnd_tx_lock);
-
-		if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
-			tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
-					struct ksock_tx, tx_list);
-			LASSERT(tx->tx_desc_size == size);
-			list_del(&tx->tx_list);
-		}
-
-		spin_unlock(&ksocknal_data.ksnd_tx_lock);
-	}
-
-	if (!tx)
-		tx = kzalloc(size, GFP_NOFS);
-
-	if (!tx)
-		return NULL;
-
-	atomic_set(&tx->tx_refcount, 1);
-	tx->tx_zc_aborted = 0;
-	tx->tx_zc_capable = 0;
-	tx->tx_zc_checked = 0;
-	tx->tx_desc_size  = size;
-
-	atomic_inc(&ksocknal_data.ksnd_nactive_txs);
-
-	return tx;
-}
-
-struct ksock_tx *
-ksocknal_alloc_tx_noop(__u64 cookie, int nonblk)
-{
-	struct ksock_tx *tx;
-
-	tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE);
-	if (!tx) {
-		CERROR("Can't allocate noop tx desc\n");
-		return NULL;
-	}
-
-	tx->tx_conn    = NULL;
-	tx->tx_lnetmsg = NULL;
-	tx->tx_kiov    = NULL;
-	tx->tx_nkiov   = 0;
-	tx->tx_iov     = tx->tx_frags.virt.iov;
-	tx->tx_niov    = 1;
-	tx->tx_nonblk  = nonblk;
-
-	tx->tx_msg.ksm_csum = 0;
-	tx->tx_msg.ksm_type = KSOCK_MSG_NOOP;
-	tx->tx_msg.ksm_zc_cookies[0] = 0;
-	tx->tx_msg.ksm_zc_cookies[1] = cookie;
-
-	return tx;
-}
-
-void
-ksocknal_free_tx(struct ksock_tx *tx)
-{
-	atomic_dec(&ksocknal_data.ksnd_nactive_txs);
-
-	if (!tx->tx_lnetmsg && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) {
-		/* it's a noop tx */
-		spin_lock(&ksocknal_data.ksnd_tx_lock);
-
-		list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs);
-
-		spin_unlock(&ksocknal_data.ksnd_tx_lock);
-	} else {
-		kfree(tx);
-	}
-}
-
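
Since NOOP descriptors are all one size, ksocknal_free_tx() parks them on ksnd_idle_noop_txs and ksocknal_alloc_tx() reuses a parked one before falling back to kzalloc(). The freelist round-trip in miniature (userspace sketch; the kernel list_head is reduced to a singly linked stack and ksnd_tx_lock is omitted):

#include <stdio.h>
#include <stdlib.h>

struct tx {
	struct tx *next;
	int desc_size;
};

static struct tx *idle_noop_txs;	/* freelist head */

static struct tx *tx_alloc(int size)
{
	struct tx *tx = idle_noop_txs;

	if (tx)				/* reuse a parked descriptor */
		idle_noop_txs = tx->next;
	else
		tx = calloc(1, sizeof(*tx));
	if (tx)
		tx->desc_size = size;
	return tx;
}

static void tx_free(struct tx *tx)
{
	tx->next = idle_noop_txs;	/* park instead of freeing */
	idle_noop_txs = tx;
}

int main(void)
{
	struct tx *a = tx_alloc(64);
	struct tx *b;

	tx_free(a);
	b = tx_alloc(64);
	printf("reused: %s\n", b == a ? "yes" : "no");	/* "yes" */
	free(b);
	return 0;
}
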
-static int
-ksocknal_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	struct kvec *iov = tx->tx_iov;
-	int nob;
-	int rc;
-
-	LASSERT(tx->tx_niov > 0);
-
-	/* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */
-	rc = ksocknal_lib_send_iov(conn, tx);
-
-	if (rc <= 0)			    /* sent nothing? */
-		return rc;
-
-	nob = rc;
-	LASSERT(nob <= tx->tx_resid);
-	tx->tx_resid -= nob;
-
-	/* "consume" iov */
-	do {
-		LASSERT(tx->tx_niov > 0);
-
-		if (nob < (int)iov->iov_len) {
-			iov->iov_base = (void *)((char *)iov->iov_base + nob);
-			iov->iov_len -= nob;
-			return rc;
-		}
-
-		nob -= iov->iov_len;
-		tx->tx_iov = ++iov;
-		tx->tx_niov--;
-	} while (nob);
-
-	return rc;
-}
-
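
The "consume iov" loop above is the standard bookkeeping for resuming a partial write: fully sent fragments are popped off the front of the array, and a partially sent fragment has its base and length adjusted in place so the next send resumes mid-fragment. That bookkeeping isolated as a userspace sketch (consume_iov is a hypothetical helper; struct iovec comes from POSIX <sys/uio.h>):

#include <stdio.h>
#include <sys/uio.h>

/* advance (iov, niov) past 'nob' freshly sent bytes */
static void consume_iov(struct iovec **iovp, int *niovp, size_t nob)
{
	struct iovec *iov = *iovp;

	while (nob) {
		if (nob < iov->iov_len) {	/* frag partially sent */
			iov->iov_base = (char *)iov->iov_base + nob;
			iov->iov_len -= nob;
			break;
		}
		nob -= iov->iov_len;		/* frag fully sent */
		iov++;
		(*niovp)--;
	}
	*iovp = iov;
}

int main(void)
{
	char a[10], b[10];
	struct iovec frags[2] = { { a, 10 }, { b, 10 } };
	struct iovec *iov = frags;
	int niov = 2;

	consume_iov(&iov, &niov, 13);	/* 10 + 3 bytes were sent */

	/* prints "niov=1 left=7" */
	printf("niov=%d left=%zu\n", niov, iov->iov_len);
	return 0;
}
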
-static int
-ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	struct bio_vec *kiov = tx->tx_kiov;
-	int nob;
-	int rc;
-
-	LASSERT(!tx->tx_niov);
-	LASSERT(tx->tx_nkiov > 0);
-
-	/* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */
-	rc = ksocknal_lib_send_kiov(conn, tx);
-
-	if (rc <= 0)			    /* sent nothing? */
-		return rc;
-
-	nob = rc;
-	LASSERT(nob <= tx->tx_resid);
-	tx->tx_resid -= nob;
-
-	/* "consume" kiov */
-	do {
-		LASSERT(tx->tx_nkiov > 0);
-
-		if (nob < (int)kiov->bv_len) {
-			kiov->bv_offset += nob;
-			kiov->bv_len -= nob;
-			return rc;
-		}
-
-		nob -= (int)kiov->bv_len;
-		tx->tx_kiov = ++kiov;
-		tx->tx_nkiov--;
-	} while (nob);
-
-	return rc;
-}
-
-static int
-ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	int rc;
-	int bufnob;
-
-	if (ksocknal_data.ksnd_stall_tx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(ksocknal_data.ksnd_stall_tx * HZ);
-	}
-
-	LASSERT(tx->tx_resid);
-
-	rc = ksocknal_connsock_addref(conn);
-	if (rc) {
-		LASSERT(conn->ksnc_closing);
-		return -ESHUTDOWN;
-	}
-
-	do {
-		if (ksocknal_data.ksnd_enomem_tx > 0) {
-			/* testing... */
-			ksocknal_data.ksnd_enomem_tx--;
-			rc = -EAGAIN;
-		} else if (tx->tx_niov) {
-			rc = ksocknal_send_iov(conn, tx);
-		} else {
-			rc = ksocknal_send_kiov(conn, tx);
-		}
-
-		bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-		if (rc > 0)		     /* sent something? */
-			conn->ksnc_tx_bufnob += rc; /* account it */
-
-		if (bufnob < conn->ksnc_tx_bufnob) {
-			/*
-			 * allocated send buffer bytes < computed; infer
-			 * something got ACKed
-			 */
-			conn->ksnc_tx_deadline =
-				jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-			conn->ksnc_peer->ksnp_last_alive = jiffies;
-			conn->ksnc_tx_bufnob = bufnob;
-			mb();
-		}
-
-		if (rc <= 0) { /* Didn't write anything? */
-
-			if (!rc) /* some stacks return 0 instead of -EAGAIN */
-				rc = -EAGAIN;
-
-			/* Check if EAGAIN is due to memory pressure */
-			if (rc == -EAGAIN && ksocknal_lib_memory_pressure(conn))
-				rc = -ENOMEM;
-
-			break;
-		}
-
-		/* socket's wmem_queued now includes 'rc' bytes */
-		atomic_sub(rc, &conn->ksnc_tx_nob);
-		rc = 0;
-
-	} while (tx->tx_resid);
-
-	ksocknal_connsock_decref(conn);
-	return rc;
-}
-
-static int
-ksocknal_recv_iter(struct ksock_conn *conn)
-{
-	int nob;
-	int rc;
-
-	/*
-	 * Never touch conn->ksnc_rx_to or change connection
-	 * status inside ksocknal_lib_recv
-	 */
-	rc = ksocknal_lib_recv(conn);
-
-	if (rc <= 0)
-		return rc;
-
-	/* received something... */
-	nob = rc;
-
-	conn->ksnc_peer->ksnp_last_alive = jiffies;
-	conn->ksnc_rx_deadline =
-		jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-	mb();		       /* order with setting rx_started */
-	conn->ksnc_rx_started = 1;
-
-	conn->ksnc_rx_nob_left -= nob;
-
-	iov_iter_advance(&conn->ksnc_rx_to, nob);
-	if (iov_iter_count(&conn->ksnc_rx_to))
-		return -EAGAIN;
-
-	return 1;
-}
-
-static int
-ksocknal_receive(struct ksock_conn *conn)
-{
-	/*
-	 * Return 1 on success, 0 on EOF, < 0 on error.
-	 * Caller checks ksnc_rx_to to determine
-	 * progress/completion.
-	 */
-	int rc;
-
-	if (ksocknal_data.ksnd_stall_rx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(ksocknal_data.ksnd_stall_rx * HZ);
-	}
-
-	rc = ksocknal_connsock_addref(conn);
-	if (rc) {
-		LASSERT(conn->ksnc_closing);
-		return -ESHUTDOWN;
-	}
-
-	for (;;) {
-		rc = ksocknal_recv_iter(conn);
-		if (rc <= 0) {
-			/* error/EOF or partial receive */
-			if (rc == -EAGAIN) {
-				rc = 1;
-			} else if (!rc && conn->ksnc_rx_started) {
-				/* EOF in the middle of a message */
-				rc = -EPROTO;
-			}
-			break;
-		}
-
-		/* Completed a fragment */
-
-		if (!iov_iter_count(&conn->ksnc_rx_to)) {
-			rc = 1;
-			break;
-		}
-	}
-
-	ksocknal_connsock_decref(conn);
-	return rc;
-}
-
-void
-ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx)
-{
-	struct lnet_msg *lnetmsg = tx->tx_lnetmsg;
-	int rc = (!tx->tx_resid && !tx->tx_zc_aborted) ? 0 : -EIO;
-
-	LASSERT(ni || tx->tx_conn);
-
-	if (tx->tx_conn)
-		ksocknal_conn_decref(tx->tx_conn);
-
-	if (!ni && tx->tx_conn)
-		ni = tx->tx_conn->ksnc_peer->ksnp_ni;
-
-	ksocknal_free_tx(tx);
-	if (lnetmsg) /* KSOCK_MSG_NOOP goes without lnetmsg */
-		lnet_finalize(ni, lnetmsg, rc);
-}
-
-void
-ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error)
-{
-	struct ksock_tx *tx;
-
-	while (!list_empty(txlist)) {
-		tx = list_entry(txlist->next, struct ksock_tx, tx_list);
-
-		if (error && tx->tx_lnetmsg) {
-			CNETERR("Deleting packet type %d len %d %s->%s\n",
-				le32_to_cpu(tx->tx_lnetmsg->msg_hdr.type),
-				le32_to_cpu(tx->tx_lnetmsg->msg_hdr.payload_length),
-				libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
-				libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid)));
-		} else if (error) {
-			CNETERR("Deleting noop packet\n");
-		}
-
-		list_del(&tx->tx_list);
-
-		LASSERT(atomic_read(&tx->tx_refcount) == 1);
-		ksocknal_tx_done(ni, tx);
-	}
-}
-
-static void
-ksocknal_check_zc_req(struct ksock_tx *tx)
-{
-	struct ksock_conn *conn = tx->tx_conn;
-	struct ksock_peer *peer = conn->ksnc_peer;
-
-	/*
-	 * Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx
-	 * to ksnp_zc_req_list if some fragment of this message should be sent
-	 * zero-copy.  Our peer will send an ACK containing this cookie when
-	 * she has received this message to tell us we can signal completion.
-	 * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on
-	 * ksnp_zc_req_list.
-	 */
-	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-	LASSERT(tx->tx_zc_capable);
-
-	tx->tx_zc_checked = 1;
-
-	if (conn->ksnc_proto == &ksocknal_protocol_v1x ||
-	    !conn->ksnc_zc_capable)
-		return;
-
-	/*
-	 * assign cookie and queue tx to pending list, it will be released when
-	 * a matching ack is received. See ksocknal_handle_zcack()
-	 */
-	ksocknal_tx_addref(tx);
-
-	spin_lock(&peer->ksnp_lock);
-
-	/* ZC_REQ is going to be pinned to the peer */
-	tx->tx_deadline =
-		jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
-	LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
-
-	tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++;
-
-	if (!peer->ksnp_zc_next_cookie)
-		peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
-	list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
-
-	spin_unlock(&peer->ksnp_lock);
-}
-
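
ZC cookies come from a per-peer 64-bit counter: zero means "no ZC pending", and the wrap-around handling above restarts the counter past SOCKNAL_KEEPALIVE_PING so a data cookie can never collide with the reserved keepalive cookie. The assignment rule in isolation (userspace sketch; the starting value is contrived so the wrap is reached immediately):

#include <stdio.h>

#define SOCKNAL_KEEPALIVE_PING 1ULL	/* reserved cookie */

static unsigned long long zc_next_cookie = 0xfffffffffffffffeULL;

static unsigned long long next_cookie(void)
{
	unsigned long long c = zc_next_cookie++;

	if (!zc_next_cookie)	/* wrapped: skip 0 and the ping cookie */
		zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
	return c;
}

int main(void)
{
	int i;

	/* prints fffffffffffffffe, ffffffffffffffff, then 2 */
	for (i = 0; i < 3; i++)
		printf("%llx\n", next_cookie());
	return 0;
}
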
-static void
-ksocknal_uncheck_zc_req(struct ksock_tx *tx)
-{
-	struct ksock_peer *peer = tx->tx_conn->ksnc_peer;
-
-	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-	LASSERT(tx->tx_zc_capable);
-
-	tx->tx_zc_checked = 0;
-
-	spin_lock(&peer->ksnp_lock);
-
-	if (!tx->tx_msg.ksm_zc_cookies[0]) {
-		/* Not waiting for an ACK */
-		spin_unlock(&peer->ksnp_lock);
-		return;
-	}
-
-	tx->tx_msg.ksm_zc_cookies[0] = 0;
-	list_del(&tx->tx_zc_list);
-
-	spin_unlock(&peer->ksnp_lock);
-
-	ksocknal_tx_decref(tx);
-}
-
-static int
-ksocknal_process_transmit(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	int rc;
-
-	if (tx->tx_zc_capable && !tx->tx_zc_checked)
-		ksocknal_check_zc_req(tx);
-
-	rc = ksocknal_transmit(conn, tx);
-
-	CDEBUG(D_NET, "send(%d) %d\n", tx->tx_resid, rc);
-
-	if (!tx->tx_resid) {
-		/* Sent everything OK */
-		LASSERT(!rc);
-
-		return 0;
-	}
-
-	if (rc == -EAGAIN)
-		return rc;
-
-	if (rc == -ENOMEM) {
-		static int counter;
-
-		counter++;   /* warn at exponentially growing intervals */
-		if ((counter & (-counter)) == counter) /* power of 2? */
-			CWARN("%u ENOMEM tx %p\n", counter, conn);
-
-		/* Queue on ksnd_enomem_conns for retry after a timeout */
-		spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-		/* enomem list takes over scheduler's ref... */
-		LASSERT(conn->ksnc_tx_scheduled);
-		list_add_tail(&conn->ksnc_tx_list,
-			      &ksocknal_data.ksnd_enomem_conns);
-		if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY,
-				   ksocknal_data.ksnd_reaper_waketime))
-			wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
-		spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-		return rc;
-	}
-
-	/* Actual error */
-	LASSERT(rc < 0);
-
-	if (!conn->ksnc_closing) {
-		switch (rc) {
-		case -ECONNRESET:
-			LCONSOLE_WARN("Host %pI4h reset our connection while we were sending data; it may have rebooted.\n",
-				      &conn->ksnc_ipaddr);
-			break;
-		default:
-			LCONSOLE_WARN("There was an unexpected network error while writing to %pI4h: %d.\n",
-				      &conn->ksnc_ipaddr, rc);
-			break;
-		}
-		CDEBUG(D_NET, "[%p] Error %d on write to %s ip %pI4h:%d\n",
-		       conn, rc,
-		       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-		       &conn->ksnc_ipaddr,
-		       conn->ksnc_port);
-	}
-
-	if (tx->tx_zc_checked)
-		ksocknal_uncheck_zc_req(tx);
-
-	/* it's not an error if conn is being closed */
-	ksocknal_close_conn_and_siblings(conn, (conn->ksnc_closing) ? 0 : rc);
-
-	return rc;
-}
-
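
The (counter & (-counter)) == counter test in the ENOMEM path above is true exactly when counter is a power of two, because counter & -counter isolates the lowest set bit. The warning therefore fires on failures 1, 2, 4, 8, ... and the log rate halves as failures accumulate. Demonstrated stand-alone:

#include <stdio.h>

int main(void)
{
	unsigned int counter;

	for (counter = 1; counter <= 20; counter++) {
		/* counter & -counter keeps only the lowest set bit */
		if ((counter & -counter) == counter)
			printf("warn at failure #%u\n", counter);
	}
	/* warns at 1, 2, 4, 8 and 16 only */
	return 0;
}
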
-static void
-ksocknal_launch_connection_locked(struct ksock_route *route)
-{
-	/* called holding write lock on ksnd_global_lock */
-
-	LASSERT(!route->ksnr_scheduled);
-	LASSERT(!route->ksnr_connecting);
-	LASSERT(ksocknal_route_mask() & ~route->ksnr_connected);
-
-	route->ksnr_scheduled = 1;	      /* scheduling conn for connd */
-	ksocknal_route_addref(route);	   /* extra ref for connd */
-
-	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-
-	list_add_tail(&route->ksnr_connd_list,
-		      &ksocknal_data.ksnd_connd_routes);
-	wake_up(&ksocknal_data.ksnd_connd_waitq);
-
-	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-}
-
-void
-ksocknal_launch_all_connections_locked(struct ksock_peer *peer)
-{
-	struct ksock_route *route;
-
-	/* called holding write lock on ksnd_global_lock */
-	for (;;) {
-		/* launch any/all connections that need it */
-		route = ksocknal_find_connectable_route_locked(peer);
-		if (!route)
-			return;
-
-		ksocknal_launch_connection_locked(route);
-	}
-}
-
-struct ksock_conn *
-ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx,
-			  int nonblk)
-{
-	struct list_head *tmp;
-	struct ksock_conn *conn;
-	struct ksock_conn *typed = NULL;
-	struct ksock_conn *fallback = NULL;
-	int tnob = 0;
-	int fnob = 0;
-
-	list_for_each(tmp, &peer->ksnp_conns) {
-		struct ksock_conn *c;
-		int nob, rc;
-
-		c = list_entry(tmp, struct ksock_conn, ksnc_list);
-		nob = atomic_read(&c->ksnc_tx_nob) +
-		      c->ksnc_sock->sk->sk_wmem_queued;
-
-		LASSERT(!c->ksnc_closing);
-		LASSERT(c->ksnc_proto &&
-			c->ksnc_proto->pro_match_tx);
-
-		rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk);
-
-		switch (rc) {
-		default:
-			LBUG();
-		case SOCKNAL_MATCH_NO: /* protocol rejected the tx */
-			continue;
-
-		case SOCKNAL_MATCH_YES: /* typed connection */
-			if (!typed || tnob > nob ||
-			    (tnob == nob && *ksocknal_tunables.ksnd_round_robin &&
-			     time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
-				typed = c;
-				tnob  = nob;
-			}
-			break;
-
-		case SOCKNAL_MATCH_MAY: /* fallback connection */
-			if (!fallback || fnob > nob ||
-			    (fnob == nob && *ksocknal_tunables.ksnd_round_robin &&
-			     time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
-				fallback = c;
-				fnob = nob;
-			}
-			break;
-		}
-	}
-
-	/* prefer the typed selection */
-	conn = (typed) ? typed : fallback;
-
-	if (conn)
-		conn->ksnc_tx_last_post = jiffies;
-
-	return conn;
-}
-
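
Connection selection above is "fewest queued bytes wins": typed matches are preferred over MATCH_MAY fallbacks, and with the round_robin tunable set, ties go to the connection that posted least recently. The core comparison reduced to its essentials (userspace sketch; plain longs stand in for jiffies and time_after()):

#include <stdio.h>

struct conn {
	const char *name;
	int queued_nob;		/* bytes already queued on the socket */
	long last_post;		/* when this conn last carried a tx */
};

/* does candidate 'c' beat the current 'best'? (round-robin tiebreak on) */
static int beats(const struct conn *best, const struct conn *c)
{
	if (!best || best->queued_nob > c->queued_nob)
		return 1;
	return best->queued_nob == c->queued_nob &&
	       best->last_post > c->last_post;	/* least recently used */
}

int main(void)
{
	struct conn a = { "a", 100, 50 };
	struct conn b = { "b", 100, 10 };
	const struct conn *best = NULL;

	if (beats(best, &a))
		best = &a;
	if (beats(best, &b))
		best = &b;

	/* same queue depth, so the older post wins: prints "picked b" */
	printf("picked %s\n", best->name);
	return 0;
}
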
-void
-ksocknal_tx_prep(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	conn->ksnc_proto->pro_pack(tx);
-
-	atomic_add(tx->tx_nob, &conn->ksnc_tx_nob);
-	ksocknal_conn_addref(conn); /* +1 ref for tx */
-	tx->tx_conn = conn;
-}
-
-void
-ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn)
-{
-	struct ksock_sched *sched = conn->ksnc_scheduler;
-	struct ksock_msg *msg = &tx->tx_msg;
-	struct ksock_tx *ztx = NULL;
-	int bufnob = 0;
-
-	/*
-	 * called holding global lock (read or irq-write) and caller may
-	 * not have dropped this lock between finding conn and calling me,
-	 * so we don't need the {get,put}connsock dance to deref
-	 * ksnc_sock...
-	 */
-	LASSERT(!conn->ksnc_closing);
-
-	CDEBUG(D_NET, "Sending to %s ip %pI4h:%d\n",
-	       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-	       &conn->ksnc_ipaddr, conn->ksnc_port);
-
-	ksocknal_tx_prep(conn, tx);
-
-	/*
-	 * Ensure the frags we've been given EXACTLY match the number of
-	 * bytes we want to send.  Many TCP/IP stacks disregard any total
-	 * size parameters passed to them and just look at the frags.
-	 *
-	 * We always expect at least 1 mapped fragment containing the
-	 * complete ksocknal message header.
-	 */
-	LASSERT(lnet_iov_nob(tx->tx_niov, tx->tx_iov) +
-		lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) ==
-		(unsigned int)tx->tx_nob);
-	LASSERT(tx->tx_niov >= 1);
-	LASSERT(tx->tx_resid == tx->tx_nob);
-
-	CDEBUG(D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
-	       tx, (tx->tx_lnetmsg) ? tx->tx_lnetmsg->msg_hdr.type :
-					      KSOCK_MSG_NOOP,
-	       tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
-
-	/*
-	 * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
-	 * but they're used inside spinlocks a lot.
-	 */
-	bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-	spin_lock_bh(&sched->kss_lock);
-
-	if (list_empty(&conn->ksnc_tx_queue) && !bufnob) {
-		/* First packet starts the timeout */
-		conn->ksnc_tx_deadline =
-			jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-		if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */
-			conn->ksnc_peer->ksnp_last_alive = jiffies;
-		conn->ksnc_tx_bufnob = 0;
-		mb(); /* order with adding to tx_queue */
-	}
-
-	if (msg->ksm_type == KSOCK_MSG_NOOP) {
-		/*
-		 * The packet is a noop ZC-ACK; try to piggyback the ack_cookie
-		 * on a normal packet so I don't need to send it
-		 */
-		LASSERT(msg->ksm_zc_cookies[1]);
-		LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
-
-		/* ZC ACK piggybacked on ztx release tx later */
-		if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0))
-			ztx = tx;
-	} else {
-		/*
-		 * It's a normal packet - can it piggyback a noop zc-ack that
-		 * has been queued already?
-		 */
-		LASSERT(!msg->ksm_zc_cookies[1]);
-		LASSERT(conn->ksnc_proto->pro_queue_tx_msg);
-
-		ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx);
-		/* ztx will be released later */
-	}
-
-	if (ztx) {
-		atomic_sub(ztx->tx_nob, &conn->ksnc_tx_nob);
-		list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs);
-	}
-
-	if (conn->ksnc_tx_ready &&      /* able to send */
-	    !conn->ksnc_tx_scheduled) { /* not scheduled to send */
-		/* +1 ref for scheduler */
-		ksocknal_conn_addref(conn);
-		list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
-		conn->ksnc_tx_scheduled = 1;
-		wake_up(&sched->kss_waitq);
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-}
-
-struct ksock_route *
-ksocknal_find_connectable_route_locked(struct ksock_peer *peer)
-{
-	unsigned long now = jiffies;
-	struct list_head *tmp;
-	struct ksock_route *route;
-
-	list_for_each(tmp, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
-
-		/* connections being established */
-		if (route->ksnr_scheduled)
-			continue;
-
-		/* all route types connected ? */
-		if (!(ksocknal_route_mask() & ~route->ksnr_connected))
-			continue;
-
-		if (!(!route->ksnr_retry_interval || /* first attempt */
-		      time_after_eq(now, route->ksnr_timeout))) {
-			CDEBUG(D_NET,
-			       "Too soon to retry route %pI4h (cnted %d, interval %ld, %ld secs later)\n",
-			       &route->ksnr_ipaddr,
-			       route->ksnr_connected,
-			       route->ksnr_retry_interval,
-			       (route->ksnr_timeout - now) / HZ);
-			continue;
-		}
-
-		return route;
-	}
-
-	return NULL;
-}
-
-struct ksock_route *
-ksocknal_find_connecting_route_locked(struct ksock_peer *peer)
-{
-	struct list_head *tmp;
-	struct ksock_route *route;
-
-	list_for_each(tmp, &peer->ksnp_routes) {
-		route = list_entry(tmp, struct ksock_route, ksnr_list);
-
-		LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
-
-		if (route->ksnr_scheduled)
-			return route;
-	}
-
-	return NULL;
-}
-
-int
-ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
-		       struct lnet_process_id id)
-{
-	struct ksock_peer *peer;
-	struct ksock_conn *conn;
-	rwlock_t *g_lock;
-	int retry;
-	int rc;
-
-	LASSERT(!tx->tx_conn);
-
-	g_lock = &ksocknal_data.ksnd_global_lock;
-
-	for (retry = 0;; retry = 1) {
-		read_lock(g_lock);
-		peer = ksocknal_find_peer_locked(ni, id);
-		if (peer) {
-			if (!ksocknal_find_connectable_route_locked(peer)) {
-				conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
-				if (conn) {
-					/*
-					 * I've got no routes that need to be
-					 * connecting and I do have an actual
-					 * connection...
-					 */
-					ksocknal_queue_tx_locked(tx, conn);
-					read_unlock(g_lock);
-					return 0;
-				}
-			}
-		}
-
-		/* I'll need a write lock... */
-		read_unlock(g_lock);
-
-		write_lock_bh(g_lock);
-
-		peer = ksocknal_find_peer_locked(ni, id);
-		if (peer)
-			break;
-
-		write_unlock_bh(g_lock);
-
-		if (id.pid & LNET_PID_USERFLAG) {
-			CERROR("Refusing to create a connection to userspace process %s\n",
-			       libcfs_id2str(id));
-			return -EHOSTUNREACH;
-		}
-
-		if (retry) {
-			CERROR("Can't find peer %s\n", libcfs_id2str(id));
-			return -EHOSTUNREACH;
-		}
-
-		rc = ksocknal_add_peer(ni, id,
-				       LNET_NIDADDR(id.nid),
-				       lnet_acceptor_port());
-		if (rc) {
-			CERROR("Can't add peer %s: %d\n",
-			       libcfs_id2str(id), rc);
-			return rc;
-		}
-	}
-
-	ksocknal_launch_all_connections_locked(peer);
-
-	conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
-	if (conn) {
-		/* Connection exists; queue message on it */
-		ksocknal_queue_tx_locked(tx, conn);
-		write_unlock_bh(g_lock);
-		return 0;
-	}
-
-	if (peer->ksnp_accepting > 0 ||
-	    ksocknal_find_connecting_route_locked(peer)) {
-		/* the message is going to be pinned to the peer */
-		tx->tx_deadline =
-			jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
-		/* Queue the message until a connection is established */
-		list_add_tail(&tx->tx_list, &peer->ksnp_tx_queue);
-		write_unlock_bh(g_lock);
-		return 0;
-	}
-
-	write_unlock_bh(g_lock);
-
-	/* NB Routes may be ignored if connections to them failed recently */
-	CNETERR("No usable routes to %s\n", libcfs_id2str(id));
-	return -EHOSTUNREACH;
-}
-
-int
-ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
-	unsigned int mpflag = 0;
-	int type = lntmsg->msg_type;
-	struct lnet_process_id target = lntmsg->msg_target;
-	unsigned int payload_niov = lntmsg->msg_niov;
-	struct kvec *payload_iov = lntmsg->msg_iov;
-	struct bio_vec *payload_kiov = lntmsg->msg_kiov;
-	unsigned int payload_offset = lntmsg->msg_offset;
-	unsigned int payload_nob = lntmsg->msg_len;
-	struct ksock_tx *tx;
-	int desc_size;
-	int rc;
-
-	/*
-	 * NB 'private' is different depending on what we're sending.
-	 * Just ignore it...
-	 */
-	CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
-	       payload_nob, payload_niov, libcfs_id2str(target));
-
-	LASSERT(!payload_nob || payload_niov > 0);
-	LASSERT(payload_niov <= LNET_MAX_IOV);
-	/* payload is either all vaddrs or all pages */
-	LASSERT(!(payload_kiov && payload_iov));
-	LASSERT(!in_interrupt());
-
-	if (payload_iov)
-		desc_size = offsetof(struct ksock_tx,
-				     tx_frags.virt.iov[1 + payload_niov]);
-	else
-		desc_size = offsetof(struct ksock_tx,
-				     tx_frags.paged.kiov[payload_niov]);
-
-	if (lntmsg->msg_vmflush)
-		mpflag = memalloc_noreclaim_save();
-	tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size);
-	if (!tx) {
-		CERROR("Can't allocate tx desc type %d size %d\n",
-		       type, desc_size);
-		if (lntmsg->msg_vmflush)
-			memalloc_noreclaim_restore(mpflag);
-		return -ENOMEM;
-	}
-
-	tx->tx_conn = NULL;		     /* set when assigned a conn */
-	tx->tx_lnetmsg = lntmsg;
-
-	if (payload_iov) {
-		tx->tx_kiov = NULL;
-		tx->tx_nkiov = 0;
-		tx->tx_iov = tx->tx_frags.virt.iov;
-		tx->tx_niov = 1 +
-			      lnet_extract_iov(payload_niov, &tx->tx_iov[1],
-					       payload_niov, payload_iov,
-					       payload_offset, payload_nob);
-	} else {
-		tx->tx_niov = 1;
-		tx->tx_iov = &tx->tx_frags.paged.iov;
-		tx->tx_kiov = tx->tx_frags.paged.kiov;
-		tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov,
-						 payload_niov, payload_kiov,
-						 payload_offset, payload_nob);
-
-		if (payload_nob >= *ksocknal_tunables.ksnd_zc_min_payload)
-			tx->tx_zc_capable = 1;
-	}
-
-	tx->tx_msg.ksm_csum = 0;
-	tx->tx_msg.ksm_type = KSOCK_MSG_LNET;
-	tx->tx_msg.ksm_zc_cookies[0] = 0;
-	tx->tx_msg.ksm_zc_cookies[1] = 0;
-
-	/* The first fragment will be set later in pro_pack */
-	rc = ksocknal_launch_packet(ni, tx, target);
-	if (mpflag)
-		memalloc_noreclaim_restore(mpflag);
-
-	if (!rc)
-		return 0;
-
-	ksocknal_free_tx(tx);
-	return -EIO;
-}
-
-int
-ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-	struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-	ksocknal_data.ksnd_nthreads++;
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-	return 0;
-}
-
-void
-ksocknal_thread_fini(void)
-{
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-	ksocknal_data.ksnd_nthreads--;
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip)
-{
-	static char ksocknal_slop_buffer[4096];
-	struct kvec *kvec = conn->ksnc_rx_iov_space;
-
-	int nob;
-	unsigned int niov;
-	int skipped;
-
-	LASSERT(conn->ksnc_proto);
-
-	if (*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) {
-		/* Remind the socket to ack eagerly... */
-		ksocknal_lib_eager_ack(conn);
-	}
-
-	if (!nob_to_skip) {	 /* right at next packet boundary now */
-		conn->ksnc_rx_started = 0;
-		mb();		       /* racing with timeout thread */
-
-		switch (conn->ksnc_proto->pro_version) {
-		case  KSOCK_PROTO_V2:
-		case  KSOCK_PROTO_V3:
-			conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
-			kvec->iov_base = &conn->ksnc_msg;
-			kvec->iov_len = offsetof(struct ksock_msg, ksm_u);
-			conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u);
-			iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
-					1, offsetof(struct ksock_msg, ksm_u));
-			break;
-
-		case KSOCK_PROTO_V1:
-			/* Receiving bare struct lnet_hdr */
-			conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-			kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
-			kvec->iov_len = sizeof(struct lnet_hdr);
-			conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr);
-			iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
-					1, sizeof(struct lnet_hdr));
-			break;
-
-		default:
-			LBUG();
-		}
-		conn->ksnc_rx_csum = ~0;
-		return 1;
-	}
-
-	/*
-	 * Set up to skip as much as possible now.  If there's more left
-	 * (ran out of iov entries) we'll get called again
-	 */
-	conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
-	conn->ksnc_rx_nob_left = nob_to_skip;
-	skipped = 0;
-	niov = 0;
-
-	do {
-		nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer));
-
-		kvec[niov].iov_base = ksocknal_slop_buffer;
-		kvec[niov].iov_len  = nob;
-		niov++;
-		skipped += nob;
-		nob_to_skip -= nob;
-
-	} while (nob_to_skip &&    /* mustn't overflow conn's rx iov */
-		 niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec));
-
-	iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped);
-	return 0;
-}
-
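
To discard unwanted bytes, the skip path above points every rx iov entry at one shared 4 KiB slop buffer, so a single receive can swallow several pages of slop without allocating anything; if the slop doesn't all fit, the function returns 0 and is called again. The trick in miniature (userspace sketch):

#include <stdio.h>
#include <sys/uio.h>

#define SLOP_SIZE 4096
#define MAX_IOV   8

static char slop[SLOP_SIZE];	/* shared scratch target */

/* build an iovec that discards up to 'nob_to_skip' bytes */
static unsigned int setup_skip(struct iovec *kvec, size_t nob_to_skip)
{
	unsigned int niov = 0;

	while (nob_to_skip && niov < MAX_IOV) {
		size_t nob = nob_to_skip < SLOP_SIZE ? nob_to_skip : SLOP_SIZE;

		kvec[niov].iov_base = slop;	/* every entry aliases slop */
		kvec[niov].iov_len = nob;
		niov++;
		nob_to_skip -= nob;
	}
	return niov;
}

int main(void)
{
	struct iovec kvec[MAX_IOV];

	/* 10000 bytes of slop: 4096 + 4096 + 1808, prints "niov=3" */
	printf("niov=%u\n", setup_skip(kvec, 10000));
	return 0;
}
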
-static int
-ksocknal_process_receive(struct ksock_conn *conn)
-{
-	struct kvec *kvec = conn->ksnc_rx_iov_space;
-	struct lnet_hdr *lhdr;
-	struct lnet_process_id *id;
-	int rc;
-
-	LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
-
-	/* NB: sched lock NOT held */
-	/* SOCKNAL_RX_LNET_HEADER is here for backward compatibility */
-	LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER ||
-		conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD ||
-		conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
-		conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
- again:
-	if (iov_iter_count(&conn->ksnc_rx_to)) {
-		rc = ksocknal_receive(conn);
-
-		if (rc <= 0) {
-			LASSERT(rc != -EAGAIN);
-
-			if (!rc)
-				CDEBUG(D_NET, "[%p] EOF from %s ip %pI4h:%d\n",
-				       conn,
-				       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-				       &conn->ksnc_ipaddr,
-				       conn->ksnc_port);
-			else if (!conn->ksnc_closing)
-				CERROR("[%p] Error %d on read from %s ip %pI4h:%d\n",
-				       conn, rc,
-				       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-				       &conn->ksnc_ipaddr,
-				       conn->ksnc_port);
-
-			/* it's not an error if conn is being closed */
-			ksocknal_close_conn_and_siblings(conn,
-							 (conn->ksnc_closing) ? 0 : rc);
-			return (!rc ? -ESHUTDOWN : rc);
-		}
-
-		if (iov_iter_count(&conn->ksnc_rx_to)) {
-			/* short read */
-			return -EAGAIN;
-		}
-	}
-	switch (conn->ksnc_rx_state) {
-	case SOCKNAL_RX_KSM_HEADER:
-		if (conn->ksnc_flip) {
-			__swab32s(&conn->ksnc_msg.ksm_type);
-			__swab32s(&conn->ksnc_msg.ksm_csum);
-			__swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]);
-			__swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]);
-		}
-
-		if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
-		    conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
-			CERROR("%s: Unknown message type: %x\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       conn->ksnc_msg.ksm_type);
-			ksocknal_new_packet(conn, 0);
-			ksocknal_close_conn_and_siblings(conn, -EPROTO);
-			return -EPROTO;
-		}
-
-		if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
-		    conn->ksnc_msg.ksm_csum &&     /* has checksum */
-		    conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
-			/* NOOP Checksum error */
-			CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
-			ksocknal_new_packet(conn, 0);
-			ksocknal_close_conn_and_siblings(conn, -EPROTO);
-			return -EIO;
-		}
-
-		if (conn->ksnc_msg.ksm_zc_cookies[1]) {
-			__u64 cookie = 0;
-
-			LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
-
-			if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP)
-				cookie = conn->ksnc_msg.ksm_zc_cookies[0];
-
-			rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie,
-					       conn->ksnc_msg.ksm_zc_cookies[1]);
-
-			if (rc) {
-				CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n",
-				       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-				       cookie, conn->ksnc_msg.ksm_zc_cookies[1]);
-				ksocknal_new_packet(conn, 0);
-				ksocknal_close_conn_and_siblings(conn, -EPROTO);
-				return rc;
-			}
-		}
-
-		if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) {
-			ksocknal_new_packet(conn, 0);
-			return 0;       /* NOOP is done and just return */
-		}
-
-		conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-		conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg);
-
-		kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
-		kvec->iov_len = sizeof(struct ksock_lnet_msg);
-
-		iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
-				1, sizeof(struct ksock_lnet_msg));
-
-		goto again;     /* read lnet header now */
-
-	case SOCKNAL_RX_LNET_HEADER:
-		/* unpack message header */
-		conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
-
-		if (conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) {
-			/* Userspace peer */
-			lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
-			id = &conn->ksnc_peer->ksnp_id;
-
-			/* Substitute process ID assigned at connection time */
-			lhdr->src_pid = cpu_to_le32(id->pid);
-			lhdr->src_nid = cpu_to_le64(id->nid);
-		}
-
-		conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
-		ksocknal_conn_addref(conn);     /* ++ref while parsing */
-
-		rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
-				&conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr,
-				conn->ksnc_peer->ksnp_id.nid, conn, 0);
-		if (rc < 0) {
-			/* I just received garbage: give up on this conn */
-			ksocknal_new_packet(conn, 0);
-			ksocknal_close_conn_and_siblings(conn, rc);
-			ksocknal_conn_decref(conn);
-			return -EPROTO;
-		}
-
-		/* I'm racing with ksocknal_recv() */
-		LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
-			conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
-
-		if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
-			return 0;
-
-		/* ksocknal_recv() got called */
-		goto again;
-
-	case SOCKNAL_RX_LNET_PAYLOAD:
-		/* payload all received */
-		rc = 0;
-
-		if (!conn->ksnc_rx_nob_left &&   /* not truncating */
-		    conn->ksnc_msg.ksm_csum &&  /* has checksum */
-		    conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
-			CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
-			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-			       conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
-			rc = -EIO;
-		}
-
-		if (!rc && conn->ksnc_msg.ksm_zc_cookies[0]) {
-			LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
-
-			lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
-			id = &conn->ksnc_peer->ksnp_id;
-
-			rc = conn->ksnc_proto->pro_handle_zcreq(conn,
-					conn->ksnc_msg.ksm_zc_cookies[0],
-					*ksocknal_tunables.ksnd_nonblk_zcack ||
-					le64_to_cpu(lhdr->src_nid) != id->nid);
-		}
-
-		lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc);
-
-		if (rc) {
-			ksocknal_new_packet(conn, 0);
-			ksocknal_close_conn_and_siblings(conn, rc);
-			return -EPROTO;
-		}
-		/* Fall through */
-
-	case SOCKNAL_RX_SLOP:
-		/* starting new packet? */
-		if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left))
-			return 0;       /* come back later */
-		goto again;	     /* try to finish reading slop now */
-
-	default:
-		break;
-	}
-
-	/* Not Reached */
-	LBUG();
-	return -EINVAL;		       /* keep gcc happy */
-}
-
-int
-ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-	      int delayed, struct iov_iter *to, unsigned int rlen)
-{
-	struct ksock_conn *conn = private;
-	struct ksock_sched *sched = conn->ksnc_scheduler;
-
-	LASSERT(iov_iter_count(to) <= rlen);
-	LASSERT(to->nr_segs <= LNET_MAX_IOV);
-
-	conn->ksnc_cookie = msg;
-	conn->ksnc_rx_nob_left = rlen;
-
-	conn->ksnc_rx_to = *to;
-
-	LASSERT(conn->ksnc_rx_scheduled);
-
-	spin_lock_bh(&sched->kss_lock);
-
-	switch (conn->ksnc_rx_state) {
-	case SOCKNAL_RX_PARSE_WAIT:
-		list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
-		wake_up(&sched->kss_waitq);
-		LASSERT(conn->ksnc_rx_ready);
-		break;
-
-	case SOCKNAL_RX_PARSE:
-		/* scheduler hasn't noticed I'm parsing yet */
-		break;
-	}
-
-	conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD;
-
-	spin_unlock_bh(&sched->kss_lock);
-	ksocknal_conn_decref(conn);
-	return 0;
-}
-
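-/*
- * A scheduler may sleep only while shutdown isn't pending and it has no
- * rx or tx connections queued for it.
- */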
-static inline int
-ksocknal_sched_cansleep(struct ksock_sched *sched)
-{
-	int rc;
-
-	spin_lock_bh(&sched->kss_lock);
-
-	rc = !ksocknal_data.ksnd_shuttingdown &&
-	      list_empty(&sched->kss_rx_conns) &&
-	      list_empty(&sched->kss_tx_conns);
-
-	spin_unlock_bh(&sched->kss_lock);
-	return rc;
-}
-
-int ksocknal_scheduler(void *arg)
-{
-	struct ksock_sched_info *info;
-	struct ksock_sched *sched;
-	struct ksock_conn *conn;
-	struct ksock_tx *tx;
-	int rc;
-	int nloops = 0;
-	long id = (long)arg;
-
-	info = ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)];
-	sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
-
-	rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt);
-	if (rc) {
-		CWARN("Can't set CPU partition affinity to %d: %d\n",
-		      info->ksi_cpt, rc);
-	}
-
-	spin_lock_bh(&sched->kss_lock);
-
-	while (!ksocknal_data.ksnd_shuttingdown) {
-		int did_something = 0;
-
-		/* Ensure I progress everything semi-fairly */
-
-		if (!list_empty(&sched->kss_rx_conns)) {
-			conn = list_entry(sched->kss_rx_conns.next,
-					  struct ksock_conn, ksnc_rx_list);
-			list_del(&conn->ksnc_rx_list);
-
-			LASSERT(conn->ksnc_rx_scheduled);
-			LASSERT(conn->ksnc_rx_ready);
-
-			/*
-			 * clear rx_ready in case receive isn't complete.
-			 * Do it BEFORE we call process_recv, since
-			 * data_ready can set it any time after we release
-			 * kss_lock.
-			 */
-			conn->ksnc_rx_ready = 0;
-			spin_unlock_bh(&sched->kss_lock);
-
-			rc = ksocknal_process_receive(conn);
-
-			spin_lock_bh(&sched->kss_lock);
-
-			/* I'm the only one that can clear this flag */
-			LASSERT(conn->ksnc_rx_scheduled);
-
-			/* Did process_receive get everything it wanted? */
-			if (!rc)
-				conn->ksnc_rx_ready = 1;
-
-			if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) {
-				/*
-				 * Conn blocked waiting for ksocknal_recv()
-				 * I change its state (under lock) to signal
-				 * it can be rescheduled
-				 */
-				conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT;
-			} else if (conn->ksnc_rx_ready) {
-				/* reschedule for rx */
-				list_add_tail(&conn->ksnc_rx_list,
-					      &sched->kss_rx_conns);
-			} else {
-				conn->ksnc_rx_scheduled = 0;
-				/* drop my ref */
-				ksocknal_conn_decref(conn);
-			}
-
-			did_something = 1;
-		}
-
-		if (!list_empty(&sched->kss_tx_conns)) {
-			LIST_HEAD(zlist);
-
-			if (!list_empty(&sched->kss_zombie_noop_txs)) {
-				list_add(&zlist, &sched->kss_zombie_noop_txs);
-				list_del_init(&sched->kss_zombie_noop_txs);
-			}
-
-			conn = list_entry(sched->kss_tx_conns.next,
-					  struct ksock_conn, ksnc_tx_list);
-			list_del(&conn->ksnc_tx_list);
-
-			LASSERT(conn->ksnc_tx_scheduled);
-			LASSERT(conn->ksnc_tx_ready);
-			LASSERT(!list_empty(&conn->ksnc_tx_queue));
-
-			tx = list_entry(conn->ksnc_tx_queue.next,
-					struct ksock_tx, tx_list);
-
-			if (conn->ksnc_tx_carrier == tx)
-				ksocknal_next_tx_carrier(conn);
-
-			/* dequeue now so empty list => more to send */
-			list_del(&tx->tx_list);
-
-			/*
-			 * Clear tx_ready in case send isn't complete.  Do
-			 * it BEFORE we call process_transmit, since
-			 * write_space can set it any time after we release
-			 * kss_lock.
-			 */
-			conn->ksnc_tx_ready = 0;
-			spin_unlock_bh(&sched->kss_lock);
-
-			if (!list_empty(&zlist)) {
-				/*
-				 * free zombie noop txs, it's fast because
-				 * noop txs are just put in freelist
-				 */
-				ksocknal_txlist_done(NULL, &zlist, 0);
-			}
-
-			rc = ksocknal_process_transmit(conn, tx);
-
-			if (rc == -ENOMEM || rc == -EAGAIN) {
-				/*
-				 * Incomplete send: replace tx on HEAD of
-				 * tx_queue
-				 */
-				spin_lock_bh(&sched->kss_lock);
-				list_add(&tx->tx_list, &conn->ksnc_tx_queue);
-			} else {
-				/* Complete send; tx -ref */
-				ksocknal_tx_decref(tx);
-
-				spin_lock_bh(&sched->kss_lock);
-				/* assume space for more */
-				conn->ksnc_tx_ready = 1;
-			}
-
-			if (rc == -ENOMEM) {
-				/*
-				 * Do nothing; after a short timeout, this
-				 * conn will be reposted on kss_tx_conns.
-				 */
-			} else if (conn->ksnc_tx_ready &&
-				   !list_empty(&conn->ksnc_tx_queue)) {
-				/* reschedule for tx */
-				list_add_tail(&conn->ksnc_tx_list,
-					      &sched->kss_tx_conns);
-			} else {
-				conn->ksnc_tx_scheduled = 0;
-				/* drop my ref */
-				ksocknal_conn_decref(conn);
-			}
-
-			did_something = 1;
-		}
-		if (!did_something ||	   /* nothing to do */
-		    ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
-			spin_unlock_bh(&sched->kss_lock);
-
-			nloops = 0;
-
-			if (!did_something) {   /* wait for something to do */
-				rc = wait_event_interruptible_exclusive(
-					sched->kss_waitq,
-					!ksocknal_sched_cansleep(sched));
-				LASSERT(!rc);
-			} else {
-				cond_resched();
-			}
-
-			spin_lock_bh(&sched->kss_lock);
-		}
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-	ksocknal_thread_fini();
-	return 0;
-}
-
-/*
- * Add connection to kss_rx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_read_callback(struct ksock_conn *conn)
-{
-	struct ksock_sched *sched;
-
-	sched = conn->ksnc_scheduler;
-
-	spin_lock_bh(&sched->kss_lock);
-
-	conn->ksnc_rx_ready = 1;
-
-	if (!conn->ksnc_rx_scheduled) {  /* not being progressed */
-		list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
-		conn->ksnc_rx_scheduled = 1;
-		/* extra ref for scheduler */
-		ksocknal_conn_addref(conn);
-
-		wake_up(&sched->kss_waitq);
-	}
-	spin_unlock_bh(&sched->kss_lock);
-}
-
-/*
- * Add connection to kss_tx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_write_callback(struct ksock_conn *conn)
-{
-	struct ksock_sched *sched;
-
-	sched = conn->ksnc_scheduler;
-
-	spin_lock_bh(&sched->kss_lock);
-
-	conn->ksnc_tx_ready = 1;
-
-	if (!conn->ksnc_tx_scheduled && /* not being progressed */
-	    !list_empty(&conn->ksnc_tx_queue)) { /* packets to send */
-		list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
-		conn->ksnc_tx_scheduled = 1;
-		/* extra ref for scheduler */
-		ksocknal_conn_addref(conn);
-
-		wake_up(&sched->kss_waitq);
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-}
-
-static struct ksock_proto *
-ksocknal_parse_proto_version(struct ksock_hello_msg *hello)
-{
-	__u32 version = 0;
-
-	if (hello->kshm_magic == LNET_PROTO_MAGIC)
-		version = hello->kshm_version;
-	else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC))
-		version = __swab32(hello->kshm_version);
-
-	if (version) {
-#if SOCKNAL_VERSION_DEBUG
-		if (*ksocknal_tunables.ksnd_protocol == 1)
-			return NULL;
-
-		if (*ksocknal_tunables.ksnd_protocol == 2 &&
-		    version == KSOCK_PROTO_V3)
-			return NULL;
-#endif
-		if (version == KSOCK_PROTO_V2)
-			return &ksocknal_protocol_v2x;
-
-		if (version == KSOCK_PROTO_V3)
-			return &ksocknal_protocol_v3x;
-
-		return NULL;
-	}
-
-	if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
-		struct lnet_magicversion *hmv = (struct lnet_magicversion *)hello;
-
-		BUILD_BUG_ON(sizeof(struct lnet_magicversion) !=
-			     offsetof(struct ksock_hello_msg, kshm_src_nid));
-
-		if (hmv->version_major == cpu_to_le16(KSOCK_PROTO_V1_MAJOR) &&
-		    hmv->version_minor == cpu_to_le16(KSOCK_PROTO_V1_MINOR))
-			return &ksocknal_protocol_v1x;
-	}
-
-	return NULL;
-}
-
-int
-ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
-		    lnet_nid_t peer_nid, struct ksock_hello_msg *hello)
-{
-	/* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
-	struct ksock_net *net = (struct ksock_net *)ni->ni_data;
-
-	LASSERT(hello->kshm_nips <= LNET_MAX_INTERFACES);
-
-	/* rely on caller to hold a ref on socket so it wouldn't disappear */
-	LASSERT(conn->ksnc_proto);
-
-	hello->kshm_src_nid = ni->ni_nid;
-	hello->kshm_dst_nid = peer_nid;
-	hello->kshm_src_pid = the_lnet.ln_pid;
-
-	hello->kshm_src_incarnation = net->ksnn_incarnation;
-	hello->kshm_ctype = conn->ksnc_type;
-
-	return conn->ksnc_proto->pro_send_hello(conn, hello);
-}
-
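-/*
- * Map a connection type to the type the peer should use for the same
- * connection: bulk-in pairs with bulk-out, control/any map to
- * themselves, and anything else is invalid.
- */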
-static int
-ksocknal_invert_type(int type)
-{
-	switch (type) {
-	case SOCKLND_CONN_ANY:
-	case SOCKLND_CONN_CONTROL:
-		return type;
-	case SOCKLND_CONN_BULK_IN:
-		return SOCKLND_CONN_BULK_OUT;
-	case SOCKLND_CONN_BULK_OUT:
-		return SOCKLND_CONN_BULK_IN;
-	default:
-		return SOCKLND_CONN_NONE;
-	}
-}
-
-int
-ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
-		    struct ksock_hello_msg *hello,
-		    struct lnet_process_id *peerid,
-		    __u64 *incarnation)
-{
-	/* Return  < 0       fatal error
-	 *         0         success
-	 *         EALREADY  lost connection race
-	 *         EPROTO    protocol version mismatch
-	 */
-	struct socket *sock = conn->ksnc_sock;
-	int active = !!conn->ksnc_proto;
-	int timeout;
-	int proto_match;
-	int rc;
-	struct ksock_proto *proto;
-	struct lnet_process_id recv_id;
-
-	/* socket type set on active connections - not set on passive */
-	LASSERT(!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
-
-	timeout = active ? *ksocknal_tunables.ksnd_timeout :
-			    lnet_acceptor_timeout();
-
-	rc = lnet_sock_read(sock, &hello->kshm_magic,
-			    sizeof(hello->kshm_magic), timeout);
-	if (rc) {
-		CERROR("Error %d reading HELLO from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0);
-		return rc;
-	}
-
-	if (hello->kshm_magic != LNET_PROTO_MAGIC &&
-	    hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) &&
-	    hello->kshm_magic != le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
-		/* Unexpected magic! */
-		CERROR("Bad magic(1) %#08x (%#08x expected) from %pI4h\n",
-		       __cpu_to_le32(hello->kshm_magic),
-		       LNET_PROTO_TCP_MAGIC,
-		       &conn->ksnc_ipaddr);
-		return -EPROTO;
-	}
-
-	rc = lnet_sock_read(sock, &hello->kshm_version,
-			    sizeof(hello->kshm_version), timeout);
-	if (rc) {
-		CERROR("Error %d reading HELLO from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0);
-		return rc;
-	}
-
-	proto = ksocknal_parse_proto_version(hello);
-	if (!proto) {
-		if (!active) {
-			/* unknown protocol from peer, tell peer my protocol */
-			conn->ksnc_proto = &ksocknal_protocol_v3x;
-#if SOCKNAL_VERSION_DEBUG
-			if (*ksocknal_tunables.ksnd_protocol == 2)
-				conn->ksnc_proto = &ksocknal_protocol_v2x;
-			else if (*ksocknal_tunables.ksnd_protocol == 1)
-				conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
-			hello->kshm_nips = 0;
-			ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
-		}
-
-		CERROR("Unknown protocol version (%d.x expected) from %pI4h\n",
-		       conn->ksnc_proto->pro_version,
-		       &conn->ksnc_ipaddr);
-
-		return -EPROTO;
-	}
-
-	proto_match = (conn->ksnc_proto == proto);
-	conn->ksnc_proto = proto;
-
-	/* receive the rest of hello message anyway */
-	rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
-	if (rc) {
-		CERROR("Error %d reading or checking hello from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0);
-		return rc;
-	}
-
-	*incarnation = hello->kshm_src_incarnation;
-
-	if (hello->kshm_src_nid == LNET_NID_ANY) {
-		CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY from %pI4h\n",
-		       &conn->ksnc_ipaddr);
-		return -EPROTO;
-	}
-
-	if (!active &&
-	    conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
-		/* Userspace NAL assigns peer process ID from socket */
-		recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
-		recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
-					 conn->ksnc_ipaddr);
-	} else {
-		recv_id.nid = hello->kshm_src_nid;
-		recv_id.pid = hello->kshm_src_pid;
-	}
-
-	if (!active) {
-		*peerid = recv_id;
-
-		/* peer determines type */
-		conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
-		if (conn->ksnc_type == SOCKLND_CONN_NONE) {
-			CERROR("Unexpected type %d from %s ip %pI4h\n",
-			       hello->kshm_ctype, libcfs_id2str(*peerid),
-			       &conn->ksnc_ipaddr);
-			return -EPROTO;
-		}
-
-		return 0;
-	}
-
-	if (peerid->pid != recv_id.pid ||
-	    peerid->nid != recv_id.nid) {
-		LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host %pI4h, but they claimed they were %s; please check your Lustre configuration.\n",
-				   libcfs_id2str(*peerid),
-				   &conn->ksnc_ipaddr,
-				   libcfs_id2str(recv_id));
-		return -EPROTO;
-	}
-
-	if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
-		/* Possible protocol mismatch or I lost the connection race */
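-		/* NB: a positive return distinguishes these non-fatal cases
-		 * from fatal (< 0) errors; see the return-value comment at
-		 * the top of this function
-		 */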
-		return proto_match ? EALREADY : EPROTO;
-	}
-
-	if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
-		CERROR("Mismatched types: me %d, %s ip %pI4h %d\n",
-		       conn->ksnc_type, libcfs_id2str(*peerid),
-		       &conn->ksnc_ipaddr, hello->kshm_ctype);
-		return -EPROTO;
-	}
-
-	return 0;
-}
-
-static int
-ksocknal_connect(struct ksock_route *route)
-{
-	LIST_HEAD(zombies);
-	struct ksock_peer *peer = route->ksnr_peer;
-	int type;
-	int wanted;
-	struct socket *sock;
-	unsigned long deadline;
-	int retry_later = 0;
-	int rc = 0;
-
-	deadline = jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	LASSERT(route->ksnr_scheduled);
-	LASSERT(!route->ksnr_connecting);
-
-	route->ksnr_connecting = 1;
-
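-	/* loop until every wanted connection type is established, or we
-	 * fail / need to reschedule
-	 */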
-	for (;;) {
-		wanted = ksocknal_route_mask() & ~route->ksnr_connected;
-
-		/*
-		 * stop connecting if peer/route got closed under me, or
-		 * route got connected while queued
-		 */
-		if (peer->ksnp_closing || route->ksnr_deleted ||
-		    !wanted) {
-			retry_later = 0;
-			break;
-		}
-
-		/* reschedule if peer is connecting to me */
-		if (peer->ksnp_accepting > 0) {
-			CDEBUG(D_NET,
-			       "peer %s(%d) already connecting to me, retry later.\n",
-			       libcfs_nid2str(peer->ksnp_id.nid),
-			       peer->ksnp_accepting);
-			retry_later = 1;
-		}
-
-		if (retry_later) /* needs reschedule */
-			break;
-
-		if (wanted & BIT(SOCKLND_CONN_ANY)) {
-			type = SOCKLND_CONN_ANY;
-		} else if (wanted & BIT(SOCKLND_CONN_CONTROL)) {
-			type = SOCKLND_CONN_CONTROL;
-		} else if (wanted & BIT(SOCKLND_CONN_BULK_IN)) {
-			type = SOCKLND_CONN_BULK_IN;
-		} else {
-			LASSERT(wanted & BIT(SOCKLND_CONN_BULK_OUT));
-			type = SOCKLND_CONN_BULK_OUT;
-		}
-
-		write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-		if (time_after_eq(jiffies, deadline)) {
-			rc = -ETIMEDOUT;
-			lnet_connect_console_error(rc, peer->ksnp_id.nid,
-						   route->ksnr_ipaddr,
-						   route->ksnr_port);
-			goto failed;
-		}
-
-		rc = lnet_connect(&sock, peer->ksnp_id.nid,
-				  route->ksnr_myipaddr,
-				  route->ksnr_ipaddr, route->ksnr_port);
-		if (rc)
-			goto failed;
-
-		rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
-		if (rc < 0) {
-			lnet_connect_console_error(rc, peer->ksnp_id.nid,
-						   route->ksnr_ipaddr,
-						   route->ksnr_port);
-			goto failed;
-		}
-
-		/*
-		 * A +ve RC means I have to retry because I lost the connection
-		 * race or I have to renegotiate protocol version
-		 */
-		retry_later = rc;
-		if (retry_later)
-			CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
-			       libcfs_nid2str(peer->ksnp_id.nid));
-
-		write_lock_bh(&ksocknal_data.ksnd_global_lock);
-	}
-
-	route->ksnr_scheduled = 0;
-	route->ksnr_connecting = 0;
-
-	if (retry_later) {
-		/*
-		 * re-queue for attention; this frees me up to handle
-		 * the peer's incoming connection request
-		 */
-		if (rc == EALREADY ||
-		    (!rc && peer->ksnp_accepting > 0)) {
-			/*
-			 * We want to introduce a delay before the next
-			 * connection attempt if we lost the connection race,
-			 * but the race is usually resolved quickly, so
-			 * min_reconnectms should be a good heuristic
-			 */
-			route->ksnr_retry_interval =
-				*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000;
-			route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
-		}
-
-		ksocknal_launch_connection_locked(route);
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-	return retry_later;
-
- failed:
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	route->ksnr_scheduled = 0;
-	route->ksnr_connecting = 0;
-
-	/* This is a retry rather than a new connection */
-	route->ksnr_retry_interval *= 2;
-	route->ksnr_retry_interval =
-		max(route->ksnr_retry_interval,
-		    (long)*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000);
-	route->ksnr_retry_interval =
-		min(route->ksnr_retry_interval,
-		    (long)*ksocknal_tunables.ksnd_max_reconnectms * HZ / 1000);
-
-	LASSERT(route->ksnr_retry_interval);
-	route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
-
-	if (!list_empty(&peer->ksnp_tx_queue) &&
-	    !peer->ksnp_accepting &&
-	    !ksocknal_find_connecting_route_locked(peer)) {
-		struct ksock_conn *conn;
-
-		/*
-		 * ksnp_tx_queue is queued on a conn on successful
-		 * connection for V1.x and V2.x
-		 */
-		if (!list_empty(&peer->ksnp_conns)) {
-			conn = list_entry(peer->ksnp_conns.next,
-					  struct ksock_conn, ksnc_list);
-			LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
-		}
-
-		/*
-		 * take all the blocked packets while I've got the lock and
-		 * complete below...
-		 */
-		list_splice_init(&peer->ksnp_tx_queue, &zombies);
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	ksocknal_peer_failed(peer);
-	ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
-	return 0;
-}
-
-/*
- * Check whether we need to create more connds.
- * Try to create a new thread if necessary; @timeout may be updated if
- * thread creation fails, so the caller won't keep retrying while we're
- * short of resources.
- */
-static int
-ksocknal_connd_check_start(time64_t sec, long *timeout)
-{
-	char name[16];
-	int rc;
-	int total = ksocknal_data.ksnd_connd_starting +
-		    ksocknal_data.ksnd_connd_running;
-
-	if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
-		/* still initializing */
-		return 0;
-	}
-
-	if (total >= *ksocknal_tunables.ksnd_nconnds_max ||
-	    total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) {
-		/*
-		 * can't create more connd, or still have enough
-		 * threads to handle more connecting
-		 */
-		return 0;
-	}
-
-	if (list_empty(&ksocknal_data.ksnd_connd_routes)) {
-		/* no pending connecting request */
-		return 0;
-	}
-
-	if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) {
-		/* may run out of resource, retry later */
-		*timeout = HZ;
-		return 0;
-	}
-
-	if (ksocknal_data.ksnd_connd_starting > 0) {
-		/* serialize starting to avoid flood */
-		return 0;
-	}
-
-	ksocknal_data.ksnd_connd_starting_stamp = sec;
-	ksocknal_data.ksnd_connd_starting++;
-	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
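-	/* connd_lock is dropped here because thread creation may sleep */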
-	/* NB: total is the next id */
-	snprintf(name, sizeof(name), "socknal_cd%02d", total);
-	rc = ksocknal_thread_start(ksocknal_connd, NULL, name);
-
-	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-	if (!rc)
-		return 1;
-
-	/* we tried ... */
-	LASSERT(ksocknal_data.ksnd_connd_starting > 0);
-	ksocknal_data.ksnd_connd_starting--;
-	ksocknal_data.ksnd_connd_failed_stamp = ktime_get_real_seconds();
-
-	return 1;
-}
-
-/*
- * Check whether the current thread can exit: return 1 if there are too
- * many threads and none has been created in the past 120 seconds.
- * This function may also update @timeout so the caller comes back
- * later to recheck these conditions.
- */
-static int
-ksocknal_connd_check_stop(time64_t sec, long *timeout)
-{
-	int val;
-
-	if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
-		/* still initializing */
-		return 0;
-	}
-
-	if (ksocknal_data.ksnd_connd_starting > 0) {
-		/* in progress of starting new thread */
-		return 0;
-	}
-
-	if (ksocknal_data.ksnd_connd_running <=
-	    *ksocknal_tunables.ksnd_nconnds) { /* can't shrink */
-		return 0;
-	}
-
-	/* created thread in past 120 seconds? */
-	val = (int)(ksocknal_data.ksnd_connd_starting_stamp +
-		    SOCKNAL_CONND_TIMEOUT - sec);
-
-	*timeout = (val > 0) ? val * HZ :
-			       SOCKNAL_CONND_TIMEOUT * HZ;
-	if (val > 0)
-		return 0;
-
-	/* no thread created in the past 120 seconds */
-
-	return ksocknal_data.ksnd_connd_running >
-	       ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV;
-}
-
-/*
- * Go through connd_routes queue looking for a route that we can process
- * right now, @timeout_p can be updated if we need to come back later
- */
-static struct ksock_route *
-ksocknal_connd_get_route_locked(signed long *timeout_p)
-{
-	struct ksock_route *route;
-	unsigned long now;
-
-	now = jiffies;
-
-	/* connd_routes can contain both pending and ordinary routes */
-	list_for_each_entry(route, &ksocknal_data.ksnd_connd_routes,
-			    ksnr_connd_list) {
-		if (!route->ksnr_retry_interval ||
-		    time_after_eq(now, route->ksnr_timeout))
-			return route;
-
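-		/* not ready to retry yet: shrink *timeout_p towards the
-		 * earliest pending retry deadline so the caller wakes in time
-		 */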
-		if (*timeout_p == MAX_SCHEDULE_TIMEOUT ||
-		    (int)*timeout_p > (int)(route->ksnr_timeout - now))
-			*timeout_p = (int)(route->ksnr_timeout - now);
-	}
-
-	return NULL;
-}
-
-int
-ksocknal_connd(void *arg)
-{
-	spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock;
-	struct ksock_connreq *cr;
-	wait_queue_entry_t wait;
-	int nloops = 0;
-	int cons_retry = 0;
-
-	init_waitqueue_entry(&wait, current);
-
-	spin_lock_bh(connd_lock);
-
-	LASSERT(ksocknal_data.ksnd_connd_starting > 0);
-	ksocknal_data.ksnd_connd_starting--;
-	ksocknal_data.ksnd_connd_running++;
-
-	while (!ksocknal_data.ksnd_shuttingdown) {
-		struct ksock_route *route = NULL;
-		time64_t sec = ktime_get_real_seconds();
-		long timeout = MAX_SCHEDULE_TIMEOUT;
-		int dropped_lock = 0;
-
-		if (ksocknal_connd_check_stop(sec, &timeout)) {
-			/* wakeup another one to check stop */
-			wake_up(&ksocknal_data.ksnd_connd_waitq);
-			break;
-		}
-
-		if (ksocknal_connd_check_start(sec, &timeout)) {
-			/* created new thread */
-			dropped_lock = 1;
-		}
-
-		if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
-			/* Connection accepted by the listener */
-			cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next,
-					struct ksock_connreq, ksncr_list);
-
-			list_del(&cr->ksncr_list);
-			spin_unlock_bh(connd_lock);
-			dropped_lock = 1;
-
-			ksocknal_create_conn(cr->ksncr_ni, NULL,
-					     cr->ksncr_sock, SOCKLND_CONN_NONE);
-			lnet_ni_decref(cr->ksncr_ni);
-			kfree(cr);
-
-			spin_lock_bh(connd_lock);
-		}
-
-		/*
-		 * Only handle an outgoing connection request if there
-		 * is a thread left to handle incoming connections and
-		 * create new connd
-		 */
-		if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV <
-		    ksocknal_data.ksnd_connd_running) {
-			route = ksocknal_connd_get_route_locked(&timeout);
-		}
-		if (route) {
-			list_del(&route->ksnr_connd_list);
-			ksocknal_data.ksnd_connd_connecting++;
-			spin_unlock_bh(connd_lock);
-			dropped_lock = 1;
-
-			if (ksocknal_connect(route)) {
-				/* consecutive retry */
-				if (cons_retry++ > SOCKNAL_INSANITY_RECONN) {
-					CWARN("massive consecutive re-connecting to %pI4h\n",
-					      &route->ksnr_ipaddr);
-					cons_retry = 0;
-				}
-			} else {
-				cons_retry = 0;
-			}
-
-			ksocknal_route_decref(route);
-
-			spin_lock_bh(connd_lock);
-			ksocknal_data.ksnd_connd_connecting--;
-		}
-
-		if (dropped_lock) {
-			if (++nloops < SOCKNAL_RESCHED)
-				continue;
-			spin_unlock_bh(connd_lock);
-			nloops = 0;
-			cond_resched();
-			spin_lock_bh(connd_lock);
-			continue;
-		}
-
-		/* Nothing to do for 'timeout' */
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq,
-					 &wait);
-		spin_unlock_bh(connd_lock);
-
-		nloops = 0;
-		schedule_timeout(timeout);
-
-		remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait);
-		spin_lock_bh(connd_lock);
-	}
-	ksocknal_data.ksnd_connd_running--;
-	spin_unlock_bh(connd_lock);
-
-	ksocknal_thread_fini();
-	return 0;
-}
-
-static struct ksock_conn *
-ksocknal_find_timed_out_conn(struct ksock_peer *peer)
-{
-	/* We're called with a shared lock on ksnd_global_lock */
-	struct ksock_conn *conn;
-	struct list_head *ctmp;
-
-	list_for_each(ctmp, &peer->ksnp_conns) {
-		int error;
-
-		conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
-		/* Don't need the {get,put}connsock dance to deref ksnc_sock */
-		LASSERT(!conn->ksnc_closing);
-
-		/*
-		 * SOCK_ERROR will reset error code of socket in
-		 * SOCK_ERROR will reset the socket's error code on
-		 * some platforms (like Darwin 8.x)
-		error = conn->ksnc_sock->sk->sk_err;
-		if (error) {
-			ksocknal_conn_addref(conn);
-
-			switch (error) {
-			case ECONNRESET:
-				CNETERR("A connection with %s (%pI4h:%d) was reset; it may have rebooted.\n",
-					libcfs_id2str(peer->ksnp_id),
-					&conn->ksnc_ipaddr,
-					conn->ksnc_port);
-				break;
-			case ETIMEDOUT:
-				CNETERR("A connection with %s (%pI4h:%d) timed out; the network or node may be down.\n",
-					libcfs_id2str(peer->ksnp_id),
-					&conn->ksnc_ipaddr,
-					conn->ksnc_port);
-				break;
-			default:
-				CNETERR("An unexpected network error %d occurred with %s (%pI4h:%d)\n",
-					error,
-					libcfs_id2str(peer->ksnp_id),
-					&conn->ksnc_ipaddr,
-					conn->ksnc_port);
-				break;
-			}
-
-			return conn;
-		}
-
-		if (conn->ksnc_rx_started &&
-		    time_after_eq(jiffies,
-				  conn->ksnc_rx_deadline)) {
-			/* Timed out incomplete incoming message */
-			ksocknal_conn_addref(conn);
-			CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n",
-				libcfs_id2str(peer->ksnp_id),
-				&conn->ksnc_ipaddr,
-				conn->ksnc_port,
-				conn->ksnc_rx_state,
-				iov_iter_count(&conn->ksnc_rx_to),
-				conn->ksnc_rx_nob_left);
-			return conn;
-		}
-
-		if ((!list_empty(&conn->ksnc_tx_queue) ||
-		     conn->ksnc_sock->sk->sk_wmem_queued) &&
-		    time_after_eq(jiffies,
-				  conn->ksnc_tx_deadline)) {
-			/*
-			 * Timed out messages queued for sending or
-			 * buffered in the socket's send buffer
-			 */
-			ksocknal_conn_addref(conn);
-			CNETERR("Timeout sending data to %s (%pI4h:%d); the network or that node may be down.\n",
-				libcfs_id2str(peer->ksnp_id),
-				&conn->ksnc_ipaddr,
-				conn->ksnc_port);
-			return conn;
-		}
-	}
-
-	return NULL;
-}
-
-static inline void
-ksocknal_flush_stale_txs(struct ksock_peer *peer)
-{
-	struct ksock_tx *tx;
-	struct ksock_tx *tmp;
-	LIST_HEAD(stale_txs);
-
-	write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
-	list_for_each_entry_safe(tx, tmp, &peer->ksnp_tx_queue, tx_list) {
-		if (!time_after_eq(jiffies,
-				   tx->tx_deadline))
-			break;
-
-		list_del(&tx->tx_list);
-		list_add_tail(&tx->tx_list, &stale_txs);
-	}
-
-	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
-	ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
-}
-
-static int
-ksocknal_send_keepalive_locked(struct ksock_peer *peer)
-	__must_hold(&ksocknal_data.ksnd_global_lock)
-{
-	struct ksock_sched *sched;
-	struct ksock_conn *conn;
-	struct ksock_tx *tx;
-
-	/* last_alive will be updated by create_conn */
-	if (list_empty(&peer->ksnp_conns))
-		return 0;
-
-	if (peer->ksnp_proto != &ksocknal_protocol_v3x)
-		return 0;
-
-	if (*ksocknal_tunables.ksnd_keepalive <= 0 ||
-	    time_before(jiffies,
-			peer->ksnp_last_alive + *ksocknal_tunables.ksnd_keepalive * HZ))
-		return 0;
-
-	if (time_before(jiffies, peer->ksnp_send_keepalive))
-		return 0;
-
-	/*
-	 * retry 10 secs later, so we don't put pressure on this peer
-	 * if we failed to send a keepalive this time
-	 */
-	peer->ksnp_send_keepalive = jiffies + 10 * HZ;
-
-	conn = ksocknal_find_conn_locked(peer, NULL, 1);
-	if (conn) {
-		sched = conn->ksnc_scheduler;
-
-		spin_lock_bh(&sched->kss_lock);
-		if (!list_empty(&conn->ksnc_tx_queue)) {
-			spin_unlock_bh(&sched->kss_lock);
-			/* there is a queued ACK, don't need keepalive */
-			return 0;
-		}
-
-		spin_unlock_bh(&sched->kss_lock);
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	/* cookie = 1 is reserved for keepalive PING */
-	tx = ksocknal_alloc_tx_noop(1, 1);
-	if (!tx) {
-		read_lock(&ksocknal_data.ksnd_global_lock);
-		return -ENOMEM;
-	}
-
-	if (!ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) {
-		read_lock(&ksocknal_data.ksnd_global_lock);
-		return 1;
-	}
-
-	ksocknal_free_tx(tx);
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	return -EIO;
-}
-
-static void
-ksocknal_check_peer_timeouts(int idx)
-{
-	struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
-	struct ksock_peer *peer;
-	struct ksock_conn *conn;
-	struct ksock_tx *tx;
-
- again:
-	/*
-	 * NB. We expect to have a look at all the peers and not find any
-	 * connections to time out, so we just use a shared lock while we
-	 * take a look...
-	 */
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	list_for_each_entry(peer, peers, ksnp_list) {
-		unsigned long deadline = 0;
-		struct ksock_tx *tx_stale;
-		int resid = 0;
-		int n = 0;
-
-		if (ksocknal_send_keepalive_locked(peer)) {
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-			goto again;
-		}
-
-		conn = ksocknal_find_timed_out_conn(peer);
-
-		if (conn) {
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-
-			ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT);
-
-			/*
-			 * NB we won't find this one again, but we can't
-			 * just proceed with the next peer, since we dropped
-			 * ksnd_global_lock and it might be dead already!
-			 */
-			ksocknal_conn_decref(conn);
-			goto again;
-		}
-
-		/*
-		 * we can't process stale txs right here because we're
-		 * holding only shared lock
-		 */
-		if (!list_empty(&peer->ksnp_tx_queue)) {
-			tx = list_entry(peer->ksnp_tx_queue.next,
-					struct ksock_tx, tx_list);
-
-			if (time_after_eq(jiffies,
-					  tx->tx_deadline)) {
-				ksocknal_peer_addref(peer);
-				read_unlock(&ksocknal_data.ksnd_global_lock);
-
-				ksocknal_flush_stale_txs(peer);
-
-				ksocknal_peer_decref(peer);
-				goto again;
-			}
-		}
-
-		if (list_empty(&peer->ksnp_zc_req_list))
-			continue;
-
-		tx_stale = NULL;
-		spin_lock(&peer->ksnp_lock);
-		list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
-			if (!time_after_eq(jiffies,
-					   tx->tx_deadline))
-				break;
-			/* ignore the TX if connection is being closed */
-			if (tx->tx_conn->ksnc_closing)
-				continue;
-			if (!tx_stale)
-				tx_stale = tx;
-			n++;
-		}
-
-		if (!tx_stale) {
-			spin_unlock(&peer->ksnp_lock);
-			continue;
-		}
-
-		deadline = tx_stale->tx_deadline;
-		resid = tx_stale->tx_resid;
-		conn = tx_stale->tx_conn;
-		ksocknal_conn_addref(conn);
-
-		spin_unlock(&peer->ksnp_lock);
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-
-		CERROR("Total %d stale ZC_REQs for peer %s detected; the oldest (%p) timed out %ld secs ago, resid: %d, wmem: %d\n",
-		       n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale,
-		       (jiffies - deadline) / HZ,
-		       resid, conn->ksnc_sock->sk->sk_wmem_queued);
-
-		ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT);
-		ksocknal_conn_decref(conn);
-		goto again;
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_reaper(void *arg)
-{
-	wait_queue_entry_t wait;
-	struct ksock_conn *conn;
-	struct ksock_sched *sched;
-	struct list_head enomem_conns;
-	int nenomem_conns;
-	long timeout;
-	int i;
-	int peer_index = 0;
-	unsigned long deadline = jiffies;
-
-	INIT_LIST_HEAD(&enomem_conns);
-	init_waitqueue_entry(&wait, current);
-
-	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-	while (!ksocknal_data.ksnd_shuttingdown) {
-		if (!list_empty(&ksocknal_data.ksnd_deathrow_conns)) {
-			conn = list_entry(ksocknal_data.ksnd_deathrow_conns.next,
-					  struct ksock_conn, ksnc_list);
-			list_del(&conn->ksnc_list);
-
-			spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-			ksocknal_terminate_conn(conn);
-			ksocknal_conn_decref(conn);
-
-			spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-			continue;
-		}
-
-		if (!list_empty(&ksocknal_data.ksnd_zombie_conns)) {
-			conn = list_entry(ksocknal_data.ksnd_zombie_conns.next,
-					  struct ksock_conn, ksnc_list);
-			list_del(&conn->ksnc_list);
-
-			spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-			ksocknal_destroy_conn(conn);
-
-			spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-			continue;
-		}
-
-		if (!list_empty(&ksocknal_data.ksnd_enomem_conns)) {
-			list_add(&enomem_conns,
-				 &ksocknal_data.ksnd_enomem_conns);
-			list_del_init(&ksocknal_data.ksnd_enomem_conns);
-		}
-
-		spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-		/* reschedule all the connections that stalled with ENOMEM... */
-		nenomem_conns = 0;
-		while (!list_empty(&enomem_conns)) {
-			conn = list_entry(enomem_conns.next, struct ksock_conn,
-					  ksnc_tx_list);
-			list_del(&conn->ksnc_tx_list);
-
-			sched = conn->ksnc_scheduler;
-
-			spin_lock_bh(&sched->kss_lock);
-
-			LASSERT(conn->ksnc_tx_scheduled);
-			conn->ksnc_tx_ready = 1;
-			list_add_tail(&conn->ksnc_tx_list,
-				      &sched->kss_tx_conns);
-			wake_up(&sched->kss_waitq);
-
-			spin_unlock_bh(&sched->kss_lock);
-			nenomem_conns++;
-		}
-
-		/* careful with the jiffy wrap... */
-		while ((timeout = deadline - jiffies) <= 0) {
-			const int n = 4;
-			const int p = 1;
-			int chunk = ksocknal_data.ksnd_peer_hash_size;
-
-			/*
-			 * Time to check for timeouts on a few more peers: I do
-			 * checks every 'p' seconds on a proportion of the peer
-			 * table and I need to check every connection 'n' times
-			 * within a timeout interval, to ensure I detect a
-			 * timeout on any connection within (n+1)/n times the
-			 * timeout interval.
-			 */
-			if (*ksocknal_tunables.ksnd_timeout > n * p)
-				chunk = (chunk * n * p) /
-					*ksocknal_tunables.ksnd_timeout;
-			if (!chunk)
-				chunk = 1;
-
-			for (i = 0; i < chunk; i++) {
-				ksocknal_check_peer_timeouts(peer_index);
-				peer_index = (peer_index + 1) %
-					     ksocknal_data.ksnd_peer_hash_size;
-			}
-
-			deadline = deadline + p * HZ;
-		}
-
-		if (nenomem_conns) {
-			/*
-			 * Reduce my timeout if I rescheduled ENOMEM conns.
-			 * This also prevents me getting woken immediately
-			 * if any go back on my enomem list.
-			 */
-			timeout = SOCKNAL_ENOMEM_RETRY;
-		}
-		ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait);
-
-		if (!ksocknal_data.ksnd_shuttingdown &&
-		    list_empty(&ksocknal_data.ksnd_deathrow_conns) &&
-		    list_empty(&ksocknal_data.ksnd_zombie_conns))
-			schedule_timeout(timeout);
-
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait);
-
-		spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-	}
-
-	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
-	ksocknal_thread_fini();
-	return 0;
-}

+ 0 - 534
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c

@@ -1,534 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/highmem.h>
-#include "socklnd.h"
-
-int
-ksocknal_lib_get_conn_addrs(struct ksock_conn *conn)
-{
-	int rc = lnet_sock_getaddr(conn->ksnc_sock, 1, &conn->ksnc_ipaddr,
-				   &conn->ksnc_port);
-
-	/* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
-	LASSERT(!conn->ksnc_closing);
-
-	if (rc) {
-		CERROR("Error %d getting sock peer IP\n", rc);
-		return rc;
-	}
-
-	rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL);
-	if (rc) {
-		CERROR("Error %d getting sock local IP\n", rc);
-		return rc;
-	}
-
-	return 0;
-}
-
-int
-ksocknal_lib_zc_capable(struct ksock_conn *conn)
-{
-	int caps = conn->ksnc_sock->sk->sk_route_caps;
-
-	if (conn->ksnc_proto == &ksocknal_protocol_v1x)
-		return 0;
-
-	/*
-	 * ZC if the socket supports scatter/gather and doesn't need software
-	 * checksums
-	 */
-	return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK));
-}
-
-int
-ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
-	struct socket *sock = conn->ksnc_sock;
-	int nob, i;
-
-	if (*ksocknal_tunables.ksnd_enable_csum	&& /* checksum enabled */
-	    conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection  */
-	    tx->tx_nob == tx->tx_resid		 && /* first sending    */
-	    !tx->tx_msg.ksm_csum)		     /* not checksummed  */
-		ksocknal_lib_csum_tx(tx);
-
-	for (nob = i = 0; i < tx->tx_niov; i++)
-		nob += tx->tx_iov[i].iov_len;
-
-	if (!list_empty(&conn->ksnc_tx_queue) ||
-	    nob < tx->tx_resid)
-		msg.msg_flags |= MSG_MORE;
-
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
-		      tx->tx_iov, tx->tx_niov, nob);
-	return sock_sendmsg(sock, &msg);
-}
-
-int
-ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
-	struct socket *sock = conn->ksnc_sock;
-	struct bio_vec *kiov = tx->tx_kiov;
-	int rc;
-	int nob;
-
-	/* Not NOOP message */
-	LASSERT(tx->tx_lnetmsg);
-
-	if (tx->tx_msg.ksm_zc_cookies[0]) {
-		/* Zero copy is enabled */
-		struct sock *sk = sock->sk;
-		struct page *page = kiov->bv_page;
-		int offset = kiov->bv_offset;
-		int fragsize = kiov->bv_len;
-		int msgflg = MSG_DONTWAIT;
-
-		CDEBUG(D_NET, "page %p + offset %x for %d\n",
-		       page, offset, kiov->bv_len);
-
-		if (!list_empty(&conn->ksnc_tx_queue) ||
-		    fragsize < tx->tx_resid)
-			msgflg |= MSG_MORE;
-
-		if (sk->sk_prot->sendpage) {
-			rc = sk->sk_prot->sendpage(sk, page,
-						   offset, fragsize, msgflg);
-		} else {
-			rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
-		}
-	} else {
-		struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
-		int i;
-
-		for (nob = i = 0; i < tx->tx_nkiov; i++)
-			nob += kiov[i].bv_len;
-
-		if (!list_empty(&conn->ksnc_tx_queue) ||
-		    nob < tx->tx_resid)
-			msg.msg_flags |= MSG_MORE;
-
-		iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
-			      kiov, tx->tx_nkiov, nob);
-		rc = sock_sendmsg(sock, &msg);
-	}
-	return rc;
-}
-
-void
-ksocknal_lib_eager_ack(struct ksock_conn *conn)
-{
-	int opt = 1;
-	struct socket *sock = conn->ksnc_sock;
-
-	/*
-	 * Remind the socket to ACK eagerly.  If I don't, the socket might
-	 * think I'm about to send something it could piggy-back the ACK
-	 * on, introducing delay in completing zero-copy sends in my
-	 * peer.
-	 */
-	kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt,
-			  sizeof(opt));
-}
-
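-/*
- * iov_iter_for_each_range() callback: fold one contiguous range into the
- * connection's running CRC32.
- */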
-static int lustre_csum(struct kvec *v, void *context)
-	struct ksock_conn *conn = context;
-
-	struct ksock_conn *conn = context;
-	conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
-				      v->iov_base, v->iov_len);
-	return 0;
-}
-
-int
-ksocknal_lib_recv(struct ksock_conn *conn)
-{
-	struct msghdr msg = { .msg_iter = conn->ksnc_rx_to };
-	__u32 saved_csum;
-	int rc;
-
-	rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
-	if (rc <= 0)
-		return rc;
-
-	saved_csum = conn->ksnc_msg.ksm_csum;
-	if (!saved_csum)
-		return rc;
-
-	/* header is included only in V2; V3 checksums only the bulk data */
-	if (!(conn->ksnc_rx_to.type & ITER_BVEC) &&
-	     conn->ksnc_proto != &ksocknal_protocol_v2x)
-		return rc;
-
-	/* accumulate checksum */
-	conn->ksnc_msg.ksm_csum = 0;
-	iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn);
-	conn->ksnc_msg.ksm_csum = saved_csum;
-
-	return rc;
-}
-
-void
-ksocknal_lib_csum_tx(struct ksock_tx *tx)
-{
-	int i;
-	__u32 csum;
-	void *base;
-
-	LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg);
-	LASSERT(tx->tx_conn);
-	LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
-
-	tx->tx_msg.ksm_csum = 0;
-
-	csum = crc32_le(~0, tx->tx_iov[0].iov_base,
-			tx->tx_iov[0].iov_len);
-
-	if (tx->tx_kiov) {
-		for (i = 0; i < tx->tx_nkiov; i++) {
-			base = kmap(tx->tx_kiov[i].bv_page) +
-			       tx->tx_kiov[i].bv_offset;
-
-			csum = crc32_le(csum, base, tx->tx_kiov[i].bv_len);
-
-			kunmap(tx->tx_kiov[i].bv_page);
-		}
-	} else {
-		for (i = 1; i < tx->tx_niov; i++)
-			csum = crc32_le(csum, tx->tx_iov[i].iov_base,
-					tx->tx_iov[i].iov_len);
-	}
-
-	if (*ksocknal_tunables.ksnd_inject_csum_error) {
-		csum++;
-		*ksocknal_tunables.ksnd_inject_csum_error = 0;
-	}
-
-	tx->tx_msg.ksm_csum = csum;
-}
-
-int
-ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
-			       int *rxmem, int *nagle)
-{
-	struct socket *sock = conn->ksnc_sock;
-	int len;
-	int rc;
-
-	rc = ksocknal_connsock_addref(conn);
-	if (rc) {
-		LASSERT(conn->ksnc_closing);
-		*txmem = *rxmem = *nagle = 0;
-		return -ESHUTDOWN;
-	}
-
-	rc = lnet_sock_getbuf(sock, txmem, rxmem);
-	if (!rc) {
-		len = sizeof(*nagle);
-		rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY,
-				       (char *)nagle, &len);
-	}
-
-	ksocknal_connsock_decref(conn);
-
-	if (!rc)
-		*nagle = !*nagle;
-	else
-		*txmem = *rxmem = *nagle = 0;
-
-	return rc;
-}
-
-int
-ksocknal_lib_setup_sock(struct socket *sock)
-{
-	int rc;
-	int option;
-	int keep_idle;
-	int keep_intvl;
-	int keep_count;
-	int do_keepalive;
-	struct linger linger;
-
-	sock->sk->sk_allocation = GFP_NOFS;
-
-	/*
-	 * Ensure this socket aborts active sends immediately when we close
-	 * it.
-	 */
-	linger.l_onoff = 0;
-	linger.l_linger = 0;
-
-	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger,
-			       sizeof(linger));
-	if (rc) {
-		CERROR("Can't set SO_LINGER: %d\n", rc);
-		return rc;
-	}
-
-	option = -1;
-	rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option,
-			       sizeof(option));
-	if (rc) {
-		CERROR("Can't set TCP_LINGER2: %d\n", rc);
-		return rc;
-	}
-
-	if (!*ksocknal_tunables.ksnd_nagle) {
-		option = 1;
-
-		rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
-				       (char *)&option, sizeof(option));
-		if (rc) {
-			CERROR("Can't disable nagle: %d\n", rc);
-			return rc;
-		}
-	}
-
-	rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size,
-			      *ksocknal_tunables.ksnd_rx_buffer_size);
-	if (rc) {
-		CERROR("Can't set buffer tx %d, rx %d buffers: %d\n",
-		       *ksocknal_tunables.ksnd_tx_buffer_size,
-		       *ksocknal_tunables.ksnd_rx_buffer_size, rc);
-		return rc;
-	}
-
-	/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */
-
-	/* snapshot tunables */
-	keep_idle  = *ksocknal_tunables.ksnd_keepalive_idle;
-	keep_count = *ksocknal_tunables.ksnd_keepalive_count;
-	keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
-	do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
-	option = (do_keepalive ? 1 : 0);
-	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option,
-			       sizeof(option));
-	if (rc) {
-		CERROR("Can't set SO_KEEPALIVE: %d\n", rc);
-		return rc;
-	}
-
-	if (!do_keepalive)
-		return 0;
-
-	rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle,
-			       sizeof(keep_idle));
-	if (rc) {
-		CERROR("Can't set TCP_KEEPIDLE: %d\n", rc);
-		return rc;
-	}
-
-	rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-			       (char *)&keep_intvl, sizeof(keep_intvl));
-	if (rc) {
-		CERROR("Can't set TCP_KEEPINTVL: %d\n", rc);
-		return rc;
-	}
-
-	rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count,
-			       sizeof(keep_count));
-	if (rc) {
-		CERROR("Can't set TCP_KEEPCNT: %d\n", rc);
-		return rc;
-	}
-
-	return 0;
-}
-
-void
-ksocknal_lib_push_conn(struct ksock_conn *conn)
-{
-	struct sock *sk;
-	struct tcp_sock *tp;
-	int nonagle;
-	int val = 1;
-	int rc;
-
-	rc = ksocknal_connsock_addref(conn);
-	if (rc)			    /* being shut down */
-		return;
-
-	sk = conn->ksnc_sock->sk;
-	tp = tcp_sk(sk);
-
-	lock_sock(sk);
-	nonagle = tp->nonagle;
-	tp->nonagle = 1;
-	release_sock(sk);
-
-	rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY,
-			       (char *)&val, sizeof(val));
-	LASSERT(!rc);
-
-	lock_sock(sk);
-	tp->nonagle = nonagle;
-	release_sock(sk);
-
-	ksocknal_connsock_decref(conn);
-}
-
-/*
- * socket call back in Linux
- */
-static void
-ksocknal_data_ready(struct sock *sk)
-{
-	struct ksock_conn *conn;
-
-	/* interleave correctly with closing sockets... */
-	LASSERT(!in_irq());
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	conn = sk->sk_user_data;
-	if (!conn) {	     /* raced with ksocknal_terminate_conn */
-		LASSERT(sk->sk_data_ready != &ksocknal_data_ready);
-		sk->sk_data_ready(sk);
-	} else {
-		ksocknal_read_callback(conn);
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-static void
-ksocknal_write_space(struct sock *sk)
-{
-	struct ksock_conn *conn;
-	int wspace;
-	int min_wspace;
-
-	/* interleave correctly with closing sockets... */
-	LASSERT(!in_irq());
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	conn = sk->sk_user_data;
-	wspace = sk_stream_wspace(sk);
-	min_wspace = sk_stream_min_wspace(sk);
-
-	CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
-	       sk, wspace, min_wspace, conn,
-	       !conn ? "" : (conn->ksnc_tx_ready ?
-				      " ready" : " blocked"),
-	       !conn ? "" : (conn->ksnc_tx_scheduled ?
-				      " scheduled" : " idle"),
-	       !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ?
-				      " empty" : " queued"));
-
-	if (!conn) {	     /* raced with ksocknal_terminate_conn */
-		LASSERT(sk->sk_write_space != &ksocknal_write_space);
-		sk->sk_write_space(sk);
-
-		read_unlock(&ksocknal_data.ksnd_global_lock);
-		return;
-	}
-
-	if (wspace >= min_wspace) {	      /* got enough space */
-		ksocknal_write_callback(conn);
-
-		/*
-		 * Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the
-		 * ENOMEM check in ksocknal_transmit is race-free (think about
-		 * it).
-		 */
-		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn)
-{
-	conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
-	conn->ksnc_saved_write_space = sock->sk->sk_write_space;
-}
-
-void
-ksocknal_lib_set_callback(struct socket *sock,  struct ksock_conn *conn)
-{
-	sock->sk->sk_user_data = conn;
-	sock->sk->sk_data_ready = ksocknal_data_ready;
-	sock->sk->sk_write_space = ksocknal_write_space;
-}
-
-void
-ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn)
-{
-	/*
-	 * Remove conn's network callbacks.
-	 * NB I _have_ to restore the callback, rather than storing a noop,
-	 * since the socket could survive past this module being unloaded!!
-	 */
-	sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
-	sock->sk->sk_write_space = conn->ksnc_saved_write_space;
-
-	/*
-	 * A callback could be in progress already; they hold a read lock
-	 * on ksnd_global_lock (to serialise with me) and NOOP if
-	 * sk_user_data is NULL.
-	 */
-	sock->sk->sk_user_data = NULL;
-}
-
-int
-ksocknal_lib_memory_pressure(struct ksock_conn *conn)
-{
-	int rc = 0;
-	struct ksock_sched *sched;
-
-	sched = conn->ksnc_scheduler;
-	spin_lock_bh(&sched->kss_lock);
-
-	if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) &&
-	    !conn->ksnc_tx_ready) {
-		/*
-		 * SOCK_NOSPACE is set when the socket fills
-		 * and cleared in the write_space callback
-		 * (which also sets ksnc_tx_ready).  If
-		 * SOCK_NOSPACE and ksnc_tx_ready are BOTH
-		 * zero, I didn't fill the socket and
-		 * write_space won't reschedule me, so I
-		 * return -ENOMEM to get my caller to retry
-		 * after a timeout
-		 */
-		rc = -ENOMEM;
-	}
-
-	spin_unlock_bh(&sched->kss_lock);
-
-	return rc;
-}

+ 0 - 184
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c

@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-static int sock_timeout = 50;
-module_param(sock_timeout, int, 0644);
-MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)");
-
-static int credits = 256;
-module_param(credits, int, 0444);
-MODULE_PARM_DESC(credits, "# concurrent sends");
-
-static int peer_credits = 8;
-module_param(peer_credits, int, 0444);
-MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
-
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
-
-static int peer_timeout = 180;
-module_param(peer_timeout, int, 0444);
-MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)");
-
-/*
- * Number of daemons in each thread pool, which is per CPU partition;
- * we will estimate a reasonable value based on CPUs if it's not set.
- */
-static unsigned int nscheds;
-module_param(nscheds, int, 0444);
-MODULE_PARM_DESC(nscheds, "# scheduler daemons in each pool while starting");
-
-static int nconnds = 4;
-module_param(nconnds, int, 0444);
-MODULE_PARM_DESC(nconnds, "# connection daemons while starting");
-
-static int nconnds_max = 64;
-module_param(nconnds_max, int, 0444);
-MODULE_PARM_DESC(nconnds_max, "max # connection daemons");
-
-static int min_reconnectms = 1000;
-module_param(min_reconnectms, int, 0644);
-MODULE_PARM_DESC(min_reconnectms, "min connection retry interval (mS)");
-
-static int max_reconnectms = 60000;
-module_param(max_reconnectms, int, 0644);
-MODULE_PARM_DESC(max_reconnectms, "max connection retry interval (mS)");
-
-# define DEFAULT_EAGER_ACK 0
-static int eager_ack = DEFAULT_EAGER_ACK;
-module_param(eager_ack, int, 0644);
-MODULE_PARM_DESC(eager_ack, "send tcp ack packets eagerly");
-
-static int typed_conns = 1;
-module_param(typed_conns, int, 0444);
-MODULE_PARM_DESC(typed_conns, "use different sockets for bulk");
-
-static int min_bulk = 1 << 10;
-module_param(min_bulk, int, 0644);
-MODULE_PARM_DESC(min_bulk, "smallest 'large' message");
-
-# define DEFAULT_BUFFER_SIZE 0
-static int tx_buffer_size = DEFAULT_BUFFER_SIZE;
-module_param(tx_buffer_size, int, 0644);
-MODULE_PARM_DESC(tx_buffer_size, "socket tx buffer size (0 for system default)");
-
-static int rx_buffer_size = DEFAULT_BUFFER_SIZE;
-module_param(rx_buffer_size, int, 0644);
-MODULE_PARM_DESC(rx_buffer_size, "socket rx buffer size (0 for system default)");
-
-static int nagle;
-module_param(nagle, int, 0644);
-MODULE_PARM_DESC(nagle, "enable NAGLE?");
-
-static int round_robin = 1;
-module_param(round_robin, int, 0644);
-MODULE_PARM_DESC(round_robin, "Round robin for multiple interfaces");
-
-static int keepalive = 30;
-module_param(keepalive, int, 0644);
-MODULE_PARM_DESC(keepalive, "# seconds before send keepalive");
-
-static int keepalive_idle = 30;
-module_param(keepalive_idle, int, 0644);
-MODULE_PARM_DESC(keepalive_idle, "# idle seconds before probe");
-
-#define DEFAULT_KEEPALIVE_COUNT  5
-static int keepalive_count = DEFAULT_KEEPALIVE_COUNT;
-module_param(keepalive_count, int, 0644);
-MODULE_PARM_DESC(keepalive_count, "# missed probes == dead");
-
-static int keepalive_intvl = 5;
-module_param(keepalive_intvl, int, 0644);
-MODULE_PARM_DESC(keepalive_intvl, "seconds between probes");
-
-static int enable_csum;
-module_param(enable_csum, int, 0644);
-MODULE_PARM_DESC(enable_csum, "enable checksum");
-
-static int inject_csum_error;
-module_param(inject_csum_error, int, 0644);
-MODULE_PARM_DESC(inject_csum_error, "set non-zero to inject a checksum error");
-
-static int nonblk_zcack = 1;
-module_param(nonblk_zcack, int, 0644);
-MODULE_PARM_DESC(nonblk_zcack, "always send ZC-ACK on non-blocking connection");
-
-static unsigned int zc_min_payload = 16 << 10;
-module_param(zc_min_payload, int, 0644);
-MODULE_PARM_DESC(zc_min_payload, "minimum payload size to zero copy");
-
-static unsigned int zc_recv;
-module_param(zc_recv, int, 0644);
-MODULE_PARM_DESC(zc_recv, "enable ZC recv for Chelsio driver");
-
-static unsigned int zc_recv_min_nfrags = 16;
-module_param(zc_recv_min_nfrags, int, 0644);
-MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv");
-
-#if SOCKNAL_VERSION_DEBUG
-static int protocol = 3;
-module_param(protocol, int, 0644);
-MODULE_PARM_DESC(protocol, "protocol version");
-#endif
-
-struct ksock_tunables ksocknal_tunables;
-
-int ksocknal_tunables_init(void)
-{
-	/* initialize ksocknal_tunables structure */
-	ksocknal_tunables.ksnd_timeout            = &sock_timeout;
-	ksocknal_tunables.ksnd_nscheds            = &nscheds;
-	ksocknal_tunables.ksnd_nconnds            = &nconnds;
-	ksocknal_tunables.ksnd_nconnds_max        = &nconnds_max;
-	ksocknal_tunables.ksnd_min_reconnectms    = &min_reconnectms;
-	ksocknal_tunables.ksnd_max_reconnectms    = &max_reconnectms;
-	ksocknal_tunables.ksnd_eager_ack          = &eager_ack;
-	ksocknal_tunables.ksnd_typed_conns        = &typed_conns;
-	ksocknal_tunables.ksnd_min_bulk           = &min_bulk;
-	ksocknal_tunables.ksnd_tx_buffer_size     = &tx_buffer_size;
-	ksocknal_tunables.ksnd_rx_buffer_size     = &rx_buffer_size;
-	ksocknal_tunables.ksnd_nagle              = &nagle;
-	ksocknal_tunables.ksnd_round_robin        = &round_robin;
-	ksocknal_tunables.ksnd_keepalive          = &keepalive;
-	ksocknal_tunables.ksnd_keepalive_idle     = &keepalive_idle;
-	ksocknal_tunables.ksnd_keepalive_count    = &keepalive_count;
-	ksocknal_tunables.ksnd_keepalive_intvl    = &keepalive_intvl;
-	ksocknal_tunables.ksnd_credits            = &credits;
-	ksocknal_tunables.ksnd_peertxcredits      = &peer_credits;
-	ksocknal_tunables.ksnd_peerrtrcredits     = &peer_buffer_credits;
-	ksocknal_tunables.ksnd_peertimeout        = &peer_timeout;
-	ksocknal_tunables.ksnd_enable_csum        = &enable_csum;
-	ksocknal_tunables.ksnd_inject_csum_error  = &inject_csum_error;
-	ksocknal_tunables.ksnd_nonblk_zcack       = &nonblk_zcack;
-	ksocknal_tunables.ksnd_zc_min_payload     = &zc_min_payload;
-	ksocknal_tunables.ksnd_zc_recv            = &zc_recv;
-	ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
-
-#if SOCKNAL_VERSION_DEBUG
-	ksocknal_tunables.ksnd_protocol           = &protocol;
-#endif
-
-	if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10))
-		*ksocknal_tunables.ksnd_zc_min_payload = 2 << 10;
-
-	return 0;
-}

+ 0 - 810
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c

@@ -1,810 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, Intel Corporation.
- *
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-/*
- * Protocol entries:
- *   pro_send_hello()     : send hello message
- *   pro_recv_hello()     : receive hello message
- *   pro_pack()           : pack message header
- *   pro_unpack()         : unpack message header
- *   pro_queue_tx_zcack() : called holding BH lock: kss_lock;
- *                          return 1 if the ACK is piggybacked, otherwise 0
- *   pro_queue_tx_msg()   : called holding BH lock: kss_lock;
- *                          return the ACK piggybacked by my message, or NULL
- *   pro_handle_zcreq()   : handler of incoming ZC-REQ
- *   pro_handle_zcack()   : handler of incoming ZC-ACK
- *   pro_match_tx()       : called holding glock
- */
-
-static struct ksock_tx *
-ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg)
-{
-	/* V1.x, just enqueue it */
-	list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
-	return NULL;
-}
-
-void
-ksocknal_next_tx_carrier(struct ksock_conn *conn)
-{
-	struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
-	/* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
-	LASSERT(!list_empty(&conn->ksnc_tx_queue));
-	LASSERT(tx);
-
-	/* Next TX that can carry ZC-ACK or LNet message */
-	if (tx->tx_list.next == &conn->ksnc_tx_queue) {
-		/* no more packets queued */
-		conn->ksnc_tx_carrier = NULL;
-	} else {
-		conn->ksnc_tx_carrier = list_next_entry(tx, tx_list);
-		LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
-	}
-}
-
-static int
-ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn,
-			   struct ksock_tx *tx_ack, __u64 cookie)
-{
-	struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
-	LASSERT(!tx_ack ||
-		tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-	/*
-	 * Enqueue or piggyback tx_ack / cookie:
-	 * . if no tx can piggyback the cookie of tx_ack (or the bare
-	 *   cookie), just enqueue tx_ack (if tx_ack != NULL) and return 0.
-	 * . if some tx can piggyback the cookie of tx_ack (or the bare
-	 *   cookie), piggyback it there and return 1.
-	 */
-	if (!tx) {
-		if (tx_ack) {
-			list_add_tail(&tx_ack->tx_list,
-				      &conn->ksnc_tx_queue);
-			conn->ksnc_tx_carrier = tx_ack;
-		}
-		return 0;
-	}
-
-	if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
-		/* tx is noop zc-ack, can't piggyback zc-ack cookie */
-		if (tx_ack)
-			list_add_tail(&tx_ack->tx_list,
-				      &conn->ksnc_tx_queue);
-		return 0;
-	}
-
-	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
-	LASSERT(!tx->tx_msg.ksm_zc_cookies[1]);
-
-	if (tx_ack)
-		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
-
-	/* piggyback the zc-ack cookie */
-	tx->tx_msg.ksm_zc_cookies[1] = cookie;
-	/* move on to the next TX which can carry cookie */
-	ksocknal_next_tx_carrier(conn);
-
-	return 1;
-}
-
-static struct ksock_tx *
-ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg)
-{
-	struct ksock_tx *tx  = conn->ksnc_tx_carrier;
-
-	/*
-	 * Enqueue tx_msg:
-	 * . If there is no NOOP on the connection, just enqueue
-	 *   tx_msg and return NULL.
-	 * . If there is a NOOP on the connection, piggyback its cookie
-	 *   onto tx_msg, replace the NOOP tx with tx_msg, and return
-	 *   the NOOP tx.
-	 */
-	if (!tx) { /* nothing on queue */
-		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
-		conn->ksnc_tx_carrier = tx_msg;
-		return NULL;
-	}
-
-	if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
-		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
-		return NULL;
-	}
-
-	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-	/* There is a NOOP zc-ack whose cookie can be piggybacked */
-	tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
-	ksocknal_next_tx_carrier(conn);
-
-	/* use tx_msg to replace the noop zc-ack packet */
-	list_add(&tx_msg->tx_list, &tx->tx_list);
-	list_del(&tx->tx_list);
-
-	return tx;
-}
-
-static int
-ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
-			   struct ksock_tx *tx_ack, __u64 cookie)
-{
-	struct ksock_tx *tx;
-
-	if (conn->ksnc_type != SOCKLND_CONN_ACK)
-		return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);
-
-	/* non-blocking ZC-ACK (to router) */
-	LASSERT(!tx_ack ||
-		tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-	tx = conn->ksnc_tx_carrier;
-	if (!tx) {
-		if (tx_ack) {
-			list_add_tail(&tx_ack->tx_list,
-				      &conn->ksnc_tx_queue);
-			conn->ksnc_tx_carrier = tx_ack;
-		}
-		return 0;
-	}
-
-	/* tx == conn->ksnc_tx_carrier: try to piggyback onto it */
-
-	if (tx_ack)
-		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
-
-	if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
-		return 1;
-
-	if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
-		/* replace the keepalive PING with a real ACK */
-		LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
-		tx->tx_msg.ksm_zc_cookies[1] = cookie;
-		return 1;
-	}
-
-	if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
-	    cookie == tx->tx_msg.ksm_zc_cookies[1]) {
-		CWARN("%s: duplicated ZC cookie: %llu\n",
-		      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
-		return 1; /* XXX return error in the future */
-	}
-
-	if (!tx->tx_msg.ksm_zc_cookies[0]) {
-		/*
-		 * NOOP tx has only one ZC-ACK cookie,
-		 * can carry at least one more
-		 */
-		if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
-			tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
-			tx->tx_msg.ksm_zc_cookies[1] = cookie;
-		} else {
-			tx->tx_msg.ksm_zc_cookies[0] = cookie;
-		}
-
-		if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
-			/*
-			 * not likely to carry more ACKs, skip it
-			 * to simplify logic
-			 */
-			ksocknal_next_tx_carrier(conn);
-		}
-
-		return 1;
-	}
-
-	/* the carrier already holds two or more cookies */
-
-	if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
-		__u64   tmp = 0;
-
-		/* two separate cookies: (a+2, a) or (a+1, a) */
-		LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
-			 tx->tx_msg.ksm_zc_cookies[1] <= 2);
-
-		if (tx->tx_msg.ksm_zc_cookies[0] -
-		    tx->tx_msg.ksm_zc_cookies[1] == 2) {
-			if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
-				tmp = cookie;
-		} else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
-			tmp = tx->tx_msg.ksm_zc_cookies[1];
-		} else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
-			tmp = tx->tx_msg.ksm_zc_cookies[0];
-		}
-
-		if (tmp) {
-			/* range of cookies */
-			tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
-			tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
-			return 1;
-		}
-
-	} else {
-		/*
-		 * ksm_zc_cookies[0] < ksm_zc_cookies[1],
-		 * it is range of cookies
-		 */
-		if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
-		    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
-			CWARN("%s: duplicated ZC cookie: %llu\n",
-			      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
-			return 1; /* XXX: return error in the future */
-		}
-
-		if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
-			tx->tx_msg.ksm_zc_cookies[1] = cookie;
-			return 1;
-		}
-
-		if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
-			tx->tx_msg.ksm_zc_cookies[0] = cookie;
-			return 1;
-		}
-	}
-
-	/* failed to piggyback ZC-ACK */
-	if (tx_ack) {
-		list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
-		/* the next tx can piggyback at least 1 ACK */
-		ksocknal_next_tx_carrier(conn);
-	}
-
-	return 0;
-}
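
The v3 zc-ack path above packs outstanding ACK cookies into the two __u64 slots of the NOOP carrier: a single cookie (slot 0 zero), two separate cookies (slot 0 > slot 1), or an inclusive range (slot 0 < slot 1). A small userspace sketch of just the range-extension case, hypothetical code for illustration rather than the deleted API:

#include <stdint.h>
#include <stdio.h>

/* c[0] < c[1] means c encodes the inclusive cookie range [c[0], c[1]] */
static int try_extend_range(uint64_t c[2], uint64_t cookie)
{
	if (c[0] < c[1] && cookie == c[1] + 1) {
		c[1] = cookie;		/* extend the range upwards */
		return 1;
	}
	if (c[0] < c[1] && cookie == c[0] - 1) {
		c[0] = cookie;		/* extend the range downwards */
		return 1;
	}
	return 0;	/* not adjacent: caller must fall back to a new NOOP */
}

int main(void)
{
	uint64_t c[2] = { 5, 8 };		/* range [5, 8] */

	printf("%d\n", try_extend_range(c, 9));	  /* 1: now [5, 9] */
	printf("%d\n", try_extend_range(c, 4));	  /* 1: now [4, 9] */
	printf("%d\n", try_extend_range(c, 100)); /* 0: not adjacent */
	return 0;
}

Adjacent cookies are absorbed in place; anything else eventually reaches the "failed to piggyback ZC-ACK" branch above, which queues a fresh NOOP.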
-
-static int
-ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
-{
-	int nob;
-
-#if SOCKNAL_VERSION_DEBUG
-	if (!*ksocknal_tunables.ksnd_typed_conns)
-		return SOCKNAL_MATCH_YES;
-#endif
-
-	if (!tx || !tx->tx_lnetmsg) {
-		/* noop packet */
-		nob = offsetof(struct ksock_msg, ksm_u);
-	} else {
-		nob = tx->tx_lnetmsg->msg_len +
-		      ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
-		       sizeof(struct lnet_hdr) : sizeof(struct ksock_msg));
-	}
-
-	/* default checking for typed connection */
-	switch (conn->ksnc_type) {
-	default:
-		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
-		LBUG();
-	case SOCKLND_CONN_ANY:
-		return SOCKNAL_MATCH_YES;
-
-	case SOCKLND_CONN_BULK_IN:
-		return SOCKNAL_MATCH_MAY;
-
-	case SOCKLND_CONN_BULK_OUT:
-		if (nob < *ksocknal_tunables.ksnd_min_bulk)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_YES;
-
-	case SOCKLND_CONN_CONTROL:
-		if (nob >= *ksocknal_tunables.ksnd_min_bulk)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_YES;
-	}
-}
-
-static int
-ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
-{
-	int nob;
-
-	if (!tx || !tx->tx_lnetmsg)
-		nob = offsetof(struct ksock_msg, ksm_u);
-	else
-		nob = tx->tx_lnetmsg->msg_len + sizeof(struct ksock_msg);
-
-	switch (conn->ksnc_type) {
-	default:
-		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
-		LBUG();
-	case SOCKLND_CONN_ANY:
-		return SOCKNAL_MATCH_NO;
-
-	case SOCKLND_CONN_ACK:
-		if (nonblk)
-			return SOCKNAL_MATCH_YES;
-		else if (!tx || !tx->tx_lnetmsg)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_NO;
-
-	case SOCKLND_CONN_BULK_OUT:
-		if (nonblk)
-			return SOCKNAL_MATCH_NO;
-		else if (nob < *ksocknal_tunables.ksnd_min_bulk)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_YES;
-
-	case SOCKLND_CONN_CONTROL:
-		if (nonblk)
-			return SOCKNAL_MATCH_NO;
-		else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
-			return SOCKNAL_MATCH_MAY;
-		else
-			return SOCKNAL_MATCH_YES;
-	}
-}
-
-/* (Sink) handle incoming ZC request from sender */
-static int
-ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote)
-{
-	struct ksock_peer *peer = c->ksnc_peer;
-	struct ksock_conn *conn;
-	struct ksock_tx *tx;
-	int rc;
-
-	read_lock(&ksocknal_data.ksnd_global_lock);
-
-	conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
-	if (conn) {
-		struct ksock_sched *sched = conn->ksnc_scheduler;
-
-		LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
-
-		spin_lock_bh(&sched->kss_lock);
-
-		rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
-
-		spin_unlock_bh(&sched->kss_lock);
-
-		if (rc) { /* piggybacked */
-			read_unlock(&ksocknal_data.ksnd_global_lock);
-			return 0;
-		}
-	}
-
-	read_unlock(&ksocknal_data.ksnd_global_lock);
-
-	/* ACK connection is not ready, or can't piggyback the ACK */
-	tx = ksocknal_alloc_tx_noop(cookie, !!remote);
-	if (!tx)
-		return -ENOMEM;
-
-	rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id);
-	if (!rc)
-		return 0;
-
-	ksocknal_free_tx(tx);
-	return rc;
-}
-
-/* (Sender) handle ZC_ACK from sink */
-static int
-ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2)
-{
-	struct ksock_peer *peer = conn->ksnc_peer;
-	struct ksock_tx *tx;
-	struct ksock_tx *temp;
-	struct ksock_tx *tmp;
-	LIST_HEAD(zlist);
-	int count;
-
-	if (!cookie1)
-		cookie1 = cookie2;
-
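-	/* cookie1 > cookie2 encodes two separate cookies; otherwise the
-	 * pair is the inclusive range [cookie1, cookie2]
-	 */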
-	count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
-
-	if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
-	    conn->ksnc_proto == &ksocknal_protocol_v3x) {
-		/* keepalive PING for V3.x, just ignore it */
-		return count == 1 ? 0 : -EPROTO;
-	}
-
-	spin_lock(&peer->ksnp_lock);
-
-	list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list,
-				 tx_zc_list) {
-		__u64 c = tx->tx_msg.ksm_zc_cookies[0];
-
-		if (c == cookie1 || c == cookie2 ||
-		    (cookie1 < c && c < cookie2)) {
-			tx->tx_msg.ksm_zc_cookies[0] = 0;
-			list_del(&tx->tx_zc_list);
-			list_add(&tx->tx_zc_list, &zlist);
-
-			if (!--count)
-				break;
-		}
-	}
-
-	spin_unlock(&peer->ksnp_lock);
-
-	list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
-		list_del(&tx->tx_zc_list);
-		ksocknal_tx_decref(tx);
-	}
-
-	return !count ? 0 : -EPROTO;
-}
-
-static int
-ksocknal_send_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello)
-{
-	struct socket *sock = conn->ksnc_sock;
-	struct lnet_hdr *hdr;
-	struct lnet_magicversion *hmv;
-	int rc;
-	int i;
-
-	BUILD_BUG_ON(sizeof(struct lnet_magicversion) != offsetof(struct lnet_hdr, src_nid));
-
-	hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
-	if (!hdr) {
-		CERROR("Can't allocate struct lnet_hdr\n");
-		return -ENOMEM;
-	}
-
-	hmv = (struct lnet_magicversion *)&hdr->dest_nid;
-
-	/*
-	 * Re-organize the V2.x message header into a V1.x header
-	 * (struct lnet_hdr) and send it out
-	 */
-	hmv->magic         = cpu_to_le32(LNET_PROTO_TCP_MAGIC);
-	hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR);
-	hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR);
-
-	if (the_lnet.ln_testprotocompat) {
-		/* single-shot proto check */
-		LNET_LOCK();
-		if (the_lnet.ln_testprotocompat & 1) {
-			hmv->version_major++;   /* just different! */
-			the_lnet.ln_testprotocompat &= ~1;
-		}
-		if (the_lnet.ln_testprotocompat & 2) {
-			hmv->magic = LNET_PROTO_MAGIC;
-			the_lnet.ln_testprotocompat &= ~2;
-		}
-		LNET_UNLOCK();
-	}
-
-	hdr->src_nid = cpu_to_le64(hello->kshm_src_nid);
-	hdr->src_pid = cpu_to_le32(hello->kshm_src_pid);
-	hdr->type = cpu_to_le32(LNET_MSG_HELLO);
-	hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32));
-	hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype);
-	hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation);
-
-	rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout());
-	if (rc) {
-		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
-			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
-		goto out;
-	}
-
-	if (!hello->kshm_nips)
-		goto out;
-
-	for (i = 0; i < (int)hello->kshm_nips; i++)
-		hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]);
-
-	rc = lnet_sock_write(sock, hello->kshm_ips,
-			     hello->kshm_nips * sizeof(__u32),
-			     lnet_acceptor_timeout());
-	if (rc) {
-		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
-			rc, hello->kshm_nips,
-			&conn->ksnc_ipaddr, conn->ksnc_port);
-	}
-out:
-	kfree(hdr);
-
-	return rc;
-}
-
-static int
-ksocknal_send_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello)
-{
-	struct socket *sock = conn->ksnc_sock;
-	int rc;
-
-	hello->kshm_magic   = LNET_PROTO_MAGIC;
-	hello->kshm_version = conn->ksnc_proto->pro_version;
-
-	if (the_lnet.ln_testprotocompat) {
-		/* single-shot proto check */
-		LNET_LOCK();
-		if (the_lnet.ln_testprotocompat & 1) {
-			hello->kshm_version++;   /* just different! */
-			the_lnet.ln_testprotocompat &= ~1;
-		}
-		LNET_UNLOCK();
-	}
-
-	rc = lnet_sock_write(sock, hello, offsetof(struct ksock_hello_msg, kshm_ips),
-			     lnet_acceptor_timeout());
-	if (rc) {
-		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
-			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
-		return rc;
-	}
-
-	if (!hello->kshm_nips)
-		return 0;
-
-	rc = lnet_sock_write(sock, hello->kshm_ips,
-			     hello->kshm_nips * sizeof(__u32),
-			     lnet_acceptor_timeout());
-	if (rc) {
-		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
-			rc, hello->kshm_nips,
-			&conn->ksnc_ipaddr, conn->ksnc_port);
-	}
-
-	return rc;
-}
-
-static int
-ksocknal_recv_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello,
-		       int timeout)
-{
-	struct socket *sock = conn->ksnc_sock;
-	struct lnet_hdr *hdr;
-	int rc;
-	int i;
-
-	hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
-	if (!hdr) {
-		CERROR("Can't allocate struct lnet_hdr\n");
-		return -ENOMEM;
-	}
-
-	rc = lnet_sock_read(sock, &hdr->src_nid,
-			    sizeof(*hdr) - offsetof(struct lnet_hdr, src_nid),
-			    timeout);
-	if (rc) {
-		CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0 && rc != -EALREADY);
-		goto out;
-	}
-
-	/* ...and check we got what we expected */
-	if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) {
-		CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n",
-		       le32_to_cpu(hdr->type),
-		       &conn->ksnc_ipaddr);
-		rc = -EPROTO;
-		goto out;
-	}
-
-	hello->kshm_src_nid         = le64_to_cpu(hdr->src_nid);
-	hello->kshm_src_pid         = le32_to_cpu(hdr->src_pid);
-	hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation);
-	hello->kshm_ctype           = le32_to_cpu(hdr->msg.hello.type);
-	hello->kshm_nips            = le32_to_cpu(hdr->payload_length) /
-						  sizeof(__u32);
-
-	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
-		CERROR("Bad nips %d from ip %pI4h\n",
-		       hello->kshm_nips, &conn->ksnc_ipaddr);
-		rc = -EPROTO;
-		goto out;
-	}
-
-	if (!hello->kshm_nips)
-		goto out;
-
-	rc = lnet_sock_read(sock, hello->kshm_ips,
-			    hello->kshm_nips * sizeof(__u32), timeout);
-	if (rc) {
-		CERROR("Error %d reading IPs from ip %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0 && rc != -EALREADY);
-		goto out;
-	}
-
-	for (i = 0; i < (int)hello->kshm_nips; i++) {
-		hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
-
-		if (!hello->kshm_ips[i]) {
-			CERROR("Zero IP[%d] from ip %pI4h\n",
-			       i, &conn->ksnc_ipaddr);
-			rc = -EPROTO;
-			break;
-		}
-	}
-out:
-	kfree(hdr);
-
-	return rc;
-}
-
-static int
-ksocknal_recv_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello,
-		       int timeout)
-{
-	struct socket *sock = conn->ksnc_sock;
-	int rc;
-	int i;
-
-	if (hello->kshm_magic == LNET_PROTO_MAGIC)
-		conn->ksnc_flip = 0;
-	else
-		conn->ksnc_flip = 1;
-
-	rc = lnet_sock_read(sock, &hello->kshm_src_nid,
-			    offsetof(struct ksock_hello_msg, kshm_ips) -
-				     offsetof(struct ksock_hello_msg, kshm_src_nid),
-			    timeout);
-	if (rc) {
-		CERROR("Error %d reading HELLO from %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0 && rc != -EALREADY);
-		return rc;
-	}
-
-	if (conn->ksnc_flip) {
-		__swab32s(&hello->kshm_src_pid);
-		__swab64s(&hello->kshm_src_nid);
-		__swab32s(&hello->kshm_dst_pid);
-		__swab64s(&hello->kshm_dst_nid);
-		__swab64s(&hello->kshm_src_incarnation);
-		__swab64s(&hello->kshm_dst_incarnation);
-		__swab32s(&hello->kshm_ctype);
-		__swab32s(&hello->kshm_nips);
-	}
-
-	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
-		CERROR("Bad nips %d from ip %pI4h\n",
-		       hello->kshm_nips, &conn->ksnc_ipaddr);
-		return -EPROTO;
-	}
-
-	if (!hello->kshm_nips)
-		return 0;
-
-	rc = lnet_sock_read(sock, hello->kshm_ips,
-			    hello->kshm_nips * sizeof(__u32), timeout);
-	if (rc) {
-		CERROR("Error %d reading IPs from ip %pI4h\n",
-		       rc, &conn->ksnc_ipaddr);
-		LASSERT(rc < 0 && rc != -EALREADY);
-		return rc;
-	}
-
-	for (i = 0; i < (int)hello->kshm_nips; i++) {
-		if (conn->ksnc_flip)
-			__swab32s(&hello->kshm_ips[i]);
-
-		if (!hello->kshm_ips[i]) {
-			CERROR("Zero IP[%d] from ip %pI4h\n",
-			       i, &conn->ksnc_ipaddr);
-			return -EPROTO;
-		}
-	}
-
-	return 0;
-}
-
-static void
-ksocknal_pack_msg_v1(struct ksock_tx *tx)
-{
-	/* V1.x has no KSOCK_MSG_NOOP */
-	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-	LASSERT(tx->tx_lnetmsg);
-
-	tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr;
-	tx->tx_iov[0].iov_len  = sizeof(struct lnet_hdr);
-
-	tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr);
-	tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr);
-}
-
-static void
-ksocknal_pack_msg_v2(struct ksock_tx *tx)
-{
-	tx->tx_iov[0].iov_base = &tx->tx_msg;
-
-	if (tx->tx_lnetmsg) {
-		LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-
-		tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
-		tx->tx_iov[0].iov_len = sizeof(struct ksock_msg);
-		tx->tx_nob = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len;
-		tx->tx_resid = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len;
-	} else {
-		LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-		tx->tx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr);
-		tx->tx_nob = offsetof(struct ksock_msg,  ksm_u.lnetmsg.ksnm_hdr);
-		tx->tx_resid = offsetof(struct ksock_msg,  ksm_u.lnetmsg.ksnm_hdr);
-	}
-	/*
-	 * Don't checksum before we start sending, because the packet can
-	 * still be piggybacked with an ACK
-	 */
-}
-
-static void
-ksocknal_unpack_msg_v1(struct ksock_msg *msg)
-{
-	msg->ksm_csum = 0;
-	msg->ksm_type = KSOCK_MSG_LNET;
-	msg->ksm_zc_cookies[0] = 0;
-	msg->ksm_zc_cookies[1] = 0;
-}
-
-static void
-ksocknal_unpack_msg_v2(struct ksock_msg *msg)
-{
-	/* Nothing to do: V2.x message headers need no unpacking */
-}
-
-struct ksock_proto ksocknal_protocol_v1x = {
-	.pro_version        = KSOCK_PROTO_V1,
-	.pro_send_hello     = ksocknal_send_hello_v1,
-	.pro_recv_hello     = ksocknal_recv_hello_v1,
-	.pro_pack           = ksocknal_pack_msg_v1,
-	.pro_unpack         = ksocknal_unpack_msg_v1,
-	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v1,
-	.pro_handle_zcreq   = NULL,
-	.pro_handle_zcack   = NULL,
-	.pro_queue_tx_zcack = NULL,
-	.pro_match_tx       = ksocknal_match_tx
-};
-
-struct ksock_proto ksocknal_protocol_v2x = {
-	.pro_version        = KSOCK_PROTO_V2,
-	.pro_send_hello     = ksocknal_send_hello_v2,
-	.pro_recv_hello     = ksocknal_recv_hello_v2,
-	.pro_pack           = ksocknal_pack_msg_v2,
-	.pro_unpack         = ksocknal_unpack_msg_v2,
-	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
-	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
-	.pro_handle_zcreq   = ksocknal_handle_zcreq,
-	.pro_handle_zcack   = ksocknal_handle_zcack,
-	.pro_match_tx       = ksocknal_match_tx
-};
-
-struct ksock_proto ksocknal_protocol_v3x = {
-	.pro_version        = KSOCK_PROTO_V3,
-	.pro_send_hello     = ksocknal_send_hello_v2,
-	.pro_recv_hello     = ksocknal_recv_hello_v2,
-	.pro_pack           = ksocknal_pack_msg_v2,
-	.pro_unpack         = ksocknal_unpack_msg_v2,
-	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
-	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
-	.pro_handle_zcreq   = ksocknal_handle_zcreq,
-	.pro_handle_zcack   = ksocknal_handle_zcack,
-	.pro_match_tx       = ksocknal_match_tx_v3
-};
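
These three tables carry the whole version story: the hello handshake selects one per connection, and every later version-specific decision goes through a function pointer, which is why v3x can reuse the v2x hello/pack/unpack routines and swap in only the zc-ack queueing and tx matching. A toy sketch of that dispatch shape, with hypothetical names rather than the deleted API:

#include <stdio.h>

struct msg { int len; };

struct proto_ops {
	int version;
	void (*pack)(struct msg *m);
};

static void pack_v1(struct msg *m) { printf("v1 pack, len=%d\n", m->len); }
static void pack_v2(struct msg *m) { printf("v2 pack, len=%d\n", m->len); }

static const struct proto_ops proto_v1 = { 1, pack_v1 };
static const struct proto_ops proto_v2 = { 2, pack_v2 };

/* chosen once per connection, at handshake time */
static const struct proto_ops *negotiate(int peer_version)
{
	return peer_version >= 2 ? &proto_v2 : &proto_v1;
}

int main(void)
{
	struct msg m = { .len = 42 };
	const struct proto_ops *ops = negotiate(2);

	ops->pack(&m);	/* everything past the handshake is version-agnostic */
	return 0;
}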

+ 0 - 16
drivers/staging/lustre/lnet/libcfs/Makefile

@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += libcfs.o
-
-libcfs-obj-y += linux-tracefile.o linux-debug.o
-libcfs-obj-y += linux-crypto.o
-libcfs-obj-y += linux-crypto-adler.o
-
-libcfs-obj-y += debug.o fail.o module.o tracefile.o
-libcfs-obj-y += libcfs_string.o hash.o
-libcfs-obj-$(CONFIG_SMP) += libcfs_cpu.o
-libcfs-obj-y += libcfs_mem.o libcfs_lock.o
-
-libcfs-objs := $(libcfs-obj-y)

+ 0 - 461
drivers/staging/lustre/lnet/libcfs/debug.c

@@ -1,461 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- */
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <linux/kthread.h>
-#include "tracefile.h"
-
-static char debug_file_name[1024];
-
-unsigned int libcfs_subsystem_debug = ~0;
-EXPORT_SYMBOL(libcfs_subsystem_debug);
-module_param(libcfs_subsystem_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask");
-
-unsigned int libcfs_debug = (D_CANTMASK |
-			     D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
-EXPORT_SYMBOL(libcfs_debug);
-module_param(libcfs_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask");
-
-static int libcfs_param_debug_mb_set(const char *val,
-				     const struct kernel_param *kp)
-{
-	int rc;
-	unsigned int num;
-
-	rc = kstrtouint(val, 0, &num);
-	if (rc < 0)
-		return rc;
-
-	if (!*((unsigned int *)kp->arg)) {
-		*((unsigned int *)kp->arg) = num;
-		return 0;
-	}
-
-	rc = cfs_trace_set_debug_mb(num);
-
-	if (!rc)
-		*((unsigned int *)kp->arg) = cfs_trace_get_debug_mb();
-
-	return rc;
-}
-
-/* While the debug_mb setting looks like a plain unsigned int, it
- * actually needs quite a bit of extra processing, so we define a
- * special "debugmb" parameter type with corresponding methods to
- * handle this case
- */
-static const struct kernel_param_ops param_ops_debugmb = {
-	.set = libcfs_param_debug_mb_set,
-	.get = param_get_uint,
-};
-
-#define param_check_debugmb(name, p) \
-		__param_check(name, p, unsigned int)
-
-static unsigned int libcfs_debug_mb;
-module_param(libcfs_debug_mb, debugmb, 0644);
-MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size.");
-
-unsigned int libcfs_printk = D_CANTMASK;
-module_param(libcfs_printk, uint, 0644);
-MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask");
-
-unsigned int libcfs_console_ratelimit = 1;
-module_param(libcfs_console_ratelimit, uint, 0644);
-MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)");
-
-static int param_set_delay_minmax(const char *val,
-				  const struct kernel_param *kp,
-				  long min, long max)
-{
-	long d;
-	int sec;
-	int rc;
-
-	rc = kstrtoint(val, 0, &sec);
-	if (rc)
-		return -EINVAL;
-
-	d = sec * HZ / 100;
-	if (d < min || d > max)
-		return -EINVAL;
-
-	*((unsigned int *)kp->arg) = d;
-
-	return 0;
-}
-
-static int param_get_delay(char *buffer, const struct kernel_param *kp)
-{
-	unsigned int d = *(unsigned int *)kp->arg;
-
-	return sprintf(buffer, "%u", (unsigned int)(d * 100) / HZ);
-}
-
-unsigned int libcfs_console_max_delay;
-unsigned int libcfs_console_min_delay;
-
-static int param_set_console_max_delay(const char *val,
-				       const struct kernel_param *kp)
-{
-	return param_set_delay_minmax(val, kp,
-				      libcfs_console_min_delay, INT_MAX);
-}
-
-static const struct kernel_param_ops param_ops_console_max_delay = {
-	.set = param_set_console_max_delay,
-	.get = param_get_delay,
-};
-
-#define param_check_console_max_delay(name, p) \
-		__param_check(name, p, unsigned int)
-
-module_param(libcfs_console_max_delay, console_max_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)");
-
-static int param_set_console_min_delay(const char *val,
-				       const struct kernel_param *kp)
-{
-	return param_set_delay_minmax(val, kp,
-				      1, libcfs_console_max_delay);
-}
-
-static const struct kernel_param_ops param_ops_console_min_delay = {
-	.set = param_set_console_min_delay,
-	.get = param_get_delay,
-};
-
-#define param_check_console_min_delay(name, p) \
-		__param_check(name, p, unsigned int)
-
-module_param(libcfs_console_min_delay, console_min_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)");
-
-static int param_set_uint_minmax(const char *val,
-				 const struct kernel_param *kp,
-				 unsigned int min, unsigned int max)
-{
-	unsigned int num;
-	int ret;
-
-	if (!val)
-		return -EINVAL;
-	ret = kstrtouint(val, 0, &num);
-	if (ret < 0 || num < min || num > max)
-		return -EINVAL;
-	*((unsigned int *)kp->arg) = num;
-	return 0;
-}
-
-static int param_set_uintpos(const char *val, const struct kernel_param *kp)
-{
-	return param_set_uint_minmax(val, kp, 1, -1);
-}
-
-static const struct kernel_param_ops param_ops_uintpos = {
-	.set = param_set_uintpos,
-	.get = param_get_uint,
-};
-
-#define param_check_uintpos(name, p) \
-		__param_check(name, p, unsigned int)
-
-unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
-module_param(libcfs_console_backoff, uintpos, 0644);
-MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor");
-
-unsigned int libcfs_debug_binary = 1;
-
-unsigned int libcfs_stack = 3 * THREAD_SIZE / 4;
-EXPORT_SYMBOL(libcfs_stack);
-
-unsigned int libcfs_catastrophe;
-EXPORT_SYMBOL(libcfs_catastrophe);
-
-unsigned int libcfs_panic_on_lbug = 1;
-module_param(libcfs_panic_on_lbug, uint, 0644);
-MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
-
-static wait_queue_head_t debug_ctlwq;
-
-char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
-
-/* We need to pass a pointer here, but elsewhere this must be a const */
-static char *libcfs_debug_file_path;
-module_param(libcfs_debug_file_path, charp, 0644);
-MODULE_PARM_DESC(libcfs_debug_file_path,
-		 "Path for dumping debug logs, set 'NONE' to prevent log dumping");
-
-int libcfs_panic_in_progress;
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_subsys2str(int subsys)
-{
-	static const char * const libcfs_debug_subsystems[] =
-		LIBCFS_DEBUG_SUBSYS_NAMES;
-
-	if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems))
-		return NULL;
-
-	return libcfs_debug_subsystems[subsys];
-}
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_dbg2str(int debug)
-{
-	static const char * const libcfs_debug_masks[] =
-		LIBCFS_DEBUG_MASKS_NAMES;
-
-	if (debug >= ARRAY_SIZE(libcfs_debug_masks))
-		return NULL;
-
-	return libcfs_debug_masks[debug];
-}
-
-int
-libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
-{
-	const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
-						 libcfs_debug_dbg2str;
-	int len = 0;
-	const char *token;
-	int i;
-
-	if (!mask) {			/* "0" */
-		if (size > 0)
-			str[0] = '0';
-		len = 1;
-	} else {				/* space-separated tokens */
-		for (i = 0; i < 32; i++) {
-			if (!(mask & (1 << i)))
-				continue;
-
-			token = fn(i);
-			if (!token)	      /* unused bit */
-				continue;
-
-			if (len > 0) {		  /* separator? */
-				if (len < size)
-					str[len] = ' ';
-				len++;
-			}
-
-			while (*token) {
-				if (len < size)
-					str[len] = *token;
-				token++;
-				len++;
-			}
-		}
-	}
-
-	/* terminate 'str' */
-	if (len < size)
-		str[len] = 0;
-	else
-		str[size - 1] = 0;
-
-	return len;
-}
-
-int
-libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
-{
-	const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
-						 libcfs_debug_dbg2str;
-	int m = 0;
-	int matched;
-	int n;
-	int t;
-
-	/* Allow a number for backwards compatibility */
-
-	for (n = strlen(str); n > 0; n--)
-		if (!isspace(str[n - 1]))
-			break;
-	matched = n;
-	t = sscanf(str, "%i%n", &m, &matched);
-	if (t >= 1 && matched == n) {
-		/* don't print warning for lctl set_param debug=0 or -1 */
-		if (m && m != -1)
-			CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
-		*mask = m;
-		return 0;
-	}
-
-	return cfs_str2mask(str, fn, mask, is_subsys ? 0 : D_CANTMASK,
-			    0xffffffff);
-}
-
-/**
- * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
- */
-void libcfs_debug_dumplog_internal(void *arg)
-{
-	static time64_t last_dump_time;
-	time64_t current_time;
-	void *journal_info;
-
-	journal_info = current->journal_info;
-	current->journal_info = NULL;
-	current_time = ktime_get_real_seconds();
-
-	if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) &&
-	    current_time > last_dump_time) {
-		last_dump_time = current_time;
-		snprintf(debug_file_name, sizeof(debug_file_name) - 1,
-			 "%s.%lld.%ld", libcfs_debug_file_path_arr,
-			 (s64)current_time, (long)arg);
-		pr_alert("LustreError: dumping log to %s\n", debug_file_name);
-		cfs_tracefile_dump_all_pages(debug_file_name);
-		libcfs_run_debug_log_upcall(debug_file_name);
-	}
-
-	current->journal_info = journal_info;
-}
-
-static int libcfs_debug_dumplog_thread(void *arg)
-{
-	libcfs_debug_dumplog_internal(arg);
-	wake_up(&debug_ctlwq);
-	return 0;
-}
-
-void libcfs_debug_dumplog(void)
-{
-	wait_queue_entry_t wait;
-	struct task_struct *dumper;
-
-	/* we're being careful to ensure that the kernel thread is
-	 * able to set our state to running as it exits before we
-	 * get to schedule()
-	 */
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&debug_ctlwq, &wait);
-
-	dumper = kthread_run(libcfs_debug_dumplog_thread,
-			     (void *)(long)current->pid,
-			     "libcfs_debug_dumper");
-	set_current_state(TASK_INTERRUPTIBLE);
-	if (IS_ERR(dumper))
-		pr_err("LustreError: cannot start log dump thread: %ld\n",
-		       PTR_ERR(dumper));
-	else
-		schedule();
-
-	/* be sure to tear down if kthread_run() failed */
-	remove_wait_queue(&debug_ctlwq, &wait);
-	set_current_state(TASK_RUNNING);
-}
-EXPORT_SYMBOL(libcfs_debug_dumplog);
-
-int libcfs_debug_init(unsigned long bufsize)
-{
-	unsigned int max = libcfs_debug_mb;
-	int rc = 0;
-
-	init_waitqueue_head(&debug_ctlwq);
-
-	if (libcfs_console_max_delay <= 0 || /* not set by user or */
-	    libcfs_console_min_delay <= 0 || /* set to invalid values */
-	    libcfs_console_min_delay >= libcfs_console_max_delay) {
-		libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
-		libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
-	}
-
-	if (libcfs_debug_file_path) {
-		strlcpy(libcfs_debug_file_path_arr,
-			libcfs_debug_file_path,
-			sizeof(libcfs_debug_file_path_arr));
-	}
-
-	/* If libcfs_debug_mb is uninitialized or set to an invalid value,
-	 * just make the total buffer smp_num_cpus * TCD_MAX_PAGES
-	 */
-	if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) {
-		max = TCD_MAX_PAGES;
-	} else {
-		max = max / num_possible_cpus();
-		max <<= (20 - PAGE_SHIFT);
-	}
-
-	rc = cfs_tracefile_init(max);
-	if (!rc) {
-		libcfs_register_panic_notifier();
-		libcfs_debug_mb = cfs_trace_get_debug_mb();
-	}
-
-	return rc;
-}
-
-int libcfs_debug_cleanup(void)
-{
-	libcfs_unregister_panic_notifier();
-	cfs_tracefile_exit();
-	return 0;
-}
-
-int libcfs_debug_clear_buffer(void)
-{
-	cfs_trace_flush_pages();
-	return 0;
-}
-
-/* Debug markers, although printed by S_LNET, should not be marked as such. */
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_UNDEFINED
-int libcfs_debug_mark_buffer(const char *text)
-{
-	CDEBUG(D_TRACE,
-	       "***************************************************\n");
-	LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
-	CDEBUG(D_TRACE,
-	       "***************************************************\n");
-
-	return 0;
-}
-
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_LNET
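
debug.c defines three custom module-parameter types (debugmb, console_max_delay/console_min_delay, uintpos), each built with the stock recipe: a kernel_param_ops whose .set validates input, a param_check_<type>() macro so module_param() still type-checks the variable, and then module_param(var, <type>, perm). A sketch of the same recipe, with made-up names (demo_pos, demo_backoff) that are not part of the deleted code:

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>

/* a "positive unsigned int" parameter type, mirroring param_ops_uintpos */
static int param_set_demo_pos(const char *val, const struct kernel_param *kp)
{
	unsigned int num;
	int rc;

	rc = kstrtouint(val, 0, &num);
	if (rc || num < 1)
		return -EINVAL;

	*(unsigned int *)kp->arg = num;
	return 0;
}

static const struct kernel_param_ops param_ops_demo_pos = {
	.set = param_set_demo_pos,
	.get = param_get_uint,	/* the stock getter is fine here */
};

#define param_check_demo_pos(name, p) __param_check(name, p, unsigned int)

static unsigned int demo_backoff = 2;
module_param(demo_backoff, demo_pos, 0644);
MODULE_PARM_DESC(demo_backoff, "demo backoff factor (must be >= 1)");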

+ 0 - 146
drivers/staging/lustre/lnet/libcfs/fail.c

@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Oracle Corporation, Inc.
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/random.h>
-
-unsigned long cfs_fail_loc;
-EXPORT_SYMBOL(cfs_fail_loc);
-
-unsigned int cfs_fail_val;
-EXPORT_SYMBOL(cfs_fail_val);
-
-int cfs_fail_err;
-EXPORT_SYMBOL(cfs_fail_err);
-
-DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq);
-EXPORT_SYMBOL(cfs_race_waitq);
-
-int cfs_race_state;
-EXPORT_SYMBOL(cfs_race_state);
-
-int __cfs_fail_check_set(u32 id, u32 value, int set)
-{
-	static atomic_t cfs_fail_count = ATOMIC_INIT(0);
-
-	LASSERT(!(id & CFS_FAIL_ONCE));
-
-	if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) ==
-	    (CFS_FAILED | CFS_FAIL_ONCE)) {
-		atomic_set(&cfs_fail_count, 0); /* paranoia */
-		return 0;
-	}
-
-	/* Fail 1/cfs_fail_val times */
-	if (cfs_fail_loc & CFS_FAIL_RAND) {
-		if (cfs_fail_val < 2 || prandom_u32_max(cfs_fail_val) > 0)
-			return 0;
-	}
-
-	/* Skip the first cfs_fail_val, then fail */
-	if (cfs_fail_loc & CFS_FAIL_SKIP) {
-		if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val)
-			return 0;
-	}
-
-	/* check cfs_fail_val... */
-	if (set == CFS_FAIL_LOC_VALUE) {
-		if (cfs_fail_val != -1 && cfs_fail_val != value)
-			return 0;
-	}
-
-	/* Fail cfs_fail_val times, overridden by FAIL_ONCE */
-	if (cfs_fail_loc & CFS_FAIL_SOME &&
-	    (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) {
-		int count = atomic_inc_return(&cfs_fail_count);
-
-		if (count >= cfs_fail_val) {
-			set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
-			atomic_set(&cfs_fail_count, 0);
-			/* we lost the race to increment */
-			if (count > cfs_fail_val)
-				return 0;
-		}
-	}
-
-	/* Take the current call into account for FAIL_ONCE for ORSET only;
-	 * RESET sets a new fail_loc, so it does not change the current call
-	 */
-	if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE))
-		set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
-	/* Lost race to set CFS_FAILED_BIT. */
-	if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
-		/* If CFS_FAIL_ONCE is valid, only one process can fail,
-		 * otherwise multi-process can fail at the same time.
-		 */
-		if (cfs_fail_loc & CFS_FAIL_ONCE)
-			return 0;
-	}
-
-	switch (set) {
-	case CFS_FAIL_LOC_NOSET:
-	case CFS_FAIL_LOC_VALUE:
-		break;
-	case CFS_FAIL_LOC_ORSET:
-		cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE);
-		break;
-	case CFS_FAIL_LOC_RESET:
-		cfs_fail_loc = value;
-		atomic_set(&cfs_fail_count, 0);
-		break;
-	default:
-		LASSERTF(0, "called with bad set %u\n", set);
-		break;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(__cfs_fail_check_set);
-
-int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
-{
-	int ret;
-
-	ret = __cfs_fail_check_set(id, value, set);
-	if (ret && likely(ms > 0)) {
-		CERROR("cfs_fail_timeout id %x sleeping for %dms\n",
-		       id, ms);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(ms * HZ / 1000);
-		CERROR("cfs_fail_timeout id %x awake\n", id);
-	}
-	return ret;
-}
-EXPORT_SYMBOL(__cfs_fail_timeout_set);
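
fail.c is the fail-injection switchboard: cfs_fail_loc names a site, and __cfs_fail_check_set() interprets the RAND/SKIP/SOME/ONCE modifier bits against cfs_fail_val. Call sites normally go through wrapper macros from the (also deleted) libcfs_fail.h rather than calling these functions directly; the sketch below shows only the call-site shape, assumes a CFS_FAIL_CHECK(id)-style macro, and uses a made-up fail_loc id:

#define DEMO_FAIL_WRITE 0x215	/* hypothetical fail_loc id */

static int demo_write_path(void)
{
	/* fires according to cfs_fail_loc/cfs_fail_val, e.g. set via lctl */
	if (CFS_FAIL_CHECK(DEMO_FAIL_WRITE))
		return -EIO;	/* simulated failure */

	return 0;		/* normal path */
}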

+ 0 - 2065
drivers/staging/lustre/lnet/libcfs/hash.c

@@ -1,2065 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/hash.c
- *
- * Implement a hash class for the hashing process in the Lustre system.
- *
- * Author: YuZhangyong <yzy@clusterfs.com>
- *
- * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
- * - Simplified API and improved documentation
- * - Added per-hash feature flags:
- *   * CFS_HASH_DEBUG additional validation
- *   * CFS_HASH_REHASH dynamic rehashing
- * - Added per-hash statistics
- * - General performance enhancements
- *
- * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
- * - move all stuff to libcfs
- * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
- * - ignore hs_rwlock if CFS_HASH_REHASH is not set
- * - buckets are allocated one by one (instead of as contiguous memory),
- *   to avoid unnecessary cacheline conflicts
- *
- * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
- * - "bucket" is a group of hlist_head now, user can specify bucket size
- *   by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share
- *   one lock for reducing memory overhead.
- *
- * - support lockless hash, caller will take care of locks:
- *   avoid lock overhead for hash tables that are already protected
- *   by locking in the caller for another reason
- *
- * - support both spin_lock/rwlock for bucket:
- *   overhead of spinlock contention is lower than read/write
- *   contention of rwlock, so using spinlock to serialize operations on
- *   bucket is more reasonable for those frequently changed hash tables
- *
- * - support one-single lock mode:
- *   one lock to protect all hash operations to avoid overhead of
- *   multiple locks if hash table is always small
- *
- * - removed a lot of unnecessary addref & decref on hash elements:
- *   addref & decref are atomic operations in many use-cases, and
- *   atomic operations are expensive.
- *
- * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
- *   some lustre use-cases require these functions to be strictly
- *   non-blocking; in those cases we schedule the required rehash on
- *   a different thread.
- *
- * - safer rehash on large hash tables:
- *   in the old implementation, the rehash function would exclusively
- *   lock the hash table and finish the rehash in one batch, which is
- *   dangerous on SMP systems because rehashing millions of elements
- *   can take a long time. The new rehash implementation can release
- *   the lock and relax the CPU in the middle of a rehash, so it is
- *   safe for another thread to search/change the hash table even
- *   while it is rehashing.
- *
- * - support two different refcount modes
- *   . hash table has refcount on element
- *   . hash table doesn't change refcount on adding/removing element
- *
- * - support long name hash table (for param-tree)
- *
- * - fix a bug in cfs_hash_rehash_key:
- *   in the old implementation, cfs_hash_rehash_key could corrupt the
- *   hash table because @key was overwritten without any protection.
- *   Now the user must define hs_keycpy for rehash-enabled hash
- *   tables, and cfs_hash_rehash_key overwrites the hash key inside
- *   the lock by calling hs_keycpy.
- *
- * - better hash iteration:
- *   Now we support both locked and lockless iteration of the hash
- *   table. Also, the user can break the iteration by returning 1
- *   from the callback.
- */
-#include <linux/seq_file.h>
-#include <linux/log2.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/libcfs/libcfs_hash.h>
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static unsigned int warn_on_depth = 8;
-module_param(warn_on_depth, uint, 0644);
-MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high.");
-#endif
-
-struct workqueue_struct *cfs_rehash_wq;
-
-static inline void
-cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive)
-	__acquires(&lock->spin)
-{
-	spin_lock(&lock->spin);
-}
-
-static inline void
-cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive)
-	__releases(&lock->spin)
-{
-	spin_unlock(&lock->spin);
-}
-
-static inline void
-cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive)
-	__acquires(&lock->rw)
-{
-	if (!exclusive)
-		read_lock(&lock->rw);
-	else
-		write_lock(&lock->rw);
-}
-
-static inline void
-cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive)
-	__releases(&lock->rw)
-{
-	if (!exclusive)
-		read_unlock(&lock->rw);
-	else
-		write_unlock(&lock->rw);
-}
-
-/** No lock hash */
-static struct cfs_hash_lock_ops cfs_hash_nl_lops = {
-	.hs_lock	= cfs_hash_nl_lock,
-	.hs_unlock	= cfs_hash_nl_unlock,
-	.hs_bkt_lock	= cfs_hash_nl_lock,
-	.hs_bkt_unlock	= cfs_hash_nl_unlock,
-};
-
-/** no bucket lock, one spinlock to protect everything */
-static struct cfs_hash_lock_ops cfs_hash_nbl_lops = {
-	.hs_lock	= cfs_hash_spin_lock,
-	.hs_unlock	= cfs_hash_spin_unlock,
-	.hs_bkt_lock	= cfs_hash_nl_lock,
-	.hs_bkt_unlock	= cfs_hash_nl_unlock,
-};
-
-/** spin bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = {
-	.hs_lock	= cfs_hash_rw_lock,
-	.hs_unlock	= cfs_hash_rw_unlock,
-	.hs_bkt_lock	= cfs_hash_spin_lock,
-	.hs_bkt_unlock	= cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops = {
-	.hs_lock	= cfs_hash_rw_lock,
-	.hs_unlock	= cfs_hash_rw_unlock,
-	.hs_bkt_lock	= cfs_hash_rw_lock,
-	.hs_bkt_unlock	= cfs_hash_rw_unlock,
-};
-
-/** spin bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = {
-	.hs_lock	= cfs_hash_nl_lock,
-	.hs_unlock	= cfs_hash_nl_unlock,
-	.hs_bkt_lock	= cfs_hash_spin_lock,
-	.hs_bkt_unlock	= cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = {
-	.hs_lock	= cfs_hash_nl_lock,
-	.hs_unlock	= cfs_hash_nl_unlock,
-	.hs_bkt_lock	= cfs_hash_rw_lock,
-	.hs_bkt_unlock	= cfs_hash_rw_unlock,
-};
-
-static void
-cfs_hash_lock_setup(struct cfs_hash *hs)
-{
-	if (cfs_hash_with_no_lock(hs)) {
-		hs->hs_lops = &cfs_hash_nl_lops;
-
-	} else if (cfs_hash_with_no_bktlock(hs)) {
-		hs->hs_lops = &cfs_hash_nbl_lops;
-		spin_lock_init(&hs->hs_lock.spin);
-
-	} else if (cfs_hash_with_rehash(hs)) {
-		rwlock_init(&hs->hs_lock.rw);
-
-		if (cfs_hash_with_rw_bktlock(hs))
-			hs->hs_lops = &cfs_hash_bkt_rw_lops;
-		else if (cfs_hash_with_spin_bktlock(hs))
-			hs->hs_lops = &cfs_hash_bkt_spin_lops;
-		else
-			LBUG();
-	} else {
-		if (cfs_hash_with_rw_bktlock(hs))
-			hs->hs_lops = &cfs_hash_nr_bkt_rw_lops;
-		else if (cfs_hash_with_spin_bktlock(hs))
-			hs->hs_lops = &cfs_hash_nr_bkt_spin_lops;
-		else
-			LBUG();
-	}
-}
-
-/**
- * Simple hash head without depth tracking;
- * new elements are always added to the head of the hlist
- */
-struct cfs_hash_head {
-	struct hlist_head	hh_head;	/**< entries list */
-};
-
-static int
-cfs_hash_hh_hhead_size(struct cfs_hash *hs)
-{
-	return sizeof(struct cfs_hash_head);
-}
-
-static struct hlist_head *
-cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	struct cfs_hash_head *head;
-
-	head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0];
-	return &head[bd->bd_offset].hh_head;
-}
-
-static int
-cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd));
-	return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	hlist_del_init(hnode);
-	return -1; /* unknown depth */
-}
-
-/**
- * Simple hash head with depth tracking;
- * new elements are always added to the head of the hlist
- */
-struct cfs_hash_head_dep {
-	struct hlist_head	hd_head;	/**< entries list */
-	unsigned int		hd_depth;	/**< list length */
-};
-
-static int
-cfs_hash_hd_hhead_size(struct cfs_hash *hs)
-{
-	return sizeof(struct cfs_hash_head_dep);
-}
-
-static struct hlist_head *
-cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	struct cfs_hash_head_dep *head;
-
-	head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0];
-	return &head[bd->bd_offset].hd_head;
-}
-
-static int
-cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	struct cfs_hash_head_dep *hh;
-
-	hh = container_of(cfs_hash_hd_hhead(hs, bd),
-			  struct cfs_hash_head_dep, hd_head);
-	hlist_add_head(hnode, &hh->hd_head);
-	return ++hh->hd_depth;
-}
-
-static int
-cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	struct cfs_hash_head_dep *hh;
-
-	hh = container_of(cfs_hash_hd_hhead(hs, bd),
-			  struct cfs_hash_head_dep, hd_head);
-	hlist_del_init(hnode);
-	return --hh->hd_depth;
-}
-
-/**
- * double-links hash head without depth tracking;
- * new elements are always added to the tail of the hlist
- */
-struct cfs_hash_dhead {
-	struct hlist_head	dh_head;	/**< entries list */
-	struct hlist_node	*dh_tail;	/**< the last entry */
-};
-
-static int
-cfs_hash_dh_hhead_size(struct cfs_hash *hs)
-{
-	return sizeof(struct cfs_hash_dhead);
-}
-
-static struct hlist_head *
-cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	struct cfs_hash_dhead *head;
-
-	head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0];
-	return &head[bd->bd_offset].dh_head;
-}
-
-static int
-cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	struct cfs_hash_dhead *dh;
-
-	dh = container_of(cfs_hash_dh_hhead(hs, bd),
-			  struct cfs_hash_dhead, dh_head);
-	if (dh->dh_tail) /* not empty */
-		hlist_add_behind(hnode, dh->dh_tail);
-	else /* empty list */
-		hlist_add_head(hnode, &dh->dh_head);
-	dh->dh_tail = hnode;
-	return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnd)
-{
-	struct cfs_hash_dhead *dh;
-
-	dh = container_of(cfs_hash_dh_hhead(hs, bd),
-			  struct cfs_hash_dhead, dh_head);
-	if (!hnd->next) { /* it's the tail */
-		dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL :
-			      container_of(hnd->pprev, struct hlist_node, next);
-	}
-	hlist_del_init(hnd);
-	return -1; /* unknown depth */
-}
-
-/**
- * double-links hash head with depth tracking;
- * new elements are always added to the tail of the hlist
- */
-struct cfs_hash_dhead_dep {
-	struct hlist_head	dd_head;	/**< entries list */
-	struct hlist_node	*dd_tail;	/**< the last entry */
-	unsigned int		dd_depth;	/**< list length */
-};
-
-static int
-cfs_hash_dd_hhead_size(struct cfs_hash *hs)
-{
-	return sizeof(struct cfs_hash_dhead_dep);
-}
-
-static struct hlist_head *
-cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-	struct cfs_hash_dhead_dep *head;
-
-	head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0];
-	return &head[bd->bd_offset].dd_head;
-}
-
-static int
-cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnode)
-{
-	struct cfs_hash_dhead_dep *dh;
-
-	dh = container_of(cfs_hash_dd_hhead(hs, bd),
-			  struct cfs_hash_dhead_dep, dd_head);
-	if (dh->dd_tail) /* not empty */
-		hlist_add_behind(hnode, dh->dd_tail);
-	else /* empty list */
-		hlist_add_head(hnode, &dh->dd_head);
-	dh->dd_tail = hnode;
-	return ++dh->dd_depth;
-}
-
-static int
-cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		      struct hlist_node *hnd)
-{
-	struct cfs_hash_dhead_dep *dh;
-
-	dh = container_of(cfs_hash_dd_hhead(hs, bd),
-			  struct cfs_hash_dhead_dep, dd_head);
-	if (!hnd->next) { /* it's the tail */
-		dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL :
-			      container_of(hnd->pprev, struct hlist_node, next);
-	}
-	hlist_del_init(hnd);
-	return --dh->dd_depth;
-}
-
-static struct cfs_hash_hlist_ops cfs_hash_hh_hops = {
-	.hop_hhead	= cfs_hash_hh_hhead,
-	.hop_hhead_size	= cfs_hash_hh_hhead_size,
-	.hop_hnode_add	= cfs_hash_hh_hnode_add,
-	.hop_hnode_del	= cfs_hash_hh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_hd_hops = {
-	.hop_hhead	= cfs_hash_hd_hhead,
-	.hop_hhead_size	= cfs_hash_hd_hhead_size,
-	.hop_hnode_add	= cfs_hash_hd_hnode_add,
-	.hop_hnode_del	= cfs_hash_hd_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dh_hops = {
-	.hop_hhead	= cfs_hash_dh_hhead,
-	.hop_hhead_size	= cfs_hash_dh_hhead_size,
-	.hop_hnode_add	= cfs_hash_dh_hnode_add,
-	.hop_hnode_del	= cfs_hash_dh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dd_hops = {
-	.hop_hhead	= cfs_hash_dd_hhead,
-	.hop_hhead_size	= cfs_hash_dd_hhead_size,
-	.hop_hnode_add	= cfs_hash_dd_hnode_add,
-	.hop_hnode_del	= cfs_hash_dd_hnode_del,
-};
-
-static void
-cfs_hash_hlist_setup(struct cfs_hash *hs)
-{
-	if (cfs_hash_with_add_tail(hs)) {
-		hs->hs_hops = cfs_hash_with_depth(hs) ?
-			      &cfs_hash_dd_hops : &cfs_hash_dh_hops;
-	} else {
-		hs->hs_hops = cfs_hash_with_depth(hs) ?
-			      &cfs_hash_hd_hops : &cfs_hash_hh_hops;
-	}
-}
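
The four hlist-head flavors trade a little per-bucket memory for features: the *_dep variants keep a depth counter so overlong chains can trigger a rehash, and the dhead variants carry a tail pointer so add-to-tail stays O(1). A simplified userspace analogue of the tail-pointer idea (a plain singly-linked list instead of a kernel hlist; all names made up):

#include <stdio.h>

struct node {
	struct node *next;
	int v;
};

/* like cfs_hash_dhead: list head plus a cached tail pointer */
struct dhead {
	struct node *head;
	struct node *tail;
};

static void dhead_add_tail(struct dhead *dh, struct node *n)
{
	n->next = NULL;
	if (dh->tail)
		dh->tail->next = n;	/* O(1): no walk to the end */
	else
		dh->head = n;		/* list was empty */
	dh->tail = n;
}

int main(void)
{
	struct dhead dh = { NULL, NULL };
	struct node a = { .v = 1 }, b = { .v = 2 };
	struct node *n;

	dhead_add_tail(&dh, &a);
	dhead_add_tail(&dh, &b);
	for (n = dh.head; n; n = n->next)
		printf("%d\n", n->v);	/* prints 1 then 2 */
	return 0;
}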
-
-static void
-cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts,
-		     unsigned int bits, const void *key, struct cfs_hash_bd *bd)
-{
-	unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1);
-
-	LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits);
-
-	bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)];
-	bd->bd_offset = index >> (bits - hs->hs_bkt_bits);
-}
-
-void
-cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd)
-{
-	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
-	if (likely(!hs->hs_rehash_buckets)) {
-		cfs_hash_bd_from_key(hs, hs->hs_buckets,
-				     hs->hs_cur_bits, key, bd);
-	} else {
-		LASSERT(hs->hs_rehash_bits);
-		cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-				     hs->hs_rehash_bits, key, bd);
-	}
-}
-EXPORT_SYMBOL(cfs_hash_bd_get);
-
-static inline void
-cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur)
-{
-	if (likely(dep_cur <= bd->bd_bucket->hsb_depmax))
-		return;
-
-	bd->bd_bucket->hsb_depmax = dep_cur;
-# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-	if (likely(!warn_on_depth ||
-		   max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
-		return;
-
-	spin_lock(&hs->hs_dep_lock);
-	hs->hs_dep_max = dep_cur;
-	hs->hs_dep_bkt = bd->bd_bucket->hsb_index;
-	hs->hs_dep_off = bd->bd_offset;
-	hs->hs_dep_bits = hs->hs_cur_bits;
-	spin_unlock(&hs->hs_dep_lock);
-
-	queue_work(cfs_rehash_wq, &hs->hs_dep_work);
-# endif
-}
-
-void
-cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		       struct hlist_node *hnode)
-{
-	int rc;
-
-	rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
-	cfs_hash_bd_dep_record(hs, bd, rc);
-	bd->bd_bucket->hsb_version++;
-	if (unlikely(!bd->bd_bucket->hsb_version))
-		bd->bd_bucket->hsb_version++;
-	bd->bd_bucket->hsb_count++;
-
-	if (cfs_hash_with_counter(hs))
-		atomic_inc(&hs->hs_count);
-	if (!cfs_hash_with_no_itemref(hs))
-		cfs_hash_get(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_add_locked);
-
-void
-cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-		       struct hlist_node *hnode)
-{
-	hs->hs_hops->hop_hnode_del(hs, bd, hnode);
-
-	LASSERT(bd->bd_bucket->hsb_count > 0);
-	bd->bd_bucket->hsb_count--;
-	bd->bd_bucket->hsb_version++;
-	if (unlikely(!bd->bd_bucket->hsb_version))
-		bd->bd_bucket->hsb_version++;
-
-	if (cfs_hash_with_counter(hs)) {
-		LASSERT(atomic_read(&hs->hs_count) > 0);
-		atomic_dec(&hs->hs_count);
-	}
-	if (!cfs_hash_with_no_itemref(hs))
-		cfs_hash_put_locked(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_del_locked);
-
-void
-cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
-			struct cfs_hash_bd *bd_new, struct hlist_node *hnode)
-{
-	struct cfs_hash_bucket *obkt = bd_old->bd_bucket;
-	struct cfs_hash_bucket *nbkt = bd_new->bd_bucket;
-	int rc;
-
-	if (!cfs_hash_bd_compare(bd_old, bd_new))
-		return;
-
-	/* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops
-	 * in cfs_hash_bd_del/add_locked
-	 */
-	hs->hs_hops->hop_hnode_del(hs, bd_old, hnode);
-	rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode);
-	cfs_hash_bd_dep_record(hs, bd_new, rc);
-
-	LASSERT(obkt->hsb_count > 0);
-	obkt->hsb_count--;
-	obkt->hsb_version++;
-	if (unlikely(!obkt->hsb_version))
-		obkt->hsb_version++;
-	nbkt->hsb_count++;
-	nbkt->hsb_version++;
-	if (unlikely(!nbkt->hsb_version))
-		nbkt->hsb_version++;
-}
-
-enum {
-	/** always set, for sanity (avoid ZERO intent) */
-	CFS_HS_LOOKUP_MASK_FIND	= BIT(0),
-	/** return entry with a ref */
-	CFS_HS_LOOKUP_MASK_REF	= BIT(1),
-	/** add entry if not existing */
-	CFS_HS_LOOKUP_MASK_ADD	= BIT(2),
-	/** delete entry, ignore other masks */
-	CFS_HS_LOOKUP_MASK_DEL	= BIT(3),
-};
-
-enum cfs_hash_lookup_intent {
-	/** return item w/o refcount */
-	CFS_HS_LOOKUP_IT_PEEK	 = CFS_HS_LOOKUP_MASK_FIND,
-	/** return item with refcount */
-	CFS_HS_LOOKUP_IT_FIND	 = (CFS_HS_LOOKUP_MASK_FIND |
-				    CFS_HS_LOOKUP_MASK_REF),
-	/** return item w/o refcount if existed, otherwise add */
-	CFS_HS_LOOKUP_IT_ADD	 = (CFS_HS_LOOKUP_MASK_FIND |
-				    CFS_HS_LOOKUP_MASK_ADD),
-	/** return item with refcount if existed, otherwise add */
-	CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND |
-				    CFS_HS_LOOKUP_MASK_ADD),
-	/** delete if existed */
-	CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND |
-				    CFS_HS_LOOKUP_MASK_DEL)
-};
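/*
 * The intents are bitwise compositions of the masks above; for example,
 * CFS_HS_LOOKUP_IT_FINDADD = FIND | REF | ADD = 0x1 | 0x2 | 0x4 = 0x7,
 * so a FINDADD lookup takes a reference on a matching entry and falls
 * back to inserting the caller's @hnode when no match exists.
 */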
-
-static struct hlist_node *
-cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			  const void *key, struct hlist_node *hnode,
-			  enum cfs_hash_lookup_intent intent)
-
-{
-	struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
-	struct hlist_node *ehnode;
-	struct hlist_node *match;
-	int intent_add = intent & CFS_HS_LOOKUP_MASK_ADD;
-
-	/* with this function, we can avoid a lot of useless refcount ops,
-	 * which are expensive atomic operations most of the time.
-	 */
-	match = intent_add ? NULL : hnode;
-	hlist_for_each(ehnode, hhead) {
-		if (!cfs_hash_keycmp(hs, key, ehnode))
-			continue;
-
-		if (match && match != ehnode) /* can't match */
-			continue;
-
-		/* match and ... */
-		if (intent & CFS_HS_LOOKUP_MASK_DEL) {
-			cfs_hash_bd_del_locked(hs, bd, ehnode);
-			return ehnode;
-		}
-
-		/* caller wants refcount? */
-		if (intent & CFS_HS_LOOKUP_MASK_REF)
-			cfs_hash_get(hs, ehnode);
-		return ehnode;
-	}
-	/* no match item */
-	if (!intent_add)
-		return NULL;
-
-	LASSERT(hnode);
-	cfs_hash_bd_add_locked(hs, bd, hnode);
-	return hnode;
-}
-
-struct hlist_node *
-cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			  const void *key)
-{
-	return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
-					 CFS_HS_LOOKUP_IT_FIND);
-}
-EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
-
-struct hlist_node *
-cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			const void *key)
-{
-	return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
-					 CFS_HS_LOOKUP_IT_PEEK);
-}
-EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
-
-static void
-cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-		       unsigned int n, int excl)
-{
-	struct cfs_hash_bucket *prev = NULL;
-	int i;
-
-	/**
-	 * bds must be ordered by ascending bd->bd_bucket->hsb_index.
-	 * NB: several bds may point to the same bucket but with different
-	 * bd::bd_offset, so take care to avoid deadlock.
-	 */
-	cfs_hash_for_each_bd(bds, n, i) {
-		if (prev == bds[i].bd_bucket)
-			continue;
-
-		LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index);
-		cfs_hash_bd_lock(hs, &bds[i], excl);
-		prev = bds[i].bd_bucket;
-	}
-}
-
-static void
-cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			 unsigned int n, int excl)
-{
-	struct cfs_hash_bucket *prev = NULL;
-	int i;
-
-	cfs_hash_for_each_bd(bds, n, i) {
-		if (prev != bds[i].bd_bucket) {
-			cfs_hash_bd_unlock(hs, &bds[i], excl);
-			prev = bds[i].bd_bucket;
-		}
-	}
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				unsigned int n, const void *key)
-{
-	struct hlist_node *ehnode;
-	unsigned int i;
-
-	cfs_hash_for_each_bd(bds, n, i) {
-		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL,
-						   CFS_HS_LOOKUP_IT_FIND);
-		if (ehnode)
-			return ehnode;
-	}
-	return NULL;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				 unsigned int n, const void *key,
-				 struct hlist_node *hnode, int noref)
-{
-	struct hlist_node *ehnode;
-	int intent;
-	unsigned int i;
-
-	LASSERT(hnode);
-	intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK;
-
-	cfs_hash_for_each_bd(bds, n, i) {
-		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key,
-						   NULL, intent);
-		if (ehnode)
-			return ehnode;
-	}
-
-	if (i == 1) { /* only one bucket */
-		cfs_hash_bd_add_locked(hs, &bds[0], hnode);
-	} else {
-		struct cfs_hash_bd mybd;
-
-		cfs_hash_bd_get(hs, key, &mybd);
-		cfs_hash_bd_add_locked(hs, &mybd, hnode);
-	}
-
-	return hnode;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				 unsigned int n, const void *key,
-				 struct hlist_node *hnode)
-{
-	struct hlist_node *ehnode;
-	unsigned int i;
-
-	cfs_hash_for_each_bd(bds, n, i) {
-		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode,
-						   CFS_HS_LOOKUP_IT_FINDDEL);
-		if (ehnode)
-			return ehnode;
-	}
-	return NULL;
-}
-
-static void
-cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
-{
-	int rc;
-
-	if (!bd2->bd_bucket)
-		return;
-
-	if (!bd1->bd_bucket) {
-		*bd1 = *bd2;
-		bd2->bd_bucket = NULL;
-		return;
-	}
-
-	rc = cfs_hash_bd_compare(bd1, bd2);
-	if (!rc)
-		bd2->bd_bucket = NULL;
-	else if (rc > 0)
-		swap(*bd1, *bd2); /* swap bd1 and bd2 */
-}
-
-void
-cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
-		     struct cfs_hash_bd *bds)
-{
-	/* NB: caller should hold hs_lock.rw if REHASH is set */
-	cfs_hash_bd_from_key(hs, hs->hs_buckets,
-			     hs->hs_cur_bits, key, &bds[0]);
-	if (likely(!hs->hs_rehash_buckets)) {
-		/* no rehash or not rehashing */
-		bds[1].bd_bucket = NULL;
-		return;
-	}
-
-	LASSERT(hs->hs_rehash_bits);
-	cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-			     hs->hs_rehash_bits, key, &bds[1]);
-
-	cfs_hash_bd_order(&bds[0], &bds[1]);
-}
-
-void
-cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
-	cfs_hash_multi_bd_lock(hs, bds, 2, excl);
-}
-
-void
-cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
-	cfs_hash_multi_bd_unlock(hs, bds, 2, excl);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-			       const void *key)
-{
-	return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				const void *key, struct hlist_node *hnode,
-				int noref)
-{
-	return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key,
-						hnode, noref);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-				const void *key, struct hlist_node *hnode)
-{
-	return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode);
-}
-
-static void
-cfs_hash_buckets_free(struct cfs_hash_bucket **buckets,
-		      int bkt_size, int prev_size, int size)
-{
-	int i;
-
-	for (i = prev_size; i < size; i++)
-		kfree(buckets[i]);
-
-	kvfree(buckets);
-}
-
-/*
- * Create or grow bucket memory. Return old_buckets if no allocation was
- * needed, the newly allocated buckets if allocation was needed and
- * successful, and NULL on error.
- */
-static struct cfs_hash_bucket **
-cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
-			 unsigned int old_size, unsigned int new_size)
-{
-	struct cfs_hash_bucket **new_bkts;
-	int i;
-
-	LASSERT(!old_size || old_bkts);
-
-	if (old_bkts && old_size == new_size)
-		return old_bkts;
-
-	new_bkts = kvmalloc_array(new_size, sizeof(new_bkts[0]), GFP_KERNEL);
-	if (!new_bkts)
-		return NULL;
-
-	if (old_bkts) {
-		memcpy(new_bkts, old_bkts,
-		       min(old_size, new_size) * sizeof(*old_bkts));
-	}
-
-	for (i = old_size; i < new_size; i++) {
-		struct hlist_head *hhead;
-		struct cfs_hash_bd bd;
-
-		new_bkts[i] = kzalloc(cfs_hash_bkt_size(hs), GFP_KERNEL);
-		if (!new_bkts[i]) {
-			cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs),
-					      old_size, new_size);
-			return NULL;
-		}
-
-		new_bkts[i]->hsb_index = i;
-		new_bkts[i]->hsb_version = 1;	/* shouldn't be zero */
-		new_bkts[i]->hsb_depmax = -1;	/* unknown */
-		bd.bd_bucket = new_bkts[i];
-		cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
-			INIT_HLIST_HEAD(hhead);
-
-		if (cfs_hash_with_no_lock(hs) ||
-		    cfs_hash_with_no_bktlock(hs))
-			continue;
-
-		if (cfs_hash_with_rw_bktlock(hs))
-			rwlock_init(&new_bkts[i]->hsb_lock.rw);
-		else if (cfs_hash_with_spin_bktlock(hs))
-			spin_lock_init(&new_bkts[i]->hsb_lock.spin);
-		else
-			LBUG(); /* invalid use-case */
-	}
-	return new_bkts;
-}
-
-/**
- * Initialize new libcfs hash, where:
- * @name     - Descriptive hash name
- * @cur_bits - Initial hash table size, in bits
- * @max_bits - Maximum allowed hash table resize, in bits
- * @ops      - Registered hash table operations
- * @flags    - CFS_HASH_REHASH enables dynamic hash resizing
- *	     - CFS_HASH_SORT enables chained hash sort
- */
-static void cfs_hash_rehash_worker(struct work_struct *work);
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static void cfs_hash_dep_print(struct work_struct *work)
-{
-	struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_dep_work);
-	int dep;
-	int bkt;
-	int off;
-	int bits;
-
-	spin_lock(&hs->hs_dep_lock);
-	dep = hs->hs_dep_max;
-	bkt = hs->hs_dep_bkt;
-	off = hs->hs_dep_off;
-	bits = hs->hs_dep_bits;
-	spin_unlock(&hs->hs_dep_lock);
-
-	LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n",
-		      hs->hs_name, bits, dep, bkt, off);
-	spin_lock(&hs->hs_dep_lock);
-	hs->hs_dep_bits = 0; /* mark as workitem done */
-	spin_unlock(&hs->hs_dep_lock);
-}
-
-static void cfs_hash_depth_wi_init(struct cfs_hash *hs)
-{
-	spin_lock_init(&hs->hs_dep_lock);
-	INIT_WORK(&hs->hs_dep_work, cfs_hash_dep_print);
-}
-
-static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs)
-{
-	cancel_work_sync(&hs->hs_dep_work);
-}
-
-#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */
-
-static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {}
-static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {}
-
-#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */
-
-struct cfs_hash *
-cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
-		unsigned int bkt_bits, unsigned int extra_bytes,
-		unsigned int min_theta, unsigned int max_theta,
-		struct cfs_hash_ops *ops, unsigned int flags)
-{
-	struct cfs_hash *hs;
-	int len;
-
-	BUILD_BUG_ON(CFS_HASH_THETA_BITS >= 15);
-
-	LASSERT(name);
-	LASSERT(ops->hs_key);
-	LASSERT(ops->hs_hash);
-	LASSERT(ops->hs_object);
-	LASSERT(ops->hs_keycmp);
-	LASSERT(ops->hs_get);
-	LASSERT(ops->hs_put || ops->hs_put_locked);
-
-	if (flags & CFS_HASH_REHASH)
-		flags |= CFS_HASH_COUNTER; /* must have counter */
-
-	LASSERT(cur_bits > 0);
-	LASSERT(cur_bits >= bkt_bits);
-	LASSERT(max_bits >= cur_bits && max_bits < 31);
-	LASSERT(ergo(!(flags & CFS_HASH_REHASH), cur_bits == max_bits));
-	LASSERT(ergo(flags & CFS_HASH_REHASH, !(flags & CFS_HASH_NO_LOCK)));
-	LASSERT(ergo(flags & CFS_HASH_REHASH_KEY, ops->hs_keycpy));
-
-	len = !(flags & CFS_HASH_BIGNAME) ?
-	      CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
-	hs = kzalloc(offsetof(struct cfs_hash, hs_name[len]), GFP_KERNEL);
-	if (!hs)
-		return NULL;
-
-	strlcpy(hs->hs_name, name, len);
-	hs->hs_flags = flags;
-
-	atomic_set(&hs->hs_refcount, 1);
-	atomic_set(&hs->hs_count, 0);
-
-	cfs_hash_lock_setup(hs);
-	cfs_hash_hlist_setup(hs);
-
-	hs->hs_cur_bits = (u8)cur_bits;
-	hs->hs_min_bits = (u8)cur_bits;
-	hs->hs_max_bits = (u8)max_bits;
-	hs->hs_bkt_bits = (u8)bkt_bits;
-
-	hs->hs_ops = ops;
-	hs->hs_extra_bytes = extra_bytes;
-	hs->hs_rehash_bits = 0;
-	INIT_WORK(&hs->hs_rehash_work, cfs_hash_rehash_worker);
-	cfs_hash_depth_wi_init(hs);
-
-	if (cfs_hash_with_rehash(hs))
-		__cfs_hash_set_theta(hs, min_theta, max_theta);
-
-	hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0,
-						  CFS_HASH_NBKT(hs));
-	if (hs->hs_buckets)
-		return hs;
-
-	kfree(hs);
-	return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_create);
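/*
 * A minimal usage sketch (not part of this tree): a caller hashing
 * objects keyed by a u64 id.  The cfs_hash_ops callback signatures and
 * the cfs_hash_u64_hash() helper are assumed from libcfs_hash.h, and
 * CFS_HASH_MIN_THETA/MAX_THETA are assumed to be the header's default
 * load-factor bounds; all demo_* names are hypothetical.
 */
#include <linux/libcfs/libcfs_hash.h>

struct demo_obj {
	u64			do_id;
	atomic_t		do_ref;
	struct hlist_node	do_hnode;
};

static unsigned int demo_hops_hash(struct cfs_hash *hs, const void *key,
				   unsigned int mask)
{
	return cfs_hash_u64_hash(*(const u64 *)key, mask);
}

static void *demo_hops_key(struct hlist_node *hnode)
{
	return &container_of(hnode, struct demo_obj, do_hnode)->do_id;
}

static int demo_hops_keycmp(const void *key, struct hlist_node *hnode)
{
	return *(const u64 *)key ==
	       container_of(hnode, struct demo_obj, do_hnode)->do_id;
}

static void *demo_hops_object(struct hlist_node *hnode)
{
	return container_of(hnode, struct demo_obj, do_hnode);
}

static void demo_hops_get(struct cfs_hash *hs, struct hlist_node *hnode)
{
	atomic_inc(&container_of(hnode, struct demo_obj, do_hnode)->do_ref);
}

static void demo_hops_put(struct cfs_hash *hs, struct hlist_node *hnode)
{
	atomic_dec(&container_of(hnode, struct demo_obj, do_hnode)->do_ref);
}

static struct cfs_hash_ops demo_hops = {
	.hs_hash	= demo_hops_hash,
	.hs_key		= demo_hops_key,
	.hs_keycmp	= demo_hops_keycmp,
	.hs_object	= demo_hops_object,
	.hs_get		= demo_hops_get,
	.hs_put		= demo_hops_put,
};

/* 2^7 buckets initially, growable to 2^12, 2^3 hlist heads per bucket */
static struct cfs_hash *demo_hash_init(void)
{
	return cfs_hash_create("demo", 7, 12, 3, 0,
			       CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
			       &demo_hops,
			       CFS_HASH_REHASH | CFS_HASH_COUNTER);
}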
-
-/**
- * Cleanup libcfs hash @hs.
- */
-static void
-cfs_hash_destroy(struct cfs_hash *hs)
-{
-	struct hlist_node *hnode;
-	struct hlist_node *pos;
-	struct cfs_hash_bd bd;
-	int i;
-
-	LASSERT(hs);
-	LASSERT(!cfs_hash_is_exiting(hs) &&
-		!cfs_hash_is_iterating(hs));
-
-	/**
-	 * prohibit further rehashes; we don't need any lock because
-	 * this is the only (last) thread that can change it.
-	 */
-	hs->hs_exiting = 1;
-	if (cfs_hash_with_rehash(hs))
-		cfs_hash_rehash_cancel(hs);
-
-	cfs_hash_depth_wi_cancel(hs);
-	/* rehash should be done/canceled */
-	LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets);
-
-	cfs_hash_for_each_bucket(hs, &bd, i) {
-		struct hlist_head *hhead;
-
-		LASSERT(bd.bd_bucket);
-		/* no need to take this lock; kept only for code consistency */
-		cfs_hash_bd_lock(hs, &bd, 1);
-
-		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-			hlist_for_each_safe(hnode, pos, hhead) {
-				LASSERTF(!cfs_hash_with_assert_empty(hs),
-					 "hash %s bucket %u(%u) is not empty: %u items left\n",
-					 hs->hs_name, bd.bd_bucket->hsb_index,
-					 bd.bd_offset, bd.bd_bucket->hsb_count);
-				/* can't validate the key here, because a
-				 * rehash may have been interrupted
-				 */
-				cfs_hash_bd_del_locked(hs, &bd, hnode);
-				cfs_hash_exit(hs, hnode);
-			}
-		}
-		LASSERT(!bd.bd_bucket->hsb_count);
-		cfs_hash_bd_unlock(hs, &bd, 1);
-		cond_resched();
-	}
-
-	LASSERT(!atomic_read(&hs->hs_count));
-
-	cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
-			      0, CFS_HASH_NBKT(hs));
-	kfree(hs);
-}
-
-struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs)
-{
-	if (atomic_inc_not_zero(&hs->hs_refcount))
-		return hs;
-	return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_getref);
-
-void cfs_hash_putref(struct cfs_hash *hs)
-{
-	if (atomic_dec_and_test(&hs->hs_refcount))
-		cfs_hash_destroy(hs);
-}
-EXPORT_SYMBOL(cfs_hash_putref);
-
-static inline int
-cfs_hash_rehash_bits(struct cfs_hash *hs)
-{
-	if (cfs_hash_with_no_lock(hs) ||
-	    !cfs_hash_with_rehash(hs))
-		return -EOPNOTSUPP;
-
-	if (unlikely(cfs_hash_is_exiting(hs)))
-		return -ESRCH;
-
-	if (unlikely(cfs_hash_is_rehashing(hs)))
-		return -EALREADY;
-
-	if (unlikely(cfs_hash_is_iterating(hs)))
-		return -EAGAIN;
-
-	/* XXX: need to handle case with max_theta != 2.0
-	 *      and the case with min_theta != 0.5
-	 */
-	if ((hs->hs_cur_bits < hs->hs_max_bits) &&
-	    (__cfs_hash_theta(hs) > hs->hs_max_theta))
-		return hs->hs_cur_bits + 1;
-
-	if (!cfs_hash_with_shrink(hs))
-		return 0;
-
-	if ((hs->hs_cur_bits > hs->hs_min_bits) &&
-	    (__cfs_hash_theta(hs) < hs->hs_min_theta))
-		return hs->hs_cur_bits - 1;
-
-	return 0;
-}
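/*
 * Worked example, assuming CFS_HASH_THETA_BITS == 10 from libcfs_hash.h
 * (theta is the fixed-point average number of items per bucket): with
 * hs_cur_bits == 7 (128 buckets) and 300 items, theta is 300/128 ~= 2.34.
 * With the default max theta of 2.0 this exceeds hs_max_theta, so the
 * function returns 8 and the table grows to 256 buckets.
 */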
-
-/**
- * don't allow inline rehash if:
- * - the user wants non-blocking changes (add/del) on the hash table
- * - there are too many elements
- */
-static inline int
-cfs_hash_rehash_inline(struct cfs_hash *hs)
-{
-	return !cfs_hash_with_nblk_change(hs) &&
-	       atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key.  The registered
- * ops->hs_get function will be called when the item is added.
- */
-void
-cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
-	struct cfs_hash_bd bd;
-	int bits;
-
-	LASSERT(hlist_unhashed(hnode));
-
-	cfs_hash_lock(hs, 0);
-	cfs_hash_bd_get_and_lock(hs, key, &bd, 1);
-
-	cfs_hash_key_validate(hs, key, hnode);
-	cfs_hash_bd_add_locked(hs, &bd, hnode);
-
-	cfs_hash_bd_unlock(hs, &bd, 1);
-
-	bits = cfs_hash_rehash_bits(hs);
-	cfs_hash_unlock(hs, 0);
-	if (bits > 0)
-		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-}
-EXPORT_SYMBOL(cfs_hash_add);
-
-static struct hlist_node *
-cfs_hash_find_or_add(struct cfs_hash *hs, const void *key,
-		     struct hlist_node *hnode, int noref)
-{
-	struct hlist_node *ehnode;
-	struct cfs_hash_bd bds[2];
-	int bits = 0;
-
-	LASSERTF(hlist_unhashed(hnode), "hnode = %p\n", hnode);
-
-	cfs_hash_lock(hs, 0);
-	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
-	cfs_hash_key_validate(hs, key, hnode);
-	ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key,
-						 hnode, noref);
-	cfs_hash_dual_bd_unlock(hs, bds, 1);
-
-	if (ehnode == hnode)	/* new item added */
-		bits = cfs_hash_rehash_bits(hs);
-	cfs_hash_unlock(hs, 0);
-	if (bits > 0)
-		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
-	return ehnode;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key.  The registered
- * ops->hs_get function will be called if the item was added.
- * Returns 0 on success or -EALREADY on key collisions.
- */
-int
-cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
-		    struct hlist_node *hnode)
-{
-	return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ?
-	       -EALREADY : 0;
-}
-EXPORT_SYMBOL(cfs_hash_add_unique);
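/*
 * Illustrative use (hypothetical demo_obj from the sketch above): insert
 * a new object only if its key is free, otherwise hand back the existing
 * holder.  cfs_hash_findadd_unique() below performs the same find-or-add
 * in a single pass.
 */
static struct demo_obj *demo_insert(struct cfs_hash *hs, struct demo_obj *obj)
{
	if (!cfs_hash_add_unique(hs, &obj->do_id, &obj->do_hnode))
		return obj;	/* inserted; a ref was taken via ops->hs_get */

	/* lost the race (-EALREADY): return the entry that won instead
	 * (may be NULL if the winner has already been removed again)
	 */
	return cfs_hash_lookup(hs, &obj->do_id);
}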
-
-/**
- * Add item @hnode to libcfs hash @hs using @key.  If this @key
- * already exists in the hash then ops->hs_get will be called on the
- * conflicting entry and that entry will be returned to the caller.
- * Otherwise ops->hs_get is called on the item which was added.
- */
-void *
-cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
-			struct hlist_node *hnode)
-{
-	hnode = cfs_hash_find_or_add(hs, key, hnode, 0);
-
-	return cfs_hash_object(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_findadd_unique);
-
-/**
- * Delete item @hnode from the libcfs hash @hs using @key.  The @key
- * is required to ensure the correct hash bucket is locked since there
- * is no direct linkage from the item to the bucket.  The object
- * removed from the hash will be returned and ops->hs_put is called
- * on the removed object.
- */
-void *
-cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
-	void *obj = NULL;
-	int bits = 0;
-	struct cfs_hash_bd bds[2];
-
-	cfs_hash_lock(hs, 0);
-	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
-	/* NB: do nothing if @hnode is not in hash table */
-	if (!hnode || !hlist_unhashed(hnode)) {
-		if (!bds[1].bd_bucket && hnode) {
-			cfs_hash_bd_del_locked(hs, &bds[0], hnode);
-		} else {
-			hnode = cfs_hash_dual_bd_finddel_locked(hs, bds,
-								key, hnode);
-		}
-	}
-
-	if (hnode) {
-		obj = cfs_hash_object(hs, hnode);
-		bits = cfs_hash_rehash_bits(hs);
-	}
-
-	cfs_hash_dual_bd_unlock(hs, bds, 1);
-	cfs_hash_unlock(hs, 0);
-	if (bits > 0)
-		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
-	return obj;
-}
-EXPORT_SYMBOL(cfs_hash_del);
-
-/**
- * Delete item given @key in libcfs hash @hs.  The first @key found in
- * the hash will be removed; if the key exists multiple times in the hash
- * @hs, this function must be called once per key.  The removed object
- * will be returned and ops->hs_put is called on the removed object.
- */
-void *
-cfs_hash_del_key(struct cfs_hash *hs, const void *key)
-{
-	return cfs_hash_del(hs, key, NULL);
-}
-EXPORT_SYMBOL(cfs_hash_del_key);
-
-/**
- * Lookup an item using @key in the libcfs hash @hs and return it.
- * If the @key is found in the hash, hs->hs_get() is called and the
- * matching object is returned.  It is the caller's responsibility
- * to call the counterpart ops->hs_put using the cfs_hash_put() macro
- * when finished with the object.  If the @key was not found
- * in the hash @hs NULL is returned.
- */
-void *
-cfs_hash_lookup(struct cfs_hash *hs, const void *key)
-{
-	void *obj = NULL;
-	struct hlist_node *hnode;
-	struct cfs_hash_bd bds[2];
-
-	cfs_hash_lock(hs, 0);
-	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
-	hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
-	if (hnode)
-		obj = cfs_hash_object(hs, hnode);
-
-	cfs_hash_dual_bd_unlock(hs, bds, 0);
-	cfs_hash_unlock(hs, 0);
-
-	return obj;
-}
-EXPORT_SYMBOL(cfs_hash_lookup);
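/*
 * Illustrative lookup/put pairing (hypothetical demo_obj as above): the
 * object returned by cfs_hash_lookup() carries a reference taken through
 * ops->hs_get, which must be dropped with the cfs_hash_put() macro.
 */
static bool demo_id_exists(struct cfs_hash *hs, u64 id)
{
	struct demo_obj *obj = cfs_hash_lookup(hs, &id);

	if (!obj)
		return false;

	cfs_hash_put(hs, &obj->do_hnode);
	return true;
}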
-
-static void
-cfs_hash_for_each_enter(struct cfs_hash *hs)
-{
-	LASSERT(!cfs_hash_is_exiting(hs));
-
-	if (!cfs_hash_with_rehash(hs))
-		return;
-	/*
-	 * NB: there is a race on cfs_hash::hs_iterating, but it doesn't
-	 * matter because it's just an unreliable signal to the rehash
-	 * thread, which will try to finish the rehash ASAP on seeing it.
-	 */
-	hs->hs_iterating = 1;
-
-	cfs_hash_lock(hs, 1);
-	hs->hs_iterators++;
-	cfs_hash_unlock(hs, 1);
-
-	/* NB: iteration is mostly called from service threads; instead
-	 * of blocking them we cancel any pending rehash request and
-	 * relaunch it after the iteration
-	 */
-	if (cfs_hash_is_rehashing(hs))
-		cfs_hash_rehash_cancel(hs);
-}
-
-static void
-cfs_hash_for_each_exit(struct cfs_hash *hs)
-{
-	int remained;
-	int bits;
-
-	if (!cfs_hash_with_rehash(hs))
-		return;
-	cfs_hash_lock(hs, 1);
-	remained = --hs->hs_iterators;
-	bits = cfs_hash_rehash_bits(hs);
-	cfs_hash_unlock(hs, 1);
-	/* NB: racy on cfs_hash::hs_iterating, see above */
-	if (!remained)
-		hs->hs_iterating = 0;
-	if (bits > 0) {
-		cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
-				    CFS_HASH_LOOP_HOG);
-	}
-}
-
-/**
- * For each item in the libcfs hash @hs call the passed callback @func
- * and pass to it as an argument each hash item and the private @data.
- *
- * a) the function itself may sleep (it reschedules between buckets)!
- * b) during the callback:
- *    . the bucket lock is held, so the callback must never sleep.
- *    . if @remove_safe is true, the user can remove the current item
- *      with cfs_hash_bd_del_locked
- */
-static u64
-cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-			void *data, int remove_safe)
-{
-	struct hlist_node *hnode;
-	struct hlist_node *pos;
-	struct cfs_hash_bd bd;
-	u64 count = 0;
-	int excl = !!remove_safe;
-	int loop = 0;
-	int i;
-
-	cfs_hash_for_each_enter(hs);
-
-	cfs_hash_lock(hs, 0);
-	LASSERT(!cfs_hash_is_rehashing(hs));
-
-	cfs_hash_for_each_bucket(hs, &bd, i) {
-		struct hlist_head *hhead;
-
-		cfs_hash_bd_lock(hs, &bd, excl);
-		if (!func) { /* only glimpse size */
-			count += bd.bd_bucket->hsb_count;
-			cfs_hash_bd_unlock(hs, &bd, excl);
-			continue;
-		}
-
-		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-			hlist_for_each_safe(hnode, pos, hhead) {
-				cfs_hash_bucket_validate(hs, &bd, hnode);
-				count++;
-				loop++;
-				if (func(hs, &bd, hnode, data)) {
-					cfs_hash_bd_unlock(hs, &bd, excl);
-					goto out;
-				}
-			}
-		}
-		cfs_hash_bd_unlock(hs, &bd, excl);
-		if (loop < CFS_HASH_LOOP_HOG)
-			continue;
-		loop = 0;
-		cfs_hash_unlock(hs, 0);
-		cond_resched();
-		cfs_hash_lock(hs, 0);
-	}
- out:
-	cfs_hash_unlock(hs, 0);
-
-	cfs_hash_for_each_exit(hs);
-	return count;
-}
-
-struct cfs_hash_cond_arg {
-	cfs_hash_cond_opt_cb_t	func;
-	void			*arg;
-};
-
-static int
-cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			 struct hlist_node *hnode, void *data)
-{
-	struct cfs_hash_cond_arg *cond = data;
-
-	if (cond->func(cfs_hash_object(hs, hnode), cond->arg))
-		cfs_hash_bd_del_locked(hs, bd, hnode);
-	return 0;
-}
-
-/**
- * Delete items from the libcfs hash @hs for which @func returns true.
- * The write lock is held while looping over each bucket, so that no
- * object can be referenced concurrently.
- */
-void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data)
-{
-	struct cfs_hash_cond_arg arg = {
-		.func	= func,
-		.arg	= data,
-	};
-
-	cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1);
-}
-EXPORT_SYMBOL(cfs_hash_cond_del);
-
-void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-		  void *data)
-{
-	cfs_hash_for_each_tight(hs, func, data, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each);
-
-void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-		       void *data)
-{
-	cfs_hash_for_each_tight(hs, func, data, 1);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_safe);
-
-static int
-cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-	      struct hlist_node *hnode, void *data)
-{
-	*(int *)data = 0;
-	return 1; /* return 1 to break the loop */
-}
-
-int
-cfs_hash_is_empty(struct cfs_hash *hs)
-{
-	int empty = 1;
-
-	cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0);
-	return empty;
-}
-EXPORT_SYMBOL(cfs_hash_is_empty);
-
-u64
-cfs_hash_size_get(struct cfs_hash *hs)
-{
-	return cfs_hash_with_counter(hs) ?
-	       atomic_read(&hs->hs_count) :
-	       cfs_hash_for_each_tight(hs, NULL, NULL, 0);
-}
-EXPORT_SYMBOL(cfs_hash_size_get);
-
-/*
- * cfs_hash_for_each_relax:
- * Iterate the hash table and call @func on each item without any
- * lock held. This function cannot guarantee that the iteration
- * finishes if these features are enabled:
- *
- *  a. if rehash_key is enabled, an item can be moved from
- *     one bucket to another
- *  b. the user can remove a non-zero-ref item from the hash table,
- *     and, even worse, may change its key and insert it into
- *     another hash bucket.
- * There is no way to finish the iteration correctly in these two
- * cases, so the iteration has to stop on change.
- */
-static int
-cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-			void *data, int start)
-{
-	struct hlist_node *next = NULL;
-	struct hlist_node *hnode;
-	struct cfs_hash_bd bd;
-	u32 version;
-	int count = 0;
-	int stop_on_change;
-	int has_put_locked;
-	int end = -1;
-	int rc = 0;
-	int i;
-
-	stop_on_change = cfs_hash_with_rehash_key(hs) ||
-			 !cfs_hash_with_no_itemref(hs);
-	has_put_locked = hs->hs_ops->hs_put_locked != NULL;
-	cfs_hash_lock(hs, 0);
-again:
-	LASSERT(!cfs_hash_is_rehashing(hs));
-
-	cfs_hash_for_each_bucket(hs, &bd, i) {
-		struct hlist_head *hhead;
-
-		if (i < start)
-			continue;
-		else if (end > 0 && i >= end)
-			break;
-
-		cfs_hash_bd_lock(hs, &bd, 0);
-		version = cfs_hash_bd_version_get(&bd);
-
-		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-			hnode = hhead->first;
-			if (!hnode)
-				continue;
-			cfs_hash_get(hs, hnode);
-
-			for (; hnode; hnode = next) {
-				cfs_hash_bucket_validate(hs, &bd, hnode);
-				next = hnode->next;
-				if (next)
-					cfs_hash_get(hs, next);
-				cfs_hash_bd_unlock(hs, &bd, 0);
-				cfs_hash_unlock(hs, 0);
-
-				rc = func(hs, &bd, hnode, data);
-				if (stop_on_change || !has_put_locked)
-					cfs_hash_put(hs, hnode);
-				cond_resched();
-				count++;
-
-				cfs_hash_lock(hs, 0);
-				cfs_hash_bd_lock(hs, &bd, 0);
-				if (stop_on_change) {
-					if (version !=
-					    cfs_hash_bd_version_get(&bd))
-						rc = -EINTR;
-				} else if (has_put_locked) {
-					cfs_hash_put_locked(hs, hnode);
-				}
-				if (rc) /* callback wants to break iteration */
-					break;
-			}
-			if (next) {
-				if (has_put_locked) {
-					cfs_hash_put_locked(hs, next);
-					next = NULL;
-				}
-				break;
-			} else if (rc) {
-				break;
-			}
-		}
-		cfs_hash_bd_unlock(hs, &bd, 0);
-		if (next && !has_put_locked) {
-			cfs_hash_put(hs, next);
-			next = NULL;
-		}
-		if (rc) /* callback wants to break iteration */
-			break;
-	}
-	if (start > 0 && !rc) {
-		end = start;
-		start = 0;
-		goto again;
-	}
-
-	cfs_hash_unlock(hs, 0);
-	return count;
-}
-
-int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-			 void *data, int start)
-{
-	if (cfs_hash_with_no_lock(hs) ||
-	    cfs_hash_with_rehash_key(hs) ||
-	    !cfs_hash_with_no_itemref(hs))
-		return -EOPNOTSUPP;
-
-	if (!hs->hs_ops->hs_get ||
-	    (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
-		return -EOPNOTSUPP;
-
-	cfs_hash_for_each_enter(hs);
-	cfs_hash_for_each_relax(hs, func, data, start);
-	cfs_hash_for_each_exit(hs);
-
-	return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_nolock);
-
-/**
- * For each hash bucket in the libcfs hash @hs call the passed callback
- * @func until all the hash buckets are empty.  The passed callback @func
- * or the previously registered callback hs->hs_put must remove the item
- * from the hash.  You may either use the cfs_hash_del() or hlist_del()
- * functions.  No rwlocks will be held during the callback @func, so it
- * is safe to sleep if needed.  This function will not terminate until
- * the hash is empty.  Note it is still possible to concurrently add new
- * items into the hash.  It is the caller's responsibility to ensure
- * the required locking is in place to prevent concurrent insertions.
- */
-int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-			void *data)
-{
-	unsigned int i = 0;
-
-	if (cfs_hash_with_no_lock(hs))
-		return -EOPNOTSUPP;
-
-	if (!hs->hs_ops->hs_get ||
-	    (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
-		return -EOPNOTSUPP;
-
-	cfs_hash_for_each_enter(hs);
-	while (cfs_hash_for_each_relax(hs, func, data, 0)) {
-		CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
-		       hs->hs_name, i++);
-	}
-	cfs_hash_for_each_exit(hs);
-	return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_empty);
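/*
 * Illustrative teardown (hypothetical demo_* names as above): the callback
 * must unhash each item, e.g. via cfs_hash_del(); no bucket lock is held,
 * so it may sleep.  A refcounted object is freed once its last reference,
 * held by the iterator and dropped through ops->hs_put, goes away.
 */
static int demo_zap(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		    struct hlist_node *hnode, void *data)
{
	struct demo_obj *obj = cfs_hash_object(hs, hnode);

	cfs_hash_del(hs, &obj->do_id, hnode);
	return 0;
}

static void demo_hash_fini(struct cfs_hash *hs)
{
	cfs_hash_for_each_empty(hs, demo_zap, NULL);
	cfs_hash_putref(hs);	/* drops the initial ref from cfs_hash_create */
}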
-
-void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
-			cfs_hash_for_each_cb_t func, void *data)
-{
-	struct hlist_head *hhead;
-	struct hlist_node *hnode;
-	struct cfs_hash_bd bd;
-
-	cfs_hash_for_each_enter(hs);
-	cfs_hash_lock(hs, 0);
-	if (hindex >= CFS_HASH_NHLIST(hs))
-		goto out;
-
-	cfs_hash_bd_index_set(hs, hindex, &bd);
-
-	cfs_hash_bd_lock(hs, &bd, 0);
-	hhead = cfs_hash_bd_hhead(hs, &bd);
-	hlist_for_each(hnode, hhead) {
-		if (func(hs, &bd, hnode, data))
-			break;
-	}
-	cfs_hash_bd_unlock(hs, &bd, 0);
-out:
-	cfs_hash_unlock(hs, 0);
-	cfs_hash_for_each_exit(hs);
-}
-EXPORT_SYMBOL(cfs_hash_hlist_for_each);
-
-/*
- * For each item in the libcfs hash @hs which matches the @key call
- * the passed callback @func and pass to it as an argument each hash
- * item and the private @data. During the callback the bucket lock
- * is held so the callback must never sleep.
- */
-void
-cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
-		      cfs_hash_for_each_cb_t func, void *data)
-{
-	struct hlist_node *hnode;
-	struct cfs_hash_bd bds[2];
-	unsigned int i;
-
-	cfs_hash_lock(hs, 0);
-
-	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
-	cfs_hash_for_each_bd(bds, 2, i) {
-		struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);
-
-		hlist_for_each(hnode, hlist) {
-			cfs_hash_bucket_validate(hs, &bds[i], hnode);
-
-			if (cfs_hash_keycmp(hs, key, hnode)) {
-				if (func(hs, &bds[i], hnode, data))
-					break;
-			}
-		}
-	}
-
-	cfs_hash_dual_bd_unlock(hs, bds, 0);
-	cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_key);
-
-/**
- * Rehash the libcfs hash @hs.  This can be used
- * to grow the hash size when excessive chaining is detected, or to
- * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
- * flag is set in @hs the libcfs hash may be dynamically rehashed
- * during addition or removal if the hash's theta value exceeds
- * either the hs->hs_min_theta or hs->hs_max_theta values.  By default
- * these values are tuned to keep the chained hash depth small, and
- * this approach assumes a reasonably uniform hashing function.  The
- * theta thresholds for @hs are tunable via cfs_hash_set_theta().
- */
-void
-cfs_hash_rehash_cancel(struct cfs_hash *hs)
-{
-	LASSERT(cfs_hash_with_rehash(hs));
-	cancel_work_sync(&hs->hs_rehash_work);
-}
-
-void
-cfs_hash_rehash(struct cfs_hash *hs, int do_rehash)
-{
-	int rc;
-
-	LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs));
-
-	cfs_hash_lock(hs, 1);
-
-	rc = cfs_hash_rehash_bits(hs);
-	if (rc <= 0) {
-		cfs_hash_unlock(hs, 1);
-		return;
-	}
-
-	hs->hs_rehash_bits = rc;
-	if (!do_rehash) {
-		/* launch and return */
-		queue_work(cfs_rehash_wq, &hs->hs_rehash_work);
-		cfs_hash_unlock(hs, 1);
-		return;
-	}
-
-	/* rehash right now */
-	cfs_hash_unlock(hs, 1);
-
-	cfs_hash_rehash_worker(&hs->hs_rehash_work);
-}
-
-static int
-cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old)
-{
-	struct cfs_hash_bd new;
-	struct hlist_head *hhead;
-	struct hlist_node *hnode;
-	struct hlist_node *pos;
-	void *key;
-	int c = 0;
-
-	/* caller holds cfs_hash_lock(hs, 1), so no bucket lock is needed */
-	cfs_hash_bd_for_each_hlist(hs, old, hhead) {
-		hlist_for_each_safe(hnode, pos, hhead) {
-			key = cfs_hash_key(hs, hnode);
-			LASSERT(key);
-			/* Validate hnode is in the correct bucket. */
-			cfs_hash_bucket_validate(hs, old, hnode);
-			/*
-			 * Delete from old hash bucket; move to new bucket.
-			 * ops->hs_key must be defined.
-			 */
-			cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-					     hs->hs_rehash_bits, key, &new);
-			cfs_hash_bd_move_locked(hs, old, &new, hnode);
-			c++;
-		}
-	}
-
-	return c;
-}
-
-static void
-cfs_hash_rehash_worker(struct work_struct *work)
-{
-	struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_rehash_work);
-	struct cfs_hash_bucket **bkts;
-	struct cfs_hash_bd bd;
-	unsigned int old_size;
-	unsigned int new_size;
-	int bsize;
-	int count = 0;
-	int rc = 0;
-	int i;
-
-	LASSERT(hs && cfs_hash_with_rehash(hs));
-
-	cfs_hash_lock(hs, 0);
-	LASSERT(cfs_hash_is_rehashing(hs));
-
-	old_size = CFS_HASH_NBKT(hs);
-	new_size = CFS_HASH_RH_NBKT(hs);
-
-	cfs_hash_unlock(hs, 0);
-
-	/*
-	 * don't need hs::hs_rwlock for hs::hs_buckets,
-	 * because nobody else can change the bucket table.
-	 */
-	bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets,
-					old_size, new_size);
-	cfs_hash_lock(hs, 1);
-	if (!bkts) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	if (bkts == hs->hs_buckets) {
-		bkts = NULL; /* do nothing */
-		goto out;
-	}
-
-	rc = __cfs_hash_theta(hs);
-	if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) {
-		/* free the newly allocated bkt-table */
-		old_size = new_size;
-		new_size = CFS_HASH_NBKT(hs);
-		rc = -EALREADY;
-		goto out;
-	}
-
-	LASSERT(!hs->hs_rehash_buckets);
-	hs->hs_rehash_buckets = bkts;
-
-	rc = 0;
-	cfs_hash_for_each_bucket(hs, &bd, i) {
-		if (cfs_hash_is_exiting(hs)) {
-			rc = -ESRCH;
-			/* someone wants to destroy the hash, abort now */
-			if (old_size < new_size) /* OK to free old bkt-table */
-				break;
-			/* it's shrinking, need to free the new bkt-table */
-			hs->hs_rehash_buckets = NULL;
-			old_size = new_size;
-			new_size = CFS_HASH_NBKT(hs);
-			goto out;
-		}
-
-		count += cfs_hash_rehash_bd(hs, &bd);
-		if (count < CFS_HASH_LOOP_HOG ||
-		    cfs_hash_is_iterating(hs)) { /* need to finish ASAP */
-			continue;
-		}
-
-		count = 0;
-		cfs_hash_unlock(hs, 1);
-		cond_resched();
-		cfs_hash_lock(hs, 1);
-	}
-
-	hs->hs_rehash_count++;
-
-	bkts = hs->hs_buckets;
-	hs->hs_buckets = hs->hs_rehash_buckets;
-	hs->hs_rehash_buckets = NULL;
-
-	hs->hs_cur_bits = hs->hs_rehash_bits;
-out:
-	hs->hs_rehash_bits = 0;
-	bsize = cfs_hash_bkt_size(hs);
-	cfs_hash_unlock(hs, 1);
-	/* can't refer to @hs anymore because it could be destroyed */
-	if (bkts)
-		cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
-	if (rc)
-		CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc);
-}
-
-/**
- * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
- * @old_key must be provided to locate the objects previous location
- * in the hash, and the @new_key will be used to reinsert the object.
- * Use this function instead of a cfs_hash_add() + cfs_hash_del()
- * combo when it is critical that there is no window in time where the
- * object is missing from the hash.  When an object is being rehashed
- * the registered cfs_hash_get() and cfs_hash_put() functions will
- * not be called.
- */
-void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
-			 void *new_key, struct hlist_node *hnode)
-{
-	struct cfs_hash_bd bds[3];
-	struct cfs_hash_bd old_bds[2];
-	struct cfs_hash_bd new_bd;
-
-	LASSERT(!hlist_unhashed(hnode));
-
-	cfs_hash_lock(hs, 0);
-
-	cfs_hash_dual_bd_get(hs, old_key, old_bds);
-	cfs_hash_bd_get(hs, new_key, &new_bd);
-
-	bds[0] = old_bds[0];
-	bds[1] = old_bds[1];
-	bds[2] = new_bd;
-
-	/* NB: bds[0] and bds[1] are ordered already */
-	cfs_hash_bd_order(&bds[1], &bds[2]);
-	cfs_hash_bd_order(&bds[0], &bds[1]);
-
-	cfs_hash_multi_bd_lock(hs, bds, 3, 1);
-	if (likely(!old_bds[1].bd_bucket)) {
-		cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
-	} else {
-		cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
-		cfs_hash_bd_add_locked(hs, &new_bd, hnode);
-	}
-	/* overwrite key inside locks, otherwise may screw up with
-	 * other operations, e.g. rehash
-	 */
-	cfs_hash_keycpy(hs, hnode, new_key);
-
-	cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
-	cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_rehash_key);
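/*
 * Illustrative key change (hypothetical demo_obj as above): this requires
 * a hash created with CFS_HASH_REHASH_KEY so that ops->hs_keycpy is
 * registered (see the LASSERT in cfs_hash_create()).
 */
static void demo_rename(struct cfs_hash *hs, struct demo_obj *obj, u64 new_id)
{
	/* obj is never absent from the hash; do_id is rewritten to new_id
	 * by ops->hs_keycpy under the bucket locks
	 */
	cfs_hash_rehash_key(hs, &obj->do_id, &new_id, &obj->do_hnode);
}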
-
-void cfs_hash_debug_header(struct seq_file *m)
-{
-	seq_printf(m, "%-*s   cur   min   max theta t-min t-max flags rehash   count  maxdep maxdepb distribution\n",
-		   CFS_HASH_BIGNAME_LEN, "name");
-}
-EXPORT_SYMBOL(cfs_hash_debug_header);
-
-static struct cfs_hash_bucket **
-cfs_hash_full_bkts(struct cfs_hash *hs)
-{
-	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
-	if (!hs->hs_rehash_buckets)
-		return hs->hs_buckets;
-
-	LASSERT(hs->hs_rehash_bits);
-	return hs->hs_rehash_bits > hs->hs_cur_bits ?
-	       hs->hs_rehash_buckets : hs->hs_buckets;
-}
-
-static unsigned int
-cfs_hash_full_nbkt(struct cfs_hash *hs)
-{
-	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
-	if (!hs->hs_rehash_buckets)
-		return CFS_HASH_NBKT(hs);
-
-	LASSERT(hs->hs_rehash_bits);
-	return hs->hs_rehash_bits > hs->hs_cur_bits ?
-	       CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
-}
-
-void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m)
-{
-	int dist[8] = { 0, };
-	int maxdep = -1;
-	int maxdepb = -1;
-	int total = 0;
-	int theta;
-	int i;
-
-	cfs_hash_lock(hs, 0);
-	theta = __cfs_hash_theta(hs);
-
-	seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d  0x%02x %6d ",
-		   CFS_HASH_BIGNAME_LEN, hs->hs_name,
-		   1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
-		   1 << hs->hs_max_bits,
-		   __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
-		   __cfs_hash_theta_int(hs->hs_min_theta),
-		   __cfs_hash_theta_frac(hs->hs_min_theta),
-		   __cfs_hash_theta_int(hs->hs_max_theta),
-		   __cfs_hash_theta_frac(hs->hs_max_theta),
-		   hs->hs_flags, hs->hs_rehash_count);
-
-	/*
-	 * The distribution is a summary of the chained hash depth in
-	 * each of the libcfs hash buckets.  Each bucket's hsb_count is
-	 * divided by the hash theta value and used to generate a
-	 * histogram of the hash distribution.  A uniform hash will
-	 * result in all hash buckets being close to the average, thus
-	 * only the first few entries in the histogram will be non-zero.
-	 * If your hash function results in a non-uniform hash, this will
-	 * be observable as outlier buckets in the distribution histogram.
-	 *
-	 * Uniform hash distribution:		128/128/0/0/0/0/0/0
-	 * Non-Uniform hash distribution:	128/125/0/0/0/0/2/1
-	 */
-	for (i = 0; i < cfs_hash_full_nbkt(hs); i++) {
-		struct cfs_hash_bd bd;
-
-		bd.bd_bucket = cfs_hash_full_bkts(hs)[i];
-		cfs_hash_bd_lock(hs, &bd, 0);
-		if (maxdep < bd.bd_bucket->hsb_depmax) {
-			maxdep  = bd.bd_bucket->hsb_depmax;
-			maxdepb = ffz(~maxdep);
-		}
-		total += bd.bd_bucket->hsb_count;
-		dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++;
-		cfs_hash_bd_unlock(hs, &bd, 0);
-	}
-
-	seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb);
-	for (i = 0; i < 8; i++)
-		seq_printf(m, "%d%c",  dist[i], (i == 7) ? '\n' : '/');
-
-	cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_debug_str);

+ 0 - 1086
drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c

@@ -1,1086 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/cache.h>
-
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <linux/libcfs/libcfs.h>
-
-/** Global CPU partition table */
-struct cfs_cpt_table   *cfs_cpt_tab __read_mostly;
-EXPORT_SYMBOL(cfs_cpt_tab);
-
-/**
- * modparam for setting number of partitions
- *
- *  0 : estimate best value based on cores or NUMA nodes
- *  1 : disable multiple partitions
- * >1 : specify number of partitions
- */
-static int	cpu_npartitions;
-module_param(cpu_npartitions, int, 0444);
-MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
-
-/**
- * modparam for setting the CPU partition pattern:
- *
- * e.g.: "0[0,1,2,3] 1[4,5,6,7]", the number before each bracket is the
- *       CPU partition ID, the numbers in brackets are processor IDs
- *       (core or HT)
- *
- * e.g.: "N 0[0,1] 1[2,3]", the leading 'N' means the numbers in brackets
- *       are NUMA node IDs, the number before each bracket is the CPU
- *       partition ID.
- *
- * e.g.: "N", shortcut expression to create CPTs from NUMA & CPU topology
- *
- * NB: if the user specifies cpu_pattern, cpu_npartitions is ignored
- */
-static char	*cpu_pattern = "N";
-module_param(cpu_pattern, charp, 0444);
-MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
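/*
 * Worked example: cpu_pattern="0[0,2] 1[1,3]" creates two partitions,
 * CPT 0 holding CPUs 0 and 2 and CPT 1 holding CPUs 1 and 3, while
 * cpu_pattern="N 0[0] 1[1]" does the same with NUMA node IDs instead
 * of CPU IDs.  The default "N" creates one partition per online NUMA
 * node (see cfs_cpt_table_create_pattern() below).
 */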
-
-static struct cfs_cpt_data {
-	/* serialize hotplug etc */
-	spinlock_t		cpt_lock;
-	/* reserved for hotplug */
-	unsigned long		cpt_version;
-	/* mutex to protect cpt_cpumask */
-	struct mutex		cpt_mutex;
-	/* scratch buffer for set/unset_node */
-	cpumask_var_t		cpt_cpumask;
-} cpt_data;
-
-#define CFS_CPU_VERSION_MAGIC	   0xbabecafe
-
-struct cfs_cpt_table *
-cfs_cpt_table_alloc(unsigned int ncpt)
-{
-	struct cfs_cpt_table *cptab;
-	int i;
-
-	cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
-	if (!cptab)
-		return NULL;
-
-	cptab->ctb_nparts = ncpt;
-
-	cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask),
-				      GFP_NOFS);
-	if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) ||
-	    !cptab->ctb_nodemask)
-		goto failed;
-
-	cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
-					    sizeof(cptab->ctb_cpu2cpt[0]),
-					    GFP_KERNEL);
-	if (!cptab->ctb_cpu2cpt)
-		goto failed;
-
-	memset(cptab->ctb_cpu2cpt, -1,
-	       num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
-
-	cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
-					  GFP_KERNEL);
-	if (!cptab->ctb_parts)
-		goto failed;
-
-	for (i = 0; i < ncpt; i++) {
-		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
-		part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask),
-					     GFP_NOFS);
-		if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) ||
-		    !part->cpt_nodemask)
-			goto failed;
-	}
-
-	spin_lock(&cpt_data.cpt_lock);
-	/* Reserved for hotplug */
-	cptab->ctb_version = cpt_data.cpt_version;
-	spin_unlock(&cpt_data.cpt_lock);
-
-	return cptab;
-
- failed:
-	cfs_cpt_table_free(cptab);
-	return NULL;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-void
-cfs_cpt_table_free(struct cfs_cpt_table *cptab)
-{
-	int i;
-
-	kvfree(cptab->ctb_cpu2cpt);
-
-	for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
-		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
-		kfree(part->cpt_nodemask);
-		free_cpumask_var(part->cpt_cpumask);
-	}
-
-	kvfree(cptab->ctb_parts);
-
-	kfree(cptab->ctb_nodemask);
-	free_cpumask_var(cptab->ctb_cpumask);
-
-	kfree(cptab);
-}
-EXPORT_SYMBOL(cfs_cpt_table_free);
-
-int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
-	char *tmp = buf;
-	int rc = 0;
-	int i;
-	int j;
-
-	for (i = 0; i < cptab->ctb_nparts; i++) {
-		if (len > 0) {
-			rc = snprintf(tmp, len, "%d\t: ", i);
-			len -= rc;
-		}
-
-		if (len <= 0) {
-			rc = -EFBIG;
-			goto out;
-		}
-
-		tmp += rc;
-		for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
-			rc = snprintf(tmp, len, "%d ", j);
-			len -= rc;
-			if (len <= 0) {
-				rc = -EFBIG;
-				goto out;
-			}
-			tmp += rc;
-		}
-
-		*tmp = '\n';
-		tmp++;
-		len--;
-	}
-
- out:
-	if (rc < 0)
-		return rc;
-
-	return tmp - buf;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-
-static void
-cfs_node_to_cpumask(int node, cpumask_t *mask)
-{
-	const cpumask_t *tmp = cpumask_of_node(node);
-
-	if (tmp)
-		cpumask_copy(mask, tmp);
-	else
-		cpumask_clear(mask);
-}
-
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
-	return cptab->ctb_nparts;
-}
-EXPORT_SYMBOL(cfs_cpt_number);
-
-int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	return cpt == CFS_CPT_ANY ?
-	       cpumask_weight(cptab->ctb_cpumask) :
-	       cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
-}
-EXPORT_SYMBOL(cfs_cpt_weight);
-
-int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	return cpt == CFS_CPT_ANY ?
-	       cpumask_any_and(cptab->ctb_cpumask,
-			       cpu_online_mask) < nr_cpu_ids :
-	       cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
-			       cpu_online_mask) < nr_cpu_ids;
-}
-EXPORT_SYMBOL(cfs_cpt_online);
-
-cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	return cpt == CFS_CPT_ANY ?
-	       &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask;
-}
-EXPORT_SYMBOL(cfs_cpt_cpumask);
-
-nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	return cpt == CFS_CPT_ANY ?
-	       cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
-}
-EXPORT_SYMBOL(cfs_cpt_nodemask);
-
-int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-	int node;
-
-	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
-
-	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
-		CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
-		return 0;
-	}
-
-	if (cptab->ctb_cpu2cpt[cpu] != -1) {
-		CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
-		       cpu, cptab->ctb_cpu2cpt[cpu]);
-		return 0;
-	}
-
-	cptab->ctb_cpu2cpt[cpu] = cpt;
-
-	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
-
-	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
-	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-
-	node = cpu_to_node(cpu);
-
-	/* first CPU of @node in this CPT table */
-	if (!node_isset(node, *cptab->ctb_nodemask))
-		node_set(node, *cptab->ctb_nodemask);
-
-	/* first CPU of @node in this partition */
-	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
-		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
-	return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpu);
-
-void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-	int node;
-	int i;
-
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	if (cpu < 0 || cpu >= nr_cpu_ids) {
-		CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
-		return;
-	}
-
-	if (cpt == CFS_CPT_ANY) {
-		/* caller doesn't know the partition ID */
-		cpt = cptab->ctb_cpu2cpt[cpu];
-		if (cpt < 0) { /* not set in this CPT-table */
-			CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
-			       cpt, cptab);
-			return;
-		}
-
-	} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
-		CDEBUG(D_INFO,
-		       "CPU %d is not in cpu-partition %d\n", cpu, cpt);
-		return;
-	}
-
-	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
-	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-
-	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
-	cptab->ctb_cpu2cpt[cpu] = -1;
-
-	node = cpu_to_node(cpu);
-
-	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
-	LASSERT(node_isset(node, *cptab->ctb_nodemask));
-
-	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
-		/* does this CPT have another CPU belonging to this node? */
-		if (cpu_to_node(i) == node)
-			break;
-	}
-
-	if (i >= nr_cpu_ids)
-		node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
-	for_each_cpu(i, cptab->ctb_cpumask) {
-		/* does this CPT-table have another CPU belonging to this node? */
-		if (cpu_to_node(i) == node)
-			break;
-	}
-
-	if (i >= nr_cpu_ids)
-		node_clear(node, *cptab->ctb_nodemask);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpu);
-
-int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-	int i;
-
-	if (!cpumask_weight(mask) ||
-	    cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
-		CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
-		       cpt);
-		return 0;
-	}
-
-	for_each_cpu(i, mask) {
-		if (!cfs_cpt_set_cpu(cptab, cpt, i))
-			return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpumask);
-
-void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-	int i;
-
-	for_each_cpu(i, mask)
-		cfs_cpt_unset_cpu(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
-
-int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-	int rc;
-
-	if (node < 0 || node >= MAX_NUMNODES) {
-		CDEBUG(D_INFO,
-		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
-		return 0;
-	}
-
-	mutex_lock(&cpt_data.cpt_mutex);
-
-	cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
-
-	rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
-
-	mutex_unlock(&cpt_data.cpt_mutex);
-
-	return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_set_node);
-
-void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-	if (node < 0 || node >= MAX_NUMNODES) {
-		CDEBUG(D_INFO,
-		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
-		return;
-	}
-
-	mutex_lock(&cpt_data.cpt_mutex);
-
-	cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
-
-	cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
-
-	mutex_unlock(&cpt_data.cpt_mutex);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_node);
-
-int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-	int i;
-
-	for_each_node_mask(i, *mask) {
-		if (!cfs_cpt_set_node(cptab, cpt, i))
-			return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_nodemask);
-
-void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-	int i;
-
-	for_each_node_mask(i, *mask)
-		cfs_cpt_unset_node(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
-
-void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-	int last;
-	int i;
-
-	if (cpt == CFS_CPT_ANY) {
-		last = cptab->ctb_nparts - 1;
-		cpt = 0;
-	} else {
-		last = cpt;
-	}
-
-	for (; cpt <= last; cpt++) {
-		for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
-			cfs_cpt_unset_cpu(cptab, cpt, i);
-	}
-}
-EXPORT_SYMBOL(cfs_cpt_clear);
-
-int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
-	nodemask_t *mask;
-	int weight;
-	int rotor;
-	int node;
-
-	/* convert CPU partition ID to HW node id */
-
-	if (cpt < 0 || cpt >= cptab->ctb_nparts) {
-		mask = cptab->ctb_nodemask;
-		rotor = cptab->ctb_spread_rotor++;
-	} else {
-		mask = cptab->ctb_parts[cpt].cpt_nodemask;
-		rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
-	}
-
-	weight = nodes_weight(*mask);
-	LASSERT(weight > 0);
-
-	rotor %= weight;
-
-	for_each_node_mask(node, *mask) {
-		if (!rotor--)
-			return node;
-	}
-
-	LBUG();
-	return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_spread_node);
-
-int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
-	int cpu;
-	int cpt;
-
-	preempt_disable();
-	cpu = smp_processor_id();
-	cpt = cptab->ctb_cpu2cpt[cpu];
-
-	if (cpt < 0 && remap) {
-		/* don't return a negative value, for the safety of upper
-		 * layers; instead map the unknown CPU to a valid partition ID
-		 */
-		cpt = cpu % cptab->ctb_nparts;
-	}
-	preempt_enable();
-	return cpt;
-}
-EXPORT_SYMBOL(cfs_cpt_current);
-
-int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
-	LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
-
-	return cptab->ctb_cpu2cpt[cpu];
-}
-EXPORT_SYMBOL(cfs_cpt_of_cpu);
-
-int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
-	cpumask_var_t *cpumask;
-	nodemask_t *nodemask;
-	int rc;
-	int i;
-
-	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-	if (cpt == CFS_CPT_ANY) {
-		cpumask = &cptab->ctb_cpumask;
-		nodemask = cptab->ctb_nodemask;
-	} else {
-		cpumask = &cptab->ctb_parts[cpt].cpt_cpumask;
-		nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
-	}
-
-	if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
-		CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
-		       cpt);
-		return -EINVAL;
-	}
-
-	for_each_online_cpu(i) {
-		if (cpumask_test_cpu(i, *cpumask))
-			continue;
-
-		rc = set_cpus_allowed_ptr(current, *cpumask);
-		set_mems_allowed(*nodemask);
-		if (!rc)
-			schedule(); /* switch to allowed CPU */
-
-		return rc;
-	}
-
-	/* don't need to set affinity because all online CPUs are covered */
-	return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_bind);
-
-/**
- * Choose up to \a number CPUs from \a node and set them in \a cpt.
- * We always prefer to choose CPUs from the same core/socket.
- */
-static int
-cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
-		     cpumask_t *node, int number)
-{
-	cpumask_var_t socket;
-	cpumask_var_t core;
-	int rc = 0;
-	int cpu;
-
-	LASSERT(number > 0);
-
-	if (number >= cpumask_weight(node)) {
-		while (!cpumask_empty(node)) {
-			cpu = cpumask_first(node);
-
-			rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
-			if (!rc)
-				return -EINVAL;
-			cpumask_clear_cpu(cpu, node);
-		}
-		return 0;
-	}
-
-	/*
-	 * Allocate scratch buffers
-	 * As we cannot initialize a cpumask_var_t, we need
-	 * to alloc both before we can risk trying to free either
-	 */
-	if (!zalloc_cpumask_var(&socket, GFP_NOFS))
-		rc = -ENOMEM;
-	if (!zalloc_cpumask_var(&core, GFP_NOFS))
-		rc = -ENOMEM;
-	if (rc)
-		goto out;
-
-	while (!cpumask_empty(node)) {
-		cpu = cpumask_first(node);
-
-		/* get cpumask for cores in the same socket */
-		cpumask_copy(socket, topology_core_cpumask(cpu));
-		cpumask_and(socket, socket, node);
-
-		LASSERT(!cpumask_empty(socket));
-
-		while (!cpumask_empty(socket)) {
-			int i;
-
-			/* get cpumask for HTs in the same core */
-			cpumask_copy(core, topology_sibling_cpumask(cpu));
-			cpumask_and(core, core, node);
-
-			LASSERT(!cpumask_empty(core));
-
-			for_each_cpu(i, core) {
-				cpumask_clear_cpu(i, socket);
-				cpumask_clear_cpu(i, node);
-
-				rc = cfs_cpt_set_cpu(cptab, cpt, i);
-				if (!rc) {
-					rc = -EINVAL;
-					goto out;
-				}
-
-				if (!--number)
-					goto out;
-			}
-			cpu = cpumask_first(socket);
-		}
-	}
-
-out:
-	free_cpumask_var(socket);
-	free_cpumask_var(core);
-	return rc;
-}
-
-#define CPT_WEIGHT_MIN  4u
-
-static unsigned int
-cfs_cpt_num_estimate(void)
-{
-	unsigned int nnode = num_online_nodes();
-	unsigned int ncpu = num_online_cpus();
-	unsigned int ncpt;
-
-	if (ncpu <= CPT_WEIGHT_MIN) {
-		ncpt = 1;
-		goto out;
-	}
-
-	/* generate a reasonable number of CPU partitions based on the total
-	 * number of CPUs; the preferred N is a power of 2 satisfying:
-	 * 2 * (N/2)^2 < NCPUS <= 2 * N^2
-	 */
-	for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
-		;
-
-	if (ncpt <= nnode) { /* fat numa system */
-		while (nnode > ncpt)
-			nnode >>= 1;
-
-	} else { /* ncpt > nnode */
-		while ((nnode << 1) <= ncpt)
-			nnode <<= 1;
-	}
-
-	ncpt = nnode;
-
-out:
-#if (BITS_PER_LONG == 32)
-	/* configuring many CPU partitions on a 32-bit system could
-	 * consume too much memory
-	 */
-	ncpt = min(2U, ncpt);
-#endif
-	while (ncpu % ncpt)
-		ncpt--; /* worst case is 1 */
-
-	return ncpt;
-}
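/*
 * Worked example: with 64 online CPUs the loop above doubles ncpt while
 * ncpu > 2 * ncpt^2, giving 2 -> 4 -> 8 (64 > 2*4^2 = 32, but
 * 64 <= 2*8^2 = 128), so the initial estimate is 8 partitions; with
 * 8 CPUs it stops at the starting value of 2 (8 <= 2*2^2).  The estimate
 * is then aligned with the NUMA node count and reduced until it divides
 * the online CPU count evenly.
 */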
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create(int ncpt)
-{
-	struct cfs_cpt_table *cptab = NULL;
-	cpumask_var_t mask;
-	int cpt = 0;
-	int num;
-	int rc;
-	int i;
-
-	rc = cfs_cpt_num_estimate();
-	if (ncpt <= 0)
-		ncpt = rc;
-
-	if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
-		CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
-		      ncpt, rc);
-	}
-
-	if (num_online_cpus() % ncpt) {
-		CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
-		       (int)num_online_cpus(), ncpt);
-		goto failed;
-	}
-
-	cptab = cfs_cpt_table_alloc(ncpt);
-	if (!cptab) {
-		CERROR("Failed to allocate CPU map(%d)\n", ncpt);
-		goto failed;
-	}
-
-	num = num_online_cpus() / ncpt;
-	if (!num) {
-		CERROR("CPU changed while setting CPU partition\n");
-		goto failed;
-	}
-
-	if (!zalloc_cpumask_var(&mask, GFP_NOFS)) {
-		CERROR("Failed to allocate scratch cpumask\n");
-		goto failed;
-	}
-
-	for_each_online_node(i) {
-		cfs_node_to_cpumask(i, mask);
-
-		while (!cpumask_empty(mask)) {
-			struct cfs_cpu_partition *part;
-			int n;
-
-			/*
-			 * Each emulated NUMA node has all allowed CPUs in
-			 * the mask.
-			 * End loop when all partitions have assigned CPUs.
-			 */
-			if (cpt == ncpt)
-				break;
-
-			part = &cptab->ctb_parts[cpt];
-
-			n = num - cpumask_weight(part->cpt_cpumask);
-			LASSERT(n > 0);
-
-			rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
-			if (rc < 0)
-				goto failed_mask;
-
-			LASSERT(num >= cpumask_weight(part->cpt_cpumask));
-			if (num == cpumask_weight(part->cpt_cpumask))
-				cpt++;
-		}
-	}
-
-	if (cpt != ncpt ||
-	    num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
-		CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
-		       cptab->ctb_nparts, num, cpt,
-		       cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
-		goto failed_mask;
-	}
-
-	free_cpumask_var(mask);
-
-	return cptab;
-
- failed_mask:
-	free_cpumask_var(mask);
- failed:
-	CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
-	       ncpt, num_online_nodes(), num_online_cpus());
-
-	if (cptab)
-		cfs_cpt_table_free(cptab);
-
-	return NULL;
-}
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create_pattern(char *pattern)
-{
-	struct cfs_cpt_table *cptab;
-	char *str;
-	int node = 0;
-	int high;
-	int ncpt = 0;
-	int cpt;
-	int rc;
-	int c;
-	int i;
-
-	str = strim(pattern);
-	if (*str == 'n' || *str == 'N') {
-		pattern = str + 1;
-		if (*pattern != '\0') {
-			node = 1;
-		} else { /* shortcut to create CPT from NUMA & CPU topology */
-			node = -1;
-			ncpt = num_online_nodes();
-		}
-	}
-
-	if (!ncpt) { /* scan for brackets; each bracket marks one partition */
-		for (str = pattern;; str++, ncpt++) {
-			str = strchr(str, '[');
-			if (!str)
-				break;
-		}
-	}
-
-	if (!ncpt ||
-	    (node && ncpt > num_online_nodes()) ||
-	    (!node && ncpt > num_online_cpus())) {
-		CERROR("Invalid pattern %s, or too many partitions %d\n",
-		       pattern, ncpt);
-		return NULL;
-	}
-
-	cptab = cfs_cpt_table_alloc(ncpt);
-	if (!cptab) {
-		CERROR("Failed to allocate cpu partition table\n");
-		return NULL;
-	}
-
-	if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
-		cpt = 0;
-
-		for_each_online_node(i) {
-			if (cpt >= ncpt) {
-				CERROR("CPU changed while setting CPU partition table, %d/%d\n",
-				       cpt, ncpt);
-				goto failed;
-			}
-
-			rc = cfs_cpt_set_node(cptab, cpt++, i);
-			if (!rc)
-				goto failed;
-		}
-		return cptab;
-	}
-
-	high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
-
-	for (str = strim(pattern), c = 0;; c++) {
-		struct cfs_range_expr *range;
-		struct cfs_expr_list *el;
-		char *bracket = strchr(str, '[');
-		int n;
-
-		if (!bracket) {
-			if (*str) {
-				CERROR("Invalid pattern %s\n", str);
-				goto failed;
-			}
-			if (c != ncpt) {
-				CERROR("expect %d partitions but found %d\n",
-				       ncpt, c);
-				goto failed;
-			}
-			break;
-		}
-
-		if (sscanf(str, "%d%n", &cpt, &n) < 1) {
-			CERROR("Invalid cpu pattern %s\n", str);
-			goto failed;
-		}
-
-		if (cpt < 0 || cpt >= ncpt) {
-			CERROR("Invalid partition id %d, total partitions %d\n",
-			       cpt, ncpt);
-			goto failed;
-		}
-
-		if (cfs_cpt_weight(cptab, cpt)) {
-			CERROR("Partition %d has already been set.\n", cpt);
-			goto failed;
-		}
-
-		str = strim(str + n);
-		if (str != bracket) {
-			CERROR("Invalid pattern %s\n", str);
-			goto failed;
-		}
-
-		bracket = strchr(str, ']');
-		if (!bracket) {
-			CERROR("missing right bracket for cpt %d, %s\n",
-			       cpt, str);
-			goto failed;
-		}
-
-		if (cfs_expr_list_parse(str, (bracket - str) + 1,
-					0, high, &el)) {
-			CERROR("Can't parse number range: %s\n", str);
-			goto failed;
-		}
-
-		list_for_each_entry(range, &el->el_exprs, re_link) {
-			for (i = range->re_lo; i <= range->re_hi; i++) {
-				if ((i - range->re_lo) % range->re_stride)
-					continue;
-
-				rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
-					    cfs_cpt_set_cpu(cptab, cpt, i);
-				if (!rc) {
-					cfs_expr_list_free(el);
-					goto failed;
-				}
-			}
-		}
-
-		cfs_expr_list_free(el);
-
-		if (!cfs_cpt_online(cptab, cpt)) {
-			CERROR("No online CPU is found on partition %d\n", cpt);
-			goto failed;
-		}
-
-		str = strim(bracket + 1);
-	}
-
-	return cptab;
-
- failed:
-	cfs_cpt_table_free(cptab);
-	return NULL;
-}
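
For reference, a few hand-written examples of the pattern strings this parser accepted (fed in through the libcfs cpu_pattern module parameter used in cfs_cpu_init() below):

	cpu_pattern="0[0,2,4,6] 1[1,3,5,7]"	partition 0 = CPUs 0,2,4,6; partition 1 = CPUs 1,3,5,7
	cpu_pattern="N 0[0] 1[1]"		leading 'N': bracketed values are NUMA nodes, not CPUs
	cpu_pattern="N"				shortcut: one partition per online NUMA node
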
-
-#ifdef CONFIG_HOTPLUG_CPU
-static enum cpuhp_state lustre_cpu_online;
-
-static void cfs_cpu_incr_cpt_version(void)
-{
-	spin_lock(&cpt_data.cpt_lock);
-	cpt_data.cpt_version++;
-	spin_unlock(&cpt_data.cpt_lock);
-}
-
-static int cfs_cpu_online(unsigned int cpu)
-{
-	cfs_cpu_incr_cpt_version();
-	return 0;
-}
-
-static int cfs_cpu_dead(unsigned int cpu)
-{
-	bool warn;
-
-	cfs_cpu_incr_cpt_version();
-
-	mutex_lock(&cpt_data.cpt_mutex);
-	/* if all HTs in a core are offline, it may break affinity */
-	cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
-	warn = cpumask_any_and(cpt_data.cpt_cpumask,
-			       cpu_online_mask) >= nr_cpu_ids;
-	mutex_unlock(&cpt_data.cpt_mutex);
-	CDEBUG(warn ? D_WARNING : D_INFO,
-	       "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
-	       cpu);
-	return 0;
-}
-#endif
-
-void
-cfs_cpu_fini(void)
-{
-	if (cfs_cpt_tab)
-		cfs_cpt_table_free(cfs_cpt_tab);
-
-#ifdef CONFIG_HOTPLUG_CPU
-	if (lustre_cpu_online > 0)
-		cpuhp_remove_state_nocalls(lustre_cpu_online);
-	cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
-#endif
-	free_cpumask_var(cpt_data.cpt_cpumask);
-}
-
-int
-cfs_cpu_init(void)
-{
-	int ret = 0;
-
-	LASSERT(!cfs_cpt_tab);
-
-	memset(&cpt_data, 0, sizeof(cpt_data));
-
-	if (!zalloc_cpumask_var(&cpt_data.cpt_cpumask, GFP_NOFS)) {
-		CERROR("Failed to allocate scratch buffer\n");
-		return -ENOMEM;
-	}
-
-	spin_lock_init(&cpt_data.cpt_lock);
-	mutex_init(&cpt_data.cpt_mutex);
-
-#ifdef CONFIG_HOTPLUG_CPU
-	ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
-					"staging/lustre/cfe:dead", NULL,
-					cfs_cpu_dead);
-	if (ret < 0)
-		goto failed;
-	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-					"staging/lustre/cfe:online",
-					cfs_cpu_online, NULL);
-	if (ret < 0)
-		goto failed;
-	lustre_cpu_online = ret;
-#endif
-	ret = -EINVAL;
-
-	if (*cpu_pattern) {
-		char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
-
-		if (!cpu_pattern_dup) {
-			CERROR("Failed to duplicate cpu_pattern\n");
-			goto failed;
-		}
-
-		cfs_cpt_tab = cfs_cpt_table_create_pattern(cpu_pattern_dup);
-		kfree(cpu_pattern_dup);
-		if (!cfs_cpt_tab) {
-			CERROR("Failed to create cptab from pattern %s\n",
-			       cpu_pattern);
-			goto failed;
-		}
-
-	} else {
-		cfs_cpt_tab = cfs_cpt_table_create(cpu_npartitions);
-		if (!cfs_cpt_tab) {
-			CERROR("Failed to create ptable with npartitions %d\n",
-			       cpu_npartitions);
-			goto failed;
-		}
-	}
-
-	spin_lock(&cpt_data.cpt_lock);
-	if (cfs_cpt_tab->ctb_version != cpt_data.cpt_version) {
-		spin_unlock(&cpt_data.cpt_lock);
-		CERROR("CPU hotplug/unplug during setup\n");
-		goto failed;
-	}
-	spin_unlock(&cpt_data.cpt_lock);
-
-	LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
-		 num_online_nodes(), num_online_cpus(),
-		 cfs_cpt_number(cfs_cpt_tab));
-	return 0;
-
- failed:
-	cfs_cpu_fini();
-	return ret;
-}

+ 0 - 155
drivers/staging/lustre/lnet/libcfs/libcfs_lock.c

@@ -1,155 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-
-/** destroy cpu-partition lock, see libcfs_private.h for more detail */
-void
-cfs_percpt_lock_free(struct cfs_percpt_lock *pcl)
-{
-	LASSERT(pcl->pcl_locks);
-	LASSERT(!pcl->pcl_locked);
-
-	cfs_percpt_free(pcl->pcl_locks);
-	kfree(pcl);
-}
-EXPORT_SYMBOL(cfs_percpt_lock_free);
-
-/**
- * create cpu-partition lock, see libcfs_private.h for more detail.
- *
- * cpu-partition locks are designed for large-scale SMP systems, so we want
- * to reduce cacheline conflicts as much as we can, which is why we always
- * allocate cacheline-aligned memory blocks.
- */
-struct cfs_percpt_lock *
-cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
-		       struct lock_class_key *keys)
-{
-	struct cfs_percpt_lock *pcl;
-	spinlock_t *lock;
-	int i;
-
-	/* NB: cptab can be NULL, pcl will be for HW CPUs in that case */
-	pcl = kzalloc(sizeof(*pcl), GFP_NOFS);
-	if (!pcl)
-		return NULL;
-
-	pcl->pcl_cptab = cptab;
-	pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock));
-	if (!pcl->pcl_locks) {
-		kfree(pcl);
-		return NULL;
-	}
-
-	if (!keys)
-		CWARN("Cannot setup class key for percpt lock, you may see recursive locking warnings which are false positives.\n");
-
-	cfs_percpt_for_each(lock, i, pcl->pcl_locks) {
-		spin_lock_init(lock);
-		if (keys)
-			lockdep_set_class(lock, &keys[i]);
-	}
-
-	return pcl;
-}
-EXPORT_SYMBOL(cfs_percpt_lock_create);
-
-/**
- * lock a CPU partition
- *
- * \a index != CFS_PERCPT_LOCK_EX
- *     hold private lock indexed by \a index
- *
- * \a index == CFS_PERCPT_LOCK_EX
- *     exclusively lock @pcl and nobody can take private lock
- */
-void
-cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
-	__acquires(pcl->pcl_locks)
-{
-	int ncpt = cfs_cpt_number(pcl->pcl_cptab);
-	int i;
-
-	LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt);
-
-	if (ncpt == 1) {
-		index = 0;
-	} else { /* serialize with exclusive lock */
-		while (pcl->pcl_locked)
-			cpu_relax();
-	}
-
-	if (likely(index != CFS_PERCPT_LOCK_EX)) {
-		spin_lock(pcl->pcl_locks[index]);
-		return;
-	}
-
-	/* exclusive lock request */
-	for (i = 0; i < ncpt; i++) {
-		spin_lock(pcl->pcl_locks[i]);
-		if (!i) {
-			LASSERT(!pcl->pcl_locked);
-			/* nobody should take a private lock after this,
-			 * so we won't be starved for too long
-			 */
-			pcl->pcl_locked = 1;
-		}
-	}
-}
-EXPORT_SYMBOL(cfs_percpt_lock);
-
-/** unlock a CPU partition */
-void
-cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
-	__releases(pcl->pcl_locks)
-{
-	int ncpt = cfs_cpt_number(pcl->pcl_cptab);
-	int i;
-
-	index = ncpt == 1 ? 0 : index;
-
-	if (likely(index != CFS_PERCPT_LOCK_EX)) {
-		spin_unlock(pcl->pcl_locks[index]);
-		return;
-	}
-
-	for (i = ncpt - 1; i >= 0; i--) {
-		if (!i) {
-			LASSERT(pcl->pcl_locked);
-			pcl->pcl_locked = 0;
-		}
-		spin_unlock(pcl->pcl_locks[i]);
-	}
-}
-EXPORT_SYMBOL(cfs_percpt_unlock);
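
A hand-written sketch of the lock's two modes (error handling abbreviated; cfs_cpt_current() and the cfs_cpt_tab global are assumed from the deleted libcfs_cpu.h):

	struct cfs_percpt_lock *pcl;
	int cpt;

	pcl = cfs_percpt_lock_create(cfs_cpt_tab, NULL);
	if (!pcl)
		return -ENOMEM;

	cpt = cfs_cpt_current(cfs_cpt_tab, 0);
	cfs_percpt_lock(pcl, cpt);		/* private lock of one partition */
	/* ... touch state owned by this partition ... */
	cfs_percpt_unlock(pcl, cpt);

	cfs_percpt_lock(pcl, CFS_PERCPT_LOCK_EX);	/* all partitions at once */
	/* ... touch state shared across partitions ... */
	cfs_percpt_unlock(pcl, CFS_PERCPT_LOCK_EX);

	cfs_percpt_lock_free(pcl);
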

+ 0 - 171
drivers/staging/lustre/lnet/libcfs/libcfs_mem.c

@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-
-struct cfs_var_array {
-	unsigned int		va_count;	/* # of buffers */
-	unsigned int		va_size;	/* size of each var */
-	struct cfs_cpt_table	*va_cptab;	/* cpu partition table */
-	void			*va_ptrs[0];	/* buffer addresses */
-};
-
-/*
 - * free per-cpu data, see more detail in cfs_percpt_alloc
- */
-void
-cfs_percpt_free(void *vars)
-{
-	struct cfs_var_array *arr;
-	int i;
-
-	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-	for (i = 0; i < arr->va_count; i++)
-		kfree(arr->va_ptrs[i]);
-
-	kvfree(arr);
-}
-EXPORT_SYMBOL(cfs_percpt_free);
-
-/*
- * allocate per-CPU-partition variables; the returned value is an array of
- * pointers, indexed by CPU partition ID, i.e.:
- *
- *	arr = cfs_percpt_alloc(cfs_cpu_pt, size);
- *	then the caller can access the memory block for partition 0 via arr[0],
- *	the block for partition 1 via arr[1]...
- *	the block for partition N via arr[N]...
- *
- * Each block is cacheline aligned.
- */
-void *
-cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
-{
-	struct cfs_var_array *arr;
-	int count;
-	int i;
-
-	count = cfs_cpt_number(cptab);
-
-	arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]),
-		       GFP_KERNEL);
-	if (!arr)
-		return NULL;
-
-	size = L1_CACHE_ALIGN(size);
-	arr->va_size = size;
-	arr->va_count = count;
-	arr->va_cptab = cptab;
-
-	for (i = 0; i < count; i++) {
-		arr->va_ptrs[i] = kzalloc_node(size, GFP_KERNEL,
-					       cfs_cpt_spread_node(cptab, i));
-		if (!arr->va_ptrs[i]) {
-			cfs_percpt_free((void *)&arr->va_ptrs[0]);
-			return NULL;
-		}
-	}
-
-	return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_percpt_alloc);
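
A hand-written sketch of the allocation pattern described above (cfs_percpt_for_each is the iterator used elsewhere in these files; cfs_cpt_tab is the global partition table):

	atomic_t **counters;
	atomic_t *c;
	int i;

	counters = cfs_percpt_alloc(cfs_cpt_tab, sizeof(**counters));
	if (!counters)
		return -ENOMEM;

	cfs_percpt_for_each(c, i, counters)	/* one block per partition */
		atomic_set(c, 0);

	/* ... hot paths index by partition ID: atomic_inc(counters[cpt]) ... */

	cfs_percpt_free(counters);
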
-
-/*
- * return the number of CPU partitions (i.e. the number of elements in the
- * per-cpu data) according to the cptab of @vars
- */
-int
-cfs_percpt_number(void *vars)
-{
-	struct cfs_var_array *arr;
-
-	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-	return arr->va_count;
-}
-EXPORT_SYMBOL(cfs_percpt_number);
-
-/*
- * free variable array, see more detail in cfs_array_alloc
- */
-void
-cfs_array_free(void *vars)
-{
-	struct cfs_var_array *arr;
-	int i;
-
-	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-	for (i = 0; i < arr->va_count; i++) {
-		if (!arr->va_ptrs[i])
-			continue;
-
-		kvfree(arr->va_ptrs[i]);
-	}
-	kvfree(arr);
-}
-EXPORT_SYMBOL(cfs_array_free);
-
-/*
- * allocate a variable array, returned value is an array of pointers.
- * Caller can specify length of array by @count, @size is size of each
- * memory block in array.
- */
-void *
-cfs_array_alloc(int count, unsigned int size)
-{
-	struct cfs_var_array *arr;
-	int i;
-
-	/* zeroed so cfs_array_free() never sees uninitialized pointers */
-	arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]), GFP_KERNEL);
-	if (!arr)
-		return NULL;
-
-	arr->va_count = count;
-	arr->va_size = size;
-
-	for (i = 0; i < count; i++) {
-		arr->va_ptrs[i] = kvzalloc(size, GFP_KERNEL);
-
-		if (!arr->va_ptrs[i]) {
-			cfs_array_free((void *)&arr->va_ptrs[0]);
-			return NULL;
-		}
-	}
-
-	return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_array_alloc);

+ 0 - 562
drivers/staging/lustre/lnet/libcfs/libcfs_string.c

@@ -1,562 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * String manipulation functions.
- *
- * libcfs/libcfs/libcfs_string.c
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- */
-
-#include <linux/ctype.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_string.h>
-
-/* Convert a text string to a bitmask */
-int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
-		 int *oldmask, int minmask, int allmask)
-{
-	const char *debugstr;
-	char op = '\0';
-	int newmask = minmask, i, len, found = 0;
-
-	/* <str> must be a list of tokens separated by whitespace
-	 * and optionally an operator ('+' or '-').  If an operator
-	 * appears first in <str>, '*oldmask' is used as the starting point
-	 * (relative), otherwise minmask is used (absolute).  An operator
-	 * applies to all following tokens up to the next operator.
-	 */
-	while (*str != '\0') {
-		while (isspace(*str))
-			str++;
-		if (*str == '\0')
-			break;
-		if (*str == '+' || *str == '-') {
-			op = *str++;
-			if (!found)
-				/* only if first token is relative */
-				newmask = *oldmask;
-			while (isspace(*str))
-				str++;
-			if (*str == '\0')  /* trailing op */
-				return -EINVAL;
-		}
-
-		/* find token length */
-		len = 0;
-		while (str[len] != '\0' && !isspace(str[len]) &&
-		       str[len] != '+' && str[len] != '-')
-			len++;
-
-		/* match token */
-		found = 0;
-		for (i = 0; i < 32; i++) {
-			debugstr = bit2str(i);
-			if (debugstr && strlen(debugstr) == len &&
-			    !strncasecmp(str, debugstr, len)) {
-				if (op == '-')
-					newmask &= ~(1 << i);
-				else
-					newmask |= (1 << i);
-				found = 1;
-				break;
-			}
-		}
-		if (!found && len == 3 &&
-		    !strncasecmp(str, "ALL", len)) {
-			if (op == '-')
-				newmask = minmask;
-			else
-				newmask = allmask;
-			found = 1;
-		}
-		if (!found) {
-			CWARN("unknown mask '%.*s'.\n"
-			      "mask usage: [+|-]<all|type> ...\n", len, str);
-			return -EINVAL;
-		}
-		str += len;
-	}
-
-	*oldmask = newmask;
-	return 0;
-}
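
A hand-written sketch of driving this parser with a toy bit-name table (ex_bit2str is illustrative, not from the tree; the real callers pass the libcfs debug/subsystem name tables):

	static const char *ex_bit2str(int bit)
	{
		switch (bit) {
		case 0: return "error";
		case 1: return "warning";
		case 2: return "info";
		default: return NULL;
		}
	}

	int mask = 0;

	/* absolute: no leading operator, result is BIT(0) | BIT(2) */
	cfs_str2mask("error info", ex_bit2str, &mask, 0, ~0);

	/* relative: leading operator, starts from the old mask, clears bit 2 */
	cfs_str2mask("-info", ex_bit2str, &mask, 0, ~0);
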
-
-/* get the first string out of @str */
-char *cfs_firststr(char *str, size_t size)
-{
-	size_t i = 0;
-	char *end;
-
-	/* trim leading spaces */
-	while (i < size && *str && isspace(*str)) {
-		++i;
-		++str;
-	}
-
-	/* string with all spaces */
-	if (*str == '\0')
-		goto out;
-
-	end = str;
-	while (i < size && *end != '\0' && !isspace(*end)) {
-		++i;
-		++end;
-	}
-
-	*end = '\0';
-out:
-	return str;
-}
-EXPORT_SYMBOL(cfs_firststr);
-
-/**
- * Extracts tokens from strings.
- *
- * Looks for \a delim in string \a next, sets \a res to point to
- * substring before the delimiter, sets \a next right after the found
- * delimiter.
- *
- * \retval 1 if \a res points to a string of non-whitespace characters
- * \retval 0 otherwise
- */
-int
-cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
-{
-	char *end;
-
-	if (!next->ls_str)
-		return 0;
-
-	/* skip leading white spaces */
-	while (next->ls_len) {
-		if (!isspace(*next->ls_str))
-			break;
-		next->ls_str++;
-		next->ls_len--;
-	}
-
-	if (!next->ls_len) /* whitespaces only */
-		return 0;
-
-	if (*next->ls_str == delim) {
-		/* first non-whitespace character is the delimiter */
-		return 0;
-	}
-
-	res->ls_str = next->ls_str;
-	end = memchr(next->ls_str, delim, next->ls_len);
-	if (!end) {
-		/* the delimiter is not present in the string */
-		end = next->ls_str + next->ls_len;
-		next->ls_str = NULL;
-	} else {
-		next->ls_str = end + 1;
-		next->ls_len -= (end - res->ls_str + 1);
-	}
-
-	/* skip ending whitespaces */
-	while (--end != res->ls_str) {
-		if (!isspace(*end))
-			break;
-	}
-
-	res->ls_len = end - res->ls_str + 1;
-	return 1;
-}
-EXPORT_SYMBOL(cfs_gettok);
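
A hand-written usage sketch (struct cfs_lstr is defined in the deleted libcfs_string.h):

	char buf[] = "alpha, beta ,gamma";
	struct cfs_lstr src = { .ls_str = buf, .ls_len = strlen(buf) };
	struct cfs_lstr tok;

	while (cfs_gettok(&src, ',', &tok))
		pr_info("token: '%.*s'\n", tok.ls_len, tok.ls_str);
	/* yields 'alpha', 'beta', 'gamma' with surrounding blanks trimmed */
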
-
-/**
- * Converts string to integer.
- *
- * Accepts decimal and hexadecimal number recordings.
- *
- * \retval 1 if first \a nob chars of \a str convert to decimal or
- * hexadecimal integer in the range [\a min, \a max]
- * \retval 0 otherwise
- */
-int
-cfs_str2num_check(char *str, int nob, unsigned int *num,
-		  unsigned int min, unsigned int max)
-{
-	bool all_numbers = true;
-	char *endp, cache;
-	int rc;
-
-	/**
- * kstrtouint() can only handle strings composed
-	 * of only numbers. We need to scan the string
-	 * passed in for the first non-digit character
-	 * and end the string at that location. If we
-	 * don't find any non-digit character we still
-	 * need to place a '\0' at position nob since
-	 * we are not interested in the rest of the
-	 * string which is longer than nob in size.
-	 * After we are done the character at the
-	 * position we placed '\0' must be restored.
-	 */
-	for (endp = str; endp < str + nob; endp++) {
-		if (!isdigit(*endp)) {
-			all_numbers = false;
-			break;
-		}
-	}
-	cache = *endp;
-	*endp = '\0';
-
-	rc = kstrtouint(str, 10, num);
-	*endp = cache;
-	if (rc || !all_numbers)
-		return 0;
-
-	return (*num >= min && *num <= max);
-}
-EXPORT_SYMBOL(cfs_str2num_check);
-
-/**
- * Parses \<range_expr\> token of the syntax. If \a bracketed is false,
- * \a src should only have a single token which can be \<number\> or  \*
- *
- * \retval 0, with \a expr pointing to an allocated cfs_range_expr whose
- * re_lo, re_hi and re_stride are initialized, if \a src parses to
- * \<number\> |
- * \<number\> '-' \<number\> |
- * \<number\> '-' \<number\> '/' \<number\>
- * \retval -EINVAL or -ENOMEM otherwise
- */
-static int
-cfs_range_expr_parse(struct cfs_lstr *src, unsigned int min, unsigned int max,
-		     int bracketed, struct cfs_range_expr **expr)
-{
-	struct cfs_range_expr *re;
-	struct cfs_lstr tok;
-
-	re = kzalloc(sizeof(*re), GFP_NOFS);
-	if (!re)
-		return -ENOMEM;
-
-	if (src->ls_len == 1 && src->ls_str[0] == '*') {
-		re->re_lo = min;
-		re->re_hi = max;
-		re->re_stride = 1;
-		goto out;
-	}
-
-	if (cfs_str2num_check(src->ls_str, src->ls_len,
-			      &re->re_lo, min, max)) {
-		/* <number> is parsed */
-		re->re_hi = re->re_lo;
-		re->re_stride = 1;
-		goto out;
-	}
-
-	if (!bracketed || !cfs_gettok(src, '-', &tok))
-		goto failed;
-
-	if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
-			       &re->re_lo, min, max))
-		goto failed;
-
-	/* <number> - */
-	if (cfs_str2num_check(src->ls_str, src->ls_len,
-			      &re->re_hi, min, max)) {
-		/* <number> - <number> is parsed */
-		re->re_stride = 1;
-		goto out;
-	}
-
-	/* go to check <number> '-' <number> '/' <number> */
-	if (cfs_gettok(src, '/', &tok)) {
-		if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
-				       &re->re_hi, min, max))
-			goto failed;
-
-		/* <number> - <number> / ... */
-		if (cfs_str2num_check(src->ls_str, src->ls_len,
-				      &re->re_stride, min, max)) {
-			/* <number> - <number> / <number> is parsed */
-			goto out;
-		}
-	}
-
- out:
-	*expr = re;
-	return 0;
-
- failed:
-	kfree(re);
-	return -EINVAL;
-}
-
-/**
- * Print the range expression \a expr into the specified \a buffer.
- * If \a bracketed is true, expression does not need additional
- * brackets.
- *
- * \retval number of characters written
- */
-static int
-cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr,
-		     bool bracketed)
-{
-	int i;
-	char s[] = "[";
-	char e[] = "]";
-
-	if (bracketed) {
-		s[0] = '\0';
-		e[0] = '\0';
-	}
-
-	if (expr->re_lo == expr->re_hi)
-		i = scnprintf(buffer, count, "%u", expr->re_lo);
-	else if (expr->re_stride == 1)
-		i = scnprintf(buffer, count, "%s%u-%u%s",
-			      s, expr->re_lo, expr->re_hi, e);
-	else
-		i = scnprintf(buffer, count, "%s%u-%u/%u%s",
-			      s, expr->re_lo, expr->re_hi, expr->re_stride, e);
-	return i;
-}
-
-/**
- * Print a list of range expressions (\a expr_list) into specified \a buffer.
- * If the list contains several expressions, separate them with comma
- * and surround the list with brackets.
- *
- * \retval number of characters written
- */
-int
-cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
-{
-	struct cfs_range_expr *expr;
-	int i = 0, j = 0;
-	int numexprs = 0;
-
-	if (count <= 0)
-		return 0;
-
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link)
-		numexprs++;
-
-	if (numexprs > 1)
-		i += scnprintf(buffer + i, count - i, "[");
-
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-		if (j++)
-			i += scnprintf(buffer + i, count - i, ",");
-		i += cfs_range_expr_print(buffer + i, count - i, expr,
-					  numexprs > 1);
-	}
-
-	if (numexprs > 1)
-		i += scnprintf(buffer + i, count - i, "]");
-
-	return i;
-}
-EXPORT_SYMBOL(cfs_expr_list_print);
-
-/**
- * Matches value (\a value) against the range expression list \a expr_list.
- *
- * \retval 1 if \a value matches
- * \retval 0 otherwise
- */
-int
-cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list)
-{
-	struct cfs_range_expr *expr;
-
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-		if (value >= expr->re_lo && value <= expr->re_hi &&
-		    !((value - expr->re_lo) % expr->re_stride))
-			return 1;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(cfs_expr_list_match);
-
-/**
- * Convert expression list (\a expr_list) to an array of all matched values
- *
- * \retval N N is total number of all matched values
- * \retval 0 if expression list is empty
- * \retval < 0 for failure
- */
-int
-cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, u32 **valpp)
-{
-	struct cfs_range_expr *expr;
-	u32 *val;
-	int count = 0;
-	int i;
-
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-		for (i = expr->re_lo; i <= expr->re_hi; i++) {
-			if (!((i - expr->re_lo) % expr->re_stride))
-				count++;
-		}
-	}
-
-	if (!count) /* empty expression list */
-		return 0;
-
-	if (count > max) {
-		CERROR("Number of values %d exceeds max allowed %d\n",
-		       count, max);
-		return -EINVAL;
-	}
-
-	val = kvmalloc_array(count, sizeof(val[0]), GFP_KERNEL | __GFP_ZERO);
-	if (!val)
-		return -ENOMEM;
-
-	count = 0;
-	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-		for (i = expr->re_lo; i <= expr->re_hi; i++) {
-			if (!((i - expr->re_lo) % expr->re_stride))
-				val[count++] = i;
-		}
-	}
-
-	*valpp = val;
-	return count;
-}
-EXPORT_SYMBOL(cfs_expr_list_values);
-
-/**
- * Frees cfs_range_expr structures of \a expr_list.
- *
- * \retval none
- */
-void
-cfs_expr_list_free(struct cfs_expr_list *expr_list)
-{
-	while (!list_empty(&expr_list->el_exprs)) {
-		struct cfs_range_expr *expr;
-
-		expr = list_entry(expr_list->el_exprs.next,
-				  struct cfs_range_expr, re_link);
-		list_del(&expr->re_link);
-		kfree(expr);
-	}
-
-	kfree(expr_list);
-}
-EXPORT_SYMBOL(cfs_expr_list_free);
-
-/**
- * Parses \<cfs_expr_list\> token of the syntax.
- *
- * \retval 0 if \a str parses to \<number\> | \<expr_list\>
- * \retval -errno otherwise
- */
-int
-cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
-		    struct cfs_expr_list **elpp)
-{
-	struct cfs_expr_list *expr_list;
-	struct cfs_range_expr *expr;
-	struct cfs_lstr	src;
-	int rc;
-
-	expr_list = kzalloc(sizeof(*expr_list), GFP_NOFS);
-	if (!expr_list)
-		return -ENOMEM;
-
-	src.ls_str = str;
-	src.ls_len = len;
-
-	INIT_LIST_HEAD(&expr_list->el_exprs);
-
-	if (src.ls_str[0] == '[' &&
-	    src.ls_str[src.ls_len - 1] == ']') {
-		src.ls_str++;
-		src.ls_len -= 2;
-
-		rc = -EINVAL;
-		while (src.ls_str) {
-			struct cfs_lstr tok;
-
-			if (!cfs_gettok(&src, ',', &tok)) {
-				rc = -EINVAL;
-				break;
-			}
-
-			rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
-			if (rc)
-				break;
-
-			list_add_tail(&expr->re_link, &expr_list->el_exprs);
-		}
-	} else {
-		rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
-		if (!rc)
-			list_add_tail(&expr->re_link, &expr_list->el_exprs);
-	}
-
-	if (rc)
-		cfs_expr_list_free(expr_list);
-	else
-		*elpp = expr_list;
-
-	return rc;
-}
-EXPORT_SYMBOL(cfs_expr_list_parse);
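
A hand-written sketch of the full expression pipeline, from text to value array (the expansion follows the stride rule implemented above):

	struct cfs_expr_list *el;
	char buf[] = "[0-7/2,9]";
	u32 *vals;
	int n;

	if (cfs_expr_list_parse(buf, strlen(buf), 0, 15, &el))
		return -EINVAL;

	n = cfs_expr_list_values(el, 16, &vals);	/* n == 5: 0,2,4,6,9 */
	if (n > 0) {
		/* ... use vals[0..n-1] ... */
		kvfree(vals);
	}
	cfs_expr_list_free(el);
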
-
-/**
- * Frees cfs_expr_list structures of \a list.
- *
- * For each struct cfs_expr_list structure found on \a list it frees
- * range_expr list attached to it and frees the cfs_expr_list itself.
- *
- * \retval none
- */
-void
-cfs_expr_list_free_list(struct list_head *list)
-{
-	struct cfs_expr_list *el;
-
-	while (!list_empty(list)) {
-		el = list_entry(list->next, struct cfs_expr_list, el_link);
-		list_del(&el->el_link);
-		cfs_expr_list_free(el);
-	}
-}
-EXPORT_SYMBOL(cfs_expr_list_free_list);

+ 0 - 139
drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c

@@ -1,139 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- */
-
-/*
- * These are crypto API shash wrappers around zlib_adler32().
- */
-
-#include <linux/module.h>
-#include <linux/zutil.h>
-#include <crypto/internal/hash.h>
-#include "linux-crypto.h"
-
-#define CHKSUM_BLOCK_SIZE	1
-#define CHKSUM_DIGEST_SIZE	4
-
-static int adler32_cra_init(struct crypto_tfm *tfm)
-{
-	u32 *key = crypto_tfm_ctx(tfm);
-
-	*key = 1;
-
-	return 0;
-}
-
-static int adler32_setkey(struct crypto_shash *hash, const u8 *key,
-			  unsigned int keylen)
-{
-	u32 *mctx = crypto_shash_ctx(hash);
-
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-	*mctx = *(u32 *)key;
-	return 0;
-}
-
-static int adler32_init(struct shash_desc *desc)
-{
-	u32 *mctx = crypto_shash_ctx(desc->tfm);
-	u32 *cksump = shash_desc_ctx(desc);
-
-	*cksump = *mctx;
-
-	return 0;
-}
-
-static int adler32_update(struct shash_desc *desc, const u8 *data,
-			  unsigned int len)
-{
-	u32 *cksump = shash_desc_ctx(desc);
-
-	*cksump = zlib_adler32(*cksump, data, len);
-	return 0;
-}
-
-static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len,
-			   u8 *out)
-{
-	*(u32 *)out = zlib_adler32(*cksump, data, len);
-	return 0;
-}
-
-static int adler32_finup(struct shash_desc *desc, const u8 *data,
-			 unsigned int len, u8 *out)
-{
-	return __adler32_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int adler32_final(struct shash_desc *desc, u8 *out)
-{
-	u32 *cksump = shash_desc_ctx(desc);
-
-	*(u32 *)out = *cksump;
-	return 0;
-}
-
-static int adler32_digest(struct shash_desc *desc, const u8 *data,
-			  unsigned int len, u8 *out)
-{
-	return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len, out);
-}
-
-static struct shash_alg alg = {
-	.setkey		= adler32_setkey,
-	.init		= adler32_init,
-	.update		= adler32_update,
-	.final		= adler32_final,
-	.finup		= adler32_finup,
-	.digest		= adler32_digest,
-	.descsize	= sizeof(u32),
-	.digestsize	= CHKSUM_DIGEST_SIZE,
-	.base		= {
-		.cra_name		= "adler32",
-		.cra_driver_name	= "adler32-zlib",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
-		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(u32),
-		.cra_module		= THIS_MODULE,
-		.cra_init		= adler32_cra_init,
-	}
-};
-
-int cfs_crypto_adler32_register(void)
-{
-	return crypto_register_shash(&alg);
-}
-
-void cfs_crypto_adler32_unregister(void)
-{
-	crypto_unregister_shash(&alg);
-}

+ 0 - 447
drivers/staging/lustre/lnet/libcfs/linux-crypto.c

@@ -1,447 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-
-#include <crypto/hash.h>
-#include <linux/scatterlist.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs_crypto.h>
-#include <linux/libcfs/libcfs.h>
-#include "linux-crypto.h"
-
-/**
- *  Array of hash algorithm speed in MByte per second
- */
-static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX];
-
-/**
- * Initialize the state descriptor for the specified hash algorithm.
- *
- * An internal routine to allocate the hash-specific state in \a req for
- * use with cfs_crypto_hash_digest() to compute the hash of a single message,
- * though possibly in multiple chunks.  The descriptor internal state should
- * be freed with cfs_crypto_hash_final().
- *
- * \param[in]	  hash_alg	hash algorithm id (CFS_HASH_ALG_*)
- * \param[out]	  type		pointer to the hash description in hash_types[]
- *				array
- * \param[in,out] req		hash state descriptor to be initialized
- * \param[in]	  key		initial hash value/state, NULL to use default
- *				value
- * \param[in]	  key_len	length of \a key
- *
- * \retval			0 on success
- * \retval			negative errno on failure
- */
-static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
-				 const struct cfs_crypto_hash_type **type,
-				 struct ahash_request **req,
-				 unsigned char *key,
-				 unsigned int key_len)
-{
-	struct crypto_ahash *tfm;
-	int err = 0;
-
-	*type = cfs_crypto_hash_type(hash_alg);
-
-	if (!*type) {
-		CWARN("Unsupported hash algorithm id = %d, max id is %d\n",
-		      hash_alg, CFS_HASH_ALG_MAX);
-		return -EINVAL;
-	}
-	tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC);
-
-	if (IS_ERR(tfm)) {
-		CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n",
-		       (*type)->cht_name);
-		return PTR_ERR(tfm);
-	}
-
-	*req = ahash_request_alloc(tfm, GFP_KERNEL);
-	if (!*req) {
-		CDEBUG(D_INFO, "Failed to alloc ahash_request for %s\n",
-		       (*type)->cht_name);
-		crypto_free_ahash(tfm);
-		return -ENOMEM;
-	}
-
-	ahash_request_set_callback(*req, 0, NULL, NULL);
-
-	if (key)
-		err = crypto_ahash_setkey(tfm, key, key_len);
-	else if ((*type)->cht_key)
-		err = crypto_ahash_setkey(tfm,
-					  (unsigned char *)&((*type)->cht_key),
-					  (*type)->cht_size);
-
-	if (err) {
-		ahash_request_free(*req);
-		crypto_free_ahash(tfm);
-		return err;
-	}
-
-	CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n",
-	       crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm),
-	       cfs_crypto_hash_speeds[hash_alg]);
-
-	err = crypto_ahash_init(*req);
-	if (err) {
-		ahash_request_free(*req);
-		crypto_free_ahash(tfm);
-	}
-	return err;
-}
-
-/**
- * Calculate hash digest for the passed buffer.
- *
- * This should be used when computing the hash on a single contiguous buffer.
- * It combines the hash initialization, computation, and cleanup.
- *
- * \param[in]	  hash_alg	id of hash algorithm (CFS_HASH_ALG_*)
- * \param[in]	  buf		data buffer on which to compute hash
- * \param[in]	  buf_len	length of \a buf in bytes
- * \param[in]	  key		initial value/state for algorithm,
- *				if \a key = NULL use default initial value
- * \param[in]	  key_len	length of \a key in bytes
 - * \param[out]	  hash		pointer to computed hash value; if \a hash is
 - *				NULL, \a hash_len is set to the digest size
 - *				in bytes and -ENOSPC is returned
- * \param[in,out] hash_len	size of \a hash buffer
- *
- * \retval -EINVAL		\a buf, \a buf_len, \a hash_len,
- *				\a hash_alg invalid
- * \retval -ENOENT		\a hash_alg is unsupported
- * \retval -ENOSPC		\a hash is NULL, or \a hash_len less than
- *				digest size
- * \retval			0 for success
- * \retval			negative errno for other errors from lower
- *				layers.
- */
-int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
-			   const void *buf, unsigned int buf_len,
-			   unsigned char *key, unsigned int key_len,
-			   unsigned char *hash, unsigned int *hash_len)
-{
-	struct scatterlist sl;
-	struct ahash_request *req;
-	int err;
-	const struct cfs_crypto_hash_type *type;
-
-	if (!buf || !buf_len || !hash_len)
-		return -EINVAL;
-
-	err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
-	if (err)
-		return err;
-
-	if (!hash || *hash_len < type->cht_size) {
-		*hash_len = type->cht_size;
-		crypto_free_ahash(crypto_ahash_reqtfm(req));
-		ahash_request_free(req);
-		return -ENOSPC;
-	}
-	sg_init_one(&sl, buf, buf_len);
-
-	ahash_request_set_crypt(req, &sl, hash, sl.length);
-	err = crypto_ahash_digest(req);
-	crypto_free_ahash(crypto_ahash_reqtfm(req));
-	ahash_request_free(req);
-
-	return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_digest);
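
A minimal one-shot usage sketch (hand-written; CFS_HASH_ALG_ADLER32 is assumed to be the algorithm id from the deleted libcfs_crypto.h):

	static const char msg[] = "payload to checksum";
	unsigned char digest[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
	unsigned int dlen = sizeof(digest);
	int rc;

	rc = cfs_crypto_hash_digest(CFS_HASH_ALG_ADLER32, msg, sizeof(msg),
				    NULL, 0, digest, &dlen);
	/* on success dlen holds the actual digest size (4 for adler32) */
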
-
-/**
- * Allocate and initialize descriptor for hash algorithm.
- *
- * This should be used to initialize a hash descriptor for multiple calls
- * to a single hash function when computing the hash across multiple
- * separate buffers or pages using cfs_crypto_hash_update{,_page}().
- *
- * The hash descriptor should be freed with cfs_crypto_hash_final().
- *
- * \param[in] hash_alg	algorithm id (CFS_HASH_ALG_*)
- * \param[in] key	initial value/state for algorithm, if \a key = NULL
- *			use default initial value
- * \param[in] key_len	length of \a key in bytes
- *
- * \retval		pointer to descriptor of hash instance
- * \retval		ERR_PTR(errno) in case of error
- */
-struct ahash_request *
-cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
-		     unsigned char *key, unsigned int key_len)
-{
-	struct ahash_request *req;
-	int err;
-	const struct cfs_crypto_hash_type *type;
-
-	err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
-
-	if (err)
-		return ERR_PTR(err);
-	return req;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_init);
-
-/**
- * Update hash digest computed on data within the given \a page
- *
 - * \param[in] req	hash state descriptor
- * \param[in] page	data page on which to compute the hash
- * \param[in] offset	offset within \a page at which to start hash
- * \param[in] len	length of data on which to compute hash
- *
- * \retval		0 for success
- * \retval		negative errno on failure
- */
-int cfs_crypto_hash_update_page(struct ahash_request *req,
-				struct page *page, unsigned int offset,
-				unsigned int len)
-{
-	struct scatterlist sl;
-
-	sg_init_table(&sl, 1);
-	sg_set_page(&sl, page, len, offset & ~PAGE_MASK);
-
-	ahash_request_set_crypt(req, &sl, NULL, sl.length);
-	return crypto_ahash_update(req);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update_page);
-
-/**
- * Update hash digest computed on the specified data
- *
- * \param[in] req	hash state descriptor
- * \param[in] buf	data buffer on which to compute the hash
 - * \param[in] buf_len	length of \a buf on which to compute hash
- *
- * \retval		0 for success
- * \retval		negative errno on failure
- */
-int cfs_crypto_hash_update(struct ahash_request *req,
-			   const void *buf, unsigned int buf_len)
-{
-	struct scatterlist sl;
-
-	sg_init_one(&sl, buf, buf_len);
-
-	ahash_request_set_crypt(req, &sl, NULL, sl.length);
-	return crypto_ahash_update(req);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update);
-
-/**
- * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor
- *
- * \param[in]	  req		hash descriptor
- * \param[out]	  hash		pointer to hash buffer to store hash digest
 - * \param[in,out] hash_len	pointer to hash buffer size; if \a hash is
 - *				NULL, only free \a req instead of computing
 - *				the hash
- *
- * \retval	0 for success
- * \retval	-EOVERFLOW if hash_len is too small for the hash digest
- * \retval	negative errno for other errors from lower layers
- */
-int cfs_crypto_hash_final(struct ahash_request *req,
-			  unsigned char *hash, unsigned int *hash_len)
-{
-	int err;
-	int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
-
-	if (!hash || !hash_len) {
-		err = 0;
-		goto free_ahash;
-	}
-	if (*hash_len < size) {
-		err = -EOVERFLOW;
-		goto free_ahash;
-	}
-
-	ahash_request_set_crypt(req, NULL, hash, 0);
-	err = crypto_ahash_final(req);
-	if (!err)
-		*hash_len = size;
-free_ahash:
-	crypto_free_ahash(crypto_ahash_reqtfm(req));
-	ahash_request_free(req);
-	return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_final);
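
Together, cfs_crypto_hash_init/update{,_page}/final form the multi-buffer path. A hand-written sketch of that sequence (CFS_HASH_ALG_ADLER32 is assumed from the deleted libcfs_crypto.h):

	struct page *page = alloc_page(GFP_KERNEL);
	unsigned char digest[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
	unsigned int dlen = sizeof(digest);
	static const char hdr[] = "header";
	struct ahash_request *req;
	int rc;

	if (!page)
		return -ENOMEM;

	req = cfs_crypto_hash_init(CFS_HASH_ALG_ADLER32, NULL, 0);
	if (IS_ERR(req)) {
		__free_page(page);
		return PTR_ERR(req);
	}

	rc = cfs_crypto_hash_update(req, hdr, sizeof(hdr));
	if (!rc)
		rc = cfs_crypto_hash_update_page(req, page, 0, PAGE_SIZE);
	if (!rc)
		rc = cfs_crypto_hash_final(req, digest, &dlen);
	else
		cfs_crypto_hash_final(req, NULL, NULL);	/* only frees req */
	__free_page(page);
	return rc;
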
-
-/**
- * Compute the speed of specified hash function
- *
- * Run a speed test of the given hash algorithm on a fixed-size buffer.
- * The speed is stored internally in the cfs_crypto_hash_speeds[] array, and
- * is available through the cfs_crypto_hash_speed() function.
- *
- * \param[in] hash_alg	hash algorithm id (CFS_HASH_ALG_*)
- */
-static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg)
-{
-	int buf_len = max(PAGE_SIZE, 1048576UL);
-	void *buf;
-	unsigned long start, end;
-	int bcount, err = 0;
-	struct page *page;
-	unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
-	unsigned int hash_len = sizeof(hash);
-
-	page = alloc_page(GFP_KERNEL);
-	if (!page) {
-		err = -ENOMEM;
-		goto out_err;
-	}
-
-	buf = kmap(page);
-	memset(buf, 0xAD, PAGE_SIZE);
-	kunmap(page);
-
-	for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC),
-	     bcount = 0; time_before(jiffies, end); bcount++) {
-		struct ahash_request *hdesc;
-		int i;
-
-		hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0);
-		if (IS_ERR(hdesc)) {
-			err = PTR_ERR(hdesc);
-			break;
-		}
-
-		for (i = 0; i < buf_len / PAGE_SIZE; i++) {
-			err = cfs_crypto_hash_update_page(hdesc, page, 0,
-							  PAGE_SIZE);
-			if (err)
-				break;
-		}
-
-		err = cfs_crypto_hash_final(hdesc, hash, &hash_len);
-		if (err)
-			break;
-	}
-	end = jiffies;
-	__free_page(page);
-out_err:
-	if (err) {
-		cfs_crypto_hash_speeds[hash_alg] = err;
-		CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n",
-		       cfs_crypto_hash_name(hash_alg), err);
-	} else {
-		unsigned long tmp;
-
-		tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
-		       1000) / (1024 * 1024);
-		cfs_crypto_hash_speeds[hash_alg] = (int)tmp;
-		CDEBUG(D_CONFIG, "Crypto hash algorithm %s speed = %d MB/s\n",
-		       cfs_crypto_hash_name(hash_alg),
-		       cfs_crypto_hash_speeds[hash_alg]);
-	}
-}
-
-/**
- * Return the hash speed in MB/s for a valid hash algorithm.
- *
- * Return the performance of the specified \a hash_alg that was previously
- * computed using cfs_crypto_performance_test().
- *
- * \param[in] hash_alg	hash algorithm id (CFS_HASH_ALG_*)
- *
- * \retval		positive speed of the hash function in MB/s
- * \retval		-ENOENT if \a hash_alg is unsupported
- * \retval		negative errno if \a hash_alg speed is unavailable
- */
-int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg)
-{
-	if (hash_alg < CFS_HASH_ALG_MAX)
-		return cfs_crypto_hash_speeds[hash_alg];
-	return -ENOENT;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_speed);
-
-/**
- * Run the performance test for all hash algorithms.
- *
- * Run the cfs_crypto_performance_test() benchmark for all of the available
- * hash functions using a 1MB buffer size.  This is a reasonable buffer size
- * for Lustre RPCs, even if the actual RPC size is larger or smaller.
- *
- * Since the setup cost and computation speed of the various hash algorithms
- * are a function of the buffer size (and possibly internal contention of
- * offload engines), this speed is only an estimate of the speed under real
- * usage, but is reasonable for comparing the available algorithms.
- *
- * The actual speeds are available via cfs_crypto_hash_speed() for later
- * comparison.
- *
- * \retval	0 on success
- * \retval	-ENOMEM if no memory is available for test buffer
- */
-static int cfs_crypto_test_hashes(void)
-{
-	enum cfs_crypto_hash_alg hash_alg;
-
-	for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
-		cfs_crypto_performance_test(hash_alg);
-
-	return 0;
-}
-
-static int adler32;
-
-/**
- * Register available hash functions
- *
- * \retval	0
- */
-int cfs_crypto_register(void)
-{
-	request_module("crc32c");
-
-	if (cfs_crypto_adler32_register() == 0)
-		adler32 = 1;
-
-	/* check all algorithms and do performance test */
-	cfs_crypto_test_hashes();
-	return 0;
-}
-
-/**
- * Unregister previously registered hash functions
- */
-void cfs_crypto_unregister(void)
-{
-	if (adler32)
-		cfs_crypto_adler32_unregister();
-	adler32 = 0;
-}

+ 0 - 30
drivers/staging/lustre/lnet/libcfs/linux-crypto.h

@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/**
- * Functions to register/unregister the adler32 shash algorithm.
- */
-int cfs_crypto_adler32_register(void);
-void cfs_crypto_adler32_unregister(void);

+ 0 - 142
drivers/staging/lustre/lnet/libcfs/linux-debug.c

@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/linux/linux-debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/notifier.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include "tracefile.h"
-
-#include <linux/kallsyms.h>
-
-char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
-
-/**
- * Upcall function once a Lustre log has been dumped.
- *
- * \param file  path of the dumped log
- */
-void libcfs_run_debug_log_upcall(char *file)
-{
-	char *argv[3];
-	int rc;
-	static const char * const envp[] = {
-		"HOME=/",
-		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-		NULL
-	};
-
-	argv[0] = lnet_debug_log_upcall;
-
-	LASSERTF(file, "called on a null filename\n");
-	argv[1] = file; /* only need to pass the path of the file */
-
-	argv[2] = NULL;
-
-	rc = call_usermodehelper(argv[0], argv, (char **)envp, 1);
-	if (rc < 0 && rc != -ENOENT) {
-		CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
-		       rc, argv[0], argv[1]);
-	} else {
-		CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
-		       argv[0], argv[1]);
-	}
-}
-
-/* coverity[+kill] */
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
-{
-	libcfs_catastrophe = 1;
-	libcfs_debug_msg(msgdata, "LBUG\n");
-
-	if (in_interrupt()) {
-		panic("LBUG in interrupt.\n");
-		/* not reached */
-	}
-
-	dump_stack();
-	if (!libcfs_panic_on_lbug)
-		libcfs_debug_dumplog();
-	if (libcfs_panic_on_lbug)
-		panic("LBUG");
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	while (1)
-		schedule();
-}
-EXPORT_SYMBOL(lbug_with_loc);
-
-static int panic_notifier(struct notifier_block *self, unsigned long unused1,
-			  void *unused2)
-{
-	if (libcfs_panic_in_progress)
-		return 0;
-
-	libcfs_panic_in_progress = 1;
-	mb();
-
-	return 0;
-}
-
-static struct notifier_block libcfs_panic_notifier = {
-	.notifier_call	= panic_notifier,
-	.next		= NULL,
-	.priority	= 10000,
-};
-
-void libcfs_register_panic_notifier(void)
-{
-	atomic_notifier_chain_register(&panic_notifier_list,
-				       &libcfs_panic_notifier);
-}
-
-void libcfs_unregister_panic_notifier(void)
-{
-	atomic_notifier_chain_unregister(&panic_notifier_list,
-					 &libcfs_panic_notifier);
-}

+ 0 - 258
drivers/staging/lustre/lnet/libcfs/linux-tracefile.c

@@ -1,258 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include "tracefile.h"
-
-/* percentage of the total debug memory given to each type */
-static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
-	80,  /* 80% pages for CFS_TCD_TYPE_PROC */
-	10,  /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
-	10   /* 10% pages for CFS_TCD_TYPE_IRQ */
-};
-
-char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-
-static DECLARE_RWSEM(cfs_tracefile_sem);
-
-int cfs_tracefile_init_arch(void)
-{
-	int i;
-	int j;
-	struct cfs_trace_cpu_data *tcd;
-
-	/* initialize trace_data */
-	memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
-	for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
-		cfs_trace_data[i] =
-			kmalloc_array(num_possible_cpus(),
-				      sizeof(union cfs_trace_data_union),
-				      GFP_KERNEL);
-		if (!cfs_trace_data[i])
-			goto out;
-	}
-
-	/* arch related info initialized */
-	cfs_tcd_for_each(tcd, i, j) {
-		spin_lock_init(&tcd->tcd_lock);
-		tcd->tcd_pages_factor = pages_factor[i];
-		tcd->tcd_type = i;
-		tcd->tcd_cpu = j;
-	}
-
-	for (i = 0; i < num_possible_cpus(); i++)
-		for (j = 0; j < CFS_TCD_TYPE_MAX; j++) {
-			cfs_trace_console_buffers[i][j] =
-				kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
-					GFP_KERNEL);
-
-			if (!cfs_trace_console_buffers[i][j])
-				goto out;
-		}
-
-	return 0;
-
-out:
-	cfs_tracefile_fini_arch();
-	pr_err("lnet: Not enough memory\n");
-	return -ENOMEM;
-}
-
-void cfs_tracefile_fini_arch(void)
-{
-	int i;
-	int j;
-
-	for (i = 0; i < num_possible_cpus(); i++)
-		for (j = 0; j < CFS_TCD_TYPE_MAX; j++) {
-			kfree(cfs_trace_console_buffers[i][j]);
-			cfs_trace_console_buffers[i][j] = NULL;
-		}
-
-	for (i = 0; cfs_trace_data[i]; i++) {
-		kfree(cfs_trace_data[i]);
-		cfs_trace_data[i] = NULL;
-	}
-}
-
-void cfs_tracefile_read_lock(void)
-{
-	down_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_read_unlock(void)
-{
-	up_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_lock(void)
-{
-	down_write(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_unlock(void)
-{
-	up_write(&cfs_tracefile_sem);
-}
-
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
-{
-	if (in_irq())
-		return CFS_TCD_TYPE_IRQ;
-	if (in_softirq())
-		return CFS_TCD_TYPE_SOFTIRQ;
-	return CFS_TCD_TYPE_PROC;
-}
-
-/*
- * The walking argument indicates that the locking comes from the
- * all-tcd-types iterator, in which case we must take the lock and disable
- * local irqs to avoid deadlocks with other interrupt locks that might be
- * held. See LU-1311 for details.
- */
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
-	__acquires(&tcd->tc_lock)
-{
-	__LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
-	if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
-		spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
-	else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
-		spin_lock_bh(&tcd->tcd_lock);
-	else if (unlikely(walking))
-		spin_lock_irq(&tcd->tcd_lock);
-	else
-		spin_lock(&tcd->tcd_lock);
-	return 1;
-}
-
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
-	__releases(&tcd->tcd_lock)
-{
-	__LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
-	if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
-		spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
-	else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
-		spin_unlock_bh(&tcd->tcd_lock);
-	else if (unlikely(walking))
-		spin_unlock_irq(&tcd->tcd_lock);
-	else
-		spin_unlock(&tcd->tcd_lock);
-}
-
-void
-cfs_set_ptldebug_header(struct ptldebug_header *header,
-			struct libcfs_debug_msg_data *msgdata,
-			unsigned long stack)
-{
-	struct timespec64 ts;
-
-	ktime_get_real_ts64(&ts);
-
-	header->ph_subsys = msgdata->msg_subsys;
-	header->ph_mask = msgdata->msg_mask;
-	header->ph_cpu_id = smp_processor_id();
-	header->ph_type = cfs_trace_buf_idx_get();
-	/* y2038 safe since all user space treats this as unsigned, but
-	 * will overflow in 2106
-	 */
-	header->ph_sec = (u32)ts.tv_sec;
-	header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
-	header->ph_stack = stack;
-	header->ph_pid = current->pid;
-	header->ph_line_num = msgdata->msg_line;
-	header->ph_extern_pid = 0;
-}
-
-static char *
-dbghdr_to_err_string(struct ptldebug_header *hdr)
-{
-	switch (hdr->ph_subsys) {
-	case S_LND:
-	case S_LNET:
-		return "LNetError";
-	default:
-		return "LustreError";
-	}
-}
-
-static char *
-dbghdr_to_info_string(struct ptldebug_header *hdr)
-{
-	switch (hdr->ph_subsys) {
-	case S_LND:
-	case S_LNET:
-		return "LNet";
-	default:
-		return "Lustre";
-	}
-}
-
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
-			  const char *buf, int len, const char *file,
-			  const char *fn)
-{
-	char *prefix = "Lustre", *ptype = NULL;
-
-	if (mask & D_EMERG) {
-		prefix = dbghdr_to_err_string(hdr);
-		ptype = KERN_EMERG;
-	} else if (mask & D_ERROR) {
-		prefix = dbghdr_to_err_string(hdr);
-		ptype = KERN_ERR;
-	} else if (mask & D_WARNING) {
-		prefix = dbghdr_to_info_string(hdr);
-		ptype = KERN_WARNING;
-	} else if (mask & (D_CONSOLE | libcfs_printk)) {
-		prefix = dbghdr_to_info_string(hdr);
-		ptype = KERN_INFO;
-	}
-
-	if (mask & D_CONSOLE) {
-		pr_info("%s%s: %.*s", ptype, prefix, len, buf);
-	} else {
-		pr_info("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
-			hdr->ph_pid, hdr->ph_extern_pid, file,
-			hdr->ph_line_num, fn, len, buf);
-	}
-}
-
-int cfs_trace_max_debug_mb(void)
-{
-	int  total_mb = (totalram_pages >> (20 - PAGE_SHIFT));
-
-	return max(512, (total_mb * 80) / 100);
-}
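
cfs_trace_max_debug_mb() above caps the trace buffers at 80% of RAM, but never below 512 MB; cfs_trace_set_debug_mb() in tracefile.c further below clamps the requested size to that ceiling and splits it across the possible CPUs. A standalone userspace sketch of the arithmetic, assuming 4 KiB pages and omitting the per-type tcd_pages_factor weighting:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4 KiB pages */

/* ceiling: 80% of RAM, never less than 512 MB (cfs_trace_max_debug_mb) */
static int max_debug_mb(long total_ram_pages)
{
	int total_mb = total_ram_pages >> (20 - PAGE_SHIFT);
	int limit = (total_mb * 80) / 100;

	return limit > 512 ? limit : 512;
}

/* clamp the request to [ncpus, ceiling], then split it evenly */
static long pages_per_cpu(int mb, int ncpus, long total_ram_pages)
{
	int limit = max_debug_mb(total_ram_pages);

	if (mb < ncpus)
		mb = ncpus;	/* at least 1 MB per possible CPU */
	if (mb > limit)
		mb = limit;
	return (long)(mb / ncpus) << (20 - PAGE_SHIFT);
}

int main(void)
{
	long ram_pages = 16L << (30 - PAGE_SHIFT);	/* 16 GiB of RAM */

	/* 256 MB requested on an 8-CPU box: 32 MB, i.e. 8192 pages/CPU */
	printf("pages per CPU: %ld\n", pages_per_cpu(256, 8, ram_pages));
	return 0;
}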

+ 0 - 758
drivers/staging/lustre/lnet/libcfs/module.c

@@ -1,758 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-
-#include <linux/sysctl.h>
-#include <linux/debugfs.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <asm/div64.h>
-
-#include <linux/libcfs/libcfs_crypto.h>
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-#include "tracefile.h"
-
-struct lnet_debugfs_symlink_def {
-	char *name;
-	char *target;
-};
-
-static struct dentry *lnet_debugfs_root;
-
-BLOCKING_NOTIFIER_HEAD(libcfs_ioctl_list);
-EXPORT_SYMBOL(libcfs_ioctl_list);
-
-static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
-{
-	size_t len = sizeof(*data);
-
-	len += cfs_size_round(data->ioc_inllen1);
-	len += cfs_size_round(data->ioc_inllen2);
-	return len;
-}
-
-static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
-{
-	if (data->ioc_hdr.ioc_len > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inllen1 > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inllen2 > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
-		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
-		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_pbuf1 && !data->ioc_plen1) {
-		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_pbuf2 && !data->ioc_plen2) {
-		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_plen1 && !data->ioc_pbuf1) {
-		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
-		return true;
-	}
-	if (data->ioc_plen2 && !data->ioc_pbuf2) {
-		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
-		return true;
-	}
-	if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
-		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
-		return true;
-	}
-	if (data->ioc_inllen1 &&
-	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
-		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
-		return true;
-	}
-	if (data->ioc_inllen2 &&
-	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
-			   data->ioc_inllen2 - 1] != '\0') {
-		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
-		return true;
-	}
-	return false;
-}
-
-static int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
-{
-	if (libcfs_ioctl_is_invalid(data)) {
-		CERROR("libcfs ioctl: parameter not correctly formatted\n");
-		return -EINVAL;
-	}
-
-	if (data->ioc_inllen1)
-		data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
-	if (data->ioc_inllen2)
-		data->ioc_inlbuf2 = &data->ioc_bulk[0] +
-			cfs_size_round(data->ioc_inllen1);
-
-	return 0;
-}
-
-static int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
-				const struct libcfs_ioctl_hdr __user *uhdr)
-{
-	struct libcfs_ioctl_hdr hdr;
-	int err;
-
-	if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
-		return -EFAULT;
-
-	if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
-	    hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
-		CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
-		       LIBCFS_IOCTL_VERSION, hdr.ioc_version);
-		return -EINVAL;
-	}
-
-	if (hdr.ioc_len < sizeof(hdr)) {
-		CERROR("libcfs ioctl: user buffer too small for ioctl\n");
-		return -EINVAL;
-	}
-
-	if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
-		CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
-		       hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
-		return -EINVAL;
-	}
-
-	*hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL);
-	if (!*hdr_pp)
-		return -ENOMEM;
-
-	if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
-		err = -EFAULT;
-		goto free;
-	}
-
-	if ((*hdr_pp)->ioc_version != hdr.ioc_version ||
-	    (*hdr_pp)->ioc_len != hdr.ioc_len) {
-		err = -EINVAL;
-		goto free;
-	}
-
-	return 0;
-
-free:
-	kvfree(*hdr_pp);
-	return err;
-}
-
-static int libcfs_ioctl(unsigned long cmd, void __user *uparam)
-{
-	struct libcfs_ioctl_data *data = NULL;
-	struct libcfs_ioctl_hdr *hdr;
-	int err;
-
-	/* 'cmd' and permissions get checked in our arch-specific caller */
-	err = libcfs_ioctl_getdata(&hdr, uparam);
-	if (err) {
-		CDEBUG_LIMIT(D_ERROR,
-			     "libcfs ioctl: data header error %d\n", err);
-		return err;
-	}
-
-	if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) {
-		/*
-		 * libcfs_ioctl_data_adjust() fixes up the embedded buffer
-		 * pointers of the libcfs_ioctl_data structure so that the
-		 * payload can be used directly.  Data structures added
-		 * later do not need this call.
-		 */
-		data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
-		err = libcfs_ioctl_data_adjust(data);
-		if (err)
-			goto out;
-	}
-
-	CDEBUG(D_IOCTL, "libcfs ioctl cmd %lu\n", cmd);
-	switch (cmd) {
-	case IOC_LIBCFS_CLEAR_DEBUG:
-		libcfs_debug_clear_buffer();
-		break;
-
-	case IOC_LIBCFS_MARK_DEBUG:
-		if (!data || !data->ioc_inlbuf1 ||
-		    data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') {
-			err = -EINVAL;
-			goto out;
-		}
-		libcfs_debug_mark_buffer(data->ioc_inlbuf1);
-		break;
-
-	default:
-		err = blocking_notifier_call_chain(&libcfs_ioctl_list,
-						   cmd, hdr);
-		if (!(err & NOTIFY_STOP_MASK))
-			/* No-one claimed the ioctl */
-			err = -EINVAL;
-		else
-			err = notifier_to_errno(err);
-		if (!err)
-			if (copy_to_user(uparam, hdr, hdr->ioc_len))
-				err = -EFAULT;
-		break;
-	}
-out:
-	kvfree(hdr);
-	return err;
-}
-
-static long
-libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
-	    _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
-	    _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
-		CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
-		       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
-		return -EINVAL;
-	}
-
-	return libcfs_ioctl(cmd, (void __user *)arg);
-}
-
-static const struct file_operations libcfs_fops = {
-	.owner		= THIS_MODULE,
-	.unlocked_ioctl	= libcfs_psdev_ioctl,
-};
-
-static struct miscdevice libcfs_dev = {
-	.minor = MISC_DYNAMIC_MINOR,
-	.name = "lnet",
-	.fops = &libcfs_fops,
-};
-
-static int libcfs_dev_registered;
-
-int lprocfs_call_handler(void *data, int write, loff_t *ppos,
-			 void __user *buffer, size_t *lenp,
-			 int (*handler)(void *data, int write, loff_t pos,
-					void __user *buffer, int len))
-{
-	int rc = handler(data, write, *ppos, buffer, *lenp);
-
-	if (rc < 0)
-		return rc;
-
-	if (write) {
-		*ppos += *lenp;
-	} else {
-		*lenp = rc;
-		*ppos += rc;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(lprocfs_call_handler);
-
-static int __proc_dobitmasks(void *data, int write,
-			     loff_t pos, void __user *buffer, int nob)
-{
-	const int tmpstrlen = 512;
-	char *tmpstr;
-	int rc;
-	unsigned int *mask = data;
-	int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
-	int is_printk = (mask == &libcfs_printk) ? 1 : 0;
-
-	rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen);
-	if (rc < 0)
-		return rc;
-
-	if (!write) {
-		libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
-		rc = strlen(tmpstr);
-
-		if (pos >= rc) {
-			rc = 0;
-		} else {
-			rc = cfs_trace_copyout_string(buffer, nob,
-						      tmpstr + pos, "\n");
-		}
-	} else {
-		rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
-		if (rc < 0) {
-			kfree(tmpstr);
-			return rc;
-		}
-
-		rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
-		/* Always print LBUG/LASSERT to console, so keep this mask */
-		if (is_printk)
-			*mask |= D_EMERG;
-	}
-
-	kfree(tmpstr);
-	return rc;
-}
-
-static int proc_dobitmasks(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_dobitmasks);
-}
-
-static int __proc_dump_kernel(void *data, int write,
-			      loff_t pos, void __user *buffer, int nob)
-{
-	if (!write)
-		return 0;
-
-	return cfs_trace_dump_debug_buffer_usrstr(buffer, nob);
-}
-
-static int proc_dump_kernel(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_dump_kernel);
-}
-
-static int __proc_daemon_file(void *data, int write,
-			      loff_t pos, void __user *buffer, int nob)
-{
-	if (!write) {
-		int len = strlen(cfs_tracefile);
-
-		if (pos >= len)
-			return 0;
-
-		return cfs_trace_copyout_string(buffer, nob,
-						cfs_tracefile + pos, "\n");
-	}
-
-	return cfs_trace_daemon_command_usrstr(buffer, nob);
-}
-
-static int proc_daemon_file(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_daemon_file);
-}
-
-static int libcfs_force_lbug(struct ctl_table *table, int write,
-			     void __user *buffer,
-			     size_t *lenp, loff_t *ppos)
-{
-	if (write)
-		LBUG();
-	return 0;
-}
-
-static int proc_fail_loc(struct ctl_table *table, int write,
-			 void __user *buffer,
-			 size_t *lenp, loff_t *ppos)
-{
-	int rc;
-	long old_fail_loc = cfs_fail_loc;
-
-	rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
-	if (old_fail_loc != cfs_fail_loc)
-		wake_up(&cfs_race_waitq);
-	return rc;
-}
-
-static int __proc_cpt_table(void *data, int write,
-			    loff_t pos, void __user *buffer, int nob)
-{
-	char *buf = NULL;
-	int len = 4096;
-	int rc  = 0;
-
-	if (write)
-		return -EPERM;
-
-	while (1) {
-		buf = kzalloc(len, GFP_KERNEL);
-		if (!buf)
-			return -ENOMEM;
-
-		rc = cfs_cpt_table_print(cfs_cpt_tab, buf, len);
-		if (rc >= 0)
-			break;
-
-		if (rc == -EFBIG) {
-			kfree(buf);
-			len <<= 1;
-			continue;
-		}
-		goto out;
-	}
-
-	if (pos >= rc) {
-		rc = 0;
-		goto out;
-	}
-
-	rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
- out:
-	kfree(buf);
-	return rc;
-}
-
-static int proc_cpt_table(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_cpt_table);
-}
-
-static struct ctl_table lnet_table[] = {
-	{
-		.procname = "debug",
-		.data     = &libcfs_debug,
-		.maxlen   = sizeof(int),
-		.mode     = 0644,
-		.proc_handler = &proc_dobitmasks,
-	},
-	{
-		.procname = "subsystem_debug",
-		.data     = &libcfs_subsystem_debug,
-		.maxlen   = sizeof(int),
-		.mode     = 0644,
-		.proc_handler = &proc_dobitmasks,
-	},
-	{
-		.procname = "printk",
-		.data     = &libcfs_printk,
-		.maxlen   = sizeof(int),
-		.mode     = 0644,
-		.proc_handler = &proc_dobitmasks,
-	},
-	{
-		.procname = "cpu_partition_table",
-		.maxlen   = 128,
-		.mode     = 0444,
-		.proc_handler = &proc_cpt_table,
-	},
-	{
-		.procname = "debug_log_upcall",
-		.data     = lnet_debug_log_upcall,
-		.maxlen   = sizeof(lnet_debug_log_upcall),
-		.mode     = 0644,
-		.proc_handler = &proc_dostring,
-	},
-	{
-		.procname = "catastrophe",
-		.data     = &libcfs_catastrophe,
-		.maxlen   = sizeof(int),
-		.mode     = 0444,
-		.proc_handler = &proc_dointvec,
-	},
-	{
-		.procname = "dump_kernel",
-		.maxlen   = 256,
-		.mode     = 0200,
-		.proc_handler = &proc_dump_kernel,
-	},
-	{
-		.procname = "daemon_file",
-		.mode     = 0644,
-		.maxlen   = 256,
-		.proc_handler = &proc_daemon_file,
-	},
-	{
-		.procname = "force_lbug",
-		.data     = NULL,
-		.maxlen   = 0,
-		.mode     = 0200,
-		.proc_handler = &libcfs_force_lbug
-	},
-	{
-		.procname = "fail_loc",
-		.data     = &cfs_fail_loc,
-		.maxlen   = sizeof(cfs_fail_loc),
-		.mode     = 0644,
-		.proc_handler = &proc_fail_loc
-	},
-	{
-		.procname = "fail_val",
-		.data     = &cfs_fail_val,
-		.maxlen   = sizeof(int),
-		.mode     = 0644,
-		.proc_handler = &proc_dointvec
-	},
-	{
-		.procname	= "fail_err",
-		.data		= &cfs_fail_err,
-		.maxlen		= sizeof(cfs_fail_err),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-	}
-};
-
-static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = {
-	{ "console_ratelimit",
-	  "/sys/module/libcfs/parameters/libcfs_console_ratelimit"},
-	{ "debug_path",
-	  "/sys/module/libcfs/parameters/libcfs_debug_file_path"},
-	{ "panic_on_lbug",
-	  "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"},
-	{ "libcfs_console_backoff",
-	  "/sys/module/libcfs/parameters/libcfs_console_backoff"},
-	{ "debug_mb",
-	  "/sys/module/libcfs/parameters/libcfs_debug_mb"},
-	{ "console_min_delay_centisecs",
-	  "/sys/module/libcfs/parameters/libcfs_console_min_delay"},
-	{ "console_max_delay_centisecs",
-	  "/sys/module/libcfs/parameters/libcfs_console_max_delay"},
-	{},
-};
-
-static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf,
-				 size_t count, loff_t *ppos)
-{
-	struct ctl_table *table = filp->private_data;
-	int error;
-
-	error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos);
-	if (!error)
-		error = count;
-
-	return error;
-}
-
-static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf,
-				  size_t count, loff_t *ppos)
-{
-	struct ctl_table *table = filp->private_data;
-	int error;
-
-	error = table->proc_handler(table, 1, (void __user *)buf, &count, ppos);
-	if (!error)
-		error = count;
-
-	return error;
-}
-
-static const struct file_operations lnet_debugfs_file_operations_rw = {
-	.open		= simple_open,
-	.read		= lnet_debugfs_read,
-	.write		= lnet_debugfs_write,
-	.llseek		= default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_ro = {
-	.open		= simple_open,
-	.read		= lnet_debugfs_read,
-	.llseek		= default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_wo = {
-	.open		= simple_open,
-	.write		= lnet_debugfs_write,
-	.llseek		= default_llseek,
-};
-
-static const struct file_operations *lnet_debugfs_fops_select(umode_t mode)
-{
-	if (!(mode & 0222))
-		return &lnet_debugfs_file_operations_ro;
-
-	if (!(mode & 0444))
-		return &lnet_debugfs_file_operations_wo;
-
-	return &lnet_debugfs_file_operations_rw;
-}
-
-void lustre_insert_debugfs(struct ctl_table *table)
-{
-	if (!lnet_debugfs_root)
-		lnet_debugfs_root = debugfs_create_dir("lnet", NULL);
-
-	/* Even if we cannot create it, just ignore it altogether */
-	if (IS_ERR_OR_NULL(lnet_debugfs_root))
-		return;
-
-	/*
-	 * We don't save the dentry returned because we don't call
-	 * debugfs_remove() on it, but rather debugfs_remove_recursive()
-	 * on the whole directory.
-	 */
-	for (; table->procname; table++)
-		debugfs_create_file(table->procname, table->mode,
-				    lnet_debugfs_root, table,
-				    lnet_debugfs_fops_select(table->mode));
-}
-EXPORT_SYMBOL_GPL(lustre_insert_debugfs);
-
-static void lustre_insert_debugfs_links(
-	const struct lnet_debugfs_symlink_def *symlinks)
-{
-	for (; symlinks && symlinks->name; symlinks++)
-		debugfs_create_symlink(symlinks->name, lnet_debugfs_root,
-				       symlinks->target);
-}
-
-static void lustre_remove_debugfs(void)
-{
-	debugfs_remove_recursive(lnet_debugfs_root);
-
-	lnet_debugfs_root = NULL;
-}
-
-static DEFINE_MUTEX(libcfs_startup);
-static int libcfs_active;
-
-int libcfs_setup(void)
-{
-	int rc = -EINVAL;
-
-	mutex_lock(&libcfs_startup);
-	if (libcfs_active)
-		goto out;
-
-	if (!libcfs_dev_registered)
-		goto err;
-
-	rc = libcfs_debug_init(5 * 1024 * 1024);
-	if (rc < 0) {
-		pr_err("LustreError: libcfs_debug_init: %d\n", rc);
-		goto err;
-	}
-
-	rc = cfs_cpu_init();
-	if (rc)
-		goto err;
-
-	cfs_rehash_wq = alloc_workqueue("cfs_rh", WQ_SYSFS, 4);
-	if (!cfs_rehash_wq) {
-		CERROR("Failed to start rehash workqueue.\n");
-		rc = -ENOMEM;
-		goto err;
-	}
-
-	rc = cfs_crypto_register();
-	if (rc) {
-		CERROR("cfs_crypto_register: error %d\n", rc);
-		goto err;
-	}
-
-	lustre_insert_debugfs(lnet_table);
-	if (!IS_ERR_OR_NULL(lnet_debugfs_root))
-		lustre_insert_debugfs_links(lnet_debugfs_symlinks);
-
-	CDEBUG(D_OTHER, "portals setup OK\n");
-out:
-	libcfs_active = 1;
-	mutex_unlock(&libcfs_startup);
-	return 0;
-err:
-	cfs_crypto_unregister();
-	if (cfs_rehash_wq)
-		destroy_workqueue(cfs_rehash_wq);
-	cfs_cpu_fini();
-	libcfs_debug_cleanup();
-	mutex_unlock(&libcfs_startup);
-	return rc;
-}
-EXPORT_SYMBOL(libcfs_setup);
-
-static int libcfs_init(void)
-{
-	int rc;
-
-	rc = misc_register(&libcfs_dev);
-	if (rc)
-		CERROR("misc_register: error %d\n", rc);
-	else
-		libcfs_dev_registered = 1;
-	return rc;
-}
-
-static void libcfs_exit(void)
-{
-	int rc;
-
-	lustre_remove_debugfs();
-
-	if (cfs_rehash_wq)
-		destroy_workqueue(cfs_rehash_wq);
-
-	cfs_crypto_unregister();
-
-	if (libcfs_dev_registered)
-		misc_deregister(&libcfs_dev);
-
-	cfs_cpu_fini();
-
-	rc = libcfs_debug_cleanup();
-	if (rc)
-		pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre helper library");
-MODULE_VERSION(LIBCFS_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(libcfs_init);
-module_exit(libcfs_exit);
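
The default branch of libcfs_ioctl() above routes unknown commands down a blocking notifier chain and, unless some subscriber sets NOTIFY_STOP_MASK, treats the ioctl as unclaimed and fails with -EINVAL. A minimal userspace sketch of that walk; the NOTIFY_* values mirror the kernel's, everything else (the function-pointer chain, handler_a) is invented for illustration:

#include <stdio.h>
#include <errno.h>

#define NOTIFY_DONE	 0x0000		/* don't care, keep walking */
#define NOTIFY_OK	 0x0001
#define NOTIFY_STOP_MASK 0x8000		/* handler claimed the event */

typedef int (*notifier_fn_t)(unsigned long cmd, void *data);

static int handler_a(unsigned long cmd, void *data)
{
	if (cmd != 1)
		return NOTIFY_DONE;
	printf("handler_a claimed cmd %lu\n", cmd);
	return NOTIFY_STOP_MASK | NOTIFY_OK;
}

/* walk the chain until a handler claims the command */
static int call_chain(notifier_fn_t *chain, unsigned long cmd, void *data)
{
	int ret = NOTIFY_DONE;

	for (; *chain; chain++) {
		ret = (*chain)(cmd, data);
		if (ret & NOTIFY_STOP_MASK)
			break;
	}
	return ret;
}

int main(void)
{
	notifier_fn_t chain[] = { handler_a, NULL };
	unsigned long cmd;

	for (cmd = 1; cmd <= 2; cmd++) {
		int ret = call_chain(chain, cmd, NULL);

		if (!(ret & NOTIFY_STOP_MASK))	/* no-one claimed it */
			fprintf(stderr, "cmd %lu unclaimed: %d\n",
				cmd, -EINVAL);
	}
	return 0;
}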

+ 0 - 1198
drivers/staging/lustre/lnet/libcfs/tracefile.c

@@ -1,1198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/tracefile.c
- *
- * Author: Zach Brown <zab@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-#define pr_fmt(fmt) "Lustre: " fmt
-
-#include <linux/ratelimit.h>
-#include <linux/highmem.h>
-#include <linux/ctype.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include "tracefile.h"
-
-/* XXX move things up to the top, comment */
-union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
-
-char cfs_tracefile[TRACEFILE_NAME_SIZE];
-long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-static struct tracefiled_ctl trace_tctl;
-static DEFINE_MUTEX(cfs_trace_thread_mutex);
-static int thread_running;
-
-static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
-
-struct page_collection {
-	struct list_head	pc_pages;
-	/*
-	 * if this flag is set, collect_pages() will spill both
-	 * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
-	 * only ->tcd_pages are spilled.
-	 */
-	int			pc_want_daemon_pages;
-};
-
-struct tracefiled_ctl {
-	struct completion	tctl_start;
-	struct completion	tctl_stop;
-	wait_queue_head_t	tctl_waitq;
-	pid_t			tctl_pid;
-	atomic_t		tctl_shutdown;
-};
-
-/*
- * small data-structure for each page owned by tracefiled.
- */
-struct cfs_trace_page {
-	/*
-	 * page itself
-	 */
-	struct page		*page;
-	/*
-	 * linkage into one of the lists in trace_data_union or
-	 * page_collection
-	 */
-	struct list_head	linkage;
-	/*
-	 * number of bytes used within this page
-	 */
-	unsigned int		used;
-	/*
-	 * cpu that owns this page
-	 */
-	unsigned short		cpu;
-	/*
-	 * type(context) of this page
-	 */
-	unsigned short		type;
-};
-
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
-					 struct cfs_trace_cpu_data *tcd);
-
-static inline struct cfs_trace_page *
-cfs_tage_from_list(struct list_head *list)
-{
-	return list_entry(list, struct cfs_trace_page, linkage);
-}
-
-static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
-{
-	struct page *page;
-	struct cfs_trace_page *tage;
-
-	/* My caller is trying to free memory */
-	if (!in_interrupt() && (current->flags & PF_MEMALLOC))
-		return NULL;
-
-	/*
-	 * Don't spam console with allocation failures: they will be reported
-	 * by upper layer anyway.
-	 */
-	gfp |= __GFP_NOWARN;
-	page = alloc_page(gfp);
-	if (!page)
-		return NULL;
-
-	tage = kmalloc(sizeof(*tage), gfp);
-	if (!tage) {
-		__free_page(page);
-		return NULL;
-	}
-
-	tage->page = page;
-	atomic_inc(&cfs_tage_allocated);
-	return tage;
-}
-
-static void cfs_tage_free(struct cfs_trace_page *tage)
-{
-	__free_page(tage->page);
-	kfree(tage);
-	atomic_dec(&cfs_tage_allocated);
-}
-
-static void cfs_tage_to_tail(struct cfs_trace_page *tage,
-			     struct list_head *queue)
-{
-	list_move_tail(&tage->linkage, queue);
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
-			   struct list_head *stock)
-{
-	int i;
-
-	/*
-	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-	 * from here: this will lead to infinite recursion.
-	 */
-
-	for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) {
-		struct cfs_trace_page *tage;
-
-		tage = cfs_tage_alloc(gfp);
-		if (!tage)
-			break;
-		list_add_tail(&tage->linkage, stock);
-	}
-	return i;
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *
-cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
-{
-	struct cfs_trace_page *tage;
-
-	if (tcd->tcd_cur_pages > 0) {
-		__LASSERT(!list_empty(&tcd->tcd_pages));
-		tage = cfs_tage_from_list(tcd->tcd_pages.prev);
-		if (tage->used + len <= PAGE_SIZE)
-			return tage;
-	}
-
-	if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
-		if (tcd->tcd_cur_stock_pages > 0) {
-			tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
-			--tcd->tcd_cur_stock_pages;
-			list_del_init(&tage->linkage);
-		} else {
-			tage = cfs_tage_alloc(GFP_ATOMIC);
-			if (unlikely(!tage)) {
-				if (!(current->flags & PF_MEMALLOC) ||
-				    in_interrupt())
-					pr_warn_ratelimited("cannot allocate a tage (%ld)\n",
-							    tcd->tcd_cur_pages);
-				return NULL;
-			}
-		}
-
-		tage->used = 0;
-		tage->cpu = smp_processor_id();
-		tage->type = tcd->tcd_type;
-		list_add_tail(&tage->linkage, &tcd->tcd_pages);
-		tcd->tcd_cur_pages++;
-
-		if (tcd->tcd_cur_pages > 8 && thread_running) {
-			struct tracefiled_ctl *tctl = &trace_tctl;
-			/*
-			 * wake up tracefiled to process some pages.
-			 */
-			wake_up(&tctl->tctl_waitq);
-		}
-		return tage;
-	}
-	return NULL;
-}
-
-static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
-{
-	int pgcount = tcd->tcd_cur_pages / 10;
-	struct page_collection pc;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-
-	/*
-	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-	 * from here: this will lead to infinite recursion.
-	 */
-
-	pr_warn_ratelimited("debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
-			    pgcount + 1, tcd->tcd_cur_pages);
-
-	INIT_LIST_HEAD(&pc.pc_pages);
-
-	list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
-		if (!pgcount--)
-			break;
-
-		list_move_tail(&tage->linkage, &pc.pc_pages);
-		tcd->tcd_cur_pages--;
-	}
-	put_pages_on_tcd_daemon_list(&pc, tcd);
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
-						 unsigned long len)
-{
-	struct cfs_trace_page *tage;
-
-	/*
-	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-	 * from here: this will lead to infinite recursion.
-	 */
-
-	if (len > PAGE_SIZE) {
-		pr_err("cowardly refusing to write %lu bytes in a page\n", len);
-		return NULL;
-	}
-
-	tage = cfs_trace_get_tage_try(tcd, len);
-	if (tage)
-		return tage;
-	if (thread_running)
-		cfs_tcd_shrink(tcd);
-	if (tcd->tcd_cur_pages > 0) {
-		tage = cfs_tage_from_list(tcd->tcd_pages.next);
-		tage->used = 0;
-		cfs_tage_to_tail(tage, &tcd->tcd_pages);
-	}
-	return tage;
-}
-
-int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
-		     const char *format, ...)
-{
-	va_list args;
-	int rc;
-
-	va_start(args, format);
-	rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
-	va_end(args);
-
-	return rc;
-}
-EXPORT_SYMBOL(libcfs_debug_msg);
-
-int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
-		       const char *format1, va_list args,
-		       const char *format2, ...)
-{
-	struct cfs_trace_cpu_data *tcd = NULL;
-	struct ptldebug_header header = { 0 };
-	struct cfs_trace_page *tage;
-	/* string_buf is used only if tcd != NULL, and is always set then */
-	char *string_buf = NULL;
-	char *debug_buf;
-	int known_size;
-	int needed = 85; /* average message length */
-	int max_nob;
-	va_list ap;
-	int depth;
-	int i;
-	int remain;
-	int mask = msgdata->msg_mask;
-	const char *file = kbasename(msgdata->msg_file);
-	struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;
-
-	tcd = cfs_trace_get_tcd();
-
-	/* cfs_trace_get_tcd() grabs a lock, which disables preemption and
-	 * pins us to a particular CPU.  This avoids an smp_processor_id()
-	 * warning on Linux when debugging is enabled.
-	 */
-	cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
-
-	if (!tcd)		/* arch may not log in IRQ context */
-		goto console;
-
-	if (!tcd->tcd_cur_pages)
-		header.ph_flags |= PH_FLAG_FIRST_RECORD;
-
-	if (tcd->tcd_shutting_down) {
-		cfs_trace_put_tcd(tcd);
-		tcd = NULL;
-		goto console;
-	}
-
-	depth = 0;
-	known_size = strlen(file) + 1 + depth;
-	if (msgdata->msg_fn)
-		known_size += strlen(msgdata->msg_fn) + 1;
-
-	if (libcfs_debug_binary)
-		known_size += sizeof(header);
-
-	/*
-	 * Loop at most twice: vsnprintf() returns the real size required
-	 * for the output _without_ the terminating NUL, so if 'needed'
-	 * turns out to be too small for this format we retry once with
-	 * the corrected size.
-	 */
-	for (i = 0; i < 2; i++) {
-		tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
-		if (!tage) {
-			if (needed + known_size > PAGE_SIZE)
-				mask |= D_ERROR;
-
-			cfs_trace_put_tcd(tcd);
-			tcd = NULL;
-			goto console;
-		}
-
-		string_buf = (char *)page_address(tage->page) +
-					tage->used + known_size;
-
-		max_nob = PAGE_SIZE - tage->used - known_size;
-		if (max_nob <= 0) {
-			pr_emerg("negative max_nob: %d\n", max_nob);
-			mask |= D_ERROR;
-			cfs_trace_put_tcd(tcd);
-			tcd = NULL;
-			goto console;
-		}
-
-		needed = 0;
-		if (format1) {
-			va_copy(ap, args);
-			needed = vsnprintf(string_buf, max_nob, format1, ap);
-			va_end(ap);
-		}
-
-		if (format2) {
-			remain = max_nob - needed;
-			if (remain < 0)
-				remain = 0;
-
-			va_start(ap, format2);
-			needed += vsnprintf(string_buf + needed, remain,
-					    format2, ap);
-			va_end(ap);
-		}
-
-		if (needed < max_nob) /* message fit, we are done */
-			break;
-	}
-
-	if (*(string_buf + needed - 1) != '\n')
-		pr_info("format at %s:%d:%s doesn't end in newline\n", file,
-			msgdata->msg_line, msgdata->msg_fn);
-
-	header.ph_len = known_size + needed;
-	debug_buf = (char *)page_address(tage->page) + tage->used;
-
-	if (libcfs_debug_binary) {
-		memcpy(debug_buf, &header, sizeof(header));
-		tage->used += sizeof(header);
-		debug_buf += sizeof(header);
-	}
-
-	/* indent message according to the nesting level */
-	while (depth-- > 0) {
-		*(debug_buf++) = '.';
-		++tage->used;
-	}
-
-	strcpy(debug_buf, file);
-	tage->used += strlen(file) + 1;
-	debug_buf += strlen(file) + 1;
-
-	if (msgdata->msg_fn) {
-		strcpy(debug_buf, msgdata->msg_fn);
-		tage->used += strlen(msgdata->msg_fn) + 1;
-		debug_buf += strlen(msgdata->msg_fn) + 1;
-	}
-
-	__LASSERT(debug_buf == string_buf);
-
-	tage->used += needed;
-	__LASSERT(tage->used <= PAGE_SIZE);
-
-console:
-	if (!(mask & libcfs_printk)) {
-		/* no console output requested */
-		if (tcd)
-			cfs_trace_put_tcd(tcd);
-		return 1;
-	}
-
-	if (cdls) {
-		if (libcfs_console_ratelimit &&
-		    cdls->cdls_next &&		/* not first time ever */
-		    !time_after(jiffies, cdls->cdls_next)) {
-			/* skipping a console message */
-			cdls->cdls_count++;
-			if (tcd)
-				cfs_trace_put_tcd(tcd);
-			return 1;
-		}
-
-		if (time_after(jiffies,
-			       cdls->cdls_next + libcfs_console_max_delay +
-			       10 * HZ)) {
-			/* last timeout was a long time ago */
-			cdls->cdls_delay /= libcfs_console_backoff * 4;
-		} else {
-			cdls->cdls_delay *= libcfs_console_backoff;
-		}
-
-		if (cdls->cdls_delay < libcfs_console_min_delay)
-			cdls->cdls_delay = libcfs_console_min_delay;
-		else if (cdls->cdls_delay > libcfs_console_max_delay)
-			cdls->cdls_delay = libcfs_console_max_delay;
-
-		/* ensure cdls_next is never zero after it's been seen */
-		cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1;
-	}
-
-	if (tcd) {
-		cfs_print_to_console(&header, mask, string_buf, needed, file,
-				     msgdata->msg_fn);
-		cfs_trace_put_tcd(tcd);
-	} else {
-		string_buf = cfs_trace_get_console_buffer();
-
-		needed = 0;
-		if (format1) {
-			va_copy(ap, args);
-			needed = vsnprintf(string_buf,
-					   CFS_TRACE_CONSOLE_BUFFER_SIZE,
-					   format1, ap);
-			va_end(ap);
-		}
-		if (format2) {
-			remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
-			if (remain > 0) {
-				va_start(ap, format2);
-				needed += vsnprintf(string_buf + needed, remain,
-						    format2, ap);
-				va_end(ap);
-			}
-		}
-		cfs_print_to_console(&header, mask,
-				     string_buf, needed, file, msgdata->msg_fn);
-
-		put_cpu();
-	}
-
-	if (cdls && cdls->cdls_count) {
-		string_buf = cfs_trace_get_console_buffer();
-
-		needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
-				  "Skipped %d previous similar message%s\n",
-				  cdls->cdls_count,
-				  (cdls->cdls_count > 1) ? "s" : "");
-
-		cfs_print_to_console(&header, mask,
-				     string_buf, needed, file, msgdata->msg_fn);
-
-		put_cpu();
-		cdls->cdls_count = 0;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(libcfs_debug_vmsg2);
-
-void
-cfs_trace_assertion_failed(const char *str,
-			   struct libcfs_debug_msg_data *msgdata)
-{
-	struct ptldebug_header hdr;
-
-	libcfs_panic_in_progress = 1;
-	libcfs_catastrophe = 1;
-	mb();
-
-	cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
-
-	cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
-			     msgdata->msg_file, msgdata->msg_fn);
-
-	panic("Lustre debug assertion failure\n");
-
-	/* not reached */
-}
-
-static void
-panic_collect_pages(struct page_collection *pc)
-{
-	/* Do the collect_pages job on a single CPU: assumes that all other
-	 * CPUs have been stopped during a panic.  If this isn't true for some
-	 * arch, this will have to be implemented separately in each arch.
-	 */
-	struct cfs_trace_cpu_data *tcd;
-	int i;
-	int j;
-
-	INIT_LIST_HEAD(&pc->pc_pages);
-
-	cfs_tcd_for_each(tcd, i, j) {
-		list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
-		tcd->tcd_cur_pages = 0;
-
-		if (pc->pc_want_daemon_pages) {
-			list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
-			tcd->tcd_cur_daemon_pages = 0;
-		}
-	}
-}
-
-static void collect_pages_on_all_cpus(struct page_collection *pc)
-{
-	struct cfs_trace_cpu_data *tcd;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-			list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
-			tcd->tcd_cur_pages = 0;
-			if (pc->pc_want_daemon_pages) {
-				list_splice_init(&tcd->tcd_daemon_pages,
-						 &pc->pc_pages);
-				tcd->tcd_cur_daemon_pages = 0;
-			}
-		}
-	}
-}
-
-static void collect_pages(struct page_collection *pc)
-{
-	INIT_LIST_HEAD(&pc->pc_pages);
-
-	if (libcfs_panic_in_progress)
-		panic_collect_pages(pc);
-	else
-		collect_pages_on_all_cpus(pc);
-}
-
-static void put_pages_back_on_all_cpus(struct page_collection *pc)
-{
-	struct cfs_trace_cpu_data *tcd;
-	struct list_head *cur_head;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-			cur_head = tcd->tcd_pages.next;
-
-			list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
-						 linkage) {
-				__LASSERT_TAGE_INVARIANT(tage);
-
-				if (tage->cpu != cpu || tage->type != i)
-					continue;
-
-				cfs_tage_to_tail(tage, cur_head);
-				tcd->tcd_cur_pages++;
-			}
-		}
-	}
-}
-
-static void put_pages_back(struct page_collection *pc)
-{
-	if (!libcfs_panic_in_progress)
-		put_pages_back_on_all_cpus(pc);
-}
-
-/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon.
- */
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
-					 struct cfs_trace_cpu_data *tcd)
-{
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-
-	list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
-		__LASSERT_TAGE_INVARIANT(tage);
-
-		if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
-			continue;
-
-		cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
-		tcd->tcd_cur_daemon_pages++;
-
-		if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
-			struct cfs_trace_page *victim;
-
-			__LASSERT(!list_empty(&tcd->tcd_daemon_pages));
-			victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
-
-			__LASSERT_TAGE_INVARIANT(victim);
-
-			list_del(&victim->linkage);
-			cfs_tage_free(victim);
-			tcd->tcd_cur_daemon_pages--;
-		}
-	}
-}
-
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
-	struct cfs_trace_cpu_data *tcd;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		cfs_tcd_for_each_type_lock(tcd, i, cpu)
-			put_pages_on_tcd_daemon_list(pc, tcd);
-	}
-}
-
-void cfs_trace_debug_print(void)
-{
-	struct page_collection pc;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-
-	pc.pc_want_daemon_pages = 1;
-	collect_pages(&pc);
-	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-		char *p, *file, *fn;
-		struct page *page;
-
-		__LASSERT_TAGE_INVARIANT(tage);
-
-		page = tage->page;
-		p = page_address(page);
-		while (p < ((char *)page_address(page) + tage->used)) {
-			struct ptldebug_header *hdr;
-			int len;
-
-			hdr = (void *)p;
-			p += sizeof(*hdr);
-			file = p;
-			p += strlen(file) + 1;
-			fn = p;
-			p += strlen(fn) + 1;
-			len = hdr->ph_len - (int)(p - (char *)hdr);
-
-			cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
-
-			p += len;
-		}
-
-		list_del(&tage->linkage);
-		cfs_tage_free(tage);
-	}
-}
-
-int cfs_tracefile_dump_all_pages(char *filename)
-{
-	struct page_collection pc;
-	struct file *filp;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-	char *buf;
-	mm_segment_t __oldfs;
-	int rc;
-
-	cfs_tracefile_write_lock();
-
-	filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
-			 0600);
-	if (IS_ERR(filp)) {
-		rc = PTR_ERR(filp);
-		filp = NULL;
-		pr_err("LustreError: can't open %s for dump: rc %d\n",
-		       filename, rc);
-		goto out;
-	}
-
-	pc.pc_want_daemon_pages = 1;
-	collect_pages(&pc);
-	if (list_empty(&pc.pc_pages)) {
-		rc = 0;
-		goto close;
-	}
-	__oldfs = get_fs();
-	set_fs(get_ds());
-
-	/* ok, for now, just write the pages.  in the future we'll be building
-	 * iobufs with the pages and calling generic_direct_IO
-	 */
-	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-		__LASSERT_TAGE_INVARIANT(tage);
-
-		buf = kmap(tage->page);
-		rc = kernel_write(filp, buf, tage->used, &filp->f_pos);
-		kunmap(tage->page);
-
-		if (rc != (int)tage->used) {
-			pr_warn("wanted to write %u but wrote %d\n", tage->used,
-				rc);
-			put_pages_back(&pc);
-			__LASSERT(list_empty(&pc.pc_pages));
-			break;
-		}
-		list_del(&tage->linkage);
-		cfs_tage_free(tage);
-	}
-	set_fs(__oldfs);
-	rc = vfs_fsync(filp, 1);
-	if (rc)
-		pr_err("sync returns %d\n", rc);
-close:
-	filp_close(filp, NULL);
-out:
-	cfs_tracefile_write_unlock();
-	return rc;
-}
-
-void cfs_trace_flush_pages(void)
-{
-	struct page_collection pc;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-
-	pc.pc_want_daemon_pages = 1;
-	collect_pages(&pc);
-	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-		__LASSERT_TAGE_INVARIANT(tage);
-
-		list_del(&tage->linkage);
-		cfs_tage_free(tage);
-	}
-}
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-			    const char __user *usr_buffer, int usr_buffer_nob)
-{
-	int nob;
-
-	if (usr_buffer_nob > knl_buffer_nob)
-		return -EOVERFLOW;
-
-	if (copy_from_user((void *)knl_buffer,
-			   usr_buffer, usr_buffer_nob))
-		return -EFAULT;
-
-	nob = strnlen(knl_buffer, usr_buffer_nob);
-	while (--nob >= 0)		      /* strip trailing whitespace */
-		if (!isspace(knl_buffer[nob]))
-			break;
-
-	if (nob < 0)			    /* empty string */
-		return -EINVAL;
-
-	if (nob == knl_buffer_nob)	      /* no space to terminate */
-		return -EOVERFLOW;
-
-	knl_buffer[nob + 1] = 0;		/* terminate */
-	return 0;
-}
-EXPORT_SYMBOL(cfs_trace_copyin_string);
-
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-			     const char *knl_buffer, char *append)
-{
-	/*
-	 * NB if 'append' != NULL, it's a single character to append to the
-	 * copied out string - usually "\n" or "" (i.e. a terminating zero byte)
-	 */
-	int nob = strlen(knl_buffer);
-
-	if (nob > usr_buffer_nob)
-		nob = usr_buffer_nob;
-
-	if (copy_to_user(usr_buffer, knl_buffer, nob))
-		return -EFAULT;
-
-	if (append && nob < usr_buffer_nob) {
-		if (copy_to_user(usr_buffer + nob, append, 1))
-			return -EFAULT;
-
-		nob++;
-	}
-
-	return nob;
-}
-EXPORT_SYMBOL(cfs_trace_copyout_string);
-
-int cfs_trace_allocate_string_buffer(char **str, int nob)
-{
-	if (nob > 2 * PAGE_SIZE)	    /* string must be "sensible" */
-		return -EINVAL;
-
-	*str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
-	if (!*str)
-		return -ENOMEM;
-
-	return 0;
-}
-
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
-{
-	char *str;
-	int rc;
-
-	rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
-	if (rc)
-		return rc;
-
-	rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
-				     usr_str, usr_str_nob);
-	if (rc)
-		goto out;
-
-	if (str[0] != '/') {
-		rc = -EINVAL;
-		goto out;
-	}
-	rc = cfs_tracefile_dump_all_pages(str);
-out:
-	kfree(str);
-	return rc;
-}
-
-int cfs_trace_daemon_command(char *str)
-{
-	int rc = 0;
-
-	cfs_tracefile_write_lock();
-
-	if (!strcmp(str, "stop")) {
-		cfs_tracefile_write_unlock();
-		cfs_trace_stop_thread();
-		cfs_tracefile_write_lock();
-		memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
-
-	} else if (!strncmp(str, "size=", 5)) {
-		unsigned long tmp;
-
-		rc = kstrtoul(str + 5, 10, &tmp);
-		if (!rc) {
-			if (tmp < 10 || tmp > 20480)
-				cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-			else
-				cfs_tracefile_size = tmp << 20;
-		}
-	} else if (strlen(str) >= sizeof(cfs_tracefile)) {
-		rc = -ENAMETOOLONG;
-	} else if (str[0] != '/') {
-		rc = -EINVAL;
-	} else {
-		strcpy(cfs_tracefile, str);
-
-		pr_info("debug daemon will attempt to start writing to %s (%lukB max)\n",
-			cfs_tracefile,
-			(long)(cfs_tracefile_size >> 10));
-
-		cfs_trace_start_thread();
-	}
-
-	cfs_tracefile_write_unlock();
-	return rc;
-}
-
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
-{
-	char *str;
-	int rc;
-
-	rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
-	if (rc)
-		return rc;
-
-	rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
-				     usr_str, usr_str_nob);
-	if (!rc)
-		rc = cfs_trace_daemon_command(str);
-
-	kfree(str);
-	return rc;
-}
-
-int cfs_trace_set_debug_mb(int mb)
-{
-	int i;
-	int j;
-	int pages;
-	int limit = cfs_trace_max_debug_mb();
-	struct cfs_trace_cpu_data *tcd;
-
-	if (mb < num_possible_cpus()) {
-		pr_warn("%d MB is too small for debug buffer size, setting it to %d MB.\n",
-			mb, num_possible_cpus());
-		mb = num_possible_cpus();
-	}
-
-	if (mb > limit) {
-		pr_warn("%d MB is too large for debug buffer size, setting it to %d MB.\n",
-			mb, limit);
-		mb = limit;
-	}
-
-	mb /= num_possible_cpus();
-	pages = mb << (20 - PAGE_SHIFT);
-
-	cfs_tracefile_write_lock();
-
-	cfs_tcd_for_each(tcd, i, j)
-		tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
-
-	cfs_tracefile_write_unlock();
-
-	return 0;
-}
-
-int cfs_trace_get_debug_mb(void)
-{
-	int i;
-	int j;
-	struct cfs_trace_cpu_data *tcd;
-	int total_pages = 0;
-
-	cfs_tracefile_read_lock();
-
-	cfs_tcd_for_each(tcd, i, j)
-		total_pages += tcd->tcd_max_pages;
-
-	cfs_tracefile_read_unlock();
-
-	return (total_pages >> (20 - PAGE_SHIFT)) + 1;
-}
-
-static int tracefiled(void *arg)
-{
-	struct page_collection pc;
-	struct tracefiled_ctl *tctl = arg;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-	struct file *filp;
-	char *buf;
-	int last_loop = 0;
-	int rc;
-
-	/* We are started late enough that we pick up init's fs context. */
-	/* Historically this behaved strangely under UML; never fully understood. */
-
-	complete(&tctl->tctl_start);
-
-	while (1) {
-		wait_queue_entry_t __wait;
-
-		pc.pc_want_daemon_pages = 0;
-		collect_pages(&pc);
-		if (list_empty(&pc.pc_pages))
-			goto end_loop;
-
-		filp = NULL;
-		cfs_tracefile_read_lock();
-		if (cfs_tracefile[0]) {
-			filp = filp_open(cfs_tracefile,
-					 O_CREAT | O_RDWR | O_LARGEFILE,
-					 0600);
-			if (IS_ERR(filp)) {
-				rc = PTR_ERR(filp);
-				filp = NULL;
-				pr_warn("couldn't open %s: %d\n", cfs_tracefile,
-					rc);
-			}
-		}
-		cfs_tracefile_read_unlock();
-		if (!filp) {
-			put_pages_on_daemon_list(&pc);
-			__LASSERT(list_empty(&pc.pc_pages));
-			goto end_loop;
-		}
-
-		list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-			static loff_t f_pos;
-
-			__LASSERT_TAGE_INVARIANT(tage);
-
-			if (f_pos >= (off_t)cfs_tracefile_size)
-				f_pos = 0;
-			else if (f_pos > i_size_read(file_inode(filp)))
-				f_pos = i_size_read(file_inode(filp));
-
-			buf = kmap(tage->page);
-			rc = kernel_write(filp, buf, tage->used, &f_pos);
-			kunmap(tage->page);
-
-			if (rc != (int)tage->used) {
-				pr_warn("wanted to write %u but wrote %d\n",
-					tage->used, rc);
-				put_pages_back(&pc);
-				__LASSERT(list_empty(&pc.pc_pages));
-				break;
-			}
-		}
-
-		filp_close(filp, NULL);
-		put_pages_on_daemon_list(&pc);
-		if (!list_empty(&pc.pc_pages)) {
-			int i;
-
-			pr_alert("trace pages aren't empty\n");
-			pr_err("total cpus(%d): ", num_possible_cpus());
-			for (i = 0; i < num_possible_cpus(); i++)
-				if (cpu_online(i))
-					pr_cont("%d(on) ", i);
-				else
-					pr_cont("%d(off) ", i);
-			pr_cont("\n");
-
-			i = 0;
-			list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
-						 linkage)
-				pr_err("page %d belongs to cpu %d\n",
-				       ++i, tage->cpu);
-			pr_err("There are %d pages unwritten\n", i);
-		}
-		__LASSERT(list_empty(&pc.pc_pages));
-end_loop:
-		if (atomic_read(&tctl->tctl_shutdown)) {
-			if (!last_loop) {
-				last_loop = 1;
-				continue;
-			} else {
-				break;
-			}
-		}
-		init_waitqueue_entry(&__wait, current);
-		add_wait_queue(&tctl->tctl_waitq, &__wait);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ);
-		remove_wait_queue(&tctl->tctl_waitq, &__wait);
-	}
-	complete(&tctl->tctl_stop);
-	return 0;
-}
-
-int cfs_trace_start_thread(void)
-{
-	struct tracefiled_ctl *tctl = &trace_tctl;
-	struct task_struct *task;
-	int rc = 0;
-
-	mutex_lock(&cfs_trace_thread_mutex);
-	if (thread_running)
-		goto out;
-
-	init_completion(&tctl->tctl_start);
-	init_completion(&tctl->tctl_stop);
-	init_waitqueue_head(&tctl->tctl_waitq);
-	atomic_set(&tctl->tctl_shutdown, 0);
-
-	task = kthread_run(tracefiled, tctl, "ktracefiled");
-	if (IS_ERR(task)) {
-		rc = PTR_ERR(task);
-		goto out;
-	}
-
-	wait_for_completion(&tctl->tctl_start);
-	thread_running = 1;
-out:
-	mutex_unlock(&cfs_trace_thread_mutex);
-	return rc;
-}
-
-void cfs_trace_stop_thread(void)
-{
-	struct tracefiled_ctl *tctl = &trace_tctl;
-
-	mutex_lock(&cfs_trace_thread_mutex);
-	if (thread_running) {
-		pr_info("shutting down debug daemon thread...\n");
-		atomic_set(&tctl->tctl_shutdown, 1);
-		wait_for_completion(&tctl->tctl_stop);
-		thread_running = 0;
-	}
-	mutex_unlock(&cfs_trace_thread_mutex);
-}
-
-int cfs_tracefile_init(int max_pages)
-{
-	struct cfs_trace_cpu_data *tcd;
-	int i;
-	int j;
-	int rc;
-	int factor;
-
-	rc = cfs_tracefile_init_arch();
-	if (rc)
-		return rc;
-
-	cfs_tcd_for_each(tcd, i, j) {
-		/* tcd_pages_factor is initialized in cfs_tracefile_init_arch(). */
-		factor = tcd->tcd_pages_factor;
-		INIT_LIST_HEAD(&tcd->tcd_pages);
-		INIT_LIST_HEAD(&tcd->tcd_stock_pages);
-		INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
-		tcd->tcd_cur_pages = 0;
-		tcd->tcd_cur_stock_pages = 0;
-		tcd->tcd_cur_daemon_pages = 0;
-		tcd->tcd_max_pages = (max_pages * factor) / 100;
-		LASSERT(tcd->tcd_max_pages > 0);
-		tcd->tcd_shutting_down = 0;
-	}
-
-	return 0;
-}
-
-static void trace_cleanup_on_all_cpus(void)
-{
-	struct cfs_trace_cpu_data *tcd;
-	struct cfs_trace_page *tage;
-	struct cfs_trace_page *tmp;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-			tcd->tcd_shutting_down = 1;
-
-			list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
-						 linkage) {
-				__LASSERT_TAGE_INVARIANT(tage);
-
-				list_del(&tage->linkage);
-				cfs_tage_free(tage);
-			}
-
-			tcd->tcd_cur_pages = 0;
-		}
-	}
-}
-
-static void cfs_trace_cleanup(void)
-{
-	struct page_collection pc;
-
-	INIT_LIST_HEAD(&pc.pc_pages);
-
-	trace_cleanup_on_all_cpus();
-
-	cfs_tracefile_fini_arch();
-}
-
-void cfs_tracefile_exit(void)
-{
-	cfs_trace_stop_thread();
-	cfs_trace_cleanup();
-}
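
The cdls handling in libcfs_debug_vmsg2() above is a small adaptive console rate limiter: while messages keep arriving the delay multiplies by libcfs_console_backoff, after a long quiet period it shrinks by backoff * 4, and it is always clamped between the min and max delays. A self-contained sketch of that policy; the tick values, constants and the one-message-per-tick driver are assumptions for demonstration, not the kernel's tunables:

#include <stdio.h>

#define HZ 100UL			/* assumed ticks per second */

static unsigned long min_delay = 50;		/* 0.5 s  */
static unsigned long max_delay = 600 * HZ;	/* 10 min */
static unsigned long backoff = 2;

struct cdls {
	unsigned long next;	/* earliest tick of the next message */
	unsigned long delay;
	int skipped;
};

/* returns 1 when the message should be suppressed */
static int cdls_limit(struct cdls *c, unsigned long now)
{
	if (c->next && now < c->next) {
		c->skipped++;			/* too soon, drop it */
		return 1;
	}
	if (now > c->next + max_delay + 10 * HZ)
		c->delay /= backoff * 4;	/* long quiet spell: relax */
	else
		c->delay *= backoff;		/* still chatty: back off */
	if (c->delay < min_delay)
		c->delay = min_delay;
	if (c->delay > max_delay)
		c->delay = max_delay;
	c->next = (now + c->delay) | 1;		/* never zero once set */
	return 0;
}

int main(void)
{
	struct cdls c = { 0, 0, 0 };
	unsigned long t;
	int printed = 0;

	for (t = 0; t < 30 * HZ; t++)		/* one message per tick */
		if (!cdls_limit(&c, t))
			printed++;
	printf("printed %d of %lu, skipped %d\n",
	       printed, 30 * HZ, c.skipped);
	return 0;
}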

+ 0 - 274
drivers/staging/lustre/lnet/libcfs/tracefile.h

@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LIBCFS_TRACEFILE_H__
-#define __LIBCFS_TRACEFILE_H__
-
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/cache.h>
-#include <linux/threads.h>
-#include <linux/limits.h>
-#include <linux/smp.h>
-#include <linux/libcfs/libcfs.h>
-
-enum cfs_trace_buf_type {
-	CFS_TCD_TYPE_PROC = 0,
-	CFS_TCD_TYPE_SOFTIRQ,
-	CFS_TCD_TYPE_IRQ,
-	CFS_TCD_TYPE_MAX
-};
-
-/* trace file lock routines */
-
-#define TRACEFILE_NAME_SIZE 1024
-extern char cfs_tracefile[TRACEFILE_NAME_SIZE];
-extern long long cfs_tracefile_size;
-
-/**
- * The path of the debug-log dump upcall script.
- */
-extern char lnet_debug_log_upcall[1024];
-
-void libcfs_run_debug_log_upcall(char *file);
-
-int  cfs_tracefile_init_arch(void);
-void cfs_tracefile_fini_arch(void);
-
-void cfs_tracefile_read_lock(void);
-void cfs_tracefile_read_unlock(void);
-void cfs_tracefile_write_lock(void);
-void cfs_tracefile_write_unlock(void);
-
-int cfs_tracefile_dump_all_pages(char *filename);
-void cfs_trace_debug_print(void);
-void cfs_trace_flush_pages(void);
-int cfs_trace_start_thread(void);
-void cfs_trace_stop_thread(void);
-int cfs_tracefile_init(int max_pages);
-void cfs_tracefile_exit(void);
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-			    const char __user *usr_buffer, int usr_buffer_nob);
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-			     const char *knl_str, char *append);
-int cfs_trace_allocate_string_buffer(char **str, int nob);
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_daemon_command(char *str);
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_set_debug_mb(int mb);
-int cfs_trace_get_debug_mb(void);
-
-void libcfs_debug_dumplog_internal(void *arg);
-void libcfs_register_panic_notifier(void);
-void libcfs_unregister_panic_notifier(void);
-extern int libcfs_panic_in_progress;
-int cfs_trace_max_debug_mb(void);
-
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-#define CFS_TRACEFILE_SIZE (500 << 20)
-
-#ifdef LUSTRE_TRACEFILE_PRIVATE
-
-/*
- * Private declarations for tracefile.  TCD_MAX_PAGES, TCD_STOCK_PAGES and
- * CFS_TRACEFILE_SIZE are already defined identically above, so they are
- * not repeated here.
- */
-
-/*
- * Size of the buffer used for formatting console messages when we cannot
- * get a page from the system
- */
-#define CFS_TRACE_CONSOLE_BUFFER_SIZE   1024
-
-union cfs_trace_data_union {
-	struct cfs_trace_cpu_data {
-		/*
-		 * Even though this structure is meant to be per-CPU, locking
-		 * is needed because in some places the data may be accessed
-		 * from other CPUs. This lock is directly used in trace_get_tcd
-		 * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and
-		 * tcd_for_each_type_lock
-		 */
-		spinlock_t		tcd_lock;
-		unsigned long		tcd_lock_flags;
-
-		/*
-		 * pages with trace records not yet processed by tracefiled.
-		 */
-		struct list_head	tcd_pages;
-		/* number of pages on ->tcd_pages */
-		unsigned long		tcd_cur_pages;
-
-		/*
-		 * pages with trace records already processed by
-		 * tracefiled. These pages are kept in memory, so that some
-		 * portion of log can be written in the event of LBUG. This
-		 * list is maintained in LRU order.
-		 *
-		 * Pages are moved to ->tcd_daemon_pages by tracefiled()
-		 * (put_pages_on_daemon_list()). LRU pages from this list are
-		 * discarded when list grows too large.
-		 */
-		struct list_head	tcd_daemon_pages;
-		/* number of pages on ->tcd_daemon_pages */
-		unsigned long		tcd_cur_daemon_pages;
-
-		/*
-		 * Maximal number of pages allowed on ->tcd_pages and
-		 * ->tcd_daemon_pages each.
-		 * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
-		 * implementation.
-		 */
-		unsigned long		tcd_max_pages;
-
-		/*
-		 * preallocated pages to write trace records into. Pages from
-		 * ->tcd_stock_pages are moved to ->tcd_pages by
-		 * portals_debug_msg().
-		 *
-		 * This list is necessary, because on some platforms it's
-		 * impossible to perform efficient atomic page allocation in a
-		 * non-blockable context.
-		 *
-		 * Such platforms fill ->tcd_stock_pages "on occasion", when
-		 * tracing code is entered in blockable context.
-		 *
-		 * trace_get_tage_try() tries to get a page from
-		 * ->tcd_stock_pages first and resorts to atomic page
-		 * allocation only if this queue is empty. ->tcd_stock_pages
-		 * is replenished when tracing code is entered in blocking
-		 * context (darwin-tracefile.c:trace_get_tcd()). We try to
-		 * maintain TCD_STOCK_PAGES pages in this
-		 * queue. Atomic allocation is only required if more than
-		 * TCD_STOCK_PAGES pages' worth of trace records are emitted
-		 * in non-blocking contexts, which is quite unlikely.
-		 */
-		struct list_head	tcd_stock_pages;
-		/* number of pages on ->tcd_stock_pages */
-		unsigned long		tcd_cur_stock_pages;
-
-		unsigned short		tcd_shutting_down;
-		unsigned short		tcd_cpu;
-		unsigned short		tcd_type;
-		/* The factors to share debug memory. */
-		unsigned short		tcd_pages_factor;
-	} tcd;
-	char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))];
-};
-
-#define TCD_MAX_TYPES      8
-extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS];
-
-#define cfs_tcd_for_each(tcd, i, j)				       \
-	for (i = 0; cfs_trace_data[i]; i++)				\
-		for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd);	\
-		     j < num_possible_cpus();				 \
-		     j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
-
-#define cfs_tcd_for_each_type_lock(tcd, i, cpu)			   \
-	for (i = 0; cfs_trace_data[i] &&				\
-	     (tcd = &(*cfs_trace_data[i])[cpu].tcd) &&			\
-	     cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
-
-void cfs_set_ptldebug_header(struct ptldebug_header *header,
-			     struct libcfs_debug_msg_data *m,
-			     unsigned long stack);
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
-			  const char *buf, int len, const char *file,
-			  const char *fn);
-
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-
-extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void);
-
-static inline char *
-cfs_trace_get_console_buffer(void)
-{
-	unsigned int i = get_cpu();
-	unsigned int j = cfs_trace_buf_idx_get();
-
-	return cfs_trace_console_buffers[i][j];
-}
-
-static inline struct cfs_trace_cpu_data *
-cfs_trace_get_tcd(void)
-{
-	struct cfs_trace_cpu_data *tcd =
-		&(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
-
-	cfs_trace_lock_tcd(tcd, 0);
-
-	return tcd;
-}
-
-static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
-{
-	cfs_trace_unlock_tcd(tcd, 0);
-
-	put_cpu();
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
-			   struct list_head *stock);
-
-void cfs_trace_assertion_failed(const char *str,
-				struct libcfs_debug_msg_data *m);
-
-/* ASSERTION that is safe to use within the debug system */
-#define __LASSERT(cond)						 \
-do {								    \
-	if (unlikely(!(cond))) {					\
-		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL);     \
-		cfs_trace_assertion_failed("ASSERTION("#cond") failed", \
-					   &msgdata);		   \
-	}							       \
-} while (0)
-
-#define __LASSERT_TAGE_INVARIANT(tage)				  \
-do {								    \
-	__LASSERT(tage);					\
-	__LASSERT(tage->page);				  \
-	__LASSERT(tage->used <= PAGE_SIZE);			 \
-	__LASSERT(page_count(tage->page) > 0);		      \
-} while (0)
-
-#endif	/* LUSTRE_TRACEFILE_PRIVATE */
-
-#endif /* __LIBCFS_TRACEFILE_H__ */
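
union cfs_trace_data_union above uses a standard kernel idiom: each per-CPU element is padded out to a cache-line multiple via L1_CACHE_ALIGN() so that adjacent CPUs' data never share (and therefore never bounce) a cache line. A userspace illustration of just the padding arithmetic, with an assumed 64-byte line; note the kernel additionally marks the array __cacheline_aligned so the first element itself starts on a line boundary:

#include <stdio.h>

#define L1_CACHE_BYTES 64	/* assumed line size */
#define L1_CACHE_ALIGN(x) \
	(((x) + (L1_CACHE_BYTES - 1)) & ~(unsigned long)(L1_CACHE_BYTES - 1))

struct cpu_data {		/* stand-in for struct cfs_trace_cpu_data */
	unsigned long lock_flags;
	unsigned long cur_pages;
	unsigned short type;
};

/* the union is exactly as large as the padding, so array elements
 * never straddle or share a cache-line boundary
 */
union padded_cpu_data {
	struct cpu_data d;
	char __pad[L1_CACHE_ALIGN(sizeof(struct cpu_data))];
};

int main(void)
{
	printf("raw %zu bytes, padded to %zu bytes\n",
	       sizeof(struct cpu_data), sizeof(union padded_cpu_data));
	return 0;
}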

+ 0 - 10
drivers/staging/lustre/lnet/lnet/Makefile

@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += lnet.o
-
-lnet-y := api-ni.o config.o nidstrings.o net_fault.o		\
-	  lib-me.o lib-msg.o lib-eq.o lib-md.o lib-ptl.o	\
-	  lib-socket.o lib-move.o module.o lo.o			\
-	  router.o router_proc.o acceptor.o peer.o

+ 0 - 501
drivers/staging/lustre/lnet/lnet/acceptor.c

@@ -1,501 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/completion.h>
-#include <net/sock.h>
-#include <linux/lnet/lib-lnet.h>
-
-static int   accept_port    = 988;
-static int   accept_backlog = 127;
-static int   accept_timeout = 5;
-
-static struct {
-	int			pta_shutdown;
-	struct socket		*pta_sock;
-	struct completion	pta_signal;
-} lnet_acceptor_state = {
-	.pta_shutdown = 1
-};
-
-int
-lnet_acceptor_port(void)
-{
-	return accept_port;
-}
-EXPORT_SYMBOL(lnet_acceptor_port);
-
-static inline int
-lnet_accept_magic(__u32 magic, __u32 constant)
-{
-	return (magic == constant ||
-		magic == __swab32(constant));
-}
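/*
 * Editor's sketch (not in the original file): the swabbed comparison above
 * lets an opposite-endian peer be recognised -- its constant arrives
 * byte-reversed -- while the caller remembers whether the rest of the
 * request will need swabbing, e.g.:
 */
static inline int example_need_swab(__u32 magic)
{
	if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC))
		return -EPROTO;	/* not an acceptor connection request */
	/* non-zero means the peer has the opposite byte order */
	return magic != LNET_PROTO_ACCEPTOR_MAGIC;
}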
-
-static char *accept = "secure";
-
-module_param(accept, charp, 0444);
-MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)");
-module_param(accept_port, int, 0444);
-MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)");
-module_param(accept_backlog, int, 0444);
-MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog");
-module_param(accept_timeout, int, 0644);
-MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)");
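/*
 * Editor's note (not in the original file): with the permissions above, all
 * four knobs appear under /sys/module/lnet/parameters/, and only
 * accept_timeout (0644) is writable at runtime.  A hypothetical load-time
 * configuration in /etc/modprobe.d/lnet.conf would look like:
 *
 *	options lnet accept=all accept_port=988 accept_timeout=10
 */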
-
-static char *accept_type;
-
-static int
-lnet_acceptor_get_tunables(void)
-{
-	/*
-	 * The userland acceptor uses 'accept_type' instead of 'accept', due to
-	 * a conflict with 'accept(2)', but the kernel acceptor still uses
-	 * 'accept' for compatibility. Hence the trick.
-	 */
-	accept_type = accept;
-	return 0;
-}
-
-int
-lnet_acceptor_timeout(void)
-{
-	return accept_timeout;
-}
-EXPORT_SYMBOL(lnet_acceptor_timeout);
-
-void
-lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
-			   __u32 peer_ip, int peer_port)
-{
-	switch (rc) {
-	/* "normal" errors */
-	case -ECONNREFUSED:
-		CNETERR("Connection to %s at host %pI4h on port %d was refused: check that Lustre is running on that node.\n",
-			libcfs_nid2str(peer_nid),
-			&peer_ip, peer_port);
-		break;
-	case -EHOSTUNREACH:
-	case -ENETUNREACH:
-		CNETERR("Connection to %s at host %pI4h was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n",
-			libcfs_nid2str(peer_nid), &peer_ip);
-		break;
-	case -ETIMEDOUT:
-		CNETERR("Connection to %s at host %pI4h on port %d took too long: that node may be hung or experiencing high load.\n",
-			libcfs_nid2str(peer_nid),
-			&peer_ip, peer_port);
-		break;
-	case -ECONNRESET:
-		LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %pI4h on port %d was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n",
-				   libcfs_nid2str(peer_nid),
-				   &peer_ip, peer_port,
-				   libcfs_nid2str(peer_nid));
-		break;
-	case -EPROTO:
-		LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at host %pI4h on port %d: is it running a compatible version of Lustre?\n",
-				   libcfs_nid2str(peer_nid),
-				   &peer_ip, peer_port);
-		break;
-	case -EADDRINUSE:
-		LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to connect to %s at host %pI4h on port %d\n",
-				   libcfs_nid2str(peer_nid),
-				   &peer_ip, peer_port);
-		break;
-	default:
-		LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s at host %pI4h on port %d\n",
-				   rc, libcfs_nid2str(peer_nid),
-				   &peer_ip, peer_port);
-		break;
-	}
-}
-EXPORT_SYMBOL(lnet_connect_console_error);
-
-int
-lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-	     __u32 local_ip, __u32 peer_ip, int peer_port)
-{
-	struct lnet_acceptor_connreq cr;
-	struct socket *sock;
-	int rc;
-	int port;
-	int fatal;
-
-	BUILD_BUG_ON(sizeof(cr) > 16);	    /* must not be too big for the stack */
-
-	for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
-	     port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
-	     --port) {
-		/* Iterate through reserved ports. */
-
-		rc = lnet_sock_connect(&sock, &fatal, local_ip, port, peer_ip,
-				       peer_port);
-		if (rc) {
-			if (fatal)
-				goto failed;
-			continue;
-		}
-
-		BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
-
-		cr.acr_magic   = LNET_PROTO_ACCEPTOR_MAGIC;
-		cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-		cr.acr_nid     = peer_nid;
-
-		if (the_lnet.ln_testprotocompat) {
-			/* single-shot proto check */
-			lnet_net_lock(LNET_LOCK_EX);
-			if (the_lnet.ln_testprotocompat & 4) {
-				cr.acr_version++;
-				the_lnet.ln_testprotocompat &= ~4;
-			}
-			if (the_lnet.ln_testprotocompat & 8) {
-				cr.acr_magic = LNET_PROTO_MAGIC;
-				the_lnet.ln_testprotocompat &= ~8;
-			}
-			lnet_net_unlock(LNET_LOCK_EX);
-		}
-
-		rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
-		if (rc)
-			goto failed_sock;
-
-		*sockp = sock;
-		return 0;
-	}
-
-	rc = -EADDRINUSE;
-	goto failed;
-
- failed_sock:
-	sock_release(sock);
- failed:
-	lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
-	return rc;
-}
-EXPORT_SYMBOL(lnet_connect);
-
-static int
-lnet_accept(struct socket *sock, __u32 magic)
-{
-	struct lnet_acceptor_connreq cr;
-	__u32 peer_ip;
-	int peer_port;
-	int rc;
-	int flip;
-	struct lnet_ni *ni;
-	char *str;
-
-	LASSERT(sizeof(cr) <= 16);	     /* not too big for the stack */
-
-	rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
-	LASSERT(!rc);		      /* we succeeded before */
-
-	if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
-		if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
-			/*
-			 * Future version compatibility!
-			 * When LNET unifies protocols over all LNDs, the first
-			 * thing sent will be a version query. I send back
-			 * LNET_PROTO_ACCEPTOR_MAGIC to tell the peer I'm "old".
-			 */
-			memset(&cr, 0, sizeof(cr));
-			cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
-			cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-			rc = lnet_sock_write(sock, &cr, sizeof(cr),
-					     accept_timeout);
-
-			if (rc)
-				CERROR("Error sending magic+version in response to LNET magic from %pI4h: %d\n",
-				       &peer_ip, rc);
-			return -EPROTO;
-		}
-
-		if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC))
-			str = "'old' socknal/tcpnal";
-		else
-			str = "unrecognised";
-
-		LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pI4h magic %08x: %s acceptor protocol\n",
-				   &peer_ip, magic, str);
-		return -EPROTO;
-	}
-
-	flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
-
-	rc = lnet_sock_read(sock, &cr.acr_version, sizeof(cr.acr_version),
-			    accept_timeout);
-	if (rc) {
-		CERROR("Error %d reading connection request version from %pI4h\n",
-		       rc, &peer_ip);
-		return -EIO;
-	}
-
-	if (flip)
-		__swab32s(&cr.acr_version);
-
-	if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
-		/*
-		 * Future version compatibility!
-		 * An acceptor-specific protocol rev will first send a version
-		 * query.  I send back my current version to tell the peer
-		 * I'm "old".
-		 */
-		int peer_version = cr.acr_version;
-
-		memset(&cr, 0, sizeof(cr));
-		cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
-		cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-
-		rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
-		if (rc)
-			CERROR("Error sending magic+version in response to version %d from %pI4h: %d\n",
-			       peer_version, &peer_ip, rc);
-		return -EPROTO;
-	}
-
-	rc = lnet_sock_read(sock, &cr.acr_nid,
-			    sizeof(cr) -
-			    offsetof(struct lnet_acceptor_connreq, acr_nid),
-			    accept_timeout);
-	if (rc) {
-		CERROR("Error %d reading connection request from %pI4h\n",
-		       rc, &peer_ip);
-		return -EIO;
-	}
-
-	if (flip)
-		__swab64s(&cr.acr_nid);
-
-	ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
-	if (!ni ||	       /* no matching net */
-	    ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
-		if (ni)
-			lnet_ni_decref(ni);
-		LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h for %s: No matching NI\n",
-				   &peer_ip, libcfs_nid2str(cr.acr_nid));
-		return -EPERM;
-	}
-
-	if (!ni->ni_lnd->lnd_accept) {
-		/* This catches a request for the loopback LND */
-		lnet_ni_decref(ni);
-		LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h for %s: NI does not accept IP connections\n",
-				   &peer_ip, libcfs_nid2str(cr.acr_nid));
-		return -EPERM;
-	}
-
-	CDEBUG(D_NET, "Accept %s from %pI4h\n",
-	       libcfs_nid2str(cr.acr_nid), &peer_ip);
-
-	rc = ni->ni_lnd->lnd_accept(ni, sock);
-
-	lnet_ni_decref(ni);
-	return rc;
-}
-
-static int
-lnet_acceptor(void *arg)
-{
-	struct socket *newsock;
-	int rc;
-	__u32 magic;
-	__u32 peer_ip;
-	int peer_port;
-	int secure = (int)((long)arg);
-
-	LASSERT(!lnet_acceptor_state.pta_sock);
-
-	rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port,
-			      accept_backlog);
-	if (rc) {
-		if (rc == -EADDRINUSE)
-			LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n",
-					   accept_port);
-		else
-			LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port %d: unexpected error %d\n",
-					   accept_port, rc);
-
-		lnet_acceptor_state.pta_sock = NULL;
-	} else {
-		LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
-	}
-
-	/* set init status and unblock parent */
-	lnet_acceptor_state.pta_shutdown = rc;
-	complete(&lnet_acceptor_state.pta_signal);
-
-	if (rc)
-		return rc;
-
-	while (!lnet_acceptor_state.pta_shutdown) {
-		rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
-		if (rc) {
-			if (rc != -EAGAIN) {
-				CWARN("Accept error %d: pausing...\n", rc);
-				set_current_state(TASK_UNINTERRUPTIBLE);
-				schedule_timeout(HZ);
-			}
-			continue;
-		}
-
-		/* the LNet acceptor thread may have been woken to shut down */
-		if (lnet_acceptor_state.pta_shutdown) {
-			sock_release(newsock);
-			break;
-		}
-
-		rc = lnet_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
-		if (rc) {
-			CERROR("Can't determine new connection's address\n");
-			goto failed;
-		}
-
-		if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
-			CERROR("Refusing connection from %pI4h: insecure port %d\n",
-			       &peer_ip, peer_port);
-			goto failed;
-		}
-
-		rc = lnet_sock_read(newsock, &magic, sizeof(magic),
-				    accept_timeout);
-		if (rc) {
-			CERROR("Error %d reading connection request from %pI4h\n",
-			       rc, &peer_ip);
-			goto failed;
-		}
-
-		rc = lnet_accept(newsock, magic);
-		if (rc)
-			goto failed;
-
-		continue;
-
-failed:
-		sock_release(newsock);
-	}
-
-	sock_release(lnet_acceptor_state.pta_sock);
-	lnet_acceptor_state.pta_sock = NULL;
-
-	CDEBUG(D_NET, "Acceptor stopping\n");
-
-	/* unblock lnet_acceptor_stop() */
-	complete(&lnet_acceptor_state.pta_signal);
-	return 0;
-}
-
-static inline int
-accept2secure(const char *acc, long *sec)
-{
-	if (!strcmp(acc, "secure")) {
-		*sec = 1;
-		return 1;
-	} else if (!strcmp(acc, "all")) {
-		*sec = 0;
-		return 1;
-	} else if (!strcmp(acc, "none")) {
-		return 0;
-	}
-
-	LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
-			   acc);
-	return -EINVAL;
-}
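/*
 * Editor's sketch (not in the original file): accept2secure() is tri-state --
 * negative means reject the string, 0 means no acceptor is wanted ("none"),
 * and 1 means start it, with *sec saying whether only privileged source
 * ports are allowed.  A hypothetical caller:
 */
static int example_parse_accept(const char *str)
{
	long secure;
	int rc = accept2secure(str, &secure);

	if (rc <= 0)		/* error, or acceptor disabled */
		return rc;
	/* rc == 1: run the acceptor; secure == 1 restricts source ports */
	return 0;
}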
-
-int
-lnet_acceptor_start(void)
-{
-	struct task_struct *task;
-	int rc;
-	long rc2;
-	long secure;
-
-	/* if acceptor is already running return immediately */
-	if (!lnet_acceptor_state.pta_shutdown)
-		return 0;
-
-	LASSERT(!lnet_acceptor_state.pta_sock);
-
-	rc = lnet_acceptor_get_tunables();
-	if (rc)
-		return rc;
-
-	init_completion(&lnet_acceptor_state.pta_signal);
-	rc = accept2secure(accept_type, &secure);
-	if (rc <= 0)
-		return rc;
-
-	if (!lnet_count_acceptor_nis())  /* not required */
-		return 0;
-
-	task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
-			   "acceptor_%03ld", secure);
-	if (IS_ERR(task)) {
-		rc2 = PTR_ERR(task);
-		CERROR("Can't start acceptor thread: %ld\n", rc2);
-
-		return -ESRCH;
-	}
-
-	/* wait for acceptor to startup */
-	wait_for_completion(&lnet_acceptor_state.pta_signal);
-
-	if (!lnet_acceptor_state.pta_shutdown) {
-		/* started OK */
-		LASSERT(lnet_acceptor_state.pta_sock);
-		return 0;
-	}
-
-	LASSERT(!lnet_acceptor_state.pta_sock);
-
-	return -ENETDOWN;
-}
-
-void
-lnet_acceptor_stop(void)
-{
-	struct sock *sk;
-
-	if (lnet_acceptor_state.pta_shutdown) /* not running */
-		return;
-
-	lnet_acceptor_state.pta_shutdown = 1;
-
-	sk = lnet_acceptor_state.pta_sock->sk;
-
-	/* wake up any sleepers using a safe method */
-	sk->sk_state_change(sk);
-
-	/* block until acceptor signals exit */
-	wait_for_completion(&lnet_acceptor_state.pta_signal);
-}
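(Editor's aside, not part of the original file: taken together, lnet_connect()
and lnet_accept() above implement a 16-byte handshake.  A condensed sketch of
the exchange, using only names visible in the code above; the connreq struct
itself lives in headers removed elsewhere in this commit:

	/* client (lnet_connect)            acceptor (lnet_accept)
	 *
	 * bind a privileged port     -->   reject if secure && port too high
	 * write the 16-byte connreq  -->   read acr_magic; note byte order
	 *   { acr_magic,                   read acr_version; NAK a mismatch
	 *     acr_version,                 read acr_nid; find a matching NI
	 *     acr_nid }                    ni->ni_lnd->lnd_accept(ni, sock)
	 */

A magic or version mismatch is answered with the acceptor's own magic/version
pair, so a newer peer can discover it is talking to an "old" node instead of
just timing out.)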

+ 0 - 2307
drivers/staging/lustre/lnet/lnet/api-ni.c

@@ -1,2307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/log2.h>
-#include <linux/ktime.h>
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-#define D_LNI D_CONSOLE
-
-struct lnet the_lnet;		/* THE state of the network */
-EXPORT_SYMBOL(the_lnet);
-
-static char *ip2nets = "";
-module_param(ip2nets, charp, 0444);
-MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
-
-static char *networks = "";
-module_param(networks, charp, 0444);
-MODULE_PARM_DESC(networks, "local networks");
-
-static char *routes = "";
-module_param(routes, charp, 0444);
-MODULE_PARM_DESC(routes, "routes to non-local networks");
-
-static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
-module_param(rnet_htable_size, int, 0444);
-MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
-		     struct lnet_process_id __user *ids, int n_ids);
-
-static char *
-lnet_get_routes(void)
-{
-	return routes;
-}
-
-static char *
-lnet_get_networks(void)
-{
-	char *nets;
-	int rc;
-
-	if (*networks && *ip2nets) {
-		LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n");
-		return NULL;
-	}
-
-	if (*ip2nets) {
-		rc = lnet_parse_ip2nets(&nets, ip2nets);
-		return !rc ? nets : NULL;
-	}
-
-	if (*networks)
-		return networks;
-
-	return "tcp";
-}
-
-static void
-lnet_init_locks(void)
-{
-	spin_lock_init(&the_lnet.ln_eq_wait_lock);
-	init_waitqueue_head(&the_lnet.ln_eq_waitq);
-	init_waitqueue_head(&the_lnet.ln_rc_waitq);
-	mutex_init(&the_lnet.ln_lnd_mutex);
-	mutex_init(&the_lnet.ln_api_mutex);
-}
-
-static int
-lnet_create_remote_nets_table(void)
-{
-	int i;
-	struct list_head *hash;
-
-	LASSERT(!the_lnet.ln_remote_nets_hash);
-	LASSERT(the_lnet.ln_remote_nets_hbits > 0);
-	hash = kvmalloc_array(LNET_REMOTE_NETS_HASH_SIZE, sizeof(*hash),
-			      GFP_KERNEL);
-	if (!hash) {
-		CERROR("Failed to create remote nets hash table\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&hash[i]);
-	the_lnet.ln_remote_nets_hash = hash;
-	return 0;
-}
-
-static void
-lnet_destroy_remote_nets_table(void)
-{
-	int i;
-
-	if (!the_lnet.ln_remote_nets_hash)
-		return;
-
-	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
-		LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
-
-	kvfree(the_lnet.ln_remote_nets_hash);
-	the_lnet.ln_remote_nets_hash = NULL;
-}
-
-static void
-lnet_destroy_locks(void)
-{
-	if (the_lnet.ln_res_lock) {
-		cfs_percpt_lock_free(the_lnet.ln_res_lock);
-		the_lnet.ln_res_lock = NULL;
-	}
-
-	if (the_lnet.ln_net_lock) {
-		cfs_percpt_lock_free(the_lnet.ln_net_lock);
-		the_lnet.ln_net_lock = NULL;
-	}
-}
-
-static int
-lnet_create_locks(void)
-{
-	lnet_init_locks();
-
-	the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
-	if (!the_lnet.ln_res_lock)
-		goto failed;
-
-	the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
-	if (!the_lnet.ln_net_lock)
-		goto failed;
-
-	return 0;
-
- failed:
-	lnet_destroy_locks();
-	return -ENOMEM;
-}
-
-static void lnet_assert_wire_constants(void)
-{
-	/*
-	 * Wire protocol assertions generated by 'wirecheck'
-	 * running on Linux robert.bartonsoftware.com 2.6.8-1.521
-	 * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
-	 * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
-	 */
-
-	/* Constants... */
-	BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
-	BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
-	BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
-	BUILD_BUG_ON(LNET_MSG_ACK != 0);
-	BUILD_BUG_ON(LNET_MSG_PUT != 1);
-	BUILD_BUG_ON(LNET_MSG_GET != 2);
-	BUILD_BUG_ON(LNET_MSG_REPLY != 3);
-	BUILD_BUG_ON(LNET_MSG_HELLO != 4);
-
-	/* Checks for struct lnet_handle_wire */
-	BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
-	BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) != 0);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_object_cookie) != 8);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
-
-	/* Checks for struct lnet_magicversion */
-	BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
-	BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_minor) != 6);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
-
-	/* Checks for struct lnet_hdr */
-	BUILD_BUG_ON((int)sizeof(struct lnet_hdr) != 72);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_nid) != 0);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_nid) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_nid) != 8);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_nid) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_pid) != 16);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_pid) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_pid) != 20);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_pid) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, type) != 24);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->type) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, payload_length) != 28);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->payload_length) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg) != 40);
-
-	/* Ack */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) != 16);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.match_bits) != 48);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.mlength) != 56);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) != 4);
-
-	/* Put */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) != 16);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.match_bits) != 48);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.hdr_data) != 56);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ptl_index) != 64);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.offset) != 68);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) != 4);
-
-	/* Get */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.return_wmd) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) != 16);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.match_bits) != 48);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.ptl_index) != 56);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.src_offset) != 60);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) != 4);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.sink_length) != 64);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) != 4);
-
-	/* Reply */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) != 16);
-
-	/* Hello */
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.incarnation) != 32);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) != 8);
-	BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.type) != 40);
-	BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) != 4);
-}
-
-static struct lnet_lnd *
-lnet_find_lnd_by_type(__u32 type)
-{
-	struct lnet_lnd *lnd;
-	struct list_head *tmp;
-
-	/* holding lnd mutex */
-	list_for_each(tmp, &the_lnet.ln_lnds) {
-		lnd = list_entry(tmp, struct lnet_lnd, lnd_list);
-
-		if (lnd->lnd_type == type)
-			return lnd;
-	}
-
-	return NULL;
-}
-
-void
-lnet_register_lnd(struct lnet_lnd *lnd)
-{
-	mutex_lock(&the_lnet.ln_lnd_mutex);
-
-	LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
-	LASSERT(!lnet_find_lnd_by_type(lnd->lnd_type));
-
-	list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
-	lnd->lnd_refcount = 0;
-
-	CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
-
-	mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_register_lnd);
-
-void
-lnet_unregister_lnd(struct lnet_lnd *lnd)
-{
-	mutex_lock(&the_lnet.ln_lnd_mutex);
-
-	LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
-	LASSERT(!lnd->lnd_refcount);
-
-	list_del(&lnd->lnd_list);
-	CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
-
-	mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_unregister_lnd);
-
-void
-lnet_counters_get(struct lnet_counters *counters)
-{
-	struct lnet_counters *ctr;
-	int i;
-
-	memset(counters, 0, sizeof(*counters));
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
-		counters->msgs_max     += ctr->msgs_max;
-		counters->msgs_alloc   += ctr->msgs_alloc;
-		counters->errors       += ctr->errors;
-		counters->send_count   += ctr->send_count;
-		counters->recv_count   += ctr->recv_count;
-		counters->route_count  += ctr->route_count;
-		counters->drop_count   += ctr->drop_count;
-		counters->send_length  += ctr->send_length;
-		counters->recv_length  += ctr->recv_length;
-		counters->route_length += ctr->route_length;
-		counters->drop_length  += ctr->drop_length;
-	}
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-EXPORT_SYMBOL(lnet_counters_get);
-
-void
-lnet_counters_reset(void)
-{
-	struct lnet_counters *counters;
-	int i;
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
-		memset(counters, 0, sizeof(struct lnet_counters));
-
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static char *
-lnet_res_type2str(int type)
-{
-	switch (type) {
-	default:
-		LBUG();
-	case LNET_COOKIE_TYPE_MD:
-		return "MD";
-	case LNET_COOKIE_TYPE_ME:
-		return "ME";
-	case LNET_COOKIE_TYPE_EQ:
-		return "EQ";
-	}
-}
-
-static void
-lnet_res_container_cleanup(struct lnet_res_container *rec)
-{
-	int count = 0;
-
-	if (!rec->rec_type) /* not set yet, it's uninitialized */
-		return;
-
-	while (!list_empty(&rec->rec_active)) {
-		struct list_head *e = rec->rec_active.next;
-
-		list_del_init(e);
-		if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
-			kfree(list_entry(e, struct lnet_eq, eq_list));
-
-		} else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
-			kfree(list_entry(e, struct lnet_libmd, md_list));
-
-		} else { /* NB: Active MEs should be attached on portals */
-			LBUG();
-		}
-		count++;
-	}
-
-	if (count > 0) {
-		/*
-		 * Found live MDs/MEs/EQs; the user really should unlink/free
-		 * all of them before finalizing LNet, but if they didn't,
-		 * we have to recycle the garbage for them
-		 */
-		CERROR("%d active elements on exit of %s container\n",
-		       count, lnet_res_type2str(rec->rec_type));
-	}
-
-	kfree(rec->rec_lh_hash);
-	rec->rec_lh_hash = NULL;
-
-	rec->rec_type = 0; /* mark it as finalized */
-}
-
-static int
-lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
-{
-	int rc = 0;
-	int i;
-
-	LASSERT(!rec->rec_type);
-
-	rec->rec_type = type;
-	INIT_LIST_HEAD(&rec->rec_active);
-	rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
-
-	/* Arbitrary choice of hash table size */
-	rec->rec_lh_hash = kvmalloc_cpt(LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]),
-					GFP_KERNEL, cpt);
-	if (!rec->rec_lh_hash) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	for (i = 0; i < LNET_LH_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
-
-	return 0;
-
-out:
-	CERROR("Failed to setup %s resource container\n",
-	       lnet_res_type2str(type));
-	lnet_res_container_cleanup(rec);
-	return rc;
-}
-
-static void
-lnet_res_containers_destroy(struct lnet_res_container **recs)
-{
-	struct lnet_res_container *rec;
-	int i;
-
-	cfs_percpt_for_each(rec, i, recs)
-		lnet_res_container_cleanup(rec);
-
-	cfs_percpt_free(recs);
-}
-
-static struct lnet_res_container **
-lnet_res_containers_create(int type)
-{
-	struct lnet_res_container **recs;
-	struct lnet_res_container *rec;
-	int rc;
-	int i;
-
-	recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
-	if (!recs) {
-		CERROR("Failed to allocate %s resource containers\n",
-		       lnet_res_type2str(type));
-		return NULL;
-	}
-
-	cfs_percpt_for_each(rec, i, recs) {
-		rc = lnet_res_container_setup(rec, i, type);
-		if (rc) {
-			lnet_res_containers_destroy(recs);
-			return NULL;
-		}
-	}
-
-	return recs;
-}
-
-struct lnet_libhandle *
-lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
-{
-	/* ALWAYS called with lnet_res_lock held */
-	struct list_head *head;
-	struct lnet_libhandle *lh;
-	unsigned int hash;
-
-	if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
-		return NULL;
-
-	hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
-	head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
-
-	list_for_each_entry(lh, head, lh_hash_chain) {
-		if (lh->lh_cookie == cookie)
-			return lh;
-	}
-
-	return NULL;
-}
-
-void
-lnet_res_lh_initialize(struct lnet_res_container *rec,
-		       struct lnet_libhandle *lh)
-{
-	/* ALWAYS called with lnet_res_lock held */
-	unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
-	unsigned int hash;
-
-	lh->lh_cookie = rec->rec_lh_cookie;
-	rec->rec_lh_cookie += 1 << ibits;
-
-	hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
-
-	list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
-}
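/*
 * Editor's sketch (not in the original file) of the cookie layout the two
 * helpers above imply: the low LNET_COOKIE_TYPE_BITS hold the resource type,
 * the next LNET_CPT_BITS hold the CPT, and everything above is a counter
 * bumped by (1 << ibits) on each use:
 *
 *	63              ibits                                 0
 *	+---------------+---------------+---------------------+
 *	| counter       | cpt           | type (MD/ME/EQ)     |
 *	+---------------+---------------+---------------------+
 *
 * lnet_res_lh_lookup() first rejects cookies of the wrong type via
 * LNET_COOKIE_MASK, then hashes the counter bits into rec_lh_hash.
 */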
-
-static int lnet_unprepare(void);
-
-static int
-lnet_prepare(lnet_pid_t requested_pid)
-{
-	/* Prepare to bring up the network */
-	struct lnet_res_container **recs;
-	int rc = 0;
-
-	if (requested_pid == LNET_PID_ANY) {
-		/* Don't instantiate LNET just for me */
-		return -ENETDOWN;
-	}
-
-	LASSERT(!the_lnet.ln_refcount);
-
-	the_lnet.ln_routing = 0;
-
-	LASSERT(!(requested_pid & LNET_PID_USERFLAG));
-	the_lnet.ln_pid = requested_pid;
-
-	INIT_LIST_HEAD(&the_lnet.ln_test_peers);
-	INIT_LIST_HEAD(&the_lnet.ln_nis);
-	INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
-	INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
-	INIT_LIST_HEAD(&the_lnet.ln_routers);
-	INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
-	INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
-
-	rc = lnet_create_remote_nets_table();
-	if (rc)
-		goto failed;
-	/*
-	 * NB the interface cookie in wire handles guards against delayed
-	 * replies and ACKs appearing valid after reboot.
-	 */
-	the_lnet.ln_interface_cookie = ktime_get_ns();
-
-	the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
-						sizeof(struct lnet_counters));
-	if (!the_lnet.ln_counters) {
-		CERROR("Failed to allocate counters for LNet\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	rc = lnet_peer_tables_create();
-	if (rc)
-		goto failed;
-
-	rc = lnet_msg_containers_create();
-	if (rc)
-		goto failed;
-
-	rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
-				      LNET_COOKIE_TYPE_EQ);
-	if (rc)
-		goto failed;
-
-	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
-	if (!recs) {
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	the_lnet.ln_me_containers = recs;
-
-	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
-	if (!recs) {
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	the_lnet.ln_md_containers = recs;
-
-	rc = lnet_portals_create();
-	if (rc) {
-		CERROR("Failed to create portals for LNet: %d\n", rc);
-		goto failed;
-	}
-
-	return 0;
-
- failed:
-	lnet_unprepare();
-	return rc;
-}
-
-static int
-lnet_unprepare(void)
-{
-	/*
-	 * NB no LNET_LOCK since this is the last reference.  All LND instances
-	 * have shut down already, so it is safe to unlink and free all
-	 * descriptors, even those that appear committed to a network op (eg MD
-	 * with non-zero pending count)
-	 */
-	lnet_fail_nid(LNET_NID_ANY, 0);
-
-	LASSERT(!the_lnet.ln_refcount);
-	LASSERT(list_empty(&the_lnet.ln_test_peers));
-	LASSERT(list_empty(&the_lnet.ln_nis));
-	LASSERT(list_empty(&the_lnet.ln_nis_cpt));
-	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
-	lnet_portals_destroy();
-
-	if (the_lnet.ln_md_containers) {
-		lnet_res_containers_destroy(the_lnet.ln_md_containers);
-		the_lnet.ln_md_containers = NULL;
-	}
-
-	if (the_lnet.ln_me_containers) {
-		lnet_res_containers_destroy(the_lnet.ln_me_containers);
-		the_lnet.ln_me_containers = NULL;
-	}
-
-	lnet_res_container_cleanup(&the_lnet.ln_eq_container);
-
-	lnet_msg_containers_destroy();
-	lnet_peer_tables_destroy();
-	lnet_rtrpools_free(0);
-
-	if (the_lnet.ln_counters) {
-		cfs_percpt_free(the_lnet.ln_counters);
-		the_lnet.ln_counters = NULL;
-	}
-	lnet_destroy_remote_nets_table();
-
-	return 0;
-}
-
-struct lnet_ni  *
-lnet_net2ni_locked(__u32 net, int cpt)
-{
-	struct list_head *tmp;
-	struct lnet_ni *ni;
-
-	LASSERT(cpt != LNET_LOCK_EX);
-
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		if (LNET_NIDNET(ni->ni_nid) == net) {
-			lnet_ni_addref_locked(ni, cpt);
-			return ni;
-		}
-	}
-
-	return NULL;
-}
-
-struct lnet_ni *
-lnet_net2ni(__u32 net)
-{
-	struct lnet_ni *ni;
-
-	lnet_net_lock(0);
-	ni = lnet_net2ni_locked(net, 0);
-	lnet_net_unlock(0);
-
-	return ni;
-}
-EXPORT_SYMBOL(lnet_net2ni);
-
-static unsigned int
-lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
-{
-	__u64 key = nid;
-	unsigned int val;
-
-	LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
-
-	if (number == 1)
-		return 0;
-
-	val = hash_long(key, LNET_CPT_BITS);
-	/* NB: LNET_CPT_NUMBER doesn't have to be a power of 2 */
-	if (val < number)
-		return val;
-
-	return (unsigned int)(key + val + (val >> 1)) % number;
-}
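/*
 * Editor's worked example (not in the original file): hash_long() yields a
 * value in [0, 2^LNET_CPT_BITS), but the CPT count need not be a power of
 * two.  Values already below `number' are used as-is; larger ones are folded
 * back with a modulo that mixes the key in again.  E.g. with number == 3 and
 * val == 6, the result is (key + 6 + 3) % 3, so different NIDs that hash to
 * the same val can still land on different CPTs.
 */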
-
-int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
-{
-	struct lnet_ni *ni;
-
-	/* must be called with lnet_net_lock held */
-	if (LNET_CPT_NUMBER == 1)
-		return 0; /* the only one */
-
-	/* taking lnet_net_lock(any) would be OK */
-	if (!list_empty(&the_lnet.ln_nis_cpt)) {
-		list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
-			if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
-				continue;
-
-			LASSERT(ni->ni_cpts);
-			return ni->ni_cpts[lnet_nid_cpt_hash
-					   (nid, ni->ni_ncpts)];
-		}
-	}
-
-	return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-}
-
-int
-lnet_cpt_of_nid(lnet_nid_t nid)
-{
-	int cpt;
-	int cpt2;
-
-	if (LNET_CPT_NUMBER == 1)
-		return 0; /* the only one */
-
-	if (list_empty(&the_lnet.ln_nis_cpt))
-		return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
-	cpt = lnet_net_lock_current();
-	cpt2 = lnet_cpt_of_nid_locked(nid);
-	lnet_net_unlock(cpt);
-
-	return cpt2;
-}
-EXPORT_SYMBOL(lnet_cpt_of_nid);
-
-int
-lnet_islocalnet(__u32 net)
-{
-	struct lnet_ni *ni;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-
-	ni = lnet_net2ni_locked(net, cpt);
-	if (ni)
-		lnet_ni_decref_locked(ni, cpt);
-
-	lnet_net_unlock(cpt);
-
-	return !!ni;
-}
-
-struct lnet_ni  *
-lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
-{
-	struct lnet_ni *ni;
-	struct list_head *tmp;
-
-	LASSERT(cpt != LNET_LOCK_EX);
-
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		if (ni->ni_nid == nid) {
-			lnet_ni_addref_locked(ni, cpt);
-			return ni;
-		}
-	}
-
-	return NULL;
-}
-
-int
-lnet_islocalnid(lnet_nid_t nid)
-{
-	struct lnet_ni *ni;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-	ni = lnet_nid2ni_locked(nid, cpt);
-	if (ni)
-		lnet_ni_decref_locked(ni, cpt);
-	lnet_net_unlock(cpt);
-
-	return !!ni;
-}
-
-int
-lnet_count_acceptor_nis(void)
-{
-	/* Return the # of NIs that need the acceptor. */
-	int count = 0;
-	struct list_head *tmp;
-	struct lnet_ni *ni;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		if (ni->ni_lnd->lnd_accept)
-			count++;
-	}
-
-	lnet_net_unlock(cpt);
-
-	return count;
-}
-
-static struct lnet_ping_info *
-lnet_ping_info_create(int num_ni)
-{
-	struct lnet_ping_info *ping_info;
-	unsigned int infosz;
-
-	infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]);
-	ping_info = kvzalloc(infosz, GFP_KERNEL);
-	if (!ping_info) {
-		CERROR("Can't allocate ping info[%d]\n", num_ni);
-		return NULL;
-	}
-
-	ping_info->pi_nnis = num_ni;
-	ping_info->pi_pid = the_lnet.ln_pid;
-	ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
-	ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
-
-	return ping_info;
-}
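/*
 * Editor's note (not in the original file): pi_ni[] is a flexible array
 * member, so offsetof(struct lnet_ping_info, pi_ni[num_ni]) sizes the fixed
 * header plus exactly num_ni status slots in one allocation.  The same
 * expression reappears below as the MD length in lnet_ping_info_setup(),
 * which keeps the exported buffer and the allocation in lockstep.
 */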
-
-static inline int
-lnet_get_ni_count(void)
-{
-	struct lnet_ni *ni;
-	int count = 0;
-
-	lnet_net_lock(0);
-
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
-		count++;
-
-	lnet_net_unlock(0);
-
-	return count;
-}
-
-static inline void
-lnet_ping_info_free(struct lnet_ping_info *pinfo)
-{
-	kvfree(pinfo);
-}
-
-static void
-lnet_ping_info_destroy(void)
-{
-	struct lnet_ni *ni;
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		lnet_ni_lock(ni);
-		ni->ni_status = NULL;
-		lnet_ni_unlock(ni);
-	}
-
-	lnet_ping_info_free(the_lnet.ln_ping_info);
-	the_lnet.ln_ping_info = NULL;
-
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static void
-lnet_ping_event_handler(struct lnet_event *event)
-{
-	struct lnet_ping_info *pinfo = event->md.user_ptr;
-
-	if (event->unlinked)
-		pinfo->pi_features = LNET_PING_FEAT_INVAL;
-}
-
-static int
-lnet_ping_info_setup(struct lnet_ping_info **ppinfo,
-		     struct lnet_handle_md *md_handle,
-		     int ni_count, bool set_eq)
-{
-	struct lnet_process_id id = {LNET_NID_ANY, LNET_PID_ANY};
-	struct lnet_handle_me me_handle;
-	struct lnet_md md = { NULL };
-	int rc, rc2;
-
-	if (set_eq) {
-		rc = LNetEQAlloc(0, lnet_ping_event_handler,
-				 &the_lnet.ln_ping_target_eq);
-		if (rc) {
-			CERROR("Can't allocate ping EQ: %d\n", rc);
-			return rc;
-		}
-	}
-
-	*ppinfo = lnet_ping_info_create(ni_count);
-	if (!*ppinfo) {
-		rc = -ENOMEM;
-		goto failed_0;
-	}
-
-	rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
-			  LNET_PROTO_PING_MATCHBITS, 0,
-			  LNET_UNLINK, LNET_INS_AFTER,
-			  &me_handle);
-	if (rc) {
-		CERROR("Can't create ping ME: %d\n", rc);
-		goto failed_1;
-	}
-
-	/* initialize md content */
-	md.start = *ppinfo;
-	md.length = offsetof(struct lnet_ping_info,
-			     pi_ni[(*ppinfo)->pi_nnis]);
-	md.threshold = LNET_MD_THRESH_INF;
-	md.max_size = 0;
-	md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
-		     LNET_MD_MANAGE_REMOTE;
-	md.user_ptr  = NULL;
-	md.eq_handle = the_lnet.ln_ping_target_eq;
-	md.user_ptr = *ppinfo;
-
-	rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
-	if (rc) {
-		CERROR("Can't attach ping MD: %d\n", rc);
-		goto failed_2;
-	}
-
-	return 0;
-
-failed_2:
-	rc2 = LNetMEUnlink(me_handle);
-	LASSERT(!rc2);
-failed_1:
-	lnet_ping_info_free(*ppinfo);
-	*ppinfo = NULL;
-failed_0:
-	if (set_eq)
-		LNetEQFree(the_lnet.ln_ping_target_eq);
-	return rc;
-}
-
-static void
-lnet_ping_md_unlink(struct lnet_ping_info *pinfo,
-		    struct lnet_handle_md *md_handle)
-{
-	LNetMDUnlink(*md_handle);
-	LNetInvalidateMDHandle(md_handle);
-
-	/* NB md could be busy; this just starts the unlink */
-	while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
-		CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
-		set_current_state(TASK_IDLE);
-		schedule_timeout(HZ);
-	}
-}
-
-static void
-lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
-{
-	struct lnet_ni_status *ns;
-	struct lnet_ni *ni;
-	int i = 0;
-
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		LASSERT(i < ping_info->pi_nnis);
-
-		ns = &ping_info->pi_ni[i];
-
-		ns->ns_nid = ni->ni_nid;
-
-		lnet_ni_lock(ni);
-		ns->ns_status = (ni->ni_status) ?
-				 ni->ni_status->ns_status : LNET_NI_STATUS_UP;
-		ni->ni_status = ns;
-		lnet_ni_unlock(ni);
-
-		i++;
-	}
-}
-
-static void
-lnet_ping_target_update(struct lnet_ping_info *pinfo,
-			struct lnet_handle_md md_handle)
-{
-	struct lnet_ping_info *old_pinfo = NULL;
-	struct lnet_handle_md old_md;
-
-	/* switch the NIs to point to the new ping info created */
-	lnet_net_lock(LNET_LOCK_EX);
-
-	if (!the_lnet.ln_routing)
-		pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
-	lnet_ping_info_install_locked(pinfo);
-
-	if (the_lnet.ln_ping_info) {
-		old_pinfo = the_lnet.ln_ping_info;
-		old_md = the_lnet.ln_ping_target_md;
-	}
-	the_lnet.ln_ping_target_md = md_handle;
-	the_lnet.ln_ping_info = pinfo;
-
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	if (old_pinfo) {
-		/* unlink the old ping info */
-		lnet_ping_md_unlink(old_pinfo, &old_md);
-		lnet_ping_info_free(old_pinfo);
-	}
-}
-
-static void
-lnet_ping_target_fini(void)
-{
-	int rc;
-
-	lnet_ping_md_unlink(the_lnet.ln_ping_info,
-			    &the_lnet.ln_ping_target_md);
-
-	rc = LNetEQFree(the_lnet.ln_ping_target_eq);
-	LASSERT(!rc);
-
-	lnet_ping_info_destroy();
-}
-
-static int
-lnet_ni_tq_credits(struct lnet_ni *ni)
-{
-	int credits;
-
-	LASSERT(ni->ni_ncpts >= 1);
-
-	if (ni->ni_ncpts == 1)
-		return ni->ni_maxtxcredits;
-
-	credits = ni->ni_maxtxcredits / ni->ni_ncpts;
-	credits = max(credits, 8 * ni->ni_peertxcredits);
-	credits = min(credits, ni->ni_maxtxcredits);
-
-	return credits;
-}
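/*
 * Editor's worked example (not in the original file): with
 * ni_maxtxcredits == 256, ni_ncpts == 4 and ni_peertxcredits == 8, the even
 * split is 256 / 4 == 64; the lower bound max(64, 8 * 8) == 64 and the upper
 * bound min(64, 256) == 64 leave each tx queue 64 credits.  The max() clause
 * only bites when many CPTs would starve a queue below eight peers' worth
 * of credits.
 */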
-
-static void
-lnet_ni_unlink_locked(struct lnet_ni *ni)
-{
-	if (!list_empty(&ni->ni_cptlist)) {
-		list_del_init(&ni->ni_cptlist);
-		lnet_ni_decref_locked(ni, 0);
-	}
-
-	/* move it to the zombie list so nobody can find it anymore */
-	LASSERT(!list_empty(&ni->ni_list));
-	list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
-	lnet_ni_decref_locked(ni, 0);	/* drop ln_nis' ref */
-}
-
-static void
-lnet_clear_zombies_nis_locked(void)
-{
-	int i;
-	int islo;
-	struct lnet_ni *ni;
-	struct lnet_ni *temp;
-
-	/*
-	 * Now wait for the NIs I just nuked to show up on ln_nis_zombie
-	 * and shut them down in guaranteed thread context
-	 */
-	i = 2;
-	list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis_zombie, ni_list) {
-		int *ref;
-		int j;
-
-		list_del_init(&ni->ni_list);
-		cfs_percpt_for_each(ref, j, ni->ni_refs) {
-			if (!*ref)
-				continue;
-			/* still busy, add it back to zombie list */
-			list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
-			break;
-		}
-
-		if (!list_empty(&ni->ni_list)) {
-			lnet_net_unlock(LNET_LOCK_EX);
-			++i;
-			if ((i & (-i)) == i) {	/* log at power-of-2 intervals */
-				CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n",
-				       libcfs_nid2str(ni->ni_nid));
-			}
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ);
-			lnet_net_lock(LNET_LOCK_EX);
-			continue;
-		}
-
-		ni->ni_lnd->lnd_refcount--;
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		islo = ni->ni_lnd->lnd_type == LOLND;
-
-		LASSERT(!in_interrupt());
-		ni->ni_lnd->lnd_shutdown(ni);
-
-		/*
-		 * can't deref lnd anymore now; it might have unregistered
-		 * itself...
-		 */
-		if (!islo)
-			CDEBUG(D_LNI, "Removed LNI %s\n",
-			       libcfs_nid2str(ni->ni_nid));
-
-		lnet_ni_free(ni);
-		i = 2;
-
-		lnet_net_lock(LNET_LOCK_EX);
-	}
-}
-
-static void
-lnet_shutdown_lndnis(void)
-{
-	struct lnet_ni *ni;
-	struct lnet_ni *temp;
-	int i;
-
-	/* NB called holding the global mutex */
-
-	/* All quiet on the API front */
-	LASSERT(!the_lnet.ln_shutdown);
-	LASSERT(!the_lnet.ln_refcount);
-	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
-	lnet_net_lock(LNET_LOCK_EX);
-	the_lnet.ln_shutdown = 1;	/* flag shutdown */
-
-	/* Unlink NIs from the global table */
-	list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis, ni_list) {
-		lnet_ni_unlink_locked(ni);
-	}
-
-	/* Drop the cached loopback NI. */
-	if (the_lnet.ln_loni) {
-		lnet_ni_decref_locked(the_lnet.ln_loni, 0);
-		the_lnet.ln_loni = NULL;
-	}
-
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	/*
-	 * Clear lazy portals and drop delayed messages which hold refs
-	 * on their lnet_msg::msg_rxpeer
-	 */
-	for (i = 0; i < the_lnet.ln_nportals; i++)
-		LNetClearLazyPortal(i);
-
-	/*
-	 * Clear the peer table and wait for all peers to go (they hold refs on
-	 * their NIs)
-	 */
-	lnet_peer_tables_cleanup(NULL);
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	lnet_clear_zombies_nis_locked();
-	the_lnet.ln_shutdown = 0;
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/* shut down the NI and release the refcount */
-static void
-lnet_shutdown_lndni(struct lnet_ni *ni)
-{
-	int i;
-
-	lnet_net_lock(LNET_LOCK_EX);
-	lnet_ni_unlink_locked(ni);
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	/* clear messages for this NI on the lazy portal */
-	for (i = 0; i < the_lnet.ln_nportals; i++)
-		lnet_clear_lazy_portal(ni, i, "Shutting down NI");
-
-	/* Do peer table cleanup for this ni */
-	lnet_peer_tables_cleanup(ni);
-
-	lnet_net_lock(LNET_LOCK_EX);
-	lnet_clear_zombies_nis_locked();
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
-{
-	struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
-	int rc = -EINVAL;
-	int lnd_type;
-	struct lnet_lnd *lnd;
-	struct lnet_tx_queue *tq;
-	int i;
-	u32 seed;
-
-	lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-
-	LASSERT(libcfs_isknown_lnd(lnd_type));
-
-	if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
-	    lnd_type == IIBLND || lnd_type == VIBLND) {
-		CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
-		goto failed0;
-	}
-
-	/* Make sure this new NI is unique. */
-	lnet_net_lock(LNET_LOCK_EX);
-	rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
-	lnet_net_unlock(LNET_LOCK_EX);
-	if (!rc) {
-		if (lnd_type == LOLND) {
-			lnet_ni_free(ni);
-			return 0;
-		}
-
-		CERROR("Net %s is not unique\n",
-		       libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
-		rc = -EEXIST;
-		goto failed0;
-	}
-
-	mutex_lock(&the_lnet.ln_lnd_mutex);
-	lnd = lnet_find_lnd_by_type(lnd_type);
-
-	if (!lnd) {
-		mutex_unlock(&the_lnet.ln_lnd_mutex);
-		rc = request_module("%s", libcfs_lnd2modname(lnd_type));
-		mutex_lock(&the_lnet.ln_lnd_mutex);
-
-		lnd = lnet_find_lnd_by_type(lnd_type);
-		if (!lnd) {
-			mutex_unlock(&the_lnet.ln_lnd_mutex);
-			CERROR("Can't load LND %s, module %s, rc=%d\n",
-			       libcfs_lnd2str(lnd_type),
-			       libcfs_lnd2modname(lnd_type), rc);
-			rc = -EINVAL;
-			goto failed0;
-		}
-	}
-
-	lnet_net_lock(LNET_LOCK_EX);
-	lnd->lnd_refcount++;
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	ni->ni_lnd = lnd;
-
-	if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
-		lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
-
-	if (lnd_tunables) {
-		ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
-					      GFP_NOFS);
-		if (!ni->ni_lnd_tunables) {
-			mutex_unlock(&the_lnet.ln_lnd_mutex);
-			rc = -ENOMEM;
-			goto failed0;
-		}
-		memcpy(ni->ni_lnd_tunables, lnd_tunables,
-		       sizeof(*ni->ni_lnd_tunables));
-	}
-
-	/*
-	 * If given some LND tunable parameters, parse those now to
-	 * override the values in the NI structure.
-	 */
-	if (conf) {
-		if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
-			ni->ni_peerrtrcredits =
-				conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
-		if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
-			ni->ni_peertimeout =
-				conf->cfg_config_u.cfg_net.net_peer_timeout;
-		if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
-			ni->ni_peertxcredits =
-				conf->cfg_config_u.cfg_net.net_peer_tx_credits;
-		if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
-			ni->ni_maxtxcredits =
-				conf->cfg_config_u.cfg_net.net_max_tx_credits;
-	}
-
-	rc = lnd->lnd_startup(ni);
-
-	mutex_unlock(&the_lnet.ln_lnd_mutex);
-
-	if (rc) {
-		LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
-				   rc, libcfs_lnd2str(lnd->lnd_type));
-		lnet_net_lock(LNET_LOCK_EX);
-		lnd->lnd_refcount--;
-		lnet_net_unlock(LNET_LOCK_EX);
-		goto failed0;
-	}
-
-	LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
-
-	lnet_net_lock(LNET_LOCK_EX);
-	/* refcount for ln_nis */
-	lnet_ni_addref_locked(ni, 0);
-	list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
-	if (ni->ni_cpts) {
-		lnet_ni_addref_locked(ni, 0);
-		list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
-	}
-
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	if (lnd->lnd_type == LOLND) {
-		lnet_ni_addref(ni);
-		LASSERT(!the_lnet.ln_loni);
-		the_lnet.ln_loni = ni;
-		return 0;
-	}
-
-	if (!ni->ni_peertxcredits || !ni->ni_maxtxcredits) {
-		LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
-				   libcfs_lnd2str(lnd->lnd_type),
-				   !ni->ni_peertxcredits ?
-				   "" : "per-peer ");
-		/*
-		 * shut down the NI, since if we get here it must already
-		 * have been started
-		 */
-		lnet_shutdown_lndni(ni);
-		return -EINVAL;
-	}
-
-	cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
-		tq->tq_credits_min =
-		tq->tq_credits_max =
-		tq->tq_credits = lnet_ni_tq_credits(ni);
-	}
-
-	/* Nodes with small feet have little entropy. The NID for this
-	 * node gives the most entropy in the low bits.
-	 */
-	seed = LNET_NIDADDR(ni->ni_nid);
-	add_device_randomness(&seed, sizeof(seed));
-
-	CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
-	       libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
-	       lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
-	       ni->ni_peerrtrcredits, ni->ni_peertimeout);
-
-	return 0;
-failed0:
-	lnet_ni_free(ni);
-	return rc;
-}
-
-static int
-lnet_startup_lndnis(struct list_head *nilist)
-{
-	struct lnet_ni *ni;
-	int rc;
-	int ni_count = 0;
-
-	while (!list_empty(nilist)) {
-		ni = list_entry(nilist->next, struct lnet_ni, ni_list);
-		list_del(&ni->ni_list);
-		rc = lnet_startup_lndni(ni, NULL);
-
-		if (rc < 0)
-			goto failed;
-
-		ni_count++;
-	}
-
-	return ni_count;
-failed:
-	lnet_shutdown_lndnis();
-
-	return rc;
-}
-
-/**
- * Initialize LNet library.
- *
- * Automatically called at module loading time. Caller has to call
- * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
- * latter returned 0. It must be called exactly once.
- *
- * \retval 0 on success
- * \retval -ve on failures.
- */
-int lnet_lib_init(void)
-{
-	int rc;
-
-	lnet_assert_wire_constants();
-
-	memset(&the_lnet, 0, sizeof(the_lnet));
-
-	/* refer to global cfs_cpt_tab for now */
-	the_lnet.ln_cpt_table	= cfs_cpt_tab;
-	the_lnet.ln_cpt_number	= cfs_cpt_number(cfs_cpt_tab);
-
-	LASSERT(the_lnet.ln_cpt_number > 0);
-	if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
-		/* we are at risk of exhausting the lh_cookie space */
-		CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change the CPT-table setting and retry\n",
-		       the_lnet.ln_cpt_number, LNET_CPT_MAX);
-		return -E2BIG;
-	}
-
-	while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
-		the_lnet.ln_cpt_bits++;
-
-	rc = lnet_create_locks();
-	if (rc) {
-		CERROR("Can't create LNet global locks: %d\n", rc);
-		return rc;
-	}
-
-	the_lnet.ln_refcount = 0;
-	LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh);
-	INIT_LIST_HEAD(&the_lnet.ln_lnds);
-	INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
-	INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
-
-	/*
-	 * The hash table size is the number of bits it takes to express
-	 * rnet_htable_size, minus 1 (better to underestimate than
-	 * overestimate, so we don't waste memory).
-	 */
-	if (rnet_htable_size <= 0)
-		rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
-	else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
-		rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
-	the_lnet.ln_remote_nets_hbits = max_t(int, 1,
-					   order_base_2(rnet_htable_size) - 1);
-
-	/*
-	 * All LNDs apart from the LOLND are in separate modules.  They
-	 * register themselves when their module loads, and unregister
-	 * themselves when their module is unloaded.
-	 */
-	lnet_register_lnd(&the_lolnd);
-	return 0;
-}
-
-/**
- * Finalize LNet library.
- *
- * \pre lnet_lib_init() called with success.
- * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
- */
-void lnet_lib_exit(void)
-{
-	LASSERT(!the_lnet.ln_refcount);
-
-	while (!list_empty(&the_lnet.ln_lnds))
-		lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
-					       struct lnet_lnd, lnd_list));
-	lnet_destroy_locks();
-}
-
-/**
- * Set LNet PID and start LNet interfaces, routing, and forwarding.
- *
- * Users must call this function at least once before any other functions.
- * For each successful call there must be a corresponding call to
- * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
- * ignored.
- *
- * The PID used by LNet may be different from the one requested.
- * See LNetGetId().
- *
- * \param requested_pid PID requested by the caller.
- *
- * \return >= 0 on success, and < 0 error code on failures.
- */
-int
-LNetNIInit(lnet_pid_t requested_pid)
-{
-	int im_a_router = 0;
-	int rc;
-	int ni_count;
-	struct lnet_ping_info *pinfo;
-	struct lnet_handle_md md_handle;
-	struct list_head net_head;
-
-	INIT_LIST_HEAD(&net_head);
-
-	mutex_lock(&the_lnet.ln_api_mutex);
-
-	CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
-
-	if (the_lnet.ln_refcount > 0) {
-		rc = the_lnet.ln_refcount++;
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-	}
-
-	rc = lnet_prepare(requested_pid);
-	if (rc) {
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-	}
-
-	/* Add in the loopback network */
-	if (!lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head)) {
-		rc = -ENOMEM;
-		goto err_empty_list;
-	}
-
-	/*
-	 * If LNet is being initialized via DLC it is possible
-	 * that the user requests not to load module parameters (ones which
-	 * are supported by DLC) on initialization.  Therefore, make sure not
-	 * to load networks, routes and forwarding from module parameters
-	 * in this case. On cleanup after a failure, only clean up the
-	 * routes if they have been loaded.
-	 */
-	if (!the_lnet.ln_nis_from_mod_params) {
-		rc = lnet_parse_networks(&net_head, lnet_get_networks());
-		if (rc < 0)
-			goto err_empty_list;
-	}
-
-	ni_count = lnet_startup_lndnis(&net_head);
-	if (ni_count < 0) {
-		rc = ni_count;
-		goto err_empty_list;
-	}
-
-	if (!the_lnet.ln_nis_from_mod_params) {
-		rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
-		if (rc)
-			goto err_shutdown_lndnis;
-
-		rc = lnet_check_routes();
-		if (rc)
-			goto err_destroy_routes;
-
-		rc = lnet_rtrpools_alloc(im_a_router);
-		if (rc)
-			goto err_destroy_routes;
-	}
-
-	rc = lnet_acceptor_start();
-	if (rc)
-		goto err_destroy_routes;
-
-	the_lnet.ln_refcount = 1;
-	/* Now I may use my own API functions... */
-
-	rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
-	if (rc)
-		goto err_acceptor_stop;
-
-	lnet_ping_target_update(pinfo, md_handle);
-
-	rc = lnet_router_checker_start();
-	if (rc)
-		goto err_stop_ping;
-
-	lnet_fault_init();
-	lnet_router_debugfs_init();
-
-	mutex_unlock(&the_lnet.ln_api_mutex);
-
-	return 0;
-
-err_stop_ping:
-	lnet_ping_target_fini();
-err_acceptor_stop:
-	the_lnet.ln_refcount = 0;
-	lnet_acceptor_stop();
-err_destroy_routes:
-	if (!the_lnet.ln_nis_from_mod_params)
-		lnet_destroy_routes();
-err_shutdown_lndnis:
-	lnet_shutdown_lndnis();
-err_empty_list:
-	lnet_unprepare();
-	LASSERT(rc < 0);
-	mutex_unlock(&the_lnet.ln_api_mutex);
-	while (!list_empty(&net_head)) {
-		struct lnet_ni *ni;
-
-		ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-		list_del_init(&ni->ni_list);
-		lnet_ni_free(ni);
-	}
-	return rc;
-}
-EXPORT_SYMBOL(LNetNIInit);
-
-/**
- * Stop LNet interfaces, routing, and forwarding.
- *
- * Users must call this function once for each successful call to LNetNIInit().
- * Once the LNetNIFini() operation has been started, the results of pending
- * API operations are undefined.
- *
- * \return always 0 for current implementation.
- */
-int
-LNetNIFini(void)
-{
-	mutex_lock(&the_lnet.ln_api_mutex);
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (the_lnet.ln_refcount != 1) {
-		the_lnet.ln_refcount--;
-	} else {
-		LASSERT(!the_lnet.ln_niinit_self);
-
-		lnet_fault_fini();
-		lnet_router_debugfs_fini();
-		lnet_router_checker_stop();
-		lnet_ping_target_fini();
-
-		/* Teardown fns that use my own API functions BEFORE here */
-		the_lnet.ln_refcount = 0;
-
-		lnet_acceptor_stop();
-		lnet_destroy_routes();
-		lnet_shutdown_lndnis();
-		lnet_unprepare();
-	}
-
-	mutex_unlock(&the_lnet.ln_api_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(LNetNIFini);
-
-/**
- * Grabs the NI data from the ni structure and fills in the out
- * parameters.
- *
- * \param[in]  ni	network interface structure
- * \param[out] config	NI configuration
- */
-static void
-lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
-{
-	struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
-	struct lnet_ioctl_net_config *net_config;
-	size_t min_size, tunable_size = 0;
-	int i;
-
-	if (!ni || !config)
-		return;
-
-	net_config = (struct lnet_ioctl_net_config *)config->cfg_bulk;
-	if (!net_config)
-		return;
-
-	BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
-		     ARRAY_SIZE(net_config->ni_interfaces));
-
-	for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
-		if (!ni->ni_interfaces[i])
-			break;
-
-		strncpy(net_config->ni_interfaces[i],
-			ni->ni_interfaces[i],
-			sizeof(net_config->ni_interfaces[i]));
-	}
-
-	config->cfg_nid = ni->ni_nid;
-	config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
-	config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
-	config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
-	config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
-
-	net_config->ni_status = ni->ni_status->ns_status;
-
-	if (ni->ni_cpts) {
-		int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
-
-		for (i = 0; i < num_cpts; i++)
-			net_config->ni_cpts[i] = ni->ni_cpts[i];
-
-		config->cfg_ncpts = num_cpts;
-	}
-
-	/*
-	 * See if userland tools sent in a newer and larger version
-	 * of struct lnet_tunables than what the kernel uses.
-	 */
-	min_size = sizeof(*config) + sizeof(*net_config);
-
-	if (config->cfg_hdr.ioc_len > min_size)
-		tunable_size = config->cfg_hdr.ioc_len - min_size;
-
-	/* Don't copy too much data to user space */
-	min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
-	lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
-
-	if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
-		memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
-		config->cfg_config_u.cfg_net.net_interface_count = 1;
-
-		/* Tell userland that the kernel side has less data */
-		if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
-			min_size = tunable_size - sizeof(*ni->ni_lnd_tunables);
-			config->cfg_hdr.ioc_len -= min_size;
-		}
-	}
-}
-
-static int
-lnet_get_net_config(struct lnet_ioctl_config_data *config)
-{
-	struct lnet_ni *ni;
-	struct list_head *tmp;
-	int idx = config->cfg_count;
-	int cpt, i = 0;
-	int rc = -ENOENT;
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		if (i++ != idx)
-			continue;
-
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-		lnet_ni_lock(ni);
-		lnet_fill_ni_info(ni, config);
-		lnet_ni_unlock(ni);
-		rc = 0;
-		break;
-	}
-
-	lnet_net_unlock(cpt);
-	return rc;
-}
-
-int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
-{
-	char *nets = conf->cfg_config_u.cfg_net.net_intf;
-	struct lnet_ping_info *pinfo;
-	struct lnet_handle_md md_handle;
-	struct lnet_ni *ni;
-	struct list_head net_head;
-	struct lnet_remotenet *rnet;
-	int rc;
-
-	INIT_LIST_HEAD(&net_head);
-
-	/* Create a ni structure for the network string */
-	rc = lnet_parse_networks(&net_head, nets);
-	if (rc <= 0)
-		return !rc ? -EINVAL : rc;
-
-	mutex_lock(&the_lnet.ln_api_mutex);
-
-	if (rc > 1) {
-		rc = -EINVAL; /* only add one interface per call */
-		goto failed0;
-	}
-
-	ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-
-	lnet_net_lock(LNET_LOCK_EX);
-	rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
-	lnet_net_unlock(LNET_LOCK_EX);
-	/*
-	 * make sure that the net added doesn't invalidate the current
-	 * configuration LNet is keeping
-	 */
-	if (rnet) {
-		CERROR("Adding net %s will invalidate routing configuration\n",
-		       nets);
-		rc = -EUSERS;
-		goto failed0;
-	}
-
-	rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
-				  false);
-	if (rc)
-		goto failed0;
-
-	list_del_init(&ni->ni_list);
-
-	rc = lnet_startup_lndni(ni, conf);
-	if (rc)
-		goto failed1;
-
-	if (ni->ni_lnd->lnd_accept) {
-		rc = lnet_acceptor_start();
-		if (rc < 0) {
-			/* shut down the ni that we just started */
-			CERROR("Failed to start up acceptor thread\n");
-			lnet_shutdown_lndni(ni);
-			goto failed1;
-		}
-	}
-
-	lnet_ping_target_update(pinfo, md_handle);
-	mutex_unlock(&the_lnet.ln_api_mutex);
-
-	return 0;
-
-failed1:
-	lnet_ping_md_unlink(pinfo, &md_handle);
-	lnet_ping_info_free(pinfo);
-failed0:
-	mutex_unlock(&the_lnet.ln_api_mutex);
-	while (!list_empty(&net_head)) {
-		ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-		list_del_init(&ni->ni_list);
-		lnet_ni_free(ni);
-	}
-	return rc;
-}
-
-int
-lnet_dyn_del_ni(__u32 net)
-{
-	struct lnet_ni *ni;
-	struct lnet_ping_info *pinfo;
-	struct lnet_handle_md md_handle;
-	int rc;
-
-	/* don't allow userspace to shut down the LOLND */
-	if (LNET_NETTYP(net) == LOLND)
-		return -EINVAL;
-
-	mutex_lock(&the_lnet.ln_api_mutex);
-	/* create and link a new ping info, before removing the old one */
-	rc = lnet_ping_info_setup(&pinfo, &md_handle,
-				  lnet_get_ni_count() - 1, false);
-	if (rc)
-		goto out;
-
-	ni = lnet_net2ni(net);
-	if (!ni) {
-		rc = -EINVAL;
-		goto failed;
-	}
-
-	/* decrement the reference counter taken by lnet_net2ni() */
-	lnet_ni_decref_locked(ni, 0);
-
-	lnet_shutdown_lndni(ni);
-
-	if (!lnet_count_acceptor_nis())
-		lnet_acceptor_stop();
-
-	lnet_ping_target_update(pinfo, md_handle);
-	goto out;
-failed:
-	lnet_ping_md_unlink(pinfo, &md_handle);
-	lnet_ping_info_free(pinfo);
-out:
-	mutex_unlock(&the_lnet.ln_api_mutex);
-
-	return rc;
-}
-
-/**
- * LNet ioctl handler.
- */
-int
-LNetCtl(unsigned int cmd, void *arg)
-{
-	struct libcfs_ioctl_data *data = arg;
-	struct lnet_ioctl_config_data *config;
-	struct lnet_process_id id = {0};
-	struct lnet_ni *ni;
-	int rc;
-	unsigned long secs_passed;
-
-	BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX <
-		     sizeof(struct lnet_ioctl_net_config) +
-		     sizeof(struct lnet_ioctl_config_data));
-
-	switch (cmd) {
-	case IOC_LIBCFS_GET_NI:
-		rc = LNetGetId(data->ioc_count, &id);
-		data->ioc_nid = id.nid;
-		return rc;
-
-	case IOC_LIBCFS_FAIL_NID:
-		return lnet_fail_nid(data->ioc_nid, data->ioc_count);
-
-	case IOC_LIBCFS_ADD_ROUTE:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		mutex_lock(&the_lnet.ln_api_mutex);
-		rc = lnet_add_route(config->cfg_net,
-				    config->cfg_config_u.cfg_route.rtr_hop,
-				    config->cfg_nid,
-				    config->cfg_config_u.cfg_route.rtr_priority);
-		if (!rc) {
-			rc = lnet_check_routes();
-			if (rc)
-				lnet_del_route(config->cfg_net,
-					       config->cfg_nid);
-		}
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-
-	case IOC_LIBCFS_DEL_ROUTE:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		mutex_lock(&the_lnet.ln_api_mutex);
-		rc = lnet_del_route(config->cfg_net, config->cfg_nid);
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-
-	case IOC_LIBCFS_GET_ROUTE:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		return lnet_get_route(config->cfg_count,
-				      &config->cfg_net,
-				      &config->cfg_config_u.cfg_route.rtr_hop,
-				      &config->cfg_nid,
-				      &config->cfg_config_u.cfg_route.rtr_flags,
-				      &config->cfg_config_u.cfg_route.rtr_priority);
-
-	case IOC_LIBCFS_GET_NET: {
-		size_t total = sizeof(*config) +
-			       sizeof(struct lnet_ioctl_net_config);
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < total)
-			return -EINVAL;
-
-		return lnet_get_net_config(config);
-	}
-
-	case IOC_LIBCFS_GET_LNET_STATS: {
-		struct lnet_ioctl_lnet_stats *lnet_stats = arg;
-
-		if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
-			return -EINVAL;
-
-		lnet_counters_get(&lnet_stats->st_cntrs);
-		return 0;
-	}
-
-	case IOC_LIBCFS_CONFIG_RTR:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		mutex_lock(&the_lnet.ln_api_mutex);
-		if (config->cfg_config_u.cfg_buffers.buf_enable) {
-			rc = lnet_rtrpools_enable();
-			mutex_unlock(&the_lnet.ln_api_mutex);
-			return rc;
-		}
-		lnet_rtrpools_disable();
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return 0;
-
-	case IOC_LIBCFS_ADD_BUF:
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < sizeof(*config))
-			return -EINVAL;
-
-		mutex_lock(&the_lnet.ln_api_mutex);
-		rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.buf_tiny,
-					  config->cfg_config_u.cfg_buffers.buf_small,
-					  config->cfg_config_u.cfg_buffers.buf_large);
-		mutex_unlock(&the_lnet.ln_api_mutex);
-		return rc;
-
-	case IOC_LIBCFS_GET_BUF: {
-		struct lnet_ioctl_pool_cfg *pool_cfg;
-		size_t total = sizeof(*config) + sizeof(*pool_cfg);
-
-		config = arg;
-
-		if (config->cfg_hdr.ioc_len < total)
-			return -EINVAL;
-
-		pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
-		return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
-	}
-
-	case IOC_LIBCFS_GET_PEER_INFO: {
-		struct lnet_ioctl_peer *peer_info = arg;
-
-		if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
-			return -EINVAL;
-
-		return lnet_get_peer_info(peer_info->pr_count,
-			&peer_info->pr_nid,
-			peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
-			&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
-	}
-
-	case IOC_LIBCFS_NOTIFY_ROUTER:
-		secs_passed = (ktime_get_real_seconds() - data->ioc_u64[0]);
-		secs_passed *= msecs_to_jiffies(MSEC_PER_SEC);
-
-		return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
-				   jiffies - secs_passed);
-
-	case IOC_LIBCFS_LNET_DIST:
-		rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
-		if (rc < 0 && rc != -EHOSTUNREACH)
-			return rc;
-
-		data->ioc_u32[0] = rc;
-		return 0;
-
-	case IOC_LIBCFS_TESTPROTOCOMPAT:
-		lnet_net_lock(LNET_LOCK_EX);
-		the_lnet.ln_testprotocompat = data->ioc_flags;
-		lnet_net_unlock(LNET_LOCK_EX);
-		return 0;
-
-	case IOC_LIBCFS_LNET_FAULT:
-		return lnet_fault_ctl(data->ioc_flags, data);
-
-	case IOC_LIBCFS_PING:
-		id.nid = data->ioc_nid;
-		id.pid = data->ioc_u32[0];
-		rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
-			       data->ioc_pbuf1,
-			       data->ioc_plen1 / sizeof(struct lnet_process_id));
-		if (rc < 0)
-			return rc;
-		data->ioc_count = rc;
-		return 0;
-
-	default:
-		ni = lnet_net2ni(data->ioc_net);
-		if (!ni)
-			return -EINVAL;
-
-		if (!ni->ni_lnd->lnd_ctl)
-			rc = -EINVAL;
-		else
-			rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
-
-		lnet_ni_decref(ni);
-		return rc;
-	}
-	/* not reached */
-}
-EXPORT_SYMBOL(LNetCtl);
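
As an illustration of the dispatch above (a sketch, not part of the patch):
for IOC_LIBCFS_GET_NI the handler simply forwards to LNetGetId(), so a
kernel caller could fetch the first local NID like this, assuming the
libcfs ioctl structures from this tree:

/* Sketch: look up the NID of interface 0 through the ioctl path. */
static void example_print_first_nid(void)
{
	struct libcfs_ioctl_data data = { .ioc_count = 0 };

	if (!LNetCtl(IOC_LIBCFS_GET_NI, &data))
		pr_info("NID[0] = %s\n", libcfs_nid2str(data.ioc_nid));
}
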
-
-void LNetDebugPeer(struct lnet_process_id id)
-{
-	lnet_debug_peer(id.nid);
-}
-EXPORT_SYMBOL(LNetDebugPeer);
-
-/**
- * Retrieve the lnet_process_id ID of the LNet interface at \a index. Note
- * that all interfaces share the same PID, as requested by LNetNIInit().
- *
- * \param index Index of the interface to look up.
- * \param id On successful return, this location will hold the
- * lnet_process_id ID of the interface.
- *
- * \retval 0 If an interface exists at \a index.
- * \retval -ENOENT If no interface has been found.
- */
-int
-LNetGetId(unsigned int index, struct lnet_process_id *id)
-{
-	struct lnet_ni *ni;
-	struct list_head *tmp;
-	int cpt;
-	int rc = -ENOENT;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each(tmp, &the_lnet.ln_nis) {
-		if (index--)
-			continue;
-
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		id->nid = ni->ni_nid;
-		id->pid = the_lnet.ln_pid;
-		rc = 0;
-		break;
-	}
-
-	lnet_net_unlock(cpt);
-	return rc;
-}
-EXPORT_SYMBOL(LNetGetId);
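
A short usage sketch (illustrative only): callers enumerate the local
interfaces by increasing the index until -ENOENT comes back:

/* Sketch: list every local interface ID. */
static void example_list_ids(void)
{
	struct lnet_process_id id;
	unsigned int i;

	for (i = 0; !LNetGetId(i, &id); i++)
		pr_info("interface %u: %s\n", i, libcfs_id2str(id));
}
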
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
-		     struct lnet_process_id __user *ids, int n_ids)
-{
-	struct lnet_handle_eq eqh;
-	struct lnet_handle_md mdh;
-	struct lnet_event event;
-	struct lnet_md md = { NULL };
-	int which;
-	int unlinked = 0;
-	int replied = 0;
-	const int a_long_time = 60000; /* ms */
-	int infosz;
-	struct lnet_ping_info *info;
-	struct lnet_process_id tmpid;
-	int i;
-	int nob;
-	int rc;
-	int rc2;
-
-	infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]);
-
-	if (n_ids <= 0 ||
-	    id.nid == LNET_NID_ANY ||
-	    timeout_ms > 500000 ||	      /* arbitrary limit! */
-	    n_ids > 20)			 /* arbitrary limit! */
-		return -EINVAL;
-
-	if (id.pid == LNET_PID_ANY)
-		id.pid = LNET_PID_LUSTRE;
-
-	info = kzalloc(infosz, GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	/* NB 2 events max (including any unlink event) */
-	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
-	if (rc) {
-		CERROR("Can't allocate EQ: %d\n", rc);
-		goto out_0;
-	}
-
-	/* initialize md content */
-	md.start     = info;
-	md.length    = infosz;
-	md.threshold = 2; /*GET/REPLY*/
-	md.max_size  = 0;
-	md.options   = LNET_MD_TRUNCATE;
-	md.user_ptr  = NULL;
-	md.eq_handle = eqh;
-
-	rc = LNetMDBind(md, LNET_UNLINK, &mdh);
-	if (rc) {
-		CERROR("Can't bind MD: %d\n", rc);
-		goto out_1;
-	}
-
-	rc = LNetGet(LNET_NID_ANY, mdh, id,
-		     LNET_RESERVED_PORTAL,
-		     LNET_PROTO_PING_MATCHBITS, 0);
-
-	if (rc) {
-		/* Don't CERROR; this could be deliberate! */
-
-		rc2 = LNetMDUnlink(mdh);
-		LASSERT(!rc2);
-
-		/* NB must wait for the UNLINK event below... */
-		unlinked = 1;
-		timeout_ms = a_long_time;
-	}
-
-	do {
-		/* MUST block for unlink to complete */
-
-		rc2 = LNetEQPoll(&eqh, 1, timeout_ms, !unlinked,
-				 &event, &which);
-
-		CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
-		       (rc2 <= 0) ? -1 : event.type,
-		       (rc2 <= 0) ? -1 : event.status,
-		       (rc2 > 0 && event.unlinked) ? " unlinked" : "");
-
-		LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
-
-		if (rc2 <= 0 || event.status) {
-			/* timeout or error */
-			if (!replied && !rc)
-				rc = (rc2 < 0) ? rc2 :
-				     !rc2 ? -ETIMEDOUT :
-				     event.status;
-
-			if (!unlinked) {
-				/* Ensure completion in finite time... */
-				LNetMDUnlink(mdh);
-				/* No assertion (racing with network) */
-				unlinked = 1;
-				timeout_ms = a_long_time;
-			} else if (!rc2) {
-				/* timed out waiting for unlink */
-				CWARN("ping %s: late network completion\n",
-				      libcfs_id2str(id));
-			}
-		} else if (event.type == LNET_EVENT_REPLY) {
-			replied = 1;
-			rc = event.mlength;
-		}
-
-	} while (rc2 <= 0 || !event.unlinked);
-
-	if (!replied) {
-		if (rc >= 0)
-			CWARN("%s: Unexpected rc >= 0 but no reply!\n",
-			      libcfs_id2str(id));
-		rc = -EIO;
-		goto out_1;
-	}
-
-	nob = rc;
-	LASSERT(nob >= 0 && nob <= infosz);
-
-	rc = -EPROTO;			   /* if I can't parse... */
-
-	if (nob < 8) {
-		/* can't check magic/version */
-		CERROR("%s: ping info too short %d\n",
-		       libcfs_id2str(id), nob);
-		goto out_1;
-	}
-
-	if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
-		lnet_swap_pinginfo(info);
-	} else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
-		CERROR("%s: Unexpected magic %08x\n",
-		       libcfs_id2str(id), info->pi_magic);
-		goto out_1;
-	}
-
-	if (!(info->pi_features & LNET_PING_FEAT_NI_STATUS)) {
-		CERROR("%s: ping w/o NI status: 0x%x\n",
-		       libcfs_id2str(id), info->pi_features);
-		goto out_1;
-	}
-
-	if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) {
-		CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
-		       nob, (int)offsetof(struct lnet_ping_info, pi_ni[0]));
-		goto out_1;
-	}
-
-	if (info->pi_nnis < n_ids)
-		n_ids = info->pi_nnis;
-
-	if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) {
-		CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
-		       nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids]));
-		goto out_1;
-	}
-
-	rc = -EFAULT;			   /* If I SEGV... */
-
-	memset(&tmpid, 0, sizeof(tmpid));
-	for (i = 0; i < n_ids; i++) {
-		tmpid.pid = info->pi_pid;
-		tmpid.nid = info->pi_ni[i].ns_nid;
-		if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
-			goto out_1;
-	}
-	rc = info->pi_nnis;
-
- out_1:
-	rc2 = LNetEQFree(eqh);
-	if (rc2)
-		CERROR("rc2 %d\n", rc2);
-	LASSERT(!rc2);
-
- out_0:
-	kfree(info);
-	return rc;
-}
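
The reply parsing in lnet_ping() above leans on flexible-array sizing: the
valid length of a reply carrying n entries is offsetof(..., pi_ni[n]), not
a sizeof(). A self-contained sketch of the idiom, using a simplified,
hypothetical stand-in for struct lnet_ping_info:

#include <stddef.h>

/* Simplified stand-in for struct lnet_ping_info. */
struct example_ping_info {
	unsigned int pi_magic;
	unsigned int pi_nnis;
	struct { unsigned long long ns_nid; int ns_status; } pi_ni[];
};

/* Size of a reply holding n_ids entries, as lnet_ping() computes it. */
static inline size_t example_ping_info_size(int n_ids)
{
	return offsetof(struct example_ping_info, pi_ni[n_ids]);
}
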

+ 0 - 1235
drivers/staging/lustre/lnet/lnet/config.c

@@ -1,1235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <linux/ctype.h>
-#include <linux/lnet/lib-lnet.h>
-
-struct lnet_text_buf {	    /* tmp struct for parsing routes */
-	struct list_head ltb_list;	/* stash on lists */
-	int ltb_size;	/* allocated size */
-	char ltb_text[0];     /* text buffer */
-};
-
-static int lnet_tbnob;			/* track text buf allocation */
-#define LNET_MAX_TEXTBUF_NOB     (64 << 10)	/* bound allocation */
-#define LNET_SINGLE_TEXTBUF_NOB  (4 << 10)
-
-static void
-lnet_syntax(char *name, char *str, int offset, int width)
-{
-	static char dots[LNET_SINGLE_TEXTBUF_NOB];
-	static char dashes[LNET_SINGLE_TEXTBUF_NOB];
-
-	memset(dots, '.', sizeof(dots));
-	dots[sizeof(dots) - 1] = 0;
-	memset(dashes, '-', sizeof(dashes));
-	dashes[sizeof(dashes) - 1] = 0;
-
-	LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str);
-	LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n",
-			   (int)strlen(name), dots, offset, dots,
-			    (width < 1) ? 0 : width - 1, dashes);
-}
-
-static int
-lnet_issep(char c)
-{
-	switch (c) {
-	case '\n':
-	case '\r':
-	case ';':
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
-{
-	struct list_head *tmp;
-	struct lnet_ni *ni;
-
-	list_for_each(tmp, nilist) {
-		ni = list_entry(tmp, struct lnet_ni, ni_list);
-
-		if (LNET_NIDNET(ni->ni_nid) == net)
-			return 0;
-	}
-
-	return 1;
-}
-
-void
-lnet_ni_free(struct lnet_ni *ni)
-{
-	int i;
-
-	if (ni->ni_refs)
-		cfs_percpt_free(ni->ni_refs);
-
-	if (ni->ni_tx_queues)
-		cfs_percpt_free(ni->ni_tx_queues);
-
-	if (ni->ni_cpts)
-		cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
-
-	kfree(ni->ni_lnd_tunables);
-
-	for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++)
-		kfree(ni->ni_interfaces[i]);
-
-	/* release reference to net namespace */
-	if (ni->ni_net_ns)
-		put_net(ni->ni_net_ns);
-
-	kfree(ni);
-}
-
-struct lnet_ni *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
-{
-	struct lnet_tx_queue *tq;
-	struct lnet_ni *ni;
-	int rc;
-	int i;
-
-	if (!lnet_net_unique(net, nilist)) {
-		LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
-				   libcfs_net2str(net));
-		return NULL;
-	}
-
-	ni = kzalloc(sizeof(*ni), GFP_NOFS);
-	if (!ni) {
-		CERROR("Out of memory creating network %s\n",
-		       libcfs_net2str(net));
-		return NULL;
-	}
-
-	spin_lock_init(&ni->ni_lock);
-	INIT_LIST_HEAD(&ni->ni_cptlist);
-	ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
-				       sizeof(*ni->ni_refs[0]));
-	if (!ni->ni_refs)
-		goto failed;
-
-	ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(),
-					    sizeof(*ni->ni_tx_queues[0]));
-	if (!ni->ni_tx_queues)
-		goto failed;
-
-	cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
-		INIT_LIST_HEAD(&tq->tq_delayed);
-
-	if (!el) {
-		ni->ni_cpts  = NULL;
-		ni->ni_ncpts = LNET_CPT_NUMBER;
-	} else {
-		rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
-		if (rc <= 0) {
-			CERROR("Failed to set CPTs for NI %s: %d\n",
-			       libcfs_net2str(net), rc);
-			goto failed;
-		}
-
-		LASSERT(rc <= LNET_CPT_NUMBER);
-		if (rc == LNET_CPT_NUMBER) {
-			cfs_expr_list_values_free(ni->ni_cpts, LNET_CPT_NUMBER);
-			ni->ni_cpts = NULL;
-		}
-
-		ni->ni_ncpts = rc;
-	}
-
-	/* LND will fill in the address part of the NID */
-	ni->ni_nid = LNET_MKNID(net, 0);
-
-	/* Store net namespace in which current ni is being created */
-	if (current->nsproxy->net_ns)
-		ni->ni_net_ns = get_net(current->nsproxy->net_ns);
-	else
-		ni->ni_net_ns = NULL;
-
-	ni->ni_last_alive = ktime_get_real_seconds();
-	list_add_tail(&ni->ni_list, nilist);
-	return ni;
- failed:
-	lnet_ni_free(ni);
-	return NULL;
-}
-
-int
-lnet_parse_networks(struct list_head *nilist, char *networks)
-{
-	struct cfs_expr_list *el = NULL;
-	char *tokens;
-	char *str;
-	char *tmp;
-	struct lnet_ni *ni;
-	__u32 net;
-	int nnets = 0;
-	struct list_head *temp_node;
-
-	if (!networks) {
-		CERROR("networks string is undefined\n");
-		return -EINVAL;
-	}
-
-	if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) {
-		/* _WAY_ conservative */
-		LCONSOLE_ERROR_MSG(0x112,
-				   "Can't parse networks: string too long\n");
-		return -EINVAL;
-	}
-
-	tokens = kstrdup(networks, GFP_KERNEL);
-	if (!tokens) {
-		CERROR("Can't allocate net tokens\n");
-		return -ENOMEM;
-	}
-
-	tmp = tokens;
-	str = tokens;
-
-	while (str && *str) {
-		char *comma = strchr(str, ',');
-		char *bracket = strchr(str, '(');
-		char *square = strchr(str, '[');
-		char *iface;
-		int niface;
-		int rc;
-
-		/*
-		 * NB we don't check interface conflicts here; it's the LND's
-		 * responsibility (if it cares at all)
-		 */
-		if (square && (!comma || square < comma)) {
-			/*
-			 * e.g. o2ib0(ib0)[1,2]: the numbers between the
-			 * square brackets are the CPTs this NI should be
-			 * bound to
-			 */
-			if (bracket && bracket > square) {
-				tmp = square;
-				goto failed_syntax;
-			}
-
-			tmp = strchr(square, ']');
-			if (!tmp) {
-				tmp = square;
-				goto failed_syntax;
-			}
-
-			rc = cfs_expr_list_parse(square, tmp - square + 1,
-						 0, LNET_CPT_NUMBER - 1, &el);
-			if (rc) {
-				tmp = square;
-				goto failed_syntax;
-			}
-
-			while (square <= tmp)
-				*square++ = ' ';
-		}
-
-		if (!bracket || (comma && comma < bracket)) {
-			/* no interface list specified */
-
-			if (comma)
-				*comma++ = 0;
-			net = libcfs_str2net(strim(str));
-
-			if (net == LNET_NIDNET(LNET_NID_ANY)) {
-				LCONSOLE_ERROR_MSG(0x113,
-						   "Unrecognised network type\n");
-				tmp = str;
-				goto failed_syntax;
-			}
-
-			if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
-			    !lnet_ni_alloc(net, el, nilist))
-				goto failed;
-
-			if (el) {
-				cfs_expr_list_free(el);
-				el = NULL;
-			}
-
-			str = comma;
-			continue;
-		}
-
-		*bracket = 0;
-		net = libcfs_str2net(strim(str));
-		if (net == LNET_NIDNET(LNET_NID_ANY)) {
-			tmp = str;
-			goto failed_syntax;
-		}
-
-		ni = lnet_ni_alloc(net, el, nilist);
-		if (!ni)
-			goto failed;
-
-		if (el) {
-			cfs_expr_list_free(el);
-			el = NULL;
-		}
-
-		niface = 0;
-		iface = bracket + 1;
-
-		bracket = strchr(iface, ')');
-		if (!bracket) {
-			tmp = iface;
-			goto failed_syntax;
-		}
-
-		*bracket = 0;
-		do {
-			comma = strchr(iface, ',');
-			if (comma)
-				*comma++ = 0;
-
-			iface = strim(iface);
-			if (!*iface) {
-				tmp = iface;
-				goto failed_syntax;
-			}
-
-			if (niface == LNET_MAX_INTERFACES) {
-				LCONSOLE_ERROR_MSG(0x115,
-						   "Too many interfaces for net %s\n",
-						   libcfs_net2str(net));
-				goto failed;
-			}
-
-			/*
-			 * Allocate a separate piece of memory and copy
-			 * the string into it, so we don't have a
-			 * dependency on the tokens string.  This way we
-			 * can free the tokens at the end of the function.
-			 * The newly allocated ni_interfaces[] entries are
-			 * freed when the NI is freed.
-			 */
-			ni->ni_interfaces[niface] = kstrdup(iface, GFP_KERNEL);
-			if (!ni->ni_interfaces[niface]) {
-				CERROR("Can't allocate net interface name\n");
-				goto failed;
-			}
-			niface++;
-			iface = comma;
-		} while (iface);
-
-		str = bracket + 1;
-		comma = strchr(bracket + 1, ',');
-		if (comma) {
-			*comma = 0;
-			str = strim(str);
-			if (*str) {
-				tmp = str;
-				goto failed_syntax;
-			}
-			str = comma + 1;
-			continue;
-		}
-
-		str = strim(str);
-		if (*str) {
-			tmp = str;
-			goto failed_syntax;
-		}
-	}
-
-	list_for_each(temp_node, nilist)
-		nnets++;
-
-	kfree(tokens);
-	return nnets;
-
- failed_syntax:
-	lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
- failed:
-	while (!list_empty(nilist)) {
-		ni = list_entry(nilist->next, struct lnet_ni, ni_list);
-
-		list_del(&ni->ni_list);
-		lnet_ni_free(ni);
-	}
-
-	if (el)
-		cfs_expr_list_free(el);
-
-	kfree(tokens);
-
-	return -EINVAL;
-}
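
For reference, a sketch of the module-parameter strings this parser is
written to accept. The o2ib0(ib0)[1,2] form comes from the comment above;
the rest are inferred from the parsing logic and should be read as
illustrative:

/* Sketch: example "networks" strings for lnet_parse_networks(). */
static const char * const example_networks[] = {
	"tcp0",				/* one net, default interfaces */
	"tcp0(eth0,eth1)",		/* explicit interface list */
	"o2ib0(ib0)[1,2]",		/* bind this NI to CPTs 1 and 2 */
	"tcp0(eth0),o2ib0(ib0)",	/* several nets, comma-separated */
};
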
-
-static struct lnet_text_buf *
-lnet_new_text_buf(int str_len)
-{
-	struct lnet_text_buf *ltb;
-	int nob;
-
-	/* NB allocate space for the terminating 0 */
-	nob = offsetof(struct lnet_text_buf, ltb_text[str_len + 1]);
-	if (nob > LNET_SINGLE_TEXTBUF_NOB) {
-		/* _way_ conservative for "route net gateway..." */
-		CERROR("text buffer too big\n");
-		return NULL;
-	}
-
-	if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) {
-		CERROR("Too many text buffers\n");
-		return NULL;
-	}
-
-	ltb = kzalloc(nob, GFP_KERNEL);
-	if (!ltb)
-		return NULL;
-
-	ltb->ltb_size = nob;
-	ltb->ltb_text[0] = 0;
-	lnet_tbnob += nob;
-	return ltb;
-}
-
-static void
-lnet_free_text_buf(struct lnet_text_buf *ltb)
-{
-	lnet_tbnob -= ltb->ltb_size;
-	kfree(ltb);
-}
-
-static void
-lnet_free_text_bufs(struct list_head *tbs)
-{
-	struct lnet_text_buf *ltb;
-
-	while (!list_empty(tbs)) {
-		ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
-		list_del(&ltb->ltb_list);
-		lnet_free_text_buf(ltb);
-	}
-}
-
-static int
-lnet_str2tbs_sep(struct list_head *tbs, char *str)
-{
-	struct list_head pending;
-	char *sep;
-	int nob;
-	int i;
-	struct lnet_text_buf *ltb;
-
-	INIT_LIST_HEAD(&pending);
-
-	/* Split 'str' into separate commands */
-	for (;;) {
-		/* skip leading whitespace */
-		while (isspace(*str))
-			str++;
-
-		/* scan for separator or comment */
-		for (sep = str; *sep; sep++)
-			if (lnet_issep(*sep) || *sep == '#')
-				break;
-
-		nob = (int)(sep - str);
-		if (nob > 0) {
-			ltb = lnet_new_text_buf(nob);
-			if (!ltb) {
-				lnet_free_text_bufs(&pending);
-				return -ENOMEM;
-			}
-
-			for (i = 0; i < nob; i++)
-				if (isspace(str[i]))
-					ltb->ltb_text[i] = ' ';
-				else
-					ltb->ltb_text[i] = str[i];
-
-			ltb->ltb_text[nob] = 0;
-
-			list_add_tail(&ltb->ltb_list, &pending);
-		}
-
-		if (*sep == '#') {
-			/* scan for separator */
-			do {
-				sep++;
-			} while (*sep && !lnet_issep(*sep));
-		}
-
-		if (!*sep)
-			break;
-
-		str = sep + 1;
-	}
-
-	list_splice(&pending, tbs->prev);
-	return 0;
-}
-
-static int
-lnet_expand1tb(struct list_head *list,
-	       char *str, char *sep1, char *sep2,
-	       char *item, int itemlen)
-{
-	int len1 = (int)(sep1 - str);
-	int len2 = strlen(sep2 + 1);
-	struct lnet_text_buf *ltb;
-
-	LASSERT(*sep1 == '[');
-	LASSERT(*sep2 == ']');
-
-	ltb = lnet_new_text_buf(len1 + itemlen + len2);
-	if (!ltb)
-		return -ENOMEM;
-
-	memcpy(ltb->ltb_text, str, len1);
-	memcpy(&ltb->ltb_text[len1], item, itemlen);
-	memcpy(&ltb->ltb_text[len1 + itemlen], sep2 + 1, len2);
-	ltb->ltb_text[len1 + itemlen + len2] = 0;
-
-	list_add_tail(&ltb->ltb_list, list);
-	return 0;
-}
-
-static int
-lnet_str2tbs_expand(struct list_head *tbs, char *str)
-{
-	char num[16];
-	struct list_head pending;
-	char *sep;
-	char *sep2;
-	char *parsed;
-	char *enditem;
-	int lo;
-	int hi;
-	int stride;
-	int i;
-	int nob;
-	int scanned;
-
-	INIT_LIST_HEAD(&pending);
-
-	sep = strchr(str, '[');
-	if (!sep)			/* nothing to expand */
-		return 0;
-
-	sep2 = strchr(sep, ']');
-	if (!sep2)
-		goto failed;
-
-	for (parsed = sep; parsed < sep2; parsed = enditem) {
-		enditem = ++parsed;
-		while (enditem < sep2 && *enditem != ',')
-			enditem++;
-
-		if (enditem == parsed)		/* no empty items */
-			goto failed;
-
-		if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi,
-			   &stride, &scanned) < 3) {
-			if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) {
-				/* simple string enumeration */
-				if (lnet_expand1tb(&pending, str, sep, sep2,
-						   parsed,
-						   (int)(enditem - parsed))) {
-					goto failed;
-				}
-				continue;
-			}
-
-			stride = 1;
-		}
-
-		/* range expansion */
-
-		if (enditem != parsed + scanned) /* no trailing junk */
-			goto failed;
-
-		if (hi < 0 || lo < 0 || stride < 0 || hi < lo ||
-		    (hi - lo) % stride)
-			goto failed;
-
-		for (i = lo; i <= hi; i += stride) {
-			snprintf(num, sizeof(num), "%d", i);
-			nob = strlen(num);
-			if (nob + 1 == sizeof(num))
-				goto failed;
-
-			if (lnet_expand1tb(&pending, str, sep, sep2,
-					   num, nob))
-				goto failed;
-		}
-	}
-
-	list_splice(&pending, tbs->prev);
-	return 1;
-
- failed:
-	lnet_free_text_bufs(&pending);
-	return -EINVAL;
-}
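
A worked example of the bracket expansion above, inferred from the two
sscanf() patterns ("lo-hi/stride" and "lo-hi") and the plain-enumeration
fallback:

/* Sketch: expansions lnet_str2tbs_expand() performs (illustrative). */
static const char * const example_expansions[] = {
	/* "tcp[0-2]"   -> */ "tcp0", "tcp1", "tcp2",
	/* "tcp[0-4/2]" -> */ "tcp0", "tcp2", "tcp4",
	/* "tcp[a,b]"   -> */ "tcpa", "tcpb",	/* simple enumeration */
};
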
-
-static int
-lnet_parse_hops(char *str, unsigned int *hops)
-{
-	int len = strlen(str);
-	int nob = len;
-
-	return (sscanf(str, "%u%n", hops, &nob) >= 1 &&
-		nob == len &&
-		*hops > 0 && *hops < 256);
-}
-
-#define LNET_PRIORITY_SEPARATOR (':')
-
-static int
-lnet_parse_priority(char *str, unsigned int *priority, char **token)
-{
-	int nob;
-	char *sep;
-	int len;
-
-	sep = strchr(str, LNET_PRIORITY_SEPARATOR);
-	if (!sep) {
-		*priority = 0;
-		return 0;
-	}
-	len = strlen(sep + 1);
-
-	if ((sscanf((sep + 1), "%u%n", priority, &nob) < 1) || (len != nob)) {
-		/*
-		 * Update the caller's token pointer so it treats the found
-		 * priority as the token to report in the error message.
-		 */
-		*token += sep - str + 1;
-		return -EINVAL;
-	}
-
-	CDEBUG(D_NET, "gateway %s, priority %d, nob %d\n", str, *priority, nob);
-
-	/*
-	 * Change priority separator to \0 to be able to parse NID
-	 */
-	*sep = '\0';
-	return 0;
-}
-
-static int
-lnet_parse_route(char *str, int *im_a_router)
-{
-	/* static scratch buffer OK (single threaded) */
-	static char cmd[LNET_SINGLE_TEXTBUF_NOB];
-
-	struct list_head nets;
-	struct list_head gateways;
-	struct list_head *tmp1;
-	struct list_head *tmp2;
-	__u32 net;
-	lnet_nid_t nid;
-	struct lnet_text_buf *ltb;
-	int rc;
-	char *sep;
-	char *token = str;
-	int ntokens = 0;
-	int myrc = -1;
-	__u32 hops;
-	int got_hops = 0;
-	unsigned int priority = 0;
-
-	INIT_LIST_HEAD(&gateways);
-	INIT_LIST_HEAD(&nets);
-
-	/* save a copy of the string for error messages */
-	strncpy(cmd, str, sizeof(cmd));
-	cmd[sizeof(cmd) - 1] = '\0';
-
-	sep = str;
-	for (;;) {
-		/* scan for token start */
-		while (isspace(*sep))
-			sep++;
-		if (!*sep) {
-			if (ntokens < (got_hops ? 3 : 2))
-				goto token_error;
-			break;
-		}
-
-		ntokens++;
-		token = sep++;
-
-		/* scan for token end */
-		while (*sep && !isspace(*sep))
-			sep++;
-		if (*sep)
-			*sep++ = 0;
-
-		if (ntokens == 1) {
-			tmp2 = &nets;		/* expanding nets */
-		} else if (ntokens == 2 &&
-			   lnet_parse_hops(token, &hops)) {
-			got_hops = 1;	   /* got a hop count */
-			continue;
-		} else {
-			tmp2 = &gateways;	/* expanding gateways */
-		}
-
-		ltb = lnet_new_text_buf(strlen(token));
-		if (!ltb)
-			goto out;
-
-		strcpy(ltb->ltb_text, token);
-		tmp1 = &ltb->ltb_list;
-		list_add_tail(tmp1, tmp2);
-
-		while (tmp1 != tmp2) {
-			ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
-
-			rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text);
-			if (rc < 0)
-				goto token_error;
-
-			tmp1 = tmp1->next;
-
-			if (rc > 0) {		/* expanded! */
-				list_del(&ltb->ltb_list);
-				lnet_free_text_buf(ltb);
-				continue;
-			}
-
-			if (ntokens == 1) {
-				net = libcfs_str2net(ltb->ltb_text);
-				if (net == LNET_NIDNET(LNET_NID_ANY) ||
-				    LNET_NETTYP(net) == LOLND)
-					goto token_error;
-			} else {
-				rc = lnet_parse_priority(ltb->ltb_text,
-							 &priority, &token);
-				if (rc < 0)
-					goto token_error;
-
-				nid = libcfs_str2nid(ltb->ltb_text);
-				if (nid == LNET_NID_ANY ||
-				    LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
-					goto token_error;
-			}
-		}
-	}
-
-	/*
-	 * If no hop count was given, flag the value as unset;
-	 * hops is an optional parameter.
-	 */
-	if (!got_hops)
-		hops = LNET_UNDEFINED_HOPS;
-
-	LASSERT(!list_empty(&nets));
-	LASSERT(!list_empty(&gateways));
-
-	list_for_each(tmp1, &nets) {
-		ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
-		net = libcfs_str2net(ltb->ltb_text);
-		LASSERT(net != LNET_NIDNET(LNET_NID_ANY));
-
-		list_for_each(tmp2, &gateways) {
-			ltb = list_entry(tmp2, struct lnet_text_buf, ltb_list);
-			nid = libcfs_str2nid(ltb->ltb_text);
-			LASSERT(nid != LNET_NID_ANY);
-
-			if (lnet_islocalnid(nid)) {
-				*im_a_router = 1;
-				continue;
-			}
-
-			rc = lnet_add_route(net, hops, nid, priority);
-			if (rc && rc != -EEXIST && rc != -EHOSTUNREACH) {
-				CERROR("Can't create route to %s via %s\n",
-				       libcfs_net2str(net),
-				       libcfs_nid2str(nid));
-				goto out;
-			}
-		}
-	}
-
-	myrc = 0;
-	goto out;
-
- token_error:
-	lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
- out:
-	lnet_free_text_bufs(&nets);
-	lnet_free_text_bufs(&gateways);
-	return myrc;
-}
-
-static int
-lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
-{
-	struct lnet_text_buf *ltb;
-
-	while (!list_empty(tbs)) {
-		ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
-		if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) {
-			lnet_free_text_bufs(tbs);
-			return -EINVAL;
-		}
-
-		list_del(&ltb->ltb_list);
-		lnet_free_text_buf(ltb);
-	}
-
-	return 0;
-}
-
-int
-lnet_parse_routes(char *routes, int *im_a_router)
-{
-	struct list_head tbs;
-	int rc = 0;
-
-	*im_a_router = 0;
-
-	INIT_LIST_HEAD(&tbs);
-
-	if (lnet_str2tbs_sep(&tbs, routes) < 0) {
-		CERROR("Error parsing routes\n");
-		rc = -EINVAL;
-	} else {
-		rc = lnet_parse_route_tbs(&tbs, im_a_router);
-	}
-
-	LASSERT(!lnet_tbnob);
-	return rc;
-}
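
Putting the pieces together, a sketch of "routes" strings this parser is
written to accept: each entry is "<net> [hops] <gateway NID>[:<priority>]",
entries are separated by ';' or newlines, the optional hop count must be
1-255 (see lnet_parse_hops()), and bracket expansion works as above. The
concrete addresses here are made up:

/* Sketch: example "routes" strings for lnet_parse_routes(). */
static const char * const example_routes[] = {
	"o2ib0 192.168.0.1@tcp0",	/* hops omitted -> left unset */
	"o2ib0 2 192.168.0.[1-3]@tcp0",	/* hop count plus expansion */
	"tcp0 10.10.0.1@o2ib0:1",	/* ":1" is the route priority */
	"tcp1 10.10.0.1@o2ib0; tcp2 10.10.0.2@o2ib0",
};
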
-
-static int
-lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip)
-{
-	LIST_HEAD(list);
-	int rc;
-	int i;
-
-	rc = cfs_ip_addr_parse(token, len, &list);
-	if (rc)
-		return rc;
-
-	for (rc = i = 0; !rc && i < nip; i++)
-		rc = cfs_ip_addr_match(ipaddrs[i], &list);
-
-	cfs_expr_list_free_list(&list);
-
-	return rc;
-}
-
-static int
-lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip)
-{
-	static char tokens[LNET_SINGLE_TEXTBUF_NOB];
-
-	int matched = 0;
-	int ntokens = 0;
-	int len;
-	char *net = NULL;
-	char *sep;
-	char *token;
-	int rc;
-
-	LASSERT(strlen(net_entry) < sizeof(tokens));
-
-	/* work on a copy of the string */
-	strcpy(tokens, net_entry);
-	sep = tokens;
-	for (;;) {
-		/* scan for token start */
-		while (isspace(*sep))
-			sep++;
-		if (!*sep)
-			break;
-
-		token = sep++;
-
-		/* scan for token end */
-		while (*sep && !isspace(*sep))
-			sep++;
-		if (*sep)
-			*sep++ = 0;
-
-		if (!ntokens++) {
-			net = token;
-			continue;
-		}
-
-		len = strlen(token);
-
-		rc = lnet_match_network_token(token, len, ipaddrs, nip);
-		if (rc < 0) {
-			lnet_syntax("ip2nets", net_entry,
-				    (int)(token - tokens), len);
-			return rc;
-		}
-
-		if (rc)
-			matched |= 1;
-	}
-
-	if (!matched)
-		return 0;
-
-	strcpy(net_entry, net);		 /* replace with matched net */
-	return 1;
-}
-
-static __u32
-lnet_netspec2net(char *netspec)
-{
-	char *bracket = strchr(netspec, '(');
-	__u32 net;
-
-	if (bracket)
-		*bracket = 0;
-
-	net = libcfs_str2net(netspec);
-
-	if (bracket)
-		*bracket = '(';
-
-	return net;
-}
-
-static int
-lnet_splitnets(char *source, struct list_head *nets)
-{
-	int offset = 0;
-	int offset2;
-	int len;
-	struct lnet_text_buf *tb;
-	struct lnet_text_buf *tb2;
-	struct list_head *t;
-	char *sep;
-	char *bracket;
-	__u32 net;
-
-	LASSERT(!list_empty(nets));
-	LASSERT(nets->next == nets->prev);     /* single entry */
-
-	tb = list_entry(nets->next, struct lnet_text_buf, ltb_list);
-
-	for (;;) {
-		sep = strchr(tb->ltb_text, ',');
-		bracket = strchr(tb->ltb_text, '(');
-
-		if (sep && bracket && bracket < sep) {
-			/* netspec lists interfaces... */
-
-			offset2 = offset + (int)(bracket - tb->ltb_text);
-			len = strlen(bracket);
-
-			bracket = strchr(bracket + 1, ')');
-
-			if (!bracket ||
-			    !(bracket[1] == ',' || !bracket[1])) {
-				lnet_syntax("ip2nets", source, offset2, len);
-				return -EINVAL;
-			}
-
-			sep = !bracket[1] ? NULL : bracket + 1;
-		}
-
-		if (sep)
-			*sep++ = 0;
-
-		net = lnet_netspec2net(tb->ltb_text);
-		if (net == LNET_NIDNET(LNET_NID_ANY)) {
-			lnet_syntax("ip2nets", source, offset,
-				    strlen(tb->ltb_text));
-			return -EINVAL;
-		}
-
-		list_for_each(t, nets) {
-			tb2 = list_entry(t, struct lnet_text_buf, ltb_list);
-
-			if (tb2 == tb)
-				continue;
-
-			if (net == lnet_netspec2net(tb2->ltb_text)) {
-				/* duplicate network */
-				lnet_syntax("ip2nets", source, offset,
-					    strlen(tb->ltb_text));
-				return -EINVAL;
-			}
-		}
-
-		if (!sep)
-			return 0;
-
-		offset += (int)(sep - tb->ltb_text);
-		len = strlen(sep);
-		tb2 = lnet_new_text_buf(len);
-		if (!tb2)
-			return -ENOMEM;
-
-		strncpy(tb2->ltb_text, sep, len);
-		tb2->ltb_text[len] = '\0';
-		list_add_tail(&tb2->ltb_list, nets);
-
-		tb = tb2;
-	}
-}
-
-static int
-lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
-{
-	static char networks[LNET_SINGLE_TEXTBUF_NOB];
-	static char source[LNET_SINGLE_TEXTBUF_NOB];
-
-	struct list_head raw_entries;
-	struct list_head matched_nets;
-	struct list_head current_nets;
-	struct list_head *t;
-	struct list_head *t2;
-	struct lnet_text_buf *tb;
-	struct lnet_text_buf *temp;
-	struct lnet_text_buf *tb2;
-	__u32 net1;
-	__u32 net2;
-	int len;
-	int count;
-	int dup;
-	int rc;
-
-	INIT_LIST_HEAD(&raw_entries);
-	if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
-		CERROR("Error parsing ip2nets\n");
-		LASSERT(!lnet_tbnob);
-		return -EINVAL;
-	}
-
-	INIT_LIST_HEAD(&matched_nets);
-	INIT_LIST_HEAD(&current_nets);
-	networks[0] = 0;
-	count = 0;
-	len = 0;
-	rc = 0;
-
-	list_for_each_entry_safe(tb, temp, &raw_entries, ltb_list) {
-		strncpy(source, tb->ltb_text, sizeof(source));
-		source[sizeof(source) - 1] = '\0';
-
-		/* on a match, replace ltb_text with the matched network(s) */
-		rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip);
-		if (rc < 0)
-			break;
-
-		list_del(&tb->ltb_list);
-
-		if (!rc) {		  /* no match */
-			lnet_free_text_buf(tb);
-			continue;
-		}
-
-		/* split into separate networks */
-		INIT_LIST_HEAD(&current_nets);
-		list_add(&tb->ltb_list, &current_nets);
-		rc = lnet_splitnets(source, &current_nets);
-		if (rc < 0)
-			break;
-
-		dup = 0;
-		list_for_each(t, &current_nets) {
-			tb = list_entry(t, struct lnet_text_buf, ltb_list);
-			net1 = lnet_netspec2net(tb->ltb_text);
-			LASSERT(net1 != LNET_NIDNET(LNET_NID_ANY));
-
-			list_for_each(t2, &matched_nets) {
-				tb2 = list_entry(t2, struct lnet_text_buf,
-						 ltb_list);
-				net2 = lnet_netspec2net(tb2->ltb_text);
-				LASSERT(net2 != LNET_NIDNET(LNET_NID_ANY));
-
-				if (net1 == net2) {
-					dup = 1;
-					break;
-				}
-			}
-
-			if (dup)
-				break;
-		}
-
-		if (dup) {
-			lnet_free_text_bufs(&current_nets);
-			continue;
-		}
-
-		list_for_each_safe(t, t2, &current_nets) {
-			tb = list_entry(t, struct lnet_text_buf, ltb_list);
-
-			list_del(&tb->ltb_list);
-			list_add_tail(&tb->ltb_list, &matched_nets);
-
-			len += snprintf(networks + len, sizeof(networks) - len,
-					"%s%s", !len ? "" : ",",
-					tb->ltb_text);
-
-			if (len >= sizeof(networks)) {
-				CERROR("Too many matched networks\n");
-				rc = -E2BIG;
-				goto out;
-			}
-		}
-
-		count++;
-	}
-
- out:
-	lnet_free_text_bufs(&raw_entries);
-	lnet_free_text_bufs(&matched_nets);
-	lnet_free_text_bufs(&current_nets);
-	LASSERT(!lnet_tbnob);
-
-	if (rc < 0)
-		return rc;
-
-	*networksp = networks;
-	return count;
-}
-
-static int
-lnet_ipaddr_enumerate(__u32 **ipaddrsp)
-{
-	int up;
-	__u32 netmask;
-	__u32 *ipaddrs;
-	__u32 *ipaddrs2;
-	int nip;
-	char **ifnames;
-	int nif = lnet_ipif_enumerate(&ifnames);
-	int i;
-	int rc;
-
-	if (nif <= 0)
-		return nif;
-
-	ipaddrs = kcalloc(nif, sizeof(*ipaddrs), GFP_KERNEL);
-	if (!ipaddrs) {
-		CERROR("Can't allocate ipaddrs[%d]\n", nif);
-		lnet_ipif_free_enumeration(ifnames, nif);
-		return -ENOMEM;
-	}
-
-	for (i = nip = 0; i < nif; i++) {
-		if (!strcmp(ifnames[i], "lo"))
-			continue;
-
-		rc = lnet_ipif_query(ifnames[i], &up, &ipaddrs[nip], &netmask);
-		if (rc) {
-			CWARN("Can't query interface %s: %d\n",
-			      ifnames[i], rc);
-			continue;
-		}
-
-		if (!up) {
-			CWARN("Ignoring interface %s: it's down\n",
-			      ifnames[i]);
-			continue;
-		}
-
-		nip++;
-	}
-
-	lnet_ipif_free_enumeration(ifnames, nif);
-
-	if (nip == nif) {
-		*ipaddrsp = ipaddrs;
-	} else {
-		if (nip > 0) {
-			ipaddrs2 = kcalloc(nip, sizeof(*ipaddrs2),
-					   GFP_KERNEL);
-			if (!ipaddrs2) {
-				CERROR("Can't allocate ipaddrs[%d]\n", nip);
-				nip = -ENOMEM;
-			} else {
-				memcpy(ipaddrs2, ipaddrs,
-				       nip * sizeof(*ipaddrs));
-				*ipaddrsp = ipaddrs2;
-				rc = nip;
-			}
-		}
-		kfree(ipaddrs);
-	}
-	return nip;
-}
-
-int
-lnet_parse_ip2nets(char **networksp, char *ip2nets)
-{
-	__u32 *ipaddrs = NULL;
-	int nip = lnet_ipaddr_enumerate(&ipaddrs);
-	int rc;
-
-	if (nip < 0) {
-		LCONSOLE_ERROR_MSG(0x117,
-				   "Error %d enumerating local IP interfaces for ip2nets to match\n",
-				   nip);
-		return nip;
-	}
-
-	if (!nip) {
-		LCONSOLE_ERROR_MSG(0x118,
-				   "No local IP interfaces for ip2nets to match\n");
-		return -ENOENT;
-	}
-
-	rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
-	kfree(ipaddrs);
-
-	if (rc < 0) {
-		LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
-		return rc;
-	}
-
-	if (!rc) {
-		LCONSOLE_ERROR_MSG(0x11a,
-				   "ip2nets does not match any local IP interfaces\n");
-		return -ENOENT;
-	}
-
-	return 0;
-}
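
A sketch of an "ip2nets" value for the matcher above: each entry pairs a
net spec with IP-address patterns handed to cfs_ip_addr_parse(), and every
entry whose patterns match a local interface address contributes its net
spec to the generated "networks" string. The pattern syntax shown is
illustrative and the addresses are made up:

/* Sketch: an example "ip2nets" string for lnet_parse_ip2nets(). */
static const char *example_ip2nets =
	"tcp(eth0) 192.168.0.[2-10]; "	/* tcp via eth0 on these hosts */
	"o2ib      10.10.*.*";		/* o2ib on any 10.10.x.x host */
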

+ 0 - 426
drivers/staging/lustre/lnet/lnet/lib-eq.c

@@ -1,426 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-eq.c
- *
- * Library level Event queue management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create an event queue that has room for \a count number of events.
- *
- * The event queue is circular and older events will be overwritten by new
- * ones if they are not removed in time by the user using the functions
- * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to
- * determine the appropriate size of the event queue to prevent this loss
- * of events. Note that when EQ handler is specified in \a callback, no
- * event loss can happen, since the handler is run for each event deposited
- * into the EQ.
- *
- * \param count The number of events to be stored in the event queue. It
- * will be rounded up to the next power of two.
- * \param callback A handler function that runs when an event is deposited
- * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to
- * indicate that no event handler is desired.
- * \param handle On successful return, this location will hold a handle for
- * the newly created EQ.
- *
- * \retval 0       On success.
- * \retval -EINVAL If a parameter is not valid.
- * \retval -ENOMEM If memory for the EQ can't be allocated.
- *
- * \see lnet_eq_handler_t for the discussion on EQ handler semantics.
- */
-int
-LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
-	    struct lnet_handle_eq *handle)
-{
-	struct lnet_eq *eq;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	/*
-	 * We need count to be a power of 2, so that when eq_{enq,deq}_seq
-	 * overflow they don't skip entries and the queue keeps the same
-	 * apparent capacity at all times
-	 */
-	if (count)
-		count = roundup_pow_of_two(count);
-
-	if (callback != LNET_EQ_HANDLER_NONE && count)
-		CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? Please contact with developer to confirm\n", count);
-
-	/*
-	 * count can be 0 if only a callback is needed; then we can
-	 * eliminate the overhead of enqueuing events
-	 */
-	if (!count && callback == LNET_EQ_HANDLER_NONE)
-		return -EINVAL;
-
-	eq = kzalloc(sizeof(*eq), GFP_NOFS);
-	if (!eq)
-		return -ENOMEM;
-
-	if (count) {
-		eq->eq_events = kvmalloc_array(count, sizeof(struct lnet_event),
-					       GFP_KERNEL | __GFP_ZERO);
-		if (!eq->eq_events)
-			goto failed;
-		/*
-		 * NB allocator has set all event sequence numbers to 0,
-		 * so all of them should be earlier than eq_deq_seq
-		 */
-	}
-
-	eq->eq_deq_seq = 1;
-	eq->eq_enq_seq = 1;
-	eq->eq_size = count;
-	eq->eq_callback = callback;
-
-	eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(),
-				       sizeof(*eq->eq_refs[0]));
-	if (!eq->eq_refs)
-		goto failed;
-
-	/* MUST hold the exclusive lnet_res_lock */
-	lnet_res_lock(LNET_LOCK_EX);
-	/*
-	 * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
-	 * both EQ lookup and poll event with only lnet_eq_wait_lock
-	 */
-	lnet_eq_wait_lock();
-
-	lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh);
-	list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active);
-
-	lnet_eq_wait_unlock();
-	lnet_res_unlock(LNET_LOCK_EX);
-
-	lnet_eq2handle(handle, eq);
-	return 0;
-
-failed:
-	kvfree(eq->eq_events);
-
-	if (eq->eq_refs)
-		cfs_percpt_free(eq->eq_refs);
-
-	kfree(eq);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(LNetEQAlloc);
-
-/**
- * Release the resources associated with an event queue if it's idle;
- * otherwise do nothing and it's up to the user to try again.
- *
- * \param eqh A handle for the event queue to be released.
- *
- * \retval 0 If the EQ is not in use and freed.
- * \retval -ENOENT If \a eqh does not point to a valid EQ.
- * \retval -EBUSY  If the EQ is still in use by some MDs.
- */
-int
-LNetEQFree(struct lnet_handle_eq eqh)
-{
-	struct lnet_eq *eq;
-	struct lnet_event *events = NULL;
-	int **refs = NULL;
-	int *ref;
-	int rc = 0;
-	int size = 0;
-	int i;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	lnet_res_lock(LNET_LOCK_EX);
-	/*
-	 * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
-	 * both EQ lookup and poll event with only lnet_eq_wait_lock
-	 */
-	lnet_eq_wait_lock();
-
-	eq = lnet_handle2eq(&eqh);
-	if (!eq) {
-		rc = -ENOENT;
-		goto out;
-	}
-
-	cfs_percpt_for_each(ref, i, eq->eq_refs) {
-		LASSERT(*ref >= 0);
-		if (!*ref)
-			continue;
-
-		CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n",
-		       i, *ref);
-		rc = -EBUSY;
-		goto out;
-	}
-
-	/* stash for free after lock dropped */
-	events = eq->eq_events;
-	size = eq->eq_size;
-	refs = eq->eq_refs;
-
-	lnet_res_lh_invalidate(&eq->eq_lh);
-	list_del(&eq->eq_list);
-	kfree(eq);
- out:
-	lnet_eq_wait_unlock();
-	lnet_res_unlock(LNET_LOCK_EX);
-
-	kvfree(events);
-	if (refs)
-		cfs_percpt_free(refs);
-
-	return rc;
-}
-EXPORT_SYMBOL(LNetEQFree);
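
A small round-trip sketch mirroring the EQ usage in lnet_ping() earlier in
this patch (illustrative, not part of the deleted code):

/* Sketch: allocate a polled EQ, then release it. */
static int example_eq_roundtrip(void)
{
	struct lnet_handle_eq eqh;
	int rc;

	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);  /* 2 events */
	if (rc)
		return rc;
	/* ... bind MDs to eqh and poll with LNetEQPoll() ... */
	return LNetEQFree(eqh);	/* -EBUSY while MDs still reference it */
}
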
-
-void
-lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
-	/* MUST be called with the resource lock held, but w/o lnet_eq_wait_lock */
-	int index;
-
-	if (!eq->eq_size) {
-		LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE);
-		eq->eq_callback(ev);
-		return;
-	}
-
-	lnet_eq_wait_lock();
-	ev->sequence = eq->eq_enq_seq++;
-
-	LASSERT(is_power_of_2(eq->eq_size));
-	index = ev->sequence & (eq->eq_size - 1);
-
-	eq->eq_events[index] = *ev;
-
-	if (eq->eq_callback != LNET_EQ_HANDLER_NONE)
-		eq->eq_callback(ev);
-
-	/* Wake anyone waiting in LNetEQPoll() */
-	if (waitqueue_active(&the_lnet.ln_eq_waitq))
-		wake_up_all(&the_lnet.ln_eq_waitq);
-	lnet_eq_wait_unlock();
-}
-
-static int
-lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
-	int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
-	struct lnet_event *new_event = &eq->eq_events[new_index];
-	int rc;
-
-	/* must be called with lnet_eq_wait_lock held */
-	if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence))
-		return 0;
-
-	/* We've got a new event... */
-	*ev = *new_event;
-
-	CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
-	       new_event, eq->eq_deq_seq, eq->eq_size);
-
-	/* ...but did it overwrite an event we've not seen yet? */
-	if (eq->eq_deq_seq == new_event->sequence) {
-		rc = 1;
-	} else {
-		/*
-		 * don't complain with CERROR: some EQs are sized small
-		 * anyway; if it's important, the caller should complain
-		 */
-		CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
-		       eq->eq_deq_seq, new_event->sequence);
-		rc = -EOVERFLOW;
-	}
-
-	eq->eq_deq_seq = new_event->sequence + 1;
-	return rc;
-}
-
-/**
- * A nonblocking function that can be used to get the next event in an EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. The event is removed from the queue.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 0	  No pending event in the EQ.
- * \retval 1	  Indicates success.
- * \retval -ENOENT    If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-/**
- * Block the calling process until there is an event in the EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. This function returns the next event
- * in the EQ and removes it from the EQ.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 1	  Indicates success.
- * \retval -ENOENT    If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-static int
-lnet_eq_wait_locked(int *timeout_ms, long state)
-__must_hold(&the_lnet.ln_eq_wait_lock)
-{
-	int tms = *timeout_ms;
-	int wait;
-	wait_queue_entry_t wl;
-	unsigned long now;
-
-	if (!tms)
-		return -ENXIO; /* don't want to wait and no new event */
-
-	init_waitqueue_entry(&wl, current);
-	set_current_state(state);
-	add_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
-	lnet_eq_wait_unlock();
-
-	if (tms < 0) {
-		schedule();
-	} else {
-		now = jiffies;
-		schedule_timeout(msecs_to_jiffies(tms));
-		tms -= jiffies_to_msecs(jiffies - now);
-		if (tms < 0) /* no more wait but may have new event */
-			tms = 0;
-	}
-
-	wait = tms; /* might need to call here again */
-	*timeout_ms = tms;
-
-	lnet_eq_wait_lock();
-	remove_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
-	return wait;
-}
-
-/**
- * Block the calling process until there's an event from a set of EQs or
- * timeout happens.
- *
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully, in which case the corresponding event
- * is consumed.
- *
- * LNetEQPoll() provides a timeout to allow applications to poll, block for a
- * fixed period, or block indefinitely.
- *
- * \param eventqs,neq An array of EQ handles, and size of the array.
- * \param timeout_ms Time in milliseconds to wait for an event to occur on
- * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an
- * infinite timeout.
- * \param interruptible If true, use TASK_INTERRUPTIBLE; otherwise TASK_NOLOAD.
- * \param event,which On successful return (1 or -EOVERFLOW), \a event will
- * hold the next event in the EQs, and \a which will contain the index of the
- * EQ from which the event was taken.
- *
- * \retval 0	  No pending event in the EQs after timeout.
- * \retval 1	  Indicates success.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ indicated by \a which has been dropped due to limited space in the EQ.
- * \retval -ENOENT    If there's an invalid handle in \a eventqs.
- */
-int
-LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms,
-	   int interruptible,
-	   struct lnet_event *event, int *which)
-{
-	int wait = 1;
-	int rc;
-	int i;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (neq < 1)
-		return -ENOENT;
-
-	lnet_eq_wait_lock();
-
-	for (;;) {
-		for (i = 0; i < neq; i++) {
-			struct lnet_eq *eq = lnet_handle2eq(&eventqs[i]);
-
-			if (!eq) {
-				lnet_eq_wait_unlock();
-				return -ENOENT;
-			}
-
-			rc = lnet_eq_dequeue_event(eq, event);
-			if (rc) {
-				lnet_eq_wait_unlock();
-				*which = i;
-				return rc;
-			}
-		}
-
-		if (!wait)
-			break;
-
-		/*
-		 * return value of lnet_eq_wait_locked:
-		 * -1 : did not wait and no new event can have arrived
-		 *  1 : slept waiting for a new event
-		 *  0 : wait time exhausted, but a new event may have
-		 *      arrived, so dequeue again
-		 */
-		wait = lnet_eq_wait_locked(&timeout_ms,
-					   interruptible ? TASK_INTERRUPTIBLE
-					   : TASK_NOLOAD);
-		if (wait < 0) /* no new event */
-			break;
-	}
-
-	lnet_eq_wait_unlock();
-	return 0;
-}
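
A sketch of a polling loop over one EQ, following the return-value contract
documented above (0 on timeout, 1 on success, -EOVERFLOW on success with
dropped events, -ENOENT on a bad handle):

/* Sketch: drain one EQ, blocking up to 100 ms per call. */
static void example_drain_eq(struct lnet_handle_eq eqh)
{
	struct lnet_event ev;
	int which;
	int rc;

	for (;;) {
		rc = LNetEQPoll(&eqh, 1, 100, 0, &ev, &which);
		if (!rc)
			break;		/* timed out: queue is empty */
		if (rc == -ENOENT)
			break;		/* invalid handle */
		if (rc == -EOVERFLOW)
			CWARN("events were dropped before this one\n");
		/* rc is 1 or -EOVERFLOW: ev holds a valid event */
	}
}
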

+ 0 - 463
drivers/staging/lustre/lnet/lnet/lib-md.c

@@ -1,463 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-md.c
- *
- * Memory Descriptor management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_unlink(struct lnet_libmd *md)
-{
-	if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) {
-		/* first unlink attempt... */
-		struct lnet_me *me = md->md_me;
-
-		md->md_flags |= LNET_MD_FLAG_ZOMBIE;
-
-		/*
-		 * Disassociate from ME (if any),
-		 * and unlink it if it was created
-		 * with LNET_UNLINK
-		 */
-		if (me) {
-			/* detach MD from portal */
-			lnet_ptl_detach_md(me, md);
-			if (me->me_unlink == LNET_UNLINK)
-				lnet_me_unlink(me);
-		}
-
-		/* ensure all future handle lookups fail */
-		lnet_res_lh_invalidate(&md->md_lh);
-	}
-
-	if (md->md_refcount) {
-		CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
-		return;
-	}
-
-	CDEBUG(D_NET, "Unlinking md %p\n", md);
-
-	if (md->md_eq) {
-		int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
-
-		LASSERT(*md->md_eq->eq_refs[cpt] > 0);
-		(*md->md_eq->eq_refs[cpt])--;
-	}
-
-	LASSERT(!list_empty(&md->md_list));
-	list_del_init(&md->md_list);
-	kfree(md);
-}
-
-static int
-lnet_md_build(struct lnet_libmd *lmd, struct lnet_md *umd, int unlink)
-{
-	int i;
-	unsigned int niov;
-	int total_length = 0;
-
-	lmd->md_me = NULL;
-	lmd->md_start = umd->start;
-	lmd->md_offset = 0;
-	lmd->md_max_size = umd->max_size;
-	lmd->md_options = umd->options;
-	lmd->md_user_ptr = umd->user_ptr;
-	lmd->md_eq = NULL;
-	lmd->md_threshold = umd->threshold;
-	lmd->md_refcount = 0;
-	lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
-
-	if (umd->options & LNET_MD_IOVEC) {
-		if (umd->options & LNET_MD_KIOV) /* Can't specify both */
-			return -EINVAL;
-
-		niov = umd->length;
-		lmd->md_niov = umd->length;
-		memcpy(lmd->md_iov.iov, umd->start,
-		       niov * sizeof(lmd->md_iov.iov[0]));
-
-		for (i = 0; i < (int)niov; i++) {
-			/* We take the base address on trust */
-			/* invalid length */
-			if (lmd->md_iov.iov[i].iov_len <= 0)
-				return -EINVAL;
-
-			total_length += lmd->md_iov.iov[i].iov_len;
-		}
-
-		lmd->md_length = total_length;
-
-		if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */
-		    (umd->max_size < 0 ||
-		     umd->max_size > total_length)) /* illegal max_size */
-			return -EINVAL;
-
-	} else if (umd->options & LNET_MD_KIOV) {
-		niov = umd->length;
-		lmd->md_niov = umd->length;
-		memcpy(lmd->md_iov.kiov, umd->start,
-		       niov * sizeof(lmd->md_iov.kiov[0]));
-
-		for (i = 0; i < (int)niov; i++) {
-			/* We take the page pointer on trust */
-			if (lmd->md_iov.kiov[i].bv_offset +
-			    lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
-				return -EINVAL; /* invalid length */
-
-			total_length += lmd->md_iov.kiov[i].bv_len;
-		}
-
-		lmd->md_length = total_length;
-
-		if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
-		    (umd->max_size < 0 ||
-		     umd->max_size > total_length)) /* illegal max_size */
-			return -EINVAL;
-	} else {   /* contiguous */
-		lmd->md_length = umd->length;
-		niov = 1;
-		lmd->md_niov = 1;
-		lmd->md_iov.iov[0].iov_base = umd->start;
-		lmd->md_iov.iov[0].iov_len = umd->length;
-
-		if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
-		    (umd->max_size < 0 ||
-		     umd->max_size > (int)umd->length)) /* illegal max_size */
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-/* must be called with resource lock held */
-static int
-lnet_md_link(struct lnet_libmd *md, struct lnet_handle_eq eq_handle, int cpt)
-{
-	struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];
-
-	/*
-	 * NB: we are passed an allocated but inactive MD.
-	 * If we return success, the caller may lnet_md_unlink() it;
-	 * otherwise the caller may only kfree() it.
-	 */
-	/*
-	 * This implementation doesn't know how to create START events or
-	 * disable END events.  Best to LASSERT our caller is compliant so
-	 * we find out quickly...
-	 */
-	/*
-	 * TODO: re-evaluate what should be here in light of
-	 * the removal of the start and end events
-	 * (maybe we shouldn't even allow LNET_EQ_NONE!)
-	 * LASSERT(!eq);
-	 */
-	if (!LNetEQHandleIsInvalid(eq_handle)) {
-		md->md_eq = lnet_handle2eq(&eq_handle);
-
-		if (!md->md_eq)
-			return -ENOENT;
-
-		(*md->md_eq->eq_refs[cpt])++;
-	}
-
-	lnet_res_lh_initialize(container, &md->md_lh);
-
-	LASSERT(list_empty(&md->md_list));
-	list_add(&md->md_list, &container->rec_active);
-
-	return 0;
-}
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd)
-{
-	/* NB this doesn't copy out all the iov entries so when a
-	 * discontiguous MD is copied out, the target gets to know the
-	 * original iov pointer (in start) and the number of entries it had
-	 * and that's all.
-	 */
-	umd->start = lmd->md_start;
-	umd->length = !(lmd->md_options &
-		      (LNET_MD_IOVEC | LNET_MD_KIOV)) ?
-		      lmd->md_length : lmd->md_niov;
-	umd->threshold = lmd->md_threshold;
-	umd->max_size = lmd->md_max_size;
-	umd->options = lmd->md_options;
-	umd->user_ptr = lmd->md_user_ptr;
-	lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
-}
-
-static int
-lnet_md_validate(struct lnet_md *umd)
-{
-	if (!umd->start && umd->length) {
-		CERROR("MD start pointer cannot be NULL with length %u\n",
-		       umd->length);
-		return -EINVAL;
-	}
-
-	if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) &&
-	    umd->length > LNET_MAX_IOV) {
-		CERROR("Invalid option: too many fragments %u, %d max\n",
-		       umd->length, LNET_MAX_IOV);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * Create a memory descriptor and attach it to a ME
- *
- * \param meh A handle for a ME to associate the new MD with.
- * \param umd Provides initial values for the user-visible parts of a MD.
- * Other than its use for initialization, there is no linkage between this
- * structure and the MD maintained by the LNet.
- * \param unlink A flag to indicate whether the MD is automatically unlinked
- * when it becomes inactive, either because the operation threshold drops to
- * zero or because the available memory becomes less than \a umd.max_size.
- * (Note that the check for unlinking a MD only occurs after the completion
- * of a successful operation on the MD.) The value LNET_UNLINK enables auto
- * unlinking; the value LNET_RETAIN disables it.
- * \param handle On successful return, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink().
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
- * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
- * calling LNetInvalidateHandle() on it.
- * \retval -EBUSY  If the ME pointed to by \a meh is already associated with
- * a MD.
- */
-int
-LNetMDAttach(struct lnet_handle_me meh, struct lnet_md umd,
-	     enum lnet_unlink unlink, struct lnet_handle_md *handle)
-{
-	LIST_HEAD(matches);
-	LIST_HEAD(drops);
-	struct lnet_me *me;
-	struct lnet_libmd *md;
-	int cpt;
-	int rc;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (lnet_md_validate(&umd))
-		return -EINVAL;
-
-	if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
-		CERROR("Invalid option: no MD_OP set\n");
-		return -EINVAL;
-	}
-
-	md = lnet_md_alloc(&umd);
-	if (!md)
-		return -ENOMEM;
-
-	rc = lnet_md_build(md, &umd, unlink);
-	if (rc)
-		goto out_free;
-
-	cpt = lnet_cpt_of_cookie(meh.cookie);
-
-	lnet_res_lock(cpt);
-
-	me = lnet_handle2me(&meh);
-	if (!me)
-		rc = -ENOENT;
-	else if (me->me_md)
-		rc = -EBUSY;
-	else
-		rc = lnet_md_link(md, umd.eq_handle, cpt);
-
-	if (rc)
-		goto out_unlock;
-
-	/*
-	 * attach this MD to portal of ME and check if it matches any
-	 * blocked msgs on this portal
-	 */
-	lnet_ptl_attach_md(me, md, &matches, &drops);
-
-	lnet_md2handle(handle, md);
-
-	lnet_res_unlock(cpt);
-
-	lnet_drop_delayed_msg_list(&drops, "Bad match");
-	lnet_recv_delayed_msg_list(&matches);
-
-	return 0;
-
-out_unlock:
-	lnet_res_unlock(cpt);
-out_free:
-	kfree(md);
-	return rc;
-}
-EXPORT_SYMBOL(LNetMDAttach);
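
A minimal usage sketch for LNetMDAttach(), assuming an event queue handle eqh
and a match entry handle meh were obtained earlier (e.g. via LNetEQAlloc() and
LNetMEAttach()); the buffer and function names are illustrative, not from this
file:

	static char recv_buf[4096];

	static int attach_recv_md(struct lnet_handle_eq eqh,
				  struct lnet_handle_me meh,
				  struct lnet_handle_md *mdh)
	{
		struct lnet_md umd = {
			.start     = recv_buf,
			.length    = sizeof(recv_buf),
			.threshold = LNET_MD_THRESH_INF,
			.options   = LNET_MD_OP_PUT,	/* accept incoming PUTs */
			.user_ptr  = NULL,
			.eq_handle = eqh,
		};

		/* LNET_RETAIN: stay linked until an explicit LNetMDUnlink() */
		return LNetMDAttach(meh, umd, LNET_RETAIN, mdh);
	}
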
-
-/**
- * Create a "free floating" memory descriptor - a MD that is not associated
- * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations.
- *
- * \param umd,unlink See the discussion for LNetMDAttach().
- * \param handle On successful return, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(),
- * and LNetGet() operations.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that
- * it's OK to supply a NULL \a umd.eq_handle by calling
- * LNetInvalidateHandle() on it.
- */
-int
-LNetMDBind(struct lnet_md umd, enum lnet_unlink unlink,
-	   struct lnet_handle_md *handle)
-{
-	struct lnet_libmd *md;
-	int cpt;
-	int rc;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (lnet_md_validate(&umd))
-		return -EINVAL;
-
-	if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
-		CERROR("Invalid option: GET|PUT illegal on active MDs\n");
-		return -EINVAL;
-	}
-
-	md = lnet_md_alloc(&umd);
-	if (!md)
-		return -ENOMEM;
-
-	rc = lnet_md_build(md, &umd, unlink);
-	if (rc)
-		goto out_free;
-
-	cpt = lnet_res_lock_current();
-
-	rc = lnet_md_link(md, umd.eq_handle, cpt);
-	if (rc)
-		goto out_unlock;
-
-	lnet_md2handle(handle, md);
-
-	lnet_res_unlock(cpt);
-	return 0;
-
-out_unlock:
-	lnet_res_unlock(cpt);
-out_free:
-	kfree(md);
-
-	return rc;
-}
-EXPORT_SYMBOL(LNetMDBind);
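
A sketch of the free-floating case, with eqh and the payload parameters
assumed; note the mirror-image option check above, which rejects
LNET_MD_OP_GET/LNET_MD_OP_PUT on MDs meant for active LNetPut()/LNetGet()
use:

	static int bind_send_md(struct lnet_handle_eq eqh, void *payload,
				unsigned int len, struct lnet_handle_md *mdh)
	{
		struct lnet_md umd = {
			.start     = payload,
			.length    = len,
			.threshold = 2,	/* e.g. expect a SEND plus an ACK event */
			.options   = 0,	/* GET|PUT are rejected by LNetMDBind() */
			.eq_handle = eqh,
		};

		return LNetMDBind(umd, LNET_UNLINK, mdh);
	}
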
-
-/**
- * Unlink the memory descriptor from any ME it may be linked to and release
- * the internal resources associated with it. As a result, active messages
- * associated with the MD may get aborted.
- *
- * This function does not free the memory region associated with the MD;
- * i.e., the memory the user allocated for this MD. If the ME associated with
- * this MD is not NULL and was created with auto unlink enabled, the ME is
- * unlinked as well (see LNetMEAttach()).
- *
- * Explicitly unlinking a MD via this function call has the same behavior as
- * automatic unlinking, except that no dedicated LNET_EVENT_UNLINK event is
- * generated in the automatic case.
- *
- * An unlinked event can be reported in two ways:
- * - If there are no pending operations on the MD, it's unlinked immediately
- *   and an LNET_EVENT_UNLINK event is logged before this function returns.
- * - Otherwise, the MD is only marked for deletion when this function
- *   returns, and the unlinked event will be piggybacked on the event of
- *   the completion of the last operation by setting the unlinked field of
- *   the event. No dedicated LNET_EVENT_UNLINK event is generated.
- *
- * Note that in both cases the unlinked field of the event is always set; no
- * more events will happen on the MD after such an event is logged.
- *
- * \param mdh A handle for the MD to be unlinked.
- *
- * \retval 0       On success.
- * \retval -ENOENT If \a mdh does not point to a valid MD object.
- */
-int
-LNetMDUnlink(struct lnet_handle_md mdh)
-{
-	struct lnet_event ev;
-	struct lnet_libmd *md;
-	int cpt;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	cpt = lnet_cpt_of_cookie(mdh.cookie);
-	lnet_res_lock(cpt);
-
-	md = lnet_handle2md(&mdh);
-	if (!md) {
-		lnet_res_unlock(cpt);
-		return -ENOENT;
-	}
-
-	md->md_flags |= LNET_MD_FLAG_ABORTED;
-	/*
-	 * If the MD is busy, lnet_md_unlink just marks it for deletion, and
-	 * when the LND is done, the completion event flags that the MD was
-	 * unlinked.  Otherwise, we enqueue an event now...
-	 */
-	if (md->md_eq && !md->md_refcount) {
-		lnet_build_unlink_event(md, &ev);
-		lnet_eq_enqueue_event(md->md_eq, &ev);
-	}
-
-	lnet_md_unlink(md);
-
-	lnet_res_unlock(cpt);
-	return 0;
-}
-EXPORT_SYMBOL(LNetMDUnlink);
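
Because the unlink notification may be piggybacked on a completion event
rather than delivered as a dedicated LNET_EVENT_UNLINK (see the comment
above), an event consumer has to check the unlinked field on every event.
A hedged sketch of such a handler:

	static void handle_event(struct lnet_event *ev)
	{
		/* ... process ev->type (PUT/GET/ACK/REPLY/SEND) here ... */

		if (ev->type == LNET_EVENT_UNLINK || ev->unlinked) {
			/* the MD handle is now invalid: drop state keyed on it */
		}
	}
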

+ 0 - 274
drivers/staging/lustre/lnet/lnet/lib-me.c

@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-me.c
- *
- * Match Entry management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create and attach a match entry to the match list of \a portal. The new
- * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach()
- * can be used to attach a MD to an empty ME.
- *
- * \param portal The portal table index where the ME should be attached.
- * \param match_id Specifies the match criteria for the process ID of
- * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be
- * used to wildcard either of the identifiers in the lnet_process_id
- * structure.
- * \param match_bits,ignore_bits Specify the match criteria to apply
- * to the match bits in the incoming request. The ignore bits are used
- * to mask out insignificant bits in the incoming match bits. The resulting
- * bits are then compared to the ME's match bits to determine if the
- * incoming request meets the match criteria.
- * \param unlink Indicates whether the ME should be unlinked when the memory
- * descriptor associated with it is unlinked. (Note that the check for
- * unlinking a ME only occurs when the memory descriptor is unlinked.)
- * Valid values are LNET_RETAIN and LNET_UNLINK.
- * \param pos Indicates whether the new ME should be prepended or
- * appended to the match list. Allowed constants: LNET_INS_BEFORE,
- * LNET_INS_AFTER.
- * \param handle On successful return, a handle to the newly created ME
- * object is saved here. This handle can be used later in LNetMEInsert(),
- * LNetMEUnlink(), or LNetMDAttach() functions.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a portal is invalid.
- * \retval -ENOMEM If new ME object cannot be allocated.
- */
-int
-LNetMEAttach(unsigned int portal,
-	     struct lnet_process_id match_id,
-	     __u64 match_bits, __u64 ignore_bits,
-	     enum lnet_unlink unlink, enum lnet_ins_pos pos,
-	     struct lnet_handle_me *handle)
-{
-	struct lnet_match_table *mtable;
-	struct lnet_me *me;
-	struct list_head *head;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if ((int)portal >= the_lnet.ln_nportals)
-		return -EINVAL;
-
-	mtable = lnet_mt_of_attach(portal, match_id,
-				   match_bits, ignore_bits, pos);
-	if (!mtable) /* can't match portal type */
-		return -EPERM;
-
-	me = kzalloc(sizeof(*me), GFP_NOFS);
-	if (!me)
-		return -ENOMEM;
-
-	lnet_res_lock(mtable->mt_cpt);
-
-	me->me_portal = portal;
-	me->me_match_id = match_id;
-	me->me_match_bits = match_bits;
-	me->me_ignore_bits = ignore_bits;
-	me->me_unlink = unlink;
-	me->me_md = NULL;
-
-	lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt],
-			       &me->me_lh);
-	if (ignore_bits)
-		head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
-	else
-		head = lnet_mt_match_head(mtable, match_id, match_bits);
-
-	me->me_pos = head - &mtable->mt_mhash[0];
-	if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL)
-		list_add_tail(&me->me_list, head);
-	else
-		list_add(&me->me_list, head);
-
-	lnet_me2handle(handle, me);
-
-	lnet_res_unlock(mtable->mt_cpt);
-	return 0;
-}
-EXPORT_SYMBOL(LNetMEAttach);
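
A sketch of a wildcard attach (the portal index and function name are
illustrative): by passing LNET_NID_ANY/LNET_PID_ANY and all-ones ignore
bits, the ME matches any incoming request on the portal:

	static int attach_wildcard_me(unsigned int portal,
				      struct lnet_handle_me *meh)
	{
		struct lnet_process_id any = {
			.nid = LNET_NID_ANY,
			.pid = LNET_PID_ANY,
		};

		return LNetMEAttach(portal, any,
				    0,		/* match_bits */
				    ~0ULL,	/* ignore_bits: ignore everything */
				    LNET_UNLINK, LNET_INS_AFTER, meh);
	}
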
-
-/**
- * Create a match entry and insert it before or after the ME pointed to by
- * \a current_meh. The new ME is empty, i.e. not associated with a memory
- * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME.
- *
- * This function is identical to LNetMEAttach() except for the position
- * where the new ME is inserted.
- *
- * \param current_meh A handle for a ME. The new ME will be inserted
- * immediately before or immediately after this ME.
- * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion
- * for LNetMEAttach().
- *
- * \retval 0       On success.
- * \retval -ENOMEM If new ME object cannot be allocated.
- * \retval -ENOENT If \a current_meh does not point to a valid match entry.
- */
-int
-LNetMEInsert(struct lnet_handle_me current_meh,
-	     struct lnet_process_id match_id,
-	     __u64 match_bits, __u64 ignore_bits,
-	     enum lnet_unlink unlink, enum lnet_ins_pos pos,
-	     struct lnet_handle_me *handle)
-{
-	struct lnet_me *current_me;
-	struct lnet_me *new_me;
-	struct lnet_portal *ptl;
-	int cpt;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (pos == LNET_INS_LOCAL)
-		return -EPERM;
-
-	new_me = kzalloc(sizeof(*new_me), GFP_NOFS);
-	if (!new_me)
-		return -ENOMEM;
-
-	cpt = lnet_cpt_of_cookie(current_meh.cookie);
-
-	lnet_res_lock(cpt);
-
-	current_me = lnet_handle2me(&current_meh);
-	if (!current_me) {
-		kfree(new_me);
-
-		lnet_res_unlock(cpt);
-		return -ENOENT;
-	}
-
-	LASSERT(current_me->me_portal < the_lnet.ln_nportals);
-
-	ptl = the_lnet.ln_portals[current_me->me_portal];
-	if (lnet_ptl_is_unique(ptl)) {
-		/* no sense in inserting on a unique portal */
-		kfree(new_me);
-		lnet_res_unlock(cpt);
-		return -EPERM;
-	}
-
-	new_me->me_pos = current_me->me_pos;
-	new_me->me_portal = current_me->me_portal;
-	new_me->me_match_id = match_id;
-	new_me->me_match_bits = match_bits;
-	new_me->me_ignore_bits = ignore_bits;
-	new_me->me_unlink = unlink;
-	new_me->me_md = NULL;
-
-	lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh);
-
-	if (pos == LNET_INS_AFTER)
-		list_add(&new_me->me_list, &current_me->me_list);
-	else
-		list_add_tail(&new_me->me_list, &current_me->me_list);
-
-	lnet_me2handle(handle, new_me);
-
-	lnet_res_unlock(cpt);
-
-	return 0;
-}
-EXPORT_SYMBOL(LNetMEInsert);
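
A sketch of inserting a more specific entry ahead of an existing wildcard
ME so it is consulted first on the match list (handles and bits are
assumptions for illustration):

	static int insert_exact_me(struct lnet_handle_me wildcard_meh,
				   struct lnet_process_id peer, __u64 bits,
				   struct lnet_handle_me *meh)
	{
		return LNetMEInsert(wildcard_meh, peer, bits,
				    0,	/* ignore_bits: require an exact match */
				    LNET_UNLINK, LNET_INS_BEFORE, meh);
	}
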
-
-/**
- * Unlink a match entry from its match list.
- *
- * This operation also releases any resources associated with the ME. If a
- * memory descriptor is attached to the ME, then it will be unlinked as well
- * and an unlink event will be generated. It is an error to use the ME handle
- * after calling LNetMEUnlink().
- *
- * \param meh A handle for the ME to be unlinked.
- *
- * \retval 0       On success.
- * \retval -ENOENT If \a meh does not point to a valid ME.
- * \see LNetMDUnlink() for the discussion on delivering unlink event.
- */
-int
-LNetMEUnlink(struct lnet_handle_me meh)
-{
-	struct lnet_me *me;
-	struct lnet_libmd *md;
-	struct lnet_event ev;
-	int cpt;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	cpt = lnet_cpt_of_cookie(meh.cookie);
-	lnet_res_lock(cpt);
-
-	me = lnet_handle2me(&meh);
-	if (!me) {
-		lnet_res_unlock(cpt);
-		return -ENOENT;
-	}
-
-	md = me->me_md;
-	if (md) {
-		md->md_flags |= LNET_MD_FLAG_ABORTED;
-		if (md->md_eq && !md->md_refcount) {
-			lnet_build_unlink_event(md, &ev);
-			lnet_eq_enqueue_event(md->md_eq, &ev);
-		}
-	}
-
-	lnet_me_unlink(me);
-
-	lnet_res_unlock(cpt);
-	return 0;
-}
-EXPORT_SYMBOL(LNetMEUnlink);
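
Teardown is then a single call; since unlinking the ME also unlinks any
attached MD (as documented above), the ME handle from the earlier sketches
is all that needs to be retained:

	static void detach_me(struct lnet_handle_me meh)
	{
		/* returns -ENOENT if the handle is already invalid */
		(void)LNetMEUnlink(meh);
	}
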
-
-/* call with lnet_res_lock please */
-void
-lnet_me_unlink(struct lnet_me *me)
-{
-	list_del(&me->me_list);
-
-	if (me->me_md) {
-		struct lnet_libmd *md = me->me_md;
-
-		/* detach MD from portal of this ME */
-		lnet_ptl_detach_md(me, md);
-		lnet_md_unlink(md);
-	}
-
-	lnet_res_lh_invalidate(&me->me_lh);
-	kfree(me);
-}

+ 0 - 2386
drivers/staging/lustre/lnet/lnet/lib-move.c

@@ -1,2386 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-move.c
- *
- * Data movement routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-
-static int local_nid_dist_zero = 1;
-module_param(local_nid_dist_zero, int, 0444);
-MODULE_PARM_DESC(local_nid_dist_zero, "Reserved");
-
-int
-lnet_fail_nid(lnet_nid_t nid, unsigned int threshold)
-{
-	struct lnet_test_peer *tp;
-	struct lnet_test_peer *temp;
-	struct list_head *el;
-	struct list_head *next;
-	struct list_head cull;
-
-	/* NB: use lnet_net_lock(0) to serialize operations on test peers */
-	if (threshold) {
-		/* Adding a new entry */
-		tp = kzalloc(sizeof(*tp), GFP_NOFS);
-		if (!tp)
-			return -ENOMEM;
-
-		tp->tp_nid = nid;
-		tp->tp_threshold = threshold;
-
-		lnet_net_lock(0);
-		list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers);
-		lnet_net_unlock(0);
-		return 0;
-	}
-
-	/* removing entries */
-	INIT_LIST_HEAD(&cull);
-
-	lnet_net_lock(0);
-
-	list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
-		tp = list_entry(el, struct lnet_test_peer, tp_list);
-
-		if (!tp->tp_threshold ||    /* needs culling anyway */
-		    nid == LNET_NID_ANY ||       /* removing all entries */
-		    tp->tp_nid == nid) {	  /* matched this one */
-			list_del(&tp->tp_list);
-			list_add(&tp->tp_list, &cull);
-		}
-	}
-
-	lnet_net_unlock(0);
-
-	list_for_each_entry_safe(tp, temp, &cull, tp_list) {
-		list_del(&tp->tp_list);
-		kfree(tp);
-	}
-	return 0;
-}
-
-static int
-fail_peer(lnet_nid_t nid, int outgoing)
-{
-	struct lnet_test_peer *tp;
-	struct lnet_test_peer *temp;
-	struct list_head *el;
-	struct list_head *next;
-	struct list_head cull;
-	int fail = 0;
-
-	INIT_LIST_HEAD(&cull);
-
-	/* NB: use lnet_net_lock(0) to serialize operations on test peers */
-	lnet_net_lock(0);
-
-	list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
-		tp = list_entry(el, struct lnet_test_peer, tp_list);
-
-		if (!tp->tp_threshold) {
-			/* zombie entry */
-			if (outgoing) {
-				/*
-				 * only cull zombies on outgoing tests,
-				 * since we may be at interrupt priority on
-				 * incoming messages.
-				 */
-				list_del(&tp->tp_list);
-				list_add(&tp->tp_list, &cull);
-			}
-			continue;
-		}
-
-		if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */
-		    nid == tp->tp_nid) {	/* fail this peer */
-			fail = 1;
-
-			if (tp->tp_threshold != LNET_MD_THRESH_INF) {
-				tp->tp_threshold--;
-				if (outgoing &&
-				    !tp->tp_threshold) {
-					/* see above */
-					list_del(&tp->tp_list);
-					list_add(&tp->tp_list, &cull);
-				}
-			}
-			break;
-		}
-	}
-
-	lnet_net_unlock(0);
-
-	list_for_each_entry_safe(tp, temp, &cull, tp_list) {
-		list_del(&tp->tp_list);
-
-		kfree(tp);
-	}
-
-	return fail;
-}
-
-unsigned int
-lnet_iov_nob(unsigned int niov, struct kvec *iov)
-{
-	unsigned int nob = 0;
-
-	LASSERT(!niov || iov);
-	while (niov-- > 0)
-		nob += (iov++)->iov_len;
-
-	return nob;
-}
-EXPORT_SYMBOL(lnet_iov_nob);
-
-void
-lnet_copy_iov2iter(struct iov_iter *to,
-		   unsigned int nsiov, const struct kvec *siov,
-		   unsigned int soffset, unsigned int nob)
-{
-	/* NB diov, siov are READ-ONLY */
-	const char *s;
-	size_t left;
-
-	if (!nob)
-		return;
-
-	/* skip complete frags before 'soffset' */
-	LASSERT(nsiov > 0);
-	while (soffset >= siov->iov_len) {
-		soffset -= siov->iov_len;
-		siov++;
-		nsiov--;
-		LASSERT(nsiov > 0);
-	}
-
-	s = (char *)siov->iov_base + soffset;
-	left = siov->iov_len - soffset;
-	do {
-		size_t n, copy = left;
-
-		LASSERT(nsiov > 0);
-
-		if (copy > nob)
-			copy = nob;
-		n = copy_to_iter(s, copy, to);
-		if (n != copy)
-			return;
-		nob -= n;
-
-		siov++;
-		s = (char *)siov->iov_base;
-		left = siov->iov_len;
-		nsiov--;
-	} while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_iov2iter);
-
-void
-lnet_copy_kiov2iter(struct iov_iter *to,
-		    unsigned int nsiov, const struct bio_vec *siov,
-		    unsigned int soffset, unsigned int nob)
-{
-	if (!nob)
-		return;
-
-	LASSERT(!in_interrupt());
-
-	LASSERT(nsiov > 0);
-	while (soffset >= siov->bv_len) {
-		soffset -= siov->bv_len;
-		siov++;
-		nsiov--;
-		LASSERT(nsiov > 0);
-	}
-
-	do {
-		size_t copy = siov->bv_len - soffset, n;
-
-		LASSERT(nsiov > 0);
-
-		if (copy > nob)
-			copy = nob;
-		n = copy_page_to_iter(siov->bv_page,
-				      siov->bv_offset + soffset,
-				      copy, to);
-		if (n != copy)
-			return;
-		nob -= n;
-		siov++;
-		nsiov--;
-		soffset = 0;
-	} while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2iter);
-
-int
-lnet_extract_iov(int dst_niov, struct kvec *dst,
-		 int src_niov, const struct kvec *src,
-		 unsigned int offset, unsigned int len)
-{
-	/*
-	 * Initialise 'dst' to the subset of 'src' starting at 'offset',
-	 * for exactly 'len' bytes, and return the number of entries.
-	 * NB not destructive to 'src'
-	 */
-	unsigned int frag_len;
-	unsigned int niov;
-
-	if (!len)			   /* no data => */
-		return 0;		     /* no frags */
-
-	LASSERT(src_niov > 0);
-	while (offset >= src->iov_len) {      /* skip initial frags */
-		offset -= src->iov_len;
-		src_niov--;
-		src++;
-		LASSERT(src_niov > 0);
-	}
-
-	niov = 1;
-	for (;;) {
-		LASSERT(src_niov > 0);
-		LASSERT((int)niov <= dst_niov);
-
-		frag_len = src->iov_len - offset;
-		dst->iov_base = ((char *)src->iov_base) + offset;
-
-		if (len <= frag_len) {
-			dst->iov_len = len;
-			return niov;
-		}
-
-		dst->iov_len = frag_len;
-
-		len -= frag_len;
-		dst++;
-		src++;
-		niov++;
-		src_niov--;
-		offset = 0;
-	}
-}
-EXPORT_SYMBOL(lnet_extract_iov);
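
A worked example of the extraction helper (the arrays are illustrative):
a 100-byte range starting at offset 50 of two 100-byte fragments spans
both fragments, so two destination entries are produced:

	static void extract_demo(void)
	{
		static char a[100], b[100];
		struct kvec src[2] = {
			{ .iov_base = a, .iov_len = sizeof(a) },
			{ .iov_base = b, .iov_len = sizeof(b) },
		};
		struct kvec dst[2];
		int n;

		/* dst[0] = a+50 (50 bytes), dst[1] = b (50 bytes); n == 2 */
		n = lnet_extract_iov(2, dst, 2, src, 50, 100);
		(void)n;
	}
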
-
-unsigned int
-lnet_kiov_nob(unsigned int niov, struct bio_vec *kiov)
-{
-	unsigned int nob = 0;
-
-	LASSERT(!niov || kiov);
-	while (niov-- > 0)
-		nob += (kiov++)->bv_len;
-
-	return nob;
-}
-EXPORT_SYMBOL(lnet_kiov_nob);
-
-int
-lnet_extract_kiov(int dst_niov, struct bio_vec *dst,
-		  int src_niov, const struct bio_vec *src,
-		  unsigned int offset, unsigned int len)
-{
-	/*
-	 * Initialise 'dst' to the subset of 'src' starting at 'offset',
-	 * for exactly 'len' bytes, and return the number of entries.
-	 * NB not destructive to 'src'
-	 */
-	unsigned int frag_len;
-	unsigned int niov;
-
-	if (!len)			   /* no data => */
-		return 0;		     /* no frags */
-
-	LASSERT(src_niov > 0);
-	while (offset >= src->bv_len) {      /* skip initial frags */
-		offset -= src->bv_len;
-		src_niov--;
-		src++;
-		LASSERT(src_niov > 0);
-	}
-
-	niov = 1;
-	for (;;) {
-		LASSERT(src_niov > 0);
-		LASSERT((int)niov <= dst_niov);
-
-		frag_len = src->bv_len - offset;
-		dst->bv_page = src->bv_page;
-		dst->bv_offset = src->bv_offset + offset;
-
-		if (len <= frag_len) {
-			dst->bv_len = len;
-			LASSERT(dst->bv_offset + dst->bv_len
-					<= PAGE_SIZE);
-			return niov;
-		}
-
-		dst->bv_len = frag_len;
-		LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE);
-
-		len -= frag_len;
-		dst++;
-		src++;
-		niov++;
-		src_niov--;
-		offset = 0;
-	}
-}
-EXPORT_SYMBOL(lnet_extract_kiov);
-
-void
-lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
-	     int delayed, unsigned int offset, unsigned int mlen,
-	     unsigned int rlen)
-{
-	unsigned int niov = 0;
-	struct kvec *iov = NULL;
-	struct bio_vec *kiov = NULL;
-	struct iov_iter to;
-	int rc;
-
-	LASSERT(!in_interrupt());
-	LASSERT(!mlen || msg);
-
-	if (msg) {
-		LASSERT(msg->msg_receiving);
-		LASSERT(!msg->msg_sending);
-		LASSERT(rlen == msg->msg_len);
-		LASSERT(mlen <= msg->msg_len);
-		LASSERT(msg->msg_offset == offset);
-		LASSERT(msg->msg_wanted == mlen);
-
-		msg->msg_receiving = 0;
-
-		if (mlen) {
-			niov = msg->msg_niov;
-			iov  = msg->msg_iov;
-			kiov = msg->msg_kiov;
-
-			LASSERT(niov > 0);
-			LASSERT(!iov != !kiov);
-		}
-	}
-
-	if (iov) {
-		iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset);
-		iov_iter_advance(&to, offset);
-	} else {
-		iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset);
-		iov_iter_advance(&to, offset);
-	}
-	rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen);
-	if (rc < 0)
-		lnet_finalize(ni, msg, rc);
-}
-
-static void
-lnet_setpayloadbuffer(struct lnet_msg *msg)
-{
-	struct lnet_libmd *md = msg->msg_md;
-
-	LASSERT(msg->msg_len > 0);
-	LASSERT(!msg->msg_routing);
-	LASSERT(md);
-	LASSERT(!msg->msg_niov);
-	LASSERT(!msg->msg_iov);
-	LASSERT(!msg->msg_kiov);
-
-	msg->msg_niov = md->md_niov;
-	if (md->md_options & LNET_MD_KIOV)
-		msg->msg_kiov = md->md_iov.kiov;
-	else
-		msg->msg_iov = md->md_iov.iov;
-}
-
-void
-lnet_prep_send(struct lnet_msg *msg, int type, struct lnet_process_id target,
-	       unsigned int offset, unsigned int len)
-{
-	msg->msg_type = type;
-	msg->msg_target = target;
-	msg->msg_len = len;
-	msg->msg_offset = offset;
-
-	if (len)
-		lnet_setpayloadbuffer(msg);
-
-	memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr));
-	msg->msg_hdr.type	   = cpu_to_le32(type);
-	msg->msg_hdr.dest_nid       = cpu_to_le64(target.nid);
-	msg->msg_hdr.dest_pid       = cpu_to_le32(target.pid);
-	/* src_nid will be set later */
-	msg->msg_hdr.src_pid	= cpu_to_le32(the_lnet.ln_pid);
-	msg->msg_hdr.payload_length = cpu_to_le32(len);
-}
-
-static void
-lnet_ni_send(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	void *priv = msg->msg_private;
-	int rc;
-
-	LASSERT(!in_interrupt());
-	LASSERT(LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
-		(msg->msg_txcredit && msg->msg_peertxcredit));
-
-	rc = ni->ni_lnd->lnd_send(ni, priv, msg);
-	if (rc < 0)
-		lnet_finalize(ni, msg, rc);
-}
-
-static int
-lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	int rc;
-
-	LASSERT(!msg->msg_sending);
-	LASSERT(msg->msg_receiving);
-	LASSERT(!msg->msg_rx_ready_delay);
-	LASSERT(ni->ni_lnd->lnd_eager_recv);
-
-	msg->msg_rx_ready_delay = 1;
-	rc = ni->ni_lnd->lnd_eager_recv(ni, msg->msg_private, msg,
-					&msg->msg_private);
-	if (rc) {
-		CERROR("recv from %s / send to %s aborted: eager_recv failed %d\n",
-		       libcfs_nid2str(msg->msg_rxpeer->lp_nid),
-		       libcfs_id2str(msg->msg_target), rc);
-		LASSERT(rc < 0); /* required by my callers */
-	}
-
-	return rc;
-}
-
-/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */
-static void
-lnet_ni_query_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
-	unsigned long last_alive = 0;
-
-	LASSERT(lnet_peer_aliveness_enabled(lp));
-	LASSERT(ni->ni_lnd->lnd_query);
-
-	lnet_net_unlock(lp->lp_cpt);
-	ni->ni_lnd->lnd_query(ni, lp->lp_nid, &last_alive);
-	lnet_net_lock(lp->lp_cpt);
-
-	lp->lp_last_query = jiffies;
-
-	if (last_alive) /* NI has updated timestamp */
-		lp->lp_last_alive = last_alive;
-}
-
-/* NB: always called with lnet_net_lock held */
-static inline int
-lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
-{
-	int alive;
-	unsigned long deadline;
-
-	LASSERT(lnet_peer_aliveness_enabled(lp));
-
-	/* Trust lnet_notify() if it has more recent aliveness news, but
-	 * ignore the initial assumed death (see lnet_peers_start_down()).
-	 */
-	if (!lp->lp_alive && lp->lp_alive_count > 0 &&
-	    time_after_eq(lp->lp_timestamp, lp->lp_last_alive))
-		return 0;
-
-	deadline = lp->lp_last_alive + lp->lp_ni->ni_peertimeout * HZ;
-	alive = time_after(deadline, now);
-
-	/* Update obsolete lp_alive except for routers assumed to be dead
-	 * initially, because the router checker updates aliveness in that
-	 * case, and moreover lp_last_alive at peer creation is only assumed.
-	 */
-	if (alive && !lp->lp_alive &&
-	    !(lnet_isrouter(lp) && !lp->lp_alive_count))
-		lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
-
-	return alive;
-}
-
-/*
- * NB: returns 1 when alive, 0 when dead, negative when error;
- *     may drop the lnet_net_lock
- */
-static int
-lnet_peer_alive_locked(struct lnet_peer *lp)
-{
-	unsigned long now = jiffies;
-
-	if (!lnet_peer_aliveness_enabled(lp))
-		return -ENODEV;
-
-	if (lnet_peer_is_alive(lp, now))
-		return 1;
-
-	/*
-	 * Peer appears dead, but we should avoid frequent NI queries (at
-	 * most once per lnet_queryinterval seconds).
-	 */
-	if (lp->lp_last_query) {
-		static const int lnet_queryinterval = 1;
-
-		unsigned long next_query =
-			   lp->lp_last_query + lnet_queryinterval * HZ;
-
-		if (time_before(now, next_query)) {
-			if (lp->lp_alive)
-				CWARN("Unexpected aliveness of peer %s: %d < %d (%d/%d)\n",
-				      libcfs_nid2str(lp->lp_nid),
-				      (int)now, (int)next_query,
-				      lnet_queryinterval,
-				      lp->lp_ni->ni_peertimeout);
-			return 0;
-		}
-	}
-
-	/* query NI for latest aliveness news */
-	lnet_ni_query_locked(lp->lp_ni, lp);
-
-	if (lnet_peer_is_alive(lp, now))
-		return 1;
-
-	lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
-	return 0;
-}
-
-/**
- * \param msg The message to be sent.
- * \param do_send True if lnet_ni_send() should be called in this function.
- *	  lnet_send() is going to lnet_net_unlock immediately after this, so
- *	  it sets do_send FALSE and I don't do the unlock/send/lock bit.
- *
- * \retval LNET_CREDIT_OK If \a msg sent or OK to send.
- * \retval LNET_CREDIT_WAIT If \a msg blocked for credit.
- * \retval -EHOSTUNREACH If the next hop of the message appears dead.
- * \retval -ECANCELED If the MD of the message has been unlinked.
- */
-static int
-lnet_post_send_locked(struct lnet_msg *msg, int do_send)
-{
-	struct lnet_peer *lp = msg->msg_txpeer;
-	struct lnet_ni *ni = lp->lp_ni;
-	int cpt = msg->msg_tx_cpt;
-	struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
-
-	/* non-lnet_send() callers have checked before */
-	LASSERT(!do_send || msg->msg_tx_delayed);
-	LASSERT(!msg->msg_receiving);
-	LASSERT(msg->msg_tx_committed);
-
-	/* NB 'lp' is always the next hop */
-	if (!(msg->msg_target.pid & LNET_PID_USERFLAG) &&
-	    !lnet_peer_alive_locked(lp)) {
-		the_lnet.ln_counters[cpt]->drop_count++;
-		the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
-		lnet_net_unlock(cpt);
-
-		CNETERR("Dropping message for %s: peer not alive\n",
-			libcfs_id2str(msg->msg_target));
-		if (do_send)
-			lnet_finalize(ni, msg, -EHOSTUNREACH);
-
-		lnet_net_lock(cpt);
-		return -EHOSTUNREACH;
-	}
-
-	if (msg->msg_md &&
-	    (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED)) {
-		lnet_net_unlock(cpt);
-
-		CNETERR("Aborting message for %s: LNetM[DE]Unlink() already called on the MD/ME.\n",
-			libcfs_id2str(msg->msg_target));
-		if (do_send)
-			lnet_finalize(ni, msg, -ECANCELED);
-
-		lnet_net_lock(cpt);
-		return -ECANCELED;
-	}
-
-	if (!msg->msg_peertxcredit) {
-		LASSERT((lp->lp_txcredits < 0) ==
-			!list_empty(&lp->lp_txq));
-
-		msg->msg_peertxcredit = 1;
-		lp->lp_txqnob += msg->msg_len + sizeof(struct lnet_hdr);
-		lp->lp_txcredits--;
-
-		if (lp->lp_txcredits < lp->lp_mintxcredits)
-			lp->lp_mintxcredits = lp->lp_txcredits;
-
-		if (lp->lp_txcredits < 0) {
-			msg->msg_tx_delayed = 1;
-			list_add_tail(&msg->msg_list, &lp->lp_txq);
-			return LNET_CREDIT_WAIT;
-		}
-	}
-
-	if (!msg->msg_txcredit) {
-		LASSERT((tq->tq_credits < 0) ==
-			!list_empty(&tq->tq_delayed));
-
-		msg->msg_txcredit = 1;
-		tq->tq_credits--;
-
-		if (tq->tq_credits < tq->tq_credits_min)
-			tq->tq_credits_min = tq->tq_credits;
-
-		if (tq->tq_credits < 0) {
-			msg->msg_tx_delayed = 1;
-			list_add_tail(&msg->msg_list, &tq->tq_delayed);
-			return LNET_CREDIT_WAIT;
-		}
-	}
-
-	if (do_send) {
-		lnet_net_unlock(cpt);
-		lnet_ni_send(ni, msg);
-		lnet_net_lock(cpt);
-	}
-	return LNET_CREDIT_OK;
-}
-
-static struct lnet_rtrbufpool *
-lnet_msg2bufpool(struct lnet_msg *msg)
-{
-	struct lnet_rtrbufpool *rbp;
-	int cpt;
-
-	LASSERT(msg->msg_rx_committed);
-
-	cpt = msg->msg_rx_cpt;
-	rbp = &the_lnet.ln_rtrpools[cpt][0];
-
-	LASSERT(msg->msg_len <= LNET_MTU);
-	while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) {
-		rbp++;
-		LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]);
-	}
-
-	return rbp;
-}
-
-static int
-lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
-{
-	/*
-	 * lnet_parse is going to lnet_net_unlock immediately after this, so it
-	 * sets do_recv FALSE and I don't do the unlock/send/lock bit.
-	 * I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if
-	 * received or OK to receive
-	 */
-	struct lnet_peer *lp = msg->msg_rxpeer;
-	struct lnet_rtrbufpool *rbp;
-	struct lnet_rtrbuf *rb;
-
-	LASSERT(!msg->msg_iov);
-	LASSERT(!msg->msg_kiov);
-	LASSERT(!msg->msg_niov);
-	LASSERT(msg->msg_routing);
-	LASSERT(msg->msg_receiving);
-	LASSERT(!msg->msg_sending);
-
-	/* non-lnet_parse callers only receive delayed messages */
-	LASSERT(!do_recv || msg->msg_rx_delayed);
-
-	if (!msg->msg_peerrtrcredit) {
-		LASSERT((lp->lp_rtrcredits < 0) ==
-			!list_empty(&lp->lp_rtrq));
-
-		msg->msg_peerrtrcredit = 1;
-		lp->lp_rtrcredits--;
-		if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
-			lp->lp_minrtrcredits = lp->lp_rtrcredits;
-
-		if (lp->lp_rtrcredits < 0) {
-			/* must have checked eager_recv before here */
-			LASSERT(msg->msg_rx_ready_delay);
-			msg->msg_rx_delayed = 1;
-			list_add_tail(&msg->msg_list, &lp->lp_rtrq);
-			return LNET_CREDIT_WAIT;
-		}
-	}
-
-	rbp = lnet_msg2bufpool(msg);
-
-	if (!msg->msg_rtrcredit) {
-		msg->msg_rtrcredit = 1;
-		rbp->rbp_credits--;
-		if (rbp->rbp_credits < rbp->rbp_mincredits)
-			rbp->rbp_mincredits = rbp->rbp_credits;
-
-		if (rbp->rbp_credits < 0) {
-			/* must have checked eager_recv before here */
-			LASSERT(msg->msg_rx_ready_delay);
-			msg->msg_rx_delayed = 1;
-			list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
-			return LNET_CREDIT_WAIT;
-		}
-	}
-
-	LASSERT(!list_empty(&rbp->rbp_bufs));
-	rb = list_entry(rbp->rbp_bufs.next, struct lnet_rtrbuf, rb_list);
-	list_del(&rb->rb_list);
-
-	msg->msg_niov = rbp->rbp_npages;
-	msg->msg_kiov = &rb->rb_kiov[0];
-
-	if (do_recv) {
-		int cpt = msg->msg_rx_cpt;
-
-		lnet_net_unlock(cpt);
-		lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
-			     0, msg->msg_len, msg->msg_len);
-		lnet_net_lock(cpt);
-	}
-	return LNET_CREDIT_OK;
-}
-
-void
-lnet_return_tx_credits_locked(struct lnet_msg *msg)
-{
-	struct lnet_peer *txpeer = msg->msg_txpeer;
-	struct lnet_msg *msg2;
-
-	if (msg->msg_txcredit) {
-		struct lnet_ni *ni = txpeer->lp_ni;
-		struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
-
-		/* give back NI txcredits */
-		msg->msg_txcredit = 0;
-
-		LASSERT((tq->tq_credits < 0) ==
-			!list_empty(&tq->tq_delayed));
-
-		tq->tq_credits++;
-		if (tq->tq_credits <= 0) {
-			msg2 = list_entry(tq->tq_delayed.next,
-					  struct lnet_msg, msg_list);
-			list_del(&msg2->msg_list);
-
-			LASSERT(msg2->msg_txpeer->lp_ni == ni);
-			LASSERT(msg2->msg_tx_delayed);
-
-			(void)lnet_post_send_locked(msg2, 1);
-		}
-	}
-
-	if (msg->msg_peertxcredit) {
-		/* give back peer txcredits */
-		msg->msg_peertxcredit = 0;
-
-		LASSERT((txpeer->lp_txcredits < 0) ==
-			!list_empty(&txpeer->lp_txq));
-
-		txpeer->lp_txqnob -= msg->msg_len + sizeof(struct lnet_hdr);
-		LASSERT(txpeer->lp_txqnob >= 0);
-
-		txpeer->lp_txcredits++;
-		if (txpeer->lp_txcredits <= 0) {
-			msg2 = list_entry(txpeer->lp_txq.next,
-					  struct lnet_msg, msg_list);
-			list_del(&msg2->msg_list);
-
-			LASSERT(msg2->msg_txpeer == txpeer);
-			LASSERT(msg2->msg_tx_delayed);
-
-			(void)lnet_post_send_locked(msg2, 1);
-		}
-	}
-
-	if (txpeer) {
-		msg->msg_txpeer = NULL;
-		lnet_peer_decref_locked(txpeer);
-	}
-}
-
-void
-lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp)
-{
-	struct lnet_msg *msg;
-
-	if (list_empty(&rbp->rbp_msgs))
-		return;
-	msg = list_entry(rbp->rbp_msgs.next,
-			 struct lnet_msg, msg_list);
-	list_del(&msg->msg_list);
-
-	(void)lnet_post_routed_recv_locked(msg, 1);
-}
-
-void
-lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
-{
-	struct list_head drop;
-	struct lnet_msg *msg;
-	struct lnet_msg *tmp;
-
-	INIT_LIST_HEAD(&drop);
-
-	list_splice_init(list, &drop);
-
-	lnet_net_unlock(cpt);
-
-	list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
-		lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
-			     0, 0, 0, msg->msg_hdr.payload_length);
-		list_del_init(&msg->msg_list);
-		lnet_finalize(NULL, msg, -ECANCELED);
-	}
-
-	lnet_net_lock(cpt);
-}
-
-void
-lnet_return_rx_credits_locked(struct lnet_msg *msg)
-{
-	struct lnet_peer *rxpeer = msg->msg_rxpeer;
-	struct lnet_msg *msg2;
-
-	if (msg->msg_rtrcredit) {
-		/* give back global router credits */
-		struct lnet_rtrbuf *rb;
-		struct lnet_rtrbufpool *rbp;
-
-		/*
-		 * NB If a msg ever blocks for a buffer in rbp_msgs, it stays
-		 * there until it gets one allocated, or aborts the wait
-		 * itself
-		 */
-		LASSERT(msg->msg_kiov);
-
-		rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
-		rbp = rb->rb_pool;
-
-		msg->msg_kiov = NULL;
-		msg->msg_rtrcredit = 0;
-
-		LASSERT(rbp == lnet_msg2bufpool(msg));
-
-		LASSERT((rbp->rbp_credits > 0) ==
-			!list_empty(&rbp->rbp_bufs));
-
-		/*
-		 * If routing is now turned off, we just drop this buffer and
-		 * don't bother trying to return credits.
-		 */
-		if (!the_lnet.ln_routing) {
-			lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
-			goto routing_off;
-		}
-
-		/*
-		 * It is possible that a user has lowered the desired number of
-		 * buffers in this pool.  Make sure we never put back
-		 * more buffers than the stated number.
-		 */
-		if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) {
-			/* Discard this buffer so we don't have too many. */
-			lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
-			rbp->rbp_nbuffers--;
-		} else {
-			list_add(&rb->rb_list, &rbp->rbp_bufs);
-			rbp->rbp_credits++;
-			if (rbp->rbp_credits <= 0)
-				lnet_schedule_blocked_locked(rbp);
-		}
-	}
-
-routing_off:
-	if (msg->msg_peerrtrcredit) {
-		/* give back peer router credits */
-		msg->msg_peerrtrcredit = 0;
-
-		LASSERT((rxpeer->lp_rtrcredits < 0) ==
-			!list_empty(&rxpeer->lp_rtrq));
-
-		rxpeer->lp_rtrcredits++;
-		/*
-		 * drop all messages which are queued to be routed on that
-		 * peer.
-		 */
-		if (!the_lnet.ln_routing) {
-			lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq,
-						     msg->msg_rx_cpt);
-		} else if (rxpeer->lp_rtrcredits <= 0) {
-			msg2 = list_entry(rxpeer->lp_rtrq.next,
-					  struct lnet_msg, msg_list);
-			list_del(&msg2->msg_list);
-
-			(void)lnet_post_routed_recv_locked(msg2, 1);
-		}
-	}
-	if (rxpeer) {
-		msg->msg_rxpeer = NULL;
-		lnet_peer_decref_locked(rxpeer);
-	}
-}
-
-static int
-lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
-{
-	struct lnet_peer *p1 = r1->lr_gateway;
-	struct lnet_peer *p2 = r2->lr_gateway;
-	int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops;
-	int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r2->lr_hops;
-
-	if (r1->lr_priority < r2->lr_priority)
-		return 1;
-
-	if (r1->lr_priority > r2->lr_priority)
-		return -ERANGE;
-
-	if (r1_hops < r2_hops)
-		return 1;
-
-	if (r1_hops > r2_hops)
-		return -ERANGE;
-
-	if (p1->lp_txqnob < p2->lp_txqnob)
-		return 1;
-
-	if (p1->lp_txqnob > p2->lp_txqnob)
-		return -ERANGE;
-
-	if (p1->lp_txcredits > p2->lp_txcredits)
-		return 1;
-
-	if (p1->lp_txcredits < p2->lp_txcredits)
-		return -ERANGE;
-
-	if (r1->lr_seq - r2->lr_seq <= 0)
-		return 1;
-
-	return -ERANGE;
-}
-
-static struct lnet_peer *
-lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
-		       lnet_nid_t rtr_nid)
-{
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	struct lnet_route *best_route;
-	struct lnet_route *last_route;
-	struct lnet_peer *lp_best;
-	struct lnet_peer *lp;
-	int rc;
-
-	/*
-	 * If @rtr_nid is not LNET_NID_ANY, return the gateway with
-	 * rtr_nid nid, otherwise find the best gateway I can use
-	 */
-	rnet = lnet_find_net_locked(LNET_NIDNET(target));
-	if (!rnet)
-		return NULL;
-
-	lp_best = NULL;
-	best_route = NULL;
-	last_route = NULL;
-	list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
-		lp = route->lr_gateway;
-
-		if (!lnet_is_route_alive(route))
-			continue;
-
-		if (ni && lp->lp_ni != ni)
-			continue;
-
-		if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
-			return lp;
-
-		if (!lp_best) {
-			best_route = route;
-			last_route = route;
-			lp_best = lp;
-			continue;
-		}
-
-		/* no protection on below fields, but it's harmless */
-		if (last_route->lr_seq - route->lr_seq < 0)
-			last_route = route;
-
-		rc = lnet_compare_routes(route, best_route);
-		if (rc < 0)
-			continue;
-
-		best_route = route;
-		lp_best = lp;
-	}
-
-	/*
-	 * Set the sequence number on the best router to the latest sequence
-	 * + 1 so we can round-robin all routers; it's racy and inaccurate,
-	 * but harmless and functional.
-	 */
-	if (best_route)
-		best_route->lr_seq = last_route->lr_seq + 1;
-	return lp_best;
-}
-
-int
-lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
-{
-	lnet_nid_t dst_nid = msg->msg_target.nid;
-	struct lnet_ni *src_ni;
-	struct lnet_ni *local_ni;
-	struct lnet_peer *lp;
-	int cpt;
-	int cpt2;
-	int rc;
-
-	/*
-	 * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
-	 * but we might want to use pre-determined router for ACK/REPLY
-	 * in the future
-	 */
-	/* NB: ni == interface pre-determined (ACK/REPLY) */
-	LASSERT(!msg->msg_txpeer);
-	LASSERT(!msg->msg_sending);
-	LASSERT(!msg->msg_target_is_router);
-	LASSERT(!msg->msg_receiving);
-
-	msg->msg_sending = 1;
-
-	LASSERT(!msg->msg_tx_committed);
-	cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
- again:
-	lnet_net_lock(cpt);
-
-	if (the_lnet.ln_shutdown) {
-		lnet_net_unlock(cpt);
-		return -ESHUTDOWN;
-	}
-
-	if (src_nid == LNET_NID_ANY) {
-		src_ni = NULL;
-	} else {
-		src_ni = lnet_nid2ni_locked(src_nid, cpt);
-		if (!src_ni) {
-			lnet_net_unlock(cpt);
-			LCONSOLE_WARN("Can't send to %s: src %s is not a local nid\n",
-				      libcfs_nid2str(dst_nid),
-				      libcfs_nid2str(src_nid));
-			return -EINVAL;
-		}
-		LASSERT(!msg->msg_routing);
-	}
-
-	/* Is this for someone on a local network? */
-	local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt);
-
-	if (local_ni) {
-		if (!src_ni) {
-			src_ni = local_ni;
-			src_nid = src_ni->ni_nid;
-		} else if (src_ni == local_ni) {
-			lnet_ni_decref_locked(local_ni, cpt);
-		} else {
-			lnet_ni_decref_locked(local_ni, cpt);
-			lnet_ni_decref_locked(src_ni, cpt);
-			lnet_net_unlock(cpt);
-			LCONSOLE_WARN("No route to %s from %s\n",
-				      libcfs_nid2str(dst_nid),
-				      libcfs_nid2str(src_nid));
-			return -EINVAL;
-		}
-
-		LASSERT(src_nid != LNET_NID_ANY);
-		lnet_msg_commit(msg, cpt);
-
-		if (!msg->msg_routing)
-			msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
-
-		if (src_ni == the_lnet.ln_loni) {
-			/* No send credit hassles with LOLND */
-			lnet_net_unlock(cpt);
-			lnet_ni_send(src_ni, msg);
-
-			lnet_net_lock(cpt);
-			lnet_ni_decref_locked(src_ni, cpt);
-			lnet_net_unlock(cpt);
-			return 0;
-		}
-
-		rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
-		/* lp has ref on src_ni; lose mine */
-		lnet_ni_decref_locked(src_ni, cpt);
-		if (rc) {
-			lnet_net_unlock(cpt);
-			LCONSOLE_WARN("Error %d finding peer %s\n", rc,
-				      libcfs_nid2str(dst_nid));
-			/* ENOMEM or shutting down */
-			return rc;
-		}
-		LASSERT(lp->lp_ni == src_ni);
-	} else {
-		/* sending to a remote network */
-		lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
-		if (!lp) {
-			if (src_ni)
-				lnet_ni_decref_locked(src_ni, cpt);
-			lnet_net_unlock(cpt);
-
-			LCONSOLE_WARN("No route to %s via %s (all routers down)\n",
-				      libcfs_id2str(msg->msg_target),
-				      libcfs_nid2str(src_nid));
-			return -EHOSTUNREACH;
-		}
-
-		/*
-		 * rtr_nid is LNET_NID_ANY or the NID of a pre-determined
-		 * router. It's possible that rtr_nid isn't LNET_NID_ANY
-		 * yet lp isn't the pre-determined router; this can happen
-		 * if the routing table changed while we released the lock.
-		 */
-		if (rtr_nid != lp->lp_nid) {
-			cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
-			if (cpt2 != cpt) {
-				if (src_ni)
-					lnet_ni_decref_locked(src_ni, cpt);
-				lnet_net_unlock(cpt);
-
-				rtr_nid = lp->lp_nid;
-				cpt = cpt2;
-				goto again;
-			}
-		}
-
-		CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
-		       libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid),
-		       lnet_msgtyp2str(msg->msg_type), msg->msg_len);
-
-		if (!src_ni) {
-			src_ni = lp->lp_ni;
-			src_nid = src_ni->ni_nid;
-		} else {
-			LASSERT(src_ni == lp->lp_ni);
-			lnet_ni_decref_locked(src_ni, cpt);
-		}
-
-		lnet_peer_addref_locked(lp);
-
-		LASSERT(src_nid != LNET_NID_ANY);
-		lnet_msg_commit(msg, cpt);
-
-		if (!msg->msg_routing) {
-			/* I'm the source and now I know which NI to send on */
-			msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
-		}
-
-		msg->msg_target_is_router = 1;
-		msg->msg_target.nid = lp->lp_nid;
-		msg->msg_target.pid = LNET_PID_LUSTRE;
-	}
-
-	/* 'lp' is our best choice of peer */
-
-	LASSERT(!msg->msg_peertxcredit);
-	LASSERT(!msg->msg_txcredit);
-	LASSERT(!msg->msg_txpeer);
-
-	msg->msg_txpeer = lp;		   /* msg takes my ref on lp */
-
-	rc = lnet_post_send_locked(msg, 0);
-	lnet_net_unlock(cpt);
-
-	if (rc < 0)
-		return rc;
-
-	if (rc == LNET_CREDIT_OK)
-		lnet_ni_send(src_ni, msg);
-
-	return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */
-}
-
-void
-lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob)
-{
-	lnet_net_lock(cpt);
-	the_lnet.ln_counters[cpt]->drop_count++;
-	the_lnet.ln_counters[cpt]->drop_length += nob;
-	lnet_net_unlock(cpt);
-
-	lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
-}
-
-static void
-lnet_recv_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-
-	if (msg->msg_wanted)
-		lnet_setpayloadbuffer(msg);
-
-	lnet_build_msg_event(msg, LNET_EVENT_PUT);
-
-	/*
-	 * Must I ACK?  If so I'll grab the ack_wmd out of the header and put
-	 * it back into the ACK during lnet_finalize()
-	 */
-	msg->msg_ack = !lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
-		       !(msg->msg_md->md_options & LNET_MD_ACK_DISABLE);
-
-	lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed,
-		     msg->msg_offset, msg->msg_wanted, hdr->payload_length);
-}
-
-static int
-lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_match_info info;
-	bool ready_delay;
-	int rc;
-
-	/* Convert put fields to host byte order */
-	le64_to_cpus(&hdr->msg.put.match_bits);
-	le32_to_cpus(&hdr->msg.put.ptl_index);
-	le32_to_cpus(&hdr->msg.put.offset);
-
-	info.mi_id.nid	= hdr->src_nid;
-	info.mi_id.pid	= hdr->src_pid;
-	info.mi_opc	= LNET_MD_OP_PUT;
-	info.mi_portal	= hdr->msg.put.ptl_index;
-	info.mi_rlength	= hdr->payload_length;
-	info.mi_roffset	= hdr->msg.put.offset;
-	info.mi_mbits	= hdr->msg.put.match_bits;
-
-	msg->msg_rx_ready_delay = !ni->ni_lnd->lnd_eager_recv;
-	ready_delay = msg->msg_rx_ready_delay;
-
- again:
-	rc = lnet_ptl_match_md(&info, msg);
-	switch (rc) {
-	default:
-		LBUG();
-
-	case LNET_MATCHMD_OK:
-		lnet_recv_put(ni, msg);
-		return 0;
-
-	case LNET_MATCHMD_NONE:
-		/*
-		 * No eager_recv, or it has already been called; the message
-		 * should have been attached to the delayed list.
-		 */
-		if (ready_delay)
-			return 0;
-
-		rc = lnet_ni_eager_recv(ni, msg);
-		if (!rc) {
-			ready_delay = true;
-			goto again;
-		}
-		/* fall through */
-
-	case LNET_MATCHMD_DROP:
-		CNETERR("Dropping PUT from %s portal %d match %llu offset %d length %d: %d\n",
-			libcfs_id2str(info.mi_id), info.mi_portal,
-			info.mi_mbits, info.mi_roffset, info.mi_rlength, rc);
-
-		return -ENOENT;	/* -ve: OK but no match */
-	}
-}
-
-static int
-lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get)
-{
-	struct lnet_match_info info;
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_handle_wire reply_wmd;
-	int rc;
-
-	/* Convert get fields to host byte order */
-	le64_to_cpus(&hdr->msg.get.match_bits);
-	le32_to_cpus(&hdr->msg.get.ptl_index);
-	le32_to_cpus(&hdr->msg.get.sink_length);
-	le32_to_cpus(&hdr->msg.get.src_offset);
-
-	info.mi_id.nid  = hdr->src_nid;
-	info.mi_id.pid  = hdr->src_pid;
-	info.mi_opc     = LNET_MD_OP_GET;
-	info.mi_portal  = hdr->msg.get.ptl_index;
-	info.mi_rlength = hdr->msg.get.sink_length;
-	info.mi_roffset = hdr->msg.get.src_offset;
-	info.mi_mbits   = hdr->msg.get.match_bits;
-
-	rc = lnet_ptl_match_md(&info, msg);
-	if (rc == LNET_MATCHMD_DROP) {
-		CNETERR("Dropping GET from %s portal %d match %llu offset %d length %d\n",
-			libcfs_id2str(info.mi_id), info.mi_portal,
-			info.mi_mbits, info.mi_roffset, info.mi_rlength);
-		return -ENOENT;	/* -ve: OK but no match */
-	}
-
-	LASSERT(rc == LNET_MATCHMD_OK);
-
-	lnet_build_msg_event(msg, LNET_EVENT_GET);
-
-	reply_wmd = hdr->msg.get.return_wmd;
-
-	lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id,
-		       msg->msg_offset, msg->msg_wanted);
-
-	msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
-
-	if (rdma_get) {
-		/* The LND completes the REPLY from her recv procedure */
-		lnet_ni_recv(ni, msg->msg_private, msg, 0,
-			     msg->msg_offset, msg->msg_len, msg->msg_len);
-		return 0;
-	}
-
-	lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0);
-	msg->msg_receiving = 0;
-
-	rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
-	if (rc < 0) {
-		/* didn't get as far as lnet_ni_send() */
-		CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
-		       libcfs_nid2str(ni->ni_nid),
-		       libcfs_id2str(info.mi_id), rc);
-
-		lnet_finalize(ni, msg, rc);
-	}
-
-	return 0;
-}
-
-static int
-lnet_parse_reply(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	void *private = msg->msg_private;
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_process_id src = {0};
-	struct lnet_libmd *md;
-	int rlength;
-	int mlength;
-	int cpt;
-
-	cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie);
-	lnet_res_lock(cpt);
-
-	src.nid = hdr->src_nid;
-	src.pid = hdr->src_pid;
-
-	/* NB handles only looked up by creator (no flips) */
-	md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
-	if (!md || !md->md_threshold || md->md_me) {
-		CNETERR("%s: Dropping REPLY from %s for %s MD %#llx.%#llx\n",
-			libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-			!md ? "invalid" : "inactive",
-			hdr->msg.reply.dst_wmd.wh_interface_cookie,
-			hdr->msg.reply.dst_wmd.wh_object_cookie);
-		if (md && md->md_me)
-			CERROR("REPLY MD also attached to portal %d\n",
-			       md->md_me->me_portal);
-
-		lnet_res_unlock(cpt);
-		return -ENOENT;	/* -ve: OK but no match */
-	}
-
-	LASSERT(!md->md_offset);
-
-	rlength = hdr->payload_length;
-	mlength = min_t(uint, rlength, md->md_length);
-
-	if (mlength < rlength &&
-	    !(md->md_options & LNET_MD_TRUNCATE)) {
-		CNETERR("%s: Dropping REPLY from %s length %d for MD %#llx would overflow (%d)\n",
-			libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-			rlength, hdr->msg.reply.dst_wmd.wh_object_cookie,
-			mlength);
-		lnet_res_unlock(cpt);
-		return -ENOENT;	/* -ve: OK but no match */
-	}
-
-	CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md %#llx\n",
-	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-	       mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie);
-
-	lnet_msg_attach_md(msg, md, 0, mlength);
-
-	if (mlength)
-		lnet_setpayloadbuffer(msg);
-
-	lnet_res_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
-	lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength);
-	return 0;
-}
-
-static int
-lnet_parse_ack(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_process_id src = {0};
-	struct lnet_libmd *md;
-	int cpt;
-
-	src.nid = hdr->src_nid;
-	src.pid = hdr->src_pid;
-
-	/* Convert ack fields to host byte order */
-	le64_to_cpus(&hdr->msg.ack.match_bits);
-	le32_to_cpus(&hdr->msg.ack.mlength);
-
-	cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie);
-	lnet_res_lock(cpt);
-
-	/* NB handles only looked up by creator (no flips) */
-	md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
-	if (!md || !md->md_threshold || md->md_me) {
-		/* Don't moan; this is expected */
-		CDEBUG(D_NET,
-		       "%s: Dropping ACK from %s to %s MD %#llx.%#llx\n",
-		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-		       !md ? "invalid" : "inactive",
-		       hdr->msg.ack.dst_wmd.wh_interface_cookie,
-		       hdr->msg.ack.dst_wmd.wh_object_cookie);
-		if (md && md->md_me)
-			CERROR("Source MD also attached to portal %d\n",
-			       md->md_me->me_portal);
-
-		lnet_res_unlock(cpt);
-		return -ENOENT;	/* -ve! */
-	}
-
-	CDEBUG(D_NET, "%s: ACK from %s into md %#llx\n",
-	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
-	       hdr->msg.ack.dst_wmd.wh_object_cookie);
-
-	lnet_msg_attach_md(msg, md, 0, 0);
-
-	lnet_res_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_ACK);
-
-	lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len);
-	return 0;
-}
-
-/**
- * \retval LNET_CREDIT_OK	If \a msg is forwarded
- * \retval LNET_CREDIT_WAIT	If \a msg is blocked because no buffer is available
- * \retval -ve			error code
- */
-int
-lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	int rc = 0;
-
-	if (!the_lnet.ln_routing)
-		return -ECANCELED;
-
-	if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
-	    lnet_msg2bufpool(msg)->rbp_credits <= 0) {
-		if (!ni->ni_lnd->lnd_eager_recv) {
-			msg->msg_rx_ready_delay = 1;
-		} else {
-			lnet_net_unlock(msg->msg_rx_cpt);
-			rc = lnet_ni_eager_recv(ni, msg);
-			lnet_net_lock(msg->msg_rx_cpt);
-		}
-	}
-
-	if (!rc)
-		rc = lnet_post_routed_recv_locked(msg, 0);
-	return rc;
-}
-
-int
-lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg)
-{
-	int rc;
-
-	switch (msg->msg_type) {
-	case LNET_MSG_ACK:
-		rc = lnet_parse_ack(ni, msg);
-		break;
-	case LNET_MSG_PUT:
-		rc = lnet_parse_put(ni, msg);
-		break;
-	case LNET_MSG_GET:
-		rc = lnet_parse_get(ni, msg, msg->msg_rdma_get);
-		break;
-	case LNET_MSG_REPLY:
-		rc = lnet_parse_reply(ni, msg);
-		break;
-	default: /* prevent an unused label if !kernel */
-		LASSERT(0);
-		return -EPROTO;
-	}
-
-	LASSERT(!rc || rc == -ENOENT);
-	return rc;
-}
-
-char *
-lnet_msgtyp2str(int type)
-{
-	switch (type) {
-	case LNET_MSG_ACK:
-		return "ACK";
-	case LNET_MSG_PUT:
-		return "PUT";
-	case LNET_MSG_GET:
-		return "GET";
-	case LNET_MSG_REPLY:
-		return "REPLY";
-	case LNET_MSG_HELLO:
-		return "HELLO";
-	default:
-		return "<UNKNOWN>";
-	}
-}
-
-void
-lnet_print_hdr(struct lnet_hdr *hdr)
-{
-	struct lnet_process_id src = {0};
-	struct lnet_process_id dst = {0};
-	char *type_str = lnet_msgtyp2str(hdr->type);
-
-	src.nid = hdr->src_nid;
-	src.pid = hdr->src_pid;
-
-	dst.nid = hdr->dest_nid;
-	dst.pid = hdr->dest_pid;
-
-	CWARN("P3 Header at %p of type %s\n", hdr, type_str);
-	CWARN("    From %s\n", libcfs_id2str(src));
-	CWARN("    To   %s\n", libcfs_id2str(dst));
-
-	switch (hdr->type) {
-	default:
-		break;
-
-	case LNET_MSG_PUT:
-		CWARN("    Ptl index %d, ack md %#llx.%#llx, match bits %llu\n",
-		      hdr->msg.put.ptl_index,
-		      hdr->msg.put.ack_wmd.wh_interface_cookie,
-		      hdr->msg.put.ack_wmd.wh_object_cookie,
-		      hdr->msg.put.match_bits);
-		CWARN("    Length %d, offset %d, hdr data %#llx\n",
-		      hdr->payload_length, hdr->msg.put.offset,
-		      hdr->msg.put.hdr_data);
-		break;
-
-	case LNET_MSG_GET:
-		CWARN("    Ptl index %d, return md %#llx.%#llx, match bits %llu\n",
-		      hdr->msg.get.ptl_index,
-		      hdr->msg.get.return_wmd.wh_interface_cookie,
-		      hdr->msg.get.return_wmd.wh_object_cookie,
-		      hdr->msg.get.match_bits);
-		CWARN("    Length %d, src offset %d\n",
-		      hdr->msg.get.sink_length,
-		      hdr->msg.get.src_offset);
-		break;
-
-	case LNET_MSG_ACK:
-		CWARN("    dst md %#llx.%#llx, manipulated length %d\n",
-		      hdr->msg.ack.dst_wmd.wh_interface_cookie,
-		      hdr->msg.ack.dst_wmd.wh_object_cookie,
-		      hdr->msg.ack.mlength);
-		break;
-
-	case LNET_MSG_REPLY:
-		CWARN("    dst md %#llx.%#llx, length %d\n",
-		      hdr->msg.reply.dst_wmd.wh_interface_cookie,
-		      hdr->msg.reply.dst_wmd.wh_object_cookie,
-		      hdr->payload_length);
-	}
-}
-
-int
-lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,
-	   void *private, int rdma_req)
-{
-	int rc = 0;
-	int cpt;
-	int for_me;
-	struct lnet_msg *msg;
-	lnet_pid_t dest_pid;
-	lnet_nid_t dest_nid;
-	lnet_nid_t src_nid;
-	__u32 payload_length;
-	__u32 type;
-
-	LASSERT(!in_interrupt());
-
-	type = le32_to_cpu(hdr->type);
-	src_nid = le64_to_cpu(hdr->src_nid);
-	dest_nid = le64_to_cpu(hdr->dest_nid);
-	dest_pid = le32_to_cpu(hdr->dest_pid);
-	payload_length = le32_to_cpu(hdr->payload_length);
-
-	for_me = (ni->ni_nid == dest_nid);
-	cpt = lnet_cpt_of_nid(from_nid);
-
-	switch (type) {
-	case LNET_MSG_ACK:
-	case LNET_MSG_GET:
-		if (payload_length > 0) {
-			CERROR("%s, src %s: bad %s payload %d (0 expected)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       lnet_msgtyp2str(type), payload_length);
-			return -EPROTO;
-		}
-		break;
-
-	case LNET_MSG_PUT:
-	case LNET_MSG_REPLY:
-		if (payload_length >
-		   (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
-			CERROR("%s, src %s: bad %s payload %d (%d max expected)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       lnet_msgtyp2str(type),
-			       payload_length,
-			       for_me ? LNET_MAX_PAYLOAD : LNET_MTU);
-			return -EPROTO;
-		}
-		break;
-
-	default:
-		CERROR("%s, src %s: Bad message type 0x%x\n",
-		       libcfs_nid2str(from_nid),
-		       libcfs_nid2str(src_nid), type);
-		return -EPROTO;
-	}
-
-	if (the_lnet.ln_routing &&
-	    ni->ni_last_alive != ktime_get_real_seconds()) {
-		/* NB: so far this is the only place that sets NI status to "up" */
-		lnet_ni_lock(ni);
-		ni->ni_last_alive = ktime_get_real_seconds();
-		if (ni->ni_status &&
-		    ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
-			ni->ni_status->ns_status = LNET_NI_STATUS_UP;
-		lnet_ni_unlock(ni);
-	}
-
-	/*
-	 * Regard a bad destination NID as a protocol error.  Senders should
-	 * know what they're doing; if they don't they're misconfigured, buggy
-	 * or malicious so we chop them off at the knees :)
-	 */
-	if (!for_me) {
-		if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) {
-			/* should have gone direct */
-			CERROR("%s, src %s: Bad dest nid %s (should have been sent direct)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       libcfs_nid2str(dest_nid));
-			return -EPROTO;
-		}
-
-		if (lnet_islocalnid(dest_nid)) {
-			/*
-			 * dest is another local NI; sender should have used
-			 * this node's NID on its own network
-			 */
-			CERROR("%s, src %s: Bad dest nid %s (it's my nid but on a different network)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       libcfs_nid2str(dest_nid));
-			return -EPROTO;
-		}
-
-		if (rdma_req && type == LNET_MSG_GET) {
-			CERROR("%s, src %s: Bad optimized GET for %s (final destination must be me)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       libcfs_nid2str(dest_nid));
-			return -EPROTO;
-		}
-
-		if (!the_lnet.ln_routing) {
-			CERROR("%s, src %s: Dropping message for %s (routing not enabled)\n",
-			       libcfs_nid2str(from_nid),
-			       libcfs_nid2str(src_nid),
-			       libcfs_nid2str(dest_nid));
-			goto drop;
-		}
-	}
-
-	/*
-	 * Message looks OK; we're not going to return an error, so we MUST
-	 * call back lnd_recv() come what may...
-	 */
-	if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
-	    fail_peer(src_nid, 0)) {	     /* shall we now? */
-		CERROR("%s, src %s: Dropping %s to simulate failure\n",
-		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
-		       lnet_msgtyp2str(type));
-		goto drop;
-	}
-
-	if (!list_empty(&the_lnet.ln_drop_rules) &&
-	    lnet_drop_rule_match(hdr)) {
-		CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate silent message loss\n",
-		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
-		       libcfs_nid2str(dest_nid), lnet_msgtyp2str(type));
-		goto drop;
-	}
-
-	msg = kzalloc(sizeof(*msg), GFP_NOFS);
-	if (!msg) {
-		CERROR("%s, src %s: Dropping %s (out of memory)\n",
-		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
-		       lnet_msgtyp2str(type));
-		goto drop;
-	}
-
-	/* msg zeroed by kzalloc()
-	 * i.e. flags all clear, pointers NULL etc
-	 */
-	msg->msg_type = type;
-	msg->msg_private = private;
-	msg->msg_receiving = 1;
-	msg->msg_rdma_get = rdma_req;
-	msg->msg_wanted = payload_length;
-	msg->msg_len = payload_length;
-	msg->msg_offset = 0;
-	msg->msg_hdr = *hdr;
-	/* for building message event */
-	msg->msg_from = from_nid;
-	if (!for_me) {
-		msg->msg_target.pid	= dest_pid;
-		msg->msg_target.nid	= dest_nid;
-		msg->msg_routing	= 1;
-
-	} else {
-		/* convert common msg->hdr fields to host byteorder */
-		msg->msg_hdr.type	= type;
-		msg->msg_hdr.src_nid	= src_nid;
-		le32_to_cpus(&msg->msg_hdr.src_pid);
-		msg->msg_hdr.dest_nid	= dest_nid;
-		msg->msg_hdr.dest_pid	= dest_pid;
-		msg->msg_hdr.payload_length = payload_length;
-	}
-
-	lnet_net_lock(cpt);
-	rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt);
-	if (rc) {
-		lnet_net_unlock(cpt);
-		CERROR("%s, src %s: Dropping %s (error %d looking up sender)\n",
-		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
-		       lnet_msgtyp2str(type), rc);
-		kfree(msg);
-		if (rc == -ESHUTDOWN)
-			/* We are shutting down. Don't do anything more */
-			return 0;
-		goto drop;
-	}
-
-	if (lnet_isrouter(msg->msg_rxpeer)) {
-		lnet_peer_set_alive(msg->msg_rxpeer);
-		if (avoid_asym_router_failure &&
-		    LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
-			/* received a remote message from a router; update
-			 * the remote NI status on this router.
-			 * NB: multi-hop routed messages are ignored.
-			 */
-			lnet_router_ni_update_locked(msg->msg_rxpeer,
-						     LNET_NIDNET(src_nid));
-		}
-	}
-
-	lnet_msg_commit(msg, cpt);
-
-	/* message delay simulation */
-	if (unlikely(!list_empty(&the_lnet.ln_delay_rules) &&
-		     lnet_delay_rule_match_locked(hdr, msg))) {
-		lnet_net_unlock(cpt);
-		return 0;
-	}
-
-	if (!for_me) {
-		rc = lnet_parse_forward_locked(ni, msg);
-		lnet_net_unlock(cpt);
-
-		if (rc < 0)
-			goto free_drop;
-
-		if (rc == LNET_CREDIT_OK) {
-			lnet_ni_recv(ni, msg->msg_private, msg, 0,
-				     0, payload_length, payload_length);
-		}
-		return 0;
-	}
-
-	lnet_net_unlock(cpt);
-
-	rc = lnet_parse_local(ni, msg);
-	if (rc)
-		goto free_drop;
-	return 0;
-
- free_drop:
-	LASSERT(!msg->msg_md);
-	lnet_finalize(ni, msg, rc);
-
- drop:
-	lnet_drop_message(ni, cpt, private, payload_length);
-	return 0;
-}
-EXPORT_SYMBOL(lnet_parse);
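
lnet_parse() is the entry point an LND calls for every incoming wire header, and the comment above ("we MUST call back lnd_recv() come what may") is the heart of its contract. A minimal, hedged sketch of a transport receive path against that contract; the surrounding transport state is left out and "example_lnd_rx" is hypothetical:

/* Hedged sketch only: shows the lnet_parse() contract as described in
 * the code above.  A return of 0 means LNet has taken the message and
 * will call back lnd_recv(); a negative return (e.g. -EPROTO) means
 * the transport must discard the payload itself.
 */
static void example_lnd_rx(struct lnet_ni *ni, struct lnet_hdr *hdr,
			   lnet_nid_t sender, void *priv)
{
	int rc = lnet_parse(ni, hdr, sender, priv, 0 /* not an RDMA req */);

	if (rc < 0) {
		/* protocol error: lnd_recv() will NOT be called back,
		 * so drop any buffered payload here (transport-specific)
		 */
		return;
	}
	/* rc == 0: the payload is consumed via the lnd_recv() callback */
}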
-
-void
-lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
-{
-	while (!list_empty(head)) {
-		struct lnet_process_id id = {0};
-		struct lnet_msg *msg;
-
-		msg = list_entry(head->next, struct lnet_msg, msg_list);
-		list_del(&msg->msg_list);
-
-		id.nid = msg->msg_hdr.src_nid;
-		id.pid = msg->msg_hdr.src_pid;
-
-		LASSERT(!msg->msg_md);
-		LASSERT(msg->msg_rx_delayed);
-		LASSERT(msg->msg_rxpeer);
-		LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
-		CWARN("Dropping delayed PUT from %s portal %d match %llu offset %d length %d: %s\n",
-		      libcfs_id2str(id),
-		      msg->msg_hdr.msg.put.ptl_index,
-		      msg->msg_hdr.msg.put.match_bits,
-		      msg->msg_hdr.msg.put.offset,
-		      msg->msg_hdr.payload_length, reason);
-
-		/*
-		 * NB I can't drop msg's ref on msg_rxpeer until after I've
-		 * called lnet_drop_message(), so I just hang onto msg as well
-		 * until that's done
-		 */
-		lnet_drop_message(msg->msg_rxpeer->lp_ni,
-				  msg->msg_rxpeer->lp_cpt,
-				  msg->msg_private, msg->msg_len);
-		/*
-		 * NB: the message will not generate an event because it has
-		 * no attached MD, but we should still give an error code so
-		 * lnet_msg_decommit() can skip counter operations and other
-		 * checks.
-		 */
-		lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
-	}
-}
-
-void
-lnet_recv_delayed_msg_list(struct list_head *head)
-{
-	while (!list_empty(head)) {
-		struct lnet_msg *msg;
-		struct lnet_process_id id;
-
-		msg = list_entry(head->next, struct lnet_msg, msg_list);
-		list_del(&msg->msg_list);
-
-		/*
-		 * md won't disappear under me, since each msg
-		 * holds a ref on it
-		 */
-		id.nid = msg->msg_hdr.src_nid;
-		id.pid = msg->msg_hdr.src_pid;
-
-		LASSERT(msg->msg_rx_delayed);
-		LASSERT(msg->msg_md);
-		LASSERT(msg->msg_rxpeer);
-		LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
-		CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
-		       libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index,
-		       msg->msg_hdr.msg.put.match_bits,
-		       msg->msg_hdr.msg.put.offset,
-		       msg->msg_hdr.payload_length);
-
-		lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
-	}
-}
-
-/**
- * Initiate an asynchronous PUT operation.
- *
- * There are several events associated with a PUT: completion of the send on
- * the initiator node (LNET_EVENT_SEND), and when the send completes
- * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating
- * that the operation was accepted by the target. The event LNET_EVENT_PUT is
- * used at the target node to indicate the completion of incoming data
- * delivery.
- *
- * The local events will be logged in the EQ associated with the MD pointed to
- * by \a mdh handle. Using a MD without an associated EQ results in these
- * events being discarded. In this case, the caller must have another
- * mechanism (e.g., a higher level protocol) for determining when it is safe
- * to modify the memory region associated with the MD.
- *
- * Note that LNet does not guarantee the order of LNET_EVENT_SEND and
- * LNET_EVENT_ACK, though intuitively ACK should happen after SEND.
- *
- * \param self Indicates the NID of a local interface through which to send
- * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself.
- * \param mdh A handle for the MD that describes the memory to be sent. The MD
- * must be "free floating" (See LNetMDBind()).
- * \param ack Controls whether an acknowledgment is requested.
- * Acknowledgments are only sent when they are requested by the initiating
- * process and the target MD enables them.
- * \param target A process identifier for the target process.
- * \param portal The index in the \a target's portal table.
- * \param match_bits The match bits to use for MD selection at the target
- * process.
- * \param offset The offset into the target MD (only used when the target
- * MD has the LNET_MD_MANAGE_REMOTE option set).
- * \param hdr_data 64 bits of user data that can be included in the message
- * header. This data is written to an event queue entry at the target if an
- * EQ is present on the matching MD.
- *
- * \retval  0      Success, and only in this case events will be generated
- * and logged to EQ (if it exists).
- * \retval -EIO    Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- *
- * \see lnet_event::hdr_data and lnet_event_kind.
- */
-int
-LNetPut(lnet_nid_t self, struct lnet_handle_md mdh, enum lnet_ack_req ack,
-	struct lnet_process_id target, unsigned int portal,
-	__u64 match_bits, unsigned int offset,
-	__u64 hdr_data)
-{
-	struct lnet_msg *msg;
-	struct lnet_libmd *md;
-	int cpt;
-	int rc;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
-	    fail_peer(target.nid, 1)) { /* shall we now? */
-		CERROR("Dropping PUT to %s: simulated failure\n",
-		       libcfs_id2str(target));
-		return -EIO;
-	}
-
-	msg = kzalloc(sizeof(*msg), GFP_NOFS);
-	if (!msg) {
-		CERROR("Dropping PUT to %s: ENOMEM on struct lnet_msg\n",
-		       libcfs_id2str(target));
-		return -ENOMEM;
-	}
-	msg->msg_vmflush = !!(current->flags & PF_MEMALLOC);
-
-	cpt = lnet_cpt_of_cookie(mdh.cookie);
-	lnet_res_lock(cpt);
-
-	md = lnet_handle2md(&mdh);
-	if (!md || !md->md_threshold || md->md_me) {
-		CERROR("Dropping PUT (%llu:%d:%s): MD (%d) invalid\n",
-		       match_bits, portal, libcfs_id2str(target),
-		       !md ? -1 : md->md_threshold);
-		if (md && md->md_me)
-			CERROR("Source MD also attached to portal %d\n",
-			       md->md_me->me_portal);
-		lnet_res_unlock(cpt);
-
-		kfree(msg);
-		return -ENOENT;
-	}
-
-	CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
-	lnet_msg_attach_md(msg, md, 0, 0);
-
-	lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length);
-
-	msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits);
-	msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal);
-	msg->msg_hdr.msg.put.offset = cpu_to_le32(offset);
-	msg->msg_hdr.msg.put.hdr_data = hdr_data;
-
-	/* NB handles only looked up by creator (no flips) */
-	if (ack == LNET_ACK_REQ) {
-		msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
-			the_lnet.ln_interface_cookie;
-		msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
-			md->md_lh.lh_cookie;
-	} else {
-		msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
-			LNET_WIRE_HANDLE_COOKIE_NONE;
-		msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
-			LNET_WIRE_HANDLE_COOKIE_NONE;
-	}
-
-	lnet_res_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
-	rc = lnet_send(self, msg, LNET_NID_ANY);
-	if (rc) {
-		CNETERR("Error sending PUT to %s: %d\n",
-			libcfs_id2str(target), rc);
-		lnet_finalize(NULL, msg, rc);
-	}
-
-	/* completion will be signalled by an event */
-	return 0;
-}
-EXPORT_SYMBOL(LNetPut);
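
Since the kernel-doc above spells out the PUT event model and error returns, a short caller sketch may help. This is a hedged example only: MY_PORTAL and MY_MATCH_BITS are hypothetical values the target is assumed to listen on, and mdh is assumed to come from LNetMDBind() as the documentation requires:

/* Hedged sketch: assumes an initialized LNet stack and an mdh bound
 * via LNetMDBind() to the buffer being sent.
 */
#define MY_PORTAL	9	/* hypothetical portal index */
#define MY_MATCH_BITS	0x1ULL	/* hypothetical match bits */

static int example_put(struct lnet_handle_md mdh,
		       struct lnet_process_id target)
{
	int rc;

	rc = LNetPut(LNET_NID_ANY,	/* let LNet pick the local NI */
		     mdh,		/* MD describing the data to send */
		     LNET_ACK_REQ,	/* request an ACK from the target */
		     target, MY_PORTAL, MY_MATCH_BITS,
		     0,			/* offset into the target MD */
		     0);		/* opaque hdr_data for the event */

	if (rc)		/* -EIO, -ENOMEM or -ENOENT: no events will follow */
		return rc;

	/* 0: LNET_EVENT_SEND (and LNET_EVENT_ACK) arrive via the MD's EQ */
	return 0;
}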
-
-struct lnet_msg *
-lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg)
-{
-	/*
-	 * The LND can DMA direct to the GET md (i.e. no REPLY msg).  This
-	 * returns a msg for the LND to pass to lnet_finalize() when the sink
-	 * data has been received.
-	 *
-	 * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
-	 * lnet_finalize() is called on it, so the LND must call this first
-	 */
-	struct lnet_msg *msg = kzalloc(sizeof(*msg), GFP_NOFS);
-	struct lnet_libmd *getmd = getmsg->msg_md;
-	struct lnet_process_id peer_id = getmsg->msg_target;
-	int cpt;
-
-	LASSERT(!getmsg->msg_target_is_router);
-	LASSERT(!getmsg->msg_routing);
-
-	if (!msg) {
-		CERROR("%s: Dropping REPLY from %s: can't allocate msg\n",
-		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id));
-		goto drop;
-	}
-
-	cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie);
-	lnet_res_lock(cpt);
-
-	LASSERT(getmd->md_refcount > 0);
-
-	if (!getmd->md_threshold) {
-		CERROR("%s: Dropping REPLY from %s for inactive MD %p\n",
-		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id),
-		       getmd);
-		lnet_res_unlock(cpt);
-		goto drop;
-	}
-
-	LASSERT(!getmd->md_offset);
-
-	CDEBUG(D_NET, "%s: Reply from %s md %p\n",
-	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
-
-	/* setup information for lnet_build_msg_event */
-	msg->msg_from = peer_id.nid;
-	msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
-	msg->msg_hdr.src_nid = peer_id.nid;
-	msg->msg_hdr.payload_length = getmd->md_length;
-	msg->msg_receiving = 1; /* required by lnet_msg_attach_md */
-
-	lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
-	lnet_res_unlock(cpt);
-
-	cpt = lnet_cpt_of_nid(peer_id.nid);
-
-	lnet_net_lock(cpt);
-	lnet_msg_commit(msg, cpt);
-	lnet_net_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
-	return msg;
-
- drop:
-	cpt = lnet_cpt_of_nid(peer_id.nid);
-
-	lnet_net_lock(cpt);
-	the_lnet.ln_counters[cpt]->drop_count++;
-	the_lnet.ln_counters[cpt]->drop_length += getmd->md_length;
-	lnet_net_unlock(cpt);
-
-	kfree(msg);
-
-	return NULL;
-}
-EXPORT_SYMBOL(lnet_create_reply_msg);
-
-void
-lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *reply,
-		       unsigned int len)
-{
-	/*
-	 * Set the REPLY length, now that the RDMA that elides the REPLY
-	 * message has completed and I know it.
-	 */
-	LASSERT(reply);
-	LASSERT(reply->msg_type == LNET_MSG_GET);
-	LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY);
-
-	/*
-	 * NB I trusted my peer to RDMA.  If she tells me she's written beyond
-	 * the end of my buffer, I might as well be dead.
-	 */
-	LASSERT(len <= reply->msg_ev.mlength);
-
-	reply->msg_ev.mlength = len;
-}
-EXPORT_SYMBOL(lnet_set_reply_msg_len);
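
The CAVEAT EMPTOR in lnet_create_reply_msg() and the comment in lnet_set_reply_msg_len() together pin down an ordering for LNDs that turn a GET into a direct RDMA. A hedged sketch of one completion path that respects it; nob is assumed to be the byte count reported by the transport:

/* Hedged sketch: one possible completion path for an LND that DMAs
 * GET sink data directly into the GET MD (no wire REPLY).  The call
 * ordering follows the comments above.
 */
static void example_optimized_get_done(struct lnet_ni *ni,
				       struct lnet_msg *getmsg,
				       unsigned int nob)
{
	/* must be created before getmsg is finalized (and thus freed) */
	struct lnet_msg *reply = lnet_create_reply_msg(ni, getmsg);

	if (reply) {
		/* the elided REPLY's real length is only known now */
		lnet_set_reply_msg_len(ni, reply, nob);
		lnet_finalize(ni, reply, 0);	/* queues LNET_EVENT_REPLY */
	}
	lnet_finalize(ni, getmsg, 0);		/* the original GET is done */
}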
-
-/**
- * Initiate an asynchronous GET operation.
- *
- * On the initiator node, an LNET_EVENT_SEND is logged when the GET request
- * is sent, and an LNET_EVENT_REPLY is logged when the data returned from
- * the target node in the REPLY has been written to local MD.
- *
- * On the target node, an LNET_EVENT_GET is logged when the GET request
- * arrives and is accepted into a MD.
- *
- * \param self,target,portal,match_bits,offset See the discussion in LNetPut().
- * \param mdh A handle for the MD that describes the memory into which the
- * requested data will be received. The MD must be "free floating"
- * (See LNetMDBind()).
- *
- * \retval  0      Success, and only in this case events will be generated
- * and logged to EQ (if it exists) of the MD.
- * \retval -EIO    Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- */
-int
-LNetGet(lnet_nid_t self, struct lnet_handle_md mdh,
-	struct lnet_process_id target, unsigned int portal,
-	__u64 match_bits, unsigned int offset)
-{
-	struct lnet_msg *msg;
-	struct lnet_libmd *md;
-	int cpt;
-	int rc;
-
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
-	    fail_peer(target.nid, 1)) {	  /* shall we now? */
-		CERROR("Dropping GET to %s: simulated failure\n",
-		       libcfs_id2str(target));
-		return -EIO;
-	}
-
-	msg = kzalloc(sizeof(*msg), GFP_NOFS);
-	if (!msg) {
-		CERROR("Dropping GET to %s: ENOMEM on struct lnet_msg\n",
-		       libcfs_id2str(target));
-		return -ENOMEM;
-	}
-
-	cpt = lnet_cpt_of_cookie(mdh.cookie);
-	lnet_res_lock(cpt);
-
-	md = lnet_handle2md(&mdh);
-	if (!md || !md->md_threshold || md->md_me) {
-		CERROR("Dropping GET (%llu:%d:%s): MD (%d) invalid\n",
-		       match_bits, portal, libcfs_id2str(target),
-		       !md ? -1 : md->md_threshold);
-		if (md && md->md_me)
-			CERROR("REPLY MD also attached to portal %d\n",
-			       md->md_me->me_portal);
-
-		lnet_res_unlock(cpt);
-
-		kfree(msg);
-		return -ENOENT;
-	}
-
-	CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
-	lnet_msg_attach_md(msg, md, 0, 0);
-
-	lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0);
-
-	msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits);
-	msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal);
-	msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset);
-	msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length);
-
-	/* NB handles only looked up by creator (no flips) */
-	msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie =
-		the_lnet.ln_interface_cookie;
-	msg->msg_hdr.msg.get.return_wmd.wh_object_cookie =
-		md->md_lh.lh_cookie;
-
-	lnet_res_unlock(cpt);
-
-	lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
-	rc = lnet_send(self, msg, LNET_NID_ANY);
-	if (rc < 0) {
-		CNETERR("Error sending GET to %s: %d\n",
-			libcfs_id2str(target), rc);
-		lnet_finalize(NULL, msg, rc);
-	}
-
-	/* completion will be signalled by an event */
-	return 0;
-}
-EXPORT_SYMBOL(LNetGet);
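
Mirroring the PUT example, a hedged caller sketch for LNetGet(); again mdh is assumed to be a free-floating MD from LNetMDBind(), this time describing the local sink buffer, and the hypothetical portal/match-bits constants from the PUT sketch are reused:

/* Hedged sketch: the REPLY payload lands in the MD behind mdh, and
 * LNET_EVENT_SEND / LNET_EVENT_REPLY are delivered through its EQ.
 */
static int example_get(struct lnet_handle_md mdh,
		       struct lnet_process_id target)
{
	int rc = LNetGet(LNET_NID_ANY, mdh, target,
			 MY_PORTAL, MY_MATCH_BITS,
			 0 /* offset into the remote MD */);

	if (rc)		/* -EIO, -ENOMEM or -ENOENT: no events will follow */
		return rc;

	return 0;	/* completion arrives as events, not here */
}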
-
-/**
- * Calculate distance to node at \a dstnid.
- *
- * \param dstnid Target NID.
- * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid
- * is saved here.
- * \param orderp If not NULL, order of the route to reach \a dstnid is saved
- * here.
- *
- * \retval 0 If \a dstnid belongs to a local interface and the reserved
- * option local_nid_dist_zero is set (the default).
- * \retval positive Distance to the target NID, i.e. the number of hops
- * plus one.
- * \retval -EHOSTUNREACH If \a dstnid is not reachable.
- */
-int
-LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
-{
-	struct list_head *e;
-	struct lnet_ni *ni;
-	struct lnet_remotenet *rnet;
-	__u32 dstnet = LNET_NIDNET(dstnid);
-	int hops;
-	int cpt;
-	__u32 order = 2;
-	struct list_head *rn_list;
-
-	/*
-	 * if !local_nid_dist_zero, I don't return a distance of 0 ever
-	 * (when lustre sees a distance of 0, it substitutes 0@lo), so I
-	 * keep order 0 free for 0@lo and order 1 free for a local NID
-	 * match
-	 */
-	LASSERT(the_lnet.ln_refcount > 0);
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each(e, &the_lnet.ln_nis) {
-		ni = list_entry(e, struct lnet_ni, ni_list);
-
-		if (ni->ni_nid == dstnid) {
-			if (srcnidp)
-				*srcnidp = dstnid;
-			if (orderp) {
-				if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND)
-					*orderp = 0;
-				else
-					*orderp = 1;
-			}
-			lnet_net_unlock(cpt);
-
-			return local_nid_dist_zero ? 0 : 1;
-		}
-
-		if (LNET_NIDNET(ni->ni_nid) == dstnet) {
-			/*
-			 * Check if ni was originally created in
-			 * current net namespace.
-			 * If not, assign order above 0xffff0000,
-			 * to make this ni not a priority.
-			 */
-			if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns))
-				order += 0xffff0000;
-
-			if (srcnidp)
-				*srcnidp = ni->ni_nid;
-			if (orderp)
-				*orderp = order;
-			lnet_net_unlock(cpt);
-			return 1;
-		}
-
-		order++;
-	}
-
-	rn_list = lnet_net2rnethash(dstnet);
-	list_for_each(e, rn_list) {
-		rnet = list_entry(e, struct lnet_remotenet, lrn_list);
-
-		if (rnet->lrn_net == dstnet) {
-			struct lnet_route *route;
-			struct lnet_route *shortest = NULL;
-			__u32 shortest_hops = LNET_UNDEFINED_HOPS;
-			__u32 route_hops;
-
-			LASSERT(!list_empty(&rnet->lrn_routes));
-
-			list_for_each_entry(route, &rnet->lrn_routes,
-					    lr_list) {
-				route_hops = route->lr_hops;
-				if (route_hops == LNET_UNDEFINED_HOPS)
-					route_hops = 1;
-				if (!shortest ||
-				    route_hops < shortest_hops) {
-					shortest = route;
-					shortest_hops = route_hops;
-				}
-			}
-
-			LASSERT(shortest);
-			hops = shortest_hops;
-			if (srcnidp)
-				*srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
-			if (orderp)
-				*orderp = order;
-			lnet_net_unlock(cpt);
-			return hops + 1;
-		}
-		order++;
-	}
-
-	lnet_net_unlock(cpt);
-	return -EHOSTUNREACH;
-}
-EXPORT_SYMBOL(LNetDist);
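
The return convention above (0 for a local NID when local_nid_dist_zero is set, hops plus one otherwise, -EHOSTUNREACH when no route exists) is easy to misread, so a hedged decoding sketch:

/* Hedged sketch: decodes LNetDist()'s documented return values for
 * an arbitrary NID of interest.
 */
static void example_dist(lnet_nid_t dstnid)
{
	lnet_nid_t src;
	__u32 order;
	int d = LNetDist(dstnid, &src, &order);

	if (d < 0)
		CERROR("%s: unreachable\n", libcfs_nid2str(dstnid));
	else if (!d)
		CDEBUG(D_NET, "%s is a local NID\n", libcfs_nid2str(dstnid));
	else	/* d == number of hops + 1 */
		CDEBUG(D_NET, "%s: %d hop(s) via %s, order %u\n",
		       libcfs_nid2str(dstnid), d - 1,
		       libcfs_nid2str(src), order);
}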

+ 0 - 625
drivers/staging/lustre/lnet/lnet/lib-msg.c

@@ -1,625 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-msg.c
- *
- * Message decoding, parsing and finalizing routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-void
-lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev)
-{
-	memset(ev, 0, sizeof(*ev));
-
-	ev->status   = 0;
-	ev->unlinked = 1;
-	ev->type     = LNET_EVENT_UNLINK;
-	lnet_md_deconstruct(md, &ev->md);
-	lnet_md2handle(&ev->md_handle, md);
-}
-
-/*
- * Don't need any lock, must be called after lnet_commit_md
- */
-void
-lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type)
-{
-	struct lnet_hdr *hdr = &msg->msg_hdr;
-	struct lnet_event *ev  = &msg->msg_ev;
-
-	LASSERT(!msg->msg_routing);
-
-	ev->type = ev_type;
-
-	if (ev_type == LNET_EVENT_SEND) {
-		/* event for active message */
-		ev->target.nid    = le64_to_cpu(hdr->dest_nid);
-		ev->target.pid    = le32_to_cpu(hdr->dest_pid);
-		ev->initiator.nid = LNET_NID_ANY;
-		ev->initiator.pid = the_lnet.ln_pid;
-		ev->sender        = LNET_NID_ANY;
-	} else {
-		/* event for passive message */
-		ev->target.pid    = hdr->dest_pid;
-		ev->target.nid    = hdr->dest_nid;
-		ev->initiator.pid = hdr->src_pid;
-		ev->initiator.nid = hdr->src_nid;
-		ev->rlength       = hdr->payload_length;
-		ev->sender        = msg->msg_from;
-		ev->mlength       = msg->msg_wanted;
-		ev->offset        = msg->msg_offset;
-	}
-
-	switch (ev_type) {
-	default:
-		LBUG();
-
-	case LNET_EVENT_PUT: /* passive PUT */
-		ev->pt_index   = hdr->msg.put.ptl_index;
-		ev->match_bits = hdr->msg.put.match_bits;
-		ev->hdr_data   = hdr->msg.put.hdr_data;
-		return;
-
-	case LNET_EVENT_GET: /* passive GET */
-		ev->pt_index   = hdr->msg.get.ptl_index;
-		ev->match_bits = hdr->msg.get.match_bits;
-		ev->hdr_data   = 0;
-		return;
-
-	case LNET_EVENT_ACK: /* ACK */
-		ev->match_bits = hdr->msg.ack.match_bits;
-		ev->mlength    = hdr->msg.ack.mlength;
-		return;
-
-	case LNET_EVENT_REPLY: /* REPLY */
-		return;
-
-	case LNET_EVENT_SEND: /* active message */
-		if (msg->msg_type == LNET_MSG_PUT) {
-			ev->pt_index   = le32_to_cpu(hdr->msg.put.ptl_index);
-			ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
-			ev->offset     = le32_to_cpu(hdr->msg.put.offset);
-			ev->mlength    =
-			ev->rlength    = le32_to_cpu(hdr->payload_length);
-			ev->hdr_data   = le64_to_cpu(hdr->msg.put.hdr_data);
-
-		} else {
-			LASSERT(msg->msg_type == LNET_MSG_GET);
-			ev->pt_index   = le32_to_cpu(hdr->msg.get.ptl_index);
-			ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
-			ev->mlength    =
-			ev->rlength    = le32_to_cpu(hdr->msg.get.sink_length);
-			ev->offset     = le32_to_cpu(hdr->msg.get.src_offset);
-			ev->hdr_data   = 0;
-		}
-		return;
-	}
-}
-
-void
-lnet_msg_commit(struct lnet_msg *msg, int cpt)
-{
-	struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
-	struct lnet_counters *counters  = the_lnet.ln_counters[cpt];
-
-	/* routed message can be committed for both receiving and sending */
-	LASSERT(!msg->msg_tx_committed);
-
-	if (msg->msg_sending) {
-		LASSERT(!msg->msg_receiving);
-
-		msg->msg_tx_cpt = cpt;
-		msg->msg_tx_committed = 1;
-		if (msg->msg_rx_committed) { /* routed message REPLY */
-			LASSERT(msg->msg_onactivelist);
-			return;
-		}
-	} else {
-		LASSERT(!msg->msg_sending);
-		msg->msg_rx_cpt = cpt;
-		msg->msg_rx_committed = 1;
-	}
-
-	LASSERT(!msg->msg_onactivelist);
-	msg->msg_onactivelist = 1;
-	list_add(&msg->msg_activelist, &container->msc_active);
-
-	counters->msgs_alloc++;
-	if (counters->msgs_alloc > counters->msgs_max)
-		counters->msgs_max = counters->msgs_alloc;
-}
-
-static void
-lnet_msg_decommit_tx(struct lnet_msg *msg, int status)
-{
-	struct lnet_counters	*counters;
-	struct lnet_event *ev = &msg->msg_ev;
-
-	LASSERT(msg->msg_tx_committed);
-	if (status)
-		goto out;
-
-	counters = the_lnet.ln_counters[msg->msg_tx_cpt];
-	switch (ev->type) {
-	default: /* routed message */
-		LASSERT(msg->msg_routing);
-		LASSERT(msg->msg_rx_committed);
-		LASSERT(!ev->type);
-
-		counters->route_length += msg->msg_len;
-		counters->route_count++;
-		goto out;
-
-	case LNET_EVENT_PUT:
-		/* should have been decommitted */
-		LASSERT(!msg->msg_rx_committed);
-		/* overwritten while sending ACK */
-		LASSERT(msg->msg_type == LNET_MSG_ACK);
-		msg->msg_type = LNET_MSG_PUT; /* fix type */
-		break;
-
-	case LNET_EVENT_SEND:
-		LASSERT(!msg->msg_rx_committed);
-		if (msg->msg_type == LNET_MSG_PUT)
-			counters->send_length += msg->msg_len;
-		break;
-
-	case LNET_EVENT_GET:
-		LASSERT(msg->msg_rx_committed);
-		/*
-		 * overwritten while sending the reply; we should never
-		 * get here for an optimized GET
-		 */
-		LASSERT(msg->msg_type == LNET_MSG_REPLY);
-		msg->msg_type = LNET_MSG_GET; /* fix type */
-		break;
-	}
-
-	counters->send_count++;
- out:
-	lnet_return_tx_credits_locked(msg);
-	msg->msg_tx_committed = 0;
-}
-
-static void
-lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
-{
-	struct lnet_counters *counters;
-	struct lnet_event *ev = &msg->msg_ev;
-
-	LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
-	LASSERT(msg->msg_rx_committed);
-
-	if (status)
-		goto out;
-
-	counters = the_lnet.ln_counters[msg->msg_rx_cpt];
-	switch (ev->type) {
-	default:
-		LASSERT(!ev->type);
-		LASSERT(msg->msg_routing);
-		goto out;
-
-	case LNET_EVENT_ACK:
-		LASSERT(msg->msg_type == LNET_MSG_ACK);
-		break;
-
-	case LNET_EVENT_GET:
-		/*
-		 * type is "REPLY" if it's an optimized GET on passive side,
-		 * because optimized GET will never be committed for sending,
-		 * so message type wouldn't be changed back to "GET" by
-		 * lnet_msg_decommit_tx(), see details in lnet_parse_get()
-		 */
-		LASSERT(msg->msg_type == LNET_MSG_REPLY ||
-			msg->msg_type == LNET_MSG_GET);
-		counters->send_length += msg->msg_wanted;
-		break;
-
-	case LNET_EVENT_PUT:
-		LASSERT(msg->msg_type == LNET_MSG_PUT);
-		break;
-
-	case LNET_EVENT_REPLY:
-		/*
-		 * type is "GET" if it's an optimized GET on active side,
-		 * see details in lnet_create_reply_msg()
-		 */
-		LASSERT(msg->msg_type == LNET_MSG_GET ||
-			msg->msg_type == LNET_MSG_REPLY);
-		break;
-	}
-
-	counters->recv_count++;
-	if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
-		counters->recv_length += msg->msg_wanted;
-
- out:
-	lnet_return_rx_credits_locked(msg);
-	msg->msg_rx_committed = 0;
-}
-
-void
-lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status)
-{
-	int cpt2 = cpt;
-
-	LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
-	LASSERT(msg->msg_onactivelist);
-
-	if (msg->msg_tx_committed) { /* always decommit for sending first */
-		LASSERT(cpt == msg->msg_tx_cpt);
-		lnet_msg_decommit_tx(msg, status);
-	}
-
-	if (msg->msg_rx_committed) {
-		/* forwarding msg committed for both receiving and sending */
-		if (cpt != msg->msg_rx_cpt) {
-			lnet_net_unlock(cpt);
-			cpt2 = msg->msg_rx_cpt;
-			lnet_net_lock(cpt2);
-		}
-		lnet_msg_decommit_rx(msg, status);
-	}
-
-	list_del(&msg->msg_activelist);
-	msg->msg_onactivelist = 0;
-
-	the_lnet.ln_counters[cpt2]->msgs_alloc--;
-
-	if (cpt2 != cpt) {
-		lnet_net_unlock(cpt2);
-		lnet_net_lock(cpt);
-	}
-}
-
-void
-lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
-		   unsigned int offset, unsigned int mlen)
-{
-	/* NB: @offset and @mlen are only useful for receiving */
-	/*
-	 * Here we attach the MD to the lnet_msg, mark it busy and
-	 * decrement its threshold. Come what may, the lnet_msg "owns"
-	 * the MD until a call to lnet_msg_detach_md or lnet_finalize()
-	 * signals completion.
-	 */
-	LASSERT(!msg->msg_routing);
-
-	msg->msg_md = md;
-	if (msg->msg_receiving) { /* committed for receiving */
-		msg->msg_offset = offset;
-		msg->msg_wanted = mlen;
-	}
-
-	md->md_refcount++;
-	if (md->md_threshold != LNET_MD_THRESH_INF) {
-		LASSERT(md->md_threshold > 0);
-		md->md_threshold--;
-	}
-
-	/* build umd in event */
-	lnet_md2handle(&msg->msg_ev.md_handle, md);
-	lnet_md_deconstruct(md, &msg->msg_ev.md);
-}
-
-void
-lnet_msg_detach_md(struct lnet_msg *msg, int status)
-{
-	struct lnet_libmd *md = msg->msg_md;
-	int unlink;
-
-	/* Now it's safe to drop my caller's ref */
-	md->md_refcount--;
-	LASSERT(md->md_refcount >= 0);
-
-	unlink = lnet_md_unlinkable(md);
-	if (md->md_eq) {
-		msg->msg_ev.status   = status;
-		msg->msg_ev.unlinked = unlink;
-		lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
-	}
-
-	if (unlink)
-		lnet_md_unlink(md);
-
-	msg->msg_md = NULL;
-}
-
-static int
-lnet_complete_msg_locked(struct lnet_msg *msg, int cpt)
-{
-	struct lnet_handle_wire ack_wmd;
-	int rc;
-	int status = msg->msg_ev.status;
-
-	LASSERT(msg->msg_onactivelist);
-
-	if (!status && msg->msg_ack) {
-		/* Only send an ACK if the PUT completed successfully */
-
-		lnet_msg_decommit(msg, cpt, 0);
-
-		msg->msg_ack = 0;
-		lnet_net_unlock(cpt);
-
-		LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
-		LASSERT(!msg->msg_routing);
-
-		ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
-
-		lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
-
-		msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
-		msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
-		msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
-
-		/*
-		 * NB: we probably want to use NID of msg::msg_from as 3rd
-		 * parameter (router NID) if it's a routed message
-		 */
-		rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY);
-
-		lnet_net_lock(cpt);
-		/*
-		 * NB: the message is committed for sending; we should return
-		 * on success because the LND will finalize it later.
-		 *
-		 * Also, it is possible that the message was committed for
-		 * sending and then failed before reaching the LND (e.g.
-		 * ENOMEM); in that case we can't fall through either,
-		 * because the CPT for sending can differ from the CPT for
-		 * receiving, so we should return to lnet_finalize() to
-		 * make sure we are locking the correct partition.
-		 */
-		return rc;
-
-	} else if (!status &&	/* OK so far */
-		   (msg->msg_routing && !msg->msg_sending)) {
-		/* not forwarded */
-		LASSERT(!msg->msg_receiving);	/* called back recv already */
-		lnet_net_unlock(cpt);
-
-		rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY);
-
-		lnet_net_lock(cpt);
-		/*
-		 * NB: the message is committed for sending; we should return
-		 * on success because the LND will finalize it later.
-		 *
-		 * Also, it is possible that the message was committed for
-		 * sending and then failed before reaching the LND (e.g.
-		 * ENOMEM); in that case we can't fall through either:
-		 * - The rule is that a message must decommit for sending
-		 *   first if it is committed for both sending and receiving
-		 * - The CPT for sending can differ from the CPT for
-		 *   receiving, so we should return to lnet_finalize() to
-		 *   make sure we are locking the correct partition.
-		 */
-		return rc;
-	}
-
-	lnet_msg_decommit(msg, cpt, status);
-	kfree(msg);
-	return 0;
-}
-
-void
-lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int status)
-{
-	struct lnet_msg_container *container;
-	int my_slot;
-	int cpt;
-	int rc;
-	int i;
-
-	LASSERT(!in_interrupt());
-
-	if (!msg)
-		return;
-
-	msg->msg_ev.status = status;
-
-	if (msg->msg_md) {
-		cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie);
-
-		lnet_res_lock(cpt);
-		lnet_msg_detach_md(msg, status);
-		lnet_res_unlock(cpt);
-	}
-
- again:
-	rc = 0;
-	if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
-		/* not committed to network yet */
-		LASSERT(!msg->msg_onactivelist);
-		kfree(msg);
-		return;
-	}
-
-	/*
-	 * NB: a routed message can be committed for both receiving and
-	 * sending; finalize in LIFO order (sending first, then receiving)
-	 * to keep the counters correct.
-	 */
-	cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt;
-	lnet_net_lock(cpt);
-
-	container = the_lnet.ln_msg_containers[cpt];
-	list_add_tail(&msg->msg_list, &container->msc_finalizing);
-
-	/*
-	 * Recursion breaker.  Don't complete the message here if I am (or
-	 * enough other threads are) already completing messages
-	 */
-	my_slot = -1;
-	for (i = 0; i < container->msc_nfinalizers; i++) {
-		if (container->msc_finalizers[i] == current)
-			break;
-
-		if (my_slot < 0 && !container->msc_finalizers[i])
-			my_slot = i;
-	}
-
-	if (i < container->msc_nfinalizers || my_slot < 0) {
-		lnet_net_unlock(cpt);
-		return;
-	}
-
-	container->msc_finalizers[my_slot] = current;
-
-	while (!list_empty(&container->msc_finalizing)) {
-		msg = list_entry(container->msc_finalizing.next,
-				 struct lnet_msg, msg_list);
-
-		list_del(&msg->msg_list);
-
-		/*
-		 * NB drops and regains the lnet lock if it actually does
-		 * anything, so my finalizing friends can chomp along too
-		 */
-		rc = lnet_complete_msg_locked(msg, cpt);
-		if (rc)
-			break;
-	}
-
-	if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) {
-		lnet_net_unlock(cpt);
-		lnet_delay_rule_check();
-		lnet_net_lock(cpt);
-	}
-
-	container->msc_finalizers[my_slot] = NULL;
-	lnet_net_unlock(cpt);
-
-	if (rc)
-		goto again;
-}
-EXPORT_SYMBOL(lnet_finalize);
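
The slot scan in lnet_finalize() packs two exit conditions into one loop: bail out if current already holds a slot (we are recursing) or if no slot is free (enough finalizers are already draining msc_finalizing). A hedged, stand-alone restatement of just that decision; the helper name is hypothetical:

/* Hedged sketch: the recursion-breaker decision in isolation.
 * finalizers/nfinalizers correspond to msc_finalizers and
 * msc_nfinalizers in struct lnet_msg_container (task pointers, as
 * the comparison with current above implies).
 */
static int example_claim_finalizer_slot(struct task_struct **finalizers,
					int nfinalizers)
{
	int my_slot = -1;
	int i;

	for (i = 0; i < nfinalizers; i++) {
		if (finalizers[i] == current)
			break;		/* already finalizing: don't recurse */
		if (my_slot < 0 && !finalizers[i])
			my_slot = i;	/* remember the first free slot */
	}

	if (i < nfinalizers || my_slot < 0)
		return -1;	/* leave the queue to the other finalizers */

	finalizers[my_slot] = current;	/* claim; cleared when draining ends */
	return my_slot;
}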
-
-void
-lnet_msg_container_cleanup(struct lnet_msg_container *container)
-{
-	int count = 0;
-
-	if (!container->msc_init)
-		return;
-
-	while (!list_empty(&container->msc_active)) {
-		struct lnet_msg *msg;
-
-		msg = list_entry(container->msc_active.next,
-				 struct lnet_msg, msg_activelist);
-		LASSERT(msg->msg_onactivelist);
-		msg->msg_onactivelist = 0;
-		list_del(&msg->msg_activelist);
-		kfree(msg);
-		count++;
-	}
-
-	if (count > 0)
-		CERROR("%d active msg on exit\n", count);
-
-	kvfree(container->msc_finalizers);
-	container->msc_finalizers = NULL;
-	container->msc_init = 0;
-}
-
-int
-lnet_msg_container_setup(struct lnet_msg_container *container, int cpt)
-{
-	container->msc_init = 1;
-
-	INIT_LIST_HEAD(&container->msc_active);
-	INIT_LIST_HEAD(&container->msc_finalizing);
-
-	/* number of CPUs in this partition */
-	container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt);
-
-	container->msc_finalizers = kvzalloc_cpt(container->msc_nfinalizers *
-						 sizeof(*container->msc_finalizers),
-						 GFP_KERNEL, cpt);
-
-	if (!container->msc_finalizers) {
-		CERROR("Failed to allocate message finalizers\n");
-		lnet_msg_container_cleanup(container);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-void
-lnet_msg_containers_destroy(void)
-{
-	struct lnet_msg_container *container;
-	int i;
-
-	if (!the_lnet.ln_msg_containers)
-		return;
-
-	cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers)
-		lnet_msg_container_cleanup(container);
-
-	cfs_percpt_free(the_lnet.ln_msg_containers);
-	the_lnet.ln_msg_containers = NULL;
-}
-
-int
-lnet_msg_containers_create(void)
-{
-	struct lnet_msg_container *container;
-	int rc;
-	int i;
-
-	the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(),
-						      sizeof(*container));
-
-	if (!the_lnet.ln_msg_containers) {
-		CERROR("Failed to allocate cpu-partition data for network\n");
-		return -ENOMEM;
-	}
-
-	cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) {
-		rc = lnet_msg_container_setup(container, i);
-		if (rc) {
-			lnet_msg_containers_destroy();
-			return rc;
-		}
-	}
-
-	return 0;
-}

+ 0 - 987
drivers/staging/lustre/lnet/lnet/lib-ptl.c

@@ -1,987 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-ptl.c
- *
- * portal & match routines
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* NB: add /proc interfaces in upcoming patches */
-int portal_rotor = LNET_PTL_ROTOR_HASH_RT;
-module_param(portal_rotor, int, 0644);
-MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions");
-
-static int
-lnet_ptl_match_type(unsigned int index, struct lnet_process_id match_id,
-		    __u64 mbits, __u64 ignore_bits)
-{
-	struct lnet_portal *ptl = the_lnet.ln_portals[index];
-	int unique;
-
-	unique = !ignore_bits &&
-		 match_id.nid != LNET_NID_ANY &&
-		 match_id.pid != LNET_PID_ANY;
-
-	LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
-
-	/* prefer to check w/o any lock */
-	if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
-		goto match;
-
-	/* unset, new portal */
-	lnet_ptl_lock(ptl);
-	/* check again with lock */
-	if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
-		lnet_ptl_unlock(ptl);
-		goto match;
-	}
-
-	/* still not set */
-	if (unique)
-		lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
-	else
-		lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
-
-	lnet_ptl_unlock(ptl);
-
-	return 1;
-
- match:
-	if ((lnet_ptl_is_unique(ptl) && !unique) ||
-	    (lnet_ptl_is_wildcard(ptl) && unique))
-		return 0;
-	return 1;
-}
-
-static void
-lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt)
-{
-	struct lnet_match_table	*mtable = ptl->ptl_mtables[cpt];
-	int i;
-
-	/* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
-	LASSERT(lnet_ptl_is_wildcard(ptl));
-
-	mtable->mt_enabled = 1;
-
-	ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt;
-	for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) {
-		LASSERT(ptl->ptl_mt_maps[i] != cpt);
-		if (ptl->ptl_mt_maps[i] < cpt)
-			break;
-
-		/* swap to order */
-		ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i];
-		ptl->ptl_mt_maps[i] = cpt;
-	}
-
-	ptl->ptl_mt_nmaps++;
-}
-
-static void
-lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt)
-{
-	struct lnet_match_table	*mtable = ptl->ptl_mtables[cpt];
-	int i;
-
-	/* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
-	LASSERT(lnet_ptl_is_wildcard(ptl));
-
-	if (LNET_CPT_NUMBER == 1)
-		return; /* never disable the only match-table */
-
-	mtable->mt_enabled = 0;
-
-	LASSERT(ptl->ptl_mt_nmaps > 0 &&
-		ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER);
-
-	/* remove it from mt_maps */
-	ptl->ptl_mt_nmaps--;
-	for (i = 0; i < ptl->ptl_mt_nmaps; i++) {
-		if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */
-			ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1];
-	}
-}
-
-static int
-lnet_try_match_md(struct lnet_libmd *md,
-		  struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	/*
-	 * ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock;
-	 * lnet_match_blocked_msg() relies on this to avoid races
-	 */
-	unsigned int offset;
-	unsigned int mlength;
-	struct lnet_me *me = md->md_me;
-
-	/* MD exhausted */
-	if (lnet_md_exhausted(md))
-		return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED;
-
-	/* mismatched MD op */
-	if (!(md->md_options & info->mi_opc))
-		return LNET_MATCHMD_NONE;
-
-	/* mismatched ME nid/pid? */
-	if (me->me_match_id.nid != LNET_NID_ANY &&
-	    me->me_match_id.nid != info->mi_id.nid)
-		return LNET_MATCHMD_NONE;
-
-	if (me->me_match_id.pid != LNET_PID_ANY &&
-	    me->me_match_id.pid != info->mi_id.pid)
-		return LNET_MATCHMD_NONE;
-
-	/* mismatched ME matchbits? */
-	if ((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits)
-		return LNET_MATCHMD_NONE;
-
-	/* Hurrah! This _is_ a match; check it out... */
-
-	if (!(md->md_options & LNET_MD_MANAGE_REMOTE))
-		offset = md->md_offset;
-	else
-		offset = info->mi_roffset;
-
-	if (md->md_options & LNET_MD_MAX_SIZE) {
-		mlength = md->md_max_size;
-		LASSERT(md->md_offset + mlength <= md->md_length);
-	} else {
-		mlength = md->md_length - offset;
-	}
-
-	if (info->mi_rlength <= mlength) {	/* fits in allowed space */
-		mlength = info->mi_rlength;
-	} else if (!(md->md_options & LNET_MD_TRUNCATE)) {
-		/* this packet _really_ is too big */
-		CERROR("Matching packet from %s, match %llu length %d too big: %d left, %d allowed\n",
-		       libcfs_id2str(info->mi_id), info->mi_mbits,
-		       info->mi_rlength, md->md_length - offset, mlength);
-
-		return LNET_MATCHMD_DROP;
-	}
-
-	/* Commit to this ME/MD */
-	CDEBUG(D_NET, "Incoming %s index %x from %s of length %d/%d into md %#llx [%d] + %d\n",
-	       (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
-	       info->mi_portal, libcfs_id2str(info->mi_id), mlength,
-	       info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
-
-	lnet_msg_attach_md(msg, md, offset, mlength);
-	md->md_offset = offset + mlength;
-
-	if (!lnet_md_exhausted(md))
-		return LNET_MATCHMD_OK;
-
-	/*
-	 * Auto-unlink NOW, so the ME gets unlinked if required.
-	 * We bumped md->md_refcount above so the MD just gets flagged
-	 * for unlink when it is finalized.
-	 */
-	if (md->md_flags & LNET_MD_FLAG_AUTO_UNLINK)
-		lnet_md_unlink(md);
-
-	return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED;
-}
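
The match-bits test in lnet_try_match_md() is a single expression: XOR exposes the differing bits, and masking with the complement of the ignore bits forgives exactly the bits the ME declared ignorable. A hedged illustration with concrete values:

/* Hedged illustration of the predicate used above */
static bool example_mbits_match(__u64 me_bits, __u64 ignore_bits,
				__u64 msg_bits)
{
	/* true iff no significant (non-ignored) bit differs */
	return !((me_bits ^ msg_bits) & ~ignore_bits);
}

/*
 * e.g. me_bits = 0xff00, ignore_bits = 0x00ff:
 *   msg_bits 0xff42 matches   (low byte differs but is ignored)
 *   msg_bits 0xfe00 does not  (bit 8 differs and is significant)
 */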
-
-static struct lnet_match_table *
-lnet_match2mt(struct lnet_portal *ptl, struct lnet_process_id id, __u64 mbits)
-{
-	if (LNET_CPT_NUMBER == 1)
-		return ptl->ptl_mtables[0]; /* the only one */
-
-	/* if it's a unique portal, return match-table hashed by NID */
-	return lnet_ptl_is_unique(ptl) ?
-	       ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
-}
-
-struct lnet_match_table *
-lnet_mt_of_attach(unsigned int index, struct lnet_process_id id,
-		  __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos)
-{
-	struct lnet_portal *ptl;
-	struct lnet_match_table	*mtable;
-
-	/* NB: called w/o lock */
-	LASSERT(index < the_lnet.ln_nportals);
-
-	if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
-		return NULL;
-
-	ptl = the_lnet.ln_portals[index];
-
-	mtable = lnet_match2mt(ptl, id, mbits);
-	if (mtable) /* unique portal or only one match-table */
-		return mtable;
-
-	/* it's a wildcard portal */
-	switch (pos) {
-	default:
-		return NULL;
-	case LNET_INS_BEFORE:
-	case LNET_INS_AFTER:
-		/*
-		 * posted by a thread with no CPT affinity; always hash to a
-		 * specific match-table to avoid buffer stealing, which is heavy
-		 */
-		return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER];
-	case LNET_INS_LOCAL:
-		/* posted by cpu-affinity thread */
-		return ptl->ptl_mtables[lnet_cpt_current()];
-	}
-}
-
-static struct lnet_match_table *
-lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	struct lnet_match_table	*mtable;
-	struct lnet_portal *ptl;
-	unsigned int nmaps;
-	unsigned int rotor;
-	unsigned int cpt;
-	bool routed;
-
-	/* NB: called w/o lock */
-	LASSERT(info->mi_portal < the_lnet.ln_nportals);
-	ptl = the_lnet.ln_portals[info->mi_portal];
-
-	LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl));
-
-	mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits);
-	if (mtable)
-		return mtable;
-
-	/* it's a wildcard portal */
-	routed = LNET_NIDNET(msg->msg_hdr.src_nid) !=
-		 LNET_NIDNET(msg->msg_hdr.dest_nid);
-
-	if (portal_rotor == LNET_PTL_ROTOR_OFF ||
-	    (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) {
-		cpt = lnet_cpt_current();
-		if (ptl->ptl_mtables[cpt]->mt_enabled)
-			return ptl->ptl_mtables[cpt];
-	}
-
-	rotor = ptl->ptl_rotor++; /* get round-robin factor */
-	if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
-		cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
-	else
-		cpt = rotor % LNET_CPT_NUMBER;
-
-	if (!ptl->ptl_mtables[cpt]->mt_enabled) {
-		/* is there any active entry for this portal? */
-		nmaps = ptl->ptl_mt_nmaps;
-		/* map to an active mtable to avoid heavy "stealing" */
-		if (nmaps) {
-			/*
-			 * NB: ptl_mt_maps may be changing under us because
-			 * we are not protected by lnet_ptl_lock, but that
-			 * shouldn't hurt anything
-			 */
-			cpt = ptl->ptl_mt_maps[rotor % nmaps];
-		}
-	}
-
-	return ptl->ptl_mtables[cpt];
-}
-
-static int
-lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos)
-{
-	__u64 *bmap;
-	int i;
-
-	if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
-		return 0;
-
-	if (pos < 0) { /* check all bits */
-		for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) {
-			if (mtable->mt_exhausted[i] != (__u64)(-1))
-				return 0;
-		}
-		return 1;
-	}
-
-	LASSERT(pos <= LNET_MT_HASH_IGNORE);
-	/* mtable::mt_mhash[pos] is marked as exhausted or not */
-	bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
-	pos &= (1 << LNET_MT_BITS_U64) - 1;
-
-	return (*bmap & BIT(pos));
-}
-
-static void
-lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
-{
-	__u64 *bmap;
-
-	LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]));
-	LASSERT(pos <= LNET_MT_HASH_IGNORE);
-
-	/* set mtable::mt_mhash[pos] as exhausted/non-exhausted */
-	bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
-	pos &= (1 << LNET_MT_BITS_U64) - 1;
-
-	if (!exhausted)
-		*bmap &= ~(1ULL << pos);
-	else
-		*bmap |= 1ULL << pos;
-}
-
-struct list_head *
-lnet_mt_match_head(struct lnet_match_table *mtable,
-		   struct lnet_process_id id, __u64 mbits)
-{
-	struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
-	unsigned long hash = mbits;
-
-	if (!lnet_ptl_is_wildcard(ptl)) {
-		hash += id.nid + id.pid;
-
-		LASSERT(lnet_ptl_is_unique(ptl));
-		hash = hash_long(hash, LNET_MT_HASH_BITS);
-	}
-	return &mtable->mt_mhash[hash & LNET_MT_HASH_MASK];
-}
-
-int
-lnet_mt_match_md(struct lnet_match_table *mtable,
-		 struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	struct list_head *head;
-	struct lnet_me *me;
-	struct lnet_me *tmp;
-	int exhausted = 0;
-	int rc;
-
-	/* any ME with ignore bits? */
-	if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
-		head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
-	else
-		head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
- again:
-	/* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */
-	if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
-		exhausted = LNET_MATCHMD_EXHAUSTED;
-
-	list_for_each_entry_safe(me, tmp, head, me_list) {
-		/* ME attached but MD not attached yet */
-		if (!me->me_md)
-			continue;
-
-		LASSERT(me == me->me_md->md_me);
-
-		rc = lnet_try_match_md(me->me_md, info, msg);
-		if (!(rc & LNET_MATCHMD_EXHAUSTED))
-			exhausted = 0; /* mlist is not empty */
-
-		if (rc & LNET_MATCHMD_FINISH) {
-			/*
-			 * don't return EXHAUSTED bit because we don't know
-			 * whether the mlist is empty or not
-			 */
-			return rc & ~LNET_MATCHMD_EXHAUSTED;
-		}
-	}
-
-	if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */
-		lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1);
-		if (!lnet_mt_test_exhausted(mtable, -1))
-			exhausted = 0;
-	}
-
-	if (!exhausted && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) {
-		head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
-		goto again; /* re-check MEs w/o ignore-bits */
-	}
-
-	if (info->mi_opc == LNET_MD_OP_GET ||
-	    !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
-		return exhausted | LNET_MATCHMD_DROP;
-
-	return exhausted | LNET_MATCHMD_NONE;
-}
-
-static int
-lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
-{
-	int rc;
-
-	/*
-	 * the message arrived before any buffer was posted on this portal;
-	 * simply delay or drop it
-	 */
-	if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)))
-		return 0;
-
-	lnet_ptl_lock(ptl);
-	/* check it again with hold of lock */
-	if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) {
-		lnet_ptl_unlock(ptl);
-		return 0;
-	}
-
-	if (lnet_ptl_is_lazy(ptl)) {
-		if (msg->msg_rx_ready_delay) {
-			msg->msg_rx_delayed = 1;
-			list_add_tail(&msg->msg_list,
-				      &ptl->ptl_msg_delayed);
-		}
-		rc = LNET_MATCHMD_NONE;
-	} else {
-		rc = LNET_MATCHMD_DROP;
-	}
-
-	lnet_ptl_unlock(ptl);
-	return rc;
-}
-
-static int
-lnet_ptl_match_delay(struct lnet_portal *ptl,
-		     struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	int first = ptl->ptl_mt_maps[0]; /* read w/o lock */
-	int rc = 0;
-	int i;
-
-	/**
-	 * Steal a buffer from other CPTs, and delay the msg if there is
-	 * nothing to steal. This function is more expensive than a
-	 * regular match, but we don't expect it to happen often. The return
-	 * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or
-	 * LNET_MATCHMD_NONE.
-	 */
-	LASSERT(lnet_ptl_is_wildcard(ptl));
-
-	for (i = 0; i < LNET_CPT_NUMBER; i++) {
-		struct lnet_match_table *mtable;
-		int cpt;
-
-		cpt = (first + i) % LNET_CPT_NUMBER;
-		mtable = ptl->ptl_mtables[cpt];
-		if (i && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled)
-			continue;
-
-		lnet_res_lock(cpt);
-		lnet_ptl_lock(ptl);
-
-		if (!i) {
-			/* The first try, add to stealing list. */
-			list_add_tail(&msg->msg_list,
-				      &ptl->ptl_msg_stealing);
-		}
-
-		if (!list_empty(&msg->msg_list)) {
-			/* On stealing list. */
-			rc = lnet_mt_match_md(mtable, info, msg);
-
-			if ((rc & LNET_MATCHMD_EXHAUSTED) &&
-			    mtable->mt_enabled)
-				lnet_ptl_disable_mt(ptl, cpt);
-
-			if (rc & LNET_MATCHMD_FINISH) {
-				/* Match found, remove from stealing list. */
-				list_del_init(&msg->msg_list);
-			} else if (i == LNET_CPT_NUMBER - 1 ||	/* (1) */
-				   !ptl->ptl_mt_nmaps ||	/* (2) */
-				   (ptl->ptl_mt_nmaps == 1 &&	/* (3) */
-				    ptl->ptl_mt_maps[0] == cpt)) {
-				/**
-				 * No match found, and this is either
-				 * (1) the last cpt to check, or
-				 * (2) there is no active cpt, or
-				 * (3) this is the only active cpt.
-				 * There is nothing to steal: delay or
-				 * drop the message.
-				 */
-				list_del_init(&msg->msg_list);
-
-				if (lnet_ptl_is_lazy(ptl)) {
-					msg->msg_rx_delayed = 1;
-					list_add_tail(&msg->msg_list,
-						      &ptl->ptl_msg_delayed);
-					rc = LNET_MATCHMD_NONE;
-				} else {
-					rc = LNET_MATCHMD_DROP;
-				}
-			} else {
-				/* Do another iteration. */
-				rc = 0;
-			}
-		} else {
-			/**
-			 * No longer on stealing list: another thread
-			 * matched the message in lnet_ptl_attach_md().
-			 * We are now expected to handle the message.
-			 */
-			rc = !msg->msg_md ?
-			     LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
-		}
-
-		lnet_ptl_unlock(ptl);
-		lnet_res_unlock(cpt);
-
-		/**
-		 * Note that test (1) above ensures that we always
-		 * exit the loop through this break statement.
-		 *
-		 * LNET_MATCHMD_NONE means msg was added to the
-		 * delayed queue, and we may no longer reference it
-		 * after lnet_ptl_unlock() and lnet_res_unlock().
-		 */
-		if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE))
-			break;
-	}
-
-	return rc;
-}
-
-int
-lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
-{
-	struct lnet_match_table	*mtable;
-	struct lnet_portal *ptl;
-	int rc;
-
-	CDEBUG(D_NET, "Request from %s of length %d into portal %d MB=%#llx\n",
-	       libcfs_id2str(info->mi_id), info->mi_rlength, info->mi_portal,
-	       info->mi_mbits);
-
-	if (info->mi_portal >= the_lnet.ln_nportals) {
-		CERROR("Invalid portal %d not in [0-%d]\n",
-		       info->mi_portal, the_lnet.ln_nportals);
-		return LNET_MATCHMD_DROP;
-	}
-
-	ptl = the_lnet.ln_portals[info->mi_portal];
-	rc = lnet_ptl_match_early(ptl, msg);
-	if (rc) /* matched or delayed early message */
-		return rc;
-
-	mtable = lnet_mt_of_match(info, msg);
-	lnet_res_lock(mtable->mt_cpt);
-
-	if (the_lnet.ln_shutdown) {
-		rc = LNET_MATCHMD_DROP;
-		goto out1;
-	}
-
-	rc = lnet_mt_match_md(mtable, info, msg);
-	if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) {
-		lnet_ptl_lock(ptl);
-		lnet_ptl_disable_mt(ptl, mtable->mt_cpt);
-		lnet_ptl_unlock(ptl);
-	}
-
-	if (rc & LNET_MATCHMD_FINISH)	/* matched or dropping */
-		goto out1;
-
-	if (!msg->msg_rx_ready_delay)
-		goto out1;
-
-	LASSERT(lnet_ptl_is_lazy(ptl));
-	LASSERT(!msg->msg_rx_delayed);
-
-	/* NB: we don't expect "delay" to happen often */
-	if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) {
-		lnet_ptl_lock(ptl);
-
-		msg->msg_rx_delayed = 1;
-		list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
-
-		lnet_ptl_unlock(ptl);
-		lnet_res_unlock(mtable->mt_cpt);
-		rc = LNET_MATCHMD_NONE;
-	} else  {
-		lnet_res_unlock(mtable->mt_cpt);
-		rc = lnet_ptl_match_delay(ptl, info, msg);
-	}
-
-	/* LNET_MATCHMD_NONE means msg was added to the delay queue */
-	if (rc & LNET_MATCHMD_NONE) {
-		CDEBUG(D_NET,
-		       "Delaying %s from %s ptl %d MB %#llx off %d len %d\n",
-		       info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
-		       libcfs_id2str(info->mi_id), info->mi_portal,
-		       info->mi_mbits, info->mi_roffset, info->mi_rlength);
-	}
-	goto out0;
- out1:
-	lnet_res_unlock(mtable->mt_cpt);
- out0:
-	/* EXHAUSTED bit is only meaningful for internal functions */
-	return rc & ~LNET_MATCHMD_EXHAUSTED;
-}
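
A hedged caller-side sketch to make the return-code protocol above concrete; deliver() and drop() are hypothetical stand-ins for the real delivery and drop paths:

static void deliver(struct lnet_msg *msg);	/* hypothetical helper */
static void drop(struct lnet_msg *msg);	/* hypothetical helper */

static void example_dispatch(struct lnet_match_info *info,
			     struct lnet_msg *msg)
{
	int rc = lnet_ptl_match_md(info, msg);

	if (rc & LNET_MATCHMD_OK)
		deliver(msg);	/* msg matched an MD; hand it over */
	else if (rc & LNET_MATCHMD_DROP)
		drop(msg);	/* no match and the portal is not lazy */
	/* else LNET_MATCHMD_NONE: msg now sits on the delayed queue and
	 * must not be referenced again here */
}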
-
-void
-lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md)
-{
-	LASSERT(me->me_md == md && md->md_me == me);
-
-	me->me_md = NULL;
-	md->md_me = NULL;
-}
-
-/* called with lnet_res_lock held */
-void
-lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md,
-		   struct list_head *matches, struct list_head *drops)
-{
-	struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
-	struct lnet_match_table	*mtable;
-	struct list_head *head;
-	struct lnet_msg *tmp;
-	struct lnet_msg *msg;
-	int exhausted = 0;
-	int cpt;
-
-	LASSERT(!md->md_refcount); /* a brand new MD */
-
-	me->me_md = md;
-	md->md_me = me;
-
-	cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
-	mtable = ptl->ptl_mtables[cpt];
-
-	if (list_empty(&ptl->ptl_msg_stealing) &&
-	    list_empty(&ptl->ptl_msg_delayed) &&
-	    !lnet_mt_test_exhausted(mtable, me->me_pos))
-		return;
-
-	lnet_ptl_lock(ptl);
-	head = &ptl->ptl_msg_stealing;
- again:
-	list_for_each_entry_safe(msg, tmp, head, msg_list) {
-		struct lnet_match_info info;
-		struct lnet_hdr *hdr;
-		int rc;
-
-		LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
-
-		hdr = &msg->msg_hdr;
-		info.mi_id.nid  = hdr->src_nid;
-		info.mi_id.pid  = hdr->src_pid;
-		info.mi_opc     = LNET_MD_OP_PUT;
-		info.mi_portal  = hdr->msg.put.ptl_index;
-		info.mi_rlength = hdr->payload_length;
-		info.mi_roffset = hdr->msg.put.offset;
-		info.mi_mbits   = hdr->msg.put.match_bits;
-
-		rc = lnet_try_match_md(md, &info, msg);
-
-		exhausted = (rc & LNET_MATCHMD_EXHAUSTED);
-		if (rc & LNET_MATCHMD_NONE) {
-			if (exhausted)
-				break;
-			continue;
-		}
-
-		/* Hurrah! This _is_ a match */
-		LASSERT(rc & LNET_MATCHMD_FINISH);
-		list_del_init(&msg->msg_list);
-
-		if (head == &ptl->ptl_msg_stealing) {
-			if (exhausted)
-				break;
-			/* stealing thread will handle the message */
-			continue;
-		}
-
-		if (rc & LNET_MATCHMD_OK) {
-			list_add_tail(&msg->msg_list, matches);
-
-			CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
-			       libcfs_id2str(info.mi_id),
-			       info.mi_portal, info.mi_mbits,
-			       info.mi_roffset, info.mi_rlength);
-		} else {
-			list_add_tail(&msg->msg_list, drops);
-		}
-
-		if (exhausted)
-			break;
-	}
-
-	if (!exhausted && head == &ptl->ptl_msg_stealing) {
-		head = &ptl->ptl_msg_delayed;
-		goto again;
-	}
-
-	if (lnet_ptl_is_wildcard(ptl) && !exhausted) {
-		lnet_mt_set_exhausted(mtable, me->me_pos, 0);
-		if (!mtable->mt_enabled)
-			lnet_ptl_enable_mt(ptl, cpt);
-	}
-
-	lnet_ptl_unlock(ptl);
-}
-
-static void
-lnet_ptl_cleanup(struct lnet_portal *ptl)
-{
-	struct lnet_match_table	*mtable;
-	int i;
-
-	if (!ptl->ptl_mtables) /* uninitialized portal */
-		return;
-
-	LASSERT(list_empty(&ptl->ptl_msg_delayed));
-	LASSERT(list_empty(&ptl->ptl_msg_stealing));
-	cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
-		struct list_head *mhash;
-		struct lnet_me *me;
-		int j;
-
-		if (!mtable->mt_mhash) /* uninitialized match-table */
-			continue;
-
-		mhash = mtable->mt_mhash;
-		/* cleanup ME */
-		for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
-			while (!list_empty(&mhash[j])) {
-				me = list_entry(mhash[j].next,
-						struct lnet_me, me_list);
-				CERROR("Active ME %p on exit\n", me);
-				list_del(&me->me_list);
-				kfree(me);
-			}
-		}
-		/* the extra entry is for MEs with ignore bits */
-		kvfree(mhash);
-	}
-
-	cfs_percpt_free(ptl->ptl_mtables);
-	ptl->ptl_mtables = NULL;
-}
-
-static int
-lnet_ptl_setup(struct lnet_portal *ptl, int index)
-{
-	struct lnet_match_table	*mtable;
-	struct list_head *mhash;
-	int i;
-	int j;
-
-	ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(),
-					    sizeof(struct lnet_match_table));
-	if (!ptl->ptl_mtables) {
-		CERROR("Failed to create match table for portal %d\n", index);
-		return -ENOMEM;
-	}
-
-	ptl->ptl_index = index;
-	INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
-	INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
-	spin_lock_init(&ptl->ptl_lock);
-	cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
-		/* the extra entry is for MEs with ignore bits */
-		mhash = kvzalloc_cpt(sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1),
-				     GFP_KERNEL, i);
-		if (!mhash) {
-			CERROR("Failed to create match hash for portal %d\n",
-			       index);
-			goto failed;
-		}
-
-		memset(&mtable->mt_exhausted[0], -1,
-		       sizeof(mtable->mt_exhausted[0]) *
-		       LNET_MT_EXHAUSTED_BMAP);
-		mtable->mt_mhash = mhash;
-		for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
-			INIT_LIST_HEAD(&mhash[j]);
-
-		mtable->mt_portal = index;
-		mtable->mt_cpt = i;
-	}
-
-	return 0;
- failed:
-	lnet_ptl_cleanup(ptl);
-	return -ENOMEM;
-}
-
-void
-lnet_portals_destroy(void)
-{
-	int i;
-
-	if (!the_lnet.ln_portals)
-		return;
-
-	for (i = 0; i < the_lnet.ln_nportals; i++)
-		lnet_ptl_cleanup(the_lnet.ln_portals[i]);
-
-	cfs_array_free(the_lnet.ln_portals);
-	the_lnet.ln_portals = NULL;
-	the_lnet.ln_nportals = 0;
-}
-
-int
-lnet_portals_create(void)
-{
-	int size;
-	int i;
-
-	size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]);
-
-	the_lnet.ln_portals = cfs_array_alloc(MAX_PORTALS, size);
-	if (!the_lnet.ln_portals) {
-		CERROR("Failed to allocate portals table\n");
-		return -ENOMEM;
-	}
-	the_lnet.ln_nportals = MAX_PORTALS;
-
-	for (i = 0; i < the_lnet.ln_nportals; i++) {
-		if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
-			lnet_portals_destroy();
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
-/**
- * Turn on the lazy portal attribute. Use with caution!
- *
- * This portal attribute only affects incoming PUT requests to the portal,
- * and is off by default. By default, if there's no matching MD for an
- * incoming PUT request, it is simply dropped. With the lazy attribute on,
- * such requests are queued indefinitely until either a matching MD is
- * posted to the portal or the lazy attribute is turned off.
- *
- * This prevents dropped requests, but it should be regarded as the last
- * line of defense: users must keep a close watch on the number of active
- * buffers on a lazy portal, and post more buffers as soon as it runs low.
- * Delayed requests usually have detrimental effects on the underlying
- * network connections; due to flow control, a few delayed requests often
- * suffice to bring a connection to a complete halt.
- *
- * There is also a DoS attack risk. If users don't post match-all MDs on a
- * lazy portal, a malicious peer can easily stop a service by sending some
- * PUT requests with match bits that won't match any MD. A routed server is
- * especially vulnerable since the connections to its neighbor routers are
- * shared among all clients.
- *
- * \param portal Index of the portal to enable the lazy attribute on.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetSetLazyPortal(int portal)
-{
-	struct lnet_portal *ptl;
-
-	if (portal < 0 || portal >= the_lnet.ln_nportals)
-		return -EINVAL;
-
-	CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
-	ptl = the_lnet.ln_portals[portal];
-
-	lnet_res_lock(LNET_LOCK_EX);
-	lnet_ptl_lock(ptl);
-
-	lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
-
-	lnet_ptl_unlock(ptl);
-	lnet_res_unlock(LNET_LOCK_EX);
-
-	return 0;
-}
-EXPORT_SYMBOL(LNetSetLazyPortal);
-
-int
-lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
-{
-	struct lnet_portal *ptl;
-	LIST_HEAD(zombies);
-
-	if (portal < 0 || portal >= the_lnet.ln_nportals)
-		return -EINVAL;
-
-	ptl = the_lnet.ln_portals[portal];
-
-	lnet_res_lock(LNET_LOCK_EX);
-	lnet_ptl_lock(ptl);
-
-	if (!lnet_ptl_is_lazy(ptl)) {
-		lnet_ptl_unlock(ptl);
-		lnet_res_unlock(LNET_LOCK_EX);
-		return 0;
-	}
-
-	if (ni) {
-		struct lnet_msg *msg, *tmp;
-
-		/* grab all messages which are on the NI passed in */
-		list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
-					 msg_list) {
-			if (msg->msg_rxpeer->lp_ni == ni)
-				list_move(&msg->msg_list, &zombies);
-		}
-	} else {
-		if (the_lnet.ln_shutdown)
-			CWARN("Active lazy portal %d on exit\n", portal);
-		else
-			CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
-
-		/* grab all the blocked messages atomically */
-		list_splice_init(&ptl->ptl_msg_delayed, &zombies);
-
-		lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
-	}
-
-	lnet_ptl_unlock(ptl);
-	lnet_res_unlock(LNET_LOCK_EX);
-
-	lnet_drop_delayed_msg_list(&zombies, reason);
-
-	return 0;
-}
-
-/**
- * Turn off the lazy portal attribute. Delayed requests on the portal,
- * if any, will all be dropped when this function returns.
- *
- * \param portal Index of the portal to disable the lazy attribute on.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetClearLazyPortal(int portal)
-{
-	return lnet_clear_lazy_portal(NULL, portal,
-				      "Clearing lazy portal attr");
-}
-EXPORT_SYMBOL(LNetClearLazyPortal);
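
A hedged usage sketch of the pair above; EXAMPLE_PORTAL is an assumed index, not a real constant:

#define EXAMPLE_PORTAL	10	/* hypothetical portal index */

static int example_lazy_portal(void)
{
	int rc;

	rc = LNetSetLazyPortal(EXAMPLE_PORTAL);
	if (rc)			/* -EINVAL: index out of range */
		return rc;

	/* ... post match-all MDs and service the portal here ... */

	/* any still-delayed requests are dropped on return */
	return LNetClearLazyPortal(EXAMPLE_PORTAL);
}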

+ 0 - 585
drivers/staging/lustre/lnet/lnet/lib-socket.c

@@ -1,585 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/if.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/file.h>
-#include <linux/pagemap.h>
-/* For sys_open & sys_close */
-#include <linux/syscalls.h>
-#include <net/sock.h>
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
-{
-	mm_segment_t oldfs = get_fs();
-	int err;
-
-	set_fs(KERNEL_DS);
-	err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
-	set_fs(oldfs);
-
-	return err;
-}
-
-static int
-lnet_sock_ioctl(int cmd, unsigned long arg)
-{
-	struct file *sock_filp;
-	struct socket *sock;
-	int rc;
-
-	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
-	if (rc) {
-		CERROR("Can't create socket: %d\n", rc);
-		return rc;
-	}
-
-	sock_filp = sock_alloc_file(sock, 0, NULL);
-	if (IS_ERR(sock_filp))
-		return PTR_ERR(sock_filp);
-
-	rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
-
-	fput(sock_filp);
-	return rc;
-}
-
-int
-lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
-{
-	struct ifreq ifr;
-	int nob;
-	int rc;
-	__be32 val;
-
-	nob = strnlen(name, IFNAMSIZ);
-	if (nob == IFNAMSIZ) {
-		CERROR("Interface name %s too long\n", name);
-		return -EINVAL;
-	}
-
-	BUILD_BUG_ON(sizeof(ifr.ifr_name) < IFNAMSIZ);
-
-	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
-		return -E2BIG;
-	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
-	rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
-	if (rc) {
-		CERROR("Can't get flags for interface %s\n", name);
-		return rc;
-	}
-
-	if (!(ifr.ifr_flags & IFF_UP)) {
-		CDEBUG(D_NET, "Interface %s down\n", name);
-		*up = 0;
-		*ip = *mask = 0;
-		return 0;
-	}
-	*up = 1;
-
-	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
-		return -E2BIG;
-	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
-	ifr.ifr_addr.sa_family = AF_INET;
-	rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
-	if (rc) {
-		CERROR("Can't get IP address for interface %s\n", name);
-		return rc;
-	}
-
-	val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
-	*ip = ntohl(val);
-
-	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
-		return -E2BIG;
-	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
-	ifr.ifr_addr.sa_family = AF_INET;
-	rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
-	if (rc) {
-		CERROR("Can't get netmask for interface %s\n", name);
-		return rc;
-	}
-
-	val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
-	*mask = ntohl(val);
-
-	return 0;
-}
-EXPORT_SYMBOL(lnet_ipif_query);
-
-int
-lnet_ipif_enumerate(char ***namesp)
-{
-	/* Allocate and fill in 'names', returning # interfaces/error */
-	char **names;
-	int toobig;
-	int nalloc;
-	int nfound;
-	struct ifreq *ifr;
-	struct ifconf ifc;
-	int rc;
-	int nob;
-	int i;
-
-	nalloc = 16;	/* first guess at max interfaces */
-	toobig = 0;
-	for (;;) {
-		if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
-			toobig = 1;
-			nalloc = PAGE_SIZE / sizeof(*ifr);
-			CWARN("Too many interfaces: only enumerating first %d\n",
-			      nalloc);
-		}
-
-		ifr = kzalloc(nalloc * sizeof(*ifr), GFP_KERNEL);
-		if (!ifr) {
-			CERROR("ENOMEM enumerating up to %d interfaces\n",
-			       nalloc);
-			rc = -ENOMEM;
-			goto out0;
-		}
-
-		ifc.ifc_buf = (char *)ifr;
-		ifc.ifc_len = nalloc * sizeof(*ifr);
-
-		rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
-		if (rc < 0) {
-			CERROR("Error %d enumerating interfaces\n", rc);
-			goto out1;
-		}
-
-		LASSERT(!rc);
-
-		nfound = ifc.ifc_len / sizeof(*ifr);
-		LASSERT(nfound <= nalloc);
-
-		if (nfound < nalloc || toobig)
-			break;
-
-		kfree(ifr);
-		nalloc *= 2;
-	}
-
-	if (!nfound)
-		goto out1;
-
-	names = kzalloc(nfound * sizeof(*names), GFP_KERNEL);
-	if (!names) {
-		rc = -ENOMEM;
-		goto out1;
-	}
-
-	for (i = 0; i < nfound; i++) {
-		nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
-		if (nob == IFNAMSIZ) {
-			/* no space for terminating NULL */
-			CERROR("interface name %.*s too long (%d max)\n",
-			       nob, ifr[i].ifr_name, IFNAMSIZ);
-			rc = -ENAMETOOLONG;
-			goto out2;
-		}
-
-		names[i] = kmalloc(IFNAMSIZ, GFP_KERNEL);
-		if (!names[i]) {
-			rc = -ENOMEM;
-			goto out2;
-		}
-
-		memcpy(names[i], ifr[i].ifr_name, nob);
-		names[i][nob] = 0;
-	}
-
-	*namesp = names;
-	rc = nfound;
-
-out2:
-	if (rc < 0)
-		lnet_ipif_free_enumeration(names, nfound);
-out1:
-	kfree(ifr);
-out0:
-	return rc;
-}
-EXPORT_SYMBOL(lnet_ipif_enumerate);
-
-void
-lnet_ipif_free_enumeration(char **names, int n)
-{
-	int i;
-
-	LASSERT(n > 0);
-
-	for (i = 0; i < n && names[i]; i++)
-		kfree(names[i]);
-
-	kfree(names);
-}
-EXPORT_SYMBOL(lnet_ipif_free_enumeration);
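
A hedged sketch of how the three interface helpers above compose; the debug print is illustrative only:

static void example_list_interfaces(void)
{
	char **names;
	int i, n;

	n = lnet_ipif_enumerate(&names);	/* # interfaces, or -errno */
	if (n <= 0)
		return;

	for (i = 0; i < n; i++) {
		int up;
		__u32 ip, mask;

		if (!lnet_ipif_query(names[i], &up, &ip, &mask) && up)
			CDEBUG(D_NET, "%s: addr %pI4h mask %pI4h\n",
			       names[i], &ip, &mask);
	}

	lnet_ipif_free_enumeration(names, n);
}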
-
-int
-lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
-{
-	int rc;
-	long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
-	unsigned long then;
-	struct timeval tv;
-	struct kvec  iov = { .iov_base = buffer, .iov_len  = nob };
-	struct msghdr msg = {NULL,};
-
-	LASSERT(nob > 0);
-	/*
-	 * Caller may pass a zero timeout if she thinks the socket buffer is
-	 * empty enough to take the whole message immediately
-	 */
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
-	for (;;) {
-		msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
-		if (timeout) {
-			/* Set send timeout to remaining time */
-			jiffies_to_timeval(jiffies_left, &tv);
-			rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
-					       (char *)&tv, sizeof(tv));
-			if (rc) {
-				CERROR("Can't set socket send timeout %ld.%06d: %d\n",
-				       (long)tv.tv_sec, (int)tv.tv_usec, rc);
-				return rc;
-			}
-		}
-
-		then = jiffies;
-		rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
-		jiffies_left -= jiffies - then;
-
-		if (rc < 0)
-			return rc;
-
-		if (!rc) {
-			CERROR("Unexpected zero rc\n");
-			return -ECONNABORTED;
-		}
-
-		if (!msg_data_left(&msg))
-			break;
-
-		if (jiffies_left <= 0)
-			return -EAGAIN;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(lnet_sock_write);
-
-int
-lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
-{
-	int rc;
-	long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
-	unsigned long then;
-	struct timeval tv;
-	struct kvec  iov = {
-		.iov_base = buffer,
-		.iov_len  = nob
-	};
-	struct msghdr msg = {
-		.msg_flags = 0
-	};
-
-	LASSERT(nob > 0);
-	LASSERT(jiffies_left > 0);
-
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, nob);
-
-	for (;;) {
-		/* Set receive timeout to remaining time */
-		jiffies_to_timeval(jiffies_left, &tv);
-		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
-				       (char *)&tv, sizeof(tv));
-		if (rc) {
-			CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
-			       (long)tv.tv_sec, (int)tv.tv_usec, rc);
-			return rc;
-		}
-
-		then = jiffies;
-		rc = sock_recvmsg(sock, &msg, 0);
-		jiffies_left -= jiffies - then;
-
-		if (rc < 0)
-			return rc;
-
-		if (!rc)
-			return -ECONNRESET;
-
-		if (!msg_data_left(&msg))
-			return 0;
-
-		if (jiffies_left <= 0)
-			return -ETIMEDOUT;
-	}
-}
-EXPORT_SYMBOL(lnet_sock_read);
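
Because the timeout bounds the whole transfer, the pair above can implement a fixed-size handshake without blocking indefinitely. A hedged sketch, with example_hello as an assumed message layout:

struct example_hello {
	__u32 magic;		/* illustrative fields */
	__u32 version;
};

static int example_handshake(struct socket *sock)
{
	struct example_hello out = { .magic = 0xbada55, .version = 1 };
	struct example_hello in;
	int rc;

	rc = lnet_sock_write(sock, &out, sizeof(out), 5); /* 5s budget */
	if (rc)				/* -EAGAIN on timeout */
		return rc;

	return lnet_sock_read(sock, &in, sizeof(in), 5);  /* -ETIMEDOUT on timeout */
}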
-
-static int
-lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
-		 int local_port)
-{
-	struct sockaddr_in locaddr;
-	struct socket *sock;
-	int rc;
-	int option;
-
-	/* All errors are fatal except bind failure if the port is in use */
-	*fatal = 1;
-
-	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
-	*sockp = sock;
-	if (rc) {
-		CERROR("Can't create socket: %d\n", rc);
-		return rc;
-	}
-
-	option = 1;
-	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
-			       (char *)&option, sizeof(option));
-	if (rc) {
-		CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
-		goto failed;
-	}
-
-	if (local_ip || local_port) {
-		memset(&locaddr, 0, sizeof(locaddr));
-		locaddr.sin_family = AF_INET;
-		locaddr.sin_port = htons(local_port);
-		if (!local_ip)
-			locaddr.sin_addr.s_addr = htonl(INADDR_ANY);
-		else
-			locaddr.sin_addr.s_addr = htonl(local_ip);
-
-		rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
-				 sizeof(locaddr));
-		if (rc == -EADDRINUSE) {
-			CDEBUG(D_NET, "Port %d already in use\n", local_port);
-			*fatal = 0;
-			goto failed;
-		}
-		if (rc) {
-			CERROR("Error trying to bind to port %d: %d\n",
-			       local_port, rc);
-			goto failed;
-		}
-	}
-	return 0;
-
-failed:
-	sock_release(sock);
-	return rc;
-}
-
-int
-lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
-{
-	int option;
-	int rc;
-
-	if (txbufsize) {
-		option = txbufsize;
-		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
-				       (char *)&option, sizeof(option));
-		if (rc) {
-			CERROR("Can't set send buffer %d: %d\n",
-			       option, rc);
-			return rc;
-		}
-	}
-
-	if (rxbufsize) {
-		option = rxbufsize;
-		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
-				       (char *)&option, sizeof(option));
-		if (rc) {
-			CERROR("Can't set receive buffer %d: %d\n",
-			       option, rc);
-			return rc;
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL(lnet_sock_setbuf);
-
-int
-lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
-{
-	struct sockaddr_in sin;
-	int rc;
-
-	if (remote)
-		rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
-	else
-		rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
-	if (rc < 0) {
-		CERROR("Error %d getting sock %s IP/port\n",
-		       rc, remote ? "peer" : "local");
-		return rc;
-	}
-
-	if (ip)
-		*ip = ntohl(sin.sin_addr.s_addr);
-
-	if (port)
-		*port = ntohs(sin.sin_port);
-
-	return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getaddr);
-
-int
-lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
-{
-	if (txbufsize)
-		*txbufsize = sock->sk->sk_sndbuf;
-
-	if (rxbufsize)
-		*rxbufsize = sock->sk->sk_rcvbuf;
-
-	return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getbuf);
-
-int
-lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
-		 int backlog)
-{
-	int fatal;
-	int rc;
-
-	rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
-	if (rc) {
-		if (!fatal)
-			CERROR("Can't create socket: port %d already in use\n",
-			       local_port);
-		return rc;
-	}
-
-	rc = kernel_listen(*sockp, backlog);
-	if (!rc)
-		return 0;
-
-	CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
-	sock_release(*sockp);
-	return rc;
-}
-
-int
-lnet_sock_accept(struct socket **newsockp, struct socket *sock)
-{
-	wait_queue_entry_t wait;
-	struct socket *newsock;
-	int rc;
-
-	/*
-	 * XXX this should add a ref to sock->ops->owner, if
-	 * TCP could be a module
-	 */
-	rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
-	if (rc) {
-		CERROR("Can't allocate socket\n");
-		return rc;
-	}
-
-	newsock->ops = sock->ops;
-
-	rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
-	if (rc == -EAGAIN) {
-		/* Nothing ready, so wait for activity */
-		init_waitqueue_entry(&wait, current);
-		add_wait_queue(sk_sleep(sock->sk), &wait);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule();
-		remove_wait_queue(sk_sleep(sock->sk), &wait);
-		rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
-	}
-
-	if (rc)
-		goto failed;
-
-	*newsockp = newsock;
-	return 0;
-
-failed:
-	sock_release(newsock);
-	return rc;
-}
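
A hedged server-side sketch combining the two helpers above; the port and backlog values are illustrative:

static int example_accept_one(struct socket **connp)
{
	struct socket *lsock;
	int rc;

	rc = lnet_sock_listen(&lsock, 0 /* any local IP */, 988, 127);
	if (rc)			/* includes the port-in-use case */
		return rc;

	rc = lnet_sock_accept(connp, lsock);
	sock_release(lsock);
	return rc;
}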
-
-int
-lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
-		  int local_port, __u32 peer_ip, int peer_port)
-{
-	struct sockaddr_in srvaddr;
-	int rc;
-
-	rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
-	if (rc)
-		return rc;
-
-	memset(&srvaddr, 0, sizeof(srvaddr));
-	srvaddr.sin_family = AF_INET;
-	srvaddr.sin_port = htons(peer_port);
-	srvaddr.sin_addr.s_addr = htonl(peer_ip);
-
-	rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
-			    sizeof(srvaddr), 0);
-	if (!rc)
-		return 0;
-
-	/*
-	 * EADDRNOTAVAIL probably means we're already connected to the same
-	 * peer/port on the same local port on a differently typed
-	 * connection.  Let our caller retry with a different local
-	 * port...
-	 */
-	*fatal = !(rc == -EADDRNOTAVAIL);
-
-	CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
-		     "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
-		     &local_ip, local_port, &peer_ip, peer_port);
-
-	sock_release(*sockp);
-	return rc;
-}
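
A hedged sketch of the retry convention described above: on a non-fatal failure the caller walks down through a range of local ports (the range shown is illustrative):

static int example_connect_retry(struct socket **sockp, __u32 local_ip,
				 __u32 peer_ip, int peer_port)
{
	int port;
	int fatal;
	int rc;

	for (port = 1023; port > 512; port--) {
		rc = lnet_sock_connect(sockp, &fatal, local_ip, port,
				       peer_ip, peer_port);
		if (!rc || fatal)	/* success, or not worth retrying */
			return rc;
	}

	return -EADDRNOTAVAIL;	/* exhausted the port range */
}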

+ 0 - 105
drivers/staging/lustre/lnet/lnet/lo.c

@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-lolnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
-	LASSERT(!lntmsg->msg_routing);
-	LASSERT(!lntmsg->msg_target_is_router);
-
-	return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
-}
-
-static int
-lolnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-	   int delayed, struct iov_iter *to, unsigned int rlen)
-{
-	struct lnet_msg *sendmsg = private;
-
-	if (lntmsg) {		   /* not discarding */
-		if (sendmsg->msg_iov)
-			lnet_copy_iov2iter(to,
-					   sendmsg->msg_niov,
-					   sendmsg->msg_iov,
-					   sendmsg->msg_offset,
-					   iov_iter_count(to));
-		else
-			lnet_copy_kiov2iter(to,
-					    sendmsg->msg_niov,
-					    sendmsg->msg_kiov,
-					    sendmsg->msg_offset,
-					    iov_iter_count(to));
-
-		lnet_finalize(ni, lntmsg, 0);
-	}
-
-	lnet_finalize(ni, sendmsg, 0);
-	return 0;
-}
-
-static int lolnd_instanced;
-
-static void
-lolnd_shutdown(struct lnet_ni *ni)
-{
-	CDEBUG(D_NET, "shutdown\n");
-	LASSERT(lolnd_instanced);
-
-	lolnd_instanced = 0;
-}
-
-static int
-lolnd_startup(struct lnet_ni *ni)
-{
-	LASSERT(ni->ni_lnd == &the_lolnd);
-	LASSERT(!lolnd_instanced);
-	lolnd_instanced = 1;
-
-	return 0;
-}
-
-struct lnet_lnd the_lolnd = {
-	/* .lnd_list       = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
-	/* .lnd_refcount   = */ 0,
-	/* .lnd_type       = */ LOLND,
-	/* .lnd_startup    = */ lolnd_startup,
-	/* .lnd_shutdown   = */ lolnd_shutdown,
-	/* .lnd_ctl        = */ NULL,
-	/* .lnd_send       = */ lolnd_send,
-	/* .lnd_recv       = */ lolnd_recv,
-	/* .lnd_eager_recv = */ NULL,
-	/* .lnd_notify     = */ NULL,
-	/* .lnd_accept     = */ NULL
-};

+ 0 - 239
drivers/staging/lustre/lnet/lnet/module.c

@@ -1,239 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-static int config_on_load;
-module_param(config_on_load, int, 0444);
-MODULE_PARM_DESC(config_on_load, "configure network at module load");
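
With the parameter above, loading the module as "modprobe lnet config_on_load=1" makes lnet_init() below spawn a thread that configures the network at load time.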
-
-static struct mutex lnet_config_mutex;
-
-static int
-lnet_configure(void *arg)
-{
-	/* 'arg' is only there so this can be passed to kthread_run() */
-	int rc = 0;
-
-	mutex_lock(&lnet_config_mutex);
-
-	if (!the_lnet.ln_niinit_self) {
-		rc = try_module_get(THIS_MODULE);
-
-		if (rc != 1)
-			goto out;
-
-		rc = LNetNIInit(LNET_PID_LUSTRE);
-		if (rc >= 0) {
-			the_lnet.ln_niinit_self = 1;
-			rc = 0;
-		} else {
-			module_put(THIS_MODULE);
-		}
-	}
-
-out:
-	mutex_unlock(&lnet_config_mutex);
-	return rc;
-}
-
-static int
-lnet_unconfigure(void)
-{
-	int refcount;
-
-	mutex_lock(&lnet_config_mutex);
-
-	if (the_lnet.ln_niinit_self) {
-		the_lnet.ln_niinit_self = 0;
-		LNetNIFini();
-		module_put(THIS_MODULE);
-	}
-
-	mutex_lock(&the_lnet.ln_api_mutex);
-	refcount = the_lnet.ln_refcount;
-	mutex_unlock(&the_lnet.ln_api_mutex);
-
-	mutex_unlock(&lnet_config_mutex);
-	return !refcount ? 0 : -EBUSY;
-}
-
-static int
-lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr)
-{
-	struct lnet_ioctl_config_data *conf =
-		(struct lnet_ioctl_config_data *)hdr;
-	int rc;
-
-	if (conf->cfg_hdr.ioc_len < sizeof(*conf))
-		return -EINVAL;
-
-	mutex_lock(&lnet_config_mutex);
-	if (!the_lnet.ln_niinit_self) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
-	rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
-out_unlock:
-	mutex_unlock(&lnet_config_mutex);
-
-	return rc;
-}
-
-static int
-lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr)
-{
-	struct lnet_ioctl_config_data *conf =
-		(struct lnet_ioctl_config_data *)hdr;
-	int rc;
-
-	if (conf->cfg_hdr.ioc_len < sizeof(*conf))
-		return -EINVAL;
-
-	mutex_lock(&lnet_config_mutex);
-	if (!the_lnet.ln_niinit_self) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
-	rc = lnet_dyn_del_ni(conf->cfg_net);
-out_unlock:
-	mutex_unlock(&lnet_config_mutex);
-
-	return rc;
-}
-
-static int
-lnet_ioctl(struct notifier_block *nb,
-	   unsigned long cmd, void *vdata)
-{
-	int rc;
-	struct libcfs_ioctl_hdr *hdr = vdata;
-
-	switch (cmd) {
-	case IOC_LIBCFS_CONFIGURE: {
-		struct libcfs_ioctl_data *data =
-			(struct libcfs_ioctl_data *)hdr;
-
-		if (data->ioc_hdr.ioc_len < sizeof(*data)) {
-			rc = -EINVAL;
-		} else {
-			the_lnet.ln_nis_from_mod_params = data->ioc_flags;
-			rc = lnet_configure(NULL);
-		}
-		break;
-	}
-
-	case IOC_LIBCFS_UNCONFIGURE:
-		rc = lnet_unconfigure();
-		break;
-
-	case IOC_LIBCFS_ADD_NET:
-		rc = lnet_dyn_configure(hdr);
-		break;
-
-	case IOC_LIBCFS_DEL_NET:
-		rc = lnet_dyn_unconfigure(hdr);
-		break;
-
-	default:
-		/*
-		 * Passing LNET_PID_ANY only gives me a ref if the net is up
-		 * already; I'll need it to ensure the net can't go down while
-		 * I'm called into it
-		 */
-		rc = LNetNIInit(LNET_PID_ANY);
-		if (rc >= 0) {
-			rc = LNetCtl(cmd, hdr);
-			LNetNIFini();
-		}
-		break;
-	}
-	return notifier_from_ioctl_errno(rc);
-}
-
-static struct notifier_block lnet_ioctl_handler = {
-	.notifier_call = lnet_ioctl,
-};
-
-static int __init lnet_init(void)
-{
-	int rc;
-
-	mutex_init(&lnet_config_mutex);
-
-	rc = libcfs_setup();
-	if (rc)
-		return rc;
-
-	rc = lnet_lib_init();
-	if (rc) {
-		CERROR("lnet_lib_init: error %d\n", rc);
-		return rc;
-	}
-
-	rc = blocking_notifier_chain_register(&libcfs_ioctl_list,
-					      &lnet_ioctl_handler);
-	LASSERT(!rc);
-
-	if (config_on_load) {
-		/*
-		 * Have to schedule a separate thread to avoid deadlocking
-		 * in modload
-		 */
-		(void)kthread_run(lnet_configure, NULL, "lnet_initd");
-	}
-
-	return 0;
-}
-
-static void __exit lnet_exit(void)
-{
-	int rc;
-
-	rc = blocking_notifier_chain_unregister(&libcfs_ioctl_list,
-						&lnet_ioctl_handler);
-	LASSERT(!rc);
-
-	lnet_lib_exit();
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Networking layer");
-MODULE_VERSION(LNET_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(lnet_init);
-module_exit(lnet_exit);

+ 0 - 1023
drivers/staging/lustre/lnet/lnet/net_fault.c

@@ -1,1023 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/lnet/net_fault.c
- *
- * Lustre network fault simulation
- *
- * Author: liang.zhen@intel.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetctl.h>
-
-#define LNET_MSG_MASK		(LNET_PUT_BIT | LNET_ACK_BIT | \
-				 LNET_GET_BIT | LNET_REPLY_BIT)
-
-struct lnet_drop_rule {
-	/** link chain on the_lnet.ln_drop_rules */
-	struct list_head	dr_link;
-	/** attributes of this rule */
-	struct lnet_fault_attr	dr_attr;
-	/** lock to protect \a dr_drop_at and \a dr_stat */
-	spinlock_t		dr_lock;
-	/**
-	 * the message sequence to drop: the message is dropped when
-	 * dr_stat.fs_count == dr_drop_at
-	 */
-	unsigned long		dr_drop_at;
-	/**
-	 * seconds until the next message is dropped; exclusive with dr_drop_at
-	 */
-	unsigned long		dr_drop_time;
-	/** baseline to calculate dr_drop_time */
-	unsigned long		dr_time_base;
-	/** statistic of dropped messages */
-	struct lnet_fault_stat	dr_stat;
-};
-
-static bool
-lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid)
-{
-	if (nid == msg_nid || nid == LNET_NID_ANY)
-		return true;
-
-	if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid))
-		return false;
-
-	/* 255.255.255.255@net is a wildcard for all addresses on a network */
-	return LNET_NIDADDR(nid) == LNET_NIDADDR(LNET_NID_ANY);
-}
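
A hedged illustration of the wildcard convention noted above; the choice of network (tcp0, via the SOCKLND type) is illustrative:

static bool example_match_any_on_net(lnet_nid_t msg_nid)
{
	/* an all-ones host part plus a concrete net matches every NID
	 * on that net, per the rule above */
	lnet_nid_t wild = LNET_MKNID(LNET_MKNET(SOCKLND, 0),
				     LNET_NIDADDR(LNET_NID_ANY));

	return lnet_fault_nid_match(wild, msg_nid);
}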
-
-static bool
-lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
-		      lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
-	if (!lnet_fault_nid_match(attr->fa_src, src) ||
-	    !lnet_fault_nid_match(attr->fa_dst, dst))
-		return false;
-
-	if (!(attr->fa_msg_mask & (1 << type)))
-		return false;
-
-	/**
-	 * NB: ACK and REPLY have no portal, but they should have been
-	 * rejected by message mask
-	 */
-	if (attr->fa_ptl_mask && /* has portal filter */
-	    !(attr->fa_ptl_mask & (1ULL << portal)))
-		return false;
-
-	return true;
-}
-
-static int
-lnet_fault_attr_validate(struct lnet_fault_attr *attr)
-{
-	if (!attr->fa_msg_mask)
-		attr->fa_msg_mask = LNET_MSG_MASK; /* all message types */
-
-	if (!attr->fa_ptl_mask) /* no portal filter */
-		return 0;
-
-	/* NB: only PUT and GET can be filtered if portal filter has been set */
-	attr->fa_msg_mask &= LNET_GET_BIT | LNET_PUT_BIT;
-	if (!attr->fa_msg_mask) {
-		CDEBUG(D_NET, "can't find valid message type bits %x\n",
-		       attr->fa_msg_mask);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void
-lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type)
-{
-	/* NB: fs_counter is NOT updated by this function */
-	switch (type) {
-	case LNET_MSG_PUT:
-		stat->fs_put++;
-		return;
-	case LNET_MSG_ACK:
-		stat->fs_ack++;
-		return;
-	case LNET_MSG_GET:
-		stat->fs_get++;
-		return;
-	case LNET_MSG_REPLY:
-		stat->fs_reply++;
-		return;
-	}
-}
-
-/**
- * LNet message drop simulation
- */
-
-/**
- * Add a new drop rule to LNet
- * There is no check for duplicate drop rules; every rule is checked against
- * each incoming message.
- */
-static int
-lnet_drop_rule_add(struct lnet_fault_attr *attr)
-{
-	struct lnet_drop_rule *rule;
-
-	if (attr->u.drop.da_rate && attr->u.drop.da_interval) {
-		CDEBUG(D_NET, "please provide either drop rate or drop interval, but not both at the same time %d/%d\n",
-		       attr->u.drop.da_rate, attr->u.drop.da_interval);
-		return -EINVAL;
-	}
-
-	if (lnet_fault_attr_validate(attr))
-		return -EINVAL;
-
-	rule = kzalloc(sizeof(*rule), GFP_NOFS);
-	if (!rule)
-		return -ENOMEM;
-
-	spin_lock_init(&rule->dr_lock);
-
-	rule->dr_attr = *attr;
-	if (attr->u.drop.da_interval) {
-		rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ;
-		rule->dr_drop_time = jiffies +
-			prandom_u32_max(attr->u.drop.da_interval) * HZ;
-	} else {
-		rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
-	}
-
-	lnet_net_lock(LNET_LOCK_EX);
-	list_add(&rule->dr_link, &the_lnet.ln_drop_rules);
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	CDEBUG(D_NET, "Added drop rule: src %s, dst %s, rate %d, interval %d\n",
-	       libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst),
-	       attr->u.drop.da_rate, attr->u.drop.da_interval);
-	return 0;
-}
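
A hedged sketch of building a rate-based attribute for the function above; every value is illustrative:

static int example_add_drop_rule(void)
{
	struct lnet_fault_attr attr = {
		.fa_src		= LNET_NID_ANY,	   /* wildcard source */
		.fa_dst		= LNET_NID_ANY,	   /* wildcard destination */
		.fa_msg_mask	= LNET_PUT_BIT,	   /* PUTs only */
		.fa_ptl_mask	= 1ULL << 3,	   /* portal 3 only */
		.u.drop.da_rate	= 100,		   /* ~1 in 100 dropped */
	};

	return lnet_drop_rule_add(&attr);
}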
-
-/**
- * Remove matched drop rules from LNet; all rules that match \a src and
- * \a dst will be removed.
- * If \a src is zero, all rules that have \a dst as destination are removed.
- * If \a dst is zero, all rules that have \a src as source are removed.
- * If both are zero, all rules are removed.
- */
-static int
-lnet_drop_rule_del(lnet_nid_t src, lnet_nid_t dst)
-{
-	struct lnet_drop_rule *rule;
-	struct lnet_drop_rule *tmp;
-	struct list_head zombies;
-	int n = 0;
-
-	INIT_LIST_HEAD(&zombies);
-
-	lnet_net_lock(LNET_LOCK_EX);
-	list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) {
-		if (rule->dr_attr.fa_src != src && src)
-			continue;
-
-		if (rule->dr_attr.fa_dst != dst && dst)
-			continue;
-
-		list_move(&rule->dr_link, &zombies);
-	}
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	list_for_each_entry_safe(rule, tmp, &zombies, dr_link) {
-		CDEBUG(D_NET, "Remove drop rule: src %s->dst: %s (1/%d, %d)\n",
-		       libcfs_nid2str(rule->dr_attr.fa_src),
-		       libcfs_nid2str(rule->dr_attr.fa_dst),
-		       rule->dr_attr.u.drop.da_rate,
-		       rule->dr_attr.u.drop.da_interval);
-
-		list_del(&rule->dr_link);
-		kfree(rule);
-		n++;
-	}
-
-	return n;
-}
-
-/**
- * List the drop rule at position \a pos
- */
-static int
-lnet_drop_rule_list(int pos, struct lnet_fault_attr *attr,
-		    struct lnet_fault_stat *stat)
-{
-	struct lnet_drop_rule *rule;
-	int cpt;
-	int i = 0;
-	int rc = -ENOENT;
-
-	cpt = lnet_net_lock_current();
-	list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
-		if (i++ < pos)
-			continue;
-
-		spin_lock(&rule->dr_lock);
-		*attr = rule->dr_attr;
-		*stat = rule->dr_stat;
-		spin_unlock(&rule->dr_lock);
-		rc = 0;
-		break;
-	}
-
-	lnet_net_unlock(cpt);
-	return rc;
-}
-
-/**
- * reset counters for all drop rules
- */
-static void
-lnet_drop_rule_reset(void)
-{
-	struct lnet_drop_rule *rule;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
-		struct lnet_fault_attr *attr = &rule->dr_attr;
-
-		spin_lock(&rule->dr_lock);
-
-		memset(&rule->dr_stat, 0, sizeof(rule->dr_stat));
-		if (attr->u.drop.da_rate) {
-			rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
-		} else {
-			rule->dr_drop_time = jiffies +
-				prandom_u32_max(attr->u.drop.da_interval) * HZ;
-			rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ;
-		}
-		spin_unlock(&rule->dr_lock);
-	}
-
-	lnet_net_unlock(cpt);
-}
-
-/**
- * Check source/destination NID, portal, message type and drop rate, and
- * decide whether this message should be dropped.
- */
-static bool
-drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
-		lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
-	struct lnet_fault_attr *attr = &rule->dr_attr;
-	bool drop;
-
-	if (!lnet_fault_attr_match(attr, src, dst, type, portal))
-		return false;
-
-	/* match this rule, check drop rate now */
-	spin_lock(&rule->dr_lock);
-	if (rule->dr_drop_time) { /* time based drop */
-		unsigned long now = jiffies;
-
-		rule->dr_stat.fs_count++;
-		drop = time_after_eq(now, rule->dr_drop_time);
-		if (drop) {
-			if (time_after(now, rule->dr_time_base))
-				rule->dr_time_base = now;
-
-			rule->dr_drop_time = rule->dr_time_base +
-				prandom_u32_max(attr->u.drop.da_interval) * HZ;
-			rule->dr_time_base += attr->u.drop.da_interval * HZ;
-
-			CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n",
-			       libcfs_nid2str(attr->fa_src),
-			       libcfs_nid2str(attr->fa_dst),
-			       rule->dr_drop_time);
-		}
-
-	} else { /* rate based drop */
-		drop = rule->dr_stat.fs_count++ == rule->dr_drop_at;
-
-		if (!do_div(rule->dr_stat.fs_count, attr->u.drop.da_rate)) {
-			rule->dr_drop_at = rule->dr_stat.fs_count +
-				prandom_u32_max(attr->u.drop.da_rate);
-			CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n",
-			       libcfs_nid2str(attr->fa_src),
-			       libcfs_nid2str(attr->fa_dst), rule->dr_drop_at);
-		}
-	}
-
-	if (drop) { /* drop this message, update counters */
-		lnet_fault_stat_inc(&rule->dr_stat, type);
-		rule->dr_stat.u.drop.ds_dropped++;
-	}
-
-	spin_unlock(&rule->dr_lock);
-	return drop;
-}
-
-/**
- * Check if a message from \a src to \a dst can match any existing drop rule
- */
-bool
-lnet_drop_rule_match(struct lnet_hdr *hdr)
-{
-	struct lnet_drop_rule *rule;
-	lnet_nid_t src = le64_to_cpu(hdr->src_nid);
-	lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
-	unsigned int typ = le32_to_cpu(hdr->type);
-	unsigned int ptl = -1;
-	bool drop = false;
-	int cpt;
-
-	/**
-	 * NB: if Portal is specified, then only PUT and GET will be
-	 * filtered by drop rule
-	 */
-	if (typ == LNET_MSG_PUT)
-		ptl = le32_to_cpu(hdr->msg.put.ptl_index);
-	else if (typ == LNET_MSG_GET)
-		ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
-	cpt = lnet_net_lock_current();
-	list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
-		drop = drop_rule_match(rule, src, dst, typ, ptl);
-		if (drop)
-			break;
-	}
-
-	lnet_net_unlock(cpt);
-	return drop;
-}
-
-/**
- * LNet Delay Simulation
- */
-/** time (in jiffies) at which to send the delayed message */
-#define msg_delay_send		 msg_ev.hdr_data
-
-struct lnet_delay_rule {
-	/** link chain on the_lnet.ln_delay_rules */
-	struct list_head	dl_link;
-	/** link chain on delay_dd.dd_sched_rules */
-	struct list_head	dl_sched_link;
-	/** attributes of this rule */
-	struct lnet_fault_attr	dl_attr;
-	/** lock to protect the members below */
-	spinlock_t		dl_lock;
-	/** refcount of delay rule */
-	atomic_t		dl_refcount;
-	/**
-	 * the message sequence to delay: the message is delayed when
-	 * dl_stat.fs_count == dl_delay_at
-	 */
-	unsigned long		dl_delay_at;
-	/**
-	 * seconds until the next message is delayed; exclusive with dl_delay_at
-	 */
-	unsigned long		dl_delay_time;
-	/** baseline to calculate dl_delay_time */
-	unsigned long		dl_time_base;
-	/** jiffies to send the next delayed message */
-	unsigned long		dl_msg_send;
-	/** delayed message list */
-	struct list_head	dl_msg_list;
-	/** statistic of delayed messages */
-	struct lnet_fault_stat	dl_stat;
-	/** timer to wakeup delay_daemon */
-	struct timer_list	dl_timer;
-};
-
-struct delay_daemon_data {
-	/** serialise rule add/remove */
-	struct mutex		dd_mutex;
-	/** protect rules on \a dd_sched_rules */
-	spinlock_t		dd_lock;
-	/** scheduled delay rules (by timer) */
-	struct list_head	dd_sched_rules;
-	/** daemon thread sleeps at here */
-	wait_queue_head_t	dd_waitq;
-	/** controller (lctl command) wait at here */
-	wait_queue_head_t	dd_ctl_waitq;
-	/** daemon is running */
-	unsigned int		dd_running;
-	/** daemon stopped */
-	unsigned int		dd_stopped;
-};
-
-static struct delay_daemon_data	delay_dd;
-
-static unsigned long
-round_timeout(unsigned long timeout)
-{
-	return (unsigned int)rounddown(timeout, HZ) + HZ;
-}
-
-static void
-delay_rule_decref(struct lnet_delay_rule *rule)
-{
-	if (atomic_dec_and_test(&rule->dl_refcount)) {
-		LASSERT(list_empty(&rule->dl_sched_link));
-		LASSERT(list_empty(&rule->dl_msg_list));
-		LASSERT(list_empty(&rule->dl_link));
-
-		kfree(rule);
-	}
-}
-
-/**
- * Check source/destination NID, portal, message type and delay rate, and
- * decide whether this message should be delayed.
- */
-static bool
-delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src,
-		 lnet_nid_t dst, unsigned int type, unsigned int portal,
-		 struct lnet_msg *msg)
-{
-	struct lnet_fault_attr *attr = &rule->dl_attr;
-	bool delay;
-
-	if (!lnet_fault_attr_match(attr, src, dst, type, portal))
-		return false;
-
-	/* match this rule, check delay rate now */
-	spin_lock(&rule->dl_lock);
-	if (rule->dl_delay_time) { /* time based delay */
-		unsigned long now = jiffies;
-
-		rule->dl_stat.fs_count++;
-		delay = time_after_eq(now, rule->dl_delay_time);
-		if (delay) {
-			if (time_after(now, rule->dl_time_base))
-				rule->dl_time_base = now;
-
-			rule->dl_delay_time = rule->dl_time_base +
-				prandom_u32_max(attr->u.delay.la_interval) * HZ;
-			rule->dl_time_base += attr->u.delay.la_interval * HZ;
-
-			CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n",
-			       libcfs_nid2str(attr->fa_src),
-			       libcfs_nid2str(attr->fa_dst),
-			       rule->dl_delay_time);
-		}
-
-	} else { /* rate based delay */
-		delay = rule->dl_stat.fs_count++ == rule->dl_delay_at;
-		/* generate the next random rate sequence */
-		if (!do_div(rule->dl_stat.fs_count, attr->u.delay.la_rate)) {
-			rule->dl_delay_at = rule->dl_stat.fs_count +
-				prandom_u32_max(attr->u.delay.la_rate);
-			CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n",
-			       libcfs_nid2str(attr->fa_src),
-			       libcfs_nid2str(attr->fa_dst), rule->dl_delay_at);
-		}
-	}
-
-	if (!delay) {
-		spin_unlock(&rule->dl_lock);
-		return false;
-	}
-
-	/* delay this message, update counters */
-	lnet_fault_stat_inc(&rule->dl_stat, type);
-	rule->dl_stat.u.delay.ls_delayed++;
-
-	list_add_tail(&msg->msg_list, &rule->dl_msg_list);
-	msg->msg_delay_send = round_timeout(
-			jiffies + attr->u.delay.la_latency * HZ);
-	if (rule->dl_msg_send == -1) {
-		rule->dl_msg_send = msg->msg_delay_send;
-		mod_timer(&rule->dl_timer, rule->dl_msg_send);
-	}
-
-	spin_unlock(&rule->dl_lock);
-	return true;
-}
-
-/**
- * Check if \a msg matches any delay rule; if there is a match, receipt of
- * the message will be delayed.
- */
-bool
-lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg)
-{
-	struct lnet_delay_rule *rule;
-	lnet_nid_t src = le64_to_cpu(hdr->src_nid);
-	lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
-	unsigned int typ = le32_to_cpu(hdr->type);
-	unsigned int ptl = -1;
-
-	/* NB: called with hold of lnet_net_lock */
-
-	/**
-	 * NB: if Portal is specified, then only PUT and GET will be
-	 * filtered by delay rule
-	 */
-	if (typ == LNET_MSG_PUT)
-		ptl = le32_to_cpu(hdr->msg.put.ptl_index);
-	else if (typ == LNET_MSG_GET)
-		ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
-	list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
-		if (delay_rule_match(rule, src, dst, typ, ptl, msg))
-			return true;
-	}
-
-	return false;
-}
-
-/** check out delayed messages for send */
-static void
-delayed_msg_check(struct lnet_delay_rule *rule, bool all,
-		  struct list_head *msg_list)
-{
-	struct lnet_msg *msg;
-	struct lnet_msg *tmp;
-	unsigned long now = jiffies;
-
-	if (!all && rule->dl_msg_send > now)
-		return;
-
-	spin_lock(&rule->dl_lock);
-	list_for_each_entry_safe(msg, tmp, &rule->dl_msg_list, msg_list) {
-		if (!all && msg->msg_delay_send > now)
-			break;
-
-		msg->msg_delay_send = 0;
-		list_move_tail(&msg->msg_list, msg_list);
-	}
-
-	if (list_empty(&rule->dl_msg_list)) {
-		del_timer(&rule->dl_timer);
-		rule->dl_msg_send = -1;
-
-	} else if (!list_empty(msg_list)) {
-		/*
-		 * dequeued some timed-out messages; update the timer for the
-		 * next delayed message on this rule
-		 */
-		msg = list_entry(rule->dl_msg_list.next,
-				 struct lnet_msg, msg_list);
-		rule->dl_msg_send = msg->msg_delay_send;
-		mod_timer(&rule->dl_timer, rule->dl_msg_send);
-	}
-	spin_unlock(&rule->dl_lock);
-}
-
-static void
-delayed_msg_process(struct list_head *msg_list, bool drop)
-{
-	struct lnet_msg	*msg;
-
-	while (!list_empty(msg_list)) {
-		struct lnet_ni *ni;
-		int cpt;
-		int rc;
-
-		msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
-		LASSERT(msg->msg_rxpeer);
-
-		ni = msg->msg_rxpeer->lp_ni;
-		cpt = msg->msg_rx_cpt;
-
-		list_del_init(&msg->msg_list);
-		if (drop) {
-			rc = -ECANCELED;
-
-		} else if (!msg->msg_routing) {
-			rc = lnet_parse_local(ni, msg);
-			if (!rc)
-				continue;
-
-		} else {
-			lnet_net_lock(cpt);
-			rc = lnet_parse_forward_locked(ni, msg);
-			lnet_net_unlock(cpt);
-
-			switch (rc) {
-			case LNET_CREDIT_OK:
-				lnet_ni_recv(ni, msg->msg_private, msg, 0,
-					     0, msg->msg_len, msg->msg_len);
-				/* fall through */
-			case LNET_CREDIT_WAIT:
-				continue;
-			default: /* failures */
-				break;
-			}
-		}
-
-		lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len);
-		lnet_finalize(ni, msg, rc);
-	}
-}
-
-/**
- * Process delayed messages for scheduled rules
- * This function can be called either by the delay rule daemon or by lnet_finalize()
- */
-void
-lnet_delay_rule_check(void)
-{
-	struct lnet_delay_rule *rule;
-	struct list_head msgs;
-
-	INIT_LIST_HEAD(&msgs);
-	while (1) {
-		if (list_empty(&delay_dd.dd_sched_rules))
-			break;
-
-		spin_lock_bh(&delay_dd.dd_lock);
-		if (list_empty(&delay_dd.dd_sched_rules)) {
-			spin_unlock_bh(&delay_dd.dd_lock);
-			break;
-		}
-
-		rule = list_entry(delay_dd.dd_sched_rules.next,
-				  struct lnet_delay_rule, dl_sched_link);
-		list_del_init(&rule->dl_sched_link);
-		spin_unlock_bh(&delay_dd.dd_lock);
-
-		delayed_msg_check(rule, false, &msgs);
-		delay_rule_decref(rule); /* -1 for delay_dd.dd_sched_rules */
-	}
-
-	if (!list_empty(&msgs))
-		delayed_msg_process(&msgs, false);
-}
-
-/** daemon thread to handle delayed messages */
-static int
-lnet_delay_rule_daemon(void *arg)
-{
-	delay_dd.dd_running = 1;
-	wake_up(&delay_dd.dd_ctl_waitq);
-
-	while (delay_dd.dd_running) {
-		wait_event_interruptible(delay_dd.dd_waitq,
-					 !delay_dd.dd_running ||
-					 !list_empty(&delay_dd.dd_sched_rules));
-		lnet_delay_rule_check();
-	}
-
-	/* in case more rules have been enqueued after my last check */
-	lnet_delay_rule_check();
-	delay_dd.dd_stopped = 1;
-	wake_up(&delay_dd.dd_ctl_waitq);
-
-	return 0;
-}
-
-static void
-delay_timer_cb(struct timer_list *t)
-{
-	struct lnet_delay_rule *rule = from_timer(rule, t, dl_timer);
-
-	spin_lock_bh(&delay_dd.dd_lock);
-	if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) {
-		atomic_inc(&rule->dl_refcount);
-		list_add_tail(&rule->dl_sched_link, &delay_dd.dd_sched_rules);
-		wake_up(&delay_dd.dd_waitq);
-	}
-	spin_unlock_bh(&delay_dd.dd_lock);
-}
-
-/**
- * Add a new delay rule to LNet
- * There is no check for duplicate delay rules; every rule is checked against
- * each incoming message.
- */
-int
-lnet_delay_rule_add(struct lnet_fault_attr *attr)
-{
-	struct lnet_delay_rule *rule;
-	int rc = 0;
-
-	if (attr->u.delay.la_rate && attr->u.delay.la_interval) {
-		CDEBUG(D_NET, "please provide either delay rate or delay interval, but not both at the same time %d/%d\n",
-		       attr->u.delay.la_rate, attr->u.delay.la_interval);
-		return -EINVAL;
-	}
-
-	if (!attr->u.delay.la_latency) {
-		CDEBUG(D_NET, "delay latency cannot be zero\n");
-		return -EINVAL;
-	}
-
-	if (lnet_fault_attr_validate(attr))
-		return -EINVAL;
-
-	rule = kzalloc(sizeof(*rule), GFP_NOFS);
-	if (!rule)
-		return -ENOMEM;
-
-	mutex_lock(&delay_dd.dd_mutex);
-	if (!delay_dd.dd_running) {
-		struct task_struct *task;
-
-		/**
-		 * NB: although LND threads will process delayed messages
-		 * in lnet_finalize, there is no guarantee that they will
-		 * be woken up if no other message needs to be handled.
-		 * A single daemon thread suffices; performance is not a
-		 * concern for this simulation module.
-		 */
-		task = kthread_run(lnet_delay_rule_daemon, NULL, "lnet_dd");
-		if (IS_ERR(task)) {
-			rc = PTR_ERR(task);
-			goto failed;
-		}
-		wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running);
-	}
-
-	timer_setup(&rule->dl_timer, delay_timer_cb, 0);
-
-	spin_lock_init(&rule->dl_lock);
-	INIT_LIST_HEAD(&rule->dl_msg_list);
-	INIT_LIST_HEAD(&rule->dl_sched_link);
-
-	rule->dl_attr = *attr;
-	if (attr->u.delay.la_interval) {
-		rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ;
-		rule->dl_delay_time = jiffies +
-			prandom_u32_max(attr->u.delay.la_interval) * HZ;
-	} else {
-		rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
-	}
-
-	rule->dl_msg_send = -1;
-
-	lnet_net_lock(LNET_LOCK_EX);
-	atomic_set(&rule->dl_refcount, 1);
-	list_add(&rule->dl_link, &the_lnet.ln_delay_rules);
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	CDEBUG(D_NET, "Added delay rule: src %s, dst %s, rate %d\n",
-	       libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst),
-	       attr->u.delay.la_rate);
-
-	mutex_unlock(&delay_dd.dd_mutex);
-	return 0;
-failed:
-	mutex_unlock(&delay_dd.dd_mutex);
-	kfree(rule);
-	return rc;
-}
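
And a matching hedged sketch for a delay rule; as validated above, la_rate and la_interval are mutually exclusive and la_latency must be non-zero (values illustrative):

static int example_add_delay_rule(void)
{
	struct lnet_fault_attr attr = {
		.fa_src			= LNET_NID_ANY,
		.fa_dst			= LNET_NID_ANY,
		.u.delay.la_rate	= 50,	/* ~1 in 50 delayed */
		.u.delay.la_latency	= 2,	/* by two seconds */
	};

	return lnet_delay_rule_add(&attr);
}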
-
-/**
- * Remove matched delay rules from LNet. If \a shutdown is true, or both
- * \a src and \a dst are zero, all rules will be removed; otherwise only
- * matching rules will be removed.
- * If \a src is zero, all rules that have \a dst as destination are removed.
- * If \a dst is zero, all rules that have \a src as source are removed.
- *
- * When a delay rule is removed, all delayed messages of this rule will be
- * processed immediately.
- */
-int
-lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown)
-{
-	struct lnet_delay_rule *rule;
-	struct lnet_delay_rule *tmp;
-	struct list_head rule_list;
-	struct list_head msg_list;
-	int n = 0;
-	bool cleanup;
-
-	INIT_LIST_HEAD(&rule_list);
-	INIT_LIST_HEAD(&msg_list);
-
-	if (shutdown) {
-		src = 0;
-		dst = 0;
-	}
-
-	mutex_lock(&delay_dd.dd_mutex);
-	lnet_net_lock(LNET_LOCK_EX);
-
-	list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) {
-		if (rule->dl_attr.fa_src != src && src)
-			continue;
-
-		if (rule->dl_attr.fa_dst != dst && dst)
-			continue;
-
-		CDEBUG(D_NET, "Remove delay rule: src %s->dst: %s (1/%d, %d)\n",
-		       libcfs_nid2str(rule->dl_attr.fa_src),
-		       libcfs_nid2str(rule->dl_attr.fa_dst),
-		       rule->dl_attr.u.delay.la_rate,
-		       rule->dl_attr.u.delay.la_interval);
-		/* refcount is taken over by rule_list */
-		list_move(&rule->dl_link, &rule_list);
-	}
-
-	/* check if we need to shutdown delay_daemon */
-	cleanup = list_empty(&the_lnet.ln_delay_rules) &&
-		  !list_empty(&rule_list);
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	list_for_each_entry_safe(rule, tmp, &rule_list, dl_link) {
-		list_del_init(&rule->dl_link);
-
-		del_timer_sync(&rule->dl_timer);
-		delayed_msg_check(rule, true, &msg_list);
-		delay_rule_decref(rule); /* -1 for the_lnet.ln_delay_rules */
-		n++;
-	}
-
-	if (cleanup) { /* no more delay rule, shutdown delay_daemon */
-		LASSERT(delay_dd.dd_running);
-		delay_dd.dd_running = 0;
-		wake_up(&delay_dd.dd_waitq);
-
-		while (!delay_dd.dd_stopped)
-			wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_stopped);
-	}
-	mutex_unlock(&delay_dd.dd_mutex);
-
-	if (!list_empty(&msg_list))
-		delayed_msg_process(&msg_list, shutdown);
-
-	return n;
-}
-
-/**
- * List the delay rule at position \a pos
- */
-int
-lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
-		     struct lnet_fault_stat *stat)
-{
-	struct lnet_delay_rule *rule;
-	int cpt;
-	int i = 0;
-	int rc = -ENOENT;
-
-	cpt = lnet_net_lock_current();
-	list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
-		if (i++ < pos)
-			continue;
-
-		spin_lock(&rule->dl_lock);
-		*attr = rule->dl_attr;
-		*stat = rule->dl_stat;
-		spin_unlock(&rule->dl_lock);
-		rc = 0;
-		break;
-	}
-
-	lnet_net_unlock(cpt);
-	return rc;
-}
-
-/**
- * reset counters for all Delay Rules
- */
-void
-lnet_delay_rule_reset(void)
-{
-	struct lnet_delay_rule *rule;
-	int cpt;
-
-	cpt = lnet_net_lock_current();
-
-	list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
-		struct lnet_fault_attr *attr = &rule->dl_attr;
-
-		spin_lock(&rule->dl_lock);
-
-		memset(&rule->dl_stat, 0, sizeof(rule->dl_stat));
-		if (attr->u.delay.la_rate) {
-			rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
-		} else {
-			rule->dl_delay_time =
-				jiffies + prandom_u32_max(
-					attr->u.delay.la_interval) * HZ;
-			rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ;
-		}
-		spin_unlock(&rule->dl_lock);
-	}
-
-	lnet_net_unlock(cpt);
-}
-
-int
-lnet_fault_ctl(int opc, struct libcfs_ioctl_data *data)
-{
-	struct lnet_fault_attr *attr;
-	struct lnet_fault_stat *stat;
-
-	attr = (struct lnet_fault_attr *)data->ioc_inlbuf1;
-
-	switch (opc) {
-	default:
-		return -EINVAL;
-
-	case LNET_CTL_DROP_ADD:
-		if (!attr)
-			return -EINVAL;
-
-		return lnet_drop_rule_add(attr);
-
-	case LNET_CTL_DROP_DEL:
-		if (!attr)
-			return -EINVAL;
-
-		data->ioc_count = lnet_drop_rule_del(attr->fa_src,
-						     attr->fa_dst);
-		return 0;
-
-	case LNET_CTL_DROP_RESET:
-		lnet_drop_rule_reset();
-		return 0;
-
-	case LNET_CTL_DROP_LIST:
-		stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
-		if (!attr || !stat)
-			return -EINVAL;
-
-		return lnet_drop_rule_list(data->ioc_count, attr, stat);
-
-	case LNET_CTL_DELAY_ADD:
-		if (!attr)
-			return -EINVAL;
-
-		return lnet_delay_rule_add(attr);
-
-	case LNET_CTL_DELAY_DEL:
-		if (!attr)
-			return -EINVAL;
-
-		data->ioc_count = lnet_delay_rule_del(attr->fa_src,
-						      attr->fa_dst, false);
-		return 0;
-
-	case LNET_CTL_DELAY_RESET:
-		lnet_delay_rule_reset();
-		return 0;
-
-	case LNET_CTL_DELAY_LIST:
-		stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
-		if (!attr || !stat)
-			return -EINVAL;
-
-		return lnet_delay_rule_list(data->ioc_count, attr, stat);
-	}
-}
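lnet_fault_ctl() is the single ioctl funnel for the fault-injection machinery: each LNET_CTL_* opcode maps onto the add/del/reset/list primitive of the drop or delay rule tables, with ioc_inlbuf1 and ioc_inlbuf2 carrying the attribute and stat buffers. A hedged kernel-side sketch of adding a drop rule through it follows; the NIDs and rate are made up, and the fa_msg_mask and u.drop.da_rate field names are assumptions mirroring the u.delay.la_rate naming visible above:

    /* Sketch: drop roughly 1 in 10 PUTs sent to 192.168.0.7@tcp. */
    struct lnet_fault_attr attr = {
            .fa_src = 0,                    /* LNET_NID_ANY: any source */
            .fa_dst = libcfs_str2nid("192.168.0.7@tcp"),
            .fa_msg_mask = LNET_PUT_BIT,
            .u.drop.da_rate = 10,
    };
    struct libcfs_ioctl_data data = {
            .ioc_inlbuf1 = (char *)&attr,
    };
    int rc = lnet_fault_ctl(LNET_CTL_DROP_ADD, &data);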
-
-int
-lnet_fault_init(void)
-{
-	BUILD_BUG_ON(LNET_PUT_BIT != 1 << LNET_MSG_PUT);
-	BUILD_BUG_ON(LNET_ACK_BIT != 1 << LNET_MSG_ACK);
-	BUILD_BUG_ON(LNET_GET_BIT != 1 << LNET_MSG_GET);
-	BUILD_BUG_ON(LNET_REPLY_BIT != 1 << LNET_MSG_REPLY);
-
-	mutex_init(&delay_dd.dd_mutex);
-	spin_lock_init(&delay_dd.dd_lock);
-	init_waitqueue_head(&delay_dd.dd_waitq);
-	init_waitqueue_head(&delay_dd.dd_ctl_waitq);
-	INIT_LIST_HEAD(&delay_dd.dd_sched_rules);
-
-	return 0;
-}
-
-void
-lnet_fault_fini(void)
-{
-	lnet_drop_rule_del(0, 0);
-	lnet_delay_rule_del(0, 0, true);
-
-	LASSERT(list_empty(&the_lnet.ln_drop_rules));
-	LASSERT(list_empty(&the_lnet.ln_delay_rules));
-	LASSERT(list_empty(&delay_dd.dd_sched_rules));
-}

+ 0 - 1261
drivers/staging/lustre/lnet/lnet/nidstrings.c

@@ -1,1261 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/nidstrings.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <uapi/linux/lnet/nidstr.h>
-
-/* max value for numeric network address */
-#define MAX_NUMERIC_VALUE 0xffffffff
-
-#define IPSTRING_LENGTH 16
-
-/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids
- * consistent in all conversion functions.  Some code fragments are copied
- * around for the sake of clarity...
- */
-
-/* CAVEAT EMPTOR! Racey temporary buffer allocation!
- * Choose the number of nidstrings to support the MAXIMUM expected number of
- * concurrent users.  If there are more, the returned string will be volatile.
- * NB this number must allow for a process to be descheduled for a timeslice
- * between getting its string and using it.
- */
-
-static char      libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE];
-static int       libcfs_nidstring_idx;
-
-static DEFINE_SPINLOCK(libcfs_nidstring_lock);
-
-static struct netstrfns *libcfs_namenum2netstrfns(const char *name);
-
-char *
-libcfs_next_nidstring(void)
-{
-	char *str;
-	unsigned long flags;
-
-	spin_lock_irqsave(&libcfs_nidstring_lock, flags);
-
-	str = libcfs_nidstrings[libcfs_nidstring_idx++];
-	if (libcfs_nidstring_idx == ARRAY_SIZE(libcfs_nidstrings))
-		libcfs_nidstring_idx = 0;
-
-	spin_unlock_irqrestore(&libcfs_nidstring_lock, flags);
-	return str;
-}
-EXPORT_SYMBOL(libcfs_next_nidstring);
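This rotating pool is what lets the many CDEBUG() call sites in these files print NIDs without passing a buffer: a convenience wrapper grabs the next slot and formats into it. A sketch of that wrapper (the real inline lives in the nidstr.h header; treat this as an approximation):

    static inline char *libcfs_nid2str(lnet_nid_t nid)
    {
            /* Format into the next slot of the rotating pool. */
            return libcfs_nid2str_r(nid, libcfs_next_nidstring(),
                                    LNET_NIDSTR_SIZE);
    }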
-
-/**
- * Nid range list syntax.
- * \verbatim
- *
- * <nidlist>         :== <nidrange> [ ' ' <nidrange> ]
- * <nidrange>        :== <addrrange> '@' <net>
- * <addrrange>       :== '*' |
- *                       <ipaddr_range> |
- *                       <cfs_expr_list>
- * <ipaddr_range>    :== <cfs_expr_list>.<cfs_expr_list>.<cfs_expr_list>.
- *                       <cfs_expr_list>
- * <cfs_expr_list>   :== <number> |
- *                       <expr_list>
- * <expr_list>       :== '[' <range_expr> [ ',' <range_expr>] ']'
- * <range_expr>      :== <number> |
- *                       <number> '-' <number> |
- *                       <number> '-' <number> '/' <number>
- * <net>             :== <netname> | <netname><number>
- * <netname>         :== "lo" | "tcp" | "o2ib" | "cib" | "openib" | "iib" |
- *                       "vib" | "ra" | "elan" | "mx" | "ptl"
- * \endverbatim
- */
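Concretely, the grammar admits strings like these (illustrative examples, not drawn from the source):

    192.168.1.[2-10/2]@tcp          every second host from .2 to .10 on tcp0
    *@elan3                         all NIDs on network elan3
    31@gni 10.0.0.[1-3]@o2ib1       two <nidrange>s separated by a space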
-
-/**
- * Structure to represent \<nidrange\> token of the syntax.
- *
- * One of this is created for each \<net\> parsed.
- */
-struct nidrange {
-	/**
-	 * Link to list of this structures which is built on nid range
-	 * list parsing.
-	 */
-	struct list_head nr_link;
-	/**
-	 * List head for addrrange::ar_link.
-	 */
-	struct list_head nr_addrranges;
-	/**
-	 * Flag indicating that *@<net> is found.
-	 */
-	int nr_all;
-	/**
-	 * Pointer to corresponding element of libcfs_netstrfns.
-	 */
-	struct netstrfns *nr_netstrfns;
-	/**
-	 * Network number, e.g. 5 if \<net\> is "elan5".
-	 */
-	int nr_netnum;
-};
-
-/**
- * Structure to represent \<addrrange\> token of the syntax.
- */
-struct addrrange {
-	/**
-	 * Link to nidrange::nr_addrranges.
-	 */
-	struct list_head ar_link;
-	/**
-	 * List head for cfs_expr_list::el_list.
-	 */
-	struct list_head ar_numaddr_ranges;
-};
-
-/**
- * Parses \<addrrange\> token of the syntax.
- *
- * Allocates struct addrrange and links to \a nidrange via
- * (nidrange::nr_addrranges)
- *
- * \retval 0 if \a src parses to '*' | \<ipaddr_range\> | \<cfs_expr_list\>
- * \retval -errno otherwise
- */
-static int
-parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange)
-{
-	struct addrrange *addrrange;
-
-	if (src->ls_len == 1 && src->ls_str[0] == '*') {
-		nidrange->nr_all = 1;
-		return 0;
-	}
-
-	addrrange = kzalloc(sizeof(struct addrrange), GFP_NOFS);
-	if (!addrrange)
-		return -ENOMEM;
-	list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges);
-	INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges);
-
-	return nidrange->nr_netstrfns->nf_parse_addrlist(src->ls_str,
-						src->ls_len,
-						&addrrange->ar_numaddr_ranges);
-}
-
-/**
- * Finds or creates struct nidrange.
- *
- * Checks if \a src is a valid network name, looks for corresponding
- * nidrange on the list of nidranges (\a nidlist), creates new struct
- * nidrange if it is not found.
- *
- * \retval pointer to struct nidrange matching network specified via \a src
- * \retval NULL if \a src does not match any network
- */
-static struct nidrange *
-add_nidrange(const struct cfs_lstr *src,
-	     struct list_head *nidlist)
-{
-	struct netstrfns *nf;
-	struct nidrange *nr;
-	int endlen;
-	unsigned int netnum;
-
-	if (src->ls_len >= LNET_NIDSTR_SIZE)
-		return NULL;
-
-	nf = libcfs_namenum2netstrfns(src->ls_str);
-	if (!nf)
-		return NULL;
-	endlen = src->ls_len - strlen(nf->nf_name);
-	if (!endlen)
-		/* network name only, e.g. "elan" or "tcp" */
-		netnum = 0;
-	else {
-		/*
-		 * e.g. "elan25" or "tcp23", refuse to parse if
-		 * network name is not appended with decimal or
-		 * hexadecimal number
-		 */
-		if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name),
-				       endlen, &netnum, 0, MAX_NUMERIC_VALUE))
-			return NULL;
-	}
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		if (nr->nr_netstrfns != nf)
-			continue;
-		if (nr->nr_netnum != netnum)
-			continue;
-		return nr;
-	}
-
-	nr = kzalloc(sizeof(struct nidrange), GFP_NOFS);
-	if (!nr)
-		return NULL;
-	list_add_tail(&nr->nr_link, nidlist);
-	INIT_LIST_HEAD(&nr->nr_addrranges);
-	nr->nr_netstrfns = nf;
-	nr->nr_all = 0;
-	nr->nr_netnum = netnum;
-
-	return nr;
-}
-
-/**
- * Parses \<nidrange\> token of the syntax.
- *
- * \retval 1 if \a src parses to \<addrrange\> '@' \<net\>
- * \retval 0 otherwise
- */
-static int
-parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist)
-{
-	struct cfs_lstr addrrange;
-	struct cfs_lstr net;
-	struct nidrange *nr;
-
-	if (!cfs_gettok(src, '@', &addrrange))
-		goto failed;
-
-	if (!cfs_gettok(src, '@', &net) || src->ls_str)
-		goto failed;
-
-	nr = add_nidrange(&net, nidlist);
-	if (!nr)
-		goto failed;
-
-	if (parse_addrange(&addrrange, nr))
-		goto failed;
-
-	return 1;
-failed:
-	return 0;
-}
-
-/**
- * Frees addrrange structures of \a list.
- *
- * For each struct addrrange structure found on \a list it frees
- * cfs_expr_list list attached to it and frees the addrrange itself.
- *
- * \retval none
- */
-static void
-free_addrranges(struct list_head *list)
-{
-	while (!list_empty(list)) {
-		struct addrrange *ar;
-
-		ar = list_entry(list->next, struct addrrange, ar_link);
-
-		cfs_expr_list_free_list(&ar->ar_numaddr_ranges);
-		list_del(&ar->ar_link);
-		kfree(ar);
-	}
-}
-
-/**
- * Frees nidrange structures of \a list.
- *
- * For each struct nidrange structure found on \a list it frees
- * addrrange list attached to it and frees the nidrange itself.
- *
- * \retval none
- */
-void
-cfs_free_nidlist(struct list_head *list)
-{
-	struct list_head *pos, *next;
-	struct nidrange *nr;
-
-	list_for_each_safe(pos, next, list) {
-		nr = list_entry(pos, struct nidrange, nr_link);
-		free_addrranges(&nr->nr_addrranges);
-		list_del(pos);
-		kfree(nr);
-	}
-}
-EXPORT_SYMBOL(cfs_free_nidlist);
-
-/**
- * Parses nid range list.
- *
- * Parses \a str, with rigorous syntax and overflow checking, into
- * \<nidrange\> [ ' ' \<nidrange\> ], compiles it into a set of
- * structures and links them to \a nidlist. The resulting list can be
- * used to match a NID against the set of NIDs defined by \a str.
- * \see cfs_match_nid
- *
- * \retval 1 on success
- * \retval 0 otherwise
- */
-int
-cfs_parse_nidlist(char *str, int len, struct list_head *nidlist)
-{
-	struct cfs_lstr src;
-	struct cfs_lstr res;
-	int rc;
-
-	src.ls_str = str;
-	src.ls_len = len;
-	INIT_LIST_HEAD(nidlist);
-	while (src.ls_str) {
-		rc = cfs_gettok(&src, ' ', &res);
-		if (!rc) {
-			cfs_free_nidlist(nidlist);
-			return 0;
-		}
-		rc = parse_nidrange(&res, nidlist);
-		if (!rc) {
-			cfs_free_nidlist(nidlist);
-			return 0;
-		}
-	}
-	return 1;
-}
-EXPORT_SYMBOL(cfs_parse_nidlist);
-
-/**
- * Matches a nid (\a nid) against the compiled list of nidranges (\a nidlist).
- *
- * \see cfs_parse_nidlist()
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist)
-{
-	struct nidrange *nr;
-	struct addrrange *ar;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		if (nr->nr_netstrfns->nf_type != LNET_NETTYP(LNET_NIDNET(nid)))
-			continue;
-		if (nr->nr_netnum != LNET_NETNUM(LNET_NIDNET(nid)))
-			continue;
-		if (nr->nr_all)
-			return 1;
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link)
-			if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid),
-							    &ar->ar_numaddr_ranges))
-				return 1;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(cfs_match_nid);
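Together these two entry points give the compile-then-match flow the \see tags point at. A short usage sketch (buffer contents illustrative):

    /* Compile a nidlist, test a NID against it, then free it. */
    LIST_HEAD(nidlist);
    char buf[] = "192.168.1.[2-10/2]@tcp";

    if (cfs_parse_nidlist(buf, strlen(buf), &nidlist)) {
            lnet_nid_t nid = libcfs_str2nid("192.168.1.4@tcp");

            if (cfs_match_nid(nid, &nidlist))
                    CDEBUG(D_NET, "%s is in the list\n",
                           libcfs_nid2str(nid));
            cfs_free_nidlist(&nidlist);
    }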
-
-/**
- * Print the network part of the nidrange \a nr into the specified \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_network(char *buffer, int count, struct nidrange *nr)
-{
-	struct netstrfns *nf = nr->nr_netstrfns;
-
-	if (!nr->nr_netnum)
-		return scnprintf(buffer, count, "@%s", nf->nf_name);
-	else
-		return scnprintf(buffer, count, "@%s%u",
-				 nf->nf_name, nr->nr_netnum);
-}
-
-/**
- * Print a list of addrrange (\a addrranges) into the specified \a buffer.
- * At most \a count characters can be printed into \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_addrranges(char *buffer, int count, struct list_head *addrranges,
-		     struct nidrange *nr)
-{
-	int i = 0;
-	struct addrrange *ar;
-	struct netstrfns *nf = nr->nr_netstrfns;
-
-	list_for_each_entry(ar, addrranges, ar_link) {
-		if (i)
-			i += scnprintf(buffer + i, count - i, " ");
-		i += nf->nf_print_addrlist(buffer + i, count - i,
-					   &ar->ar_numaddr_ranges);
-		i += cfs_print_network(buffer + i, count - i, nr);
-	}
-	return i;
-}
-
-/**
- * Print a list of nidranges (\a nidlist) into the specified \a buffer.
- * At most \a count characters can be printed into \a buffer.
- * Nidranges are separated by a space character.
- *
- * \retval number of characters written
- */
-int cfs_print_nidlist(char *buffer, int count, struct list_head *nidlist)
-{
-	int i = 0;
-	struct nidrange *nr;
-
-	if (count <= 0)
-		return 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		if (i)
-			i += scnprintf(buffer + i, count - i, " ");
-
-		if (nr->nr_all) {
-			LASSERT(list_empty(&nr->nr_addrranges));
-			i += scnprintf(buffer + i, count - i, "*");
-			i += cfs_print_network(buffer + i, count - i, nr);
-		} else {
-			i += cfs_print_addrranges(buffer + i, count - i,
-						  &nr->nr_addrranges, nr);
-		}
-	}
-	return i;
-}
-EXPORT_SYMBOL(cfs_print_nidlist);
-
-/**
- * Determines minimum and maximum addresses for a single
- * IP address range
- *
- * \param	ar
- * \param	min_nid
- * \param	max_nid
- */
-static void cfs_ip_ar_min_max(struct addrrange *ar, __u32 *min_nid,
-			      __u32 *max_nid)
-{
-	struct cfs_expr_list *el;
-	struct cfs_range_expr *re;
-	__u32 tmp_ip_addr = 0;
-	unsigned int min_ip[4] = {0};
-	unsigned int max_ip[4] = {0};
-	int re_count = 0;
-
-	list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
-		list_for_each_entry(re, &el->el_exprs, re_link) {
-			min_ip[re_count] = re->re_lo;
-			max_ip[re_count] = re->re_hi;
-			re_count++;
-		}
-	}
-
-	tmp_ip_addr = ((min_ip[0] << 24) | (min_ip[1] << 16) |
-		       (min_ip[2] << 8) | min_ip[3]);
-
-	if (min_nid)
-		*min_nid = tmp_ip_addr;
-
-	tmp_ip_addr = ((max_ip[0] << 24) | (max_ip[1] << 16) |
-		       (max_ip[2] << 8) | max_ip[3]);
-
-	if (max_nid)
-		*max_nid = tmp_ip_addr;
-}
-
-/**
- * Determines minimum and maximum addresses for a single
- * numeric address range
- *
- * \param	ar
- * \param	min_nid
- * \param	max_nid
- */
-static void cfs_num_ar_min_max(struct addrrange *ar, __u32 *min_nid,
-			       __u32 *max_nid)
-{
-	struct cfs_expr_list *el;
-	struct cfs_range_expr *re;
-	unsigned int min_addr = 0;
-	unsigned int max_addr = 0;
-
-	list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
-		list_for_each_entry(re, &el->el_exprs, re_link) {
-			if (re->re_lo < min_addr || !min_addr)
-				min_addr = re->re_lo;
-			if (re->re_hi > max_addr)
-				max_addr = re->re_hi;
-		}
-	}
-
-	if (min_nid)
-		*min_nid = min_addr;
-	if (max_nid)
-		*max_nid = max_addr;
-}
-
-/**
- * Determines whether an expression list in a nidrange contains exactly
- * one contiguous address range. Calls the correct netstrfns for the LND.
- *
- * \param	*nidlist
- *
- * \retval	true if contiguous
- * \retval	false if not contiguous
- */
-bool cfs_nidrange_is_contiguous(struct list_head *nidlist)
-{
-	struct nidrange *nr;
-	struct netstrfns *nf = NULL;
-	char *lndname = NULL;
-	int netnum = -1;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		nf = nr->nr_netstrfns;
-		if (!lndname)
-			lndname = nf->nf_name;
-		if (netnum == -1)
-			netnum = nr->nr_netnum;
-
-		if (strcmp(lndname, nf->nf_name) ||
-		    netnum != nr->nr_netnum)
-			return false;
-	}
-
-	if (!nf)
-		return false;
-
-	if (!nf->nf_is_contiguous(nidlist))
-		return false;
-
-	return true;
-}
-EXPORT_SYMBOL(cfs_nidrange_is_contiguous);
-
-/**
- * Determines whether an expression list in a numeric nidrange contains exactly
- * one contiguous address range.
- *
- * \param	*nidlist
- *
- * \retval	true if contiguous
- * \retval	false if not contiguous
- */
-static bool cfs_num_is_contiguous(struct list_head *nidlist)
-{
-	struct nidrange *nr;
-	struct addrrange *ar;
-	struct cfs_expr_list *el;
-	struct cfs_range_expr *re;
-	int last_hi = 0;
-	__u32 last_end_nid = 0;
-	__u32 current_start_nid = 0;
-	__u32 current_end_nid = 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
-			cfs_num_ar_min_max(ar, &current_start_nid,
-					   &current_end_nid);
-			if (last_end_nid &&
-			    (current_start_nid - last_end_nid != 1))
-				return false;
-			last_end_nid = current_end_nid;
-			list_for_each_entry(el, &ar->ar_numaddr_ranges,
-					    el_link) {
-				list_for_each_entry(re, &el->el_exprs,
-						    re_link) {
-					if (re->re_stride > 1)
-						return false;
-					else if (last_hi &&
-						 re->re_hi - last_hi != 1)
-						return false;
-					last_hi = re->re_hi;
-				}
-			}
-		}
-	}
-
-	return true;
-}
-
-/**
- * Determines whether an expression list in an IP nidrange contains exactly
- * one contiguous address range.
- *
- * \param	*nidlist
- *
- * \retval	true if contiguous
- * \retval	false if not contiguous
- */
-static bool cfs_ip_is_contiguous(struct list_head *nidlist)
-{
-	struct nidrange *nr;
-	struct addrrange *ar;
-	struct cfs_expr_list *el;
-	struct cfs_range_expr *re;
-	int expr_count;
-	int last_hi = 255;
-	int last_diff = 0;
-	__u32 last_end_nid = 0;
-	__u32 current_start_nid = 0;
-	__u32 current_end_nid = 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
-			last_hi = 255;
-			last_diff = 0;
-			cfs_ip_ar_min_max(ar, &current_start_nid,
-					  &current_end_nid);
-			if (last_end_nid &&
-			    (current_start_nid - last_end_nid != 1))
-				return false;
-			last_end_nid = current_end_nid;
-			list_for_each_entry(el, &ar->ar_numaddr_ranges,
-					    el_link) {
-				expr_count = 0;
-				list_for_each_entry(re, &el->el_exprs,
-						    re_link) {
-					expr_count++;
-					if (re->re_stride > 1 ||
-					    (last_diff > 0 && last_hi != 255) ||
-					    (last_diff > 0 && last_hi == 255 &&
-					     re->re_lo > 0))
-						return false;
-					last_hi = re->re_hi;
-					last_diff = re->re_hi - re->re_lo;
-				}
-			}
-		}
-	}
-
-	return true;
-}
-
-/**
- * Takes a linked list of nidrange expressions, determines the minimum
- * and maximum NIDs, and writes them as strings into \a min_nid and
- * \a max_nid
- *
- * \param	*nidlist
- * \param	*min_nid
- * \param	*max_nid
- */
-void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
-			       char *max_nid, size_t nidstr_length)
-{
-	struct nidrange *nr;
-	struct netstrfns *nf = NULL;
-	int netnum = -1;
-	__u32 min_addr;
-	__u32 max_addr;
-	char *lndname = NULL;
-	char min_addr_str[IPSTRING_LENGTH];
-	char max_addr_str[IPSTRING_LENGTH];
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		nf = nr->nr_netstrfns;
-		lndname = nf->nf_name;
-		if (netnum == -1)
-			netnum = nr->nr_netnum;
-
-		nf->nf_min_max(nidlist, &min_addr, &max_addr);
-	}
-	nf->nf_addr2str(min_addr, min_addr_str, sizeof(min_addr_str));
-	nf->nf_addr2str(max_addr, max_addr_str, sizeof(max_addr_str));
-
-	snprintf(min_nid, nidstr_length, "%s@%s%d", min_addr_str, lndname,
-		 netnum);
-	snprintf(max_nid, nidstr_length, "%s@%s%d", max_addr_str, lndname,
-		 netnum);
-}
-EXPORT_SYMBOL(cfs_nidrange_find_min_max);
-
-/**
- * Determines the min and max NID values for num LNDs
- *
- * \param	*nidlist
- * \param	*min_nid
- * \param	*max_nid
- */
-static void cfs_num_min_max(struct list_head *nidlist, __u32 *min_nid,
-			    __u32 *max_nid)
-{
-	struct nidrange	*nr;
-	struct addrrange *ar;
-	unsigned int tmp_min_addr = 0;
-	unsigned int tmp_max_addr = 0;
-	unsigned int min_addr = 0;
-	unsigned int max_addr = 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
-			cfs_num_ar_min_max(ar, &tmp_min_addr,
-					   &tmp_max_addr);
-			if (tmp_min_addr < min_addr || !min_addr)
-				min_addr = tmp_min_addr;
-			if (tmp_max_addr > max_addr)
-				max_addr = tmp_max_addr;
-		}
-	}
-	*max_nid = max_addr;
-	*min_nid = min_addr;
-}
-
-/**
- * Takes an nidlist and determines the minimum and maximum
- * ip addresses.
- *
- * \param	*nidlist
- * \param	*min_nid
- * \param	*max_nid
- */
-static void cfs_ip_min_max(struct list_head *nidlist, __u32 *min_nid,
-			   __u32 *max_nid)
-{
-	struct nidrange *nr;
-	struct addrrange *ar;
-	__u32 tmp_min_ip_addr = 0;
-	__u32 tmp_max_ip_addr = 0;
-	__u32 min_ip_addr = 0;
-	__u32 max_ip_addr = 0;
-
-	list_for_each_entry(nr, nidlist, nr_link) {
-		list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
-			cfs_ip_ar_min_max(ar, &tmp_min_ip_addr,
-					  &tmp_max_ip_addr);
-			if (tmp_min_ip_addr < min_ip_addr || !min_ip_addr)
-				min_ip_addr = tmp_min_ip_addr;
-			if (tmp_max_ip_addr > max_ip_addr)
-				max_ip_addr = tmp_max_ip_addr;
-		}
-	}
-
-	if (min_nid)
-		*min_nid = min_ip_addr;
-	if (max_nid)
-		*max_nid = max_ip_addr;
-}
-
-static int
-libcfs_lo_str2addr(const char *str, int nob, __u32 *addr)
-{
-	*addr = 0;
-	return 1;
-}
-
-static void
-libcfs_ip_addr2str(__u32 addr, char *str, size_t size)
-{
-	snprintf(str, size, "%u.%u.%u.%u",
-		 (addr >> 24) & 0xff, (addr >> 16) & 0xff,
-		 (addr >> 8) & 0xff, addr & 0xff);
-}
-
-/*
- * CAVEAT EMPTOR XscanfX
- * I use "%n" at the end of a sscanf format to detect trailing junk.  However
- * sscanf may return immediately if it sees the terminating '\0' in a string, so
- * I initialise the %n variable to the expected length.  If sscanf sets it,
- * fine; if it doesn't, then the scan ended at the end of the string, which is
- * fine too :)
- */
-static int
-libcfs_ip_str2addr(const char *str, int nob, __u32 *addr)
-{
-	unsigned int	a;
-	unsigned int	b;
-	unsigned int	c;
-	unsigned int	d;
-	int		n = nob; /* XscanfX */
-
-	/* numeric IP? */
-	if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
-	    n == nob &&
-	    !(a & ~0xff) && !(b & ~0xff) &&
-	    !(c & ~0xff) && !(d & ~0xff)) {
-		*addr = ((a << 24) | (b << 16) | (c << 8) | d);
-		return 1;
-	}
-
-	return 0;
-}
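The %n trick deserves a worked illustration, since \a str need not be NUL-terminated exactly at \a nob (expected results shown as comments; the input values are made up):

    __u32 a;

    libcfs_ip_str2addr("10.0.0.1", 8, &a);  /* 1: n == nob, a = 0x0a000001 */
    libcfs_ip_str2addr("10.0.0.1x", 9, &a); /* 0: n == 8 != nob, junk */
    libcfs_ip_str2addr("256.0.0.1", 9, &a); /* 0: first octet > 0xff */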
-
-/* Used by lnet/config.c so it can't be static */
-int
-cfs_ip_addr_parse(char *str, int len, struct list_head *list)
-{
-	struct cfs_expr_list *el;
-	struct cfs_lstr src;
-	int rc;
-	int i;
-
-	src.ls_str = str;
-	src.ls_len = len;
-	i = 0;
-
-	while (src.ls_str) {
-		struct cfs_lstr res;
-
-		if (!cfs_gettok(&src, '.', &res)) {
-			rc = -EINVAL;
-			goto out;
-		}
-
-		rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el);
-		if (rc)
-			goto out;
-
-		list_add_tail(&el->el_link, list);
-		i++;
-	}
-
-	if (i == 4)
-		return 0;
-
-	rc = -EINVAL;
-out:
-	cfs_expr_list_free_list(list);
-
-	return rc;
-}
-
-static int
-libcfs_ip_addr_range_print(char *buffer, int count, struct list_head *list)
-{
-	int i = 0, j = 0;
-	struct cfs_expr_list *el;
-
-	list_for_each_entry(el, list, el_link) {
-		LASSERT(j++ < 4);
-		if (i)
-			i += scnprintf(buffer + i, count - i, ".");
-		i += cfs_expr_list_print(buffer + i, count - i, el);
-	}
-	return i;
-}
-
-/**
- * Matches address (\a addr) against address set encoded in \a list.
- *
- * \retval 1 if \a addr matches
- * \retval 0 otherwise
- */
-int
-cfs_ip_addr_match(__u32 addr, struct list_head *list)
-{
-	struct cfs_expr_list *el;
-	int i = 0;
-
-	list_for_each_entry_reverse(el, list, el_link) {
-		if (!cfs_expr_list_match(addr & 0xff, el))
-			return 0;
-		addr >>= 8;
-		i++;
-	}
-
-	return i == 4;
-}
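Note the reverse walk: the last expression list in \a list corresponds to the least-significant octet, so the address is shifted right one byte per step. Paired with cfs_ip_addr_parse() above (values illustrative):

    /* Match 192.168.1.4 against the range "192.168.1.[1-8]". */
    LIST_HEAD(list);
    char range[] = "192.168.1.[1-8]";
    __u32 addr = (192 << 24) | (168 << 16) | (1 << 8) | 4;

    if (!cfs_ip_addr_parse(range, strlen(range), &list)) {
            CDEBUG(D_NET, "match=%d\n",             /* match=1 */
                   cfs_ip_addr_match(addr, &list));
            cfs_expr_list_free_list(&list);
    }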
-
-static void
-libcfs_decnum_addr2str(__u32 addr, char *str, size_t size)
-{
-	snprintf(str, size, "%u", addr);
-}
-
-static int
-libcfs_num_str2addr(const char *str, int nob, __u32 *addr)
-{
-	int     n;
-
-	n = nob;
-	if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
-		return 1;
-
-	n = nob;
-	if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
-		return 1;
-
-	n = nob;
-	if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
-		return 1;
-
-	return 0;
-}
-
-/**
- * Nf_parse_addrlist method for networks using numeric addresses.
- *
- * Examples of such networks are gm and elan.
- *
- * \retval 0 if \a str parses to a numeric address
- * \retval -errno otherwise
- */
-static int
-libcfs_num_parse(char *str, int len, struct list_head *list)
-{
-	struct cfs_expr_list *el;
-	int	rc;
-
-	rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el);
-	if (!rc)
-		list_add_tail(&el->el_link, list);
-
-	return rc;
-}
-
-static int
-libcfs_num_addr_range_print(char *buffer, int count, struct list_head *list)
-{
-	int i = 0, j = 0;
-	struct cfs_expr_list *el;
-
-	list_for_each_entry(el, list, el_link) {
-		LASSERT(j++ < 1);
-		i += cfs_expr_list_print(buffer + i, count - i, el);
-	}
-	return i;
-}
-
-/*
- * Nf_match_addr method for networks using numeric addresses
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-static int
-libcfs_num_match(__u32 addr, struct list_head *numaddr)
-{
-	struct cfs_expr_list *el;
-
-	LASSERT(!list_empty(numaddr));
-	el = list_entry(numaddr->next, struct cfs_expr_list, el_link);
-
-	return cfs_expr_list_match(addr, el);
-}
-
-static struct netstrfns libcfs_netstrfns[] = {
-	{ .nf_type		= LOLND,
-	  .nf_name		= "lo",
-	  .nf_modname		= "klolnd",
-	  .nf_addr2str		= libcfs_decnum_addr2str,
-	  .nf_str2addr		= libcfs_lo_str2addr,
-	  .nf_parse_addrlist	= libcfs_num_parse,
-	  .nf_print_addrlist	= libcfs_num_addr_range_print,
-	  .nf_match_addr	= libcfs_num_match,
-	  .nf_is_contiguous	= cfs_num_is_contiguous,
-	  .nf_min_max		= cfs_num_min_max },
-	{ .nf_type		= SOCKLND,
-	  .nf_name		= "tcp",
-	  .nf_modname		= "ksocklnd",
-	  .nf_addr2str		= libcfs_ip_addr2str,
-	  .nf_str2addr		= libcfs_ip_str2addr,
-	  .nf_parse_addrlist	= cfs_ip_addr_parse,
-	  .nf_print_addrlist	= libcfs_ip_addr_range_print,
-	  .nf_match_addr	= cfs_ip_addr_match,
-	  .nf_is_contiguous	= cfs_ip_is_contiguous,
-	  .nf_min_max		= cfs_ip_min_max },
-	{ .nf_type		= O2IBLND,
-	  .nf_name		= "o2ib",
-	  .nf_modname		= "ko2iblnd",
-	  .nf_addr2str		= libcfs_ip_addr2str,
-	  .nf_str2addr		= libcfs_ip_str2addr,
-	  .nf_parse_addrlist	= cfs_ip_addr_parse,
-	  .nf_print_addrlist	= libcfs_ip_addr_range_print,
-	  .nf_match_addr	= cfs_ip_addr_match,
-	  .nf_is_contiguous	= cfs_ip_is_contiguous,
-	  .nf_min_max		= cfs_ip_min_max },
-	{ .nf_type		= GNILND,
-	  .nf_name		= "gni",
-	  .nf_modname		= "kgnilnd",
-	  .nf_addr2str		= libcfs_decnum_addr2str,
-	  .nf_str2addr		= libcfs_num_str2addr,
-	  .nf_parse_addrlist	= libcfs_num_parse,
-	  .nf_print_addrlist	= libcfs_num_addr_range_print,
-	  .nf_match_addr	= libcfs_num_match,
-	  .nf_is_contiguous	= cfs_num_is_contiguous,
-	  .nf_min_max		= cfs_num_min_max },
-	{ .nf_type		= GNIIPLND,
-	  .nf_name		= "gip",
-	  .nf_modname		= "kgnilnd",
-	  .nf_addr2str		= libcfs_ip_addr2str,
-	  .nf_str2addr		= libcfs_ip_str2addr,
-	  .nf_parse_addrlist	= cfs_ip_addr_parse,
-	  .nf_print_addrlist	= libcfs_ip_addr_range_print,
-	  .nf_match_addr	= cfs_ip_addr_match,
-	  .nf_is_contiguous	= cfs_ip_is_contiguous,
-	  .nf_min_max		= cfs_ip_min_max },
-};
-
-static const size_t libcfs_nnetstrfns = ARRAY_SIZE(libcfs_netstrfns);
-
-static struct netstrfns *
-libcfs_lnd2netstrfns(__u32 lnd)
-{
-	int i;
-
-	for (i = 0; i < libcfs_nnetstrfns; i++)
-		if (lnd == libcfs_netstrfns[i].nf_type)
-			return &libcfs_netstrfns[i];
-
-	return NULL;
-}
-
-static struct netstrfns *
-libcfs_namenum2netstrfns(const char *name)
-{
-	struct netstrfns *nf;
-	int i;
-
-	for (i = 0; i < libcfs_nnetstrfns; i++) {
-		nf = &libcfs_netstrfns[i];
-		if (!strncmp(name, nf->nf_name, strlen(nf->nf_name)))
-			return nf;
-	}
-	return NULL;
-}
-
-static struct netstrfns *
-libcfs_name2netstrfns(const char *name)
-{
-	int    i;
-
-	for (i = 0; i < libcfs_nnetstrfns; i++)
-		if (!strcmp(libcfs_netstrfns[i].nf_name, name))
-			return &libcfs_netstrfns[i];
-
-	return NULL;
-}
-
-int
-libcfs_isknown_lnd(__u32 lnd)
-{
-	return !!libcfs_lnd2netstrfns(lnd);
-}
-EXPORT_SYMBOL(libcfs_isknown_lnd);
-
-char *
-libcfs_lnd2modname(__u32 lnd)
-{
-	struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
-
-	return nf ? nf->nf_modname : NULL;
-}
-EXPORT_SYMBOL(libcfs_lnd2modname);
-
-int
-libcfs_str2lnd(const char *str)
-{
-	struct netstrfns *nf = libcfs_name2netstrfns(str);
-
-	if (nf)
-		return nf->nf_type;
-
-	return -ENXIO;
-}
-EXPORT_SYMBOL(libcfs_str2lnd);
-
-char *
-libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size)
-{
-	struct netstrfns *nf;
-
-	nf = libcfs_lnd2netstrfns(lnd);
-	if (!nf)
-		snprintf(buf, buf_size, "?%u?", lnd);
-	else
-		snprintf(buf, buf_size, "%s", nf->nf_name);
-
-	return buf;
-}
-EXPORT_SYMBOL(libcfs_lnd2str_r);
-
-char *
-libcfs_net2str_r(__u32 net, char *buf, size_t buf_size)
-{
-	__u32 nnum = LNET_NETNUM(net);
-	__u32 lnd = LNET_NETTYP(net);
-	struct netstrfns *nf;
-
-	nf = libcfs_lnd2netstrfns(lnd);
-	if (!nf)
-		snprintf(buf, buf_size, "<%u:%u>", lnd, nnum);
-	else if (!nnum)
-		snprintf(buf, buf_size, "%s", nf->nf_name);
-	else
-		snprintf(buf, buf_size, "%s%u", nf->nf_name, nnum);
-
-	return buf;
-}
-EXPORT_SYMBOL(libcfs_net2str_r);
-
-char *
-libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size)
-{
-	__u32 addr = LNET_NIDADDR(nid);
-	__u32 net = LNET_NIDNET(nid);
-	__u32 nnum = LNET_NETNUM(net);
-	__u32 lnd = LNET_NETTYP(net);
-	struct netstrfns *nf;
-
-	if (nid == LNET_NID_ANY) {
-		strncpy(buf, "<?>", buf_size);
-		buf[buf_size - 1] = '\0';
-		return buf;
-	}
-
-	nf = libcfs_lnd2netstrfns(lnd);
-	if (!nf) {
-		snprintf(buf, buf_size, "%x@<%u:%u>", addr, lnd, nnum);
-	} else {
-		size_t addr_len;
-
-		nf->nf_addr2str(addr, buf, buf_size);
-		addr_len = strlen(buf);
-		if (!nnum)
-			snprintf(buf + addr_len, buf_size - addr_len, "@%s",
-				 nf->nf_name);
-		else
-			snprintf(buf + addr_len, buf_size - addr_len, "@%s%u",
-				 nf->nf_name, nnum);
-	}
-
-	return buf;
-}
-EXPORT_SYMBOL(libcfs_nid2str_r);
-
-static struct netstrfns *
-libcfs_str2net_internal(const char *str, __u32 *net)
-{
-	struct netstrfns *nf = NULL;
-	int nob;
-	unsigned int netnum;
-	int i;
-
-	for (i = 0; i < libcfs_nnetstrfns; i++) {
-		nf = &libcfs_netstrfns[i];
-		if (!strncmp(str, nf->nf_name, strlen(nf->nf_name)))
-			break;
-	}
-
-	if (i == libcfs_nnetstrfns)
-		return NULL;
-
-	nob = strlen(nf->nf_name);
-
-	if (strlen(str) == (unsigned int)nob) {
-		netnum = 0;
-	} else {
-		if (nf->nf_type == LOLND) /* net number not allowed */
-			return NULL;
-
-		str += nob;
-		i = strlen(str);
-		if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
-		    i != (int)strlen(str))
-			return NULL;
-	}
-
-	*net = LNET_MKNET(nf->nf_type, netnum);
-	return nf;
-}
-
-__u32
-libcfs_str2net(const char *str)
-{
-	__u32  net;
-
-	if (libcfs_str2net_internal(str, &net))
-		return net;
-
-	return LNET_NIDNET(LNET_NID_ANY);
-}
-EXPORT_SYMBOL(libcfs_str2net);
-
-lnet_nid_t
-libcfs_str2nid(const char *str)
-{
-	const char *sep = strchr(str, '@');
-	struct netstrfns *nf;
-	__u32 net;
-	__u32 addr;
-
-	if (sep) {
-		nf = libcfs_str2net_internal(sep + 1, &net);
-		if (!nf)
-			return LNET_NID_ANY;
-	} else {
-		sep = str + strlen(str);
-		net = LNET_MKNET(SOCKLND, 0);
-		nf = libcfs_lnd2netstrfns(SOCKLND);
-		LASSERT(nf);
-	}
-
-	if (!nf->nf_str2addr(str, (int)(sep - str), &addr))
-		return LNET_NID_ANY;
-
-	return LNET_MKNID(net, addr);
-}
-EXPORT_SYMBOL(libcfs_str2nid);
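Worth noting: with no '@' separator, libcfs_str2nid() assumes tcp0, i.e. a bare IP is treated as a socklnd NID. A round-trip sketch (the address is made up):

    char buf[LNET_NIDSTR_SIZE];
    lnet_nid_t nid = libcfs_str2nid("10.2.3.4");    /* defaults to @tcp */

    libcfs_nid2str_r(nid, buf, sizeof(buf));        /* "10.2.3.4@tcp" */
    LASSERT(libcfs_str2nid(buf) == nid);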
-
-char *
-libcfs_id2str(struct lnet_process_id id)
-{
-	char *str = libcfs_next_nidstring();
-
-	if (id.pid == LNET_PID_ANY) {
-		snprintf(str, LNET_NIDSTR_SIZE,
-			 "LNET_PID_ANY-%s", libcfs_nid2str(id.nid));
-		return str;
-	}
-
-	snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s",
-		 id.pid & LNET_PID_USERFLAG ? "U" : "",
-		 id.pid & ~LNET_PID_USERFLAG, libcfs_nid2str(id.nid));
-	return str;
-}
-EXPORT_SYMBOL(libcfs_id2str);
-
-int
-libcfs_str2anynid(lnet_nid_t *nidp, const char *str)
-{
-	if (!strcmp(str, "*")) {
-		*nidp = LNET_NID_ANY;
-		return 1;
-	}
-
-	*nidp = libcfs_str2nid(str);
-	return *nidp != LNET_NID_ANY;
-}
-EXPORT_SYMBOL(libcfs_str2anynid);

+ 0 - 456
drivers/staging/lustre/lnet/lnet/peer.c

@@ -1,456 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/peer.c
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-int
-lnet_peer_tables_create(void)
-{
-	struct lnet_peer_table *ptable;
-	struct list_head *hash;
-	int i;
-	int j;
-
-	the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
-						   sizeof(*ptable));
-	if (!the_lnet.ln_peer_tables) {
-		CERROR("Failed to allocate cpu-partition peer tables\n");
-		return -ENOMEM;
-	}
-
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		INIT_LIST_HEAD(&ptable->pt_deathrow);
-
-		hash = kvmalloc_cpt(LNET_PEER_HASH_SIZE * sizeof(*hash),
-				    GFP_KERNEL, i);
-		if (!hash) {
-			CERROR("Failed to create peer hash table\n");
-			lnet_peer_tables_destroy();
-			return -ENOMEM;
-		}
-
-		for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
-			INIT_LIST_HEAD(&hash[j]);
-		ptable->pt_hash = hash; /* sign of initialization */
-	}
-
-	return 0;
-}
-
-void
-lnet_peer_tables_destroy(void)
-{
-	struct lnet_peer_table *ptable;
-	struct list_head *hash;
-	int i;
-	int j;
-
-	if (!the_lnet.ln_peer_tables)
-		return;
-
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		hash = ptable->pt_hash;
-		if (!hash) /* not initialized */
-			break;
-
-		LASSERT(list_empty(&ptable->pt_deathrow));
-
-		ptable->pt_hash = NULL;
-		for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
-			LASSERT(list_empty(&hash[j]));
-
-		kvfree(hash);
-	}
-
-	cfs_percpt_free(the_lnet.ln_peer_tables);
-	the_lnet.ln_peer_tables = NULL;
-}
-
-static void
-lnet_peer_table_cleanup_locked(struct lnet_ni *ni,
-			       struct lnet_peer_table *ptable)
-{
-	int i;
-	struct lnet_peer *lp;
-	struct lnet_peer *tmp;
-
-	for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
-		list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
-					 lp_hashlist) {
-			if (ni && ni != lp->lp_ni)
-				continue;
-			list_del_init(&lp->lp_hashlist);
-			/* Lose hash table's ref */
-			ptable->pt_zombies++;
-			lnet_peer_decref_locked(lp);
-		}
-	}
-}
-
-static void
-lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable,
-				     int cpt_locked)
-{
-	int i;
-
-	for (i = 3; ptable->pt_zombies; i++) {
-		lnet_net_unlock(cpt_locked);
-
-		if (is_power_of_2(i)) {
-			CDEBUG(D_WARNING,
-			       "Waiting for %d zombies on peer table\n",
-			       ptable->pt_zombies);
-		}
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ >> 1);
-		lnet_net_lock(cpt_locked);
-	}
-}
-
-static void
-lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni,
-				struct lnet_peer_table *ptable,
-				int cpt_locked)
-{
-	struct lnet_peer *lp;
-	struct lnet_peer *tmp;
-	lnet_nid_t lp_nid;
-	int i;
-
-	for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
-		list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
-					 lp_hashlist) {
-			if (ni != lp->lp_ni)
-				continue;
-
-			if (!lp->lp_rtr_refcount)
-				continue;
-
-			lp_nid = lp->lp_nid;
-
-			lnet_net_unlock(cpt_locked);
-			lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid);
-			lnet_net_lock(cpt_locked);
-		}
-	}
-}
-
-void
-lnet_peer_tables_cleanup(struct lnet_ni *ni)
-{
-	struct lnet_peer_table *ptable;
-	struct list_head deathrow;
-	struct lnet_peer *lp;
-	struct lnet_peer *temp;
-	int i;
-
-	INIT_LIST_HEAD(&deathrow);
-
-	LASSERT(the_lnet.ln_shutdown || ni);
-	/*
-	 * If just deleting the peers for a NI, get rid of any routes these
-	 * peers are gateways for.
-	 */
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		lnet_net_lock(i);
-		lnet_peer_table_del_rtrs_locked(ni, ptable, i);
-		lnet_net_unlock(i);
-	}
-
-	/*
-	 * Start the process of moving the applicable peers to
-	 * deathrow.
-	 */
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		lnet_net_lock(i);
-		lnet_peer_table_cleanup_locked(ni, ptable);
-		lnet_net_unlock(i);
-	}
-
-	/* Cleanup all entries on deathrow. */
-	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-		lnet_net_lock(i);
-		lnet_peer_table_deathrow_wait_locked(ptable, i);
-		list_splice_init(&ptable->pt_deathrow, &deathrow);
-		lnet_net_unlock(i);
-	}
-
-	list_for_each_entry_safe(lp, temp, &deathrow, lp_hashlist) {
-		list_del(&lp->lp_hashlist);
-		kfree(lp);
-	}
-}
-
-void
-lnet_destroy_peer_locked(struct lnet_peer *lp)
-{
-	struct lnet_peer_table *ptable;
-
-	LASSERT(!lp->lp_refcount);
-	LASSERT(!lp->lp_rtr_refcount);
-	LASSERT(list_empty(&lp->lp_txq));
-	LASSERT(list_empty(&lp->lp_hashlist));
-	LASSERT(!lp->lp_txqnob);
-
-	ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
-	LASSERT(ptable->pt_number > 0);
-	ptable->pt_number--;
-
-	lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
-	lp->lp_ni = NULL;
-
-	list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
-	LASSERT(ptable->pt_zombies > 0);
-	ptable->pt_zombies--;
-}
-
-struct lnet_peer *
-lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
-{
-	struct list_head *peers;
-	struct lnet_peer *lp;
-
-	LASSERT(!the_lnet.ln_shutdown);
-
-	peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
-	list_for_each_entry(lp, peers, lp_hashlist) {
-		if (lp->lp_nid == nid) {
-			lnet_peer_addref_locked(lp);
-			return lp;
-		}
-	}
-
-	return NULL;
-}
-
-int
-lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt)
-{
-	struct lnet_peer_table *ptable;
-	struct lnet_peer *lp = NULL;
-	struct lnet_peer *lp2;
-	int cpt2;
-	int rc = 0;
-
-	*lpp = NULL;
-	if (the_lnet.ln_shutdown) /* it's shutting down */
-		return -ESHUTDOWN;
-
-	/* cpt can be LNET_LOCK_EX if it's called from router functions */
-	cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
-
-	ptable = the_lnet.ln_peer_tables[cpt2];
-	lp = lnet_find_peer_locked(ptable, nid);
-	if (lp) {
-		*lpp = lp;
-		return 0;
-	}
-
-	if (!list_empty(&ptable->pt_deathrow)) {
-		lp = list_entry(ptable->pt_deathrow.next,
-				struct lnet_peer, lp_hashlist);
-		list_del(&lp->lp_hashlist);
-	}
-
-	/*
-	 * take an extra refcount in case another thread has shut down LNet
-	 * and destroyed the locks and peer table before I finish the allocation
-	 */
-	ptable->pt_number++;
-	lnet_net_unlock(cpt);
-
-	if (lp)
-		memset(lp, 0, sizeof(*lp));
-	else
-		lp = kzalloc_cpt(sizeof(*lp), GFP_NOFS, cpt2);
-
-	if (!lp) {
-		rc = -ENOMEM;
-		lnet_net_lock(cpt);
-		goto out;
-	}
-
-	INIT_LIST_HEAD(&lp->lp_txq);
-	INIT_LIST_HEAD(&lp->lp_rtrq);
-	INIT_LIST_HEAD(&lp->lp_routes);
-
-	lp->lp_notify = 0;
-	lp->lp_notifylnd = 0;
-	lp->lp_notifying = 0;
-	lp->lp_alive_count = 0;
-	lp->lp_timestamp = 0;
-	lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
-	lp->lp_last_alive = jiffies; /* assumes alive */
-	lp->lp_last_query = 0; /* haven't asked NI yet */
-	lp->lp_ping_timestamp = 0;
-	lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
-	lp->lp_nid = nid;
-	lp->lp_cpt = cpt2;
-	lp->lp_refcount = 2;	/* 1 for caller; 1 for hash */
-	lp->lp_rtr_refcount = 0;
-
-	lnet_net_lock(cpt);
-
-	if (the_lnet.ln_shutdown) {
-		rc = -ESHUTDOWN;
-		goto out;
-	}
-
-	lp2 = lnet_find_peer_locked(ptable, nid);
-	if (lp2) {
-		*lpp = lp2;
-		goto out;
-	}
-
-	lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
-	if (!lp->lp_ni) {
-		rc = -EHOSTUNREACH;
-		goto out;
-	}
-
-	lp->lp_txcredits = lp->lp_ni->ni_peertxcredits;
-	lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
-	lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
-	lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
-
-	list_add_tail(&lp->lp_hashlist,
-		      &ptable->pt_hash[lnet_nid2peerhash(nid)]);
-	ptable->pt_version++;
-	*lpp = lp;
-
-	return 0;
-out:
-	if (lp)
-		list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
-	ptable->pt_number--;
-	return rc;
-}
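lnet_nid2peer_locked() is a textbook drop-the-lock-to-allocate pattern: pt_number is bumped as a reservation while still locked, the lock is dropped for the (possibly sleeping) allocation, and after relocking both ln_shutdown and the hash table are re-checked because another thread may have raced in. Its skeleton, keeping the names from the source:

    /*
     * ptable->pt_number++;             reserve while still locked
     * lnet_net_unlock(cpt);
     * lp = kzalloc_cpt(...);           may sleep, hence the unlock
     * lnet_net_lock(cpt);
     * if (the_lnet.ln_shutdown)        the world may have changed...
     *         goto out;
     * lp2 = lnet_find_peer_locked(ptable, nid);
     * if (lp2)                         ...or someone beat us to it
     *         goto out;                (out: undoes the reservation)
     */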
-
-void
-lnet_debug_peer(lnet_nid_t nid)
-{
-	char *aliveness = "NA";
-	struct lnet_peer *lp;
-	int rc;
-	int cpt;
-
-	cpt = lnet_cpt_of_nid(nid);
-	lnet_net_lock(cpt);
-
-	rc = lnet_nid2peer_locked(&lp, nid, cpt);
-	if (rc) {
-		lnet_net_unlock(cpt);
-		CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
-		return;
-	}
-
-	if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
-		aliveness = lp->lp_alive ? "up" : "down";
-
-	CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
-	       libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
-	       aliveness, lp->lp_ni->ni_peertxcredits,
-	       lp->lp_rtrcredits, lp->lp_minrtrcredits,
-	       lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
-
-	lnet_peer_decref_locked(lp);
-
-	lnet_net_unlock(cpt);
-}
-
-int
-lnet_get_peer_info(__u32 peer_index, __u64 *nid,
-		   char aliveness[LNET_MAX_STR_LEN],
-		   __u32 *cpt_iter, __u32 *refcount,
-		   __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
-		   __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
-		   __u32 *peer_tx_qnob)
-{
-	struct lnet_peer_table *peer_table;
-	struct lnet_peer *lp;
-	bool found = false;
-	int lncpt, j;
-
-	/* get the number of CPTs */
-	lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
-
-	/*
-	 * if the cpt number to be examined is >= the number of cpts in
-	 * the system then indicate that there are no more cpts to examin
-	 */
-	if (*cpt_iter >= lncpt)
-		return -ENOENT;
-
-	/* get the current table */
-	peer_table = the_lnet.ln_peer_tables[*cpt_iter];
-	/* if the ptable is NULL then there are no more cpts to examine */
-	if (!peer_table)
-		return -ENOENT;
-
-	lnet_net_lock(*cpt_iter);
-
-	for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
-		struct list_head *peers = &peer_table->pt_hash[j];
-
-		list_for_each_entry(lp, peers, lp_hashlist) {
-			if (peer_index-- > 0)
-				continue;
-
-			snprintf(aliveness, LNET_MAX_STR_LEN, "NA");
-			if (lnet_isrouter(lp) ||
-			    lnet_peer_aliveness_enabled(lp))
-				snprintf(aliveness, LNET_MAX_STR_LEN, "%s",
-					 lp->lp_alive ? "up" : "down");
-
-			*nid = lp->lp_nid;
-			*refcount = lp->lp_refcount;
-			*ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits;
-			*peer_tx_credits = lp->lp_txcredits;
-			*peer_rtr_credits = lp->lp_rtrcredits;
-			*peer_min_rtr_credits = lp->lp_mintxcredits;
-			*peer_tx_qnob = lp->lp_txqnob;
-
-			found = true;
-		}
-	}
-	lnet_net_unlock(*cpt_iter);
-
-	*cpt_iter = lncpt;
-
-	return found ? 0 : -ENOENT;
-}

+ 0 - 1799
drivers/staging/lustre/lnet/lnet/router.c

@@ -1,1799 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- *
- *   This file is part of Portals
- *   http://sourceforge.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/completion.h>
-#include <linux/lnet/lib-lnet.h>
-
-#define LNET_NRB_TINY_MIN	512	/* min value for each CPT */
-#define LNET_NRB_TINY		(LNET_NRB_TINY_MIN * 4)
-#define LNET_NRB_SMALL_MIN	4096	/* min value for each CPT */
-#define LNET_NRB_SMALL		(LNET_NRB_SMALL_MIN * 4)
-#define LNET_NRB_SMALL_PAGES	1
-#define LNET_NRB_LARGE_MIN	256	/* min value for each CPT */
-#define LNET_NRB_LARGE		(LNET_NRB_LARGE_MIN * 4)
-#define LNET_NRB_LARGE_PAGES   ((LNET_MTU + PAGE_SIZE - 1) >> \
-				 PAGE_SHIFT)
-
-static char *forwarding = "";
-module_param(forwarding, charp, 0444);
-MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
-
-static int tiny_router_buffers;
-module_param(tiny_router_buffers, int, 0444);
-MODULE_PARM_DESC(tiny_router_buffers, "# of 0 payload messages to buffer in the router");
-static int small_router_buffers;
-module_param(small_router_buffers, int, 0444);
-MODULE_PARM_DESC(small_router_buffers, "# of small (1 page) messages to buffer in the router");
-static int large_router_buffers;
-module_param(large_router_buffers, int, 0444);
-MODULE_PARM_DESC(large_router_buffers, "# of large messages to buffer in the router");
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# router buffer credits per peer");
-
-static int auto_down = 1;
-module_param(auto_down, int, 0444);
-MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
-
-int
-lnet_peer_buffer_credits(struct lnet_ni *ni)
-{
-	/* NI option overrides LNet default */
-	if (ni->ni_peerrtrcredits > 0)
-		return ni->ni_peerrtrcredits;
-	if (peer_buffer_credits > 0)
-		return peer_buffer_credits;
-
-	/*
-	 * As an approximation, allow this peer the same number of router
-	 * buffers as it is allowed outstanding sends
-	 */
-	return ni->ni_peertxcredits;
-}
-
-/* forward ref's */
-static int lnet_router_checker(void *);
-
-static int check_routers_before_use;
-module_param(check_routers_before_use, int, 0444);
-MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
-
-int avoid_asym_router_failure = 1;
-module_param(avoid_asym_router_failure, int, 0644);
-MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
-
-static int dead_router_check_interval = 60;
-module_param(dead_router_check_interval, int, 0644);
-MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)");
-
-static int live_router_check_interval = 60;
-module_param(live_router_check_interval, int, 0644);
-MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)");
-
-static int router_ping_timeout = 50;
-module_param(router_ping_timeout, int, 0644);
-MODULE_PARM_DESC(router_ping_timeout, "Seconds to wait for the reply to a router health query");
-
-int
-lnet_peers_start_down(void)
-{
-	return check_routers_before_use;
-}
-
-void
-lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive,
-		   unsigned long when)
-{
-	if (time_before(when, lp->lp_timestamp)) { /* out of date information */
-		CDEBUG(D_NET, "Out of date\n");
-		return;
-	}
-
-	lp->lp_timestamp = when;		/* update timestamp */
-	lp->lp_ping_deadline = 0;	       /* disable ping timeout */
-
-	if (lp->lp_alive_count &&	  /* got old news */
-	    (!lp->lp_alive) == (!alive)) {      /* new date for old news */
-		CDEBUG(D_NET, "Old news\n");
-		return;
-	}
-
-	/* Flag that notification is outstanding */
-
-	lp->lp_alive_count++;
-	lp->lp_alive = !(!alive);	       /* 1 bit! */
-	lp->lp_notify = 1;
-	lp->lp_notifylnd |= notifylnd;
-	if (lp->lp_alive)
-		lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
-
-	CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
-}
-
-static void
-lnet_ni_notify_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
-	int alive;
-	int notifylnd;
-
-	/*
-	 * Notify only in 1 thread at any time to ensure ordered notification.
-	 * NB individual events can be missed; the only guarantee is that you
-	 * always get the most recent news
-	 */
-	if (lp->lp_notifying || !ni)
-		return;
-
-	lp->lp_notifying = 1;
-
-	while (lp->lp_notify) {
-		alive = lp->lp_alive;
-		notifylnd = lp->lp_notifylnd;
-
-		lp->lp_notifylnd = 0;
-		lp->lp_notify    = 0;
-
-		if (notifylnd && ni->ni_lnd->lnd_notify) {
-			lnet_net_unlock(lp->lp_cpt);
-
-			/*
-			 * A new notification could happen now; I'll handle it
-			 * when control returns to me
-			 */
-			ni->ni_lnd->lnd_notify(ni, lp->lp_nid, alive);
-
-			lnet_net_lock(lp->lp_cpt);
-		}
-	}
-
-	lp->lp_notifying = 0;
-}
-
-static void
-lnet_rtr_addref_locked(struct lnet_peer *lp)
-{
-	LASSERT(lp->lp_refcount > 0);
-	LASSERT(lp->lp_rtr_refcount >= 0);
-
-	/* lnet_net_lock must be exclusively locked */
-	lp->lp_rtr_refcount++;
-	if (lp->lp_rtr_refcount == 1) {
-		struct list_head *pos;
-
-		/* a simple insertion sort */
-		list_for_each_prev(pos, &the_lnet.ln_routers) {
-			struct lnet_peer *rtr;
-
-			rtr = list_entry(pos, struct lnet_peer, lp_rtr_list);
-			if (rtr->lp_nid < lp->lp_nid)
-				break;
-		}
-
-		list_add(&lp->lp_rtr_list, pos);
-		/* addref for the_lnet.ln_routers */
-		lnet_peer_addref_locked(lp);
-		the_lnet.ln_routers_version++;
-	}
-}
-
-static void
-lnet_rtr_decref_locked(struct lnet_peer *lp)
-{
-	LASSERT(lp->lp_refcount > 0);
-	LASSERT(lp->lp_rtr_refcount > 0);
-
-	/* lnet_net_lock must be exclusively locked */
-	lp->lp_rtr_refcount--;
-	if (!lp->lp_rtr_refcount) {
-		LASSERT(list_empty(&lp->lp_routes));
-
-		if (lp->lp_rcd) {
-			list_add(&lp->lp_rcd->rcd_list,
-				 &the_lnet.ln_rcd_deathrow);
-			lp->lp_rcd = NULL;
-		}
-
-		list_del(&lp->lp_rtr_list);
-		/* decref for the_lnet.ln_routers */
-		lnet_peer_decref_locked(lp);
-		the_lnet.ln_routers_version++;
-	}
-}
-
-struct lnet_remotenet *
-lnet_find_net_locked(__u32 net)
-{
-	struct lnet_remotenet *rnet;
-	struct list_head *rn_list;
-
-	LASSERT(!the_lnet.ln_shutdown);
-
-	rn_list = lnet_net2rnethash(net);
-	list_for_each_entry(rnet, rn_list, lrn_list) {
-		if (rnet->lrn_net == net)
-			return rnet;
-	}
-	return NULL;
-}
-
-static void lnet_shuffle_seed(void)
-{
-	static int seeded;
-	struct lnet_ni *ni;
-
-	if (seeded)
-		return;
-
-	/*
-	 * Nodes with small feet have little entropy
-	 * the NID for this node gives the most entropy in the low bits
-	 */
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		__u32 lnd_type, seed;
-
-		lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-		if (lnd_type != LOLND) {
-			seed = (LNET_NIDADDR(ni->ni_nid) | lnd_type);
-			add_device_randomness(&seed, sizeof(seed));
-		}
-	}
-
-	seeded = 1;
-}
-
-/* NB expects LNET_LOCK held */
-static void
-lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route)
-{
-	unsigned int len = 0;
-	unsigned int offset = 0;
-	struct list_head *e;
-
-	lnet_shuffle_seed();
-
-	list_for_each(e, &rnet->lrn_routes) {
-		len++;
-	}
-
-	/* len+1 positions to add a new entry */
-	offset = prandom_u32_max(len + 1);
-	list_for_each(e, &rnet->lrn_routes) {
-		if (!offset)
-			break;
-		offset--;
-	}
-	list_add(&route->lr_list, e);
-	list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);
-
-	the_lnet.ln_remote_nets_version++;
-	lnet_rtr_addref_locked(route->lr_gateway);
-}
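Rather than appending and shuffling later, lnet_add_route_to_rnet() inserts each route at a uniformly random one of the len + 1 possible positions, so equal-priority routes end up in a different order on every node; per the lnet_shuffle_seed() comment above, the per-node NID entropy is meant to make that ordering differ across nodes. The same idea in isolation, as a generic sketch:

    /* Insert @item at a uniformly random position in @head, where
     * @len is the current list length (len + 1 candidate slots). */
    static void list_add_random(struct list_head *item,
                                struct list_head *head, unsigned int len)
    {
            struct list_head *e;
            unsigned int offset = prandom_u32_max(len + 1);

            list_for_each(e, head) {
                    if (!offset)
                            break;
                    offset--;
            }
            list_add(item, e);
    }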
-
-int
-lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
-	       unsigned int priority)
-{
-	struct list_head *e;
-	struct lnet_remotenet *rnet;
-	struct lnet_remotenet *rnet2;
-	struct lnet_route *route;
-	struct lnet_ni *ni;
-	int add_route;
-	int rc;
-
-	CDEBUG(D_NET, "Add route: net %s hops %d priority %u gw %s\n",
-	       libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway));
-
-	if (gateway == LNET_NID_ANY ||
-	    LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
-	    net == LNET_NIDNET(LNET_NID_ANY) ||
-	    LNET_NETTYP(net) == LOLND ||
-	    LNET_NIDNET(gateway) == net ||
-	    (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255)))
-		return -EINVAL;
-
-	if (lnet_islocalnet(net))	       /* it's a local network */
-		return -EEXIST;
-
-	/* Assume net, route, all new */
-	route = kzalloc(sizeof(*route), GFP_NOFS);
-	rnet = kzalloc(sizeof(*rnet), GFP_NOFS);
-	if (!route || !rnet) {
-		CERROR("Out of memory creating route %s %d %s\n",
-		       libcfs_net2str(net), hops, libcfs_nid2str(gateway));
-		kfree(route);
-		kfree(rnet);
-		return -ENOMEM;
-	}
-
-	INIT_LIST_HEAD(&rnet->lrn_routes);
-	rnet->lrn_net = net;
-	route->lr_hops = hops;
-	route->lr_net = net;
-	route->lr_priority = priority;
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
-	if (rc) {
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		kfree(route);
-		kfree(rnet);
-
-		if (rc == -EHOSTUNREACH) /* gateway is not on a local net */
-			return rc;	/* ignore the route entry */
-		CERROR("Error %d creating route %s %d %s\n", rc,
-		       libcfs_net2str(net), hops,
-		       libcfs_nid2str(gateway));
-		return rc;
-	}
-
-	LASSERT(!the_lnet.ln_shutdown);
-
-	rnet2 = lnet_find_net_locked(net);
-	if (!rnet2) {
-		/* new network */
-		list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
-		rnet2 = rnet;
-	}
-
-	/* Search for a duplicate route (adding a duplicate is a NOOP) */
-	add_route = 1;
-	list_for_each(e, &rnet2->lrn_routes) {
-		struct lnet_route *route2;
-
-		route2 = list_entry(e, struct lnet_route, lr_list);
-		if (route2->lr_gateway == route->lr_gateway) {
-			add_route = 0;
-			break;
-		}
-
-		/* our lookups must be true */
-		LASSERT(route2->lr_gateway->lp_nid != gateway);
-	}
-
-	if (add_route) {
-		lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
-		lnet_add_route_to_rnet(rnet2, route);
-
-		ni = route->lr_gateway->lp_ni;
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		/* XXX Assume alive */
-		if (ni->ni_lnd->lnd_notify)
-			ni->ni_lnd->lnd_notify(ni, gateway, 1);
-
-		lnet_net_lock(LNET_LOCK_EX);
-	}
-
-	/* -1 for notify or !add_route */
-	lnet_peer_decref_locked(route->lr_gateway);
-	lnet_net_unlock(LNET_LOCK_EX);
-	rc = 0;
-
-	if (!add_route) {
-		rc = -EEXIST;
-		kfree(route);
-	}
-
-	if (rnet != rnet2)
-		kfree(rnet);
-
-	/* indicate to start up the router checker if configured */
-	wake_up(&the_lnet.ln_rc_waitq);
-
-	return rc;
-}
-
-int
-lnet_check_routes(void)
-{
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	struct lnet_route *route2;
-	struct list_head *e1;
-	struct list_head *e2;
-	int cpt;
-	struct list_head *rn_list;
-	int i;
-
-	cpt = lnet_net_lock_current();
-
-	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
-		rn_list = &the_lnet.ln_remote_nets_hash[i];
-		list_for_each(e1, rn_list) {
-			rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
-			route2 = NULL;
-			list_for_each(e2, &rnet->lrn_routes) {
-				lnet_nid_t nid1;
-				lnet_nid_t nid2;
-				int net;
-
-				route = list_entry(e2, struct lnet_route, lr_list);
-
-				if (!route2) {
-					route2 = route;
-					continue;
-				}
-
-				if (route->lr_gateway->lp_ni ==
-				    route2->lr_gateway->lp_ni)
-					continue;
-
-				nid1 = route->lr_gateway->lp_nid;
-				nid2 = route2->lr_gateway->lp_nid;
-				net = rnet->lrn_net;
-
-				lnet_net_unlock(cpt);
-
-				CERROR("Routes to %s via %s and %s not supported\n",
-				       libcfs_net2str(net),
-				       libcfs_nid2str(nid1),
-				       libcfs_nid2str(nid2));
-				return -EINVAL;
-			}
-		}
-	}
-
-	lnet_net_unlock(cpt);
-	return 0;
-}
-
-int
-lnet_del_route(__u32 net, lnet_nid_t gw_nid)
-{
-	struct lnet_peer *gateway;
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	struct list_head *e1;
-	struct list_head *e2;
-	int rc = -ENOENT;
-	struct list_head *rn_list;
-	int idx = 0;
-
-	CDEBUG(D_NET, "Del route: net %s : gw %s\n",
-	       libcfs_net2str(net), libcfs_nid2str(gw_nid));
-
-	/*
-	 * NB Caller may specify either all routes via the given gateway
-	 * or a specific route entry (actual NIDs)
-	 */
-	lnet_net_lock(LNET_LOCK_EX);
-	if (net == LNET_NIDNET(LNET_NID_ANY))
-		rn_list = &the_lnet.ln_remote_nets_hash[0];
-	else
-		rn_list = lnet_net2rnethash(net);
-
- again:
-	list_for_each(e1, rn_list) {
-		rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
-		if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
-		      net == rnet->lrn_net))
-			continue;
-
-		list_for_each(e2, &rnet->lrn_routes) {
-			route = list_entry(e2, struct lnet_route, lr_list);
-
-			gateway = route->lr_gateway;
-			if (!(gw_nid == LNET_NID_ANY ||
-			      gw_nid == gateway->lp_nid))
-				continue;
-
-			list_del(&route->lr_list);
-			list_del(&route->lr_gwlist);
-			the_lnet.ln_remote_nets_version++;
-
-			if (list_empty(&rnet->lrn_routes))
-				list_del(&rnet->lrn_list);
-			else
-				rnet = NULL;
-
-			lnet_rtr_decref_locked(gateway);
-			lnet_peer_decref_locked(gateway);
-
-			lnet_net_unlock(LNET_LOCK_EX);
-
-			kfree(route);
-			kfree(rnet);
-
-			rc = 0;
-			lnet_net_lock(LNET_LOCK_EX);
-			goto again;
-		}
-	}
-
-	if (net == LNET_NIDNET(LNET_NID_ANY) &&
-	    ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
-		rn_list = &the_lnet.ln_remote_nets_hash[idx];
-		goto again;
-	}
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return rc;
-}
-
-void
-lnet_destroy_routes(void)
-{
-	lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
-}
-
-int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg)
-{
-	int i, rc = -ENOENT, j;
-
-	if (!the_lnet.ln_rtrpools)
-		return rc;
-
-	for (i = 0; i < LNET_NRBPOOLS; i++) {
-		struct lnet_rtrbufpool *rbp;
-
-		lnet_net_lock(LNET_LOCK_EX);
-		cfs_percpt_for_each(rbp, j, the_lnet.ln_rtrpools) {
-			if (i++ != idx)
-				continue;
-
-			pool_cfg->pl_pools[i].pl_npages = rbp[i].rbp_npages;
-			pool_cfg->pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers;
-			pool_cfg->pl_pools[i].pl_credits = rbp[i].rbp_credits;
-			pool_cfg->pl_pools[i].pl_mincredits = rbp[i].rbp_mincredits;
-			rc = 0;
-			break;
-		}
-		lnet_net_unlock(LNET_LOCK_EX);
-	}
-
-	lnet_net_lock(LNET_LOCK_EX);
-	pool_cfg->pl_routing = the_lnet.ln_routing;
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return rc;
-}
-
-int
-lnet_get_route(int idx, __u32 *net, __u32 *hops,
-	       lnet_nid_t *gateway, __u32 *alive, __u32 *priority)
-{
-	struct list_head *e1;
-	struct list_head *e2;
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	int cpt;
-	int i;
-	struct list_head *rn_list;
-
-	cpt = lnet_net_lock_current();
-
-	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
-		rn_list = &the_lnet.ln_remote_nets_hash[i];
-		list_for_each(e1, rn_list) {
-			rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
-			list_for_each(e2, &rnet->lrn_routes) {
-				route = list_entry(e2, struct lnet_route,
-						   lr_list);
-
-				if (!idx--) {
-					*net      = rnet->lrn_net;
-					*hops     = route->lr_hops;
-					*priority = route->lr_priority;
-					*gateway  = route->lr_gateway->lp_nid;
-					*alive = lnet_is_route_alive(route);
-					lnet_net_unlock(cpt);
-					return 0;
-				}
-			}
-		}
-	}
-
-	lnet_net_unlock(cpt);
-	return -ENOENT;
-}
-
-void
-lnet_swap_pinginfo(struct lnet_ping_info *info)
-{
-	int i;
-	struct lnet_ni_status *stat;
-
-	__swab32s(&info->pi_magic);
-	__swab32s(&info->pi_features);
-	__swab32s(&info->pi_pid);
-	__swab32s(&info->pi_nnis);
-	for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
-		stat = &info->pi_ni[i];
-		__swab64s(&stat->ns_nid);
-		__swab32s(&stat->ns_status);
-	}
-}
-
-/**
- * Parse router-checker ping info and record the number of down NIs for
- * remote networks on that router.
- */
-static void
-lnet_parse_rc_info(struct lnet_rc_data *rcd)
-{
-	struct lnet_ping_info *info = rcd->rcd_pinginfo;
-	struct lnet_peer *gw = rcd->rcd_gateway;
-	struct lnet_route *rte;
-
-	if (!gw->lp_alive)
-		return;
-
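-	/* a byte-swapped magic means the peer is opposite-endian: fix up in place */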
-	if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
-		lnet_swap_pinginfo(info);
-
-	/* NB always racing with network! */
-	if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
-		CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
-		       libcfs_nid2str(gw->lp_nid), info->pi_magic);
-		gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
-		return;
-	}
-
-	gw->lp_ping_feats = info->pi_features;
-	if (!(gw->lp_ping_feats & LNET_PING_FEAT_MASK)) {
-		CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
-		       libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
-		return; /* nothing I can understand */
-	}
-
-	if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS))
-		return; /* can't carry NI status info */
-
-	list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
-		int down = 0;
-		int up = 0;
-		int i;
-
-		if (gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) {
-			rte->lr_downis = 1;
-			continue;
-		}
-
-		for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
-			struct lnet_ni_status *stat = &info->pi_ni[i];
-			lnet_nid_t nid = stat->ns_nid;
-
-			if (nid == LNET_NID_ANY) {
-				CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
-				       libcfs_nid2str(gw->lp_nid));
-				gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
-				return;
-			}
-
-			if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
-				continue;
-
-			if (stat->ns_status == LNET_NI_STATUS_DOWN) {
-				down++;
-				continue;
-			}
-
-			if (stat->ns_status == LNET_NI_STATUS_UP) {
-				if (LNET_NIDNET(nid) == rte->lr_net) {
-					up = 1;
-					break;
-				}
-				continue;
-			}
-
-			CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
-			       libcfs_nid2str(gw->lp_nid), stat->ns_status);
-			gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
-			return;
-		}
-
-		if (up) { /* ignore downed NIs if NI for dest network is up */
-			rte->lr_downis = 0;
-			continue;
-		}
-		/*
-		 * if @down is zero and this route is single-hop, it means
-		 * we couldn't find an NI for the target network
-		 */
-		if (!down && rte->lr_hops == 1)
-			down = 1;
-
-		rte->lr_downis = down;
-	}
-}
-
-static void
-lnet_router_checker_event(struct lnet_event *event)
-{
-	struct lnet_rc_data *rcd = event->md.user_ptr;
-	struct lnet_peer *lp;
-
-	LASSERT(rcd);
-
-	if (event->unlinked) {
-		LNetInvalidateMDHandle(&rcd->rcd_mdh);
-		return;
-	}
-
-	LASSERT(event->type == LNET_EVENT_SEND ||
-		event->type == LNET_EVENT_REPLY);
-
-	lp = rcd->rcd_gateway;
-	LASSERT(lp);
-
-	/*
-	 * NB: this is called while holding lnet_res_lock; a few places
-	 * need to hold both locks at the same time, so take care with
-	 * lock ordering
-	 */
-	lnet_net_lock(lp->lp_cpt);
-	if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
-		/* ignore if no longer a router or rcd is replaced */
-		goto out;
-	}
-
-	if (event->type == LNET_EVENT_SEND) {
-		lp->lp_ping_notsent = 0;
-		if (!event->status)
-			goto out;
-	}
-
-	/* LNET_EVENT_REPLY */
-	/*
-	 * A successful REPLY means the router is up.  If _any_ comms
-	 * to the router fail I assume it's down (this will happen if
-	 * we ping alive routers to try to detect router death before
-	 * apps get burned).
-	 */
-	lnet_notify_locked(lp, 1, !event->status, jiffies);
-
-	/*
-	 * The router checker will wake up very shortly and do the
-	 * actual notification.
-	 * XXX If 'lp' stops being a router before then, it will still
-	 * have the notification pending!!!
-	 */
-	if (avoid_asym_router_failure && !event->status)
-		lnet_parse_rc_info(rcd);
-
- out:
-	lnet_net_unlock(lp->lp_cpt);
-}
-
-static void
-lnet_wait_known_routerstate(void)
-{
-	struct lnet_peer *rtr;
-	struct list_head *entry;
-	int all_known;
-
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
-
-	for (;;) {
-		int cpt = lnet_net_lock_current();
-
-		all_known = 1;
-		list_for_each(entry, &the_lnet.ln_routers) {
-			rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
-			if (!rtr->lp_alive_count) {
-				all_known = 0;
-				break;
-			}
-		}
-
-		lnet_net_unlock(cpt);
-
-		if (all_known)
-			return;
-
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
-	}
-}
-
-void
-lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net)
-{
-	struct lnet_route *rte;
-
-	if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) {
-		list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
-			if (rte->lr_net == net) {
-				rte->lr_downis = 0;
-				break;
-			}
-		}
-	}
-}
-
-static void
-lnet_update_ni_status_locked(void)
-{
-	struct lnet_ni *ni;
-	time64_t now;
-	int timeout;
-
-	LASSERT(the_lnet.ln_routing);
-
-	timeout = router_ping_timeout +
-		  max(live_router_check_interval, dead_router_check_interval);
-
-	now = ktime_get_real_seconds();
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		if (ni->ni_lnd->lnd_type == LOLND)
-			continue;
-
-		if (now < ni->ni_last_alive + timeout)
-			continue;
-
-		lnet_ni_lock(ni);
-		/* re-check with lock */
-		if (now < ni->ni_last_alive + timeout) {
-			lnet_ni_unlock(ni);
-			continue;
-		}
-
-		LASSERT(ni->ni_status);
-
-		if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
-			CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
-			       libcfs_nid2str(ni->ni_nid), timeout);
-			/*
-			 * NB: so far, this is the only place to set
-			 * NI status to "down"
-			 */
-			ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
-		}
-		lnet_ni_unlock(ni);
-	}
-}
-
-static void
-lnet_destroy_rc_data(struct lnet_rc_data *rcd)
-{
-	LASSERT(list_empty(&rcd->rcd_list));
-	/* detached from network */
-	LASSERT(LNetMDHandleIsInvalid(rcd->rcd_mdh));
-
-	if (rcd->rcd_gateway) {
-		int cpt = rcd->rcd_gateway->lp_cpt;
-
-		lnet_net_lock(cpt);
-		lnet_peer_decref_locked(rcd->rcd_gateway);
-		lnet_net_unlock(cpt);
-	}
-
-	kfree(rcd->rcd_pinginfo);
-
-	kfree(rcd);
-}
-
-static struct lnet_rc_data *
-lnet_create_rc_data_locked(struct lnet_peer *gateway)
-{
-	struct lnet_rc_data *rcd = NULL;
-	struct lnet_ping_info *pi;
-	struct lnet_md md;
-	int rc;
-	int i;
-
-	lnet_net_unlock(gateway->lp_cpt);
-
-	rcd = kzalloc(sizeof(*rcd), GFP_NOFS);
-	if (!rcd)
-		goto out;
-
-	LNetInvalidateMDHandle(&rcd->rcd_mdh);
-	INIT_LIST_HEAD(&rcd->rcd_list);
-
-	pi = kzalloc(LNET_PINGINFO_SIZE, GFP_NOFS);
-	if (!pi)
-		goto out;
-
-	for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
-		pi->pi_ni[i].ns_nid = LNET_NID_ANY;
-		pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
-	}
-	rcd->rcd_pinginfo = pi;
-
-	md.start = pi;
-	md.user_ptr = rcd;
-	md.length = LNET_PINGINFO_SIZE;
-	md.threshold = LNET_MD_THRESH_INF;
-	md.options = LNET_MD_TRUNCATE;
-	md.eq_handle = the_lnet.ln_rc_eqh;
-
-	LASSERT(!LNetEQHandleIsInvalid(the_lnet.ln_rc_eqh));
-	rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh);
-	if (rc < 0) {
-		CERROR("Can't bind MD: %d\n", rc);
-		goto out;
-	}
-	LASSERT(!rc);
-
-	lnet_net_lock(gateway->lp_cpt);
-	/* router table changed or someone has created rcd for this gateway */
-	if (!lnet_isrouter(gateway) || gateway->lp_rcd) {
-		lnet_net_unlock(gateway->lp_cpt);
-		goto out;
-	}
-
-	lnet_peer_addref_locked(gateway);
-	rcd->rcd_gateway = gateway;
-	gateway->lp_rcd = rcd;
-	gateway->lp_ping_notsent = 0;
-
-	return rcd;
-
- out:
-	if (rcd) {
-		if (!LNetMDHandleIsInvalid(rcd->rcd_mdh)) {
-			rc = LNetMDUnlink(rcd->rcd_mdh);
-			LASSERT(!rc);
-		}
-		lnet_destroy_rc_data(rcd);
-	}
-
-	lnet_net_lock(gateway->lp_cpt);
-	return gateway->lp_rcd;
-}
-
-static int
-lnet_router_check_interval(struct lnet_peer *rtr)
-{
-	int secs;
-
-	secs = rtr->lp_alive ? live_router_check_interval :
-			       dead_router_check_interval;
-	if (secs < 0)
-		secs = 0;
-
-	return secs;
-}
-
-static void
-lnet_ping_router_locked(struct lnet_peer *rtr)
-{
-	struct lnet_rc_data *rcd = NULL;
-	unsigned long now = jiffies;
-	int secs;
-
-	lnet_peer_addref_locked(rtr);
-
-	if (rtr->lp_ping_deadline && /* ping timed out? */
-	    time_after(now, rtr->lp_ping_deadline))
-		lnet_notify_locked(rtr, 1, 0, now);
-
-	/* Run any outstanding notifications */
-	lnet_ni_notify_locked(rtr->lp_ni, rtr);
-
-	if (!lnet_isrouter(rtr) ||
-	    the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
-		/* router table changed or router checker is shutting down */
-		lnet_peer_decref_locked(rtr);
-		return;
-	}
-
-	rcd = rtr->lp_rcd ?
-	      rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
-
-	if (!rcd)
-		return;
-
-	secs = lnet_router_check_interval(rtr);
-
-	CDEBUG(D_NET,
-	       "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n",
-	       libcfs_nid2str(rtr->lp_nid), secs,
-	       rtr->lp_ping_deadline, rtr->lp_ping_notsent,
-	       rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
-
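-	/* secs is in seconds but lp_ping_timestamp is in jiffies, hence the HZ scaling below */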
-	if (secs && !rtr->lp_ping_notsent &&
-	    time_after(now, rtr->lp_ping_timestamp + secs * HZ)) {
-		int rc;
-		struct lnet_process_id id;
-		struct lnet_handle_md mdh;
-
-		id.nid = rtr->lp_nid;
-		id.pid = LNET_PID_LUSTRE;
-		CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
-
-		rtr->lp_ping_notsent   = 1;
-		rtr->lp_ping_timestamp = now;
-
-		mdh = rcd->rcd_mdh;
-
-		if (!rtr->lp_ping_deadline) {
-			rtr->lp_ping_deadline =
-				jiffies + router_ping_timeout * HZ;
-		}
-
-		lnet_net_unlock(rtr->lp_cpt);
-
-		rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
-			     LNET_PROTO_PING_MATCHBITS, 0);
-
-		lnet_net_lock(rtr->lp_cpt);
-		if (rc)
-			rtr->lp_ping_notsent = 0; /* no event pending */
-	}
-
-	lnet_peer_decref_locked(rtr);
-}
-
-int
-lnet_router_checker_start(void)
-{
-	struct task_struct *task;
-	int rc;
-	int eqsz = 0;
-
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
-	if (check_routers_before_use &&
-	    dead_router_check_interval <= 0) {
-		LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n");
-		return -EINVAL;
-	}
-
-	init_completion(&the_lnet.ln_rc_signal);
-
-	rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh);
-	if (rc) {
-		CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
-		return -ENOMEM;
-	}
-
-	the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
-	task = kthread_run(lnet_router_checker, NULL, "router_checker");
-	if (IS_ERR(task)) {
-		rc = PTR_ERR(task);
-		CERROR("Can't start router checker thread: %d\n", rc);
-		/* block until event callback signals exit */
-		wait_for_completion(&the_lnet.ln_rc_signal);
-		rc = LNetEQFree(the_lnet.ln_rc_eqh);
-		LASSERT(!rc);
-		the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
-		return -ENOMEM;
-	}
-
-	if (check_routers_before_use) {
-		/*
-		 * Note that a helpful side-effect of pinging all known routers
-		 * at startup is that it makes them drop stale connections they
-		 * may have to a previous instance of me.
-		 */
-		lnet_wait_known_routerstate();
-	}
-
-	return 0;
-}
-
-void
-lnet_router_checker_stop(void)
-{
-	int rc;
-
-	if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
-		return;
-
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
-	the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;
-	/* wakeup the RC thread if it's sleeping */
-	wake_up(&the_lnet.ln_rc_waitq);
-
-	/* block until event callback signals exit */
-	wait_for_completion(&the_lnet.ln_rc_signal);
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
-	rc = LNetEQFree(the_lnet.ln_rc_eqh);
-	LASSERT(!rc);
-}
-
-static void
-lnet_prune_rc_data(int wait_unlink)
-{
-	struct lnet_rc_data *rcd;
-	struct lnet_rc_data *tmp;
-	struct lnet_peer *lp;
-	struct list_head head;
-	int i = 2;
-
-	if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
-		   list_empty(&the_lnet.ln_rcd_deathrow) &&
-		   list_empty(&the_lnet.ln_rcd_zombie)))
-		return;
-
-	INIT_LIST_HEAD(&head);
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
-		/* router checker is stopping, prune all */
-		list_for_each_entry(lp, &the_lnet.ln_routers,
-				    lp_rtr_list) {
-			if (!lp->lp_rcd)
-				continue;
-
-			LASSERT(list_empty(&lp->lp_rcd->rcd_list));
-			list_add(&lp->lp_rcd->rcd_list,
-				 &the_lnet.ln_rcd_deathrow);
-			lp->lp_rcd = NULL;
-		}
-	}
-
-	/* unlink all RCDs on deathrow list */
-	list_splice_init(&the_lnet.ln_rcd_deathrow, &head);
-
-	if (!list_empty(&head)) {
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		list_for_each_entry(rcd, &head, rcd_list)
-			LNetMDUnlink(rcd->rcd_mdh);
-
-		lnet_net_lock(LNET_LOCK_EX);
-	}
-
-	list_splice_init(&head, &the_lnet.ln_rcd_zombie);
-
-	/* release all zombie RCDs */
-	while (!list_empty(&the_lnet.ln_rcd_zombie)) {
-		list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
-					 rcd_list) {
-			if (LNetMDHandleIsInvalid(rcd->rcd_mdh))
-				list_move(&rcd->rcd_list, &head);
-		}
-
-		wait_unlink = wait_unlink &&
-			      !list_empty(&the_lnet.ln_rcd_zombie);
-
-		lnet_net_unlock(LNET_LOCK_EX);
-
-		while (!list_empty(&head)) {
-			rcd = list_entry(head.next,
-					 struct lnet_rc_data, rcd_list);
-			list_del_init(&rcd->rcd_list);
-			lnet_destroy_rc_data(rcd);
-		}
-
-		if (!wait_unlink)
-			return;
-
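-		/*
-		 * (i & -i) == i only when i is a power of two, so the
-		 * warning is logged with exponential backoff
-		 */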
-		i++;
-		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-		       "Waiting for rc buffers to unlink\n");
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ / 4);
-
-		lnet_net_lock(LNET_LOCK_EX);
-	}
-
-	lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/*
- * This function is called to check if the RC should block indefinitely.
- * It's called from lnet_router_checker() as well as being passed to
- * wait_event_interruptible() to avoid the lost wake_up problem.
- *
- * When called from wait_event_interruptible() it must also not sleep
- * while the RC state is not running, to avoid a deadlock when the
- * system is shutting down
- */
-static inline bool
-lnet_router_checker_active(void)
-{
-	if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING)
-		return true;
-
-	/*
-	 * Router Checker thread needs to run when routing is enabled in
-	 * order to call lnet_update_ni_status_locked()
-	 */
-	if (the_lnet.ln_routing)
-		return true;
-
-	return !list_empty(&the_lnet.ln_routers) &&
-		(live_router_check_interval > 0 ||
-		 dead_router_check_interval > 0);
-}
-
-static int
-lnet_router_checker(void *arg)
-{
-	struct lnet_peer *rtr;
-	struct list_head *entry;
-
-	while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
-		__u64 version;
-		int cpt;
-		int cpt2;
-
-		cpt = lnet_net_lock_current();
-rescan:
-		version = the_lnet.ln_routers_version;
-
-		list_for_each(entry, &the_lnet.ln_routers) {
-			rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
-			cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
-			if (cpt != cpt2) {
-				lnet_net_unlock(cpt);
-				cpt = cpt2;
-				lnet_net_lock(cpt);
-				/* the routers list has changed */
-				if (version != the_lnet.ln_routers_version)
-					goto rescan;
-			}
-
-			lnet_ping_router_locked(rtr);
-
-			/* NB dropped lock */
-			if (version != the_lnet.ln_routers_version) {
-				/* the routers list has changed */
-				goto rescan;
-			}
-		}
-
-		if (the_lnet.ln_routing)
-			lnet_update_ni_status_locked();
-
-		lnet_net_unlock(cpt);
-
-		lnet_prune_rc_data(0); /* don't wait for UNLINK */
-
-		/*
-		 * Calling schedule_timeout() here always adds 1 to the load
-		 * average because the kernel counts # active tasks as
-		 * nr_running + nr_uninterruptible.
-		 */
-		/*
-		 * if there are any routes then wake up every second.  If
-		 * there are no routes then sleep indefinitely until woken
-		 * up by a user adding a route
-		 */
-		if (!lnet_router_checker_active())
-			wait_event_interruptible(the_lnet.ln_rc_waitq,
-						 lnet_router_checker_active());
-		else
-			wait_event_interruptible_timeout(the_lnet.ln_rc_waitq,
-							 false,
-							 HZ);
-	}
-
-	lnet_prune_rc_data(1); /* wait for UNLINK */
-
-	the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
-	complete(&the_lnet.ln_rc_signal);
-	/* The unlink event callback will signal final completion */
-	return 0;
-}
-
-void
-lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages)
-{
-	while (--npages >= 0)
-		__free_page(rb->rb_kiov[npages].bv_page);
-
-	kfree(rb);
-}
-
-static struct lnet_rtrbuf *
-lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt)
-{
-	int npages = rbp->rbp_npages;
-	int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]);
-	struct page *page;
-	struct lnet_rtrbuf *rb;
-	int i;
-
-	rb = kzalloc_cpt(sz, GFP_NOFS, cpt);
-	if (!rb)
-		return NULL;
-
-	rb->rb_pool = rbp;
-
-	for (i = 0; i < npages; i++) {
-		page = alloc_pages_node(
-				cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				GFP_KERNEL | __GFP_ZERO, 0);
-		if (!page) {
-			while (--i >= 0)
-				__free_page(rb->rb_kiov[i].bv_page);
-
-			kfree(rb);
-			return NULL;
-		}
-
-		rb->rb_kiov[i].bv_len = PAGE_SIZE;
-		rb->rb_kiov[i].bv_offset = 0;
-		rb->rb_kiov[i].bv_page = page;
-	}
-
-	return rb;
-}
-
-static void
-lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
-{
-	int npages = rbp->rbp_npages;
-	struct list_head tmp;
-	struct lnet_rtrbuf *rb;
-	struct lnet_rtrbuf *temp;
-
-	if (!rbp->rbp_nbuffers) /* not initialized or already freed */
-		return;
-
-	INIT_LIST_HEAD(&tmp);
-
-	lnet_net_lock(cpt);
-	lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt);
-	list_splice_init(&rbp->rbp_bufs, &tmp);
-	rbp->rbp_req_nbuffers = 0;
-	rbp->rbp_nbuffers = 0;
-	rbp->rbp_credits = 0;
-	rbp->rbp_mincredits = 0;
-	lnet_net_unlock(cpt);
-
-	/* Free buffers on the free list. */
-	list_for_each_entry_safe(rb, temp, &tmp, rb_list) {
-		list_del(&rb->rb_list);
-		lnet_destroy_rtrbuf(rb, npages);
-	}
-}
-
-static int
-lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
-{
-	struct list_head rb_list;
-	struct lnet_rtrbuf *rb;
-	int num_rb;
-	int num_buffers = 0;
-	int old_req_nbufs;
-	int npages = rbp->rbp_npages;
-
-	lnet_net_lock(cpt);
-	/*
-	 * If we are called for fewer buffers than are already in the pool, we
-	 * just lower the req_nbuffers number and excess buffers will be
-	 * thrown away as they are returned to the free list.  Credits
-	 * then get adjusted as well.
-	 * If we already have enough buffers allocated to serve the
-	 * increase requested, then we can treat that the same way as we
-	 * do the decrease.
-	 */
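-	/*
-	 * E.g. with 512 buffers in the pool, a request for 256 just sets
-	 * rbp_req_nbuffers = 256 here and lets the excess drain away.
-	 */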
-	num_rb = nbufs - rbp->rbp_nbuffers;
-	if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) {
-		rbp->rbp_req_nbuffers = nbufs;
-		lnet_net_unlock(cpt);
-		return 0;
-	}
-	/*
-	 * store the old value of rbp_req_nbuffers and then set it to
-	 * the new request to prevent lnet_return_rx_credits_locked() from
-	 * freeing buffers that we need to keep around
-	 */
-	old_req_nbufs = rbp->rbp_req_nbuffers;
-	rbp->rbp_req_nbuffers = nbufs;
-	lnet_net_unlock(cpt);
-
-	INIT_LIST_HEAD(&rb_list);
-
-	/*
-	 * allocate the buffers on a local list first.  If all buffers are
-	 * allocated successfully then join this list to the rbp buffer
-	 * list. If not then free all allocated buffers.
-	 */
-	while (num_rb-- > 0) {
-		rb = lnet_new_rtrbuf(rbp, cpt);
-		if (!rb) {
-			CERROR("Failed to allocate %d route bufs of %d pages\n",
-			       nbufs, npages);
-
-			lnet_net_lock(cpt);
-			rbp->rbp_req_nbuffers = old_req_nbufs;
-			lnet_net_unlock(cpt);
-
-			goto failed;
-		}
-
-		list_add(&rb->rb_list, &rb_list);
-		num_buffers++;
-	}
-
-	lnet_net_lock(cpt);
-
-	list_splice_tail(&rb_list, &rbp->rbp_bufs);
-	rbp->rbp_nbuffers += num_buffers;
-	rbp->rbp_credits += num_buffers;
-	rbp->rbp_mincredits = rbp->rbp_credits;
-	/*
-	 * We need to schedule any blocked messages using the newly
-	 * added buffers.
-	 */
-	while (!list_empty(&rbp->rbp_bufs) &&
-	       !list_empty(&rbp->rbp_msgs))
-		lnet_schedule_blocked_locked(rbp);
-
-	lnet_net_unlock(cpt);
-
-	return 0;
-
-failed:
-	while (!list_empty(&rb_list)) {
-		rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list);
-		list_del(&rb->rb_list);
-		lnet_destroy_rtrbuf(rb, npages);
-	}
-
-	return -ENOMEM;
-}
-
-static void
-lnet_rtrpool_init(struct lnet_rtrbufpool *rbp, int npages)
-{
-	INIT_LIST_HEAD(&rbp->rbp_msgs);
-	INIT_LIST_HEAD(&rbp->rbp_bufs);
-
-	rbp->rbp_npages = npages;
-	rbp->rbp_credits = 0;
-	rbp->rbp_mincredits = 0;
-}
-
-void
-lnet_rtrpools_free(int keep_pools)
-{
-	struct lnet_rtrbufpool *rtrp;
-	int i;
-
-	if (!the_lnet.ln_rtrpools) /* uninitialized or freed */
-		return;
-
-	cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-		lnet_rtrpool_free_bufs(&rtrp[LNET_TINY_BUF_IDX], i);
-		lnet_rtrpool_free_bufs(&rtrp[LNET_SMALL_BUF_IDX], i);
-		lnet_rtrpool_free_bufs(&rtrp[LNET_LARGE_BUF_IDX], i);
-	}
-
-	if (!keep_pools) {
-		cfs_percpt_free(the_lnet.ln_rtrpools);
-		the_lnet.ln_rtrpools = NULL;
-	}
-}
-
-static int
-lnet_nrb_tiny_calculate(void)
-{
-	int nrbs = LNET_NRB_TINY;
-
-	if (tiny_router_buffers < 0) {
-		LCONSOLE_ERROR_MSG(0x10c,
-				   "tiny_router_buffers=%d invalid when routing enabled\n",
-				   tiny_router_buffers);
-		return -EINVAL;
-	}
-
-	if (tiny_router_buffers > 0)
-		nrbs = tiny_router_buffers;
-
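-	/* split the configured total evenly across CPTs, with a per-CPT floor */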
-	nrbs /= LNET_CPT_NUMBER;
-	return max(nrbs, LNET_NRB_TINY_MIN);
-}
-
-static int
-lnet_nrb_small_calculate(void)
-{
-	int nrbs = LNET_NRB_SMALL;
-
-	if (small_router_buffers < 0) {
-		LCONSOLE_ERROR_MSG(0x10c,
-				   "small_router_buffers=%d invalid when routing enabled\n",
-				   small_router_buffers);
-		return -EINVAL;
-	}
-
-	if (small_router_buffers > 0)
-		nrbs = small_router_buffers;
-
-	nrbs /= LNET_CPT_NUMBER;
-	return max(nrbs, LNET_NRB_SMALL_MIN);
-}
-
-static int
-lnet_nrb_large_calculate(void)
-{
-	int nrbs = LNET_NRB_LARGE;
-
-	if (large_router_buffers < 0) {
-		LCONSOLE_ERROR_MSG(0x10c,
-				   "large_router_buffers=%d invalid when routing enabled\n",
-				   large_router_buffers);
-		return -EINVAL;
-	}
-
-	if (large_router_buffers > 0)
-		nrbs = large_router_buffers;
-
-	nrbs /= LNET_CPT_NUMBER;
-	return max(nrbs, LNET_NRB_LARGE_MIN);
-}
-
-int
-lnet_rtrpools_alloc(int im_a_router)
-{
-	struct lnet_rtrbufpool *rtrp;
-	int nrb_tiny;
-	int nrb_small;
-	int nrb_large;
-	int rc;
-	int i;
-
-	if (!strcmp(forwarding, "")) {
-		/* not set either way */
-		if (!im_a_router)
-			return 0;
-	} else if (!strcmp(forwarding, "disabled")) {
-		/* explicitly disabled */
-		return 0;
-	} else if (!strcmp(forwarding, "enabled")) {
-		/* explicitly enabled */
-	} else {
-		LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
-		return -EINVAL;
-	}
-
-	nrb_tiny = lnet_nrb_tiny_calculate();
-	if (nrb_tiny < 0)
-		return -EINVAL;
-
-	nrb_small = lnet_nrb_small_calculate();
-	if (nrb_small < 0)
-		return -EINVAL;
-
-	nrb_large = lnet_nrb_large_calculate();
-	if (nrb_large < 0)
-		return -EINVAL;
-
-	the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(),
-						LNET_NRBPOOLS *
-						sizeof(struct lnet_rtrbufpool));
-	if (!the_lnet.ln_rtrpools) {
-		LCONSOLE_ERROR_MSG(0x10c,
-				   "Failed to initialize router buffer pool\n");
-		return -ENOMEM;
-	}
-
-	cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-		lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0);
-		rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
-					      nrb_tiny, i);
-		if (rc)
-			goto failed;
-
-		lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX],
-				  LNET_NRB_SMALL_PAGES);
-		rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
-					      nrb_small, i);
-		if (rc)
-			goto failed;
-
-		lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX],
-				  LNET_NRB_LARGE_PAGES);
-		rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
-					      nrb_large, i);
-		if (rc)
-			goto failed;
-	}
-
-	lnet_net_lock(LNET_LOCK_EX);
-	the_lnet.ln_routing = 1;
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return 0;
-
- failed:
-	lnet_rtrpools_free(0);
-	return rc;
-}
-
-static int
-lnet_rtrpools_adjust_helper(int tiny, int small, int large)
-{
-	int nrb = 0;
-	int rc = 0;
-	int i;
-	struct lnet_rtrbufpool *rtrp;
-
-	/*
-	 * If the provided values for each buffer pool differ from the
-	 * configured values, we need to take action.
-	 */
-	if (tiny >= 0) {
-		tiny_router_buffers = tiny;
-		nrb = lnet_nrb_tiny_calculate();
-		cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-			rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
-						      nrb, i);
-			if (rc)
-				return rc;
-		}
-	}
-	if (small >= 0) {
-		small_router_buffers = small;
-		nrb = lnet_nrb_small_calculate();
-		cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-			rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
-						      nrb, i);
-			if (rc)
-				return rc;
-		}
-	}
-	if (large >= 0) {
-		large_router_buffers = large;
-		nrb = lnet_nrb_large_calculate();
-		cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
-			rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
-						      nrb, i);
-			if (rc)
-				return rc;
-		}
-	}
-
-	return 0;
-}
-
-int
-lnet_rtrpools_adjust(int tiny, int small, int large)
-{
-	/*
-	 * this function doesn't revert the changes if adding new buffers
-	 * fails.  It's up to the user space caller to revert the
-	 * changes.
-	 */
-	if (!the_lnet.ln_routing)
-		return 0;
-
-	return lnet_rtrpools_adjust_helper(tiny, small, large);
-}
-
-int
-lnet_rtrpools_enable(void)
-{
-	int rc = 0;
-
-	if (the_lnet.ln_routing)
-		return 0;
-
-	if (!the_lnet.ln_rtrpools)
-		/*
-		 * If routing is turned off, and we have never
-		 * initialized the pools before, just call the
-		 * standard buffer pool allocation routine as
-		 * if we are just configuring this for the first
-		 * time.
-		 */
-		rc = lnet_rtrpools_alloc(1);
-	else
-		rc = lnet_rtrpools_adjust_helper(0, 0, 0);
-	if (rc)
-		return rc;
-
-	lnet_net_lock(LNET_LOCK_EX);
-	the_lnet.ln_routing = 1;
-
-	the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return rc;
-}
-
-void
-lnet_rtrpools_disable(void)
-{
-	if (!the_lnet.ln_routing)
-		return;
-
-	lnet_net_lock(LNET_LOCK_EX);
-	the_lnet.ln_routing = 0;
-	the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
-
-	tiny_router_buffers = 0;
-	small_router_buffers = 0;
-	large_router_buffers = 0;
-	lnet_net_unlock(LNET_LOCK_EX);
-	lnet_rtrpools_free(1);
-}
-
-int
-lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, int alive, unsigned long when)
-{
-	struct lnet_peer *lp = NULL;
-	unsigned long now = jiffies;
-	int cpt = lnet_cpt_of_nid(nid);
-
-	LASSERT(!in_interrupt());
-
-	CDEBUG(D_NET, "%s notifying %s: %s\n",
-	       !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
-	       libcfs_nid2str(nid),
-	       alive ? "up" : "down");
-
-	if (ni &&
-	    LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
-		CWARN("Ignoring notification of %s %s by %s (different net)\n",
-		      libcfs_nid2str(nid), alive ? "birth" : "death",
-		      libcfs_nid2str(ni->ni_nid));
-		return -EINVAL;
-	}
-
-	/* can't do predictions... */
-	if (time_after(when, now)) {
-		CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n",
-		      !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
-		      libcfs_nid2str(nid), alive ? "up" : "down",
-		      (when - now) / HZ);
-		return -EINVAL;
-	}
-
-	if (ni && !alive &&	     /* LND telling me she's down */
-	    !auto_down) {		       /* auto-down disabled */
-		CDEBUG(D_NET, "Auto-down disabled\n");
-		return 0;
-	}
-
-	lnet_net_lock(cpt);
-
-	if (the_lnet.ln_shutdown) {
-		lnet_net_unlock(cpt);
-		return -ESHUTDOWN;
-	}
-
-	lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
-	if (!lp) {
-		/* nid not found */
-		lnet_net_unlock(cpt);
-		CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
-		return 0;
-	}
-
-	/*
-	 * We can't fully trust the LND to report an exact peer last_alive
-	 * when it notifies us about a dead peer. For example, ksocklnd can
-	 * call us with when == _time_when_the_node_was_booted_ if
-	 * no connections were ever successfully established
-	 */
-	if (ni && !alive && when < lp->lp_last_alive)
-		when = lp->lp_last_alive;
-
-	lnet_notify_locked(lp, !ni, alive, when);
-
-	if (ni)
-		lnet_ni_notify_locked(ni, lp);
-
-	lnet_peer_decref_locked(lp);
-
-	lnet_net_unlock(cpt);
-	return 0;
-}
-EXPORT_SYMBOL(lnet_notify);

+ 0 - 907
drivers/staging/lustre/lnet/lnet/router_proc.c

@@ -1,907 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- *   This file is part of Portals
- *   http://sourceforge.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/*
- * This is really lnet_proc.c. You might need to update sanity test 215
- * if any file format is changed.
- */
-
-#define LNET_LOFFT_BITS		(sizeof(loff_t) * 8)
-/*
- * NB: the max allowed LNET_CPT_BITS is 8 on a 64-bit system and 2 on a 32-bit one
- */
-#define LNET_PROC_CPT_BITS	(LNET_CPT_BITS + 1)
-/* change version, 16 bits or 8 bits */
-#define LNET_PROC_VER_BITS	max_t(size_t, min_t(size_t, LNET_LOFFT_BITS, 64) / 4, 8)
-
-#define LNET_PROC_HASH_BITS	LNET_PEER_HASH_BITS
-/*
- * bits for peer hash offset
- * NB: we don't use the highest bit of *ppos because it's signed
- */
-#define LNET_PROC_HOFF_BITS	(LNET_LOFFT_BITS -       \
-				 LNET_PROC_CPT_BITS -    \
-				 LNET_PROC_VER_BITS -    \
-				 LNET_PROC_HASH_BITS - 1)
-/* bits for hash index + position */
-#define LNET_PROC_HPOS_BITS	(LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS)
-/* bits for peer hash table + hash version */
-#define LNET_PROC_VPOS_BITS	(LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS)
-
-#define LNET_PROC_CPT_MASK	((1ULL << LNET_PROC_CPT_BITS) - 1)
-#define LNET_PROC_VER_MASK	((1ULL << LNET_PROC_VER_BITS) - 1)
-#define LNET_PROC_HASH_MASK	((1ULL << LNET_PROC_HASH_BITS) - 1)
-#define LNET_PROC_HOFF_MASK	((1ULL << LNET_PROC_HOFF_BITS) - 1)
-
-#define LNET_PROC_CPT_GET(pos)				\
-	(int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK)
-
-#define LNET_PROC_VER_GET(pos)				\
-	(int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK)
-
-#define LNET_PROC_HASH_GET(pos)				\
-	(int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK)
-
-#define LNET_PROC_HOFF_GET(pos)				\
-	(int)((pos) & LNET_PROC_HOFF_MASK)
-
-#define LNET_PROC_POS_MAKE(cpt, ver, hash, off)		\
-	(((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) |   \
-	((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) |   \
-	((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \
-	((off) & LNET_PROC_HOFF_MASK))
-
-#define LNET_PROC_VERSION(v)	((unsigned int)((v) & LNET_PROC_VER_MASK))
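-
-/*
- * A sketch of the resulting *ppos layout, derived from the macros above
- * (assuming a 64-bit loff_t), from most- to least-significant bits:
- *
- *   [ sign (unused) ][ cpt ][ change version ][ hash index ][ hash offset ]
- */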
-
-static int __proc_lnet_stats(void *data, int write,
-			     loff_t pos, void __user *buffer, int nob)
-{
-	int rc;
-	struct lnet_counters *ctrs;
-	int len;
-	char *tmpstr;
-	const int tmpsiz = 256; /* 7 %u and 4 %llu */
-
-	if (write) {
-		lnet_counters_reset();
-		return 0;
-	}
-
-	/* read */
-
-	ctrs = kzalloc(sizeof(*ctrs), GFP_NOFS);
-	if (!ctrs)
-		return -ENOMEM;
-
-	tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr) {
-		kfree(ctrs);
-		return -ENOMEM;
-	}
-
-	lnet_counters_get(ctrs);
-
-	len = snprintf(tmpstr, tmpsiz,
-		       "%u %u %u %u %u %u %u %llu %llu %llu %llu",
-		       ctrs->msgs_alloc, ctrs->msgs_max,
-		       ctrs->errors,
-		       ctrs->send_count, ctrs->recv_count,
-		       ctrs->route_count, ctrs->drop_count,
-		       ctrs->send_length, ctrs->recv_length,
-		       ctrs->route_length, ctrs->drop_length);
-
-	if (pos >= min_t(int, len, strlen(tmpstr)))
-		rc = 0;
-	else
-		rc = cfs_trace_copyout_string(buffer, nob,
-					      tmpstr + pos, "\n");
-
-	kfree(tmpstr);
-	kfree(ctrs);
-	return rc;
-}
-
-static int proc_lnet_stats(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_lnet_stats);
-}
-
-static int proc_lnet_routes(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	const int tmpsiz = 256;
-	char *tmpstr;
-	char *s;
-	int rc = 0;
-	int len;
-	int ver;
-	int off;
-
-	BUILD_BUG_ON(sizeof(loff_t) < 4);
-
-	off = LNET_PROC_HOFF_GET(*ppos);
-	ver = LNET_PROC_VER_GET(*ppos);
-
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	if (!*ppos) {
-		s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n",
-			      the_lnet.ln_routing ? "enabled" : "disabled");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-
-		s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %8s %7s %s\n",
-			      "net", "hops", "priority", "state", "router");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-
-		lnet_net_lock(0);
-		ver = (unsigned int)the_lnet.ln_remote_nets_version;
-		lnet_net_unlock(0);
-		*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
-	} else {
-		struct list_head *n;
-		struct list_head *r;
-		struct lnet_route *route = NULL;
-		struct lnet_remotenet *rnet  = NULL;
-		int skip  = off - 1;
-		struct list_head *rn_list;
-		int i;
-
-		lnet_net_lock(0);
-
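-		/*
-		 * The change version stamped into *ppos on the first read
-		 * must still match; if the route table changed in between,
-		 * fail with -ESTALE so the reader starts over.
-		 */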
-		if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
-			lnet_net_unlock(0);
-			kfree(tmpstr);
-			return -ESTALE;
-		}
-
-		for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && !route; i++) {
-			rn_list = &the_lnet.ln_remote_nets_hash[i];
-
-			n = rn_list->next;
-
-			while (n != rn_list && !route) {
-				rnet = list_entry(n, struct lnet_remotenet,
-						  lrn_list);
-
-				r = rnet->lrn_routes.next;
-
-				while (r != &rnet->lrn_routes) {
-					struct lnet_route *re;
-
-					re = list_entry(r, struct lnet_route,
-							lr_list);
-					if (!skip) {
-						route = re;
-						break;
-					}
-
-					skip--;
-					r = r->next;
-				}
-
-				n = n->next;
-			}
-		}
-
-		if (route) {
-			__u32 net = rnet->lrn_net;
-			__u32 hops = route->lr_hops;
-			unsigned int priority = route->lr_priority;
-			lnet_nid_t nid = route->lr_gateway->lp_nid;
-			int alive = lnet_is_route_alive(route);
-
-			s += snprintf(s, tmpstr + tmpsiz - s,
-				      "%-8s %4u %8u %7s %s\n",
-				      libcfs_net2str(net), hops,
-				      priority,
-				      alive ? "up" : "down",
-				      libcfs_nid2str(nid));
-			LASSERT(tmpstr + tmpsiz - s > 0);
-		}
-
-		lnet_net_unlock(0);
-	}
-
-	len = s - tmpstr;     /* how many bytes were written */
-
-	if (len > *lenp) {    /* linux-supplied buffer is too small */
-		rc = -EINVAL;
-	} else if (len > 0) { /* wrote something */
-		if (copy_to_user(buffer, tmpstr, len)) {
-			rc = -EFAULT;
-		} else {
-			off += 1;
-			*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
-		}
-	}
-
-	kfree(tmpstr);
-
-	if (!rc)
-		*lenp = len;
-
-	return rc;
-}
-
-static int proc_lnet_routers(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int rc = 0;
-	char *tmpstr;
-	char *s;
-	const int tmpsiz = 256;
-	int len;
-	int ver;
-	int off;
-
-	off = LNET_PROC_HOFF_GET(*ppos);
-	ver = LNET_PROC_VER_GET(*ppos);
-
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	if (!*ppos) {
-		s += snprintf(s, tmpstr + tmpsiz - s,
-			      "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n",
-			      "ref", "rtr_ref", "alive_cnt", "state",
-			      "last_ping", "ping_sent", "deadline",
-			      "down_ni", "router");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-
-		lnet_net_lock(0);
-		ver = (unsigned int)the_lnet.ln_routers_version;
-		lnet_net_unlock(0);
-		*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
-	} else {
-		struct list_head *r;
-		struct lnet_peer *peer = NULL;
-		int skip = off - 1;
-
-		lnet_net_lock(0);
-
-		if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) {
-			lnet_net_unlock(0);
-
-			kfree(tmpstr);
-			return -ESTALE;
-		}
-
-		r = the_lnet.ln_routers.next;
-
-		while (r != &the_lnet.ln_routers) {
-			struct lnet_peer *lp;
-
-			lp = list_entry(r, struct lnet_peer, lp_rtr_list);
-			if (!skip) {
-				peer = lp;
-				break;
-			}
-
-			skip--;
-			r = r->next;
-		}
-
-		if (peer) {
-			lnet_nid_t nid = peer->lp_nid;
-			unsigned long now = jiffies;
-			unsigned long deadline = peer->lp_ping_deadline;
-			int nrefs = peer->lp_refcount;
-			int nrtrrefs = peer->lp_rtr_refcount;
-			int alive_cnt = peer->lp_alive_count;
-			int alive = peer->lp_alive;
-			int pingsent = !peer->lp_ping_notsent;
-			int last_ping = (now - peer->lp_ping_timestamp) / HZ;
-			int down_ni = 0;
-			struct lnet_route *rtr;
-
-			if ((peer->lp_ping_feats &
-			     LNET_PING_FEAT_NI_STATUS)) {
-				list_for_each_entry(rtr, &peer->lp_routes,
-						    lr_gwlist) {
-					/*
-					 * the downis count on any route equals
-					 * the number of down NIs on the gateway
-					 */
-					if (rtr->lr_downis) {
-						down_ni = rtr->lr_downis;
-						break;
-					}
-				}
-			}
-
-			if (!deadline)
-				s += snprintf(s, tmpstr + tmpsiz - s,
-					      "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n",
-					      nrefs, nrtrrefs, alive_cnt,
-					      alive ? "up" : "down", last_ping,
-					      pingsent, "NA", down_ni,
-					      libcfs_nid2str(nid));
-			else
-				s += snprintf(s, tmpstr + tmpsiz - s,
-					      "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n",
-					      nrefs, nrtrrefs, alive_cnt,
-					      alive ? "up" : "down", last_ping,
-					      pingsent,
-					      (deadline - now) / HZ,
-					      down_ni, libcfs_nid2str(nid));
-			LASSERT(tmpstr + tmpsiz - s > 0);
-		}
-
-		lnet_net_unlock(0);
-	}
-
-	len = s - tmpstr;     /* how many bytes were written */
-
-	if (len > *lenp) {    /* linux-supplied buffer is too small */
-		rc = -EINVAL;
-	} else if (len > 0) { /* wrote something */
-		if (copy_to_user(buffer, tmpstr, len)) {
-			rc = -EFAULT;
-		} else {
-			off += 1;
-			*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
-		}
-	}
-
-	kfree(tmpstr);
-
-	if (!rc)
-		*lenp = len;
-
-	return rc;
-}
-
-static int proc_lnet_peers(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	const int tmpsiz  = 256;
-	struct lnet_peer_table *ptable;
-	char *tmpstr;
-	char *s;
-	int cpt  = LNET_PROC_CPT_GET(*ppos);
-	int ver  = LNET_PROC_VER_GET(*ppos);
-	int hash = LNET_PROC_HASH_GET(*ppos);
-	int hoff = LNET_PROC_HOFF_GET(*ppos);
-	int rc = 0;
-	int len;
-
-	BUILD_BUG_ON(LNET_PROC_HASH_BITS < LNET_PEER_HASH_BITS);
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	if (cpt >= LNET_CPT_NUMBER) {
-		*lenp = 0;
-		return 0;
-	}
-
-	tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	if (!*ppos) {
-		s += snprintf(s, tmpstr + tmpsiz - s,
-			      "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n",
-			      "nid", "refs", "state", "last", "max",
-			      "rtr", "min", "tx", "min", "queue");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-
-		hoff++;
-	} else {
-		struct lnet_peer *peer;
-		struct list_head *p;
-		int skip;
- again:
-		p = NULL;
-		peer = NULL;
-		skip = hoff - 1;
-
-		lnet_net_lock(cpt);
-		ptable = the_lnet.ln_peer_tables[cpt];
-		if (hoff == 1)
-			ver = LNET_PROC_VERSION(ptable->pt_version);
-
-		if (ver != LNET_PROC_VERSION(ptable->pt_version)) {
-			lnet_net_unlock(cpt);
-			kfree(tmpstr);
-			return -ESTALE;
-		}
-
-		while (hash < LNET_PEER_HASH_SIZE) {
-			if (!p)
-				p = ptable->pt_hash[hash].next;
-
-			while (p != &ptable->pt_hash[hash]) {
-				struct lnet_peer *lp;
-
-				lp = list_entry(p, struct lnet_peer,
-						lp_hashlist);
-				if (!skip) {
-					peer = lp;
-
-					/*
-					 * minor optimization: start from idx+1
-					 * on next iteration if we've just
-					 * drained lp_hashlist
-					 */
-					if (lp->lp_hashlist.next ==
-					    &ptable->pt_hash[hash]) {
-						hoff = 1;
-						hash++;
-					} else {
-						hoff++;
-					}
-
-					break;
-				}
-
-				skip--;
-				p = lp->lp_hashlist.next;
-			}
-
-			if (peer)
-				break;
-
-			p = NULL;
-			hoff = 1;
-			hash++;
-		}
-
-		if (peer) {
-			lnet_nid_t nid = peer->lp_nid;
-			int nrefs = peer->lp_refcount;
-			int lastalive = -1;
-			char *aliveness = "NA";
-			int maxcr = peer->lp_ni->ni_peertxcredits;
-			int txcr = peer->lp_txcredits;
-			int mintxcr = peer->lp_mintxcredits;
-			int rtrcr = peer->lp_rtrcredits;
-			int minrtrcr = peer->lp_minrtrcredits;
-			int txqnob = peer->lp_txqnob;
-
-			if (lnet_isrouter(peer) ||
-			    lnet_peer_aliveness_enabled(peer))
-				aliveness = peer->lp_alive ? "up" : "down";
-
-			if (lnet_peer_aliveness_enabled(peer)) {
-				unsigned long now = jiffies;
-				long delta;
-
-				delta = now - peer->lp_last_alive;
-				lastalive = (delta) / HZ;
-
-				/* No need to clutter the peers output with
-				 * arbitrarily large integers - it suffices to
-				 * know that lastalive is more than 10000s old
-				 */
-				if (lastalive >= 10000)
-					lastalive = 9999;
-			}
-
-			lnet_net_unlock(cpt);
-
-			s += snprintf(s, tmpstr + tmpsiz - s,
-				      "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n",
-				      libcfs_nid2str(nid), nrefs, aliveness,
-				      lastalive, maxcr, rtrcr, minrtrcr, txcr,
-				      mintxcr, txqnob);
-			LASSERT(tmpstr + tmpsiz - s > 0);
-
-		} else { /* peer is NULL */
-			lnet_net_unlock(cpt);
-		}
-
-		if (hash == LNET_PEER_HASH_SIZE) {
-			cpt++;
-			hash = 0;
-			hoff = 1;
-			if (!peer && cpt < LNET_CPT_NUMBER)
-				goto again;
-		}
-	}
-
-	len = s - tmpstr;     /* how many bytes were written */
-
-	if (len > *lenp) {    /* linux-supplied buffer is too small */
-		rc = -EINVAL;
-	} else if (len > 0) { /* wrote something */
-		if (copy_to_user(buffer, tmpstr, len))
-			rc = -EFAULT;
-		else
-			*ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff);
-	}
-
-	kfree(tmpstr);
-
-	if (!rc)
-		*lenp = len;
-
-	return rc;
-}
-
-static int __proc_lnet_buffers(void *data, int write,
-			       loff_t pos, void __user *buffer, int nob)
-{
-	char *s;
-	char *tmpstr;
-	int tmpsiz;
-	int idx;
-	int len;
-	int rc;
-	int i;
-
-	LASSERT(!write);
-
-	/* 64 bytes per line of 4 %d fields, (LNET_NRBPOOLS + 1) lines per CPT */
-	tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER;
-	tmpstr = kvmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	s += snprintf(s, tmpstr + tmpsiz - s,
-		      "%5s %5s %7s %7s\n",
-		      "pages", "count", "credits", "min");
-	LASSERT(tmpstr + tmpsiz - s > 0);
-
-	if (!the_lnet.ln_rtrpools)
-		goto out; /* I'm not a router */
-
-	for (idx = 0; idx < LNET_NRBPOOLS; idx++) {
-		struct lnet_rtrbufpool *rbp;
-
-		lnet_net_lock(LNET_LOCK_EX);
-		cfs_percpt_for_each(rbp, i, the_lnet.ln_rtrpools) {
-			s += snprintf(s, tmpstr + tmpsiz - s,
-				      "%5d %5d %7d %7d\n",
-				      rbp[idx].rbp_npages,
-				      rbp[idx].rbp_nbuffers,
-				      rbp[idx].rbp_credits,
-				      rbp[idx].rbp_mincredits);
-			LASSERT(tmpstr + tmpsiz - s > 0);
-		}
-		lnet_net_unlock(LNET_LOCK_EX);
-	}
-
- out:
-	len = s - tmpstr;
-
-	if (pos >= min_t(int, len, strlen(tmpstr)))
-		rc = 0;
-	else
-		rc = cfs_trace_copyout_string(buffer, nob,
-					      tmpstr + pos, NULL);
-
-	kvfree(tmpstr);
-	return rc;
-}
-
-static int proc_lnet_buffers(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_lnet_buffers);
-}
-
-static int proc_lnet_nis(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int tmpsiz = 128 * LNET_CPT_NUMBER;
-	int rc = 0;
-	char *tmpstr;
-	char *s;
-	int len;
-
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	tmpstr = kvmalloc(tmpsiz, GFP_KERNEL);
-	if (!tmpstr)
-		return -ENOMEM;
-
-	s = tmpstr; /* points to current position in tmpstr[] */
-
-	if (!*ppos) {
-		s += snprintf(s, tmpstr + tmpsiz - s,
-			      "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
-			      "nid", "status", "alive", "refs", "peer",
-			      "rtr", "max", "tx", "min");
-		LASSERT(tmpstr + tmpsiz - s > 0);
-	} else {
-		struct list_head *n;
-		struct lnet_ni *ni = NULL;
-		int skip = *ppos - 1;
-
-		lnet_net_lock(0);
-
-		n = the_lnet.ln_nis.next;
-
-		while (n != &the_lnet.ln_nis) {
-			struct lnet_ni *a_ni;
-
-			a_ni = list_entry(n, struct lnet_ni, ni_list);
-			if (!skip) {
-				ni = a_ni;
-				break;
-			}
-
-			skip--;
-			n = n->next;
-		}
-
-		if (ni) {
-			struct lnet_tx_queue *tq;
-			char *stat;
-			time64_t now = ktime_get_real_seconds();
-			int last_alive = -1;
-			int i;
-			int j;
-
-			if (the_lnet.ln_routing)
-				last_alive = now - ni->ni_last_alive;
-
-			/* @lo forever alive */
-			if (ni->ni_lnd->lnd_type == LOLND)
-				last_alive = 0;
-
-			lnet_ni_lock(ni);
-			LASSERT(ni->ni_status);
-			stat = (ni->ni_status->ns_status ==
-				LNET_NI_STATUS_UP) ? "up" : "down";
-			lnet_ni_unlock(ni);
-
-			/*
-			 * we actually output credits information for the
-			 * TX queue of each partition
-			 */
-			cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
-				for (j = 0; ni->ni_cpts &&
-				     j < ni->ni_ncpts; j++) {
-					if (i == ni->ni_cpts[j])
-						break;
-				}
-
-				if (j == ni->ni_ncpts)
-					continue;
-
-				if (i)
-					lnet_net_lock(i);
-
-				s += snprintf(s, tmpstr + tmpsiz - s,
-					      "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
-					      libcfs_nid2str(ni->ni_nid), stat,
-					      last_alive, *ni->ni_refs[i],
-					      ni->ni_peertxcredits,
-					      ni->ni_peerrtrcredits,
-					      tq->tq_credits_max,
-					      tq->tq_credits,
-					      tq->tq_credits_min);
-				if (i)
-					lnet_net_unlock(i);
-			}
-			LASSERT(tmpstr + tmpsiz - s > 0);
-		}
-
-		lnet_net_unlock(0);
-	}
-
-	len = s - tmpstr;     /* how many bytes were written */
-
-	if (len > *lenp) {    /* linux-supplied buffer is too small */
-		rc = -EINVAL;
-	} else if (len > 0) { /* wrote something */
-		if (copy_to_user(buffer, tmpstr, len))
-			rc = -EFAULT;
-		else
-			*ppos += 1;
-	}
-
-	kvfree(tmpstr);
-
-	if (!rc)
-		*lenp = len;
-
-	return rc;
-}
-
-struct lnet_portal_rotors {
-	int pr_value;
-	const char *pr_name;
-	const char *pr_desc;
-};
-
-static struct lnet_portal_rotors	portal_rotors[] = {
-	{
-		.pr_value = LNET_PTL_ROTOR_OFF,
-		.pr_name  = "OFF",
-		.pr_desc  = "Turn off message rotor for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_ON,
-		.pr_name  = "ON",
-		.pr_desc  = "round-robin dispatch all PUT messages for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_RR_RT,
-		.pr_name  = "RR_RT",
-		.pr_desc  = "round-robin dispatch routed PUT message for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_HASH_RT,
-		.pr_name  = "HASH_RT",
-		.pr_desc  = "dispatch routed PUT message by hashing source NID for wildcard portals"
-	},
-	{
-		.pr_value = -1,
-		.pr_name  = NULL,
-		.pr_desc  = NULL
-	},
-};
-
-static int __proc_lnet_portal_rotor(void *data, int write,
-				    loff_t pos, void __user *buffer, int nob)
-{
-	const int buf_len = 128;
-	char *buf;
-	char *tmp;
-	int rc;
-	int i;
-
-	buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	if (!write) {
-		lnet_res_lock(0);
-
-		for (i = 0; portal_rotors[i].pr_value >= 0; i++) {
-			if (portal_rotors[i].pr_value == portal_rotor)
-				break;
-		}
-
-		LASSERT(portal_rotors[i].pr_value == portal_rotor);
-		lnet_res_unlock(0);
-
-		rc = snprintf(buf, buf_len,
-			      "{\n\tportals: all\n"
-			      "\trotor: %s\n\tdescription: %s\n}",
-			      portal_rotors[i].pr_name,
-			      portal_rotors[i].pr_desc);
-
-		if (pos >= min_t(int, rc, buf_len)) {
-			rc = 0;
-		} else {
-			rc = cfs_trace_copyout_string(buffer, nob,
-						      buf + pos, "\n");
-		}
-		goto out;
-	}
-
-	rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob);
-	if (rc < 0)
-		goto out;
-
-	tmp = strim(buf);
-
-	rc = -EINVAL;
-	lnet_res_lock(0);
-	for (i = 0; portal_rotors[i].pr_name; i++) {
-		if (!strncasecmp(portal_rotors[i].pr_name, tmp,
-				 strlen(portal_rotors[i].pr_name))) {
-			portal_rotor = portal_rotors[i].pr_value;
-			rc = 0;
-			break;
-		}
-	}
-	lnet_res_unlock(0);
-out:
-	kfree(buf);
-	return rc;
-}
-
-static int proc_lnet_portal_rotor(struct ctl_table *table, int write,
-				  void __user *buffer, size_t *lenp,
-				  loff_t *ppos)
-{
-	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-				    __proc_lnet_portal_rotor);
-}
-
-static struct ctl_table lnet_table[] = {
-	/*
-	 * NB No .strategy entries have been provided since sysctl(8) prefers
-	 * to go via /proc for portability.
-	 */
-	{
-		.procname     = "stats",
-		.mode         = 0644,
-		.proc_handler = &proc_lnet_stats,
-	},
-	{
-		.procname     = "routes",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_routes,
-	},
-	{
-		.procname     = "routers",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_routers,
-	},
-	{
-		.procname     = "peers",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_peers,
-	},
-	{
-		.procname     = "buffers",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_buffers,
-	},
-	{
-		.procname     = "nis",
-		.mode         = 0444,
-		.proc_handler = &proc_lnet_nis,
-	},
-	{
-		.procname     = "portal_rotor",
-		.mode         = 0644,
-		.proc_handler = &proc_lnet_portal_rotor,
-	},
-	{
-	}
-};
-
-void lnet_router_debugfs_init(void)
-{
-	lustre_insert_debugfs(lnet_table);
-}
-
-void lnet_router_debugfs_fini(void)
-{
-}

+ 0 - 7
drivers/staging/lustre/lnet/selftest/Makefile

@@ -1,7 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o
-
-lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \
-		   module.o ping_test.o brw_test.o

+ 0 - 526
drivers/staging/lustre/lnet/selftest/brw_test.c

@@ -1,526 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/brw_test.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-
-#include "selftest.h"
-
-static int brw_srv_workitems = SFW_TEST_WI_MAX;
-module_param(brw_srv_workitems, int, 0644);
-MODULE_PARM_DESC(brw_srv_workitems, "# BRW server workitems");
-
-static int brw_inject_errors;
-module_param(brw_inject_errors, int, 0644);
-MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default");
-
-#define BRW_POISON	0xbeefbeefbeefbeefULL
-#define BRW_MAGIC	0xeeb0eeb1eeb2eeb3ULL
-#define BRW_MSIZE	sizeof(u64)
-
-static void
-brw_client_fini(struct sfw_test_instance *tsi)
-{
-	struct srpc_bulk *bulk;
-	struct sfw_test_unit	*tsu;
-
-	LASSERT(tsi->tsi_is_client);
-
-	list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
-		bulk = tsu->tsu_private;
-		if (!bulk)
-			continue;
-
-		srpc_free_bulk(bulk);
-		tsu->tsu_private = NULL;
-	}
-}
-
-static int
-brw_client_init(struct sfw_test_instance *tsi)
-{
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	int flags;
-	int off;
-	int npg;
-	int len;
-	int opc;
-	struct srpc_bulk *bulk;
-	struct sfw_test_unit *tsu;
-
-	LASSERT(sn);
-	LASSERT(tsi->tsi_is_client);
-
-	if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-		struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
-
-		opc = breq->blk_opc;
-		flags = breq->blk_flags;
-		npg = breq->blk_npg;
-		/*
-		 * NB: this is not going to work for variable page size,
-		 * but we have to keep it for compatibility
-		 */
-		len = npg * PAGE_SIZE;
-		off = 0;
-	} else {
-		struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
-
-		/*
-		 * We should never get this far with an unknown feature,
-		 * because make_session will reject unknown features
-		 */
-		LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
-		opc = breq->blk_opc;
-		flags = breq->blk_flags;
-		len = breq->blk_len;
-		off = breq->blk_offset & ~PAGE_MASK;
-		npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	}
-
-	if (off % BRW_MSIZE)
-		return -EINVAL;
-
-	if (npg > LNET_MAX_IOV || npg <= 0)
-		return -EINVAL;
-
-	if (opc != LST_BRW_READ && opc != LST_BRW_WRITE)
-		return -EINVAL;
-
-	if (flags != LST_BRW_CHECK_NONE &&
-	    flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE)
-		return -EINVAL;
-
-	list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
-		bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
-				       off, npg, len, opc == LST_BRW_READ);
-		if (!bulk) {
-			brw_client_fini(tsi);
-			return -ENOMEM;
-		}
-
-		tsu->tsu_private = bulk;
-	}
-
-	return 0;
-}
-
-static int brw_inject_one_error(void)
-{
-	struct timespec64 ts;
-
-	if (brw_inject_errors <= 0)
-		return 0;
-
-	ktime_get_ts64(&ts);
-
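-	/* use the parity of the current microsecond as a cheap coin flip */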
-	if (!((ts.tv_nsec / NSEC_PER_USEC) & 1))
-		return 0;
-
-	return brw_inject_errors--;
-}
-
-static void
-brw_fill_page(struct page *pg, int off, int len, int pattern, __u64 magic)
-{
-	char *addr = page_address(pg) + off;
-	int i;
-
-	LASSERT(addr);
-	LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
-
-	if (pattern == LST_BRW_CHECK_NONE)
-		return;
-
-	if (magic == BRW_MAGIC)
-		magic += brw_inject_one_error();
-
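-	/*
-	 * NB: the SIMPLE pattern stamps the first word of the region and the
-	 * word PAGE_SIZE - BRW_MSIZE bytes past it; brw_check_page() mirrors
-	 * this exactly, so fill and verify always agree.
-	 */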
-	if (pattern == LST_BRW_CHECK_SIMPLE) {
-		memcpy(addr, &magic, BRW_MSIZE);
-		if (len > BRW_MSIZE) {
-			addr += PAGE_SIZE - BRW_MSIZE;
-			memcpy(addr, &magic, BRW_MSIZE);
-		}
-		return;
-	}
-
-	if (pattern == LST_BRW_CHECK_FULL) {
-		for (i = 0; i < len; i += BRW_MSIZE)
-			memcpy(addr + i, &magic, BRW_MSIZE);
-		return;
-	}
-
-	LBUG();
-}
-
-static int
-brw_check_page(struct page *pg, int off, int len, int pattern, __u64 magic)
-{
-	char *addr = page_address(pg) + off;
-	__u64 data = 0; /* make compiler happy */
-	int i;
-
-	LASSERT(addr);
-	LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
-
-	if (pattern == LST_BRW_CHECK_NONE)
-		return 0;
-
-	if (pattern == LST_BRW_CHECK_SIMPLE) {
-		data = *((__u64 *)addr);
-		if (data != magic)
-			goto bad_data;
-
-		if (len > BRW_MSIZE) {
-			addr += PAGE_SIZE - BRW_MSIZE;
-			data = *((__u64 *)addr);
-			if (data != magic)
-				goto bad_data;
-		}
-		return 0;
-	}
-
-	if (pattern == LST_BRW_CHECK_FULL) {
-		for (i = 0; i < len; i += BRW_MSIZE) {
-			data = *(u64 *)(addr + i);
-			if (data != magic)
-				goto bad_data;
-		}
-		return 0;
-	}
-
-	LBUG();
-
-bad_data:
-	CERROR("Bad data in page %p: %#llx, %#llx expected\n",
-	       pg, data, magic);
-	return 1;
-}
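
brw_fill_page() and brw_check_page() form a fill-then-verify pair: the writer stamps each 8-byte slot with a magic value, and the reader compares every slot against the value it expects. A minimal userspace sketch of the LST_BRW_CHECK_FULL pattern (names hypothetical):

#include <stdint.h>
#include <string.h>

#define MSIZE	sizeof(uint64_t)

/* Stamp every 8-byte slot in [buf, buf + len) with magic. */
static void fill_full(char *buf, int len, uint64_t magic)
{
	int i;

	for (i = 0; i < len; i += MSIZE)
		memcpy(buf + i, &magic, MSIZE);
}

/* Return 0 when every slot still holds magic, 1 on the first mismatch. */
static int check_full(const char *buf, int len, uint64_t magic)
{
	uint64_t data;
	int i;

	for (i = 0; i < len; i += MSIZE) {
		memcpy(&data, buf + i, MSIZE);
		if (data != magic)
			return 1;
	}

	return 0;
}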
-
-static void
-brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
-{
-	int i;
-	struct page *pg;
-
-	for (i = 0; i < bk->bk_niov; i++) {
-		int off, len;
-
-		pg = bk->bk_iovs[i].bv_page;
-		off = bk->bk_iovs[i].bv_offset;
-		len = bk->bk_iovs[i].bv_len;
-		brw_fill_page(pg, off, len, pattern, magic);
-	}
-}
-
-static int
-brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
-{
-	int i;
-	struct page *pg;
-
-	for (i = 0; i < bk->bk_niov; i++) {
-		int off, len;
-
-		pg = bk->bk_iovs[i].bv_page;
-		off = bk->bk_iovs[i].bv_offset;
-		len = bk->bk_iovs[i].bv_len;
-		if (brw_check_page(pg, off, len, pattern, magic)) {
-			CERROR("Bulk page %p (%d/%d) is corrupted!\n",
-			       pg, i, bk->bk_niov);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int
-brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
-		    struct srpc_client_rpc **rpcpp)
-{
-	struct srpc_bulk *bulk = tsu->tsu_private;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	struct srpc_client_rpc *rpc;
-	struct srpc_brw_reqst *req;
-	int flags;
-	int npg;
-	int len;
-	int opc;
-	int rc;
-
-	LASSERT(sn);
-	LASSERT(bulk);
-
-	if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-		struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
-
-		opc = breq->blk_opc;
-		flags = breq->blk_flags;
-		npg = breq->blk_npg;
-		len = npg * PAGE_SIZE;
-	} else {
-		struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
-		int off;
-
-		/*
-		 * We should never reach this step with an unknown feature,
-		 * because make_session rejects unknown features.
-		 */
-		LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
-		opc = breq->blk_opc;
-		flags = breq->blk_flags;
-		len = breq->blk_len;
-		off = breq->blk_offset;
-		npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	}
-
-	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
-	if (rc)
-		return rc;
-
-	memcpy(&rpc->crpc_bulk, bulk, offsetof(struct srpc_bulk, bk_iovs[npg]));
-	if (opc == LST_BRW_WRITE)
-		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
-	else
-		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON);
-
-	req = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
-	req->brw_flags = flags;
-	req->brw_rw = opc;
-	req->brw_len = len;
-
-	*rpcpp = rpc;
-	return 0;
-}
-
-static void
-brw_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
-{
-	__u64 magic = BRW_MAGIC;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	struct srpc_msg *msg = &rpc->crpc_replymsg;
-	struct srpc_brw_reply *reply = &msg->msg_body.brw_reply;
-	struct srpc_brw_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
-
-	LASSERT(sn);
-
-	if (rpc->crpc_status) {
-		CERROR("BRW RPC to %s failed with %d\n",
-		       libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
-		if (!tsi->tsi_stopping)	/* rpc could have been aborted */
-			atomic_inc(&sn->sn_brw_errors);
-		return;
-	}
-
-	if (msg->msg_magic != SRPC_MSG_MAGIC) {
-		__swab64s(&magic);
-		__swab32s(&reply->brw_status);
-	}
-
-	CDEBUG(reply->brw_status ? D_WARNING : D_NET,
-	       "BRW RPC to %s finished with brw_status: %d\n",
-	       libcfs_id2str(rpc->crpc_dest), reply->brw_status);
-
-	if (reply->brw_status) {
-		atomic_inc(&sn->sn_brw_errors);
-		rpc->crpc_status = -(int)reply->brw_status;
-		return;
-	}
-
-	if (reqst->brw_rw == LST_BRW_WRITE)
-		return;
-
-	if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic)) {
-		CERROR("Bulk data from %s is corrupted!\n",
-		       libcfs_id2str(rpc->crpc_dest));
-		atomic_inc(&sn->sn_brw_errors);
-		rpc->crpc_status = -EBADMSG;
-	}
-}
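
A note on the byte-swapping above: when msg_magic does not equal SRPC_MSG_MAGIC, the reply came from a peer of the opposite endianness, so the locally expected magic is swapped to match what that peer actually wrote:

/*
 * __swab64s() reverses the byte order in place; for BRW_MAGIC:
 *
 *   0xeeb0eeb1eeb2eeb3ULL  <->  0xb3eeb2eeb1eeb0eeULL
 *
 * so brw_check_bulk() compares the received bulk pages against the
 * value in the sender's byte order.
 */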
-
-static void
-brw_server_rpc_done(struct srpc_server_rpc *rpc)
-{
-	struct srpc_bulk *blk = rpc->srpc_bulk;
-
-	if (!blk)
-		return;
-
-	if (rpc->srpc_status)
-		CERROR("Bulk transfer %s %s has failed: %d\n",
-		       blk->bk_sink ? "from" : "to",
-		       libcfs_id2str(rpc->srpc_peer), rpc->srpc_status);
-	else
-		CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n",
-		       blk->bk_niov, blk->bk_sink ? "from" : "to",
-		       libcfs_id2str(rpc->srpc_peer));
-
-	sfw_free_pages(rpc);
-}
-
-static int
-brw_bulk_ready(struct srpc_server_rpc *rpc, int status)
-{
-	__u64 magic = BRW_MAGIC;
-	struct srpc_brw_reply *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
-	struct srpc_brw_reqst *reqst;
-	struct srpc_msg *reqstmsg;
-
-	LASSERT(rpc->srpc_bulk);
-	LASSERT(rpc->srpc_reqstbuf);
-
-	reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
-	reqst = &reqstmsg->msg_body.brw_reqst;
-
-	if (status) {
-		CERROR("BRW bulk %s failed for RPC from %s: %d\n",
-		       reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE",
-		       libcfs_id2str(rpc->srpc_peer), status);
-		return -EIO;
-	}
-
-	if (reqst->brw_rw == LST_BRW_READ)
-		return 0;
-
-	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC)
-		__swab64s(&magic);
-
-	if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic)) {
-		CERROR("Bulk data from %s is corrupted!\n",
-		       libcfs_id2str(rpc->srpc_peer));
-		reply->brw_status = EBADMSG;
-	}
-
-	return 0;
-}
-
-static int
-brw_server_handle(struct srpc_server_rpc *rpc)
-{
-	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	struct srpc_msg *replymsg = &rpc->srpc_replymsg;
-	struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
-	struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply;
-	struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst;
-	int npg;
-	int rc;
-
-	LASSERT(sv->sv_id == SRPC_SERVICE_BRW);
-
-	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
-		LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
-		__swab32s(&reqst->brw_rw);
-		__swab32s(&reqst->brw_len);
-		__swab32s(&reqst->brw_flags);
-		__swab64s(&reqst->brw_rpyid);
-		__swab64s(&reqst->brw_bulkid);
-	}
-	LASSERT(reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id));
-
-	reply->brw_status = 0;
-	rpc->srpc_done = brw_server_rpc_done;
-
-	if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) ||
-	    (reqst->brw_flags != LST_BRW_CHECK_NONE &&
-	     reqst->brw_flags != LST_BRW_CHECK_FULL &&
-	     reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) {
-		reply->brw_status = EINVAL;
-		return 0;
-	}
-
-	if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) {
-		replymsg->msg_ses_feats = LST_FEATS_MASK;
-		reply->brw_status = EPROTO;
-		return 0;
-	}
-
-	if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
-		/* compat with old version */
-		if (reqst->brw_len & ~PAGE_MASK) {
-			reply->brw_status = EINVAL;
-			return 0;
-		}
-		npg = reqst->brw_len >> PAGE_SHIFT;
-
-	} else {
-		npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	}
-
-	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
-
-	if (!reqst->brw_len || npg > LNET_MAX_IOV) {
-		reply->brw_status = EINVAL;
-		return 0;
-	}
-
-	rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
-			     reqst->brw_len,
-			     reqst->brw_rw == LST_BRW_WRITE);
-	if (rc)
-		return rc;
-
-	if (reqst->brw_rw == LST_BRW_READ)
-		brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
-	else
-		brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
-
-	return 0;
-}
-
-struct sfw_test_client_ops brw_test_client;
-
-void brw_init_test_client(void)
-{
-	brw_test_client.tso_init = brw_client_init;
-	brw_test_client.tso_fini = brw_client_fini;
-	brw_test_client.tso_prep_rpc = brw_client_prep_rpc;
-	brw_test_client.tso_done_rpc = brw_client_done_rpc;
-}
-
-struct srpc_service brw_test_service;
-
-void brw_init_test_service(void)
-{
-	brw_test_service.sv_id = SRPC_SERVICE_BRW;
-	brw_test_service.sv_name = "brw_test";
-	brw_test_service.sv_handler = brw_server_handle;
-	brw_test_service.sv_bulk_ready = brw_bulk_ready;
-	brw_test_service.sv_wi_total = brw_srv_workitems;
-}

+ 0 - 801
drivers/staging/lustre/lnet/selftest/conctl.c

@@ -1,801 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conctl.c
- *
- * IOCTL handling in the kernel
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "console.h"
-
-static int
-lst_session_new_ioctl(struct lstio_session_new_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int rc;
-
-	if (!args->lstio_ses_idp ||	/* address for output sid */
-	    !args->lstio_ses_key ||	/* no key is specified */
-	    !args->lstio_ses_namep ||	/* session name */
-	    args->lstio_ses_nmlen <= 0 ||
-	    args->lstio_ses_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_ses_namep,
-			   args->lstio_ses_nmlen)) {
-		return -EFAULT;
-	}
-
-	name[args->lstio_ses_nmlen] = 0;
-
-	rc = lstcon_session_new(name,
-				args->lstio_ses_key,
-				args->lstio_ses_feats,
-				args->lstio_ses_timeout,
-				args->lstio_ses_force,
-				args->lstio_ses_idp);
-
-	return rc;
-}
-
-static int
-lst_session_end_ioctl(struct lstio_session_end_args *args)
-{
-	if (args->lstio_ses_key != console_session.ses_key)
-		return -EACCES;
-
-	return lstcon_session_end();
-}
-
-static int
-lst_session_info_ioctl(struct lstio_session_info_args *args)
-{
-	/* no checking of key */
-
-	if (!args->lstio_ses_idp ||	/* address for output sid */
-	    !args->lstio_ses_keyp ||	/* address for output key */
-	    !args->lstio_ses_featp ||	/* address for output features */
-	    !args->lstio_ses_ndinfo ||	/* address for output ndinfo */
-	    !args->lstio_ses_namep ||	/* address for output name */
-	    args->lstio_ses_nmlen <= 0 ||
-	    args->lstio_ses_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	return lstcon_session_info(args->lstio_ses_idp,
-				   args->lstio_ses_keyp,
-				   args->lstio_ses_featp,
-				   args->lstio_ses_ndinfo,
-				   args->lstio_ses_namep,
-				   args->lstio_ses_nmlen);
-}
-
-static int
-lst_debug_ioctl(struct lstio_debug_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int client = 1;
-	int rc;
-
-	if (args->lstio_dbg_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_dbg_resultp)
-		return -EINVAL;
-
-	if (args->lstio_dbg_namep &&	/* name of batch/group */
-	    (args->lstio_dbg_nmlen <= 0 ||
-	     args->lstio_dbg_nmlen > LST_NAME_SIZE))
-		return -EINVAL;
-
-	if (args->lstio_dbg_namep) {
-		if (copy_from_user(name, args->lstio_dbg_namep,
-				   args->lstio_dbg_nmlen))
-			return -EFAULT;
-
-		name[args->lstio_dbg_nmlen] = 0;
-	}
-
-	rc = -EINVAL;
-
-	switch (args->lstio_dbg_type) {
-	case LST_OPC_SESSION:
-		rc = lstcon_session_debug(args->lstio_dbg_timeout,
-					  args->lstio_dbg_resultp);
-		break;
-
-	case LST_OPC_BATCHSRV:
-		client = 0;
-		/* fall through */
-	case LST_OPC_BATCHCLI:
-		if (!args->lstio_dbg_namep)
-			goto out;
-
-		rc = lstcon_batch_debug(args->lstio_dbg_timeout,
-					name, client, args->lstio_dbg_resultp);
-		break;
-
-	case LST_OPC_GROUP:
-		if (!args->lstio_dbg_namep)
-			goto out;
-
-		rc = lstcon_group_debug(args->lstio_dbg_timeout,
-					name, args->lstio_dbg_resultp);
-		break;
-
-	case LST_OPC_NODES:
-		if (args->lstio_dbg_count <= 0 ||
-		    !args->lstio_dbg_idsp)
-			goto out;
-
-		rc = lstcon_nodes_debug(args->lstio_dbg_timeout,
-					args->lstio_dbg_count,
-					args->lstio_dbg_idsp,
-					args->lstio_dbg_resultp);
-		break;
-
-	default:
-		break;
-	}
-
-out:
-	return rc;
-}
-
-static int
-lst_group_add_ioctl(struct lstio_group_add_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int rc;
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	rc = lstcon_group_add(name);
-
-	return rc;
-}
-
-static int
-lst_group_del_ioctl(struct lstio_group_del_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	rc = lstcon_group_del(name);
-
-	return rc;
-}
-
-static int
-lst_group_update_ioctl(struct lstio_group_update_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_resultp ||
-	    !args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	switch (args->lstio_grp_opc) {
-	case LST_GROUP_CLEAN:
-		rc = lstcon_group_clean(name, args->lstio_grp_args);
-		break;
-
-	case LST_GROUP_REFRESH:
-		rc = lstcon_group_refresh(name, args->lstio_grp_resultp);
-		break;
-
-	case LST_GROUP_RMND:
-		if (args->lstio_grp_count <= 0 ||
-		    !args->lstio_grp_idsp) {
-			rc = -EINVAL;
-			break;
-		}
-		rc = lstcon_nodes_remove(name, args->lstio_grp_count,
-					 args->lstio_grp_idsp,
-					 args->lstio_grp_resultp);
-		break;
-
-	default:
-		rc = -EINVAL;
-		break;
-	}
-
-	return rc;
-}
-
-static int
-lst_nodes_add_ioctl(struct lstio_group_nodes_args *args)
-{
-	unsigned int feats;
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_idsp ||	/* array of ids */
-	    args->lstio_grp_count <= 0 ||
-	    !args->lstio_grp_resultp ||
-	    !args->lstio_grp_featp ||
-	    !args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	rc = lstcon_nodes_add(name, args->lstio_grp_count,
-			      args->lstio_grp_idsp, &feats,
-			      args->lstio_grp_resultp);
-
-	if (!rc &&
-	    copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats))) {
-		return -EINVAL;
-	}
-
-	return rc;
-}
-
-static int
-lst_group_list_ioctl(struct lstio_group_list_args *args)
-{
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (args->lstio_grp_idx < 0 ||
-	    !args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	return lstcon_group_list(args->lstio_grp_idx,
-				 args->lstio_grp_nmlen,
-				 args->lstio_grp_namep);
-}
-
-static int
-lst_group_info_ioctl(struct lstio_group_info_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int ndent;
-	int index;
-	int rc;
-
-	if (args->lstio_grp_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_grp_namep ||
-	    args->lstio_grp_nmlen <= 0 ||
-	    args->lstio_grp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (!args->lstio_grp_entp &&	/* output: group entry */
-	    !args->lstio_grp_dentsp)	/* output: node entry */
-		return -EINVAL;
-
-	if (args->lstio_grp_dentsp) {		/* have node entry */
-		if (!args->lstio_grp_idxp ||	/* node index */
-		    !args->lstio_grp_ndentp)	/* # of node entry */
-			return -EINVAL;
-
-		if (copy_from_user(&ndent, args->lstio_grp_ndentp,
-				   sizeof(ndent)) ||
-		    copy_from_user(&index, args->lstio_grp_idxp,
-				   sizeof(index)))
-			return -EFAULT;
-
-		if (ndent <= 0 || index < 0)
-			return -EINVAL;
-	}
-
-	if (copy_from_user(name, args->lstio_grp_namep,
-			   args->lstio_grp_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_grp_nmlen] = 0;
-
-	rc = lstcon_group_info(name, args->lstio_grp_entp,
-			       &index, &ndent, args->lstio_grp_dentsp);
-
-	if (rc)
-		return rc;
-
-	if (args->lstio_grp_dentsp &&
-	    (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) ||
-	     copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent))))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-lst_batch_add_ioctl(struct lstio_batch_add_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_batch_add(name);
-
-	return rc;
-}
-
-static int
-lst_batch_run_ioctl(struct lstio_batch_run_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_batch_run(name, args->lstio_bat_timeout,
-			      args->lstio_bat_resultp);
-
-	return rc;
-}
-
-static int
-lst_batch_stop_ioctl(struct lstio_batch_stop_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_resultp ||
-	    !args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_batch_stop(name, args->lstio_bat_force,
-			       args->lstio_bat_resultp);
-
-	return rc;
-}
-
-static int
-lst_batch_query_ioctl(struct lstio_batch_query_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int rc;
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_resultp ||
-	    !args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (args->lstio_bat_testidx < 0)
-		return -EINVAL;
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_test_batch_query(name,
-				     args->lstio_bat_testidx,
-				     args->lstio_bat_client,
-				     args->lstio_bat_timeout,
-				     args->lstio_bat_resultp);
-
-	return rc;
-}
-
-static int
-lst_batch_list_ioctl(struct lstio_batch_list_args *args)
-{
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (args->lstio_bat_idx < 0 ||
-	    !args->lstio_bat_namep ||
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	return lstcon_batch_list(args->lstio_bat_idx,
-				 args->lstio_bat_nmlen,
-				 args->lstio_bat_namep);
-}
-
-static int
-lst_batch_info_ioctl(struct lstio_batch_info_args *args)
-{
-	char name[LST_NAME_SIZE + 1];
-	int rc;
-	int index;
-	int ndent;
-
-	if (args->lstio_bat_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_bat_namep ||	/* batch name */
-	    args->lstio_bat_nmlen <= 0 ||
-	    args->lstio_bat_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (!args->lstio_bat_entp &&	/* output: batch entry */
-	    !args->lstio_bat_dentsp)	/* output: node entry */
-		return -EINVAL;
-
-	if (args->lstio_bat_dentsp) {		/* have node entry */
-		if (!args->lstio_bat_idxp ||	/* node index */
-		    !args->lstio_bat_ndentp)	/* # of node entry */
-			return -EINVAL;
-
-		if (copy_from_user(&index, args->lstio_bat_idxp,
-				   sizeof(index)) ||
-		    copy_from_user(&ndent, args->lstio_bat_ndentp,
-				   sizeof(ndent)))
-			return -EFAULT;
-
-		if (ndent <= 0 || index < 0)
-			return -EINVAL;
-	}
-
-	if (copy_from_user(name, args->lstio_bat_namep,
-			   args->lstio_bat_nmlen))
-		return -EFAULT;
-
-	name[args->lstio_bat_nmlen] = 0;
-
-	rc = lstcon_batch_info(name, args->lstio_bat_entp,
-			       args->lstio_bat_server, args->lstio_bat_testidx,
-			       &index, &ndent, args->lstio_bat_dentsp);
-
-	if (rc)
-		return rc;
-
-	if (args->lstio_bat_dentsp &&
-	    (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) ||
-	     copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent))))
-		rc = -EFAULT;
-
-	return rc;
-}
-
-static int
-lst_stat_query_ioctl(struct lstio_stat_args *args)
-{
-	int rc;
-	char name[LST_NAME_SIZE + 1];
-
-	/* TODO: not finished */
-	if (args->lstio_sta_key != console_session.ses_key)
-		return -EACCES;
-
-	if (!args->lstio_sta_resultp)
-		return -EINVAL;
-
-	if (args->lstio_sta_idsp) {
-		if (args->lstio_sta_count <= 0)
-			return -EINVAL;
-
-		rc = lstcon_nodes_stat(args->lstio_sta_count,
-				       args->lstio_sta_idsp,
-				       args->lstio_sta_timeout,
-				       args->lstio_sta_resultp);
-	} else if (args->lstio_sta_namep) {
-		if (args->lstio_sta_nmlen <= 0 ||
-		    args->lstio_sta_nmlen > LST_NAME_SIZE)
-			return -EINVAL;
-
-		if (copy_from_user(name, args->lstio_sta_namep,
-				   args->lstio_sta_nmlen))
-			return -EFAULT;
-
-		name[args->lstio_sta_nmlen] = 0;
-
-		rc = lstcon_group_stat(name, args->lstio_sta_timeout,
-				       args->lstio_sta_resultp);
-	} else {
-		rc = -EINVAL;
-	}
-
-	return rc;
-}
-
-static int lst_test_add_ioctl(struct lstio_test_args *args)
-{
-	char batch_name[LST_NAME_SIZE + 1];
-	char src_name[LST_NAME_SIZE + 1];
-	char dst_name[LST_NAME_SIZE + 1];
-	void *param = NULL;
-	int ret = 0;
-	int rc = -ENOMEM;
-
-	if (!args->lstio_tes_resultp ||
-	    !args->lstio_tes_retp ||
-	    !args->lstio_tes_bat_name ||	/* no specified batch */
-	    args->lstio_tes_bat_nmlen <= 0 ||
-	    args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
-	    !args->lstio_tes_sgrp_name ||	/* no source group */
-	    args->lstio_tes_sgrp_nmlen <= 0 ||
-	    args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
-	    !args->lstio_tes_dgrp_name ||	/* no target group */
-	    args->lstio_tes_dgrp_nmlen <= 0 ||
-	    args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
-		return -EINVAL;
-
-	if (!args->lstio_tes_loop ||		/* negative is infinite */
-	    args->lstio_tes_concur <= 0 ||
-	    args->lstio_tes_dist <= 0 ||
-	    args->lstio_tes_span <= 0)
-		return -EINVAL;
-
-	/* have parameter, check if parameter length is valid */
-	if (args->lstio_tes_param &&
-	    (args->lstio_tes_param_len <= 0 ||
-	     args->lstio_tes_param_len >
-	     PAGE_SIZE - sizeof(struct lstcon_test)))
-		return -EINVAL;
-
-	/* Enforce zero parameter length if there's no parameter */
-	if (!args->lstio_tes_param && args->lstio_tes_param_len)
-		return -EINVAL;
-
-	if (args->lstio_tes_param) {
-		param = memdup_user(args->lstio_tes_param,
-				    args->lstio_tes_param_len);
-		if (IS_ERR(param))
-			return PTR_ERR(param);
-	}
-
-	rc = -EFAULT;
-	if (copy_from_user(batch_name, args->lstio_tes_bat_name,
-			   args->lstio_tes_bat_nmlen) ||
-	    copy_from_user(src_name, args->lstio_tes_sgrp_name,
-			   args->lstio_tes_sgrp_nmlen) ||
-	    copy_from_user(dst_name, args->lstio_tes_dgrp_name,
-			   args->lstio_tes_dgrp_nmlen))
-		goto out;
-
-	/* NUL-terminate the copied names, as the other handlers do */
-	batch_name[args->lstio_tes_bat_nmlen] = 0;
-	src_name[args->lstio_tes_sgrp_nmlen] = 0;
-	dst_name[args->lstio_tes_dgrp_nmlen] = 0;
-
-	rc = lstcon_test_add(batch_name, args->lstio_tes_type,
-			     args->lstio_tes_loop, args->lstio_tes_concur,
-			     args->lstio_tes_dist, args->lstio_tes_span,
-			     src_name, dst_name, param,
-			     args->lstio_tes_param_len,
-			     &ret, args->lstio_tes_resultp);
-
-	if (!rc && ret)
-		rc = (copy_to_user(args->lstio_tes_retp, &ret,
-				   sizeof(ret))) ? -EFAULT : 0;
-out:
-	kfree(param);
-
-	return rc;
-}
-
-int
-lstcon_ioctl_entry(struct notifier_block *nb,
-		   unsigned long cmd, void *vdata)
-{
-	struct libcfs_ioctl_hdr *hdr = vdata;
-	char *buf = NULL;
-	struct libcfs_ioctl_data *data;
-	int opc;
-	int rc = -EINVAL;
-
-	if (cmd != IOC_LIBCFS_LNETST)
-		goto err;
-
-	data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
-
-	opc = data->ioc_u32[0];
-
-	if (data->ioc_plen1 > PAGE_SIZE)
-		goto err;
-
-	buf = kmalloc(data->ioc_plen1, GFP_KERNEL);
-	rc = -ENOMEM;
-	if (!buf)
-		goto err;
-
-	/* copy in parameter */
-	rc = -EFAULT;
-	if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1))
-		goto err;
-
-	mutex_lock(&console_session.ses_mutex);
-
-	console_session.ses_laststamp = ktime_get_real_seconds();
-
-	if (console_session.ses_shutdown) {
-		rc = -ESHUTDOWN;
-		goto out;
-	}
-
-	if (console_session.ses_expired)
-		lstcon_session_end();
-
-	if (opc != LSTIO_SESSION_NEW &&
-	    console_session.ses_state == LST_SESSION_NONE) {
-		CDEBUG(D_NET, "LST no active session\n");
-		rc = -ESRCH;
-		goto out;
-	}
-
-	memset(&console_session.ses_trans_stat, 0, sizeof(struct lstcon_trans_stat));
-
-	switch (opc) {
-	case LSTIO_SESSION_NEW:
-		rc = lst_session_new_ioctl((struct lstio_session_new_args *)buf);
-		break;
-	case LSTIO_SESSION_END:
-		rc = lst_session_end_ioctl((struct lstio_session_end_args *)buf);
-		break;
-	case LSTIO_SESSION_INFO:
-		rc = lst_session_info_ioctl((struct lstio_session_info_args *)buf);
-		break;
-	case LSTIO_DEBUG:
-		rc = lst_debug_ioctl((struct lstio_debug_args *)buf);
-		break;
-	case LSTIO_GROUP_ADD:
-		rc = lst_group_add_ioctl((struct lstio_group_add_args *)buf);
-		break;
-	case LSTIO_GROUP_DEL:
-		rc = lst_group_del_ioctl((struct lstio_group_del_args *)buf);
-		break;
-	case LSTIO_GROUP_UPDATE:
-		rc = lst_group_update_ioctl((struct lstio_group_update_args *)buf);
-		break;
-	case LSTIO_NODES_ADD:
-		rc = lst_nodes_add_ioctl((struct lstio_group_nodes_args *)buf);
-		break;
-	case LSTIO_GROUP_LIST:
-		rc = lst_group_list_ioctl((struct lstio_group_list_args *)buf);
-		break;
-	case LSTIO_GROUP_INFO:
-		rc = lst_group_info_ioctl((struct lstio_group_info_args *)buf);
-		break;
-	case LSTIO_BATCH_ADD:
-		rc = lst_batch_add_ioctl((struct lstio_batch_add_args *)buf);
-		break;
-	case LSTIO_BATCH_START:
-		rc = lst_batch_run_ioctl((struct lstio_batch_run_args *)buf);
-		break;
-	case LSTIO_BATCH_STOP:
-		rc = lst_batch_stop_ioctl((struct lstio_batch_stop_args *)buf);
-		break;
-	case LSTIO_BATCH_QUERY:
-		rc = lst_batch_query_ioctl((struct lstio_batch_query_args *)buf);
-		break;
-	case LSTIO_BATCH_LIST:
-		rc = lst_batch_list_ioctl((struct lstio_batch_list_args *)buf);
-		break;
-	case LSTIO_BATCH_INFO:
-		rc = lst_batch_info_ioctl((struct lstio_batch_info_args *)buf);
-		break;
-	case LSTIO_TEST_ADD:
-		rc = lst_test_add_ioctl((struct lstio_test_args *)buf);
-		break;
-	case LSTIO_STAT_QUERY:
-		rc = lst_stat_query_ioctl((struct lstio_stat_args *)buf);
-		break;
-	default:
-		rc = -EINVAL;
-		goto out;
-	}
-
-	if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
-			 sizeof(struct lstcon_trans_stat)))
-		rc = -EFAULT;
-out:
-	mutex_unlock(&console_session.ses_mutex);
-err:
-	kfree(buf);
-
-	return notifier_from_ioctl_errno(rc);
-}
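
lstcon_ioctl_entry() above expects the LSTIO_* opcode in ioc_u32[0], an opcode-specific argument block of at most PAGE_SIZE bytes behind ioc_pbuf1, and copies the per-transaction statistics back out through ioc_pbuf2. A minimal sketch of how a userspace caller might shape such a request; the helper name and the omitted libcfs_ioctl_hdr setup are assumptions for illustration, not taken from this file:

#include <string.h>
#include <sys/ioctl.h>

/* Hypothetical wrapper: fd is an open descriptor on the LNet control
 * device and opc is one of the LSTIO_* opcodes dispatched above. The
 * libcfs_ioctl_hdr version/length fields must also be filled in,
 * which is elided here. */
static int lst_ioctl(int fd, unsigned int opc, void *args, size_t len,
		     struct lstcon_trans_stat *stat)
{
	struct libcfs_ioctl_data data;

	memset(&data, 0, sizeof(data));
	data.ioc_u32[0] = opc;		/* opcode read by the dispatcher */
	data.ioc_plen1 = len;		/* must not exceed PAGE_SIZE */
	data.ioc_pbuf1 = args;		/* copied in with copy_from_user() */
	data.ioc_pbuf2 = (char *)stat;	/* trans stats copied back out */

	return ioctl(fd, IOC_LIBCFS_LNETST, &data);
}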

+ 0 - 1396
drivers/staging/lustre/lnet/selftest/conrpc.c

@@ -1,1396 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conrpc.c
- *
- * Console framework rpcs
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include "timer.h"
-#include "conrpc.h"
-#include "console.h"
-
-void lstcon_rpc_stat_reply(struct lstcon_rpc_trans *, struct srpc_msg *,
-			   struct lstcon_node *, struct lstcon_trans_stat *);
-
-static void
-lstcon_rpc_done(struct srpc_client_rpc *rpc)
-{
-	struct lstcon_rpc *crpc = (struct lstcon_rpc *)rpc->crpc_priv;
-
-	LASSERT(crpc && rpc == crpc->crp_rpc);
-	LASSERT(crpc->crp_posted && !crpc->crp_finished);
-
-	spin_lock(&rpc->crpc_lock);
-
-	if (!crpc->crp_trans) {
-		/*
-		 * An orphan RPC belongs to no transaction, so
-		 * nothing is waiting on it; just release it.
-		 */
-		spin_unlock(&rpc->crpc_lock);
-
-		/* release it */
-		lstcon_rpc_put(crpc);
-		return;
-	}
-
-	/* not an orphan RPC */
-	crpc->crp_finished = 1;
-
-	if (!crpc->crp_stamp) {
-		/* not aborted */
-		LASSERT(!crpc->crp_status);
-
-		crpc->crp_stamp = jiffies;
-		crpc->crp_status = rpc->crpc_status;
-	}
-
-	/* wake up the transaction thread if this is the last RPC in the transaction */
-	if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining))
-		wake_up(&crpc->crp_trans->tas_waitq);
-
-	spin_unlock(&rpc->crpc_lock);
-}
-
-static int
-lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned int feats,
-		int bulk_npg, int bulk_len, int embedded,
-		struct lstcon_rpc *crpc)
-{
-	crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
-				       feats, bulk_npg, bulk_len,
-				       lstcon_rpc_done, (void *)crpc);
-	if (!crpc->crp_rpc)
-		return -ENOMEM;
-
-	crpc->crp_trans = NULL;
-	crpc->crp_node = nd;
-	crpc->crp_posted = 0;
-	crpc->crp_finished = 0;
-	crpc->crp_unpacked = 0;
-	crpc->crp_status = 0;
-	crpc->crp_stamp = 0;
-	crpc->crp_embedded = embedded;
-	INIT_LIST_HEAD(&crpc->crp_link);
-
-	atomic_inc(&console_session.ses_rpc_counter);
-
-	return 0;
-}
-
-static int
-lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned int feats,
-		int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp)
-{
-	struct lstcon_rpc *crpc = NULL;
-	int rc;
-
-	spin_lock(&console_session.ses_rpc_lock);
-
-	crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist,
-					struct lstcon_rpc, crp_link);
-	if (crpc)
-		list_del_init(&crpc->crp_link);
-
-	spin_unlock(&console_session.ses_rpc_lock);
-
-	if (!crpc) {
-		crpc = kzalloc(sizeof(*crpc), GFP_NOFS);
-		if (!crpc)
-			return -ENOMEM;
-	}
-
-	rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc);
-	if (!rc) {
-		*crpcpp = crpc;
-		return 0;
-	}
-
-	kfree(crpc);
-
-	return rc;
-}
-
-void
-lstcon_rpc_put(struct lstcon_rpc *crpc)
-{
-	struct srpc_bulk *bulk = &crpc->crp_rpc->crpc_bulk;
-	int i;
-
-	LASSERT(list_empty(&crpc->crp_link));
-
-	for (i = 0; i < bulk->bk_niov; i++) {
-		if (!bulk->bk_iovs[i].bv_page)
-			continue;
-
-		__free_page(bulk->bk_iovs[i].bv_page);
-	}
-
-	srpc_client_rpc_decref(crpc->crp_rpc);
-
-	if (crpc->crp_embedded) {
-		/* embedded RPC, don't recycle it */
-		memset(crpc, 0, sizeof(*crpc));
-		crpc->crp_embedded = 1;
-
-	} else {
-		spin_lock(&console_session.ses_rpc_lock);
-
-		list_add(&crpc->crp_link,
-			 &console_session.ses_rpc_freelist);
-
-		spin_unlock(&console_session.ses_rpc_lock);
-	}
-
-	/* RPC is not alive now */
-	atomic_dec(&console_session.ses_rpc_counter);
-}
-
-static void
-lstcon_rpc_post(struct lstcon_rpc *crpc)
-{
-	struct lstcon_rpc_trans *trans = crpc->crp_trans;
-
-	LASSERT(trans);
-
-	atomic_inc(&trans->tas_remaining);
-	crpc->crp_posted = 1;
-
-	sfw_post_rpc(crpc->crp_rpc);
-}
-
-static char *
-lstcon_rpc_trans_name(int transop)
-{
-	if (transop == LST_TRANS_SESNEW)
-		return "SESNEW";
-
-	if (transop == LST_TRANS_SESEND)
-		return "SESEND";
-
-	if (transop == LST_TRANS_SESQRY)
-		return "SESQRY";
-
-	if (transop == LST_TRANS_SESPING)
-		return "SESPING";
-
-	if (transop == LST_TRANS_TSBCLIADD)
-		return "TSBCLIADD";
-
-	if (transop == LST_TRANS_TSBSRVADD)
-		return "TSBSRVADD";
-
-	if (transop == LST_TRANS_TSBRUN)
-		return "TSBRUN";
-
-	if (transop == LST_TRANS_TSBSTOP)
-		return "TSBSTOP";
-
-	if (transop == LST_TRANS_TSBCLIQRY)
-		return "TSBCLIQRY";
-
-	if (transop == LST_TRANS_TSBSRVQRY)
-		return "TSBSRVQRY";
-
-	if (transop == LST_TRANS_STATQRY)
-		return "STATQRY";
-
-	return "Unknown";
-}
-
-int
-lstcon_rpc_trans_prep(struct list_head *translist, int transop,
-		      struct lstcon_rpc_trans **transpp)
-{
-	struct lstcon_rpc_trans *trans;
-
-	if (translist) {
-		list_for_each_entry(trans, translist, tas_link) {
-			/*
-			 * Can't enqueue two private transactions on
-			 * the same object.
-			 */
-			if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE)
-				return -EPERM;
-		}
-	}
-
-	/* create a trans group */
-	trans = kzalloc(sizeof(*trans), GFP_NOFS);
-	if (!trans)
-		return -ENOMEM;
-
-	trans->tas_opc = transop;
-
-	if (!translist)
-		INIT_LIST_HEAD(&trans->tas_olink);
-	else
-		list_add_tail(&trans->tas_olink, translist);
-
-	list_add_tail(&trans->tas_link, &console_session.ses_trans_list);
-
-	INIT_LIST_HEAD(&trans->tas_rpcs_list);
-	atomic_set(&trans->tas_remaining, 0);
-	init_waitqueue_head(&trans->tas_waitq);
-
-	spin_lock(&console_session.ses_rpc_lock);
-	trans->tas_features = console_session.ses_features;
-	spin_unlock(&console_session.ses_rpc_lock);
-
-	*transpp = trans;
-	return 0;
-}
-
-void
-lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *crpc)
-{
-	list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list);
-	crpc->crp_trans = trans;
-}
-
-void
-lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error)
-{
-	struct srpc_client_rpc *rpc;
-	struct lstcon_rpc *crpc;
-	struct lstcon_node *nd;
-
-	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
-		rpc = crpc->crp_rpc;
-
-		spin_lock(&rpc->crpc_lock);
-
-		if (!crpc->crp_posted || /* not posted */
-		    crpc->crp_stamp) {	 /* rpc done or aborted already */
-			if (!crpc->crp_stamp) {
-				crpc->crp_stamp = jiffies;
-				crpc->crp_status = -EINTR;
-			}
-			spin_unlock(&rpc->crpc_lock);
-			continue;
-		}
-
-		crpc->crp_stamp = jiffies;
-		crpc->crp_status = error;
-
-		spin_unlock(&rpc->crpc_lock);
-
-		sfw_abort_rpc(rpc);
-
-		if (error != -ETIMEDOUT)
-			continue;
-
-		nd = crpc->crp_node;
-		if (time_after(nd->nd_stamp, crpc->crp_stamp))
-			continue;
-
-		nd->nd_stamp = crpc->crp_stamp;
-		nd->nd_state = LST_NODE_DOWN;
-	}
-}
-
-static int
-lstcon_rpc_trans_check(struct lstcon_rpc_trans *trans)
-{
-	if (console_session.ses_shutdown &&
-	    !list_empty(&trans->tas_olink)) /* Not an end session RPC */
-		return 1;
-
-	return !atomic_read(&trans->tas_remaining) ? 1 : 0;
-}
-
-int
-lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout)
-{
-	struct lstcon_rpc *crpc;
-	int rc;
-
-	if (list_empty(&trans->tas_rpcs_list))
-		return 0;
-
-	if (timeout < LST_TRANS_MIN_TIMEOUT)
-		timeout = LST_TRANS_MIN_TIMEOUT;
-
-	CDEBUG(D_NET, "Transaction %s started\n",
-	       lstcon_rpc_trans_name(trans->tas_opc));
-
-	/* post all requests */
-	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
-		LASSERT(!crpc->crp_posted);
-
-		lstcon_rpc_post(crpc);
-	}
-
-	mutex_unlock(&console_session.ses_mutex);
-
-	rc = wait_event_interruptible_timeout(trans->tas_waitq,
-					      lstcon_rpc_trans_check(trans),
-					      timeout * HZ);
-	rc = (rc > 0) ? 0 : ((rc < 0) ? -EINTR : -ETIMEDOUT);
-
-	mutex_lock(&console_session.ses_mutex);
-
-	if (console_session.ses_shutdown)
-		rc = -ESHUTDOWN;
-
-	if (rc || atomic_read(&trans->tas_remaining)) {
-		/* treat short timeout as canceled */
-		if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2)
-			rc = -EINTR;
-
-		lstcon_rpc_trans_abort(trans, rc);
-	}
-
-	CDEBUG(D_NET, "Transaction %s stopped: %d\n",
-	       lstcon_rpc_trans_name(trans->tas_opc), rc);
-
-	lstcon_rpc_trans_stat(trans, lstcon_trans_stat());
-
-	return rc;
-}
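
The return-code folding in lstcon_rpc_trans_postwait() follows the wait_event_interruptible_timeout() contract:

/*
 * wait_event_interruptible_timeout() result mapping used above:
 *
 *   > 0   condition met before the timeout  ->  0 (success)
 *  == 0   timed out                         ->  -ETIMEDOUT
 *   < 0   interrupted by a signal           ->  -EINTR
 */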
-
-static int
-lstcon_rpc_get_reply(struct lstcon_rpc *crpc, struct srpc_msg **msgpp)
-{
-	struct lstcon_node *nd = crpc->crp_node;
-	struct srpc_client_rpc *rpc = crpc->crp_rpc;
-	struct srpc_generic_reply *rep;
-
-	LASSERT(nd && rpc);
-	LASSERT(crpc->crp_stamp);
-
-	if (crpc->crp_status) {
-		*msgpp = NULL;
-		return crpc->crp_status;
-	}
-
-	*msgpp = &rpc->crpc_replymsg;
-	if (!crpc->crp_unpacked) {
-		sfw_unpack_message(*msgpp);
-		crpc->crp_unpacked = 1;
-	}
-
-	if (time_after(nd->nd_stamp, crpc->crp_stamp))
-		return 0;
-
-	nd->nd_stamp = crpc->crp_stamp;
-	rep = &(*msgpp)->msg_body.reply;
-
-	if (rep->sid.ses_nid == LNET_NID_ANY)
-		nd->nd_state = LST_NODE_UNKNOWN;
-	else if (lstcon_session_match(rep->sid))
-		nd->nd_state = LST_NODE_ACTIVE;
-	else
-		nd->nd_state = LST_NODE_BUSY;
-
-	return 0;
-}
-
-void
-lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, struct lstcon_trans_stat *stat)
-{
-	struct lstcon_rpc *crpc;
-	struct srpc_msg *rep;
-	int error;
-
-	LASSERT(stat);
-
-	memset(stat, 0, sizeof(*stat));
-
-	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
-		lstcon_rpc_stat_total(stat, 1);
-
-		LASSERT(crpc->crp_stamp);
-
-		error = lstcon_rpc_get_reply(crpc, &rep);
-		if (error) {
-			lstcon_rpc_stat_failure(stat, 1);
-			if (!stat->trs_rpc_errno)
-				stat->trs_rpc_errno = -error;
-
-			continue;
-		}
-
-		lstcon_rpc_stat_success(stat, 1);
-
-		lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat);
-	}
-
-	if (trans->tas_opc == LST_TRANS_SESNEW && !stat->trs_fwk_errno) {
-		stat->trs_fwk_errno =
-		      lstcon_session_feats_check(trans->tas_features);
-	}
-
-	CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, RPC error(%d), Framework error(%d)\n",
-	       lstcon_rpc_trans_name(trans->tas_opc),
-	       lstcon_rpc_stat_success(stat, 0),
-	       lstcon_rpc_stat_failure(stat, 0),
-	       lstcon_rpc_stat_total(stat, 0),
-	       stat->trs_rpc_errno, stat->trs_fwk_errno);
-}
-
-int
-lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
-			     struct list_head __user *head_up,
-			     lstcon_rpc_readent_func_t readent)
-{
-	struct list_head tmp;
-	struct list_head __user *next;
-	struct lstcon_rpc_ent *ent;
-	struct srpc_generic_reply *rep;
-	struct lstcon_rpc *crpc;
-	struct srpc_msg *msg;
-	struct lstcon_node *nd;
-	long dur;
-	struct timeval tv;
-	int error;
-
-	LASSERT(head_up);
-
-	next = head_up;
-
-	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
-		if (copy_from_user(&tmp, next,
-				   sizeof(struct list_head)))
-			return -EFAULT;
-
-		next = tmp.next;
-		if (next == head_up)
-			return 0;
-
-		ent = list_entry(next, struct lstcon_rpc_ent, rpe_link);
-
-		LASSERT(crpc->crp_stamp);
-
-		error = lstcon_rpc_get_reply(crpc, &msg);
-
-		nd = crpc->crp_node;
-
-		dur = (long)(crpc->crp_stamp -
-			     (unsigned long)console_session.ses_id.ses_stamp);
-		jiffies_to_timeval(dur, &tv);
-
-		if (copy_to_user(&ent->rpe_peer, &nd->nd_id,
-				 sizeof(struct lnet_process_id)) ||
-		    copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) ||
-		    copy_to_user(&ent->rpe_state, &nd->nd_state,
-				 sizeof(nd->nd_state)) ||
-		    copy_to_user(&ent->rpe_rpc_errno, &error,
-				 sizeof(error)))
-			return -EFAULT;
-
-		if (error)
-			continue;
-
-		/* RPC is done */
-		rep = (struct srpc_generic_reply *)&msg->msg_body.reply;
-
-		if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(rep->sid)) ||
-		    copy_to_user(&ent->rpe_fwk_errno, &rep->status,
-				 sizeof(rep->status)))
-			return -EFAULT;
-
-		if (!readent)
-			continue;
-
-		error = readent(trans->tas_opc, msg, ent);
-		if (error)
-			return error;
-	}
-
-	return 0;
-}
-
-void
-lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans)
-{
-	struct srpc_client_rpc *rpc;
-	struct lstcon_rpc *crpc;
-	struct lstcon_rpc *tmp;
-	int count = 0;
-
-	list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) {
-		rpc = crpc->crp_rpc;
-
-		spin_lock(&rpc->crpc_lock);
-
-		/* free it if it was never posted or has already finished */
-		if (!crpc->crp_posted || crpc->crp_finished) {
-			spin_unlock(&rpc->crpc_lock);
-
-			list_del_init(&crpc->crp_link);
-			lstcon_rpc_put(crpc);
-
-			continue;
-		}
-
-		/*
-		 * RPCs may still not have been called back (even after
-		 * LNetMDUnlink) because of the huge timeout for an
-		 * inaccessible network; don't make the user wait for
-		 * them, just abandon them and they will be recycled in
-		 * the completion callback.
-		 */
-		LASSERT(crpc->crp_status);
-
-		crpc->crp_node = NULL;
-		crpc->crp_trans = NULL;
-		list_del_init(&crpc->crp_link);
-		count++;
-
-		spin_unlock(&rpc->crpc_lock);
-
-		atomic_dec(&trans->tas_remaining);
-	}
-
-	LASSERT(!atomic_read(&trans->tas_remaining));
-
-	list_del(&trans->tas_link);
-	if (!list_empty(&trans->tas_olink))
-		list_del(&trans->tas_olink);
-
-	CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n",
-	       lstcon_rpc_trans_name(trans->tas_opc), count);
-
-	kfree(trans);
-}
-
-int
-lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
-		   unsigned int feats, struct lstcon_rpc **crpc)
-{
-	struct srpc_mksn_reqst *msrq;
-	struct srpc_rmsn_reqst *rsrq;
-	int rc;
-
-	switch (transop) {
-	case LST_TRANS_SESNEW:
-		rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION,
-				     feats, 0, 0, crpc);
-		if (rc)
-			return rc;
-
-		msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst;
-		msrq->mksn_sid = console_session.ses_id;
-		msrq->mksn_force = console_session.ses_force;
-		strlcpy(msrq->mksn_name, console_session.ses_name,
-			sizeof(msrq->mksn_name));
-		break;
-
-	case LST_TRANS_SESEND:
-		rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION,
-				     feats, 0, 0, crpc);
-		if (rc)
-			return rc;
-
-		rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst;
-		rsrq->rmsn_sid = console_session.ses_id;
-		break;
-
-	default:
-		LBUG();
-	}
-
-	return 0;
-}
-
-int
-lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned int feats,
-		   struct lstcon_rpc **crpc)
-{
-	struct srpc_debug_reqst *drq;
-	int rc;
-
-	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc);
-	if (rc)
-		return rc;
-
-	drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
-	drq->dbg_sid = console_session.ses_id;
-	drq->dbg_flags = 0;
-
-	return rc;
-}
-
-int
-lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
-		   struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc)
-{
-	struct lstcon_batch *batch;
-	struct srpc_batch_reqst *brq;
-	int rc;
-
-	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc);
-	if (rc)
-		return rc;
-
-	brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst;
-
-	brq->bar_sid = console_session.ses_id;
-	brq->bar_bid = tsb->tsb_id;
-	brq->bar_testidx = tsb->tsb_index;
-	brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN :
-		       (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP :
-		       SRPC_BATCH_OPC_QUERY);
-
-	if (transop != LST_TRANS_TSBRUN &&
-	    transop != LST_TRANS_TSBSTOP)
-		return 0;
-
-	LASSERT(!tsb->tsb_index);
-
-	batch = (struct lstcon_batch *)tsb;
-	brq->bar_arg = batch->bat_arg;
-
-	return 0;
-}
-
-int
-lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int feats,
-		    struct lstcon_rpc **crpc)
-{
-	struct srpc_stat_reqst *srq;
-	int rc;
-
-	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc);
-	if (rc)
-		return rc;
-
-	srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst;
-
-	srq->str_sid = console_session.ses_id;
-	srq->str_type = 0; /* XXX remove it */
-
-	return 0;
-}
-
-static struct lnet_process_id_packed *
-lstcon_next_id(int idx, int nkiov, struct bio_vec *kiov)
-{
-	struct lnet_process_id_packed *pid;
-	int i;
-
-	i = idx / SFW_ID_PER_PAGE;
-
-	LASSERT(i < nkiov);
-
-	pid = (struct lnet_process_id_packed *)page_address(kiov[i].bv_page);
-
-	return &pid[idx % SFW_ID_PER_PAGE];
-}
-
-static int
-lstcon_dstnodes_prep(struct lstcon_group *grp, int idx,
-		     int dist, int span, int nkiov, struct bio_vec *kiov)
-{
-	struct lnet_process_id_packed *pid;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	int start;
-	int end;
-	int i = 0;
-
-	LASSERT(dist >= 1);
-	LASSERT(span >= 1);
-	LASSERT(grp->grp_nnode >= 1);
-
-	if (span > grp->grp_nnode)
-		return -EINVAL;
-
-	start = ((idx / dist) * span) % grp->grp_nnode;
-	end = ((idx / dist) * span + span - 1) % grp->grp_nnode;
-
-	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
-		nd = ndl->ndl_node;
-		if (i < start) {
-			i++;
-			continue;
-		}
-
-		if (i > (end >= start ? end : grp->grp_nnode))
-			break;
-
-		pid = lstcon_next_id((i - start), nkiov, kiov);
-		pid->nid = nd->nd_id.nid;
-		pid->pid = nd->nd_id.pid;
-		i++;
-	}
-
-	if (start <= end) /* done */
-		return 0;
-
-	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
-		if (i > grp->grp_nnode + end)
-			break;
-
-		nd = ndl->ndl_node;
-		pid = lstcon_next_id((i - start), nkiov, kiov);
-		pid->nid = nd->nd_id.nid;
-		pid->pid = nd->nd_id.pid;
-		i++;
-	}
-
-	return 0;
-}
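
The dist/span arithmetic above maps each client index onto a window of destination nodes that may wrap around the group; the second list walk handles the wrapped tail. A worked example:

/*
 * Assuming grp_nnode = 4, dist = 1, span = 2:
 *
 *   idx   start = ((idx / dist) * span) % 4   end = (start + span - 1) % 4
 *    0                    0                                1
 *    1                    2                                3
 *    2                    0                                1
 *
 * With span = 3 and idx = 1, start = 3 and end = 1, so end < start
 * and the window wraps: nodes 3, 0 and 1 are selected by the two loops.
 */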
-
-static int
-lstcon_pingrpc_prep(struct lst_test_ping_param *param, struct srpc_test_reqst *req)
-{
-	struct test_ping_req *prq = &req->tsr_u.ping;
-
-	prq->png_size = param->png_size;
-	prq->png_flags = param->png_flags;
-	/* TODO dest */
-	return 0;
-}
-
-static int
-lstcon_bulkrpc_v0_prep(struct lst_test_bulk_param *param,
-		       struct srpc_test_reqst *req)
-{
-	struct test_bulk_req *brq = &req->tsr_u.bulk_v0;
-
-	brq->blk_opc = param->blk_opc;
-	brq->blk_npg = DIV_ROUND_UP(param->blk_size, PAGE_SIZE);
-	brq->blk_flags = param->blk_flags;
-
-	return 0;
-}
-
-static int
-lstcon_bulkrpc_v1_prep(struct lst_test_bulk_param *param, bool is_client,
-		       struct srpc_test_reqst *req)
-{
-	struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1;
-
-	brq->blk_opc = param->blk_opc;
-	brq->blk_flags = param->blk_flags;
-	brq->blk_len = param->blk_size;
-	brq->blk_offset	= is_client ? param->blk_cli_off : param->blk_srv_off;
-
-	return 0;
-}
-
-int
-lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
-		    struct lstcon_test *test, struct lstcon_rpc **crpc)
-{
-	struct lstcon_group *sgrp = test->tes_src_grp;
-	struct lstcon_group *dgrp = test->tes_dst_grp;
-	struct srpc_test_reqst *trq;
-	struct srpc_bulk *bulk;
-	int i;
-	int npg = 0;
-	int nob = 0;
-	int rc = 0;
-
-	if (transop == LST_TRANS_TSBCLIADD) {
-		npg = sfw_id_pages(test->tes_span);
-		nob = !(feats & LST_FEAT_BULK_LEN) ?
-		      npg * PAGE_SIZE :
-		      sizeof(struct lnet_process_id_packed) * test->tes_span;
-	}
-
-	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc);
-	if (rc)
-		return rc;
-
-	trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst;
-
-	if (transop == LST_TRANS_TSBSRVADD) {
-		int ndist = DIV_ROUND_UP(sgrp->grp_nnode, test->tes_dist);
-		int nspan = DIV_ROUND_UP(dgrp->grp_nnode, test->tes_span);
-		int nmax = DIV_ROUND_UP(ndist, nspan);
-
-		trq->tsr_ndest = 0;
-		trq->tsr_loop = nmax * test->tes_dist * test->tes_concur;
-	} else {
-		bulk = &(*crpc)->crp_rpc->crpc_bulk;
-
-		for (i = 0; i < npg; i++) {
-			int len;
-
-			LASSERT(nob > 0);
-
-			len = !(feats & LST_FEAT_BULK_LEN) ?
-			      PAGE_SIZE :
-			      min_t(int, nob, PAGE_SIZE);
-			nob -= len;
-
-			bulk->bk_iovs[i].bv_offset = 0;
-			bulk->bk_iovs[i].bv_len = len;
-			bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL);
-
-			if (!bulk->bk_iovs[i].bv_page) {
-				lstcon_rpc_put(*crpc);
-				return -ENOMEM;
-			}
-		}
-
-		bulk->bk_sink = 0;
-
-		LASSERT(transop == LST_TRANS_TSBCLIADD);
-
-		rc = lstcon_dstnodes_prep(test->tes_dst_grp,
-					  test->tes_cliidx++,
-					  test->tes_dist,
-					  test->tes_span,
-					  npg, &bulk->bk_iovs[0]);
-		if (rc) {
-			lstcon_rpc_put(*crpc);
-			return rc;
-		}
-
-		trq->tsr_ndest = test->tes_span;
-		trq->tsr_loop = test->tes_loop;
-	}
-
-	trq->tsr_sid = console_session.ses_id;
-	trq->tsr_bid = test->tes_hdr.tsb_id;
-	trq->tsr_concur = test->tes_concur;
-	trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0;
-	trq->tsr_stop_onerr = !!test->tes_stop_onerr;
-
-	switch (test->tes_type) {
-	case LST_TEST_PING:
-		trq->tsr_service = SRPC_SERVICE_PING;
-		rc = lstcon_pingrpc_prep((struct lst_test_ping_param *)
-					 &test->tes_param[0], trq);
-		break;
-
-	case LST_TEST_BULK:
-		trq->tsr_service = SRPC_SERVICE_BRW;
-		if (!(feats & LST_FEAT_BULK_LEN)) {
-			rc = lstcon_bulkrpc_v0_prep((struct lst_test_bulk_param *)
-						    &test->tes_param[0], trq);
-		} else {
-			rc = lstcon_bulkrpc_v1_prep((struct lst_test_bulk_param *)
-						    &test->tes_param[0],
-						    trq->tsr_is_client, trq);
-		}
-
-		break;
-	default:
-		LBUG();
-		break;
-	}
-
-	return rc;
-}
-
-static int
-lstcon_sesnew_stat_reply(struct lstcon_rpc_trans *trans,
-			 struct lstcon_node *nd, struct srpc_msg *reply)
-{
-	struct srpc_mksn_reply *mksn_rep = &reply->msg_body.mksn_reply;
-	int status = mksn_rep->mksn_status;
-
-	if (!status &&
-	    (reply->msg_ses_feats & ~LST_FEATS_MASK)) {
-		mksn_rep->mksn_status = EPROTO;
-		status = EPROTO;
-	}
-
-	if (status == EPROTO) {
-		CNETERR("session protocol error from %s: %u\n",
-			libcfs_nid2str(nd->nd_id.nid),
-			reply->msg_ses_feats);
-	}
-
-	if (status)
-		return status;
-
-	if (!trans->tas_feats_updated) {
-		spin_lock(&console_session.ses_rpc_lock);
-		if (!trans->tas_feats_updated) {	/* recheck with lock */
-			trans->tas_feats_updated = 1;
-			trans->tas_features = reply->msg_ses_feats;
-		}
-		spin_unlock(&console_session.ses_rpc_lock);
-	}
-
-	if (reply->msg_ses_feats != trans->tas_features) {
-		CNETERR("Framework features %x from %s differ from the features on this transaction: %x\n",
-			reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid),
-			trans->tas_features);
-		mksn_rep->mksn_status = EPROTO;
-		status = EPROTO;
-	}
-
-	if (!status) {
-		/* session timeout on remote node */
-		nd->nd_timeout = mksn_rep->mksn_timeout;
-	}
-
-	return status;
-}
-
-void
-lstcon_rpc_stat_reply(struct lstcon_rpc_trans *trans, struct srpc_msg *msg,
-		      struct lstcon_node *nd, struct lstcon_trans_stat *stat)
-{
-	struct srpc_rmsn_reply *rmsn_rep;
-	struct srpc_debug_reply *dbg_rep;
-	struct srpc_batch_reply *bat_rep;
-	struct srpc_test_reply *test_rep;
-	struct srpc_stat_reply *stat_rep;
-	int rc = 0;
-
-	switch (trans->tas_opc) {
-	case LST_TRANS_SESNEW:
-		rc = lstcon_sesnew_stat_reply(trans, nd, msg);
-		if (!rc) {
-			lstcon_sesop_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_sesop_stat_failure(stat, 1);
-		break;
-
-	case LST_TRANS_SESEND:
-		rmsn_rep = &msg->msg_body.rmsn_reply;
-		/* ESRCH is not an error for end session */
-		if (!rmsn_rep->rmsn_status ||
-		    rmsn_rep->rmsn_status == ESRCH) {
-			lstcon_sesop_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_sesop_stat_failure(stat, 1);
-		rc = rmsn_rep->rmsn_status;
-		break;
-
-	case LST_TRANS_SESQRY:
-	case LST_TRANS_SESPING:
-		dbg_rep = &msg->msg_body.dbg_reply;
-
-		if (dbg_rep->dbg_status == ESRCH) {
-			lstcon_sesqry_stat_unknown(stat, 1);
-			return;
-		}
-
-		if (lstcon_session_match(dbg_rep->dbg_sid))
-			lstcon_sesqry_stat_active(stat, 1);
-		else
-			lstcon_sesqry_stat_busy(stat, 1);
-		return;
-
-	case LST_TRANS_TSBRUN:
-	case LST_TRANS_TSBSTOP:
-		bat_rep = &msg->msg_body.bat_reply;
-
-		if (!bat_rep->bar_status) {
-			lstcon_tsbop_stat_success(stat, 1);
-			return;
-		}
-
-		if (bat_rep->bar_status == EPERM &&
-		    trans->tas_opc == LST_TRANS_TSBSTOP) {
-			lstcon_tsbop_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_tsbop_stat_failure(stat, 1);
-		rc = bat_rep->bar_status;
-		break;
-
-	case LST_TRANS_TSBCLIQRY:
-	case LST_TRANS_TSBSRVQRY:
-		bat_rep = &msg->msg_body.bat_reply;
-
-		if (bat_rep->bar_active)
-			lstcon_tsbqry_stat_run(stat, 1);
-		else
-			lstcon_tsbqry_stat_idle(stat, 1);
-
-		if (!bat_rep->bar_status)
-			return;
-
-		lstcon_tsbqry_stat_failure(stat, 1);
-		rc = bat_rep->bar_status;
-		break;
-
-	case LST_TRANS_TSBCLIADD:
-	case LST_TRANS_TSBSRVADD:
-		test_rep = &msg->msg_body.tes_reply;
-
-		if (!test_rep->tsr_status) {
-			lstcon_tsbop_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_tsbop_stat_failure(stat, 1);
-		rc = test_rep->tsr_status;
-		break;
-
-	case LST_TRANS_STATQRY:
-		stat_rep = &msg->msg_body.stat_reply;
-
-		if (!stat_rep->str_status) {
-			lstcon_statqry_stat_success(stat, 1);
-			return;
-		}
-
-		lstcon_statqry_stat_failure(stat, 1);
-		rc = stat_rep->str_status;
-		break;
-
-	default:
-		LBUG();
-	}
-
-	if (!stat->trs_fwk_errno)
-		stat->trs_fwk_errno = rc;
-}
-
-int
-lstcon_rpc_trans_ndlist(struct list_head *ndlist,
-			struct list_head *translist, int transop,
-			void *arg, lstcon_rpc_cond_func_t condition,
-			struct lstcon_rpc_trans **transpp)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	struct lstcon_rpc *rpc;
-	unsigned int feats;
-	int rc;
-
-	/* Creating session RPC for a list of nodes */
-
-	rc = lstcon_rpc_trans_prep(translist, transop, &trans);
-	if (rc) {
-		CERROR("Can't create transaction %d: %d\n", transop, rc);
-		return rc;
-	}
-
-	feats = trans->tas_features;
-	list_for_each_entry(ndl, ndlist, ndl_link) {
-		rc = !condition ? 1 :
-		     condition(transop, ndl->ndl_node, arg);
-
-		if (!rc)
-			continue;
-
-		if (rc < 0) {
-			CDEBUG(D_NET, "Condition error while creating RPC for transaction %d: %d\n",
-			       transop, rc);
-			break;
-		}
-
-		nd = ndl->ndl_node;
-
-		switch (transop) {
-		case LST_TRANS_SESNEW:
-		case LST_TRANS_SESEND:
-			rc = lstcon_sesrpc_prep(nd, transop, feats, &rpc);
-			break;
-		case LST_TRANS_SESQRY:
-		case LST_TRANS_SESPING:
-			rc = lstcon_dbgrpc_prep(nd, feats, &rpc);
-			break;
-		case LST_TRANS_TSBCLIADD:
-		case LST_TRANS_TSBSRVADD:
-			rc = lstcon_testrpc_prep(nd, transop, feats,
-						 (struct lstcon_test *)arg,
-						 &rpc);
-			break;
-		case LST_TRANS_TSBRUN:
-		case LST_TRANS_TSBSTOP:
-		case LST_TRANS_TSBCLIQRY:
-		case LST_TRANS_TSBSRVQRY:
-			rc = lstcon_batrpc_prep(nd, transop, feats,
-						(struct lstcon_tsb_hdr *)arg,
-						&rpc);
-			break;
-		case LST_TRANS_STATQRY:
-			rc = lstcon_statrpc_prep(nd, feats, &rpc);
-			break;
-		default:
-			rc = -EINVAL;
-			break;
-		}
-
-		if (rc) {
-			CERROR("Failed to create RPC for transaction %s: %d\n",
-			       lstcon_rpc_trans_name(transop), rc);
-			break;
-		}
-
-		lstcon_rpc_trans_addreq(trans, rpc);
-	}
-
-	if (!rc) {
-		*transpp = trans;
-		return 0;
-	}
-
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-static void
-lstcon_rpc_pinger(void *arg)
-{
-	struct stt_timer *ptimer = (struct stt_timer *)arg;
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_rpc *crpc;
-	struct srpc_msg *rep;
-	struct srpc_debug_reqst *drq;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	int intv;
-	int count = 0;
-	int rc;
-
-	/*
-	 * The RPC pinger is a special case of a transaction;
-	 * it is invoked by a timer at an 8-second interval.
-	 */
-	mutex_lock(&console_session.ses_mutex);
-
-	if (console_session.ses_shutdown || console_session.ses_expired) {
-		mutex_unlock(&console_session.ses_mutex);
-		return;
-	}
-
-	if (!console_session.ses_expired &&
-	    ktime_get_real_seconds() - console_session.ses_laststamp >
-	    (time64_t)console_session.ses_timeout)
-		console_session.ses_expired = 1;
-
-	trans = console_session.ses_ping;
-
-	LASSERT(trans);
-
-	list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) {
-		nd = ndl->ndl_node;
-
-		if (console_session.ses_expired) {
-			/* idle console, end session on all nodes */
-			if (nd->nd_state != LST_NODE_ACTIVE)
-				continue;
-
-			rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND,
-						trans->tas_features, &crpc);
-			if (rc) {
-				CERROR("Out of memory\n");
-				break;
-			}
-
-			lstcon_rpc_trans_addreq(trans, crpc);
-			lstcon_rpc_post(crpc);
-
-			continue;
-		}
-
-		crpc = &nd->nd_ping;
-
-		if (crpc->crp_rpc) {
-			LASSERT(crpc->crp_trans == trans);
-			LASSERT(!list_empty(&crpc->crp_link));
-
-			spin_lock(&crpc->crp_rpc->crpc_lock);
-
-			LASSERT(crpc->crp_posted);
-
-			if (!crpc->crp_finished) {
-				/* in flight */
-				spin_unlock(&crpc->crp_rpc->crpc_lock);
-				continue;
-			}
-
-			spin_unlock(&crpc->crp_rpc->crpc_lock);
-
-			lstcon_rpc_get_reply(crpc, &rep);
-
-			list_del_init(&crpc->crp_link);
-
-			lstcon_rpc_put(crpc);
-		}
-
-		if (nd->nd_state != LST_NODE_ACTIVE)
-			continue;
-
-		intv = (jiffies - nd->nd_stamp) / msecs_to_jiffies(MSEC_PER_SEC);
-		if (intv < nd->nd_timeout / 2)
-			continue;
-
-		rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG,
-				     trans->tas_features, 0, 0, 1, crpc);
-		if (rc) {
-			CERROR("Out of memory\n");
-			break;
-		}
-
-		drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
-		drq->dbg_sid = console_session.ses_id;
-		drq->dbg_flags = 0;
-
-		lstcon_rpc_trans_addreq(trans, crpc);
-		lstcon_rpc_post(crpc);
-
-		count++;
-	}
-
-	if (console_session.ses_expired) {
-		mutex_unlock(&console_session.ses_mutex);
-		return;
-	}
-
-	CDEBUG(D_NET, "Ping %d nodes in session\n", count);
-
-	ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL;
-	stt_add_timer(ptimer);
-
-	mutex_unlock(&console_session.ses_mutex);
-}
-
-int
-lstcon_rpc_pinger_start(void)
-{
-	struct stt_timer *ptimer;
-	int rc;
-
-	LASSERT(list_empty(&console_session.ses_rpc_freelist));
-	LASSERT(!atomic_read(&console_session.ses_rpc_counter));
-
-	rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING,
-				   &console_session.ses_ping);
-	if (rc) {
-		CERROR("Failed to create console pinger\n");
-		return rc;
-	}
-
-	ptimer = &console_session.ses_ping_timer;
-	ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL;
-
-	stt_add_timer(ptimer);
-
-	return 0;
-}
-
-void
-lstcon_rpc_pinger_stop(void)
-{
-	LASSERT(console_session.ses_shutdown);
-
-	stt_del_timer(&console_session.ses_ping_timer);
-
-	lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN);
-	lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat());
-	lstcon_rpc_trans_destroy(console_session.ses_ping);
-
-	memset(lstcon_trans_stat(), 0, sizeof(struct lstcon_trans_stat));
-
-	console_session.ses_ping = NULL;
-}
-
-void
-lstcon_rpc_cleanup_wait(void)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_rpc *crpc;
-	struct lstcon_rpc *temp;
-	struct list_head *pacer;
-	struct list_head zlist;
-
-	/* Called while holding the global mutex */
-
-	LASSERT(console_session.ses_shutdown);
-
-	while (!list_empty(&console_session.ses_trans_list)) {
-		list_for_each(pacer, &console_session.ses_trans_list) {
-			trans = list_entry(pacer, struct lstcon_rpc_trans,
-					   tas_link);
-
-			CDEBUG(D_NET, "Session closed, wakeup transaction %s\n",
-			       lstcon_rpc_trans_name(trans->tas_opc));
-
-			wake_up(&trans->tas_waitq);
-		}
-
-		mutex_unlock(&console_session.ses_mutex);
-
-		CWARN("Session is shutting down, waiting for termination of transactions\n");
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
-
-		mutex_lock(&console_session.ses_mutex);
-	}
-
-	spin_lock(&console_session.ses_rpc_lock);
-
-	lst_wait_until(!atomic_read(&console_session.ses_rpc_counter),
-		       console_session.ses_rpc_lock,
-		       "Network is not accessible or target is down, waiting for %d console RPCs to be recycled\n",
-		       atomic_read(&console_session.ses_rpc_counter));
-
-	list_add(&zlist, &console_session.ses_rpc_freelist);
-	list_del_init(&console_session.ses_rpc_freelist);
-
-	spin_unlock(&console_session.ses_rpc_lock);
-
-	list_for_each_entry_safe(crpc, temp, &zlist, crp_link) {
-		list_del(&crpc->crp_link);
-		kfree(crpc);
-	}
-}
-
-int
-lstcon_rpc_module_init(void)
-{
-	INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list);
-	console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger;
-	console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer;
-
-	console_session.ses_ping = NULL;
-
-	spin_lock_init(&console_session.ses_rpc_lock);
-	atomic_set(&console_session.ses_rpc_counter, 0);
-	INIT_LIST_HEAD(&console_session.ses_rpc_freelist);
-
-	return 0;
-}
-
-void
-lstcon_rpc_module_fini(void)
-{
-	LASSERT(list_empty(&console_session.ses_rpc_freelist));
-	LASSERT(!atomic_read(&console_session.ses_rpc_counter));
-}

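An aside on the pinger removed above: it is not a periodic timer in the usual sense. Each run of lstcon_rpc_pinger() does its work under ses_mutex and then re-arms its own one-shot stt_timer (stt_expires = now + LST_PING_INTERVAL; stt_add_timer()), and it deliberately returns early without re-arming once the session has expired, which is how the ping loop terminates. Below is a minimal userspace sketch of that self-re-arming pattern, using POSIX timers instead of the stt_timer service; all names are illustrative, not part of Lustre.

#include <signal.h>
#include <time.h>
#include <unistd.h>

/* build: cc -o pinger pinger.c -lrt (older glibc needs -lrt) */

#define PING_INTERVAL 8	/* seconds, mirroring LST_PING_INTERVAL */

static timer_t pinger;
static volatile sig_atomic_t expired;	/* would be set when the session times out */

static void rearm(void)
{
	struct itimerspec its = {
		/* one-shot: it_interval stays zero; the handler re-arms,
		 * just as lstcon_rpc_pinger() re-calls stt_add_timer()
		 */
		.it_value = { .tv_sec = PING_INTERVAL },
	};

	timer_settime(pinger, 0, &its, NULL);
}

static void on_tick(int sig)
{
	(void)sig;
	write(1, "tick\n", 5);	/* stand-in for pinging every node */

	if (expired)
		return;		/* session gone: don't re-arm, the loop ends */
	rearm();		/* otherwise schedule the next tick */
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_tick };

	sigaction(SIGALRM, &sa, NULL);	/* NULL sigevent defaults to SIGALRM */
	timer_create(CLOCK_MONOTONIC, NULL, &pinger);
	rearm();

	for (int i = 0; i < 3; i++)
		pause();	/* let a few ticks fire (~24 s) */
	return 0;
}
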
+ 0 - 142
drivers/staging/lustre/lnet/selftest/conrpc.h

@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conrpc.h
- *
- * Console rpc
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- */
-
-#ifndef __LST_CONRPC_H__
-#define __LST_CONRPC_H__
-
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "rpc.h"
-#include "selftest.h"
-
-/* Console rpc and rpc transaction */
-#define LST_TRANS_TIMEOUT	30
-#define LST_TRANS_MIN_TIMEOUT	3
-
-#define LST_VALIDATE_TIMEOUT(t) min(max(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT)
-
-#define LST_PING_INTERVAL	8
-
-struct lstcon_rpc_trans;
-struct lstcon_tsb_hdr;
-struct lstcon_test;
-struct lstcon_node;
-
-struct lstcon_rpc {
-	struct list_head	 crp_link;	/* chain on rpc transaction */
-	struct srpc_client_rpc	*crp_rpc;	/* client rpc */
-	struct lstcon_node	*crp_node;	/* destination node */
-	struct lstcon_rpc_trans *crp_trans;	/* conrpc transaction */
-
-	unsigned int		 crp_posted:1;	/* rpc is posted */
-	unsigned int		 crp_finished:1; /* rpc is finished */
-	unsigned int		 crp_unpacked:1; /* reply is unpacked */
-	/** RPC is embedded in another structure and can't be freed on its own */
-	unsigned int		 crp_embedded:1;
-	int			 crp_status;	/* console rpc errors */
-	unsigned long		 crp_stamp;	/* replied time stamp */
-};
-
-struct lstcon_rpc_trans {
-	struct list_head  tas_olink;	     /* link chain on owner list */
-	struct list_head  tas_link;	     /* link chain on global list */
-	int		  tas_opc;	     /* operation code of transaction */
-	unsigned int	  tas_feats_updated; /* features mask is uptodate */
-	unsigned int	  tas_features;      /* test features mask */
-	wait_queue_head_t tas_waitq;	     /* wait queue head */
-	atomic_t	  tas_remaining;     /* # of un-scheduled rpcs */
-	struct list_head  tas_rpcs_list;     /* queued requests */
-};
-
-#define LST_TRANS_PRIVATE	0x1000
-
-#define LST_TRANS_SESNEW	(LST_TRANS_PRIVATE | 0x01)
-#define LST_TRANS_SESEND	(LST_TRANS_PRIVATE | 0x02)
-#define LST_TRANS_SESQRY	0x03
-#define LST_TRANS_SESPING	0x04
-
-#define LST_TRANS_TSBCLIADD	(LST_TRANS_PRIVATE | 0x11)
-#define LST_TRANS_TSBSRVADD	(LST_TRANS_PRIVATE | 0x12)
-#define LST_TRANS_TSBRUN	(LST_TRANS_PRIVATE | 0x13)
-#define LST_TRANS_TSBSTOP	(LST_TRANS_PRIVATE | 0x14)
-#define LST_TRANS_TSBCLIQRY	0x15
-#define LST_TRANS_TSBSRVQRY	0x16
-
-#define LST_TRANS_STATQRY	0x21
-
-typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
-typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *,
-					 struct lstcon_rpc_ent __user *);
-
-int  lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
-			unsigned int version, struct lstcon_rpc **crpc);
-int  lstcon_dbgrpc_prep(struct lstcon_node *nd,
-			unsigned int version, struct lstcon_rpc **crpc);
-int  lstcon_batrpc_prep(struct lstcon_node *nd, int transop,
-			unsigned int version, struct lstcon_tsb_hdr *tsb,
-			struct lstcon_rpc **crpc);
-int  lstcon_testrpc_prep(struct lstcon_node *nd, int transop,
-			 unsigned int version, struct lstcon_test *test,
-			 struct lstcon_rpc **crpc);
-int  lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int version,
-			 struct lstcon_rpc **crpc);
-void lstcon_rpc_put(struct lstcon_rpc *crpc);
-int  lstcon_rpc_trans_prep(struct list_head *translist,
-			   int transop, struct lstcon_rpc_trans **transpp);
-int  lstcon_rpc_trans_ndlist(struct list_head *ndlist,
-			     struct list_head *translist, int transop,
-			     void *arg, lstcon_rpc_cond_func_t condition,
-			     struct lstcon_rpc_trans **transpp);
-void lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans,
-			   struct lstcon_trans_stat *stat);
-int  lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
-				  struct list_head __user *head_up,
-				  lstcon_rpc_readent_func_t readent);
-void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error);
-void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans);
-void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans,
-			     struct lstcon_rpc *req);
-int  lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout);
-int  lstcon_rpc_pinger_start(void);
-void lstcon_rpc_pinger_stop(void);
-void lstcon_rpc_cleanup_wait(void);
-int  lstcon_rpc_module_init(void);
-void lstcon_rpc_module_fini(void);
-
-#endif

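A note on the opcode table in the header above: the LST_TRANS_* values are partitioned by the LST_TRANS_PRIVATE bit. Operations that mutate console-side state (session new/end, test/batch add, run, stop) carry the flag, while plain queries (SESQRY, SESPING, TSBCLIQRY, TSBSRVQRY, STATQRY) do not, so a single bit test distinguishes the two classes. A self-contained sketch of that check follows; the helper name is hypothetical, not in the Lustre tree.

#include <stdio.h>

#define LST_TRANS_PRIVATE	0x1000

#define LST_TRANS_SESNEW	(LST_TRANS_PRIVATE | 0x01)
#define LST_TRANS_SESQRY	0x03
#define LST_TRANS_STATQRY	0x21

/* hypothetical helper: console-private ops carry the PRIVATE bit */
static int lst_trans_is_private(int transop)
{
	return (transop & LST_TRANS_PRIVATE) != 0;
}

int main(void)
{
	printf("SESNEW  -> %d\n", lst_trans_is_private(LST_TRANS_SESNEW));	/* 1 */
	printf("SESQRY  -> %d\n", lst_trans_is_private(LST_TRANS_SESQRY));	/* 0 */
	printf("STATQRY -> %d\n", lst_trans_is_private(LST_TRANS_STATQRY));	/* 0 */
	return 0;
}
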
+ 0 - 2104
drivers/staging/lustre/lnet/selftest/console.c

@@ -1,2104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/console.c
- *
- * Infrastructure of LST console
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include "console.h"
-#include "conrpc.h"
-
-#define LST_NODE_STATE_COUNTER(nd, p)			\
-do {							\
-	if ((nd)->nd_state == LST_NODE_ACTIVE)		\
-		(p)->nle_nactive++;			\
-	else if ((nd)->nd_state == LST_NODE_BUSY)	\
-		(p)->nle_nbusy++;			\
-	else if ((nd)->nd_state == LST_NODE_DOWN)	\
-		(p)->nle_ndown++;			\
-	else						\
-		(p)->nle_nunknown++;			\
-	(p)->nle_nnode++;				\
-} while (0)
-
-struct lstcon_session console_session;
-
-static void
-lstcon_node_get(struct lstcon_node *nd)
-{
-	LASSERT(nd->nd_ref >= 1);
-
-	nd->nd_ref++;
-}
-
-static int
-lstcon_node_find(struct lnet_process_id id, struct lstcon_node **ndpp,
-		 int create)
-{
-	struct lstcon_ndlink	*ndl;
-	unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE;
-
-	LASSERT(id.nid != LNET_NID_ANY);
-
-	list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx],
-			    ndl_hlink) {
-		if (ndl->ndl_node->nd_id.nid != id.nid ||
-		    ndl->ndl_node->nd_id.pid != id.pid)
-			continue;
-
-		lstcon_node_get(ndl->ndl_node);
-		*ndpp = ndl->ndl_node;
-		return 0;
-	}
-
-	if (!create)
-		return -ENOENT;
-
-	*ndpp = kzalloc(sizeof(**ndpp) + sizeof(*ndl), GFP_KERNEL);
-	if (!*ndpp)
-		return -ENOMEM;
-
-	ndl = (struct lstcon_ndlink *)(*ndpp + 1);
-
-	ndl->ndl_node = *ndpp;
-
-	ndl->ndl_node->nd_ref = 1;
-	ndl->ndl_node->nd_id = id;
-	ndl->ndl_node->nd_stamp = jiffies;
-	ndl->ndl_node->nd_state = LST_NODE_UNKNOWN;
-	ndl->ndl_node->nd_timeout = 0;
-	memset(&ndl->ndl_node->nd_ping, 0, sizeof(struct lstcon_rpc));
-
-	/*
-	 * queued in global hash & list; no refcount is taken by
-	 * the global hash & list, so when the caller releases its
-	 * refcount the node will be released
-	 */
-	list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]);
-	list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list);
-
-	return 0;
-}
-
-static void
-lstcon_node_put(struct lstcon_node *nd)
-{
-	struct lstcon_ndlink *ndl;
-
-	LASSERT(nd->nd_ref > 0);
-
-	if (--nd->nd_ref > 0)
-		return;
-
-	ndl = (struct lstcon_ndlink *)(nd + 1);
-
-	LASSERT(!list_empty(&ndl->ndl_link));
-	LASSERT(!list_empty(&ndl->ndl_hlink));
-
-	/* remove from session */
-	list_del(&ndl->ndl_link);
-	list_del(&ndl->ndl_hlink);
-
-	kfree(nd);
-}
-
-static int
-lstcon_ndlink_find(struct list_head *hash, struct lnet_process_id id,
-		   struct lstcon_ndlink **ndlpp, int create)
-{
-	unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	int rc;
-
-	if (id.nid == LNET_NID_ANY)
-		return -EINVAL;
-
-	/* search in hash */
-	list_for_each_entry(ndl, &hash[idx], ndl_hlink) {
-		if (ndl->ndl_node->nd_id.nid != id.nid ||
-		    ndl->ndl_node->nd_id.pid != id.pid)
-			continue;
-
-		*ndlpp = ndl;
-		return 0;
-	}
-
-	if (!create)
-		return -ENOENT;
-
-	/* find or create in session hash */
-	rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0);
-	if (rc)
-		return rc;
-
-	ndl = kzalloc(sizeof(struct lstcon_ndlink), GFP_NOFS);
-	if (!ndl) {
-		lstcon_node_put(nd);
-		return -ENOMEM;
-	}
-
-	*ndlpp = ndl;
-
-	ndl->ndl_node = nd;
-	INIT_LIST_HEAD(&ndl->ndl_link);
-	list_add_tail(&ndl->ndl_hlink, &hash[idx]);
-
-	return 0;
-}
-
-static void
-lstcon_ndlink_release(struct lstcon_ndlink *ndl)
-{
-	LASSERT(list_empty(&ndl->ndl_link));
-	LASSERT(!list_empty(&ndl->ndl_hlink));
-
-	list_del(&ndl->ndl_hlink); /* delete from hash */
-	lstcon_node_put(ndl->ndl_node);
-
-	kfree(ndl);
-}
-
-static int
-lstcon_group_alloc(char *name, struct lstcon_group **grpp)
-{
-	struct lstcon_group *grp;
-	int i;
-
-	grp = kmalloc(offsetof(struct lstcon_group,
-			       grp_ndl_hash[LST_NODE_HASHSIZE]),
-		      GFP_KERNEL);
-	if (!grp)
-		return -ENOMEM;
-
-	grp->grp_ref = 1;
-	if (name) {
-		if (strlen(name) > sizeof(grp->grp_name) - 1) {
-			kfree(grp);
-			return -E2BIG;
-		}
-		strncpy(grp->grp_name, name, sizeof(grp->grp_name));
-	}
-
-	INIT_LIST_HEAD(&grp->grp_link);
-	INIT_LIST_HEAD(&grp->grp_ndl_list);
-	INIT_LIST_HEAD(&grp->grp_trans_list);
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++)
-		INIT_LIST_HEAD(&grp->grp_ndl_hash[i]);
-
-	*grpp = grp;
-
-	return 0;
-}
-
-static void
-lstcon_group_addref(struct lstcon_group *grp)
-{
-	grp->grp_ref++;
-}
-
-static void lstcon_group_ndlink_release(struct lstcon_group *,
-					struct lstcon_ndlink *);
-
-static void
-lstcon_group_drain(struct lstcon_group *grp, int keep)
-{
-	struct lstcon_ndlink *ndl;
-	struct lstcon_ndlink *tmp;
-
-	list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) {
-		if (!(ndl->ndl_node->nd_state & keep))
-			lstcon_group_ndlink_release(grp, ndl);
-	}
-}
-
-static void
-lstcon_group_decref(struct lstcon_group *grp)
-{
-	int i;
-
-	if (--grp->grp_ref > 0)
-		return;
-
-	if (!list_empty(&grp->grp_link))
-		list_del(&grp->grp_link);
-
-	lstcon_group_drain(grp, 0);
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++)
-		LASSERT(list_empty(&grp->grp_ndl_hash[i]));
-
-	kfree(grp);
-}
-
-static int
-lstcon_group_find(const char *name, struct lstcon_group **grpp)
-{
-	struct lstcon_group *grp;
-
-	list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
-		if (strncmp(grp->grp_name, name, LST_NAME_SIZE))
-			continue;
-
-		lstcon_group_addref(grp); /* +1 ref for caller */
-		*grpp = grp;
-		return 0;
-	}
-
-	return -ENOENT;
-}
-
-static int
-lstcon_group_ndlink_find(struct lstcon_group *grp, struct lnet_process_id id,
-			 struct lstcon_ndlink **ndlpp, int create)
-{
-	int rc;
-
-	rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create);
-	if (rc)
-		return rc;
-
-	if (!list_empty(&(*ndlpp)->ndl_link))
-		return 0;
-
-	list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list);
-	grp->grp_nnode++;
-
-	return 0;
-}
-
-static void
-lstcon_group_ndlink_release(struct lstcon_group *grp, struct lstcon_ndlink *ndl)
-{
-	list_del_init(&ndl->ndl_link);
-	lstcon_ndlink_release(ndl);
-	grp->grp_nnode--;
-}
-
-static void
-lstcon_group_ndlink_move(struct lstcon_group *old,
-			 struct lstcon_group *new, struct lstcon_ndlink *ndl)
-{
-	unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) %
-					LST_NODE_HASHSIZE;
-
-	list_del(&ndl->ndl_hlink);
-	list_del(&ndl->ndl_link);
-	old->grp_nnode--;
-
-	list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]);
-	list_add_tail(&ndl->ndl_link, &new->grp_ndl_list);
-	new->grp_nnode++;
-}
-
-static void
-lstcon_group_move(struct lstcon_group *old, struct lstcon_group *new)
-{
-	struct lstcon_ndlink *ndl;
-
-	while (!list_empty(&old->grp_ndl_list)) {
-		ndl = list_entry(old->grp_ndl_list.next,
-				 struct lstcon_ndlink, ndl_link);
-		lstcon_group_ndlink_move(old, new, ndl);
-	}
-}
-
-static int
-lstcon_sesrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
-	struct lstcon_group *grp = (struct lstcon_group *)arg;
-
-	switch (transop) {
-	case LST_TRANS_SESNEW:
-		if (nd->nd_state == LST_NODE_ACTIVE)
-			return 0;
-		break;
-
-	case LST_TRANS_SESEND:
-		if (nd->nd_state != LST_NODE_ACTIVE)
-			return 0;
-
-		if (grp && nd->nd_ref > 1)
-			return 0;
-		break;
-
-	case LST_TRANS_SESQRY:
-		break;
-
-	default:
-		LBUG();
-	}
-
-	return 1;
-}
-
-static int
-lstcon_sesrpc_readent(int transop, struct srpc_msg *msg,
-		      struct lstcon_rpc_ent __user *ent_up)
-{
-	struct srpc_debug_reply *rep;
-
-	switch (transop) {
-	case LST_TRANS_SESNEW:
-	case LST_TRANS_SESEND:
-		return 0;
-
-	case LST_TRANS_SESQRY:
-		rep = &msg->msg_body.dbg_reply;
-
-		if (copy_to_user(&ent_up->rpe_priv[0],
-				 &rep->dbg_timeout, sizeof(int)) ||
-		    copy_to_user(&ent_up->rpe_payload[0],
-				 &rep->dbg_name, LST_NAME_SIZE))
-			return -EFAULT;
-
-		return 0;
-
-	default:
-		LBUG();
-	}
-
-	return 0;
-}
-
-static int
-lstcon_group_nodes_add(struct lstcon_group *grp,
-		       int count, struct lnet_process_id __user *ids_up,
-		       unsigned int *featp,
-		       struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_ndlink	*ndl;
-	struct lstcon_group *tmp;
-	struct lnet_process_id id;
-	int i;
-	int rc;
-
-	rc = lstcon_group_alloc(NULL, &tmp);
-	if (rc) {
-		CERROR("Out of memory\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0 ; i < count; i++) {
-		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
-			rc = -EFAULT;
-			break;
-		}
-
-		/* skip if it's in this group already */
-		rc = lstcon_group_ndlink_find(grp, id, &ndl, 0);
-		if (!rc)
-			continue;
-
-		/* add to tmp group */
-		rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1);
-		if (rc) {
-			CERROR("Can't create ndlink, out of memory\n");
-			break;
-		}
-	}
-
-	if (rc) {
-		lstcon_group_decref(tmp);
-		return rc;
-	}
-
-	rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
-				     &tmp->grp_trans_list, LST_TRANS_SESNEW,
-				     tmp, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		lstcon_group_decref(tmp);
-		return rc;
-	}
-
-	/* post all RPCs */
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up,
-					  lstcon_sesrpc_readent);
-	*featp = trans->tas_features;
-
-	/* destroy all RPCs */
-	lstcon_rpc_trans_destroy(trans);
-
-	lstcon_group_move(tmp, grp);
-	lstcon_group_decref(tmp);
-
-	return rc;
-}
-
-static int
-lstcon_group_nodes_remove(struct lstcon_group *grp,
-			  int count, struct lnet_process_id __user *ids_up,
-			  struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_group *tmp;
-	struct lnet_process_id id;
-	int rc;
-	int i;
-
-	/* End the session on the given nodes and remove them from the group */
-
-	rc = lstcon_group_alloc(NULL, &tmp);
-	if (rc) {
-		CERROR("Out of memory\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < count; i++) {
-		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
-			rc = -EFAULT;
-			goto error;
-		}
-
-		/* move node to tmp group */
-		if (!lstcon_group_ndlink_find(grp, id, &ndl, 0))
-			lstcon_group_ndlink_move(grp, tmp, ndl);
-	}
-
-	rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
-				     &tmp->grp_trans_list, LST_TRANS_SESEND,
-				     tmp, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		goto error;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-	lstcon_rpc_trans_destroy(trans);
-	/* release nodes anyway, because we can't roll back their status */
-	lstcon_group_decref(tmp);
-
-	return rc;
-error:
-	lstcon_group_move(tmp, grp);
-	lstcon_group_decref(tmp);
-
-	return rc;
-}
-
-int
-lstcon_group_add(char *name)
-{
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp) ? 0 : -EEXIST;
-	if (rc) {
-		/* found a group with the same name */
-		lstcon_group_decref(grp);
-		return rc;
-	}
-
-	rc = lstcon_group_alloc(name, &grp);
-	if (rc) {
-		CERROR("Can't allocate descriptor for group %s\n", name);
-		return -ENOMEM;
-	}
-
-	list_add_tail(&grp->grp_link, &console_session.ses_grp_list);
-
-	return rc;
-}
-
-int
-lstcon_nodes_add(char *name, int count, struct lnet_process_id __user *ids_up,
-		 unsigned int *featp, struct list_head __user *result_up)
-{
-	struct lstcon_group *grp;
-	int rc;
-
-	LASSERT(count > 0);
-	LASSERT(ids_up);
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by other threads or a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-
-		return -EBUSY;
-	}
-
-	rc = lstcon_group_nodes_add(grp, count, ids_up, featp, result_up);
-
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_group_del(char *name)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group: %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by other threads or a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-		return -EBUSY;
-	}
-
-	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
-				     &grp->grp_trans_list, LST_TRANS_SESEND,
-				     grp, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		lstcon_group_decref(grp);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	lstcon_rpc_trans_destroy(trans);
-
-	lstcon_group_decref(grp);
-	/*
-	 * -1 ref for the session; it has been destroyed and its
-	 * status can't be rolled back, so destroy the group anyway
-	 */
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_group_clean(char *name, int args)
-{
-	struct lstcon_group *grp = NULL;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-		return -EBUSY;
-	}
-
-	args = (LST_NODE_ACTIVE | LST_NODE_BUSY |
-		LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args;
-
-	lstcon_group_drain(grp, args);
-
-	lstcon_group_decref(grp);
-	/* release empty group */
-	if (list_empty(&grp->grp_ndl_list))
-		lstcon_group_decref(grp);
-
-	return 0;
-}
-
-int
-lstcon_nodes_remove(char *name, int count,
-		    struct lnet_process_id __user *ids_up,
-		    struct list_head __user *result_up)
-{
-	struct lstcon_group *grp = NULL;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group: %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-		return -EBUSY;
-	}
-
-	rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up);
-
-	lstcon_group_decref(grp);
-	/* release empty group */
-	if (list_empty(&grp->grp_ndl_list))
-		lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_group_refresh(char *name, struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group: %s\n", name);
-		return rc;
-	}
-
-	if (grp->grp_ref > 2) {
-		/* referenced by a test */
-		CDEBUG(D_NET, "Group %s is busy\n", name);
-		lstcon_group_decref(grp);
-		return -EBUSY;
-	}
-
-	/* re-invite all inactive nodes in the group */
-	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
-				     &grp->grp_trans_list, LST_TRANS_SESNEW,
-				     grp, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		/* local error, return */
-		CDEBUG(D_NET, "Can't create transaction: %d\n", rc);
-		lstcon_group_decref(grp);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-	lstcon_rpc_trans_destroy(trans);
-	/* -ref for me */
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_group_list(int index, int len, char __user *name_up)
-{
-	struct lstcon_group *grp;
-
-	LASSERT(index >= 0);
-	LASSERT(name_up);
-
-	list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
-		if (!index--) {
-			return copy_to_user(name_up, grp->grp_name, len) ?
-					    -EFAULT : 0;
-		}
-	}
-
-	return -ENOENT;
-}
-
-static int
-lstcon_nodes_getent(struct list_head *head, int *index_p,
-		    int *count_p, struct lstcon_node_ent __user *dents_up)
-{
-	struct lstcon_ndlink *ndl;
-	struct lstcon_node *nd;
-	int count = 0;
-	int index = 0;
-
-	LASSERT(index_p && count_p);
-	LASSERT(dents_up);
-	LASSERT(*index_p >= 0);
-	LASSERT(*count_p > 0);
-
-	list_for_each_entry(ndl, head, ndl_link) {
-		if (index++ < *index_p)
-			continue;
-
-		if (count >= *count_p)
-			break;
-
-		nd = ndl->ndl_node;
-		if (copy_to_user(&dents_up[count].nde_id,
-				 &nd->nd_id, sizeof(nd->nd_id)) ||
-		    copy_to_user(&dents_up[count].nde_state,
-				 &nd->nd_state, sizeof(nd->nd_state)))
-			return -EFAULT;
-
-		count++;
-	}
-
-	if (index <= *index_p)
-		return -ENOENT;
-
-	*count_p = count;
-	*index_p = index;
-
-	return 0;
-}
-
-int
-lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gents_p,
-		  int *index_p, int *count_p,
-		  struct lstcon_node_ent __user *dents_up)
-{
-	struct lstcon_ndlist_ent *gentp;
-	struct lstcon_group *grp;
-	struct lstcon_ndlink *ndl;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", name);
-		return rc;
-	}
-
-	if (dents_up) {
-		/* verbose query */
-		rc = lstcon_nodes_getent(&grp->grp_ndl_list,
-					 index_p, count_p, dents_up);
-		lstcon_group_decref(grp);
-
-		return rc;
-	}
-
-	/* non-verbose query */
-	gentp = kzalloc(sizeof(struct lstcon_ndlist_ent), GFP_NOFS);
-	if (!gentp) {
-		CERROR("Can't allocate ndlist_ent\n");
-		lstcon_group_decref(grp);
-
-		return -ENOMEM;
-	}
-
-	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link)
-		LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp);
-
-	rc = copy_to_user(gents_p, gentp,
-			  sizeof(struct lstcon_ndlist_ent)) ? -EFAULT : 0;
-
-	kfree(gentp);
-
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-static int
-lstcon_batch_find(const char *name, struct lstcon_batch **batpp)
-{
-	struct lstcon_batch *bat;
-
-	list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
-		if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) {
-			*batpp = bat;
-			return 0;
-		}
-	}
-
-	return -ENOENT;
-}
-
-int
-lstcon_batch_add(char *name)
-{
-	struct lstcon_batch *bat;
-	int i;
-	int rc;
-
-	rc = !lstcon_batch_find(name, &bat) ? -EEXIST : 0;
-	if (rc) {
-		CDEBUG(D_NET, "Batch %s already exists\n", name);
-		return rc;
-	}
-
-	bat = kzalloc(sizeof(struct lstcon_batch), GFP_NOFS);
-	if (!bat) {
-		CERROR("Can't allocate descriptor for batch %s\n", name);
-		return -ENOMEM;
-	}
-
-	bat->bat_cli_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE,
-				    GFP_KERNEL);
-	if (!bat->bat_cli_hash) {
-		CERROR("Can't allocate hash for batch %s\n", name);
-		kfree(bat);
-
-		return -ENOMEM;
-	}
-
-	bat->bat_srv_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE,
-				    GFP_KERNEL);
-	if (!bat->bat_srv_hash) {
-		CERROR("Can't allocate hash for batch %s\n", name);
-		kfree(bat->bat_cli_hash);
-		kfree(bat);
-
-		return -ENOMEM;
-	}
-
-	if (strlen(name) > sizeof(bat->bat_name) - 1) {
-		kfree(bat->bat_srv_hash);
-		kfree(bat->bat_cli_hash);
-		kfree(bat);
-		return -E2BIG;
-	}
-	strncpy(bat->bat_name, name, sizeof(bat->bat_name));
-	bat->bat_hdr.tsb_index = 0;
-	bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie;
-
-	bat->bat_ntest = 0;
-	bat->bat_state = LST_BATCH_IDLE;
-
-	INIT_LIST_HEAD(&bat->bat_cli_list);
-	INIT_LIST_HEAD(&bat->bat_srv_list);
-	INIT_LIST_HEAD(&bat->bat_test_list);
-	INIT_LIST_HEAD(&bat->bat_trans_list);
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++) {
-		INIT_LIST_HEAD(&bat->bat_cli_hash[i]);
-		INIT_LIST_HEAD(&bat->bat_srv_hash[i]);
-	}
-
-	list_add_tail(&bat->bat_link, &console_session.ses_bat_list);
-
-	return rc;
-}
-
-int
-lstcon_batch_list(int index, int len, char __user *name_up)
-{
-	struct lstcon_batch *bat;
-
-	LASSERT(name_up);
-	LASSERT(index >= 0);
-
-	list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
-		if (!index--) {
-			return copy_to_user(name_up, bat->bat_name, len) ?
-					    -EFAULT : 0;
-		}
-	}
-
-	return -ENOENT;
-}
-
-int
-lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up,
-		  int server, int testidx, int *index_p, int *ndent_p,
-		  struct lstcon_node_ent __user *dents_up)
-{
-	struct lstcon_test_batch_ent *entp;
-	struct list_head *clilst;
-	struct list_head *srvlst;
-	struct lstcon_test *test = NULL;
-	struct lstcon_batch *bat;
-	struct lstcon_ndlink	*ndl;
-	int rc;
-
-	rc = lstcon_batch_find(name, &bat);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find batch %s\n", name);
-		return -ENOENT;
-	}
-
-	if (testidx > 0) {
-		/* query a test; test index starts from 1 */
-		list_for_each_entry(test, &bat->bat_test_list, tes_link) {
-			if (testidx-- == 1)
-				break;
-		}
-
-		if (testidx > 0) {
-			CDEBUG(D_NET, "Can't find specified test in batch\n");
-			return -ENOENT;
-		}
-	}
-
-	clilst = !test ? &bat->bat_cli_list :
-			 &test->tes_src_grp->grp_ndl_list;
-	srvlst = !test ? &bat->bat_srv_list :
-			 &test->tes_dst_grp->grp_ndl_list;
-
-	if (dents_up) {
-		rc = lstcon_nodes_getent((server ? srvlst : clilst),
-					 index_p, ndent_p, dents_up);
-		return rc;
-	}
-
-	/* non-verbose query */
-	entp = kzalloc(sizeof(struct lstcon_test_batch_ent), GFP_NOFS);
-	if (!entp)
-		return -ENOMEM;
-
-	if (!test) {
-		entp->u.tbe_batch.bae_ntest = bat->bat_ntest;
-		entp->u.tbe_batch.bae_state = bat->bat_state;
-	} else {
-		entp->u.tbe_test.tse_type = test->tes_type;
-		entp->u.tbe_test.tse_loop = test->tes_loop;
-		entp->u.tbe_test.tse_concur = test->tes_concur;
-	}
-
-	list_for_each_entry(ndl, clilst, ndl_link)
-		LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle);
-
-	list_for_each_entry(ndl, srvlst, ndl_link)
-		LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle);
-
-	rc = copy_to_user(ent_up, entp,
-			  sizeof(struct lstcon_test_batch_ent)) ? -EFAULT : 0;
-
-	kfree(entp);
-
-	return rc;
-}
-
-static int
-lstcon_batrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
-	switch (transop) {
-	case LST_TRANS_TSBRUN:
-		if (nd->nd_state != LST_NODE_ACTIVE)
-			return -ENETDOWN;
-		break;
-
-	case LST_TRANS_TSBSTOP:
-		if (nd->nd_state != LST_NODE_ACTIVE)
-			return 0;
-		break;
-
-	case LST_TRANS_TSBCLIQRY:
-	case LST_TRANS_TSBSRVQRY:
-		break;
-	}
-
-	return 1;
-}
-
-static int
-lstcon_batch_op(struct lstcon_batch *bat, int transop,
-		struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	int rc;
-
-	rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list,
-				     &bat->bat_trans_list, transop,
-				     bat, lstcon_batrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-int
-lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up)
-{
-	struct lstcon_batch *bat;
-	int rc;
-
-	if (lstcon_batch_find(name, &bat)) {
-		CDEBUG(D_NET, "Can't find batch %s\n", name);
-		return -ENOENT;
-	}
-
-	bat->bat_arg = timeout;
-
-	rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up);
-
-	/* mark batch as running if it's started on any node */
-	if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0))
-		bat->bat_state = LST_BATCH_RUNNING;
-
-	return rc;
-}
-
-int
-lstcon_batch_stop(char *name, int force, struct list_head __user *result_up)
-{
-	struct lstcon_batch *bat;
-	int rc;
-
-	if (lstcon_batch_find(name, &bat)) {
-		CDEBUG(D_NET, "Can't find batch %s\n", name);
-		return -ENOENT;
-	}
-
-	bat->bat_arg = force;
-
-	rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up);
-
-	/* mark batch as stopped if all RPCs finished */
-	if (!lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0))
-		bat->bat_state = LST_BATCH_IDLE;
-
-	return rc;
-}
-
-static void
-lstcon_batch_destroy(struct lstcon_batch *bat)
-{
-	struct lstcon_ndlink *ndl;
-	struct lstcon_test *test;
-	int i;
-
-	list_del(&bat->bat_link);
-
-	while (!list_empty(&bat->bat_test_list)) {
-		test = list_entry(bat->bat_test_list.next,
-				  struct lstcon_test, tes_link);
-		LASSERT(list_empty(&test->tes_trans_list));
-
-		list_del(&test->tes_link);
-
-		lstcon_group_decref(test->tes_src_grp);
-		lstcon_group_decref(test->tes_dst_grp);
-
-		kfree(test);
-	}
-
-	LASSERT(list_empty(&bat->bat_trans_list));
-
-	while (!list_empty(&bat->bat_cli_list)) {
-		ndl = list_entry(bat->bat_cli_list.next,
-				 struct lstcon_ndlink, ndl_link);
-		list_del_init(&ndl->ndl_link);
-
-		lstcon_ndlink_release(ndl);
-	}
-
-	while (!list_empty(&bat->bat_srv_list)) {
-		ndl = list_entry(bat->bat_srv_list.next,
-				 struct lstcon_ndlink, ndl_link);
-		list_del_init(&ndl->ndl_link);
-
-		lstcon_ndlink_release(ndl);
-	}
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++) {
-		LASSERT(list_empty(&bat->bat_cli_hash[i]));
-		LASSERT(list_empty(&bat->bat_srv_hash[i]));
-	}
-
-	kfree(bat->bat_cli_hash);
-	kfree(bat->bat_srv_hash);
-	kfree(bat);
-}
-
-static int
-lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
-	struct lstcon_test *test;
-	struct lstcon_batch *batch;
-	struct lstcon_ndlink *ndl;
-	struct list_head *hash;
-	struct list_head *head;
-
-	test = (struct lstcon_test *)arg;
-	LASSERT(test);
-
-	batch = test->tes_batch;
-	LASSERT(batch);
-
-	if (test->tes_oneside &&
-	    transop == LST_TRANS_TSBSRVADD)
-		return 0;
-
-	if (nd->nd_state != LST_NODE_ACTIVE)
-		return -ENETDOWN;
-
-	if (transop == LST_TRANS_TSBCLIADD) {
-		hash = batch->bat_cli_hash;
-		head = &batch->bat_cli_list;
-
-	} else {
-		LASSERT(transop == LST_TRANS_TSBSRVADD);
-
-		hash = batch->bat_srv_hash;
-		head = &batch->bat_srv_list;
-	}
-
-	LASSERT(nd->nd_id.nid != LNET_NID_ANY);
-
-	if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1))
-		return -ENOMEM;
-
-	if (list_empty(&ndl->ndl_link))
-		list_add_tail(&ndl->ndl_link, head);
-
-	return 1;
-}
-
-static int
-lstcon_test_nodes_add(struct lstcon_test *test,
-		      struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_group *grp;
-	int transop;
-	int rc;
-
-	LASSERT(test->tes_src_grp);
-	LASSERT(test->tes_dst_grp);
-
-	transop = LST_TRANS_TSBSRVADD;
-	grp = test->tes_dst_grp;
-again:
-	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
-				     &test->tes_trans_list, transop,
-				     test, lstcon_testrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	if (lstcon_trans_stat()->trs_rpc_errno ||
-	    lstcon_trans_stat()->trs_fwk_errno) {
-		lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-		lstcon_rpc_trans_destroy(trans);
-		/* return if any error */
-		CDEBUG(D_NET, "Failed to add test %s, RPC error %d, framework error %d\n",
-		       transop == LST_TRANS_TSBCLIADD ? "client" : "server",
-		       lstcon_trans_stat()->trs_rpc_errno,
-		       lstcon_trans_stat()->trs_fwk_errno);
-
-		return rc;
-	}
-
-	lstcon_rpc_trans_destroy(trans);
-
-	if (transop == LST_TRANS_TSBCLIADD)
-		return rc;
-
-	transop = LST_TRANS_TSBCLIADD;
-	grp = test->tes_src_grp;
-	test->tes_cliidx = 0;
-
-	/* requests to test clients */
-	goto again;
-}
-
-static int
-lstcon_verify_batch(const char *name, struct lstcon_batch **batch)
-{
-	int rc;
-
-	rc = lstcon_batch_find(name, batch);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find batch %s\n", name);
-		return rc;
-	}
-
-	if ((*batch)->bat_state != LST_BATCH_IDLE) {
-		CDEBUG(D_NET, "Can't change running batch %s\n", name);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int
-lstcon_verify_group(const char *name, struct lstcon_group **grp)
-{
-	int rc;
-	struct lstcon_ndlink	*ndl;
-
-	rc = lstcon_group_find(name, grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", name);
-		return rc;
-	}
-
-	list_for_each_entry(ndl, &(*grp)->grp_ndl_list, ndl_link) {
-		if (ndl->ndl_node->nd_state == LST_NODE_ACTIVE)
-			return 0;
-	}
-
-	CDEBUG(D_NET, "Group %s has no ACTIVE nodes\n", name);
-
-	return -EINVAL;
-}
-
-int
-lstcon_test_add(char *batch_name, int type, int loop,
-		int concur, int dist, int span,
-		char *src_name, char *dst_name,
-		void *param, int paramlen, int *retp,
-		struct list_head __user *result_up)
-{
-	struct lstcon_test *test = NULL;
-	int rc;
-	struct lstcon_group *src_grp = NULL;
-	struct lstcon_group *dst_grp = NULL;
-	struct lstcon_batch *batch = NULL;
-
-	/*
-	 * verify that a batch of the given name exists, and the groups
-	 * that will be part of the batch exist and have at least one
-	 * active node
-	 */
-	rc = lstcon_verify_batch(batch_name, &batch);
-	if (rc)
-		goto out;
-
-	rc = lstcon_verify_group(src_name, &src_grp);
-	if (rc)
-		goto out;
-
-	rc = lstcon_verify_group(dst_name, &dst_grp);
-	if (rc)
-		goto out;
-
-	if (dst_grp->grp_userland)
-		*retp = 1;
-
-	test = kzalloc(offsetof(struct lstcon_test, tes_param[paramlen]),
-		       GFP_KERNEL);
-	if (!test) {
-		CERROR("Can't allocate test descriptor\n");
-		rc = -ENOMEM;
-
-		goto out;
-	}
-
-	test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id;
-	test->tes_batch	= batch;
-	test->tes_type = type;
-	test->tes_oneside = 0; /* TODO */
-	test->tes_loop = loop;
-	test->tes_concur = concur;
-	test->tes_stop_onerr = 1; /* TODO */
-	test->tes_span = span;
-	test->tes_dist = dist;
-	test->tes_cliidx = 0; /* just used for creating RPC */
-	test->tes_src_grp = src_grp;
-	test->tes_dst_grp = dst_grp;
-	INIT_LIST_HEAD(&test->tes_trans_list);
-
-	if (param) {
-		test->tes_paramlen = paramlen;
-		memcpy(&test->tes_param[0], param, paramlen);
-	}
-
-	rc = lstcon_test_nodes_add(test, result_up);
-
-	if (rc)
-		goto out;
-
-	if (lstcon_trans_stat()->trs_rpc_errno ||
-	    lstcon_trans_stat()->trs_fwk_errno)
-		CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type,
-		       batch_name);
-
-	/* add to test list anyway, so user can check what's going on */
-	list_add_tail(&test->tes_link, &batch->bat_test_list);
-
-	batch->bat_ntest++;
-	test->tes_hdr.tsb_index = batch->bat_ntest;
-
-	/* hold groups so nobody can change them */
-	return rc;
-out:
-	kfree(test);
-
-	if (dst_grp)
-		lstcon_group_decref(dst_grp);
-
-	if (src_grp)
-		lstcon_group_decref(src_grp);
-
-	return rc;
-}
-
-static int
-lstcon_test_find(struct lstcon_batch *batch, int idx,
-		 struct lstcon_test **testpp)
-{
-	struct lstcon_test *test;
-
-	list_for_each_entry(test, &batch->bat_test_list, tes_link) {
-		if (idx == test->tes_hdr.tsb_index) {
-			*testpp = test;
-			return 0;
-		}
-	}
-
-	return -ENOENT;
-}
-
-static int
-lstcon_tsbrpc_readent(int transop, struct srpc_msg *msg,
-		      struct lstcon_rpc_ent __user *ent_up)
-{
-	struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
-
-	LASSERT(transop == LST_TRANS_TSBCLIQRY ||
-		transop == LST_TRANS_TSBSRVQRY);
-
-	/* positive errno, framework error code */
-	if (copy_to_user(&ent_up->rpe_priv[0], &rep->bar_active,
-			 sizeof(rep->bar_active)))
-		return -EFAULT;
-
-	return 0;
-}
-
-int
-lstcon_test_batch_query(char *name, int testidx, int client,
-			int timeout, struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	struct list_head *translist;
-	struct list_head *ndlist;
-	struct lstcon_tsb_hdr *hdr;
-	struct lstcon_batch *batch;
-	struct lstcon_test *test = NULL;
-	int transop;
-	int rc;
-
-	rc = lstcon_batch_find(name, &batch);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find batch: %s\n", name);
-		return rc;
-	}
-
-	if (!testidx) {
-		translist = &batch->bat_trans_list;
-		ndlist = &batch->bat_cli_list;
-		hdr = &batch->bat_hdr;
-	} else {
-		/* query specified test only */
-		rc = lstcon_test_find(batch, testidx, &test);
-		if (rc) {
-			CDEBUG(D_NET, "Can't find test: %d\n", testidx);
-			return rc;
-		}
-
-		translist = &test->tes_trans_list;
-		ndlist = &test->tes_src_grp->grp_ndl_list;
-		hdr = &test->tes_hdr;
-	}
-
-	transop = client ? LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY;
-
-	rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr,
-				     lstcon_batrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, timeout);
-
-	/* query a batch, not a test */
-	if (!testidx &&
-	    !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) &&
-	    !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) {
-		/* all RPCs finished, and no active test */
-		batch->bat_state = LST_BATCH_IDLE;
-	}
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up,
-					  lstcon_tsbrpc_readent);
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-static int
-lstcon_statrpc_readent(int transop, struct srpc_msg *msg,
-		       struct lstcon_rpc_ent __user *ent_up)
-{
-	struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
-	struct sfw_counters __user *sfwk_stat;
-	struct srpc_counters __user *srpc_stat;
-	struct lnet_counters __user *lnet_stat;
-
-	if (rep->str_status)
-		return 0;
-
-	sfwk_stat = (struct sfw_counters __user *)&ent_up->rpe_payload[0];
-	srpc_stat = (struct srpc_counters __user *)(sfwk_stat + 1);
-	lnet_stat = (struct lnet_counters __user *)(srpc_stat + 1);
-
-	if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) ||
-	    copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) ||
-	    copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-lstcon_ndlist_stat(struct list_head *ndlist,
-		   int timeout, struct list_head __user *result_up)
-{
-	struct list_head head;
-	struct lstcon_rpc_trans *trans;
-	int rc;
-
-	INIT_LIST_HEAD(&head);
-
-	rc = lstcon_rpc_trans_ndlist(ndlist, &head,
-				     LST_TRANS_STATQRY, NULL, NULL, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up,
-					  lstcon_statrpc_readent);
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-int
-lstcon_group_stat(char *grp_name, int timeout,
-		  struct list_head __user *result_up)
-{
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(grp_name, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Can't find group %s\n", grp_name);
-		return rc;
-	}
-
-	rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up);
-
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up,
-		  int timeout, struct list_head __user *result_up)
-{
-	struct lstcon_ndlink	*ndl;
-	struct lstcon_group *tmp;
-	struct lnet_process_id id;
-	int i;
-	int rc;
-
-	rc = lstcon_group_alloc(NULL, &tmp);
-	if (rc) {
-		CERROR("Out of memory\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0 ; i < count; i++) {
-		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
-			rc = -EFAULT;
-			break;
-		}
-
-		/* add to tmp group */
-		rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2);
-		if (rc) {
-			CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET,
-			       "Failed to find or create %s: %d\n",
-			       libcfs_id2str(id), rc);
-			break;
-		}
-	}
-
-	if (rc) {
-		lstcon_group_decref(tmp);
-		return rc;
-	}
-
-	rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up);
-
-	lstcon_group_decref(tmp);
-
-	return rc;
-}
-
-static int
-lstcon_debug_ndlist(struct list_head *ndlist,
-		    struct list_head *translist,
-		    int timeout, struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	int rc;
-
-	rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY,
-				     NULL, lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
-
-	rc = lstcon_rpc_trans_interpreter(trans, result_up,
-					  lstcon_sesrpc_readent);
-	lstcon_rpc_trans_destroy(trans);
-
-	return rc;
-}
-
-int
-lstcon_session_debug(int timeout, struct list_head __user *result_up)
-{
-	return lstcon_debug_ndlist(&console_session.ses_ndl_list,
-				   NULL, timeout, result_up);
-}
-
-int
-lstcon_batch_debug(int timeout, char *name,
-		   int client, struct list_head __user *result_up)
-{
-	struct lstcon_batch *bat;
-	int rc;
-
-	rc = lstcon_batch_find(name, &bat);
-	if (rc)
-		return -ENOENT;
-
-	rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list :
-					  &bat->bat_srv_list,
-				 NULL, timeout, result_up);
-
-	return rc;
-}
-
-int
-lstcon_group_debug(int timeout, char *name,
-		   struct list_head __user *result_up)
-{
-	struct lstcon_group *grp;
-	int rc;
-
-	rc = lstcon_group_find(name, &grp);
-	if (rc)
-		return -ENOENT;
-
-	rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
-				 timeout, result_up);
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_nodes_debug(int timeout, int count,
-		   struct lnet_process_id __user *ids_up,
-		   struct list_head __user *result_up)
-{
-	struct lnet_process_id id;
-	struct lstcon_ndlink *ndl;
-	struct lstcon_group *grp;
-	int i;
-	int rc;
-
-	rc = lstcon_group_alloc(NULL, &grp);
-	if (rc) {
-		CDEBUG(D_NET, "Out of memory\n");
-		return rc;
-	}
-
-	for (i = 0; i < count; i++) {
-		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
-			rc = -EFAULT;
-			break;
-		}
-
-		/* node is added to tmp group */
-		rc = lstcon_group_ndlink_find(grp, id, &ndl, 1);
-		if (rc) {
-			CERROR("Can't create node link\n");
-			break;
-		}
-	}
-
-	if (rc) {
-		lstcon_group_decref(grp);
-		return rc;
-	}
-
-	rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
-				 timeout, result_up);
-
-	lstcon_group_decref(grp);
-
-	return rc;
-}
-
-int
-lstcon_session_match(struct lst_sid sid)
-{
-	return (console_session.ses_id.ses_nid == sid.ses_nid &&
-		console_session.ses_id.ses_stamp == sid.ses_stamp) ? 1 : 0;
-}
-
-static void
-lstcon_new_session_id(struct lst_sid *sid)
-{
-	struct lnet_process_id id;
-
-	LASSERT(console_session.ses_state == LST_SESSION_NONE);
-
-	LNetGetId(1, &id);
-	sid->ses_nid = id.nid;
-	sid->ses_stamp = jiffies;
-}
-
-int
-lstcon_session_new(char *name, int key, unsigned int feats,
-		   int timeout, int force, struct lst_sid __user *sid_up)
-{
-	int rc = 0;
-	int i;
-
-	if (console_session.ses_state != LST_SESSION_NONE) {
-		/* session exists */
-		if (!force) {
-			CNETERR("Session %s already exists\n",
-				console_session.ses_name);
-			return -EEXIST;
-		}
-
-		rc = lstcon_session_end();
-
-		/* lstcon_session_end() only returns local errors */
-		if (rc)
-			return rc;
-	}
-
-	if (feats & ~LST_FEATS_MASK) {
-		CNETERR("Unknown session features %x\n",
-			(feats & ~LST_FEATS_MASK));
-		return -EINVAL;
-	}
-
-	for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
-		LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
-
-	lstcon_new_session_id(&console_session.ses_id);
-
-	console_session.ses_key = key;
-	console_session.ses_state = LST_SESSION_ACTIVE;
-	console_session.ses_force = !!force;
-	console_session.ses_features = feats;
-	console_session.ses_feats_updated = 0;
-	console_session.ses_timeout = (timeout <= 0) ?
-				      LST_CONSOLE_TIMEOUT : timeout;
-
-	if (strlen(name) > sizeof(console_session.ses_name) - 1)
-		return -E2BIG;
-	strlcpy(console_session.ses_name, name,
-		sizeof(console_session.ses_name));
-
-	rc = lstcon_batch_add(LST_DEFAULT_BATCH);
-	if (rc)
-		return rc;
-
-	rc = lstcon_rpc_pinger_start();
-	if (rc) {
-		struct lstcon_batch *bat = NULL;
-
-		lstcon_batch_find(LST_DEFAULT_BATCH, &bat);
-		lstcon_batch_destroy(bat);
-
-		return rc;
-	}
-
-	if (!copy_to_user(sid_up, &console_session.ses_id,
-			  sizeof(struct lst_sid)))
-		return rc;
-
-	lstcon_session_end();
-
-	return -EFAULT;
-}
-
-int
-lstcon_session_info(struct lst_sid __user *sid_up, int __user *key_up,
-		    unsigned __user *featp,
-		    struct lstcon_ndlist_ent __user *ndinfo_up,
-		    char __user *name_up, int len)
-{
-	struct lstcon_ndlist_ent *entp;
-	struct lstcon_ndlink *ndl;
-	int rc = 0;
-
-	if (console_session.ses_state != LST_SESSION_ACTIVE)
-		return -ESRCH;
-
-	entp = kzalloc(sizeof(*entp), GFP_NOFS);
-	if (!entp)
-		return -ENOMEM;
-
-	list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link)
-		LST_NODE_STATE_COUNTER(ndl->ndl_node, entp);
-
-	if (copy_to_user(sid_up, &console_session.ses_id,
-			 sizeof(*sid_up)) ||
-	    copy_to_user(key_up, &console_session.ses_key,
-			 sizeof(*key_up)) ||
-	    copy_to_user(featp, &console_session.ses_features,
-			 sizeof(*featp)) ||
-	    copy_to_user(ndinfo_up, entp, sizeof(*entp)) ||
-	    copy_to_user(name_up, console_session.ses_name, len))
-		rc = -EFAULT;
-
-	kfree(entp);
-
-	return rc;
-}
-
-int
-lstcon_session_end(void)
-{
-	struct lstcon_rpc_trans *trans;
-	struct lstcon_group *grp;
-	struct lstcon_batch *bat;
-	int rc = 0;
-
-	LASSERT(console_session.ses_state == LST_SESSION_ACTIVE);
-
-	rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list,
-				     NULL, LST_TRANS_SESEND, NULL,
-				     lstcon_sesrpc_condition, &trans);
-	if (rc) {
-		CERROR("Can't create transaction: %d\n", rc);
-		return rc;
-	}
-
-	console_session.ses_shutdown = 1;
-
-	lstcon_rpc_pinger_stop();
-
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	lstcon_rpc_trans_destroy(trans);
-	/* The user can do nothing even if an RPC failed, so carry on */
-
-	/* wait for orphan RPCs to die */
-	lstcon_rpc_cleanup_wait();
-
-	console_session.ses_id = LST_INVALID_SID;
-	console_session.ses_state = LST_SESSION_NONE;
-	console_session.ses_key = 0;
-	console_session.ses_force = 0;
-	console_session.ses_feats_updated = 0;
-
-	/* destroy all batches */
-	while (!list_empty(&console_session.ses_bat_list)) {
-		bat = list_entry(console_session.ses_bat_list.next,
-				 struct lstcon_batch, bat_link);
-
-		lstcon_batch_destroy(bat);
-	}
-
-	/* destroy all groups */
-	while (!list_empty(&console_session.ses_grp_list)) {
-		grp = list_entry(console_session.ses_grp_list.next,
-				 struct lstcon_group, grp_link);
-		LASSERT(grp->grp_ref == 1);
-
-		lstcon_group_decref(grp);
-	}
-
-	/* all nodes should be released */
-	LASSERT(list_empty(&console_session.ses_ndl_list));
-
-	console_session.ses_shutdown = 0;
-	console_session.ses_expired = 0;
-
-	return rc;
-}
-
-int
-lstcon_session_feats_check(unsigned int feats)
-{
-	int rc = 0;
-
-	if (feats & ~LST_FEATS_MASK) {
-		CERROR("Can't support these features: %x\n",
-		       (feats & ~LST_FEATS_MASK));
-		return -EPROTO;
-	}
-
-	spin_lock(&console_session.ses_rpc_lock);
-
-	if (!console_session.ses_feats_updated) {
-		console_session.ses_feats_updated = 1;
-		console_session.ses_features = feats;
-	}
-
-	if (console_session.ses_features != feats)
-		rc = -EPROTO;
-
-	spin_unlock(&console_session.ses_rpc_lock);
-
-	if (rc) {
-		CERROR("Remote features %x do not match the console's session features %x\n",
-		       feats, console_session.ses_features);
-	}
-
-	return rc;
-}
-
-static int
-lstcon_acceptor_handle(struct srpc_server_rpc *rpc)
-{
-	struct srpc_msg *rep	= &rpc->srpc_replymsg;
-	struct srpc_msg *req	= &rpc->srpc_reqstbuf->buf_msg;
-	struct srpc_join_reqst *jreq = &req->msg_body.join_reqst;
-	struct srpc_join_reply *jrep = &rep->msg_body.join_reply;
-	struct lstcon_group *grp = NULL;
-	struct lstcon_ndlink *ndl;
-	int rc = 0;
-
-	sfw_unpack_message(req);
-
-	mutex_lock(&console_session.ses_mutex);
-
-	jrep->join_sid = console_session.ses_id;
-
-	if (console_session.ses_id.ses_nid == LNET_NID_ANY) {
-		jrep->join_status = ESRCH;
-		goto out;
-	}
-
-	if (lstcon_session_feats_check(req->msg_ses_feats)) {
-		jrep->join_status = EPROTO;
-		goto out;
-	}
-
-	if (jreq->join_sid.ses_nid != LNET_NID_ANY &&
-	    !lstcon_session_match(jreq->join_sid)) {
-		jrep->join_status = EBUSY;
-		goto out;
-	}
-
-	if (lstcon_group_find(jreq->join_group, &grp)) {
-		rc = lstcon_group_alloc(jreq->join_group, &grp);
-		if (rc) {
-			CERROR("Out of memory\n");
-			goto out;
-		}
-
-		list_add_tail(&grp->grp_link,
-			      &console_session.ses_grp_list);
-		lstcon_group_addref(grp);
-	}
-
-	if (grp->grp_ref > 2) {
-		/* Group is in use */
-		jrep->join_status = EBUSY;
-		goto out;
-	}
-
-	rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0);
-	if (!rc) {
-		jrep->join_status = EEXIST;
-		goto out;
-	}
-
-	rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1);
-	if (rc) {
-		CERROR("Out of memory\n");
-		goto out;
-	}
-
-	ndl->ndl_node->nd_state = LST_NODE_ACTIVE;
-	ndl->ndl_node->nd_timeout = console_session.ses_timeout;
-
-	if (!grp->grp_userland)
-		grp->grp_userland = 1;
-
-	strlcpy(jrep->join_session, console_session.ses_name,
-		sizeof(jrep->join_session));
-	jrep->join_timeout = console_session.ses_timeout;
-	jrep->join_status = 0;
-
-out:
-	rep->msg_ses_feats = console_session.ses_features;
-	if (grp)
-		lstcon_group_decref(grp);
-
-	mutex_unlock(&console_session.ses_mutex);
-
-	return rc;
-}
-
-static struct srpc_service lstcon_acceptor_service;
-
-static void lstcon_init_acceptor_service(void)
-{
-	/* initialize selftest console acceptor service table */
-	lstcon_acceptor_service.sv_name = "join session";
-	lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle;
-	lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN;
-	lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
-}
-
-static struct notifier_block lstcon_ioctl_handler = {
-	.notifier_call = lstcon_ioctl_entry,
-};
-
-/* initialize console */
-int
-lstcon_console_init(void)
-{
-	int i;
-	int rc;
-
-	memset(&console_session, 0, sizeof(struct lstcon_session));
-
-	console_session.ses_id = LST_INVALID_SID;
-	console_session.ses_state = LST_SESSION_NONE;
-	console_session.ses_timeout = 0;
-	console_session.ses_force = 0;
-	console_session.ses_expired = 0;
-	console_session.ses_feats_updated = 0;
-	console_session.ses_features = LST_FEATS_MASK;
-	console_session.ses_laststamp = ktime_get_real_seconds();
-
-	mutex_init(&console_session.ses_mutex);
-
-	INIT_LIST_HEAD(&console_session.ses_ndl_list);
-	INIT_LIST_HEAD(&console_session.ses_grp_list);
-	INIT_LIST_HEAD(&console_session.ses_bat_list);
-	INIT_LIST_HEAD(&console_session.ses_trans_list);
-
-	console_session.ses_ndl_hash =
-		kmalloc(sizeof(struct list_head) * LST_GLOBAL_HASHSIZE, GFP_KERNEL);
-	if (!console_session.ses_ndl_hash)
-		return -ENOMEM;
-
-	for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
-		INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]);
-
-	/* initialize acceptor service table */
-	lstcon_init_acceptor_service();
-
-	rc = srpc_add_service(&lstcon_acceptor_service);
-	LASSERT(rc != -EBUSY);
-	if (rc) {
-		kfree(console_session.ses_ndl_hash);
-		return rc;
-	}
-
-	rc = srpc_service_add_buffers(&lstcon_acceptor_service,
-				      lstcon_acceptor_service.sv_wi_total);
-	if (rc) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	rc = blocking_notifier_chain_register(&libcfs_ioctl_list,
-					      &lstcon_ioctl_handler);
-
-	if (!rc) {
-		lstcon_rpc_module_init();
-		return 0;
-	}
-
-out:
-	srpc_shutdown_service(&lstcon_acceptor_service);
-	srpc_remove_service(&lstcon_acceptor_service);
-
-	kfree(console_session.ses_ndl_hash);
-
-	srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
-	return rc;
-}
-
-int
-lstcon_console_fini(void)
-{
-	int i;
-
-	blocking_notifier_chain_unregister(&libcfs_ioctl_list,
-					   &lstcon_ioctl_handler);
-
-	mutex_lock(&console_session.ses_mutex);
-
-	srpc_shutdown_service(&lstcon_acceptor_service);
-	srpc_remove_service(&lstcon_acceptor_service);
-
-	if (console_session.ses_state != LST_SESSION_NONE)
-		lstcon_session_end();
-
-	lstcon_rpc_module_fini();
-
-	mutex_unlock(&console_session.ses_mutex);
-
-	LASSERT(list_empty(&console_session.ses_ndl_list));
-	LASSERT(list_empty(&console_session.ses_grp_list));
-	LASSERT(list_empty(&console_session.ses_bat_list));
-	LASSERT(list_empty(&console_session.ses_trans_list));
-
-	for (i = 0; i < LST_NODE_HASHSIZE; i++)
-		LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
-
-	kfree(console_session.ses_ndl_hash);
-
-	srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
-	return 0;
-}

+ 0 - 244
drivers/staging/lustre/lnet/selftest/console.h

@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/console.h
- *
- * kernel structure for LST console
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#ifndef __LST_CONSOLE_H__
-#define __LST_CONSOLE_H__
-
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "selftest.h"
-#include "conrpc.h"
-
-/* node descriptor */
-struct lstcon_node {
-	struct lnet_process_id	nd_id;	/* id of the node */
-	int		  nd_ref;     /* reference count */
-	int		  nd_state;   /* state of the node */
-	int		  nd_timeout; /* session timeout */
-	unsigned long	  nd_stamp;   /* timestamp of last replied RPC */
-	struct lstcon_rpc nd_ping;    /* ping rpc */
-};
-
-/* node link descriptor */
-struct lstcon_ndlink {
-	struct list_head ndl_link;    /* chain on list */
-	struct list_head ndl_hlink;   /* chain on hash */
-	struct lstcon_node	*ndl_node;	/* pointer to node */
-};
-
-/* (alias of nodes) group descriptor */
-struct lstcon_group {
-	struct list_head grp_link;		  /* chain on global group list
-						   */
-	int		 grp_ref;		  /* reference count */
-	int		 grp_userland;		  /* has userland nodes */
-	int		 grp_nnode;		  /* # of nodes */
-	char		 grp_name[LST_NAME_SIZE]; /* group name */
-
-	struct list_head grp_trans_list;	  /* transaction list */
-	struct list_head grp_ndl_list;		  /* nodes list */
-	struct list_head grp_ndl_hash[];	  /* hash table for nodes */
-};
-
-#define LST_BATCH_IDLE	  0xB0	    /* idle batch */
-#define LST_BATCH_RUNNING 0xB1	    /* running batch */
-
-struct lstcon_tsb_hdr {
-	struct lst_bid	 tsb_id;	 /* batch ID */
-	int		 tsb_index;	 /* test index */
-};
-
-/* (tests) batch descriptor */
-struct lstcon_batch {
-	struct lstcon_tsb_hdr	bat_hdr;	/* test_batch header */
-	struct list_head bat_link;	  /* chain on session's batches list */
-	int		 bat_ntest;	  /* # of tests */
-	int		 bat_state;	  /* state of the batch */
-	int		 bat_arg;	  /* parameter for run|stop, timeout
-					   * for run, force for stop
-					   */
-	char		 bat_name[LST_NAME_SIZE];/* name of batch */
-
-	struct list_head bat_test_list;   /* list head of tests (struct lstcon_test)
-					   */
-	struct list_head bat_trans_list;  /* list head of transaction */
-	struct list_head bat_cli_list;	  /* list head of client nodes
-					   * (struct lstcon_node)
-					   */
-	struct list_head *bat_cli_hash;   /* hash table of client nodes */
-	struct list_head bat_srv_list;	  /* list head of server nodes */
-	struct list_head *bat_srv_hash;   /* hash table of server nodes */
-};
-
-/* a single test descriptor */
-struct lstcon_test {
-	struct lstcon_tsb_hdr	tes_hdr;	/* test batch header */
-	struct list_head tes_link;	 /* chain on batch's tests list */
-	struct lstcon_batch	*tes_batch;	 /* pointer to batch */
-
-	int		 tes_type;	 /* type of the test, i.e: bulk, ping */
-	int		 tes_stop_onerr; /* stop on error */
-	int		 tes_oneside;	 /* one-sided test */
-	int		 tes_concur;	 /* concurrency */
-	int		 tes_loop;	 /* loop count */
-	int		 tes_dist;	 /* nodes distribution of target group */
-	int		 tes_span;	 /* nodes span of target group */
-	int		 tes_cliidx;	 /* client index, used for RPC creating */
-
-	struct list_head tes_trans_list; /* transaction list */
-	struct lstcon_group	*tes_src_grp;	/* group run the test */
-	struct lstcon_group	*tes_dst_grp;	/* target group */
-
-	int		 tes_paramlen;	 /* test parameter length */
-	char		 tes_param[];	 /* test parameter */
-};
-
-#define LST_GLOBAL_HASHSIZE 503	     /* global nodes hash table size */
-#define LST_NODE_HASHSIZE   239	     /* node hash table (for batch or group) */
-
-#define LST_SESSION_NONE    0x0	     /* no session */
-#define LST_SESSION_ACTIVE  0x1	     /* working session */
-
-#define LST_CONSOLE_TIMEOUT 300	     /* default console timeout */
-
-struct lstcon_session {
-	struct mutex	    ses_mutex;	      /* only 1 thread in session */
-	struct lst_sid	    ses_id;	      /* global session id */
-	int		    ses_key;	      /* local session key */
-	int		    ses_state;	      /* state of session */
-	int		    ses_timeout;      /* timeout in seconds */
-	time64_t	    ses_laststamp;    /* last operation stamp (seconds)
-					       */
-	unsigned int	    ses_features;     /* tests features of the session
-					       */
-	unsigned int	    ses_feats_updated:1; /* features are synced with
-						  * remote test nodes
-						  */
-	unsigned int	    ses_force:1;      /* force creating */
-	unsigned int	    ses_shutdown:1;   /* session is shutting down */
-	unsigned int	    ses_expired:1;    /* console has timed out */
-	__u64		    ses_id_cookie;    /* batch id cookie */
-	char		    ses_name[LST_NAME_SIZE];/* session name */
-	struct lstcon_rpc_trans	*ses_ping;		/* session pinger */
-	struct stt_timer	 ses_ping_timer;   /* timer for pinger */
-	struct lstcon_trans_stat ses_trans_stat;   /* transaction stats */
-
-	struct list_head    ses_trans_list;   /* global list of transaction */
-	struct list_head    ses_grp_list;     /* global list of groups */
-	struct list_head    ses_bat_list;     /* global list of batches */
-	struct list_head    ses_ndl_list;     /* global list of nodes */
-	struct list_head    *ses_ndl_hash;    /* hash table of nodes */
-
-	spinlock_t	    ses_rpc_lock;     /* serialize */
-	atomic_t	    ses_rpc_counter;  /* # of initialized RPCs */
-	struct list_head    ses_rpc_freelist; /* idle console rpc */
-}; /* session descriptor */
-
-extern struct lstcon_session	 console_session;
-
-static inline struct lstcon_trans_stat *
-lstcon_trans_stat(void)
-{
-	return &console_session.ses_trans_stat;
-}
-
-static inline struct list_head *
-lstcon_id2hash(struct lnet_process_id id, struct list_head *hash)
-{
-	unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
-
-	return &hash[idx];
-}
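
The inline helper above is the console's only hashing primitive: nodes are chained into fixed-size bucket arrays (LST_GLOBAL_HASHSIZE = 503 for the session table, LST_NODE_HASHSIZE = 239 per group or batch), keyed by the NID address modulo the table size. A self-contained sketch of the same modulo-bucket scheme, with simplified stand-in types:

	#include <stdio.h>

	#define NODE_HASHSIZE 239	/* mirrors LST_NODE_HASHSIZE */

	struct node { unsigned long nid; struct node *next; };

	/* same idea as lstcon_id2hash(): bucket = nid mod table size */
	static struct node **id2bucket(unsigned long nid, struct node **hash)
	{
		return &hash[nid % NODE_HASHSIZE];
	}

	int main(void)
	{
		static struct node *hash[NODE_HASHSIZE];
		struct node n = { .nid = 0x12345, .next = NULL };
		struct node **head = id2bucket(n.nid, hash);

		n.next = *head;		/* push onto the chosen chain */
		*head = &n;
		printf("bucket %lu\n", n.nid % NODE_HASHSIZE);
		return 0;
	}
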
-
-int lstcon_ioctl_entry(struct notifier_block *nb,
-		       unsigned long cmd, void *vdata);
-int lstcon_console_init(void);
-int lstcon_console_fini(void);
-int lstcon_session_match(struct lst_sid sid);
-int lstcon_session_new(char *name, int key, unsigned int version,
-		       int timeout, int flags, struct lst_sid __user *sid_up);
-int lstcon_session_info(struct lst_sid __user *sid_up, int __user *key,
-			unsigned __user *verp, struct lstcon_ndlist_ent __user *entp,
-			char __user *name_up, int len);
-int lstcon_session_end(void);
-int lstcon_session_debug(int timeout, struct list_head __user *result_up);
-int lstcon_session_feats_check(unsigned int feats);
-int lstcon_batch_debug(int timeout, char *name,
-		       int client, struct list_head __user *result_up);
-int lstcon_group_debug(int timeout, char *name,
-		       struct list_head __user *result_up);
-int lstcon_nodes_debug(int timeout, int nnd,
-		       struct lnet_process_id __user *nds_up,
-		       struct list_head __user *result_up);
-int lstcon_group_add(char *name);
-int lstcon_group_del(char *name);
-int lstcon_group_clean(char *name, int args);
-int lstcon_group_refresh(char *name, struct list_head __user *result_up);
-int lstcon_nodes_add(char *name, int nnd, struct lnet_process_id __user *nds_up,
-		     unsigned int *featp, struct list_head __user *result_up);
-int lstcon_nodes_remove(char *name, int nnd,
-			struct lnet_process_id __user *nds_up,
-			struct list_head __user *result_up);
-int lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gent_up,
-		      int *index_p, int *ndent_p,
-		      struct lstcon_node_ent __user *ndents_up);
-int lstcon_group_list(int idx, int len, char __user *name_up);
-int lstcon_batch_add(char *name);
-int lstcon_batch_run(char *name, int timeout,
-		     struct list_head __user *result_up);
-int lstcon_batch_stop(char *name, int force,
-		      struct list_head __user *result_up);
-int lstcon_test_batch_query(char *name, int testidx,
-			    int client, int timeout,
-			    struct list_head __user *result_up);
-int lstcon_batch_del(char *name);
-int lstcon_batch_list(int idx, int namelen, char __user *name_up);
-int lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up,
-		      int server, int testidx, int *index_p,
-		      int *ndent_p, struct lstcon_node_ent __user *dents_up);
-int lstcon_group_stat(char *grp_name, int timeout,
-		      struct list_head __user *result_up);
-int lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up,
-		      int timeout, struct list_head __user *result_up);
-int lstcon_test_add(char *batch_name, int type, int loop,
-		    int concur, int dist, int span,
-		    char *src_name, char *dst_name,
-		    void *param, int paramlen, int *retp,
-		    struct list_head __user *result_up);
-#endif

+ 0 - 1786
drivers/staging/lustre/lnet/selftest/framework.c

@@ -1,1786 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/framework.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- * Author: Liang Zhen  <liangzhen@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-struct lst_sid LST_INVALID_SID = {LNET_NID_ANY, -1};
-
-static int session_timeout = 100;
-module_param(session_timeout, int, 0444);
-MODULE_PARM_DESC(session_timeout, "test session timeout in seconds (100 by default, 0 == never)");
-
-static int rpc_timeout = 64;
-module_param(rpc_timeout, int, 0644);
-MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never)");
-
-#define sfw_unpack_id(id)		\
-do {					\
-	__swab64s(&(id).nid);		\
-	__swab32s(&(id).pid);		\
-} while (0)
-
-#define sfw_unpack_sid(sid)		\
-do {					\
-	__swab64s(&(sid).ses_nid);	\
-	__swab64s(&(sid).ses_stamp);	\
-} while (0)
-
-#define sfw_unpack_fw_counters(fc)	  \
-do {					  \
-	__swab32s(&(fc).running_ms);	  \
-	__swab32s(&(fc).active_batches);  \
-	__swab32s(&(fc).zombie_sessions); \
-	__swab32s(&(fc).brw_errors);	  \
-	__swab32s(&(fc).ping_errors);	  \
-} while (0)
-
-#define sfw_unpack_rpc_counters(rc)	\
-do {					\
-	__swab32s(&(rc).errors);	\
-	__swab32s(&(rc).rpcs_sent);	\
-	__swab32s(&(rc).rpcs_rcvd);	\
-	__swab32s(&(rc).rpcs_dropped);	\
-	__swab32s(&(rc).rpcs_expired);	\
-	__swab64s(&(rc).bulk_get);	\
-	__swab64s(&(rc).bulk_put);	\
-} while (0)
-
-#define sfw_unpack_lnet_counters(lc)	\
-do {					\
-	__swab32s(&(lc).errors);	\
-	__swab32s(&(lc).msgs_max);	\
-	__swab32s(&(lc).msgs_alloc);	\
-	__swab32s(&(lc).send_count);	\
-	__swab32s(&(lc).recv_count);	\
-	__swab32s(&(lc).drop_count);	\
-	__swab32s(&(lc).route_count);	\
-	__swab64s(&(lc).send_length);	\
-	__swab64s(&(lc).recv_length);	\
-	__swab64s(&(lc).drop_length);	\
-	__swab64s(&(lc).route_length);	\
-} while (0)
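
These macros flip each field of a message that arrived from a peer of opposite endianness; whether flipping is needed at all is decided by comparing the message magic against SRPC_MSG_MAGIC (see sfw_unpack_message() below). A standalone sketch of the detect-by-magic idiom — the magic value and all names here are hypothetical:

	#include <stdint.h>
	#include <stdio.h>

	#define MSG_MAGIC 0xeeb0f00dU	/* hypothetical wire magic */

	static uint32_t swab32(uint32_t v)
	{
		return (v >> 24) | ((v >> 8) & 0xff00) |
		       ((v << 8) & 0xff0000) | (v << 24);
	}

	struct msg { uint32_t magic; uint32_t payload; };

	static void unpack(struct msg *m)
	{
		if (m->magic == MSG_MAGIC)
			return;		/* sender had our byte order */
		/* magic only matches byte-swapped: flip every field */
		m->magic = swab32(m->magic);
		m->payload = swab32(m->payload);
	}

	int main(void)
	{
		struct msg m = { swab32(MSG_MAGIC), swab32(7) };

		unpack(&m);
		printf("payload=%u\n", m.payload);	/* prints 7 */
		return 0;
	}
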
-
-#define sfw_test_active(t)	(atomic_read(&(t)->tsi_nactive))
-#define sfw_batch_active(b)	(atomic_read(&(b)->bat_nactive))
-
-static struct smoketest_framework {
-	struct list_head  fw_zombie_rpcs;     /* RPCs to be recycled */
-	struct list_head  fw_zombie_sessions; /* stopping sessions */
-	struct list_head  fw_tests;	      /* registered test cases */
-	atomic_t	  fw_nzombies;	      /* # zombie sessions */
-	spinlock_t	  fw_lock;	      /* serialise */
-	struct sfw_session	  *fw_session;	      /* _the_ session */
-	int		  fw_shuttingdown;    /* shutdown in progress */
-	struct srpc_server_rpc *fw_active_srpc;/* running RPC */
-} sfw_data;
-
-/* forward references */
-int sfw_stop_batch(struct sfw_batch *tsb, int force);
-void sfw_destroy_session(struct sfw_session *sn);
-
-static inline struct sfw_test_case *
-sfw_find_test_case(int id)
-{
-	struct sfw_test_case *tsc;
-
-	LASSERT(id <= SRPC_SERVICE_MAX_ID);
-	LASSERT(id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
-	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
-		if (tsc->tsc_srv_service->sv_id == id)
-			return tsc;
-	}
-
-	return NULL;
-}
-
-static int
-sfw_register_test(struct srpc_service *service,
-		  struct sfw_test_client_ops *cliops)
-{
-	struct sfw_test_case *tsc;
-
-	if (sfw_find_test_case(service->sv_id)) {
-		CERROR("Failed to register test %s (%d)\n",
-		       service->sv_name, service->sv_id);
-		return -EEXIST;
-	}
-
-	tsc = kzalloc(sizeof(struct sfw_test_case), GFP_NOFS);
-	if (!tsc)
-		return -ENOMEM;
-
-	tsc->tsc_cli_ops = cliops;
-	tsc->tsc_srv_service = service;
-
-	list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests);
-	return 0;
-}
-
-static void
-sfw_add_session_timer(void)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct stt_timer *timer;
-
-	LASSERT(!sfw_data.fw_shuttingdown);
-
-	if (!sn || !sn->sn_timeout)
-		return;
-
-	LASSERT(!sn->sn_timer_active);
-
-	timer = &sn->sn_timer;
-	sn->sn_timer_active = 1;
-	timer->stt_expires = ktime_get_real_seconds() + sn->sn_timeout;
-	stt_add_timer(timer);
-}
-
-static int
-sfw_del_session_timer(void)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-
-	if (!sn || !sn->sn_timer_active)
-		return 0;
-
-	LASSERT(sn->sn_timeout);
-
-	if (stt_del_timer(&sn->sn_timer)) { /* timer defused */
-		sn->sn_timer_active = 0;
-		return 0;
-	}
-
-	return -EBUSY; /* racing with sfw_session_expired() */
-}
-
-static void
-sfw_deactivate_session(void)
-__must_hold(&sfw_data.fw_lock)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	int nactive = 0;
-	struct sfw_batch *tsb;
-	struct sfw_test_case *tsc;
-
-	if (!sn)
-		return;
-
-	LASSERT(!sn->sn_timer_active);
-
-	sfw_data.fw_session = NULL;
-	atomic_inc(&sfw_data.fw_nzombies);
-	list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
-
-	spin_unlock(&sfw_data.fw_lock);
-
-	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
-		srpc_abort_service(tsc->tsc_srv_service);
-	}
-
-	spin_lock(&sfw_data.fw_lock);
-
-	list_for_each_entry(tsb, &sn->sn_batches, bat_list) {
-		if (sfw_batch_active(tsb)) {
-			nactive++;
-			sfw_stop_batch(tsb, 1);
-		}
-	}
-
-	if (nactive)
-		return;	/* wait for active batches to stop */
-
-	list_del_init(&sn->sn_list);
-	spin_unlock(&sfw_data.fw_lock);
-
-	sfw_destroy_session(sn);
-
-	spin_lock(&sfw_data.fw_lock);
-}
-
-static void
-sfw_session_expired(void *data)
-{
-	struct sfw_session *sn = data;
-
-	spin_lock(&sfw_data.fw_lock);
-
-	LASSERT(sn->sn_timer_active);
-	LASSERT(sn == sfw_data.fw_session);
-
-	CWARN("Session expired! sid: %s-%llu, name: %s\n",
-	      libcfs_nid2str(sn->sn_id.ses_nid),
-	      sn->sn_id.ses_stamp, &sn->sn_name[0]);
-
-	sn->sn_timer_active = 0;
-	sfw_deactivate_session();
-
-	spin_unlock(&sfw_data.fw_lock);
-}
-
-static inline void
-sfw_init_session(struct sfw_session *sn, struct lst_sid sid,
-		 unsigned int features, const char *name)
-{
-	struct stt_timer *timer = &sn->sn_timer;
-
-	memset(sn, 0, sizeof(struct sfw_session));
-	INIT_LIST_HEAD(&sn->sn_list);
-	INIT_LIST_HEAD(&sn->sn_batches);
-	atomic_set(&sn->sn_refcount, 1);	/* +1 for caller */
-	atomic_set(&sn->sn_brw_errors, 0);
-	atomic_set(&sn->sn_ping_errors, 0);
-	strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name));
-
-	sn->sn_timer_active = 0;
-	sn->sn_id = sid;
-	sn->sn_features = features;
-	sn->sn_timeout = session_timeout;
-	sn->sn_started = jiffies;
-
-	timer->stt_data = sn;
-	timer->stt_func = sfw_session_expired;
-	INIT_LIST_HEAD(&timer->stt_list);
-}
-
-/* completion handler for incoming framework RPCs */
-static void
-sfw_server_rpc_done(struct srpc_server_rpc *rpc)
-{
-	struct srpc_service *sv	= rpc->srpc_scd->scd_svc;
-	int status = rpc->srpc_status;
-
-	CDEBUG(D_NET, "Incoming framework RPC done: service %s, peer %s, status %s:%d\n",
-	       sv->sv_name, libcfs_id2str(rpc->srpc_peer),
-	       swi_state2str(rpc->srpc_wi.swi_state),
-	       status);
-
-	if (rpc->srpc_bulk)
-		sfw_free_pages(rpc);
-}
-
-static void
-sfw_client_rpc_fini(struct srpc_client_rpc *rpc)
-{
-	LASSERT(!rpc->crpc_bulk.bk_niov);
-	LASSERT(list_empty(&rpc->crpc_list));
-	LASSERT(!atomic_read(&rpc->crpc_refcount));
-
-	CDEBUG(D_NET, "Outgoing framework RPC done: service %d, peer %s, status %s:%d:%d\n",
-	       rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
-	       swi_state2str(rpc->crpc_wi.swi_state),
-	       rpc->crpc_aborted, rpc->crpc_status);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	/* my callers must finish all RPCs before shutting me down */
-	LASSERT(!sfw_data.fw_shuttingdown);
-	list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs);
-
-	spin_unlock(&sfw_data.fw_lock);
-}
-
-static struct sfw_batch *
-sfw_find_batch(struct lst_bid bid)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct sfw_batch *bat;
-
-	LASSERT(sn);
-
-	list_for_each_entry(bat, &sn->sn_batches, bat_list) {
-		if (bat->bat_id.bat_id == bid.bat_id)
-			return bat;
-	}
-
-	return NULL;
-}
-
-static struct sfw_batch *
-sfw_bid2batch(struct lst_bid bid)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct sfw_batch *bat;
-
-	LASSERT(sn);
-
-	bat = sfw_find_batch(bid);
-	if (bat)
-		return bat;
-
-	bat = kzalloc(sizeof(struct sfw_batch), GFP_NOFS);
-	if (!bat)
-		return NULL;
-
-	bat->bat_error = 0;
-	bat->bat_session = sn;
-	bat->bat_id = bid;
-	atomic_set(&bat->bat_nactive, 0);
-	INIT_LIST_HEAD(&bat->bat_tests);
-
-	list_add_tail(&bat->bat_list, &sn->sn_batches);
-	return bat;
-}
-
-static int
-sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct sfw_counters *cnt = &reply->str_fw;
-	struct sfw_batch *bat;
-
-	reply->str_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
-	if (request->str_sid.ses_nid == LNET_NID_ANY) {
-		reply->str_status = EINVAL;
-		return 0;
-	}
-
-	if (!sn || !sfw_sid_equal(request->str_sid, sn->sn_id)) {
-		reply->str_status = ESRCH;
-		return 0;
-	}
-
-	lnet_counters_get(&reply->str_lnet);
-	srpc_get_counters(&reply->str_rpc);
-
-	/*
-	 * report the elapsed milliseconds since the session started;
-	 * a 32-bit count covers ~49 days before wrapping
-	 */
-	cnt->running_ms = jiffies_to_msecs(jiffies - sn->sn_started);
-	cnt->brw_errors = atomic_read(&sn->sn_brw_errors);
-	cnt->ping_errors = atomic_read(&sn->sn_ping_errors);
-	cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
-
-	cnt->active_batches = 0;
-	list_for_each_entry(bat, &sn->sn_batches, bat_list) {
-		if (atomic_read(&bat->bat_nactive) > 0)
-			cnt->active_batches++;
-	}
-
-	reply->str_status = 0;
-	return 0;
-}
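
For reference, the ~49-day bound mentioned in the comment above follows directly from the counter width: a 32-bit millisecond counter wraps after 2^32 ms = 4,294,967,296 ms ≈ 4,294,967 s ≈ 49.7 days.
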
-
-int
-sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct srpc_msg *msg = container_of(request, struct srpc_msg,
-				       msg_body.mksn_reqst);
-	int cplen = 0;
-
-	if (request->mksn_sid.ses_nid == LNET_NID_ANY) {
-		reply->mksn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-		reply->mksn_status = EINVAL;
-		return 0;
-	}
-
-	if (sn) {
-		reply->mksn_status = 0;
-		reply->mksn_sid = sn->sn_id;
-		reply->mksn_timeout = sn->sn_timeout;
-
-		if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
-			atomic_inc(&sn->sn_refcount);
-			return 0;
-		}
-
-		if (!request->mksn_force) {
-			reply->mksn_status = EBUSY;
-			cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0],
-					sizeof(reply->mksn_name));
-			if (cplen >= sizeof(reply->mksn_name))
-				return -E2BIG;
-			return 0;
-		}
-	}
-
-	/*
-	 * reject the request if it requires unknown features
-	 * NB: an old version always accepts all features because it is not
-	 * aware of srpc_msg::msg_ses_feats. That is a defect, but a harmless
-	 * one: it returns a zero feature mask to the console, and it is the
-	 * console's responsibility to make sure all nodes in a session
-	 * have the same feature mask.
-	 */
-	if (msg->msg_ses_feats & ~LST_FEATS_MASK) {
-		reply->mksn_status = EPROTO;
-		return 0;
-	}
-
-	/* brand new or create by force */
-	sn = kzalloc(sizeof(struct sfw_session), GFP_NOFS);
-	if (!sn) {
-		CERROR("dropping RPC mksn under memory pressure\n");
-		return -ENOMEM;
-	}
-
-	sfw_init_session(sn, request->mksn_sid,
-			 msg->msg_ses_feats, &request->mksn_name[0]);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	sfw_deactivate_session();
-	LASSERT(!sfw_data.fw_session);
-	sfw_data.fw_session = sn;
-
-	spin_unlock(&sfw_data.fw_lock);
-
-	reply->mksn_status = 0;
-	reply->mksn_sid = sn->sn_id;
-	reply->mksn_timeout = sn->sn_timeout;
-	return 0;
-}
-
-static int
-sfw_remove_session(struct srpc_rmsn_reqst *request,
-		   struct srpc_rmsn_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-
-	reply->rmsn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
-	if (request->rmsn_sid.ses_nid == LNET_NID_ANY) {
-		reply->rmsn_status = EINVAL;
-		return 0;
-	}
-
-	if (!sn || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) {
-		reply->rmsn_status = !sn ? ESRCH : EBUSY;
-		return 0;
-	}
-
-	if (!atomic_dec_and_test(&sn->sn_refcount)) {
-		reply->rmsn_status = 0;
-		return 0;
-	}
-
-	spin_lock(&sfw_data.fw_lock);
-	sfw_deactivate_session();
-	spin_unlock(&sfw_data.fw_lock);
-
-	reply->rmsn_status = 0;
-	reply->rmsn_sid = LST_INVALID_SID;
-	LASSERT(!sfw_data.fw_session);
-	return 0;
-}
-
-static int
-sfw_debug_session(struct srpc_debug_reqst *request,
-		  struct srpc_debug_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-
-	if (!sn) {
-		reply->dbg_status = ESRCH;
-		reply->dbg_sid = LST_INVALID_SID;
-		return 0;
-	}
-
-	reply->dbg_status = 0;
-	reply->dbg_sid = sn->sn_id;
-	reply->dbg_timeout = sn->sn_timeout;
-	if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name))
-	    >= sizeof(reply->dbg_name))
-		return -E2BIG;
-
-	return 0;
-}
-
-static void
-sfw_test_rpc_fini(struct srpc_client_rpc *rpc)
-{
-	struct sfw_test_unit *tsu = rpc->crpc_priv;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-
-	/* Called with hold of tsi->tsi_lock */
-	LASSERT(list_empty(&rpc->crpc_list));
-	list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
-}
-
-static inline int
-sfw_test_buffers(struct sfw_test_instance *tsi)
-{
-	struct sfw_test_case *tsc;
-	struct srpc_service *svc;
-	int nbuf;
-
-	LASSERT(tsi);
-	tsc = sfw_find_test_case(tsi->tsi_service);
-	LASSERT(tsc);
-	svc = tsc->tsc_srv_service;
-	LASSERT(svc);
-
-	nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts;
-	return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA);
-}
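
The sizing rule above caps the buffer pool by both the service's total work-item budget and the requested loop count, split across CPU partitions. As a worked example with assumed values (the real SFW_TEST_WI_MIN/SFW_TEST_WI_EXTRA constants live in selftest.h): with sv_wi_total = 256, tsi_loop = 100 and sv_ncpts = 4, nbuf = min(256, 100) / 4 = 25, and the function returns max(SFW_TEST_WI_MIN, 25 + SFW_TEST_WI_EXTRA).
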
-
-static int
-sfw_load_test(struct sfw_test_instance *tsi)
-{
-	struct sfw_test_case *tsc;
-	struct srpc_service *svc;
-	int nbuf;
-	int rc;
-
-	LASSERT(tsi);
-	tsc = sfw_find_test_case(tsi->tsi_service);
-	nbuf = sfw_test_buffers(tsi);
-	LASSERT(tsc);
-	svc = tsc->tsc_srv_service;
-
-	if (tsi->tsi_is_client) {
-		tsi->tsi_ops = tsc->tsc_cli_ops;
-		return 0;
-	}
-
-	rc = srpc_service_add_buffers(svc, nbuf);
-	if (rc) {
-		CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n",
-		      svc->sv_name, nbuf, rc);
-		/*
-		 * NB: this error handler is not strictly correct, because
-		 * it may release more buffers than were actually allocated.
-		 * That is harmless: the request portal is a lazy portal and
-		 * will grow its buffers on demand.
-		 */
-		srpc_service_remove_buffers(svc, nbuf);
-		return -ENOMEM;
-	}
-
-	CDEBUG(D_NET, "Reserved %d buffers for test %s\n",
-	       nbuf * (srpc_serv_is_framework(svc) ?
-		       2 : cfs_cpt_number(cfs_cpt_tab)), svc->sv_name);
-	return 0;
-}
-
-static void
-sfw_unload_test(struct sfw_test_instance *tsi)
-{
-	struct sfw_test_case *tsc;
-
-	LASSERT(tsi);
-	tsc = sfw_find_test_case(tsi->tsi_service);
-	LASSERT(tsc);
-
-	if (tsi->tsi_is_client)
-		return;
-
-	/*
-	 * shrink the buffer pool; the request portal is a lazy portal
-	 * that can grow its buffers at runtime, so we may leave some
-	 * buffers behind, but never mind...
-	 */
-	srpc_service_remove_buffers(tsc->tsc_srv_service,
-				    sfw_test_buffers(tsi));
-}
-
-static void
-sfw_destroy_test_instance(struct sfw_test_instance *tsi)
-{
-	struct srpc_client_rpc *rpc;
-	struct sfw_test_unit *tsu;
-
-	if (!tsi->tsi_is_client)
-		goto clean;
-
-	tsi->tsi_ops->tso_fini(tsi);
-
-	LASSERT(!tsi->tsi_stopping);
-	LASSERT(list_empty(&tsi->tsi_active_rpcs));
-	LASSERT(!sfw_test_active(tsi));
-
-	while (!list_empty(&tsi->tsi_units)) {
-		tsu = list_entry(tsi->tsi_units.next,
-				 struct sfw_test_unit, tsu_list);
-		list_del(&tsu->tsu_list);
-		kfree(tsu);
-	}
-
-	while (!list_empty(&tsi->tsi_free_rpcs)) {
-		rpc = list_entry(tsi->tsi_free_rpcs.next,
-				 struct srpc_client_rpc, crpc_list);
-		list_del(&rpc->crpc_list);
-		kfree(rpc);
-	}
-
-clean:
-	sfw_unload_test(tsi);
-	kfree(tsi);
-}
-
-static void
-sfw_destroy_batch(struct sfw_batch *tsb)
-{
-	struct sfw_test_instance *tsi;
-
-	LASSERT(!sfw_batch_active(tsb));
-	LASSERT(list_empty(&tsb->bat_list));
-
-	while (!list_empty(&tsb->bat_tests)) {
-		tsi = list_entry(tsb->bat_tests.next,
-				 struct sfw_test_instance, tsi_list);
-		list_del_init(&tsi->tsi_list);
-		sfw_destroy_test_instance(tsi);
-	}
-
-	kfree(tsb);
-}
-
-void
-sfw_destroy_session(struct sfw_session *sn)
-{
-	struct sfw_batch *batch;
-
-	LASSERT(list_empty(&sn->sn_list));
-	LASSERT(sn != sfw_data.fw_session);
-
-	while (!list_empty(&sn->sn_batches)) {
-		batch = list_entry(sn->sn_batches.next,
-				   struct sfw_batch, bat_list);
-		list_del_init(&batch->bat_list);
-		sfw_destroy_batch(batch);
-	}
-
-	kfree(sn);
-	atomic_dec(&sfw_data.fw_nzombies);
-}
-
-static void
-sfw_unpack_addtest_req(struct srpc_msg *msg)
-{
-	struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-
-	LASSERT(msg->msg_type == SRPC_MSG_TEST_REQST);
-	LASSERT(req->tsr_is_client);
-
-	if (msg->msg_magic == SRPC_MSG_MAGIC)
-		return;	/* no flipping needed */
-
-	LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
-	if (req->tsr_service == SRPC_SERVICE_BRW) {
-		if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
-			struct test_bulk_req *bulk = &req->tsr_u.bulk_v0;
-
-			__swab32s(&bulk->blk_opc);
-			__swab32s(&bulk->blk_npg);
-			__swab32s(&bulk->blk_flags);
-
-		} else {
-			struct test_bulk_req_v1 *bulk = &req->tsr_u.bulk_v1;
-
-			__swab16s(&bulk->blk_opc);
-			__swab16s(&bulk->blk_flags);
-			__swab32s(&bulk->blk_offset);
-			__swab32s(&bulk->blk_len);
-		}
-
-		return;
-	}
-
-	if (req->tsr_service == SRPC_SERVICE_PING) {
-		struct test_ping_req *ping = &req->tsr_u.ping;
-
-		__swab32s(&ping->png_size);
-		__swab32s(&ping->png_flags);
-		return;
-	}
-
-	LBUG();
-}
-
-static int
-sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc)
-{
-	struct srpc_msg *msg = &rpc->srpc_reqstbuf->buf_msg;
-	struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-	struct srpc_bulk *bk = rpc->srpc_bulk;
-	int ndest = req->tsr_ndest;
-	struct sfw_test_unit *tsu;
-	struct sfw_test_instance *tsi;
-	int i;
-	int rc;
-
-	tsi = kzalloc(sizeof(*tsi), GFP_NOFS);
-	if (!tsi) {
-		CERROR("Can't allocate test instance for batch: %llu\n",
-		       tsb->bat_id.bat_id);
-		return -ENOMEM;
-	}
-
-	spin_lock_init(&tsi->tsi_lock);
-	atomic_set(&tsi->tsi_nactive, 0);
-	INIT_LIST_HEAD(&tsi->tsi_units);
-	INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
-	INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
-
-	tsi->tsi_stopping = 0;
-	tsi->tsi_batch = tsb;
-	tsi->tsi_loop = req->tsr_loop;
-	tsi->tsi_concur = req->tsr_concur;
-	tsi->tsi_service = req->tsr_service;
-	tsi->tsi_is_client = !!(req->tsr_is_client);
-	tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
-
-	rc = sfw_load_test(tsi);
-	if (rc) {
-		kfree(tsi);
-		return rc;
-	}
-
-	LASSERT(!sfw_batch_active(tsb));
-
-	if (!tsi->tsi_is_client) {
-		/* it's test server, just add it to tsb */
-		list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
-		return 0;
-	}
-
-	LASSERT(bk);
-	LASSERT(bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
-	LASSERT((unsigned int)bk->bk_len >=
-		sizeof(struct lnet_process_id_packed) * ndest);
-
-	sfw_unpack_addtest_req(msg);
-	memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
-
-	for (i = 0; i < ndest; i++) {
-		struct lnet_process_id_packed *dests;
-		struct lnet_process_id_packed id;
-		int j;
-
-		dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page);
-		LASSERT(dests);  /* my pages are always mapped in kernel VM */
-		id = dests[i % SFW_ID_PER_PAGE];
-		if (msg->msg_magic != SRPC_MSG_MAGIC)
-			sfw_unpack_id(id);
-
-		for (j = 0; j < tsi->tsi_concur; j++) {
-			tsu = kzalloc(sizeof(struct sfw_test_unit), GFP_NOFS);
-			if (!tsu) {
-				rc = -ENOMEM;
-				CERROR("Can't allocate tsu for %d\n",
-				       tsi->tsi_service);
-				goto error;
-			}
-
-			tsu->tsu_dest.nid = id.nid;
-			tsu->tsu_dest.pid = id.pid;
-			tsu->tsu_instance = tsi;
-			tsu->tsu_private = NULL;
-			list_add_tail(&tsu->tsu_list, &tsi->tsi_units);
-		}
-	}
-
-	rc = tsi->tsi_ops->tso_init(tsi);
-	if (!rc) {
-		list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
-		return 0;
-	}
-
-error:
-	LASSERT(rc);
-	sfw_destroy_test_instance(tsi);
-	return rc;
-}
-
-static void
-sfw_test_unit_done(struct sfw_test_unit *tsu)
-{
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_batch *tsb = tsi->tsi_batch;
-	struct sfw_session *sn = tsb->bat_session;
-
-	LASSERT(sfw_test_active(tsi));
-
-	if (!atomic_dec_and_test(&tsi->tsi_nactive))
-		return;
-
-	/* the test instance is done */
-	spin_lock(&tsi->tsi_lock);
-
-	tsi->tsi_stopping = 0;
-
-	spin_unlock(&tsi->tsi_lock);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	if (!atomic_dec_and_test(&tsb->bat_nactive) ||	/* tsb still active */
-	    sn == sfw_data.fw_session) {		/* sn also active */
-		spin_unlock(&sfw_data.fw_lock);
-		return;
-	}
-
-	LASSERT(!list_empty(&sn->sn_list)); /* I'm a zombie! */
-
-	list_for_each_entry(tsb, &sn->sn_batches, bat_list) {
-		if (sfw_batch_active(tsb)) {
-			spin_unlock(&sfw_data.fw_lock);
-			return;
-		}
-	}
-
-	list_del_init(&sn->sn_list);
-	spin_unlock(&sfw_data.fw_lock);
-
-	sfw_destroy_session(sn);
-}
-
-static void
-sfw_test_rpc_done(struct srpc_client_rpc *rpc)
-{
-	struct sfw_test_unit *tsu = rpc->crpc_priv;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	int done = 0;
-
-	tsi->tsi_ops->tso_done_rpc(tsu, rpc);
-
-	spin_lock(&tsi->tsi_lock);
-
-	LASSERT(sfw_test_active(tsi));
-	LASSERT(!list_empty(&rpc->crpc_list));
-
-	list_del_init(&rpc->crpc_list);
-
-	/* batch is stopping, loop is done, or an error occurred */
-	if (tsi->tsi_stopping || !tsu->tsu_loop ||
-	    (rpc->crpc_status && tsi->tsi_stoptsu_onerr))
-		done = 1;
-
-	/* dec ref for poster */
-	srpc_client_rpc_decref(rpc);
-
-	spin_unlock(&tsi->tsi_lock);
-
-	if (!done) {
-		swi_schedule_workitem(&tsu->tsu_worker);
-		return;
-	}
-
-	sfw_test_unit_done(tsu);
-}
-
-int
-sfw_create_test_rpc(struct sfw_test_unit *tsu, struct lnet_process_id peer,
-		    unsigned int features, int nblk, int blklen,
-		    struct srpc_client_rpc **rpcpp)
-{
-	struct srpc_client_rpc *rpc = NULL;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-
-	spin_lock(&tsi->tsi_lock);
-
-	LASSERT(sfw_test_active(tsi));
-	/* recycle an idle RPC from the free list if possible */
-	rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs,
-				       struct srpc_client_rpc, crpc_list);
-	if (rpc) {
-		LASSERT(nblk == rpc->crpc_bulk.bk_niov);
-		list_del_init(&rpc->crpc_list);
-	}
-
-	spin_unlock(&tsi->tsi_lock);
-
-	if (!rpc) {
-		rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
-					     blklen, sfw_test_rpc_done,
-					     sfw_test_rpc_fini, tsu);
-	} else {
-		srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
-				     blklen, sfw_test_rpc_done,
-				     sfw_test_rpc_fini, tsu);
-	}
-
-	if (!rpc) {
-		CERROR("Can't create rpc for test %d\n", tsi->tsi_service);
-		return -ENOMEM;
-	}
-
-	rpc->crpc_reqstmsg.msg_ses_feats = features;
-	*rpcpp = rpc;
-
-	return 0;
-}
-
-static void
-sfw_run_test(struct swi_workitem *wi)
-{
-	struct sfw_test_unit *tsu = container_of(wi, struct sfw_test_unit, tsu_worker);
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct srpc_client_rpc *rpc = NULL;
-
-	if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc)) {
-		LASSERT(!rpc);
-		goto test_done;
-	}
-
-	LASSERT(rpc);
-
-	spin_lock(&tsi->tsi_lock);
-
-	if (tsi->tsi_stopping) {
-		list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
-		spin_unlock(&tsi->tsi_lock);
-		goto test_done;
-	}
-
-	if (tsu->tsu_loop > 0)
-		tsu->tsu_loop--;
-
-	list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
-	spin_unlock(&tsi->tsi_lock);
-
-	spin_lock(&rpc->crpc_lock);
-	rpc->crpc_timeout = rpc_timeout;
-	srpc_post_rpc(rpc);
-	spin_unlock(&rpc->crpc_lock);
-	return;
-
-test_done:
-	/*
-	 * No one can schedule me now since:
-	 * - the previous RPC, if any, has completed and no new RPC is
-	 *   initiated;
-	 * - my batch is still active, so no one can run it again now.
-	 * Cancel pending schedules and prevent future schedule attempts:
-	 */
-	sfw_test_unit_done(tsu);
-}
-
-static int
-sfw_run_batch(struct sfw_batch *tsb)
-{
-	struct swi_workitem *wi;
-	struct sfw_test_unit *tsu;
-	struct sfw_test_instance *tsi;
-
-	if (sfw_batch_active(tsb)) {
-		CDEBUG(D_NET, "Batch already active: %llu (%d)\n",
-		       tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
-		return 0;
-	}
-
-	list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
-		if (!tsi->tsi_is_client) /* skip server instances */
-			continue;
-
-		LASSERT(!tsi->tsi_stopping);
-		LASSERT(!sfw_test_active(tsi));
-
-		atomic_inc(&tsb->bat_nactive);
-
-		list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
-			atomic_inc(&tsi->tsi_nactive);
-			tsu->tsu_loop = tsi->tsi_loop;
-			wi = &tsu->tsu_worker;
-			swi_init_workitem(wi, sfw_run_test,
-					  lst_test_wq[lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
-			swi_schedule_workitem(wi);
-		}
-	}
-
-	return 0;
-}
-
-int
-sfw_stop_batch(struct sfw_batch *tsb, int force)
-{
-	struct sfw_test_instance *tsi;
-	struct srpc_client_rpc *rpc;
-
-	if (!sfw_batch_active(tsb)) {
-		CDEBUG(D_NET, "Batch %llu inactive\n", tsb->bat_id.bat_id);
-		return 0;
-	}
-
-	list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
-		spin_lock(&tsi->tsi_lock);
-
-		if (!tsi->tsi_is_client ||
-		    !sfw_test_active(tsi) || tsi->tsi_stopping) {
-			spin_unlock(&tsi->tsi_lock);
-			continue;
-		}
-
-		tsi->tsi_stopping = 1;
-
-		if (!force) {
-			spin_unlock(&tsi->tsi_lock);
-			continue;
-		}
-
-		/* abort launched rpcs in the test */
-		list_for_each_entry(rpc, &tsi->tsi_active_rpcs, crpc_list) {
-			spin_lock(&rpc->crpc_lock);
-
-			srpc_abort_rpc(rpc, -EINTR);
-
-			spin_unlock(&rpc->crpc_lock);
-		}
-
-		spin_unlock(&tsi->tsi_lock);
-	}
-
-	return 0;
-}
-
-static int
-sfw_query_batch(struct sfw_batch *tsb, int testidx,
-		struct srpc_batch_reply *reply)
-{
-	struct sfw_test_instance *tsi;
-
-	if (testidx < 0)
-		return -EINVAL;
-
-	if (!testidx) {
-		reply->bar_active = atomic_read(&tsb->bat_nactive);
-		return 0;
-	}
-
-	list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
-		if (testidx-- > 1)
-			continue;
-
-		reply->bar_active = atomic_read(&tsi->tsi_nactive);
-		return 0;
-	}
-
-	return -ENOENT;
-}
-
-void
-sfw_free_pages(struct srpc_server_rpc *rpc)
-{
-	srpc_free_bulk(rpc->srpc_bulk);
-	rpc->srpc_bulk = NULL;
-}
-
-int
-sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
-		int sink)
-{
-	LASSERT(!rpc->srpc_bulk);
-	LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
-
-	rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink);
-	if (!rpc->srpc_bulk)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static int
-sfw_add_test(struct srpc_server_rpc *rpc)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	struct srpc_test_reply *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
-	struct srpc_test_reqst *request;
-	int rc;
-	struct sfw_batch *bat;
-
-	request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
-	reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
-	if (!request->tsr_loop ||
-	    !request->tsr_concur ||
-	    request->tsr_sid.ses_nid == LNET_NID_ANY ||
-	    request->tsr_ndest > SFW_MAX_NDESTS ||
-	    (request->tsr_is_client && !request->tsr_ndest) ||
-	    request->tsr_concur > SFW_MAX_CONCUR ||
-	    request->tsr_service > SRPC_SERVICE_MAX_ID ||
-	    request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) {
-		reply->tsr_status = EINVAL;
-		return 0;
-	}
-
-	if (!sn || !sfw_sid_equal(request->tsr_sid, sn->sn_id) ||
-	    !sfw_find_test_case(request->tsr_service)) {
-		reply->tsr_status = ENOENT;
-		return 0;
-	}
-
-	bat = sfw_bid2batch(request->tsr_bid);
-	if (!bat) {
-		CERROR("dropping RPC %s from %s under memory pressure\n",
-		       rpc->srpc_scd->scd_svc->sv_name,
-		       libcfs_id2str(rpc->srpc_peer));
-		return -ENOMEM;
-	}
-
-	if (sfw_batch_active(bat)) {
-		reply->tsr_status = EBUSY;
-		return 0;
-	}
-
-	if (request->tsr_is_client && !rpc->srpc_bulk) {
-		/* rpc will be resumed later in sfw_bulk_ready */
-		int npg = sfw_id_pages(request->tsr_ndest);
-		int len;
-
-		if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-			len = npg * PAGE_SIZE;
-
-		} else {
-			len = sizeof(struct lnet_process_id_packed) *
-			      request->tsr_ndest;
-		}
-
-		return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1);
-	}
-
-	rc = sfw_add_test_instance(bat, rpc);
-	CDEBUG(!rc ? D_NET : D_WARNING,
-	       "%s test: sv %d %s, loop %d, concur %d, ndest %d\n",
-	       !rc ? "Added" : "Failed to add", request->tsr_service,
-	       request->tsr_is_client ? "client" : "server",
-	       request->tsr_loop, request->tsr_concur, request->tsr_ndest);
-
-	reply->tsr_status = (rc < 0) ? -rc : rc;
-	return 0;
-}
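
Note that sfw_add_test() is deliberately re-entrant: on the first pass for a client request it only posts a bulk buffer for the destination IDs and returns, and once the RPC layer has pulled that bulk from the peer, sfw_bulk_ready() below calls it a second time with rpc->srpc_bulk set. A toy sketch of this two-phase shape, with all names hypothetical:

	#include <stdio.h>

	struct rpc { const char *bulk; };

	static int add_test(struct rpc *rpc)
	{
		if (!rpc->bulk) {
			rpc->bulk = "ids"; /* stand-in for sfw_alloc_pages() */
			return 0;	   /* wait for bulk_ready callback */
		}
		printf("instantiating test from bulk \"%s\"\n", rpc->bulk);
		return 0;
	}

	int main(void)
	{
		struct rpc r = { NULL };

		add_test(&r);	/* phase 1: posts the bulk buffer */
		add_test(&r);	/* phase 2: bulk arrived, build the test */
		return 0;
	}
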
-
-static int
-sfw_control_batch(struct srpc_batch_reqst *request,
-		  struct srpc_batch_reply *reply)
-{
-	struct sfw_session *sn = sfw_data.fw_session;
-	int rc = 0;
-	struct sfw_batch *bat;
-
-	reply->bar_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
-	if (!sn || !sfw_sid_equal(request->bar_sid, sn->sn_id)) {
-		reply->bar_status = ESRCH;
-		return 0;
-	}
-
-	bat = sfw_find_batch(request->bar_bid);
-	if (!bat) {
-		reply->bar_status = ENOENT;
-		return 0;
-	}
-
-	switch (request->bar_opc) {
-	case SRPC_BATCH_OPC_RUN:
-		rc = sfw_run_batch(bat);
-		break;
-
-	case SRPC_BATCH_OPC_STOP:
-		rc = sfw_stop_batch(bat, request->bar_arg);
-		break;
-
-	case SRPC_BATCH_OPC_QUERY:
-		rc = sfw_query_batch(bat, request->bar_testidx, reply);
-		break;
-
-	default:
-		return -EINVAL; /* drop it */
-	}
-
-	reply->bar_status = (rc < 0) ? -rc : rc;
-	return 0;
-}
-
-static int
-sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
-{
-	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	struct srpc_msg *reply = &rpc->srpc_replymsg;
-	struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg;
-	unsigned int features = LST_FEATS_MASK;
-	int rc = 0;
-
-	LASSERT(!sfw_data.fw_active_srpc);
-	LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	if (sfw_data.fw_shuttingdown) {
-		spin_unlock(&sfw_data.fw_lock);
-		return -ESHUTDOWN;
-	}
-
-	/* Remove timer to avoid racing with it or expiring active session */
-	if (sfw_del_session_timer()) {
-		CERROR("dropping RPC %s from %s: racing with expiry timer\n",
-		       sv->sv_name, libcfs_id2str(rpc->srpc_peer));
-		spin_unlock(&sfw_data.fw_lock);
-		return -EAGAIN;
-	}
-
-	sfw_data.fw_active_srpc = rpc;
-	spin_unlock(&sfw_data.fw_lock);
-
-	sfw_unpack_message(request);
-	LASSERT(request->msg_type == srpc_service2request(sv->sv_id));
-
-	/* rpc module should have checked this */
-	LASSERT(request->msg_version == SRPC_MSG_VERSION);
-
-	if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION &&
-	    sv->sv_id != SRPC_SERVICE_DEBUG) {
-		struct sfw_session *sn = sfw_data.fw_session;
-
-		if (sn &&
-		    sn->sn_features != request->msg_ses_feats) {
-			CNETERR("Features of framework RPC don't match features of current session: %x/%x\n",
-				request->msg_ses_feats, sn->sn_features);
-			reply->msg_body.reply.status = EPROTO;
-			reply->msg_body.reply.sid = sn->sn_id;
-			goto out;
-		}
-
-	} else if (request->msg_ses_feats & ~LST_FEATS_MASK) {
-		/*
-		 * NB: at this point, old version will ignore features and
-		 * create new session anyway, so console should be able
-		 * to handle this
-		 */
-		reply->msg_body.reply.status = EPROTO;
-		goto out;
-	}
-
-	switch (sv->sv_id) {
-	default:
-		LBUG();
-	case SRPC_SERVICE_TEST:
-		rc = sfw_add_test(rpc);
-		break;
-
-	case SRPC_SERVICE_BATCH:
-		rc = sfw_control_batch(&request->msg_body.bat_reqst,
-				       &reply->msg_body.bat_reply);
-		break;
-
-	case SRPC_SERVICE_QUERY_STAT:
-		rc = sfw_get_stats(&request->msg_body.stat_reqst,
-				   &reply->msg_body.stat_reply);
-		break;
-
-	case SRPC_SERVICE_DEBUG:
-		rc = sfw_debug_session(&request->msg_body.dbg_reqst,
-				       &reply->msg_body.dbg_reply);
-		break;
-
-	case SRPC_SERVICE_MAKE_SESSION:
-		rc = sfw_make_session(&request->msg_body.mksn_reqst,
-				      &reply->msg_body.mksn_reply);
-		break;
-
-	case SRPC_SERVICE_REMOVE_SESSION:
-		rc = sfw_remove_session(&request->msg_body.rmsn_reqst,
-					&reply->msg_body.rmsn_reply);
-		break;
-	}
-
-	if (sfw_data.fw_session)
-		features = sfw_data.fw_session->sn_features;
- out:
-	reply->msg_ses_feats = features;
-	rpc->srpc_done = sfw_server_rpc_done;
-	spin_lock(&sfw_data.fw_lock);
-
-	if (!sfw_data.fw_shuttingdown)
-		sfw_add_session_timer();
-
-	sfw_data.fw_active_srpc = NULL;
-	spin_unlock(&sfw_data.fw_lock);
-	return rc;
-}
-
-static int
-sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
-{
-	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	int rc;
-
-	LASSERT(rpc->srpc_bulk);
-	LASSERT(sv->sv_id == SRPC_SERVICE_TEST);
-	LASSERT(!sfw_data.fw_active_srpc);
-	LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	if (status) {
-		CERROR("Bulk transfer failed for RPC: service %s, peer %s, status %d\n",
-		       sv->sv_name, libcfs_id2str(rpc->srpc_peer), status);
-		spin_unlock(&sfw_data.fw_lock);
-		return -EIO;
-	}
-
-	if (sfw_data.fw_shuttingdown) {
-		spin_unlock(&sfw_data.fw_lock);
-		return -ESHUTDOWN;
-	}
-
-	if (sfw_del_session_timer()) {
-		CERROR("dropping RPC %s from %s: racing with expiry timer\n",
-		       sv->sv_name, libcfs_id2str(rpc->srpc_peer));
-		spin_unlock(&sfw_data.fw_lock);
-		return -EAGAIN;
-	}
-
-	sfw_data.fw_active_srpc = rpc;
-	spin_unlock(&sfw_data.fw_lock);
-
-	rc = sfw_add_test(rpc);
-
-	spin_lock(&sfw_data.fw_lock);
-
-	if (!sfw_data.fw_shuttingdown)
-		sfw_add_session_timer();
-
-	sfw_data.fw_active_srpc = NULL;
-	spin_unlock(&sfw_data.fw_lock);
-	return rc;
-}
-
-struct srpc_client_rpc *
-sfw_create_rpc(struct lnet_process_id peer, int service,
-	       unsigned int features, int nbulkiov, int bulklen,
-	       void (*done)(struct srpc_client_rpc *), void *priv)
-{
-	struct srpc_client_rpc *rpc = NULL;
-
-	spin_lock(&sfw_data.fw_lock);
-
-	LASSERT(!sfw_data.fw_shuttingdown);
-	LASSERT(service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
-	if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) {
-		rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
-				 struct srpc_client_rpc, crpc_list);
-		list_del(&rpc->crpc_list);
-
-		srpc_init_client_rpc(rpc, peer, service, 0, 0,
-				     done, sfw_client_rpc_fini, priv);
-	}
-
-	spin_unlock(&sfw_data.fw_lock);
-
-	if (!rpc) {
-		rpc = srpc_create_client_rpc(peer, service,
-					     nbulkiov, bulklen, done,
-					     nbulkiov ?  NULL :
-					     sfw_client_rpc_fini,
-					     priv);
-	}
-
-	if (rpc) /* "session" is concept in framework */
-		rpc->crpc_reqstmsg.msg_ses_feats = features;
-
-	return rpc;
-}
-
-void
-sfw_unpack_message(struct srpc_msg *msg)
-{
-	if (msg->msg_magic == SRPC_MSG_MAGIC)
-		return; /* no flipping needed */
-
-	/* srpc module should guarantee I wouldn't get crap */
-	LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
-	if (msg->msg_type == SRPC_MSG_STAT_REQST) {
-		struct srpc_stat_reqst *req = &msg->msg_body.stat_reqst;
-
-		__swab32s(&req->str_type);
-		__swab64s(&req->str_rpyid);
-		sfw_unpack_sid(req->str_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
-		struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
-
-		__swab32s(&rep->str_status);
-		sfw_unpack_sid(rep->str_sid);
-		sfw_unpack_fw_counters(rep->str_fw);
-		sfw_unpack_rpc_counters(rep->str_rpc);
-		sfw_unpack_lnet_counters(rep->str_lnet);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
-		struct srpc_mksn_reqst *req = &msg->msg_body.mksn_reqst;
-
-		__swab64s(&req->mksn_rpyid);
-		__swab32s(&req->mksn_force);
-		sfw_unpack_sid(req->mksn_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
-		struct srpc_mksn_reply *rep = &msg->msg_body.mksn_reply;
-
-		__swab32s(&rep->mksn_status);
-		__swab32s(&rep->mksn_timeout);
-		sfw_unpack_sid(rep->mksn_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
-		struct srpc_rmsn_reqst *req = &msg->msg_body.rmsn_reqst;
-
-		__swab64s(&req->rmsn_rpyid);
-		sfw_unpack_sid(req->rmsn_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
-		struct srpc_rmsn_reply *rep = &msg->msg_body.rmsn_reply;
-
-		__swab32s(&rep->rmsn_status);
-		sfw_unpack_sid(rep->rmsn_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
-		struct srpc_debug_reqst *req = &msg->msg_body.dbg_reqst;
-
-		__swab64s(&req->dbg_rpyid);
-		__swab32s(&req->dbg_flags);
-		sfw_unpack_sid(req->dbg_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
-		struct srpc_debug_reply *rep = &msg->msg_body.dbg_reply;
-
-		__swab32s(&rep->dbg_nbatch);
-		__swab32s(&rep->dbg_timeout);
-		sfw_unpack_sid(rep->dbg_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
-		struct srpc_batch_reqst *req = &msg->msg_body.bat_reqst;
-
-		__swab32s(&req->bar_opc);
-		__swab64s(&req->bar_rpyid);
-		__swab32s(&req->bar_testidx);
-		__swab32s(&req->bar_arg);
-		sfw_unpack_sid(req->bar_sid);
-		__swab64s(&req->bar_bid.bat_id);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
-		struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
-
-		__swab32s(&rep->bar_status);
-		sfw_unpack_sid(rep->bar_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_TEST_REQST) {
-		struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-
-		__swab64s(&req->tsr_rpyid);
-		__swab64s(&req->tsr_bulkid);
-		__swab32s(&req->tsr_loop);
-		__swab32s(&req->tsr_ndest);
-		__swab32s(&req->tsr_concur);
-		__swab32s(&req->tsr_service);
-		sfw_unpack_sid(req->tsr_sid);
-		__swab64s(&req->tsr_bid.bat_id);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
-		struct srpc_test_reply *rep = &msg->msg_body.tes_reply;
-
-		__swab32s(&rep->tsr_status);
-		sfw_unpack_sid(rep->tsr_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
-		struct srpc_join_reqst *req = &msg->msg_body.join_reqst;
-
-		__swab64s(&req->join_rpyid);
-		sfw_unpack_sid(req->join_sid);
-		return;
-	}
-
-	if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
-		struct srpc_join_reply *rep = &msg->msg_body.join_reply;
-
-		__swab32s(&rep->join_status);
-		__swab32s(&rep->join_timeout);
-		sfw_unpack_sid(rep->join_sid);
-		return;
-	}
-
-	LBUG();
-}
-
-void
-sfw_abort_rpc(struct srpc_client_rpc *rpc)
-{
-	LASSERT(atomic_read(&rpc->crpc_refcount) > 0);
-	LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
-	spin_lock(&rpc->crpc_lock);
-	srpc_abort_rpc(rpc, -EINTR);
-	spin_unlock(&rpc->crpc_lock);
-}
-
-void
-sfw_post_rpc(struct srpc_client_rpc *rpc)
-{
-	spin_lock(&rpc->crpc_lock);
-
-	LASSERT(!rpc->crpc_closed);
-	LASSERT(!rpc->crpc_aborted);
-	LASSERT(list_empty(&rpc->crpc_list));
-	LASSERT(!sfw_data.fw_shuttingdown);
-
-	rpc->crpc_timeout = rpc_timeout;
-	srpc_post_rpc(rpc);
-
-	spin_unlock(&rpc->crpc_lock);
-}
-
-static struct srpc_service sfw_services[] = {
-	{
-		/* sv_id */    SRPC_SERVICE_DEBUG,
-		/* sv_name */  "debug",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_QUERY_STAT,
-		/* sv_name */  "query stats",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_MAKE_SESSION,
-		/* sv_name */  "make session",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_REMOVE_SESSION,
-		/* sv_name */  "remove session",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_BATCH,
-		/* sv_name */  "batch service",
-		0
-	},
-	{
-		/* sv_id */    SRPC_SERVICE_TEST,
-		/* sv_name */  "test service",
-		0
-	},
-	{
-		/* sv_id */    0,
-		/* sv_name */  NULL,
-		0
-	}
-};
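
The table is terminated by a NULL-named sentinel entry, which lets sfw_startup() and sfw_shutdown() walk it without carrying a separate element count. The idiom in isolation:

	#include <stdio.h>

	struct service { int id; const char *name; };

	static struct service services[] = {
		{ 1, "debug" },
		{ 2, "query stats" },
		{ 0, NULL },	/* sentinel terminates the walk */
	};

	int main(void)
	{
		for (struct service *sv = services; sv->name; sv++)
			printf("adding service %d (%s)\n", sv->id, sv->name);
		return 0;
	}
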
-
-int
-sfw_startup(void)
-{
-	int i;
-	int rc;
-	int error;
-	struct srpc_service *sv;
-	struct sfw_test_case *tsc;
-
-	if (session_timeout < 0) {
-		CERROR("Session timeout must be non-negative: %d\n",
-		       session_timeout);
-		return -EINVAL;
-	}
-
-	if (rpc_timeout < 0) {
-		CERROR("RPC timeout must be non-negative: %d\n",
-		       rpc_timeout);
-		return -EINVAL;
-	}
-
-	if (!session_timeout)
-		CWARN("Zero session_timeout specified - test sessions never expire.\n");
-
-	if (!rpc_timeout)
-		CWARN("Zero rpc_timeout specified - test RPC never expire.\n");
-
-	memset(&sfw_data, 0, sizeof(struct smoketest_framework));
-
-	sfw_data.fw_session = NULL;
-	sfw_data.fw_active_srpc = NULL;
-	spin_lock_init(&sfw_data.fw_lock);
-	atomic_set(&sfw_data.fw_nzombies, 0);
-	INIT_LIST_HEAD(&sfw_data.fw_tests);
-	INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
-	INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
-
-	brw_init_test_client();
-	brw_init_test_service();
-	rc = sfw_register_test(&brw_test_service, &brw_test_client);
-	LASSERT(!rc);
-
-	ping_init_test_client();
-	ping_init_test_service();
-	rc = sfw_register_test(&ping_test_service, &ping_test_client);
-	LASSERT(!rc);
-
-	error = 0;
-	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
-		sv = tsc->tsc_srv_service;
-
-		rc = srpc_add_service(sv);
-		LASSERT(rc != -EBUSY);
-		if (rc) {
-			CWARN("Failed to add %s service: %d\n",
-			      sv->sv_name, rc);
-			error = rc;
-		}
-	}
-
-	for (i = 0; ; i++) {
-		sv = &sfw_services[i];
-		if (!sv->sv_name)
-			break;
-
-		sv->sv_bulk_ready = NULL;
-		sv->sv_handler = sfw_handle_server_rpc;
-		sv->sv_wi_total = SFW_FRWK_WI_MAX;
-		if (sv->sv_id == SRPC_SERVICE_TEST)
-			sv->sv_bulk_ready = sfw_bulk_ready;
-
-		rc = srpc_add_service(sv);
-		LASSERT(rc != -EBUSY);
-		if (rc) {
-			CWARN("Failed to add %s service: %d\n",
-			      sv->sv_name, rc);
-			error = rc;
-		}
-
-		/* about to call sfw_shutdown(), no need to add buffers */
-		if (error)
-			continue;
-
-		rc = srpc_service_add_buffers(sv, sv->sv_wi_total);
-		if (rc) {
-			CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n",
-			      sv->sv_name, sv->sv_wi_total, rc);
-			error = -ENOMEM;
-		}
-	}
-
-	if (error)
-		sfw_shutdown();
-	return error;
-}
-
-void
-sfw_shutdown(void)
-{
-	struct srpc_service *sv;
-	struct sfw_test_case	*tsc;
-	int i;
-
-	spin_lock(&sfw_data.fw_lock);
-
-	sfw_data.fw_shuttingdown = 1;
-	lst_wait_until(!sfw_data.fw_active_srpc, sfw_data.fw_lock,
-		       "waiting for active RPC to finish.\n");
-
-	if (sfw_del_session_timer())
-		lst_wait_until(!sfw_data.fw_session, sfw_data.fw_lock,
-			       "waiting for session timer to explode.\n");
-
-	sfw_deactivate_session();
-	lst_wait_until(!atomic_read(&sfw_data.fw_nzombies),
-		       sfw_data.fw_lock,
-		       "waiting for %d zombie sessions to die.\n",
-		       atomic_read(&sfw_data.fw_nzombies));
-
-	spin_unlock(&sfw_data.fw_lock);
-
-	for (i = 0; ; i++) {
-		sv = &sfw_services[i];
-		if (!sv->sv_name)
-			break;
-
-		srpc_shutdown_service(sv);
-		srpc_remove_service(sv);
-	}
-
-	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
-		sv = tsc->tsc_srv_service;
-		srpc_shutdown_service(sv);
-		srpc_remove_service(sv);
-	}
-
-	while (!list_empty(&sfw_data.fw_zombie_rpcs)) {
-		struct srpc_client_rpc *rpc;
-
-		rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
-				 struct srpc_client_rpc, crpc_list);
-		list_del(&rpc->crpc_list);
-
-		kfree(rpc);
-	}
-
-	for (i = 0; ; i++) {
-		sv = &sfw_services[i];
-		if (!sv->sv_name)
-			break;
-
-		srpc_wait_service_shutdown(sv);
-	}
-
-	while (!list_empty(&sfw_data.fw_tests)) {
-		tsc = list_entry(sfw_data.fw_tests.next,
-				 struct sfw_test_case, tsc_list);
-
-		srpc_wait_service_shutdown(tsc->tsc_srv_service);
-
-		list_del(&tsc->tsc_list);
-		kfree(tsc);
-	}
-}

+ 0 - 169
drivers/staging/lustre/lnet/selftest/module.c

@@ -1,169 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-#include "console.h"
-
-enum {
-	LST_INIT_NONE		= 0,
-	LST_INIT_WI_SERIAL,
-	LST_INIT_WI_TEST,
-	LST_INIT_RPC,
-	LST_INIT_FW,
-	LST_INIT_CONSOLE
-};
-
-static int lst_init_step = LST_INIT_NONE;
-
-struct workqueue_struct *lst_serial_wq;
-struct workqueue_struct **lst_test_wq;
-
-static void
-lnet_selftest_exit(void)
-{
-	int i;
-
-	switch (lst_init_step) {
-	case LST_INIT_CONSOLE:
-		lstcon_console_fini();
-		/* fall through */
-	case LST_INIT_FW:
-		sfw_shutdown();
-		/* fall through */
-	case LST_INIT_RPC:
-		srpc_shutdown();
-		/* fall through */
-	case LST_INIT_WI_TEST:
-		for (i = 0; i < cfs_cpt_number(lnet_cpt_table()); i++) {
-			if (!lst_test_wq[i])
-				continue;
-			destroy_workqueue(lst_test_wq[i]);
-		}
-		kvfree(lst_test_wq);
-		lst_test_wq = NULL;
-		/* fall through */
-	case LST_INIT_WI_SERIAL:
-		destroy_workqueue(lst_serial_wq);
-		lst_serial_wq = NULL;
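-		/* fall through */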
-	case LST_INIT_NONE:
-		break;
-	default:
-		LBUG();
-	}
-}
-
-static int
-lnet_selftest_init(void)
-{
-	int nscheds;
-	int rc;
-	int i;
-
-	rc = libcfs_setup();
-	if (rc)
-		return rc;
-
-	lst_serial_wq = alloc_ordered_workqueue("lst_s", 0);
-	if (!lst_serial_wq) {
-		CERROR("Failed to create serial WI scheduler for LST\n");
-		return -ENOMEM;
-	}
-	lst_init_step = LST_INIT_WI_SERIAL;
-
-	nscheds = cfs_cpt_number(lnet_cpt_table());
-	lst_test_wq = kvmalloc_array(nscheds, sizeof(lst_test_wq[0]),
-				     GFP_KERNEL | __GFP_ZERO);
-	if (!lst_test_wq) {
-		rc = -ENOMEM;
-		goto error;
-	}
-
-	lst_init_step = LST_INIT_WI_TEST;
-	for (i = 0; i < nscheds; i++) {
-		int nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
-		struct workqueue_attrs attrs = {0};
-		cpumask_var_t *mask = cfs_cpt_cpumask(lnet_cpt_table(), i);
-
-		/* reserve at least one CPU for LND */
-		nthrs = max(nthrs - 1, 1);
-		lst_test_wq[i] = alloc_workqueue("lst_t", WQ_UNBOUND, nthrs);
-		if (!lst_test_wq[i]) {
-			CWARN("Failed to create CPU partition affinity WI scheduler %d for LST\n",
-			      i);
-			rc = -ENOMEM;
-			goto error;
-		}
-
-		if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
-			cpumask_copy(attrs.cpumask, *mask);
-			apply_workqueue_attrs(lst_test_wq[i], &attrs);
-			free_cpumask_var(attrs.cpumask);
-		}
-	}
-
-	rc = srpc_startup();
-	if (rc) {
-		CERROR("LST can't startup rpc\n");
-		goto error;
-	}
-	lst_init_step = LST_INIT_RPC;
-
-	rc = sfw_startup();
-	if (rc) {
-		CERROR("LST can't startup framework\n");
-		goto error;
-	}
-	lst_init_step = LST_INIT_FW;
-
-	rc = lstcon_console_init();
-	if (rc) {
-		CERROR("LST can't startup console\n");
-		goto error;
-	}
-	lst_init_step = LST_INIT_CONSOLE;
-	return 0;
-error:
-	lnet_selftest_exit();
-	return rc;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("LNet Selftest");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(lnet_selftest_init);
-module_exit(lnet_selftest_exit);

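The init/exit pair above is the classic staged-unwind pattern:
lnet_selftest_init() records how far it got in lst_init_step, and
lnet_selftest_exit() tears down by entering a switch at the last completed
step and falling through. A minimal standalone sketch of the same idiom
(hypothetical step names, not from the deleted file):

    #include <stdio.h>

    /* Hypothetical three-step init; mirrors lst_init_step only in shape. */
    enum { INIT_NONE, INIT_A, INIT_B, INIT_C };

    static int init_step = INIT_NONE;

    static void teardown(void)
    {
        switch (init_step) {
        case INIT_C:
            puts("undo C");
            /* fall through */
        case INIT_B:
            puts("undo B");
            /* fall through */
        case INIT_A:
            puts("undo A");
            /* fall through */
        case INIT_NONE:
            break;
        }
    }

    static int init(int fail_at)
    {
        puts("do A");
        init_step = INIT_A;
        puts("do B");
        init_step = INIT_B;
        if (fail_at == INIT_C) {  /* simulate the last step failing */
            teardown();           /* unwinds B, then A */
            return -1;
        }
        init_step = INIT_C;
        return 0;
    }

    int main(void)
    {
        return init(INIT_C) ? 1 : 0;
    }
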
+ 0 - 228
drivers/staging/lustre/lnet/selftest/ping_test.c

@@ -1,228 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conctl.c
- *
- * Test client & Server
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include "selftest.h"
-
-#define LST_PING_TEST_MAGIC	0xbabeface
-
-static int ping_srv_workitems = SFW_TEST_WI_MAX;
-module_param(ping_srv_workitems, int, 0644);
-MODULE_PARM_DESC(ping_srv_workitems, "# PING server workitems");
-
-struct lst_ping_data {
-	spinlock_t	pnd_lock;	/* serialize */
-	int		pnd_counter;	/* sequence counter */
-};
-
-static struct lst_ping_data  lst_ping_data;
-
-static int
-ping_client_init(struct sfw_test_instance *tsi)
-{
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-
-	LASSERT(tsi->tsi_is_client);
-	LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK));
-
-	spin_lock_init(&lst_ping_data.pnd_lock);
-	lst_ping_data.pnd_counter = 0;
-
-	return 0;
-}
-
-static void
-ping_client_fini(struct sfw_test_instance *tsi)
-{
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	int errors;
-
-	LASSERT(sn);
-	LASSERT(tsi->tsi_is_client);
-
-	errors = atomic_read(&sn->sn_ping_errors);
-	if (errors)
-		CWARN("%d pings have failed.\n", errors);
-	else
-		CDEBUG(D_NET, "Ping test finished OK.\n");
-}
-
-static int
-ping_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
-		     struct srpc_client_rpc **rpc)
-{
-	struct srpc_ping_reqst *req;
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	struct timespec64 ts;
-	int rc;
-
-	LASSERT(sn);
-	LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
-	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc);
-	if (rc)
-		return rc;
-
-	req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst;
-
-	req->pnr_magic = LST_PING_TEST_MAGIC;
-
-	spin_lock(&lst_ping_data.pnd_lock);
-	req->pnr_seq = lst_ping_data.pnd_counter++;
-	spin_unlock(&lst_ping_data.pnd_lock);
-
-	ktime_get_real_ts64(&ts);
-	req->pnr_time_sec = ts.tv_sec;
-	req->pnr_time_usec = ts.tv_nsec / NSEC_PER_USEC;
-
-	return rc;
-}
-
-static void
-ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
-{
-	struct sfw_test_instance *tsi = tsu->tsu_instance;
-	struct sfw_session *sn = tsi->tsi_batch->bat_session;
-	struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
-	struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
-	struct timespec64 ts;
-
-	LASSERT(sn);
-
-	if (rpc->crpc_status) {
-		if (!tsi->tsi_stopping)	/* rpc could have been aborted */
-			atomic_inc(&sn->sn_ping_errors);
-		CERROR("Unable to ping %s (%d): %d\n",
-		       libcfs_id2str(rpc->crpc_dest),
-		       reqst->pnr_seq, rpc->crpc_status);
-		return;
-	}
-
-	if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
-		__swab32s(&reply->pnr_seq);
-		__swab32s(&reply->pnr_magic);
-		__swab32s(&reply->pnr_status);
-	}
-
-	if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
-		rpc->crpc_status = -EBADMSG;
-		atomic_inc(&sn->sn_ping_errors);
-		CERROR("Bad magic %u from %s, %u expected.\n",
-		       reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
-		       LST_PING_TEST_MAGIC);
-		return;
-	}
-
-	if (reply->pnr_seq != reqst->pnr_seq) {
-		rpc->crpc_status = -EBADMSG;
-		atomic_inc(&sn->sn_ping_errors);
-		CERROR("Bad seq %u from %s, %u expected.\n",
-		       reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
-		       reqst->pnr_seq);
-		return;
-	}
-
-	ktime_get_real_ts64(&ts);
-	CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq,
-	       (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 +
-			      (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec)));
-}
-
-static int
-ping_server_handle(struct srpc_server_rpc *rpc)
-{
-	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
-	struct srpc_msg *replymsg = &rpc->srpc_replymsg;
-	struct srpc_ping_reqst *req = &reqstmsg->msg_body.ping_reqst;
-	struct srpc_ping_reply *rep = &rpc->srpc_replymsg.msg_body.ping_reply;
-
-	LASSERT(sv->sv_id == SRPC_SERVICE_PING);
-
-	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
-		LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
-		__swab32s(&req->pnr_seq);
-		__swab32s(&req->pnr_magic);
-		__swab64s(&req->pnr_time_sec);
-		__swab64s(&req->pnr_time_usec);
-	}
-	LASSERT(reqstmsg->msg_type == srpc_service2request(sv->sv_id));
-
-	if (req->pnr_magic != LST_PING_TEST_MAGIC) {
-		CERROR("Unexpected magic %08x from %s\n",
-		       req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
-		return -EINVAL;
-	}
-
-	rep->pnr_seq = req->pnr_seq;
-	rep->pnr_magic = LST_PING_TEST_MAGIC;
-
-	if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) {
-		replymsg->msg_ses_feats = LST_FEATS_MASK;
-		rep->pnr_status = EPROTO;
-		return 0;
-	}
-
-	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
-
-	CDEBUG(D_NET, "Got ping %d from %s\n",
-	       req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
-	return 0;
-}
-
-struct sfw_test_client_ops ping_test_client;
-
-void ping_init_test_client(void)
-{
-	ping_test_client.tso_init = ping_client_init;
-	ping_test_client.tso_fini = ping_client_fini;
-	ping_test_client.tso_prep_rpc = ping_client_prep_rpc;
-	ping_test_client.tso_done_rpc = ping_client_done_rpc;
-}
-
-struct srpc_service ping_test_service;
-
-void ping_init_test_service(void)
-{
-	ping_test_service.sv_id = SRPC_SERVICE_PING;
-	ping_test_service.sv_name = "ping_test";
-	ping_test_service.sv_handler = ping_server_handle;
-	ping_test_service.sv_wi_total = ping_srv_workitems;
-}

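The __swab32s() calls in ping_client_done_rpc() and ping_server_handle()
above implement the usual selftest wire convention: senders write fields in
native byte order, and the receiver swaps only when the magic arrives
byte-reversed. A self-contained sketch of that check (simplified message
layout; swab32() here stands in for the kernel's __swab32()):

    #include <stdint.h>
    #include <stdio.h>

    #define MAGIC 0xbabefaceU   /* same value as LST_PING_TEST_MAGIC */

    static uint32_t swab32(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0xff00) |
               ((v << 8) & 0xff0000) | (v << 24);
    }

    struct msg { uint32_t magic; uint32_t seq; };

    /* Returns 0 and fixes byte order in place, or -1 for a bad magic. */
    static int unpack(struct msg *m)
    {
        if (m->magic == MAGIC)
            return 0;                    /* peer has the same byte order */
        if (m->magic == swab32(MAGIC)) {
            m->seq = swab32(m->seq);     /* peer has the opposite order */
            m->magic = MAGIC;
            return 0;
        }
        return -1;                       /* corrupt: drop the message */
    }

    int main(void)
    {
        struct msg m = { swab32(MAGIC), swab32(7) };  /* "foreign" message */

        if (!unpack(&m))
            printf("seq %u\n", (unsigned)m.seq);      /* prints: seq 7 */
        return 0;
    }
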
+ 0 - 1682
drivers/staging/lustre/lnet/selftest/rpc.c

@@ -1,1682 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/rpc.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- * 2012-05-13: Liang Zhen <liang@whamcloud.com>
- * - percpt data for service to improve smp performance
- * - code cleanup
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-enum srpc_state {
-	SRPC_STATE_NONE,
-	SRPC_STATE_NI_INIT,
-	SRPC_STATE_EQ_INIT,
-	SRPC_STATE_RUNNING,
-	SRPC_STATE_STOPPING,
-};
-
-static struct smoketest_rpc {
-	spinlock_t	 rpc_glock;	/* global lock */
-	struct srpc_service	*rpc_services[SRPC_SERVICE_MAX_ID + 1];
-	struct lnet_handle_eq	 rpc_lnet_eq;	/* _the_ LNet event queue */
-	enum srpc_state	 rpc_state;
-	struct srpc_counters	 rpc_counters;
-	__u64		 rpc_matchbits;	/* matchbits counter */
-} srpc_data;
-
-static inline int
-srpc_serv_portal(int svc_id)
-{
-	return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ?
-	       SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL;
-}
-
-/* forward refs */
-void srpc_handle_rpc(struct swi_workitem *wi);
-
-void srpc_get_counters(struct srpc_counters *cnt)
-{
-	spin_lock(&srpc_data.rpc_glock);
-	*cnt = srpc_data.rpc_counters;
-	spin_unlock(&srpc_data.rpc_glock);
-}
-
-void srpc_set_counters(const struct srpc_counters *cnt)
-{
-	spin_lock(&srpc_data.rpc_glock);
-	srpc_data.rpc_counters = *cnt;
-	spin_unlock(&srpc_data.rpc_glock);
-}
-
-static int
-srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off,
-		   int nob)
-{
-	LASSERT(off < PAGE_SIZE);
-	LASSERT(nob > 0 && nob <= PAGE_SIZE);
-
-	bk->bk_iovs[i].bv_offset = off;
-	bk->bk_iovs[i].bv_page = pg;
-	bk->bk_iovs[i].bv_len = nob;
-	return nob;
-}
-
-void
-srpc_free_bulk(struct srpc_bulk *bk)
-{
-	int i;
-	struct page *pg;
-
-	LASSERT(bk);
-
-	for (i = 0; i < bk->bk_niov; i++) {
-		pg = bk->bk_iovs[i].bv_page;
-		if (!pg)
-			break;
-
-		__free_page(pg);
-	}
-
-	kfree(bk);
-}
-
-struct srpc_bulk *
-srpc_alloc_bulk(int cpt, unsigned int bulk_off, unsigned int bulk_npg,
-		unsigned int bulk_len, int sink)
-{
-	struct srpc_bulk *bk;
-	int i;
-
-	LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
-
-	bk = kzalloc_cpt(offsetof(struct srpc_bulk, bk_iovs[bulk_npg]),
-			 GFP_KERNEL, cpt);
-	if (!bk) {
-		CERROR("Can't allocate descriptor for %d pages\n", bulk_npg);
-		return NULL;
-	}
-
-	memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
-	bk->bk_sink = sink;
-	bk->bk_len = bulk_len;
-	bk->bk_niov = bulk_npg;
-
-	for (i = 0; i < bulk_npg; i++) {
-		struct page *pg;
-		int nob;
-
-		pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				      GFP_KERNEL, 0);
-		if (!pg) {
-			CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
-			srpc_free_bulk(bk);
-			return NULL;
-		}
-
-		nob = min_t(unsigned int, bulk_off + bulk_len, PAGE_SIZE) -
-		      bulk_off;
-		srpc_add_bulk_page(bk, pg, i, bulk_off, nob);
-		bulk_len -= nob;
-		bulk_off = 0;
-	}
-
-	return bk;
-}
-
-static inline __u64
-srpc_next_id(void)
-{
-	__u64 id;
-
-	spin_lock(&srpc_data.rpc_glock);
-	id = srpc_data.rpc_matchbits++;
-	spin_unlock(&srpc_data.rpc_glock);
-	return id;
-}
-
-static void
-srpc_init_server_rpc(struct srpc_server_rpc *rpc,
-		     struct srpc_service_cd *scd,
-		     struct srpc_buffer *buffer)
-{
-	memset(rpc, 0, sizeof(*rpc));
-	swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc,
-			  srpc_serv_is_framework(scd->scd_svc) ?
-			  lst_serial_wq : lst_test_wq[scd->scd_cpt]);
-
-	rpc->srpc_ev.ev_fired = 1; /* no event expected now */
-
-	rpc->srpc_scd = scd;
-	rpc->srpc_reqstbuf = buffer;
-	rpc->srpc_peer = buffer->buf_peer;
-	rpc->srpc_self = buffer->buf_self;
-	LNetInvalidateMDHandle(&rpc->srpc_replymdh);
-}
-
-static void
-srpc_service_fini(struct srpc_service *svc)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	struct srpc_buffer *buf;
-	struct list_head *q;
-	int i;
-
-	if (!svc->sv_cpt_data)
-		return;
-
-	cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
-		while (1) {
-			if (!list_empty(&scd->scd_buf_posted))
-				q = &scd->scd_buf_posted;
-			else if (!list_empty(&scd->scd_buf_blocked))
-				q = &scd->scd_buf_blocked;
-			else
-				break;
-
-			while (!list_empty(q)) {
-				buf = list_entry(q->next, struct srpc_buffer,
-						 buf_list);
-				list_del(&buf->buf_list);
-				kfree(buf);
-			}
-		}
-
-		LASSERT(list_empty(&scd->scd_rpc_active));
-
-		while (!list_empty(&scd->scd_rpc_free)) {
-			rpc = list_entry(scd->scd_rpc_free.next,
-					 struct srpc_server_rpc,
-					 srpc_list);
-			list_del(&rpc->srpc_list);
-			kfree(rpc);
-		}
-	}
-
-	cfs_percpt_free(svc->sv_cpt_data);
-	svc->sv_cpt_data = NULL;
-}
-
-static int
-srpc_service_nrpcs(struct srpc_service *svc)
-{
-	int nrpcs = svc->sv_wi_total / svc->sv_ncpts;
-
-	return srpc_serv_is_framework(svc) ?
-	       max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN);
-}
-
-void srpc_add_buffer(struct swi_workitem *wi);
-
-static int
-srpc_service_init(struct srpc_service *svc)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	int nrpcs;
-	int i;
-	int j;
-
-	svc->sv_shuttingdown = 0;
-
-	svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(),
-					    sizeof(**svc->sv_cpt_data));
-	if (!svc->sv_cpt_data)
-		return -ENOMEM;
-
-	svc->sv_ncpts = srpc_serv_is_framework(svc) ?
-			1 : cfs_cpt_number(lnet_cpt_table());
-	nrpcs = srpc_service_nrpcs(svc);
-
-	cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
-		scd->scd_cpt = i;
-		scd->scd_svc = svc;
-		spin_lock_init(&scd->scd_lock);
-		INIT_LIST_HEAD(&scd->scd_rpc_free);
-		INIT_LIST_HEAD(&scd->scd_rpc_active);
-		INIT_LIST_HEAD(&scd->scd_buf_posted);
-		INIT_LIST_HEAD(&scd->scd_buf_blocked);
-
-		scd->scd_ev.ev_data = scd;
-		scd->scd_ev.ev_type = SRPC_REQUEST_RCVD;
-
-		/*
-		 * NB: don't use lst_serial_wq for adding buffer,
-		 * see details in srpc_service_add_buffers()
-		 */
-		swi_init_workitem(&scd->scd_buf_wi,
-				  srpc_add_buffer, lst_test_wq[i]);
-
-		if (i && srpc_serv_is_framework(svc)) {
-			/*
-			 * NB: the framework service only needs srpc_service_cd
-			 * for one partition, but we allocate for all partitions
-			 * to keep the implementation simple; this wastes a
-			 * little memory, but nobody should care
-			 */
-			continue;
-		}
-
-		for (j = 0; j < nrpcs; j++) {
-			rpc = kzalloc_cpt(sizeof(*rpc), GFP_NOFS, i);
-			if (!rpc) {
-				srpc_service_fini(svc);
-				return -ENOMEM;
-			}
-			list_add(&rpc->srpc_list, &scd->scd_rpc_free);
-		}
-	}
-
-	return 0;
-}
-
-int
-srpc_add_service(struct srpc_service *sv)
-{
-	int id = sv->sv_id;
-
-	LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID);
-
-	if (srpc_service_init(sv))
-		return -ENOMEM;
-
-	spin_lock(&srpc_data.rpc_glock);
-
-	LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
-	if (srpc_data.rpc_services[id]) {
-		spin_unlock(&srpc_data.rpc_glock);
-		goto failed;
-	}
-
-	srpc_data.rpc_services[id] = sv;
-	spin_unlock(&srpc_data.rpc_glock);
-
-	CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name);
-	return 0;
-
- failed:
-	srpc_service_fini(sv);
-	return -EBUSY;
-}
-
-int
-srpc_remove_service(struct srpc_service *sv)
-{
-	int id = sv->sv_id;
-
-	spin_lock(&srpc_data.rpc_glock);
-
-	if (srpc_data.rpc_services[id] != sv) {
-		spin_unlock(&srpc_data.rpc_glock);
-		return -ENOENT;
-	}
-
-	srpc_data.rpc_services[id] = NULL;
-	spin_unlock(&srpc_data.rpc_glock);
-	return 0;
-}
-
-static int
-srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
-		       int len, int options, struct lnet_process_id peer,
-		       struct lnet_handle_md *mdh, struct srpc_event *ev)
-{
-	int rc;
-	struct lnet_md md;
-	struct lnet_handle_me meh;
-
-	rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK,
-			  local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh);
-	if (rc) {
-		CERROR("LNetMEAttach failed: %d\n", rc);
-		LASSERT(rc == -ENOMEM);
-		return -ENOMEM;
-	}
-
-	md.threshold = 1;
-	md.user_ptr = ev;
-	md.start = buf;
-	md.length = len;
-	md.options = options;
-	md.eq_handle = srpc_data.rpc_lnet_eq;
-
-	rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh);
-	if (rc) {
-		CERROR("LNetMDAttach failed: %d\n", rc);
-		LASSERT(rc == -ENOMEM);
-
-		rc = LNetMEUnlink(meh);
-		LASSERT(!rc);
-		return -ENOMEM;
-	}
-
-	CDEBUG(D_NET, "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n",
-	       libcfs_id2str(peer), portal, matchbits);
-	return 0;
-}
-
-static int
-srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
-		      int options, struct lnet_process_id peer,
-		      lnet_nid_t self, struct lnet_handle_md *mdh,
-		      struct srpc_event *ev)
-{
-	int rc;
-	struct lnet_md md;
-
-	md.user_ptr = ev;
-	md.start = buf;
-	md.length = len;
-	md.eq_handle = srpc_data.rpc_lnet_eq;
-	md.threshold = options & LNET_MD_OP_GET ? 2 : 1;
-	md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET);
-
-	rc = LNetMDBind(md, LNET_UNLINK, mdh);
-	if (rc) {
-		CERROR("LNetMDBind failed: %d\n", rc);
-		LASSERT(rc == -ENOMEM);
-		return -ENOMEM;
-	}
-
-	/*
-	 * This is kind of an abuse of the LNET_MD_OP_{PUT,GET} options;
-	 * they're only meaningful for MDs attached to an ME (i.e. passive
-	 * buffers).
-	 */
-	if (options & LNET_MD_OP_PUT) {
-		rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer,
-			     portal, matchbits, 0, 0);
-	} else {
-		LASSERT(options & LNET_MD_OP_GET);
-
-		rc = LNetGet(self, *mdh, peer, portal, matchbits, 0);
-	}
-
-	if (rc) {
-		CERROR("LNet%s(%s, %d, %lld) failed: %d\n",
-		       options & LNET_MD_OP_PUT ? "Put" : "Get",
-		       libcfs_id2str(peer), portal, matchbits, rc);
-
-		/*
-		 * The forthcoming unlink event will complete this operation
-		 * with failure, so fall through and return success here.
-		 */
-		rc = LNetMDUnlink(*mdh);
-		LASSERT(!rc);
-	} else {
-		CDEBUG(D_NET, "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n",
-		       libcfs_id2str(peer), portal, matchbits);
-	}
-	return 0;
-}
-
-static int
-srpc_post_passive_rqtbuf(int service, int local, void *buf, int len,
-			 struct lnet_handle_md *mdh, struct srpc_event *ev)
-{
-	struct lnet_process_id any = { 0 };
-
-	any.nid = LNET_NID_ANY;
-	any.pid = LNET_PID_ANY;
-
-	return srpc_post_passive_rdma(srpc_serv_portal(service),
-				      local, service, buf, len,
-				      LNET_MD_OP_PUT, any, mdh, ev);
-}
-
-static int
-srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
-__must_hold(&scd->scd_lock)
-{
-	struct srpc_service *sv = scd->scd_svc;
-	struct srpc_msg	*msg = &buf->buf_msg;
-	int rc;
-
-	LNetInvalidateMDHandle(&buf->buf_mdh);
-	list_add(&buf->buf_list, &scd->scd_buf_posted);
-	scd->scd_buf_nposted++;
-	spin_unlock(&scd->scd_lock);
-
-	rc = srpc_post_passive_rqtbuf(sv->sv_id,
-				      !srpc_serv_is_framework(sv),
-				      msg, sizeof(*msg), &buf->buf_mdh,
-				      &scd->scd_ev);
-
-	/*
-	 * At this point, an RPC (new or delayed) may have arrived in
-	 * msg and its event handler has been called. So we must add
-	 * buf to scd_buf_posted _before_ dropping scd_lock
-	 */
-	spin_lock(&scd->scd_lock);
-
-	if (!rc) {
-		if (!sv->sv_shuttingdown)
-			return 0;
-
-		spin_unlock(&scd->scd_lock);
-		/*
-		 * srpc_shutdown_service might have tried to unlink me
-		 * when my buf_mdh was still invalid
-		 */
-		LNetMDUnlink(buf->buf_mdh);
-		spin_lock(&scd->scd_lock);
-		return 0;
-	}
-
-	scd->scd_buf_nposted--;
-	if (sv->sv_shuttingdown)
-		return rc; /* don't allow changing scd_buf_posted */
-
-	list_del(&buf->buf_list);
-	spin_unlock(&scd->scd_lock);
-
-	kfree(buf);
-
-	spin_lock(&scd->scd_lock);
-	return rc;
-}
-
-void
-srpc_add_buffer(struct swi_workitem *wi)
-{
-	struct srpc_service_cd *scd = container_of(wi, struct srpc_service_cd, scd_buf_wi);
-	struct srpc_buffer *buf;
-	int rc = 0;
-
-	/*
-	 * This is called by workitem scheduler threads; these threads
-	 * have had their CPT affinity set, so buffers will be posted
-	 * on the CPT-local list of the portal
-	 */
-	spin_lock(&scd->scd_lock);
-
-	while (scd->scd_buf_adjust > 0 &&
-	       !scd->scd_svc->sv_shuttingdown) {
-		scd->scd_buf_adjust--; /* consume it */
-		scd->scd_buf_posting++;
-
-		spin_unlock(&scd->scd_lock);
-
-		buf = kzalloc(sizeof(*buf), GFP_NOFS);
-		if (!buf) {
-			CERROR("Failed to add new buf to service: %s\n",
-			       scd->scd_svc->sv_name);
-			spin_lock(&scd->scd_lock);
-			rc = -ENOMEM;
-			break;
-		}
-
-		spin_lock(&scd->scd_lock);
-		if (scd->scd_svc->sv_shuttingdown) {
-			spin_unlock(&scd->scd_lock);
-			kfree(buf);
-
-			spin_lock(&scd->scd_lock);
-			rc = -ESHUTDOWN;
-			break;
-		}
-
-		rc = srpc_service_post_buffer(scd, buf);
-		if (rc)
-			break; /* buf has been freed inside */
-
-		LASSERT(scd->scd_buf_posting > 0);
-		scd->scd_buf_posting--;
-		scd->scd_buf_total++;
-		scd->scd_buf_low = max(2, scd->scd_buf_total / 4);
-	}
-
-	if (rc) {
-		scd->scd_buf_err_stamp = ktime_get_real_seconds();
-		scd->scd_buf_err = rc;
-
-		LASSERT(scd->scd_buf_posting > 0);
-		scd->scd_buf_posting--;
-	}
-
-	spin_unlock(&scd->scd_lock);
-}
-
-int
-srpc_service_add_buffers(struct srpc_service *sv, int nbuffer)
-{
-	struct srpc_service_cd *scd;
-	int rc = 0;
-	int i;
-
-	LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-
-		scd->scd_buf_err = 0;
-		scd->scd_buf_err_stamp = 0;
-		scd->scd_buf_posting = 0;
-		scd->scd_buf_adjust = nbuffer;
-		/* start to post buffers */
-		swi_schedule_workitem(&scd->scd_buf_wi);
-		spin_unlock(&scd->scd_lock);
-
-		/* the framework service only posts buffers for one partition */
-		if (srpc_serv_is_framework(sv))
-			break;
-	}
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-		/*
-		 * NB: srpc_service_add_buffers() can be called in the
-		 * thread context of lst_serial_wq, and we don't normally
-		 * allow sleeping in the thread context of a WI scheduler,
-		 * because it blocks the current scheduler thread from doing
-		 * anything else; even worse, it could deadlock if it's
-		 * waiting on a result from another WI of the same scheduler.
-		 * However, it's safe here because scd_buf_wi is scheduled
-		 * by a thread in a different WI scheduler (lst_test_wq),
-		 * so there is no risk of deadlock, though this could block
-		 * all WIs pending on lst_serial_wq for a moment, which is
-		 * not good but not fatal.
-		 */
-		lst_wait_until(scd->scd_buf_err ||
-			       (!scd->scd_buf_adjust &&
-				!scd->scd_buf_posting),
-			       scd->scd_lock, "waiting for adding buffer\n");
-
-		if (scd->scd_buf_err && !rc)
-			rc = scd->scd_buf_err;
-
-		spin_unlock(&scd->scd_lock);
-	}
-
-	return rc;
-}
-
-void
-srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer)
-{
-	struct srpc_service_cd *scd;
-	int num;
-	int i;
-
-	LASSERT(!sv->sv_shuttingdown);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-
-		num = scd->scd_buf_total + scd->scd_buf_posting;
-		scd->scd_buf_adjust -= min(nbuffer, num);
-
-		spin_unlock(&scd->scd_lock);
-	}
-}
-
-/* returns 1 if sv has finished, otherwise 0 */
-int
-srpc_finish_service(struct srpc_service *sv)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	int i;
-
-	LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		swi_cancel_workitem(&scd->scd_buf_wi);
-
-		spin_lock(&scd->scd_lock);
-
-		if (scd->scd_buf_nposted > 0) {
-			CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n",
-			       scd->scd_buf_nposted);
-			spin_unlock(&scd->scd_lock);
-			return 0;
-		}
-
-		if (list_empty(&scd->scd_rpc_active)) {
-			spin_unlock(&scd->scd_lock);
-			continue;
-		}
-
-		rpc = list_entry(scd->scd_rpc_active.next,
-				 struct srpc_server_rpc, srpc_list);
-		CNETERR("Active RPC %p on shutdown: sv %s, peer %s, wi %s, ev fired %d type %d status %d lnet %d\n",
-			rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
-			swi_state2str(rpc->srpc_wi.swi_state),
-			rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type,
-			rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet);
-		spin_unlock(&scd->scd_lock);
-		return 0;
-	}
-
-	/* no lock needed from now on */
-	srpc_service_fini(sv);
-	return 1;
-}
-
-/* called with sv->sv_lock held */
-static void
-srpc_service_recycle_buffer(struct srpc_service_cd *scd,
-			    struct srpc_buffer *buf)
-__must_hold(&scd->scd_lock)
-{
-	if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
-		if (srpc_service_post_buffer(scd, buf)) {
-			CWARN("Failed to post %s buffer\n",
-			      scd->scd_svc->sv_name);
-		}
-		return;
-	}
-
-	/* service is shutting down, or we want to recycle some buffers */
-	scd->scd_buf_total--;
-
-	if (scd->scd_buf_adjust < 0) {
-		scd->scd_buf_adjust++;
-		if (scd->scd_buf_adjust < 0 &&
-		    !scd->scd_buf_total && !scd->scd_buf_posting) {
-			CDEBUG(D_INFO,
-			       "Try to recycle %d buffers but nothing left\n",
-			       scd->scd_buf_adjust);
-			scd->scd_buf_adjust = 0;
-		}
-	}
-
-	spin_unlock(&scd->scd_lock);
-	kfree(buf);
-	spin_lock(&scd->scd_lock);
-}
-
-void
-srpc_abort_service(struct srpc_service *sv)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	int i;
-
-	CDEBUG(D_NET, "Aborting service: id %d, name %s\n",
-	       sv->sv_id, sv->sv_name);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-
-		/*
-		 * schedule in-flight RPCs to notice the abort, NB:
-		 * racing with incoming RPCs; complete fix should make test
-		 * RPCs carry session ID in its headers
-		 */
-		list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) {
-			rpc->srpc_aborted = 1;
-			swi_schedule_workitem(&rpc->srpc_wi);
-		}
-
-		spin_unlock(&scd->scd_lock);
-	}
-}
-
-void
-srpc_shutdown_service(struct srpc_service *sv)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_server_rpc *rpc;
-	struct srpc_buffer *buf;
-	int i;
-
-	CDEBUG(D_NET, "Shutting down service: id %d, name %s\n",
-	       sv->sv_id, sv->sv_name);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
-		spin_lock(&scd->scd_lock);
-
-	sv->sv_shuttingdown = 1; /* i.e. no new active RPC */
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
-		spin_unlock(&scd->scd_lock);
-
-	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
-		spin_lock(&scd->scd_lock);
-
-		/* schedule in-flight RPCs to notice the shutdown */
-		list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list)
-			swi_schedule_workitem(&rpc->srpc_wi);
-
-		spin_unlock(&scd->scd_lock);
-
-		/*
-		 * OK to traverse scd_buf_posted without lock, since no one
-		 * touches scd_buf_posted now
-		 */
-		list_for_each_entry(buf, &scd->scd_buf_posted, buf_list)
-			LNetMDUnlink(buf->buf_mdh);
-	}
-}
-
-static int
-srpc_send_request(struct srpc_client_rpc *rpc)
-{
-	struct srpc_event *ev = &rpc->crpc_reqstev;
-	int rc;
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = SRPC_REQUEST_SENT;
-
-	rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service),
-				   rpc->crpc_service, &rpc->crpc_reqstmsg,
-				   sizeof(struct srpc_msg), LNET_MD_OP_PUT,
-				   rpc->crpc_dest, LNET_NID_ANY,
-				   &rpc->crpc_reqstmdh, ev);
-	if (rc) {
-		LASSERT(rc == -ENOMEM);
-		ev->ev_fired = 1;  /* no more event expected */
-	}
-	return rc;
-}
-
-static int
-srpc_prepare_reply(struct srpc_client_rpc *rpc)
-{
-	struct srpc_event *ev = &rpc->crpc_replyev;
-	__u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
-	int rc;
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = SRPC_REPLY_RCVD;
-
-	*id = srpc_next_id();
-
-	rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
-				    &rpc->crpc_replymsg,
-				    sizeof(struct srpc_msg),
-				    LNET_MD_OP_PUT, rpc->crpc_dest,
-				    &rpc->crpc_replymdh, ev);
-	if (rc) {
-		LASSERT(rc == -ENOMEM);
-		ev->ev_fired = 1;  /* no more event expected */
-	}
-	return rc;
-}
-
-static int
-srpc_prepare_bulk(struct srpc_client_rpc *rpc)
-{
-	struct srpc_bulk *bk = &rpc->crpc_bulk;
-	struct srpc_event *ev = &rpc->crpc_bulkev;
-	__u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
-	int rc;
-	int opt;
-
-	LASSERT(bk->bk_niov <= LNET_MAX_IOV);
-
-	if (!bk->bk_niov)
-		return 0; /* nothing to do */
-
-	opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET;
-	opt |= LNET_MD_KIOV;
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = SRPC_BULK_REQ_RCVD;
-
-	*id = srpc_next_id();
-
-	rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
-				    &bk->bk_iovs[0], bk->bk_niov, opt,
-				    rpc->crpc_dest, &bk->bk_mdh, ev);
-	if (rc) {
-		LASSERT(rc == -ENOMEM);
-		ev->ev_fired = 1;  /* no more event expected */
-	}
-	return rc;
-}
-
-static int
-srpc_do_bulk(struct srpc_server_rpc *rpc)
-{
-	struct srpc_event *ev = &rpc->srpc_ev;
-	struct srpc_bulk *bk = rpc->srpc_bulk;
-	__u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
-	int rc;
-	int opt;
-
-	LASSERT(bk);
-
-	opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT;
-	opt |= LNET_MD_KIOV;
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT;
-
-	rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id,
-				   &bk->bk_iovs[0], bk->bk_niov, opt,
-				   rpc->srpc_peer, rpc->srpc_self,
-				   &bk->bk_mdh, ev);
-	if (rc)
-		ev->ev_fired = 1;  /* no more event expected */
-	return rc;
-}
-
-/* only called from srpc_handle_rpc */
-static void
-srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
-{
-	struct srpc_service_cd *scd = rpc->srpc_scd;
-	struct srpc_service *sv = scd->scd_svc;
-	struct srpc_buffer *buffer;
-
-	LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE);
-
-	rpc->srpc_status = status;
-
-	CDEBUG_LIMIT(!status ? D_NET : D_NETERROR,
-		     "Server RPC %p done: service %s, peer %s, status %s:%d\n",
-		     rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
-		     swi_state2str(rpc->srpc_wi.swi_state), status);
-
-	if (status) {
-		spin_lock(&srpc_data.rpc_glock);
-		srpc_data.rpc_counters.rpcs_dropped++;
-		spin_unlock(&srpc_data.rpc_glock);
-	}
-
-	if (rpc->srpc_done)
-		(*rpc->srpc_done)(rpc);
-	LASSERT(!rpc->srpc_bulk);
-
-	spin_lock(&scd->scd_lock);
-
-	if (rpc->srpc_reqstbuf) {
-		/*
-		 * NB: srpc_service_recycle_buffer might drop sv_lock, but
-		 * sv won't go away because scd_rpc_active is not empty
-		 */
-		srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf);
-		rpc->srpc_reqstbuf = NULL;
-	}
-
-	list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */
-
-	/*
-	 * No one can schedule me now since:
-	 * - I'm not on scd_rpc_active.
-	 * - all LNet events have been fired.
-	 * Cancel pending schedules and prevent future schedule attempts:
-	 */
-	LASSERT(rpc->srpc_ev.ev_fired);
-
-	if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) {
-		buffer = list_entry(scd->scd_buf_blocked.next,
-				    struct srpc_buffer, buf_list);
-		list_del(&buffer->buf_list);
-
-		srpc_init_server_rpc(rpc, scd, buffer);
-		list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active);
-		swi_schedule_workitem(&rpc->srpc_wi);
-	} else {
-		list_add(&rpc->srpc_list, &scd->scd_rpc_free);
-	}
-
-	spin_unlock(&scd->scd_lock);
-}
-
-/* handles an incoming RPC */
-void
-srpc_handle_rpc(struct swi_workitem *wi)
-{
-	struct srpc_server_rpc *rpc = container_of(wi, struct srpc_server_rpc, srpc_wi);
-	struct srpc_service_cd *scd = rpc->srpc_scd;
-	struct srpc_service *sv = scd->scd_svc;
-	struct srpc_event *ev = &rpc->srpc_ev;
-	int rc = 0;
-
-	LASSERT(wi == &rpc->srpc_wi);
-
-	spin_lock(&scd->scd_lock);
-
-	if (sv->sv_shuttingdown || rpc->srpc_aborted) {
-		spin_unlock(&scd->scd_lock);
-
-		if (rpc->srpc_bulk)
-			LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
-		LNetMDUnlink(rpc->srpc_replymdh);
-
-		if (ev->ev_fired) { /* no more event, OK to finish */
-			srpc_server_rpc_done(rpc, -ESHUTDOWN);
-		}
-		return;
-	}
-
-	spin_unlock(&scd->scd_lock);
-
-	switch (wi->swi_state) {
-	default:
-		LBUG();
-	case SWI_STATE_NEWBORN: {
-		struct srpc_msg *msg;
-		struct srpc_generic_reply *reply;
-
-		msg = &rpc->srpc_reqstbuf->buf_msg;
-		reply = &rpc->srpc_replymsg.msg_body.reply;
-
-		if (!msg->msg_magic) {
-			/* moaned already in srpc_lnet_ev_handler */
-			srpc_server_rpc_done(rpc, -EBADMSG);
-			return;
-		}
-
-		srpc_unpack_msg_hdr(msg);
-		if (msg->msg_version != SRPC_MSG_VERSION) {
-			CWARN("Version mismatch: %u, %u expected, from %s\n",
-			      msg->msg_version, SRPC_MSG_VERSION,
-			      libcfs_id2str(rpc->srpc_peer));
-			reply->status = EPROTO;
-			/* fall through and send reply */
-		} else {
-			reply->status = 0;
-			rc = (*sv->sv_handler)(rpc);
-			LASSERT(!reply->status || !rpc->srpc_bulk);
-			if (rc) {
-				srpc_server_rpc_done(rpc, rc);
-				return;
-			}
-		}
-
-		wi->swi_state = SWI_STATE_BULK_STARTED;
-
-		if (rpc->srpc_bulk) {
-			rc = srpc_do_bulk(rpc);
-			if (!rc)
-				return; /* wait for bulk */
-
-			LASSERT(ev->ev_fired);
-			ev->ev_status = rc;
-		}
-	}
-		/* fall through */
-	case SWI_STATE_BULK_STARTED:
-		LASSERT(!rpc->srpc_bulk || ev->ev_fired);
-
-		if (rpc->srpc_bulk) {
-			rc = ev->ev_status;
-
-			if (sv->sv_bulk_ready)
-				rc = (*sv->sv_bulk_ready)(rpc, rc);
-
-			if (rc) {
-				srpc_server_rpc_done(rpc, rc);
-				return;
-			}
-		}
-
-		wi->swi_state = SWI_STATE_REPLY_SUBMITTED;
-		rc = srpc_send_reply(rpc);
-		if (!rc)
-			return; /* wait for reply */
-		srpc_server_rpc_done(rpc, rc);
-		return;
-
-	case SWI_STATE_REPLY_SUBMITTED:
-		if (!ev->ev_fired) {
-			CERROR("RPC %p: bulk %p, service %d\n",
-			       rpc, rpc->srpc_bulk, sv->sv_id);
-			CERROR("Event: status %d, type %d, lnet %d\n",
-			       ev->ev_status, ev->ev_type, ev->ev_lnet);
-			LASSERT(ev->ev_fired);
-		}
-
-		wi->swi_state = SWI_STATE_DONE;
-		srpc_server_rpc_done(rpc, ev->ev_status);
-		return;
-	}
-}
-
-static void
-srpc_client_rpc_expired(void *data)
-{
-	struct srpc_client_rpc *rpc = data;
-
-	CWARN("Client RPC expired: service %d, peer %s, timeout %d.\n",
-	      rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
-	      rpc->crpc_timeout);
-
-	spin_lock(&rpc->crpc_lock);
-
-	rpc->crpc_timeout = 0;
-	srpc_abort_rpc(rpc, -ETIMEDOUT);
-
-	spin_unlock(&rpc->crpc_lock);
-
-	spin_lock(&srpc_data.rpc_glock);
-	srpc_data.rpc_counters.rpcs_expired++;
-	spin_unlock(&srpc_data.rpc_glock);
-}
-
-static void
-srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc)
-{
-	struct stt_timer *timer = &rpc->crpc_timer;
-
-	if (!rpc->crpc_timeout)
-		return;
-
-	INIT_LIST_HEAD(&timer->stt_list);
-	timer->stt_data	= rpc;
-	timer->stt_func	= srpc_client_rpc_expired;
-	timer->stt_expires = ktime_get_real_seconds() + rpc->crpc_timeout;
-	stt_add_timer(timer);
-}
-
-/*
- * Called with rpc->crpc_lock held.
- *
- * Upon exit the RPC expiry timer is not queued and the handler is not
- * running on any CPU.
- */
-static void
-srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc)
-{
-	/* timer not planted or already exploded */
-	if (!rpc->crpc_timeout)
-		return;
-
-	/* timer successfully defused */
-	if (stt_del_timer(&rpc->crpc_timer))
-		return;
-
-	/* timer detonated, wait for it to explode */
-	while (rpc->crpc_timeout) {
-		spin_unlock(&rpc->crpc_lock);
-
-		schedule();
-
-		spin_lock(&rpc->crpc_lock);
-	}
-}
-
-static void
-srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status)
-{
-	struct swi_workitem *wi = &rpc->crpc_wi;
-
-	LASSERT(status || wi->swi_state == SWI_STATE_DONE);
-
-	spin_lock(&rpc->crpc_lock);
-
-	rpc->crpc_closed = 1;
-	if (!rpc->crpc_status)
-		rpc->crpc_status = status;
-
-	srpc_del_client_rpc_timer(rpc);
-
-	CDEBUG_LIMIT(!status ? D_NET : D_NETERROR,
-		     "Client RPC done: service %d, peer %s, status %s:%d:%d\n",
-		     rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
-		     swi_state2str(wi->swi_state), rpc->crpc_aborted, status);
-
-	/*
-	 * No one can schedule me now since:
-	 * - RPC timer has been defused.
-	 * - all LNet events have been fired.
-	 * - crpc_closed has been set, preventing srpc_abort_rpc from
-	 *   scheduling me.
-	 * Cancel pending schedules and prevent future schedule attempts:
-	 */
-	LASSERT(!srpc_event_pending(rpc));
-
-	spin_unlock(&rpc->crpc_lock);
-
-	(*rpc->crpc_done)(rpc);
-}
-
-/* sends an outgoing RPC */
-void
-srpc_send_rpc(struct swi_workitem *wi)
-{
-	int rc = 0;
-	struct srpc_client_rpc *rpc;
-	struct srpc_msg *reply;
-	int do_bulk;
-
-	LASSERT(wi);
-
-	rpc = container_of(wi, struct srpc_client_rpc, crpc_wi);
-
-	LASSERT(rpc);
-	LASSERT(wi == &rpc->crpc_wi);
-
-	reply = &rpc->crpc_replymsg;
-	do_bulk = rpc->crpc_bulk.bk_niov > 0;
-
-	spin_lock(&rpc->crpc_lock);
-
-	if (rpc->crpc_aborted) {
-		spin_unlock(&rpc->crpc_lock);
-		goto abort;
-	}
-
-	spin_unlock(&rpc->crpc_lock);
-
-	switch (wi->swi_state) {
-	default:
-		LBUG();
-	case SWI_STATE_NEWBORN:
-		LASSERT(!srpc_event_pending(rpc));
-
-		rc = srpc_prepare_reply(rpc);
-		if (rc) {
-			srpc_client_rpc_done(rpc, rc);
-			return;
-		}
-
-		rc = srpc_prepare_bulk(rpc);
-		if (rc)
-			break;
-
-		wi->swi_state = SWI_STATE_REQUEST_SUBMITTED;
-		rc = srpc_send_request(rpc);
-		break;
-
-	case SWI_STATE_REQUEST_SUBMITTED:
-		/*
-		 * CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
-		 * order; however, they're processed in a strict order:
-		 * rqt, rpy, and bulk.
-		 */
-		if (!rpc->crpc_reqstev.ev_fired)
-			break;
-
-		rc = rpc->crpc_reqstev.ev_status;
-		if (rc)
-			break;
-
-		wi->swi_state = SWI_STATE_REQUEST_SENT;
-		/* perhaps more events */
-		/* fall through */
-	case SWI_STATE_REQUEST_SENT: {
-		enum srpc_msg_type type = srpc_service2reply(rpc->crpc_service);
-
-		if (!rpc->crpc_replyev.ev_fired)
-			break;
-
-		rc = rpc->crpc_replyev.ev_status;
-		if (rc)
-			break;
-
-		srpc_unpack_msg_hdr(reply);
-		if (reply->msg_type != type ||
-		    (reply->msg_magic != SRPC_MSG_MAGIC &&
-		     reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
-			CWARN("Bad message from %s: type %u (%u expected), magic %u (%u expected).\n",
-			      libcfs_id2str(rpc->crpc_dest),
-			      reply->msg_type, type,
-			      reply->msg_magic, SRPC_MSG_MAGIC);
-			rc = -EBADMSG;
-			break;
-		}
-
-		if (do_bulk && reply->msg_body.reply.status) {
-			CWARN("Remote error %d at %s, unlink bulk buffer in case peer didn't initiate bulk transfer\n",
-			      reply->msg_body.reply.status,
-			      libcfs_id2str(rpc->crpc_dest));
-			LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
-		}
-
-		wi->swi_state = SWI_STATE_REPLY_RECEIVED;
-	}
-		/* fall through */
-	case SWI_STATE_REPLY_RECEIVED:
-		if (do_bulk && !rpc->crpc_bulkev.ev_fired)
-			break;
-
-		rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0;
-
-		/*
-		 * Bulk buffer was unlinked due to remote error. Clear error
-		 * since reply buffer still contains valid data.
-		 * NB rpc->crpc_done shouldn't look into bulk data in case of
-		 * remote error.
-		 */
-		if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK &&
-		    !rpc->crpc_status && reply->msg_body.reply.status)
-			rc = 0;
-
-		wi->swi_state = SWI_STATE_DONE;
-		srpc_client_rpc_done(rpc, rc);
-		return;
-	}
-
-	if (rc) {
-		spin_lock(&rpc->crpc_lock);
-		srpc_abort_rpc(rpc, rc);
-		spin_unlock(&rpc->crpc_lock);
-	}
-
-abort:
-	if (rpc->crpc_aborted) {
-		LNetMDUnlink(rpc->crpc_reqstmdh);
-		LNetMDUnlink(rpc->crpc_replymdh);
-		LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
-
-		if (!srpc_event_pending(rpc)) {
-			srpc_client_rpc_done(rpc, -EINTR);
-			return;
-		}
-	}
-}
-
-struct srpc_client_rpc *
-srpc_create_client_rpc(struct lnet_process_id peer, int service,
-		       int nbulkiov, int bulklen,
-		       void (*rpc_done)(struct srpc_client_rpc *),
-		       void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
-{
-	struct srpc_client_rpc *rpc;
-
-	rpc = kzalloc(offsetof(struct srpc_client_rpc,
-			       crpc_bulk.bk_iovs[nbulkiov]), GFP_KERNEL);
-	if (!rpc)
-		return NULL;
-
-	srpc_init_client_rpc(rpc, peer, service, nbulkiov,
-			     bulklen, rpc_done, rpc_fini, priv);
-	return rpc;
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_abort_rpc(struct srpc_client_rpc *rpc, int why)
-{
-	LASSERT(why);
-
-	if (rpc->crpc_aborted ||	/* already aborted */
-	    rpc->crpc_closed)		/* callback imminent */
-		return;
-
-	CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n",
-	       rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
-	       swi_state2str(rpc->crpc_wi.swi_state), why);
-
-	rpc->crpc_aborted = 1;
-	rpc->crpc_status = why;
-	swi_schedule_workitem(&rpc->crpc_wi);
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_post_rpc(struct srpc_client_rpc *rpc)
-{
-	LASSERT(!rpc->crpc_aborted);
-	LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
-	CDEBUG(D_NET, "Posting RPC: peer %s, service %d, timeout %d\n",
-	       libcfs_id2str(rpc->crpc_dest), rpc->crpc_service,
-	       rpc->crpc_timeout);
-
-	srpc_add_client_rpc_timer(rpc);
-	swi_schedule_workitem(&rpc->crpc_wi);
-}
-
-int
-srpc_send_reply(struct srpc_server_rpc *rpc)
-{
-	struct srpc_event *ev = &rpc->srpc_ev;
-	struct srpc_msg *msg = &rpc->srpc_replymsg;
-	struct srpc_buffer *buffer = rpc->srpc_reqstbuf;
-	struct srpc_service_cd *scd = rpc->srpc_scd;
-	struct srpc_service *sv = scd->scd_svc;
-	__u64 rpyid;
-	int rc;
-
-	LASSERT(buffer);
-	rpyid = buffer->buf_msg.msg_body.reqst.rpyid;
-
-	spin_lock(&scd->scd_lock);
-
-	if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) {
-		/*
-		 * Repost buffer before replying since test client
-		 * might send me another RPC once it gets the reply
-		 */
-		if (srpc_service_post_buffer(scd, buffer))
-			CWARN("Failed to repost %s buffer\n", sv->sv_name);
-		rpc->srpc_reqstbuf = NULL;
-	}
-
-	spin_unlock(&scd->scd_lock);
-
-	ev->ev_fired = 0;
-	ev->ev_data = rpc;
-	ev->ev_type = SRPC_REPLY_SENT;
-
-	msg->msg_magic = SRPC_MSG_MAGIC;
-	msg->msg_version = SRPC_MSG_VERSION;
-	msg->msg_type = srpc_service2reply(sv->sv_id);
-
-	rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg,
-				   sizeof(*msg), LNET_MD_OP_PUT,
-				   rpc->srpc_peer, rpc->srpc_self,
-				   &rpc->srpc_replymdh, ev);
-	if (rc)
-		ev->ev_fired = 1; /* no more event expected */
-	return rc;
-}
-
-/* when in kernel always called with LNET_LOCK() held, and in thread context */
-static void
-srpc_lnet_ev_handler(struct lnet_event *ev)
-{
-	struct srpc_service_cd *scd;
-	struct srpc_event *rpcev = ev->md.user_ptr;
-	struct srpc_client_rpc *crpc;
-	struct srpc_server_rpc *srpc;
-	struct srpc_buffer *buffer;
-	struct srpc_service *sv;
-	struct srpc_msg *msg;
-	enum srpc_msg_type type;
-
-	LASSERT(!in_interrupt());
-
-	if (ev->status) {
-		__u32 errors;
-
-		spin_lock(&srpc_data.rpc_glock);
-		if (ev->status != -ECANCELED) /* cancellation is not an error */
-			srpc_data.rpc_counters.errors++;
-		errors = srpc_data.rpc_counters.errors;
-		spin_unlock(&srpc_data.rpc_glock);
-
-		CNETERR("LNet event status %d type %d, RPC errors %u\n",
-			ev->status, ev->type, errors);
-	}
-
-	rpcev->ev_lnet = ev->type;
-
-	switch (rpcev->ev_type) {
-	default:
-		CERROR("Unknown event: status %d, type %d, lnet %d\n",
-		       rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
-		LBUG();
-	case SRPC_REQUEST_SENT:
-		if (!ev->status && ev->type != LNET_EVENT_UNLINK) {
-			spin_lock(&srpc_data.rpc_glock);
-			srpc_data.rpc_counters.rpcs_sent++;
-			spin_unlock(&srpc_data.rpc_glock);
-		}
-		/* fall through */
-	case SRPC_REPLY_RCVD:
-	case SRPC_BULK_REQ_RCVD:
-		crpc = rpcev->ev_data;
-
-		if (rpcev != &crpc->crpc_reqstev &&
-		    rpcev != &crpc->crpc_replyev &&
-		    rpcev != &crpc->crpc_bulkev) {
-			CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n",
-			       rpcev, crpc, &crpc->crpc_reqstev,
-			       &crpc->crpc_replyev, &crpc->crpc_bulkev);
-			CERROR("Bad event: status %d, type %d, lnet %d\n",
-			       rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
-			LBUG();
-		}
-
-		spin_lock(&crpc->crpc_lock);
-
-		LASSERT(!rpcev->ev_fired);
-		rpcev->ev_fired = 1;
-		rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
-						-EINTR : ev->status;
-		swi_schedule_workitem(&crpc->crpc_wi);
-
-		spin_unlock(&crpc->crpc_lock);
-		break;
-
-	case SRPC_REQUEST_RCVD:
-		scd = rpcev->ev_data;
-		sv = scd->scd_svc;
-
-		LASSERT(rpcev == &scd->scd_ev);
-
-		spin_lock(&scd->scd_lock);
-
-		LASSERT(ev->unlinked);
-		LASSERT(ev->type == LNET_EVENT_PUT ||
-			ev->type == LNET_EVENT_UNLINK);
-		LASSERT(ev->type != LNET_EVENT_UNLINK ||
-			sv->sv_shuttingdown);
-
-		buffer = container_of(ev->md.start, struct srpc_buffer, buf_msg);
-		buffer->buf_peer = ev->initiator;
-		buffer->buf_self = ev->target.nid;
-
-		LASSERT(scd->scd_buf_nposted > 0);
-		scd->scd_buf_nposted--;
-
-		if (sv->sv_shuttingdown) {
-			/*
-			 * Leave the buffer on scd->scd_buf_posted since
-			 * srpc_finish_service needs to traverse it.
-			 */
-			spin_unlock(&scd->scd_lock);
-			break;
-		}
-
-		if (scd->scd_buf_err_stamp &&
-		    scd->scd_buf_err_stamp < ktime_get_real_seconds()) {
-			/* re-enable adding buffer */
-			scd->scd_buf_err_stamp = 0;
-			scd->scd_buf_err = 0;
-		}
-
-		if (!scd->scd_buf_err &&	/* adding buffer is enabled */
-		    !scd->scd_buf_adjust &&
-		    scd->scd_buf_nposted < scd->scd_buf_low) {
-			scd->scd_buf_adjust = max(scd->scd_buf_total / 2,
-						  SFW_TEST_WI_MIN);
-			swi_schedule_workitem(&scd->scd_buf_wi);
-		}
-
-		list_del(&buffer->buf_list); /* from scd->scd_buf_posted */
-		msg = &buffer->buf_msg;
-		type = srpc_service2request(sv->sv_id);
-
-		if (ev->status || ev->mlength != sizeof(*msg) ||
-		    (msg->msg_type != type &&
-		     msg->msg_type != __swab32(type)) ||
-		    (msg->msg_magic != SRPC_MSG_MAGIC &&
-		     msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
-			CERROR("Dropping RPC (%s) from %s: status %d mlength %d type %u magic %u.\n",
-			       sv->sv_name, libcfs_id2str(ev->initiator),
-			       ev->status, ev->mlength,
-			       msg->msg_type, msg->msg_magic);
-
-			/*
-			 * NB can't call srpc_service_recycle_buffer here since
-			 * it may call LNetM[DE]Attach. The invalid magic tells
-			 * srpc_handle_rpc to drop this RPC
-			 */
-			msg->msg_magic = 0;
-		}
-
-		if (!list_empty(&scd->scd_rpc_free)) {
-			srpc = list_entry(scd->scd_rpc_free.next,
-					  struct srpc_server_rpc,
-					  srpc_list);
-			list_del(&srpc->srpc_list);
-
-			srpc_init_server_rpc(srpc, scd, buffer);
-			list_add_tail(&srpc->srpc_list,
-				      &scd->scd_rpc_active);
-			swi_schedule_workitem(&srpc->srpc_wi);
-		} else {
-			list_add_tail(&buffer->buf_list,
-				      &scd->scd_buf_blocked);
-		}
-
-		spin_unlock(&scd->scd_lock);
-
-		spin_lock(&srpc_data.rpc_glock);
-		srpc_data.rpc_counters.rpcs_rcvd++;
-		spin_unlock(&srpc_data.rpc_glock);
-		break;
-
-	case SRPC_BULK_GET_RPLD:
-		LASSERT(ev->type == LNET_EVENT_SEND ||
-			ev->type == LNET_EVENT_REPLY ||
-			ev->type == LNET_EVENT_UNLINK);
-
-		if (!ev->unlinked)
-			break; /* wait for final event */
-		/* fall through */
-	case SRPC_BULK_PUT_SENT:
-		if (!ev->status && ev->type != LNET_EVENT_UNLINK) {
-			spin_lock(&srpc_data.rpc_glock);
-
-			if (rpcev->ev_type == SRPC_BULK_GET_RPLD)
-				srpc_data.rpc_counters.bulk_get += ev->mlength;
-			else
-				srpc_data.rpc_counters.bulk_put += ev->mlength;
-
-			spin_unlock(&srpc_data.rpc_glock);
-		}
-		/* fall through */
-	case SRPC_REPLY_SENT:
-		srpc = rpcev->ev_data;
-		scd = srpc->srpc_scd;
-
-		LASSERT(rpcev == &srpc->srpc_ev);
-
-		spin_lock(&scd->scd_lock);
-
-		rpcev->ev_fired = 1;
-		rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
-				   -EINTR : ev->status;
-		swi_schedule_workitem(&srpc->srpc_wi);
-
-		spin_unlock(&scd->scd_lock);
-		break;
-	}
-}
-
-int
-srpc_startup(void)
-{
-	int rc;
-
-	memset(&srpc_data, 0, sizeof(struct smoketest_rpc));
-	spin_lock_init(&srpc_data.rpc_glock);
-
-	/* 1 second pause to avoid timestamp reuse */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	schedule_timeout(HZ);
-	srpc_data.rpc_matchbits = ((__u64)ktime_get_real_seconds()) << 48;
-
-	srpc_data.rpc_state = SRPC_STATE_NONE;
-
-	rc = LNetNIInit(LNET_PID_LUSTRE);
-	if (rc < 0) {
-		CERROR("LNetNIInit() has failed: %d\n", rc);
-		return rc;
-	}
-
-	srpc_data.rpc_state = SRPC_STATE_NI_INIT;
-
-	LNetInvalidateEQHandle(&srpc_data.rpc_lnet_eq);
-	rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq);
-	if (rc) {
-		CERROR("LNetEQAlloc() has failed: %d\n", rc);
-		goto bail;
-	}
-
-	rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
-	LASSERT(!rc);
-	rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL);
-	LASSERT(!rc);
-
-	srpc_data.rpc_state = SRPC_STATE_EQ_INIT;
-
-	rc = stt_startup();
-
-bail:
-	if (rc)
-		srpc_shutdown();
-	else
-		srpc_data.rpc_state = SRPC_STATE_RUNNING;
-
-	return rc;
-}
-
-void
-srpc_shutdown(void)
-{
-	int i;
-	int rc;
-	int state;
-
-	state = srpc_data.rpc_state;
-	srpc_data.rpc_state = SRPC_STATE_STOPPING;
-
-	switch (state) {
-	default:
-		LBUG();
-	case SRPC_STATE_RUNNING:
-		spin_lock(&srpc_data.rpc_glock);
-
-		for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
-			struct srpc_service *sv = srpc_data.rpc_services[i];
-
-			LASSERTF(!sv, "service not empty: id %d, name %s\n",
-				 i, sv->sv_name);
-		}
-
-		spin_unlock(&srpc_data.rpc_glock);
-
-		stt_shutdown();
-		/* fall through */
-	case SRPC_STATE_EQ_INIT:
-		rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
-		LASSERT(!rc);
-		rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
-		LASSERT(!rc);
-		rc = LNetEQFree(srpc_data.rpc_lnet_eq);
-		LASSERT(!rc); /* the EQ should have no user by now */
-		/* fall through */
-	case SRPC_STATE_NI_INIT:
-		LNetNIFini();
-	}
-}

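One subtlety in srpc_startup() above: ((__u64)seconds) << 48 keeps only the
low 16 bits of the wall-clock seconds in the top of the 64-bit matchbits
counter, which is presumably why the code pauses one second before seeding;
two loads within the same second would otherwise start from identical
matchbits. A standalone sketch (userspace time() standing in for
ktime_get_real_seconds()):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t matchbits;

    static void seed_matchbits(void)
    {
        /* Only the low 16 bits of the seconds survive the shift. */
        matchbits = (uint64_t)time(NULL) << 48;
    }

    static uint64_t next_id(void)
    {
        return matchbits++;   /* the real code takes rpc_glock here */
    }

    int main(void)
    {
        seed_matchbits();
        printf("first id: %#llx\n", (unsigned long long)next_id());
        return 0;
    }
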
+ 0 - 295
drivers/staging/lustre/lnet/selftest/rpc.h

@@ -1,295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __SELFTEST_RPC_H__
-#define __SELFTEST_RPC_H__
-
-#include <uapi/linux/lnet/lnetst.h>
-
-/*
- * LST wired structures
- *
- * XXX: *REPLY == *REQST + 1
- */
-enum srpc_msg_type {
-	SRPC_MSG_MKSN_REQST	= 0,
-	SRPC_MSG_MKSN_REPLY	= 1,
-	SRPC_MSG_RMSN_REQST	= 2,
-	SRPC_MSG_RMSN_REPLY	= 3,
-	SRPC_MSG_BATCH_REQST	= 4,
-	SRPC_MSG_BATCH_REPLY	= 5,
-	SRPC_MSG_STAT_REQST	= 6,
-	SRPC_MSG_STAT_REPLY	= 7,
-	SRPC_MSG_TEST_REQST	= 8,
-	SRPC_MSG_TEST_REPLY	= 9,
-	SRPC_MSG_DEBUG_REQST	= 10,
-	SRPC_MSG_DEBUG_REPLY	= 11,
-	SRPC_MSG_BRW_REQST	= 12,
-	SRPC_MSG_BRW_REPLY	= 13,
-	SRPC_MSG_PING_REQST	= 14,
-	SRPC_MSG_PING_REPLY	= 15,
-	SRPC_MSG_JOIN_REQST	= 16,
-	SRPC_MSG_JOIN_REPLY	= 17,
-};
-
-/* CAVEAT EMPTOR:
- * The 1st field of every srpc_*_reqst struct must be the matchbits of
- * its reply buffer, and the 2nd field the matchbits of its bulk buffer,
- * if any.
- *
- * The 1st field of every srpc_*_reply struct must be a __u32 status,
- * and the 2nd field the session id, if needed.
- */
-struct srpc_generic_reqst {
-	__u64			rpyid;		/* reply buffer matchbits */
-	__u64			bulkid;		/* bulk buffer matchbits */
-} WIRE_ATTR;
-
-struct srpc_generic_reply {
-	__u32			status;
-	struct lst_sid		sid;
-} WIRE_ATTR;
-
-/* FRAMEWORK RPCs */
-struct srpc_mksn_reqst {
-	__u64			mksn_rpyid;	/* reply buffer matchbits */
-	struct lst_sid		mksn_sid;	/* session id */
-	__u32			mksn_force;	/* use brute force */
-	char			mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR; /* make session request */
-
-struct srpc_mksn_reply {
-	__u32			mksn_status;	/* session status */
-	struct lst_sid		mksn_sid;	/* session id */
-	__u32			mksn_timeout;	/* session timeout */
-	char			mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR; /* make session reply */
-
-struct srpc_rmsn_reqst {
-	__u64			rmsn_rpyid;	/* reply buffer matchbits */
-	struct lst_sid		rmsn_sid;	/* session id */
-} WIRE_ATTR; /* remove session request */
-
-struct srpc_rmsn_reply {
-	__u32			rmsn_status;
-	struct lst_sid		rmsn_sid;	/* session id */
-} WIRE_ATTR; /* remove session reply */
-
-struct srpc_join_reqst {
-	__u64			join_rpyid;	/* reply buffer matchbits */
-	struct lst_sid		join_sid;	/* session id to join */
-	char			join_group[LST_NAME_SIZE]; /* group name */
-} WIRE_ATTR;
-
-struct srpc_join_reply {
-	__u32			join_status;	/* returned status */
-	struct lst_sid		join_sid;	/* session id */
-	__u32			join_timeout;	/* # seconds' inactivity to
-						 * expire
-						 */
-	char			join_session[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR;
-
-struct srpc_debug_reqst {
-	__u64			dbg_rpyid;	/* reply buffer matchbits */
-	struct lst_sid		dbg_sid;	/* session id */
-	__u32			dbg_flags;	/* bitmap of debug */
-} WIRE_ATTR;
-
-struct srpc_debug_reply {
-	__u32			dbg_status;	/* returned code */
-	struct lst_sid		dbg_sid;	/* session id */
-	__u32			dbg_timeout;	/* session timeout */
-	__u32			dbg_nbatch;	/* # of batches in the node */
-	char			dbg_name[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR;
-
-#define SRPC_BATCH_OPC_RUN	1
-#define SRPC_BATCH_OPC_STOP	2
-#define SRPC_BATCH_OPC_QUERY	3
-
-struct srpc_batch_reqst {
-	__u64		   bar_rpyid;	   /* reply buffer matchbits */
-	struct lst_sid	   bar_sid;	   /* session id */
-	struct lst_bid	   bar_bid;	   /* batch id */
-	__u32		   bar_opc;	   /* run/stop/query batch */
-	__u32		   bar_testidx;    /* index of test */
-	__u32		   bar_arg;	   /* parameters */
-} WIRE_ATTR;
-
-struct srpc_batch_reply {
-	__u32		   bar_status;	   /* status of request */
-	struct lst_sid	   bar_sid;	   /* session id */
-	__u32		   bar_active;	   /* # of active tests in batch/test */
-	__u32		   bar_time;	   /* remaining time */
-} WIRE_ATTR;
-
-struct srpc_stat_reqst {
-	__u64		   str_rpyid;	   /* reply buffer matchbits */
-	struct lst_sid	   str_sid;	   /* session id */
-	__u32		   str_type;	   /* type of stat */
-} WIRE_ATTR;
-
-struct srpc_stat_reply {
-	__u32		   str_status;
-	struct lst_sid	   str_sid;
-	struct sfw_counters	str_fw;
-	struct srpc_counters	str_rpc;
-	struct lnet_counters    str_lnet;
-} WIRE_ATTR;
-
-struct test_bulk_req {
-	__u32		   blk_opc;	   /* bulk operation code */
-	__u32		   blk_npg;	   /* # of pages */
-	__u32		   blk_flags;	   /* reserved flags */
-} WIRE_ATTR;
-
-struct test_bulk_req_v1 {
-	__u16		   blk_opc;	   /* bulk operation code */
-	__u16		   blk_flags;	   /* data check flags */
-	__u32		   blk_len;	   /* data length */
-	__u32		   blk_offset;	   /* offset */
-} WIRE_ATTR;
-
-struct test_ping_req {
-	__u32		   png_size;	   /* size of ping message */
-	__u32		   png_flags;	   /* reserved flags */
-} WIRE_ATTR;
-
-struct srpc_test_reqst {
-	__u64			tsr_rpyid;	/* reply buffer matchbits */
-	__u64			tsr_bulkid;	/* bulk buffer matchbits */
-	struct lst_sid		tsr_sid;	/* session id */
-	struct lst_bid		tsr_bid;	/* batch id */
-	__u32			tsr_service;	/* test type: bulk|ping|... */
-	__u32			tsr_loop;	/* test client loop count or
-						 * # server buffers needed
-						 */
-	__u32			tsr_concur;	/* concurrency of test */
-	__u8			tsr_is_client;	/* is test client or not */
-	__u8			tsr_stop_onerr; /* stop on error */
-	__u32			tsr_ndest;	/* # of dest nodes */
-
-	union {
-		struct test_ping_req	ping;
-		struct test_bulk_req	bulk_v0;
-		struct test_bulk_req_v1	bulk_v1;
-	} tsr_u;
-} WIRE_ATTR;
-
-struct srpc_test_reply {
-	__u32			tsr_status;	/* returned code */
-	struct lst_sid		tsr_sid;
-} WIRE_ATTR;
-
-/* TEST RPCs */
-struct srpc_ping_reqst {
-	__u64		   pnr_rpyid;
-	__u32		   pnr_magic;
-	__u32		   pnr_seq;
-	__u64		   pnr_time_sec;
-	__u64		   pnr_time_usec;
-} WIRE_ATTR;
-
-struct srpc_ping_reply {
-	__u32		   pnr_status;
-	__u32		   pnr_magic;
-	__u32		   pnr_seq;
-} WIRE_ATTR;
-
-struct srpc_brw_reqst {
-	__u64		   brw_rpyid;	   /* reply buffer matchbits */
-	__u64		   brw_bulkid;	   /* bulk buffer matchbits */
-	__u32		   brw_rw;	   /* read or write */
-	__u32		   brw_len;	   /* bulk data len */
-	__u32		   brw_flags;	   /* bulk data patterns */
-} WIRE_ATTR; /* bulk r/w request */
-
-struct srpc_brw_reply {
-	__u32		   brw_status;
-} WIRE_ATTR; /* bulk r/w reply */
-
-#define SRPC_MSG_MAGIC		0xeeb0f00d
-#define SRPC_MSG_VERSION	1
-
-struct srpc_msg {
-	__u32	msg_magic;     /* magic number */
-	__u32	msg_version;   /* message version number */
-	__u32	msg_type;      /* type of message body: srpc_msg_type */
-	__u32	msg_reserved0;
-	__u32	msg_reserved1;
-	__u32	msg_ses_feats; /* test session features */
-	union {
-		struct srpc_generic_reqst	reqst;
-		struct srpc_generic_reply	reply;
-
-		struct srpc_mksn_reqst		mksn_reqst;
-		struct srpc_mksn_reply		mksn_reply;
-		struct srpc_rmsn_reqst		rmsn_reqst;
-		struct srpc_rmsn_reply		rmsn_reply;
-		struct srpc_debug_reqst		dbg_reqst;
-		struct srpc_debug_reply		dbg_reply;
-		struct srpc_batch_reqst		bat_reqst;
-		struct srpc_batch_reply		bat_reply;
-		struct srpc_stat_reqst		stat_reqst;
-		struct srpc_stat_reply		stat_reply;
-		struct srpc_test_reqst		tes_reqst;
-		struct srpc_test_reply		tes_reply;
-		struct srpc_join_reqst		join_reqst;
-		struct srpc_join_reply		join_reply;
-
-		struct srpc_ping_reqst		ping_reqst;
-		struct srpc_ping_reply		ping_reply;
-		struct srpc_brw_reqst		brw_reqst;
-		struct srpc_brw_reply		brw_reply;
-	}     msg_body;
-} WIRE_ATTR;
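
The message is a classic tagged union: msg_type selects which member of msg_body is live, and every body variant starts with the common fields described in the comment at the top of the file. A minimal userspace sketch of the same layout (all names here are illustrative, not part of the deleted code):

#include <stdint.h>
#include <stdio.h>

enum body_type { BODY_PING = 1, BODY_STAT = 2 };

struct ping_body { uint32_t seq; };
struct stat_body { uint32_t errors; };

struct msg {
	uint32_t type;			/* discriminant: selects the body */
	union {
		struct ping_body ping;
		struct stat_body stat;
	} body;
};

int main(void)
{
	struct msg m = { .type = BODY_PING, .body.ping.seq = 7 };

	if (m.type == BODY_PING)	/* dispatch on the tag */
		printf("ping seq %u\n", (unsigned)m.body.ping.seq);
	return 0;
}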
-
-static inline void
-srpc_unpack_msg_hdr(struct srpc_msg *msg)
-{
-	if (msg->msg_magic == SRPC_MSG_MAGIC)
-		return; /* no flipping needed */
-
-	/*
-	 * We do not swap the magic number here as it is needed to
-	 * determine whether the body needs to be swapped.
-	 */
-	/* __swab32s(&msg->msg_magic); */
-	__swab32s(&msg->msg_type);
-	__swab32s(&msg->msg_version);
-	__swab32s(&msg->msg_ses_feats);
-	__swab32s(&msg->msg_reserved0);
-	__swab32s(&msg->msg_reserved1);
-}
-
-#endif /* __SELFTEST_RPC_H__ */
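
srpc_unpack_msg_hdr() relies on the usual wire-protocol trick: the sender writes the magic in host order, so if the receiver reads anything other than SRPC_MSG_MAGIC the peer had opposite endianness and every other header field must be byte-swapped, while the magic itself is left untouched so later code can still tell the body needs swapping. A self-contained sketch of that idea, assuming hypothetical msg_hdr/unpack_hdr names:

#include <stdint.h>
#include <stdio.h>

#define MSG_MAGIC 0xeeb0f00dU

/* equivalent of the kernel's __swab32s(): reverse bytes in place */
static void swab32s(uint32_t *v)
{
	*v = ((*v & 0x000000ffU) << 24) | ((*v & 0x0000ff00U) << 8) |
	     ((*v & 0x00ff0000U) >> 8)  | ((*v & 0xff000000U) >> 24);
}

struct msg_hdr {
	uint32_t magic;
	uint32_t version;
};

static void unpack_hdr(struct msg_hdr *h)
{
	if (h->magic == MSG_MAGIC)
		return;			/* sender has our byte order */
	/* keep magic as-is: it still marks the body as swapped */
	swab32s(&h->version);
}

int main(void)
{
	/* header as written by an opposite-endian sender */
	struct msg_hdr h = { .magic = 0x0df0b0eeU, .version = 0x01000000U };

	unpack_hdr(&h);
	printf("version = %u\n", (unsigned)h.version);	/* prints 1 */
	return 0;
}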

+ 0 - 622
drivers/staging/lustre/lnet/selftest/selftest.h

@@ -1,622 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/selftest.h
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-#ifndef __SELFTEST_SELFTEST_H__
-#define __SELFTEST_SELFTEST_H__
-
-#define LNET_ONLY
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-
-#include "rpc.h"
-#include "timer.h"
-
-#ifndef MADE_WITHOUT_COMPROMISE
-#define MADE_WITHOUT_COMPROMISE
-#endif
-
-#define SWI_STATE_NEWBORN		0
-#define SWI_STATE_REPLY_SUBMITTED	1
-#define SWI_STATE_REPLY_SENT		2
-#define SWI_STATE_REQUEST_SUBMITTED	3
-#define SWI_STATE_REQUEST_SENT		4
-#define SWI_STATE_REPLY_RECEIVED	5
-#define SWI_STATE_BULK_STARTED		6
-#define SWI_STATE_DONE			10
-
-/* forward refs */
-struct srpc_service;
-struct srpc_service_cd;
-struct sfw_test_unit;
-struct sfw_test_instance;
-
-/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
- * services, e.g. create/modify session.
- */
-#define SRPC_SERVICE_DEBUG		0
-#define SRPC_SERVICE_MAKE_SESSION	1
-#define SRPC_SERVICE_REMOVE_SESSION	2
-#define SRPC_SERVICE_BATCH		3
-#define SRPC_SERVICE_TEST		4
-#define SRPC_SERVICE_QUERY_STAT		5
-#define SRPC_SERVICE_JOIN		6
-#define SRPC_FRAMEWORK_SERVICE_MAX_ID	10
-/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
-#define SRPC_SERVICE_BRW		11
-#define SRPC_SERVICE_PING		12
-#define SRPC_SERVICE_MAX_ID		12
-
-#define SRPC_REQUEST_PORTAL		50
-/* a lazy portal for framework RPC requests */
-#define SRPC_FRAMEWORK_REQUEST_PORTAL	51
-/* all reply/bulk RDMAs go to this portal */
-#define SRPC_RDMA_PORTAL		52
-
-static inline enum srpc_msg_type
-srpc_service2request(int service)
-{
-	switch (service) {
-	default:
-		LBUG();
-	case SRPC_SERVICE_DEBUG:
-		return SRPC_MSG_DEBUG_REQST;
-
-	case SRPC_SERVICE_MAKE_SESSION:
-		return SRPC_MSG_MKSN_REQST;
-
-	case SRPC_SERVICE_REMOVE_SESSION:
-		return SRPC_MSG_RMSN_REQST;
-
-	case SRPC_SERVICE_BATCH:
-		return SRPC_MSG_BATCH_REQST;
-
-	case SRPC_SERVICE_TEST:
-		return SRPC_MSG_TEST_REQST;
-
-	case SRPC_SERVICE_QUERY_STAT:
-		return SRPC_MSG_STAT_REQST;
-
-	case SRPC_SERVICE_BRW:
-		return SRPC_MSG_BRW_REQST;
-
-	case SRPC_SERVICE_PING:
-		return SRPC_MSG_PING_REQST;
-
-	case SRPC_SERVICE_JOIN:
-		return SRPC_MSG_JOIN_REQST;
-	}
-}
-
-static inline enum srpc_msg_type
-srpc_service2reply(int service)
-{
-	return srpc_service2request(service) + 1;
-}
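
srpc_service2reply() can get away with "request + 1" only because enum srpc_msg_type (defined in rpc.h, not shown in this hunk) interleaves each *_REQST value with its *_REPLY value. A hypothetical sketch of that pairing convention:

#include <stdio.h>

enum msg_type {
	MSG_PING_REQST = 1,
	MSG_PING_REPLY,		/* = 2 */
	MSG_BRW_REQST,		/* = 3 */
	MSG_BRW_REPLY,		/* = 4 */
};

/* the reply type always directly follows its request type */
static enum msg_type reply_of(enum msg_type req)
{
	return req + 1;
}

int main(void)
{
	printf("%d\n", reply_of(MSG_BRW_REQST));	/* prints 4 */
	return 0;
}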
-
-enum srpc_event_type {
-	SRPC_BULK_REQ_RCVD   = 1, /* passive bulk request (PUT sink/GET source)
-				   * received
-				   */
-	SRPC_BULK_PUT_SENT   = 2, /* active bulk PUT sent (source) */
-	SRPC_BULK_GET_RPLD   = 3, /* active bulk GET replied (sink) */
-	SRPC_REPLY_RCVD      = 4, /* incoming reply received */
-	SRPC_REPLY_SENT      = 5, /* outgoing reply sent */
-	SRPC_REQUEST_RCVD    = 6, /* incoming request received */
-	SRPC_REQUEST_SENT    = 7, /* outgoing request sent */
-};
-
-/* RPC event */
-struct srpc_event {
-	enum srpc_event_type	ev_type;	/* what's up */
-	enum lnet_event_kind	ev_lnet;	/* LNet event type */
-	int		  ev_fired;  /* LNet event fired? */
-	int		  ev_status; /* LNet event status */
-	void		  *ev_data;  /* owning server/client RPC */
-};
-
-/* bulk descriptor */
-struct srpc_bulk {
-	int		 bk_len;     /* len of bulk data */
-	struct lnet_handle_md	bk_mdh;
-	int		 bk_sink;    /* sink/source */
-	int		 bk_niov;    /* # iov in bk_iovs */
-	struct bio_vec		bk_iovs[0];
-};
-
-/* message buffer descriptor */
-struct srpc_buffer {
-	struct list_head  buf_list; /* chain on srpc_service::*_msgq */
-	struct srpc_msg	  buf_msg;
-	struct lnet_handle_md	buf_mdh;
-	lnet_nid_t	  buf_self;
-	struct lnet_process_id	buf_peer;
-};
-
-struct swi_workitem;
-typedef void (*swi_action_t) (struct swi_workitem *);
-
-struct swi_workitem {
-	struct workqueue_struct *swi_wq;
-	struct work_struct  swi_work;
-	swi_action_t	    swi_action;
-	int		    swi_state;
-};
-
-/* server-side state of an RPC */
-struct srpc_server_rpc {
-	/* chain on srpc_service::*_rpcq */
-	struct list_head       srpc_list;
-	struct srpc_service_cd *srpc_scd;
-	struct swi_workitem	srpc_wi;
-	struct srpc_event	srpc_ev;	/* bulk/reply event */
-	lnet_nid_t	       srpc_self;
-	struct lnet_process_id	srpc_peer;
-	struct srpc_msg		srpc_replymsg;
-	struct lnet_handle_md	srpc_replymdh;
-	struct srpc_buffer	*srpc_reqstbuf;
-	struct srpc_bulk	*srpc_bulk;
-
-	unsigned int	       srpc_aborted; /* being given up */
-	int		       srpc_status;
-	void		       (*srpc_done)(struct srpc_server_rpc *);
-};
-
-/* client-side state of an RPC */
-struct srpc_client_rpc {
-	struct list_head  crpc_list;	  /* chain on user's lists */
-	spinlock_t	  crpc_lock;	  /* serialize */
-	int		  crpc_service;
-	atomic_t	  crpc_refcount;
-	int		  crpc_timeout;   /* # seconds to wait for reply */
-	struct stt_timer       crpc_timer;
-	struct swi_workitem	crpc_wi;
-	struct lnet_process_id	crpc_dest;
-
-	void		  (*crpc_done)(struct srpc_client_rpc *);
-	void		  (*crpc_fini)(struct srpc_client_rpc *);
-	int		  crpc_status;	  /* completion status */
-	void		  *crpc_priv;	  /* caller data */
-
-	/* state flags */
-	unsigned int	  crpc_aborted:1; /* being given up */
-	unsigned int	  crpc_closed:1;  /* completed */
-
-	/* RPC events */
-	struct srpc_event	crpc_bulkev;	/* bulk event */
-	struct srpc_event	crpc_reqstev;	/* request event */
-	struct srpc_event	crpc_replyev;	/* reply event */
-
-	/* bulk, request (reqst), and reply exchanged on the wire */
-	struct srpc_msg		crpc_reqstmsg;
-	struct srpc_msg		crpc_replymsg;
-	struct lnet_handle_md	crpc_reqstmdh;
-	struct lnet_handle_md	crpc_replymdh;
-	struct srpc_bulk	crpc_bulk;
-};
-
-#define srpc_client_rpc_size(rpc)					\
-offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
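
srpc_client_rpc_size() works because crpc_bulk is the last member and bk_iovs is a variable-length tail, so the RPC's real allocation size depends on bk_niov. A standalone sketch of the same offsetof() sizing trick (hypothetical names; the variable array index inside offsetof() is a GNU extension the kernel also relies on):

#include <stddef.h>
#include <stdlib.h>

struct iov { void *base; size_t len; };

struct rpc {
	int status;
	int niov;
	struct iov iovs[];	/* variable-length tail, last member */
};

/* allocate only the iov slots this RPC actually needs */
static struct rpc *rpc_alloc(int niov)
{
	struct rpc *r = calloc(1, offsetof(struct rpc, iovs[niov]));

	if (r)
		r->niov = niov;
	return r;
}

int main(void)
{
	struct rpc *r = rpc_alloc(4);	/* room for exactly 4 iovs */

	free(r);
	return 0;
}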
-
-#define srpc_client_rpc_addref(rpc)					\
-do {									\
-	CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n",				\
-	       (rpc), libcfs_id2str((rpc)->crpc_dest),			\
-	       atomic_read(&(rpc)->crpc_refcount));			\
-	LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0);		\
-	atomic_inc(&(rpc)->crpc_refcount);				\
-} while (0)
-
-#define srpc_client_rpc_decref(rpc)					\
-do {									\
-	CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n",				\
-	       (rpc), libcfs_id2str((rpc)->crpc_dest),			\
-	       atomic_read(&(rpc)->crpc_refcount));			\
-	LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0);		\
-	if (atomic_dec_and_test(&(rpc)->crpc_refcount))			\
-		srpc_destroy_client_rpc(rpc);				\
-} while (0)
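
The addref/decref pair above is plain reference counting: the creator holds one reference, every taker increments, and whoever drops the count to zero destroys the object. A minimal userspace equivalent using C11 atomics in place of the kernel's atomic_t (names hypothetical):

#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;
};

static void obj_addref(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
}

static void obj_decref(struct obj *o)
{
	/* fetch_sub returns the old value: 1 means last reference */
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		free(o);
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (!o)
		return 1;
	atomic_init(&o->refcount, 1);	/* caller's initial reference */
	obj_addref(o);			/* second user */
	obj_decref(o);
	obj_decref(o);			/* drops to 0: freed here */
	return 0;
}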
-
-#define srpc_event_pending(rpc)   (!(rpc)->crpc_bulkev.ev_fired ||	\
-				   !(rpc)->crpc_reqstev.ev_fired ||	\
-				   !(rpc)->crpc_replyev.ev_fired)
-
-/* CPU partition data of srpc service */
-struct srpc_service_cd {
-	/** serialize */
-	spinlock_t		scd_lock;
-	/** backref to service */
-	struct srpc_service	*scd_svc;
-	/** event buffer */
-	struct srpc_event	scd_ev;
-	/** free RPC descriptors */
-	struct list_head	scd_rpc_free;
-	/** in-flight RPCs */
-	struct list_head	scd_rpc_active;
-	/** workitem for posting buffer */
-	struct swi_workitem	scd_buf_wi;
-	/** CPT id */
-	int			scd_cpt;
-	/** error code for scd_buf_wi */
-	int			scd_buf_err;
-	/** timestamp for scd_buf_err */
-	time64_t		scd_buf_err_stamp;
-	/** total # request buffers */
-	int			scd_buf_total;
-	/** # posted request buffers */
-	int			scd_buf_nposted;
-	/** # request buffers currently being posted */
-	int			scd_buf_posting;
-	/** allocate more buffers if scd_buf_nposted < scd_buf_low */
-	int			scd_buf_low;
-	/** # of buffers to add/remove */
-	int			scd_buf_adjust;
-	/** posted message buffers */
-	struct list_head	scd_buf_posted;
-	/** blocked for RPC descriptor */
-	struct list_head	scd_buf_blocked;
-};
-
-/* number of server workitems (mini-threads) for the test service */
-#define SFW_TEST_WI_MIN		256
-#define SFW_TEST_WI_MAX		2048
-/* extra buffers for tolerating buggy peers, or an unbalanced number
- * of peers between partitions
- */
-#define SFW_TEST_WI_EXTRA	64
-
-/* number of server workitems (mini-threads) for the framework service */
-#define SFW_FRWK_WI_MIN		16
-#define SFW_FRWK_WI_MAX		256
-
-struct srpc_service {
-	int			sv_id;		/* service id */
-	const char		*sv_name;	/* human readable name */
-	int			sv_wi_total;	/* total server workitems */
-	int			sv_shuttingdown;
-	int			sv_ncpts;
-	/* percpt data for srpc_service */
-	struct srpc_service_cd	**sv_cpt_data;
-	/* Service callbacks:
-	 * - sv_handler: process incoming RPC request
-	 * - sv_bulk_ready: notify that bulk data has arrived
-	 */
-	int (*sv_handler)(struct srpc_server_rpc *);
-	int (*sv_bulk_ready)(struct srpc_server_rpc *, int);
-};
-
-struct sfw_session {
-	struct list_head sn_list;    /* chain on fw_zombie_sessions */
-	struct lst_sid	 sn_id;      /* unique identifier */
-	unsigned int	 sn_timeout; /* # seconds' inactivity to expire */
-	int		 sn_timer_active;
-	unsigned int	 sn_features;
-	struct stt_timer      sn_timer;
-	struct list_head sn_batches; /* list of batches */
-	char		 sn_name[LST_NAME_SIZE];
-	atomic_t	 sn_refcount;
-	atomic_t	 sn_brw_errors;
-	atomic_t	 sn_ping_errors;
-	unsigned long	 sn_started;
-};
-
-#define sfw_sid_equal(sid0, sid1)     ((sid0).ses_nid == (sid1).ses_nid && \
-				       (sid0).ses_stamp == (sid1).ses_stamp)
-
-struct sfw_batch {
-	struct list_head bat_list;	/* chain on sn_batches */
-	struct lst_bid	 bat_id;	/* batch id */
-	int		 bat_error;	/* error code of batch */
-	struct sfw_session	*bat_session;	/* batch's session */
-	atomic_t	 bat_nactive;	/* # of active tests */
-	struct list_head bat_tests;	/* test instances */
-};
-
-struct sfw_test_client_ops {
-	int  (*tso_init)(struct sfw_test_instance *tsi); /* initialize test
-							  * client
-							  */
-	void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test
-							  * client
-							  */
-	int  (*tso_prep_rpc)(struct sfw_test_unit *tsu,
-			     struct lnet_process_id dest,
-			     struct srpc_client_rpc **rpc);	/* prepare a test RPC */
-	void (*tso_done_rpc)(struct sfw_test_unit *tsu,
-			     struct srpc_client_rpc *rpc);	/* finish a test RPC */
-};
-
-struct sfw_test_instance {
-	struct list_head	   tsi_list;		/* chain on batch */
-	int			   tsi_service;		/* test type */
-	struct sfw_batch		*tsi_batch;	/* batch */
-	struct sfw_test_client_ops	*tsi_ops;	/* test client ops */
-
-	/* public parameter for all test units */
-	unsigned int		   tsi_is_client:1;	/* is test client */
-	unsigned int		   tsi_stoptsu_onerr:1; /* stop tsu on error */
-	int			   tsi_concur;		/* concurrency */
-	int			   tsi_loop;		/* loop count */
-
-	/* status of test instance */
-	spinlock_t		   tsi_lock;		/* serialize */
-	unsigned int		   tsi_stopping:1;	/* test is stopping */
-	atomic_t		   tsi_nactive;		/* # of active test
-							 * unit
-							 */
-	struct list_head	   tsi_units;		/* test units */
-	struct list_head	   tsi_free_rpcs;	/* free rpcs */
-	struct list_head	   tsi_active_rpcs;	/* active rpcs */
-
-	union {
-		struct test_ping_req	ping;		/* ping parameter */
-		struct test_bulk_req	bulk_v0;	/* bulk parameter */
-		struct test_bulk_req_v1	bulk_v1;	/* bulk v1 parameter */
-	} tsi_u;
-};
-
-/*
- * XXX: trailing (PAGE_SIZE % sizeof(struct lnet_process_id_packed)) bytes
- * at the end of pages are not used
- */
-#define SFW_MAX_CONCUR	   LST_MAX_CONCUR
-#define SFW_ID_PER_PAGE    (PAGE_SIZE / sizeof(struct lnet_process_id_packed))
-#define SFW_MAX_NDESTS	   (LNET_MAX_IOV * SFW_ID_PER_PAGE)
-#define sfw_id_pages(n)    (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
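
sfw_id_pages() is ordinary ceiling division: destination ids are packed into pages, and a partial page still costs a whole one. A tiny illustration, assuming for the sake of the numbers a 4096-byte page and a 24-byte packed id:

#include <stdio.h>

#define IDS_PER_PAGE	170	/* e.g. 4096-byte page / 24-byte packed id */
#define id_pages(n)	(((n) + IDS_PER_PAGE - 1) / IDS_PER_PAGE)

int main(void)
{
	/* 170 ids fit in one page; the 171st forces a second one */
	printf("%d %d\n", id_pages(170), id_pages(171));	/* 1 2 */
	return 0;
}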
-
-struct sfw_test_unit {
-	struct list_head    tsu_list;	   /* chain on tsi_units of the instance */
-	struct lnet_process_id		tsu_dest;	/* id of dest node */
-	int		    tsu_loop;	   /* loop count of the test */
-	struct sfw_test_instance	*tsu_instance; /* pointer to test instance */
-	void		    *tsu_private;  /* private data */
-	struct swi_workitem	tsu_worker;	/* workitem of the test unit */
-};
-
-struct sfw_test_case {
-	struct list_head      tsc_list;		/* chain on fw_tests */
-	struct srpc_service		*tsc_srv_service;	/* test service */
-	struct sfw_test_client_ops	*tsc_cli_ops;	/* ops of test client */
-};
-
-struct srpc_client_rpc *
-sfw_create_rpc(struct lnet_process_id peer, int service,
-	       unsigned int features, int nbulkiov, int bulklen,
-	       void (*done)(struct srpc_client_rpc *), void *priv);
-int sfw_create_test_rpc(struct sfw_test_unit *tsu,
-			struct lnet_process_id peer, unsigned int features,
-			int nblk, int blklen, struct srpc_client_rpc **rpc);
-void sfw_abort_rpc(struct srpc_client_rpc *rpc);
-void sfw_post_rpc(struct srpc_client_rpc *rpc);
-void sfw_client_rpc_done(struct srpc_client_rpc *rpc);
-void sfw_unpack_message(struct srpc_msg *msg);
-void sfw_free_pages(struct srpc_server_rpc *rpc);
-void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i);
-int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
-		    int sink);
-int sfw_make_session(struct srpc_mksn_reqst *request,
-		     struct srpc_mksn_reply *reply);
-
-struct srpc_client_rpc *
-srpc_create_client_rpc(struct lnet_process_id peer, int service,
-		       int nbulkiov, int bulklen,
-		       void (*rpc_done)(struct srpc_client_rpc *),
-		       void (*rpc_fini)(struct srpc_client_rpc *), void *priv);
-void srpc_post_rpc(struct srpc_client_rpc *rpc);
-void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
-void srpc_free_bulk(struct srpc_bulk *bk);
-struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off,
-				  unsigned int bulk_npg, unsigned int bulk_len,
-				  int sink);
-void srpc_send_rpc(struct swi_workitem *wi);
-int srpc_send_reply(struct srpc_server_rpc *rpc);
-int srpc_add_service(struct srpc_service *sv);
-int srpc_remove_service(struct srpc_service *sv);
-void srpc_shutdown_service(struct srpc_service *sv);
-void srpc_abort_service(struct srpc_service *sv);
-int srpc_finish_service(struct srpc_service *sv);
-int srpc_service_add_buffers(struct srpc_service *sv, int nbuffer);
-void srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer);
-void srpc_get_counters(struct srpc_counters *cnt);
-void srpc_set_counters(const struct srpc_counters *cnt);
-
-extern struct workqueue_struct *lst_serial_wq;
-extern struct workqueue_struct **lst_test_wq;
-
-static inline int
-srpc_serv_is_framework(struct srpc_service *svc)
-{
-	return svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID;
-}
-
-static inline void
-swi_wi_action(struct work_struct *wi)
-{
-	struct swi_workitem *swi;
-
-	swi = container_of(wi, struct swi_workitem, swi_work);
-
-	swi->swi_action(swi);
-}
-
-static inline void
-swi_init_workitem(struct swi_workitem *swi,
-		  swi_action_t action, struct workqueue_struct *wq)
-{
-	swi->swi_wq = wq;
-	swi->swi_action = action;
-	swi->swi_state = SWI_STATE_NEWBORN;
-	INIT_WORK(&swi->swi_work, swi_wi_action);
-}
-
-static inline void
-swi_schedule_workitem(struct swi_workitem *wi)
-{
-	queue_work(wi->swi_wq, &wi->swi_work);
-}
-
-static inline int
-swi_cancel_workitem(struct swi_workitem *swi)
-{
-	return cancel_work_sync(&swi->swi_work);
-}
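
The swi_* helpers follow the standard workqueue pattern: a generic work_struct is embedded in the workitem, and the trampoline recovers the enclosing object with container_of() before dispatching to the type-specific action. A userspace sketch of that dispatch (container_of() open-coded, all names hypothetical):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work {
	void (*fn)(struct work *);
};

struct workitem {
	int state;
	void (*action)(struct workitem *);
	struct work work;		/* embedded generic handle */
};

/* generic callback: recover the workitem, then dispatch */
static void wi_trampoline(struct work *w)
{
	struct workitem *wi = container_of(w, struct workitem, work);

	wi->action(wi);
}

static void print_state(struct workitem *wi)
{
	printf("state = %d\n", wi->state);
}

int main(void)
{
	struct workitem wi = {
		.state = 0,
		.action = print_state,
		.work = { .fn = wi_trampoline },
	};

	wi.work.fn(&wi.work);	/* what queue_work() would eventually do */
	return 0;
}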
-
-int sfw_startup(void);
-int srpc_startup(void);
-void sfw_shutdown(void);
-void srpc_shutdown(void);
-
-static inline void
-srpc_destroy_client_rpc(struct srpc_client_rpc *rpc)
-{
-	LASSERT(rpc);
-	LASSERT(!srpc_event_pending(rpc));
-	LASSERT(!atomic_read(&rpc->crpc_refcount));
-
-	if (!rpc->crpc_fini)
-		kfree(rpc);
-	else
-		(*rpc->crpc_fini)(rpc);
-}
-
-static inline void
-srpc_init_client_rpc(struct srpc_client_rpc *rpc, struct lnet_process_id peer,
-		     int service, int nbulkiov, int bulklen,
-		     void (*rpc_done)(struct srpc_client_rpc *),
-		     void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
-{
-	LASSERT(nbulkiov <= LNET_MAX_IOV);
-
-	memset(rpc, 0, offsetof(struct srpc_client_rpc,
-				crpc_bulk.bk_iovs[nbulkiov]));
-
-	INIT_LIST_HEAD(&rpc->crpc_list);
-	swi_init_workitem(&rpc->crpc_wi, srpc_send_rpc,
-			  lst_test_wq[lnet_cpt_of_nid(peer.nid)]);
-	spin_lock_init(&rpc->crpc_lock);
-	atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
-
-	rpc->crpc_dest = peer;
-	rpc->crpc_priv = priv;
-	rpc->crpc_service = service;
-	rpc->crpc_bulk.bk_len = bulklen;
-	rpc->crpc_bulk.bk_niov = nbulkiov;
-	rpc->crpc_done = rpc_done;
-	rpc->crpc_fini = rpc_fini;
-	LNetInvalidateMDHandle(&rpc->crpc_reqstmdh);
-	LNetInvalidateMDHandle(&rpc->crpc_replymdh);
-	LNetInvalidateMDHandle(&rpc->crpc_bulk.bk_mdh);
-
-	/* no event is expected at this point */
-	rpc->crpc_bulkev.ev_fired = 1;
-	rpc->crpc_reqstev.ev_fired = 1;
-	rpc->crpc_replyev.ev_fired = 1;
-
-	rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC;
-	rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
-	rpc->crpc_reqstmsg.msg_type = srpc_service2request(service);
-}
-
-static inline const char *
-swi_state2str(int state)
-{
-#define STATE2STR(x) case x: return #x
-	switch (state) {
-	default:
-		LBUG();
-	STATE2STR(SWI_STATE_NEWBORN);
-	STATE2STR(SWI_STATE_REPLY_SUBMITTED);
-	STATE2STR(SWI_STATE_REPLY_SENT);
-	STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
-	STATE2STR(SWI_STATE_REQUEST_SENT);
-	STATE2STR(SWI_STATE_REPLY_RECEIVED);
-	STATE2STR(SWI_STATE_BULK_STARTED);
-	STATE2STR(SWI_STATE_DONE);
-	}
-#undef STATE2STR
-}
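
STATE2STR is the usual preprocessor stringification trick: one macro generates both the case label and the returned name, so the table cannot drift out of sync with the enum. In standalone form (enum values hypothetical):

#include <stdio.h>

enum state { NEWBORN, SENT, DONE };

static const char *state2str(enum state s)
{
#define S2S(x) case x: return #x	/* label and name from one token */
	switch (s) {
	S2S(NEWBORN);
	S2S(SENT);
	S2S(DONE);
	}
#undef S2S
	return "UNKNOWN";
}

int main(void)
{
	printf("%s\n", state2str(SENT));	/* prints "SENT" */
	return 0;
}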
-
-#define selftest_wait_events()					\
-	do {							\
-		set_current_state(TASK_UNINTERRUPTIBLE);	\
-		schedule_timeout(HZ / 10);	\
-	} while (0)
-
-#define lst_wait_until(cond, lock, fmt, ...)				\
-do {									\
-	int __I = 2;							\
-	while (!(cond)) {						\
-		CDEBUG(is_power_of_2(++__I) ? D_WARNING : D_NET,	\
-		       fmt, ## __VA_ARGS__);				\
-		spin_unlock(&(lock));					\
-									\
-		selftest_wait_events();					\
-									\
-		spin_lock(&(lock));					\
-	}								\
-} while (0)
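
lst_wait_until() throttles its own logging: the condition is re-checked in a loop, but a warning is emitted only when the retry count is a power of two, so a stuck condition yields O(log n) warnings instead of n. The same idea in a minimal sketch (names hypothetical):

#include <stdio.h>

static int is_pow2(unsigned int v)
{
	return v && !(v & (v - 1));
}

static void poll_until(int (*cond)(void))
{
	unsigned int i = 2;

	while (!cond()) {
		if (is_pow2(++i))	/* warn at 4, 8, 16, ... only */
			fprintf(stderr, "still waiting (attempt %u)\n", i);
		/* a real implementation would sleep/yield here */
	}
}

static int tries;
static int ready(void)
{
	return ++tries > 40;
}

int main(void)
{
	poll_until(ready);
	return 0;
}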
-
-static inline void
-srpc_wait_service_shutdown(struct srpc_service *sv)
-{
-	int i = 2;
-
-	LASSERT(sv->sv_shuttingdown);
-
-	while (!srpc_finish_service(sv)) {
-		i++;
-		CDEBUG(((i & -i) == i) ? D_WARNING : D_NET,
-		       "Waiting for %s service to shutdown...\n",
-		       sv->sv_name);
-		selftest_wait_events();
-	}
-}
-
-extern struct sfw_test_client_ops brw_test_client;
-void brw_init_test_client(void);
-
-extern struct srpc_service brw_test_service;
-void brw_init_test_service(void);
-
-extern struct sfw_test_client_ops ping_test_client;
-void ping_init_test_client(void);
-
-extern struct srpc_service ping_test_service;
-void ping_init_test_service(void);
-
-#endif /* __SELFTEST_SELFTEST_H__ */

+ 0 - 244
drivers/staging/lustre/lnet/selftest/timer.c

@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/timer.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-/*
- * Timers are implemented as a sorted queue of expiry times. The queue
- * is slotted, with each slot holding timers which expire in a
- * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are
- * sorted by increasing expiry time. The number of slots is 2**7 (128),
- * to cover a time period of 1024 seconds into the future before wrapping.
- */
-#define STTIMER_MINPOLL        3	/* log2 min poll interval (8 s) */
-#define STTIMER_SLOTTIME	BIT(STTIMER_MINPOLL)
-#define STTIMER_SLOTTIMEMASK   (~(STTIMER_SLOTTIME - 1))
-#define STTIMER_NSLOTS		BIT(7)
-#define STTIMER_SLOT(t)	       (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
-						    (STTIMER_NSLOTS - 1))])
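
A quick standalone check of the slot arithmetic: 128 slots of 8 seconds each cover 1024 seconds before STTIMER_SLOT() wraps back onto the same list head:

#include <stdio.h>

#define MINPOLL		3
#define SLOTTIME	(1UL << MINPOLL)	/* 8 s per slot */
#define NSLOTS		(1UL << 7)		/* 128 slots */
#define SLOT(t)		(((t) >> MINPOLL) & (NSLOTS - 1))

int main(void)
{
	unsigned long t = 1000;

	printf("%lu\n", SLOT(t));			/* 125 */
	printf("%lu\n", SLOT(t + SLOTTIME * NSLOTS));	/* wraps: 125 */
	return 0;
}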
-
-static struct st_timer_data {
-	spinlock_t	  stt_lock;
-	unsigned long	  stt_prev_slot; /* start time of the slot processed
-					  * previously
-					  */
-	struct list_head  stt_hash[STTIMER_NSLOTS];
-	int		  stt_shuttingdown;
-	wait_queue_head_t stt_waitq;
-	int		  stt_nthreads;
-} stt_data;
-
-void
-stt_add_timer(struct stt_timer *timer)
-{
-	struct list_head *pos;
-
-	spin_lock(&stt_data.stt_lock);
-
-	LASSERT(stt_data.stt_nthreads > 0);
-	LASSERT(!stt_data.stt_shuttingdown);
-	LASSERT(timer->stt_func);
-	LASSERT(list_empty(&timer->stt_list));
-	LASSERT(timer->stt_expires > ktime_get_real_seconds());
-
-	/* a simple insertion sort */
-	list_for_each_prev(pos, STTIMER_SLOT(timer->stt_expires)) {
-		struct stt_timer *old = list_entry(pos, struct stt_timer,
-						   stt_list);
-
-		if (timer->stt_expires >= old->stt_expires)
-			break;
-	}
-	list_add(&timer->stt_list, pos);
-
-	spin_unlock(&stt_data.stt_lock);
-}
-
-/*
- * The function returns whether it has deactivated a pending timer or not.
- * (i.e. del_timer() of an inactive timer returns 0, del_timer() of an
- * active timer returns 1.)
- *
- * CAVEAT EMPTOR:
- * When 0 is returned, it is possible that timer->stt_func _is_ running on
- * another CPU.
- */
-int
-stt_del_timer(struct stt_timer *timer)
-{
-	int ret = 0;
-
-	spin_lock(&stt_data.stt_lock);
-
-	LASSERT(stt_data.stt_nthreads > 0);
-	LASSERT(!stt_data.stt_shuttingdown);
-
-	if (!list_empty(&timer->stt_list)) {
-		ret = 1;
-		list_del_init(&timer->stt_list);
-	}
-
-	spin_unlock(&stt_data.stt_lock);
-	return ret;
-}
-
-/* called with stt_data.stt_lock held */
-static int
-stt_expire_list(struct list_head *slot, time64_t now)
-{
-	int expired = 0;
-	struct stt_timer *timer;
-
-	while (!list_empty(slot)) {
-		timer = list_entry(slot->next, struct stt_timer, stt_list);
-
-		if (timer->stt_expires > now)
-			break;
-
-		list_del_init(&timer->stt_list);
-		spin_unlock(&stt_data.stt_lock);
-
-		expired++;
-		timer->stt_func(timer->stt_data);
-
-		spin_lock(&stt_data.stt_lock);
-	}
-
-	return expired;
-}
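
stt_expire_list() drops stt_lock around each callback, the classic pattern for invoking user code from under a spinlock: unlink the entry while locked, release the lock for the call, re-take it to continue the scan. A userspace sketch of the same pattern with a pthread mutex (names hypothetical):

#include <pthread.h>

struct timer {
	struct timer *next;
	void (*fn)(void *);
	void *arg;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct timer *head;

static void expire_all(void)
{
	pthread_mutex_lock(&lock);
	while (head) {
		struct timer *t = head;

		head = t->next;			/* unlink under the lock */
		pthread_mutex_unlock(&lock);	/* drop it for the callback */
		t->fn(t->arg);			/* may itself take the lock */
		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
}

static void noop(void *arg)
{
	(void)arg;
}

int main(void)
{
	struct timer t = { .next = NULL, .fn = noop, .arg = NULL };

	head = &t;
	expire_all();
	return 0;
}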
-
-static int
-stt_check_timers(unsigned long *last)
-{
-	int expired = 0;
-	time64_t now;
-	unsigned long this_slot;
-
-	now = ktime_get_real_seconds();
-	this_slot = now & STTIMER_SLOTTIMEMASK;
-
-	spin_lock(&stt_data.stt_lock);
-
-	while (time_after_eq(this_slot, *last)) {
-		expired += stt_expire_list(STTIMER_SLOT(this_slot), now);
-		this_slot = this_slot - STTIMER_SLOTTIME;
-	}
-
-	*last = now & STTIMER_SLOTTIMEMASK;
-	spin_unlock(&stt_data.stt_lock);
-	return expired;
-}
-
-static int
-stt_timer_main(void *arg)
-{
-	int rc = 0;
-
-	while (!stt_data.stt_shuttingdown) {
-		stt_check_timers(&stt_data.stt_prev_slot);
-
-		rc = wait_event_timeout(stt_data.stt_waitq,
-					stt_data.stt_shuttingdown,
-					STTIMER_SLOTTIME * HZ);
-	}
-
-	spin_lock(&stt_data.stt_lock);
-	stt_data.stt_nthreads--;
-	spin_unlock(&stt_data.stt_lock);
-	return rc;
-}
-
-static int
-stt_start_timer_thread(void)
-{
-	struct task_struct *task;
-
-	LASSERT(!stt_data.stt_shuttingdown);
-
-	task = kthread_run(stt_timer_main, NULL, "st_timer");
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-
-	spin_lock(&stt_data.stt_lock);
-	stt_data.stt_nthreads++;
-	spin_unlock(&stt_data.stt_lock);
-	return 0;
-}
-
-int
-stt_startup(void)
-{
-	int rc = 0;
-	int i;
-
-	stt_data.stt_shuttingdown = 0;
-	stt_data.stt_prev_slot = ktime_get_real_seconds() & STTIMER_SLOTTIMEMASK;
-
-	spin_lock_init(&stt_data.stt_lock);
-	for (i = 0; i < STTIMER_NSLOTS; i++)
-		INIT_LIST_HEAD(&stt_data.stt_hash[i]);
-
-	stt_data.stt_nthreads = 0;
-	init_waitqueue_head(&stt_data.stt_waitq);
-	rc = stt_start_timer_thread();
-	if (rc)
-		CERROR("Can't spawn timer thread: %d\n", rc);
-
-	return rc;
-}
-
-void
-stt_shutdown(void)
-{
-	int i;
-
-	spin_lock(&stt_data.stt_lock);
-
-	for (i = 0; i < STTIMER_NSLOTS; i++)
-		LASSERT(list_empty(&stt_data.stt_hash[i]));
-
-	stt_data.stt_shuttingdown = 1;
-
-	wake_up(&stt_data.stt_waitq);
-	lst_wait_until(!stt_data.stt_nthreads, stt_data.stt_lock,
-		       "waiting for %d threads to terminate\n",
-		       stt_data.stt_nthreads);
-
-	spin_unlock(&stt_data.stt_lock);
-}

Some files were not shown because too many files changed in this diff